/*
 * UtilizationMonitor
 * ------------------
 * This class monitors Linux per-core CPU activity by periodically reading
 * /proc/stat in a background Boost thread.
 *
 * Data source
 * -----------
 * The monitor reads the "cpuN" lines from /proc/stat, where each row
 * corresponds to one logical CPU core and each column corresponds to one
 * cumulative scheduler counter:
 *
 *   user, nice, system, idle, iowait, irq, softirq, steal, guest, guest_nice
 *
 * These values are cumulative since boot and are expressed in scheduler ticks.
 * They are not percentages by themselves.
 *
 * Internal storage
 * ----------------
 * The parsed /proc/stat values are stored in a dynamically allocated 2D array:
 *
 *   matrix[cpu_index][column_index]
 *
 * where:
 *   - row    = CPU core (cpu0, cpu1, ..., cpuN)
 *   - column = one of the counters above
 *
 * The monitor keeps two snapshots:
 *   1. previous : older sample
 *   2. current  : newer sample
 *
 * All metrics are computed from the difference between these two snapshots.
 *
 * Sampling model
 * --------------
 * Every interval_seconds seconds, the monitoring thread:
 *
 *   1. Reads /proc/stat
 *   2. Extracts the per-core "cpuN" rows
 *   3. Computes delta values between the previous and current samples
 *   4. Derives per-core percentages from those deltas
 *   5. Updates per-core running means
 *   6. Updates the global EWMA utilization metric
 *
 * Per-core raw deltas
 * -------------------
 * For each CPU core, the class computes:
 *
 *   running_delta = delta(user + nice)
 *   iowait_delta  = delta(iowait)
 *   idle_delta    = delta(idle)
 *   total_delta   = delta(user + nice + system + idle + iowait
 *                         + irq + softirq + steal + guest + guest_nice)
 *
 * Busy time is defined as:
 *
 *   busy_delta = delta(user + nice + system + iowait
 *                     + irq + softirq + steal + guest + guest_nice)
 *
 * In other words:
 *
 *   total_delta = busy_delta + idle_delta
 *
 * Percent metrics
 * ---------------
 * The class converts per-core deltas to percentages using:
 *
 *   running_percent     = 100 * running_delta / total_delta
 *   iowait_percent      = 100 * iowait_delta  / total_delta
 *   idle_percent        = 100 * idle_delta    / total_delta
 *   utilization_percent = 100 * busy_delta    / total_delta
 *
 * Notes:
 *   - running_percent includes only user + nice time
 *   - utilization_percent includes all non-idle activity
 *   - iowait is included in utilization_percent
 *   - idle_percent is computed separately from the idle column only
 *
 * Per-core mean utilization
 * -------------------------
 * For each CPU core, the class maintains the arithmetic mean of the observed
 * utilization percentages across all completed sampling intervals:
 *
 *   mean_util_i(k) =
 *       (mean_util_i(k-1) * (k-1) + util_i(k)) / k
 *
 * where:
 *   - i = CPU index
 *   - k = sample number
 *   - util_i(k) = current utilization percent of core i at sample k
 *
 * This produces one long-term mean utilization value per core.
 *
 * Global EWMA utilization
 * -----------------------
 * The class also maintains one global utilization value based on:
 *
 *   combined = a * max(mean_utilization_per_core)
 *            + (1 - a) * min(current_utilization_per_core)
 *
 * Then an exponentially weighted moving average is applied:
 *
 *   ewma_t = beta * ewma_(t-1) + (1 - beta) * combined_t
 *
 * Initialization:
 *   - On the first valid sample, ewma is set directly to combined
 *   - On later samples, the recursive EWMA update is used
 *
 * Meaning of parameters
 * ---------------------
 * a
 *   Controls how much the combined metric favors the maximum long-term
 *   per-core mean utilization versus the minimum current per-core utilization.
 *
 *   a near 1.0:
 *       favors the most persistently busy core
 *
 *   a near 0.0:
 *       favors the least busy core at the current sample
 *
 * beta
 *   EWMA smoothing factor.
 *
 *   beta near 1.0:
 *       slower, smoother response
 *
 *   beta near 0.0:
 *       faster response to recent changes
 *
 * Threading model
 * ---------------
 * The monitor runs in its own Boost thread after start() is called.
 * The thread sleeps for the configured sampling interval and updates the
 * metrics in the background.
 *
 * Shared values such as the current EWMA utilization are protected with
 * a mutex so that another thread (for example main.cpp) can safely query them.
 *
 * Public usage
 * ------------
 * Typical use:
 *
 *   1. Construct the monitor
 *   2. Call start()
 *   3. Let the background thread collect samples
 *   4. Call getEwmaUtilization() whenever needed
 *   5. Call stop() before shutdown
 *
 * Important behavior
 * ------------------
 * - The class does not print or save results by itself
 * - The first useful metrics appear only after one full sampling interval
 * - The returned EWMA value is 0.0 until enough data has been collected
 * - CPU counts are detected from /proc/stat by counting cpuN rows
 */


#ifndef UTILIZATION_MONITOR_H
#define UTILIZATION_MONITOR_H

#include <boost/thread.hpp>
#include <boost/chrono.hpp>
#include <boost/atomic.hpp>

#include <cstddef>
#include <cstdint>
#include <string>

// Column indices into one row of the parsed /proc/stat matrix
// (matrix[cpu_index][column_index]).
//
// The enumerators follow the field order of a "cpuN" line:
//   user, nice, system, idle, iowait, irq, softirq, steal, guest, guest_nice
// so their numeric values must stay in this order.
enum CpuColumn {
    COL_USER       = 0,   // "user" counter
    COL_NICE       = 1,   // "nice" counter
    COL_SYSTEM     = 2,   // "system" counter
    COL_IDLE       = 3,   // "idle" counter
    COL_IOWAIT     = 4,   // "iowait" counter
    COL_IRQ        = 5,   // "irq" counter
    COL_SOFTIRQ    = 6,   // "softirq" counter
    COL_STEAL      = 7,   // "steal" counter
    COL_GUEST      = 8,   // "guest" counter
    COL_GUEST_NICE = 9,   // "guest_nice" counter
    COL_COUNT      = 10   // number of counter columns; not a column itself
};

// Background sampler of per-core CPU counters read from /proc/stat.
//
// After start() a Boost worker thread takes one snapshot per sampling
// interval, diffs it against the previous snapshot and refreshes the per-core
// running means and the global EWMA utilization. The formulas are given in
// the comment at the top of this file.
//
// All member functions are defined out of line; this header only declares
// them.
class UtilizationMonitor {
public:
    // interval_seconds : time between two /proc/stat samples, in seconds.
    // a                : weight given to max(per-core mean utilization) in the
    //                    combined metric; (1 - a) weights
    //                    min(per-core current utilization).
    // beta             : EWMA smoothing factor; values near 1.0 respond
    //                    slowly, values near 0.0 follow recent samples.
    // NOTE(review): no range validation of a / beta is visible from this
    // header - confirm in the implementation.
    UtilizationMonitor(unsigned int interval_seconds = 60,
                       double a = 0.7,
                       double beta = 0.8);

    // NOTE(review): cleanup behavior (joining the worker, releasing the
    // snapshot buffers) is not visible here; the documented usage is to call
    // stop() before shutdown.
    ~UtilizationMonitor();

    // Launches the background monitoring thread.
    void start();

    // Intended to be called before shutdown to end monitoring.
    // NOTE(review): presumably raises stop_flag and joins worker_thread -
    // confirm in the implementation.
    void stop();

    // Current global EWMA utilization. Stays at 0.0 until enough data has
    // been collected (at least one full sampling interval).
    double getEwmaUtilization();

    // NOTE(review): presumably returns total_util_ewma_raw; what "raw" means
    // relative to getEwmaUtilization() is not described in this header.
    double getEwmaRawUtilization();

    // NOTE(review): presumably reports the `running` flag - confirm.
    bool isRunning();

private:
    // Metrics of one CPU core for one sampling interval.
    struct CpuUsageResult {
        int cpu_index;                       // row index of the core (cpuN)

        // Counter differences between the older and the newer snapshot.
        unsigned long long running_delta;    // delta(user + nice)
        unsigned long long iowait_delta;     // delta(iowait)
        unsigned long long idle_delta;       // delta(idle)
        unsigned long long total_delta;      // delta(sum of all ten counters)

        // Each percentage is 100 * <delta> / total_delta.
        double running_percent;
        double iowait_percent;
        double idle_percent;
        double utilization_percent;          // based on busy = total - idle
        double mean_utilization_percent;     // arithmetic mean over all samples
    };

    // --- raw storage helpers -------------------------------------------
    // NOTE(review): each matrix row presumably holds COL_COUNT counters -
    // confirm in the implementation.
    unsigned long long** allocateMatrix(int rows);
    void freeMatrix(unsigned long long** matrix, int rows);

    double* allocateDoubleArray(int n);
    void freeDoubleArray(double* arr);

    // NOTE(review): presumably (re)sizes per_cpu_mean_util to n entries.
    void ensureMeanStorage(int n);

    // --- sampling --------------------------------------------------------
    // Reads the per-core "cpuN" rows of /proc/stat. Both arguments are
    // references, so they act as outputs.
    // NOTE(review): the bool presumably signals success; ownership of the
    // returned matrix is not visible from here.
    bool readPerCpuStats(unsigned long long**& matrix, int& cpu_count);

    // --- per-row counter sums ---------------------------------------------
    // `row` is one CPU row indexed by CpuColumn.
    unsigned long long getRunningTime(unsigned long long* row);  // user + nice
    unsigned long long getIoWaitTime(unsigned long long* row);   // iowait
    unsigned long long getIdleTime(unsigned long long* row);     // idle
    unsigned long long getTotalTime(unsigned long long* row);    // all counters
    unsigned long long getBusyTime(unsigned long long* row);     // all but idle

    // --- metric derivation -------------------------------------------------
    // Builds the deltas and percentages of core `cpu_index` from its row in
    // the older (prev_row) and newer (curr_row) snapshot.
    CpuUsageResult calculateUsage(unsigned long long* prev_row,
                                  unsigned long long* curr_row,
                                  int cpu_index);

    // Folds the current utilization of every core into its running mean:
    //   mean(k) = (mean(k-1) * (k-1) + util(k)) / k,  k = sample_index
    void updatePerCpuMeans(CpuUsageResult* results,
                           int count,
                           unsigned long long sample_index);

    // Combines a * max(per-core mean) + (1 - a) * min(per-core current) and
    // feeds it into the EWMA (ewma = beta * ewma_prev + (1 - beta) * combined).
    // NOTE(review): the effect of `raw` is not described in this header;
    // presumably it selects the total_util_ewma_raw variant - confirm.
    double computeTotalUtilizationEwma(CpuUsageResult* results, int count, bool raw);

    // --- worker thread -------------------------------------------------------
    // Sampling loop: read /proc/stat, diff against the previous snapshot,
    // update per-core means and the global EWMA, then sleep one interval.
    void monitorLoop();

    // Data members. Declaration order is kept as-is because it determines
    // initialization order.
    boost::atomic<bool> stop_flag;        // NOTE(review): presumably requests loop exit
    boost::atomic<bool> running;          // NOTE(review): presumably true while sampling
    unsigned int interval_seconds;        // sampling period in seconds
    boost::thread worker_thread;          // background monitoring thread

    unsigned long long** previous;        // older snapshot  [cpu][CpuColumn]
    unsigned long long** current;         // newer snapshot  [cpu][CpuColumn]
    int cpu_count;                        // number of cpuN rows found in /proc/stat

    double* per_cpu_mean_util;            // running mean utilization per core
    int per_cpu_mean_count;               // NOTE(review): presumably length of per_cpu_mean_util

    double a;                             // max-mean vs. min-current weight
    double beta;                          // EWMA smoothing factor
    double total_util_ewma;               // global EWMA utilization
    double total_util_ewma_raw;           // NOTE(review): "raw" variant - semantics not documented here
    bool ewma_initialized;                // false until the first valid sample seeds the EWMA

    boost::mutex data_mutex;              // guards values shared with querying threads
};

#endif
