#include <opencv2/opencv.hpp>
#include <iostream>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <algorithm>
#include <vector>
#include <limits>
#include <unistd.h>
#include <sys/mman.h>
#include <boost/chrono.hpp>

#include "row.hpp"
#include "sort_algorithms.hpp"
#include "bounded_prolific_scheduler.hpp"

using namespace std;

/// Returns a mutable reference to one byte of a densely packed video buffer.
///
/// The buffer is addressed as [frame f][row i][column j][channel k], where each
/// frame holds H rows of W pixels and each pixel holds C bytes. No bounds
/// checking is performed; the caller must supply in-range indices.
inline uint8_t& PIXEL(uint8_t* video, int f, int i, int j, int k, int H, int W, int C)
{
    // Build the flat offset one dimension at a time; every step is carried out
    // in size_t so large videos do not overflow int arithmetic.
    const size_t rowIndex   = static_cast<size_t>(f) * H + i;
    const size_t pixelIndex = rowIndex * W + j;
    const size_t byteOffset = pixelIndex * C + k;
    return *(video + byteOffset);
}

/// Shared, read-mostly description of the video buffer handed to every
/// sort_frame_task invocation through the scheduler's opaque context pointer.
///
/// All members carry default initializers so that a default-constructed
/// context is in a well-defined "empty" state instead of holding
/// indeterminate values.
struct VideoSortContext
{
    uint8_t* video = nullptr;  ///< Packed frame data, addressed as [frame][row][column][channel].
    int actualFrames = 0;      ///< Number of frames actually stored in `video`.
    int H = 0;                 ///< Frame height in pixels (rows per frame).
    int W = 0;                 ///< Frame width in pixels (pixels per row).
    int C = 0;                 ///< Bytes (channels) per pixel.
};

/**
 * Scheduler task: rewrites one frame of the shared video buffer in place.
 *
 * For every row of frame `ff` the BGR pixels are converted to a single gray
 * value, the row of gray values is passed to quick_sort, and the result is
 * written back to channels 0..2 of each pixel. Channels beyond the third
 * (when C > 3) are left untouched.
 *
 * @param ff          Index of the frame to process.
 * @param workerId    Unused.
 * @param workerCount Unused.
 * @param ctxPtr      Pointer to a VideoSortContext describing the buffer.
 *
 * The call is a no-op when the context is missing, when `ff` lies outside
 * [0, actualFrames), or when the dimensions cannot describe a BGR frame
 * (H <= 0, W <= 0 or C < 3); any of these would otherwise index outside the
 * buffer.
 */
void sort_frame_task(int ff, int workerId, int workerCount, void* ctxPtr)
{
    (void)workerId;
    (void)workerCount;

    VideoSortContext* ctx = static_cast<VideoSortContext*>(ctxPtr);
    if (ctx == nullptr || ctx->video == nullptr)
    {
        return;
    }

    uint8_t* video = ctx->video;
    const int H = ctx->H;
    const int W = ctx->W;
    const int C = ctx->C;

    // Refuse work that would read or write outside the frame storage.
    if (ff < 0 || ff >= ctx->actualFrames || H <= 0 || W <= 0 || C < 3)
    {
        return;
    }

    // Scratch storage for one row of gray values, reused for every row.
    // NOTE(review): grayRow is presumably a non-owning view over grayBuf (the
    // sorted values are read back through it) — confirm against row.hpp.
    std::vector<uint8_t> grayBuf(static_cast<size_t>(W));
    row<uint8_t> grayRow(grayBuf.data(), static_cast<uint32_t>(W), false);

    for (int i = 0; i < H; i++)
    {
        // Resolve the address of the row once instead of once per channel access.
        uint8_t* rowPtr = &PIXEL(video, ff, i, 0, 0, H, W, C);

        for (int j = 0; j < W; j++)
        {
            const uint8_t* px = rowPtr + static_cast<size_t>(j) * C;
            const uint8_t b = px[0];
            const uint8_t g = px[1];
            const uint8_t r = px[2];

            // Weighted BGR -> gray conversion; the fractional part is truncated.
            grayBuf[j] = static_cast<uint8_t>(0.114 * b + 0.587 * g + 0.299 * r);
        }

        quick_sort(grayRow);

        for (int j = 0; j < W; j++)
        {
            uint8_t gray = grayRow[j];
            uint8_t* px = rowPtr + static_cast<size_t>(j) * C;
            px[0] = gray;
            px[1] = gray;
            px[2] = gray;
        }
    }
}

int main(int argc, char** argv)
{
    if (argc < 3)
    {
        return -1;
    }

    long numCPUs = sysconf(_SC_NPROCESSORS_ONLN);
    if (numCPUs <= 0)
    {
        numCPUs = 1;
    }

    cv::VideoCapture cap(argv[1]);
    if (!cap.isOpened())
    {
        return -1;
    }

    int F = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_COUNT));
    int W = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_WIDTH));
    int H = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_HEIGHT));
    int C = 3;

    double fps = cap.get(cv::CAP_PROP_FPS);
    if (fps <= 0.0) fps = 30.0;

    if (F <= 0 || W <= 0 || H <= 0)
    {
        return -1;
    }

    if (static_cast<size_t>(H) > std::numeric_limits<size_t>::max() /
                                 static_cast<size_t>(W) /
                                 static_cast<size_t>(C))
    {
        return -1;
    }

    const size_t frameBytes = static_cast<size_t>(H) *
                              static_cast<size_t>(W) *
                              static_cast<size_t>(C);

    if (static_cast<size_t>(F) > std::numeric_limits<size_t>::max() / frameBytes)
    {
        return -1;
    }

    const size_t totalBytes = static_cast<size_t>(F) * frameBytes;

    uint8_t* video = static_cast<uint8_t*>(mmap(
        nullptr,
        totalBytes,
        PROT_READ | PROT_WRITE,
        MAP_SHARED | MAP_ANONYMOUS,
        -1,
        0
    ));

    if (video == MAP_FAILED)
    {
        perror("mmap failed");
        return -1;
    }
    cv::Mat frame;
    int f = 0;
    while (cap.read(frame) && f < F)
    {
        if (frame.empty())
            break;

        if (frame.type() != CV_8UC3)
        {
            munmap(video, totalBytes);
            return -1;
        }

        uint8_t* dst = video + static_cast<size_t>(f) * frameBytes;

        if (frame.isContinuous())
        {
            memcpy(dst, frame.data, frameBytes);
        }
        else
        {
            const size_t rowBytes = static_cast<size_t>(W) * static_cast<size_t>(C);
            for (int i = 0; i < H; i++)
            {
                memcpy(dst + static_cast<size_t>(i) * rowBytes,
                       frame.ptr<uint8_t>(i),
                       rowBytes);
            }
        }

        f++;
    }
    cap.release();

    int actualFrames = f;
    if (actualFrames == 0)
    {
        munmap(video, totalBytes);
        return -1;
    }
//--------------------------------------------------
    const int K = 36;

    VideoSortContext ctx;
    ctx.video = video;
    ctx.actualFrames = actualFrames;
    ctx.H = H;
    ctx.W = W;
    ctx.C = C;

    boost::chrono::high_resolution_clock::time_point t1 =
        boost::chrono::high_resolution_clock::now();

    int schedRc = run_bounded_prolific_scheduler(
        actualFrames,
        K,
        sort_frame_task,
        &ctx
    );

    if (schedRc != 0)
    {
        munmap(video, totalBytes);
        return -1;
    }

    boost::chrono::high_resolution_clock::time_point t2 =
        boost::chrono::high_resolution_clock::now();

    boost::chrono::milliseconds ms =
        boost::chrono::duration_cast<boost::chrono::milliseconds>(t2 - t1);

    cout << "[Parent " << getpid() << "] Worker processes completed." << endl;
    cout << "Total execution time: " << ms.count() << " ms" << endl;

    cv::VideoWriter writer(
        argv[2],
        cv::VideoWriter::fourcc('a', 'v', 'c', '1'),
        fps,
        cv::Size(W, H)
    );

    if (!writer.isOpened())
    {
        munmap(video, totalBytes);
        return -1;
    }

    for (int ff = 0; ff < actualFrames; ff++)
    {
        cv::Mat outFrame(H, W, CV_8UC3);
        uint8_t* src = video + static_cast<size_t>(ff) * frameBytes;

        if (outFrame.isContinuous())
        {
            memcpy(outFrame.data, src, frameBytes);
        }
        else
        {
            const size_t rowBytes = static_cast<size_t>(W) * static_cast<size_t>(C);
            for (int i = 0; i < H; i++)
            {
                memcpy(outFrame.ptr<uint8_t>(i),
                       src + static_cast<size_t>(i) * rowBytes,
                       rowBytes);
            }
        }

        writer.write(outFrame);
    }

    writer.release();
    munmap(video, totalBytes);

    cout << "Saved processed video to " << argv[2] << endl;
    return 0;
}
