#ifndef CHUNK_STEALING_SCHEDULER_HPP
#define CHUNK_STEALING_SCHEDULER_HPP

#include <cstdint>

// Thread-based chunk-stealing scheduler for a 3D uint8_t tensor A[F][H][W]
// stored as one flat row-major array, so element (f, i, j) lives at:
//   A[(f * H + i) * W + j]
//
// Scheduling model:
//   - Each task is a chunk of consecutive slices.
//   - Threads first consume chunks from their own deque.
//   - When its own deque is empty, a thread steals chunks from the back of
//     another thread's deque.
//
// Parameters:
//   A           - pointer to the flat tensor storage, laid out as above.
//                 NOTE(review): passed as non-const, so the implementation
//                 presumably writes through it -- confirm in the definition.
//   F, H, W     - tensor extents used by the indexing formula above.
//   workerCount - presumably the number of worker threads -- TODO confirm
//                 against the definition.
//   chunkSize   - presumably the number of consecutive slices per chunk --
//                 TODO confirm against the definition.
//
// Returns 0 on success, -1 on invalid arguments. Which argument values are
// rejected is decided by the definition and is not visible from this header.
//
// The element type is spelled std::uint8_t because <cstdint> only guarantees
// the names in namespace std; the global-namespace alias is optional.
int run_chunk_stealing_scheduler(
    std::uint8_t* A,
    int F,
    int H,
    int W,
    int workerCount,
    int chunkSize
);

#endif
