#ifndef BOUNDED_COLLECTIVE_SCHEDULER_HPP
#define BOUNDED_COLLECTIVE_SCHEDULER_HPP

#include <cstddef>

// Signature of the per-task callback handed to
// run_bounded_collective_scheduler() as `taskFn`.
//
// NOTE(review): this header does not spell out the argument semantics; the
// descriptions below are inferred from the parameter names and the scheduler
// comment, and should be confirmed against the implementation.
//   taskIndex   - presumably the id of the task to run
//   workerId    - presumably the id of the worker making the call
//   workerCount - presumably the total number of workers (K)
//   ctx         - opaque pointer; presumably the scheduler's `ctx` argument
typedef void (*collective_task_fn)(int taskIndex, int workerId, int workerCount, void* ctx);

// Bounded collective scheduler.
//
// Design: the parent process forks exactly K workers in a single flat
// fan-out (K is presumably `workerCount`). Worker w handles task ids
// w, w + K, w + 2K, ... Once the workers have been started, the parent
// reaps every child with waitpid().
//
// NOTE(review): the meaning of the int return value and the exact effect of
// `bindAffinity` (presumably pinning workers to CPUs) are not visible in this
// header; confirm against the implementation before relying on them.
int run_bounded_collective_scheduler(int totalTasks, int workerCount,
                                     collective_task_fn taskFn, void* ctx,
                                     bool bindAffinity);

#endif
