#include #include #include #include #include "row.hpp" #include "sort_algorithms.hpp" struct MatrixSortContext { uint8_t* A; int F, H, W; }; struct ChunkArgs { MatrixSortContext* ctx; std::atomic* next_row; int total_rows; int fixed_chunk; }; void* chunk_worker(void* arg) { ChunkArgs* ca = (ChunkArgs*)arg; int start; while ((start = ca->next_row->fetch_add(ca->fixed_chunk)) < ca->total_rows) { int end = std::min(start + ca->fixed_chunk, ca->total_rows); for (int i = start; i < end; ++i) { uint8_t* row_ptr = ca->ctx->A + (static_cast(i) * ca->ctx->W); row r(row_ptr, (uint32_t)ca->ctx->W, false); quick_sort(r); } } return nullptr; } long long run_chunk_scheduler(MatrixSortContext* ctx, int K, int chunk_size) { int total_rows = ctx->F * ctx->H; std::atomic next_row(0); pthread_t threads[K]; ChunkArgs args[K]; auto start_bench = std::chrono::high_resolution_clock::now(); for (int i = 0; i < K; ++i) { args[i] = {ctx, &next_row, total_rows, chunk_size}; pthread_create(&threads[i], nullptr, chunk_worker, &args[i]); } for (int i = 0; i < K; ++i) pthread_join(threads[i], nullptr); auto end_bench = std::chrono::high_resolution_clock::now(); return std::chrono::duration_cast(end_bench - start_bench).count(); }