From 584220edf01cc776169c4524295943276b0a0433 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 7 Oct 2016 04:01:38 +0200 Subject: [PATCH] Move DrawerCommandQueue to its own file --- src/CMakeLists.txt | 1 + src/r_draw_rgba.cpp | 184 ----------------------------------------- src/r_draw_rgba.h | 165 ++----------------------------------- src/r_thread.cpp | 196 ++++++++++++++++++++++++++++++++++++++++++++ src/r_thread.h | 157 +++++++++++++++++++++++++++++++++++ 5 files changed, 359 insertions(+), 344 deletions(-) create mode 100644 src/r_thread.cpp create mode 100644 src/r_thread.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index acbec1612b..9f2ee2e28c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1033,6 +1033,7 @@ set( FASTMATH_PCH_SOURCES r_draw_rgba.cpp r_drawt.cpp r_drawt_rgba.cpp + r_thread.cpp r_main.cpp r_plane.cpp r_segs.cpp diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 7da2f183f8..43075d0a64 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -50,9 +50,6 @@ extern float rw_light; extern float rw_lightstep; extern int wallshade; -// Use multiple threads when drawing -CVAR(Bool, r_multithreaded, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); - // Use linear filtering when scaling up CVAR(Bool, r_magfilter, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); @@ -64,177 +61,6 @@ CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); ///////////////////////////////////////////////////////////////////////////// -DrawerCommandQueue *DrawerCommandQueue::Instance() -{ - static DrawerCommandQueue queue; - return &queue; -} - -DrawerCommandQueue::DrawerCommandQueue() -{ -} - -DrawerCommandQueue::~DrawerCommandQueue() -{ - StopThreads(); -} - -void* DrawerCommandQueue::AllocMemory(size_t size) -{ - // Make sure allocations remain 16-byte aligned - size = (size + 15) / 16 * 16; - - auto queue = Instance(); - if (queue->memorypool_pos + size > memorypool_size) - return nullptr; - - void *data = queue->memorypool + queue->memorypool_pos; - queue->memorypool_pos += size; - return data; -} - -void DrawerCommandQueue::Begin() -{ - auto queue = Instance(); - queue->Finish(); - queue->threaded_render++; -} - -void DrawerCommandQueue::End() -{ - auto queue = Instance(); - queue->Finish(); - if (queue->threaded_render > 0) - queue->threaded_render--; -} - -void DrawerCommandQueue::WaitForWorkers() -{ - Instance()->Finish(); -} - -void DrawerCommandQueue::Finish() -{ - auto queue = Instance(); - if (queue->commands.empty()) - return; - - // Give worker threads something to do: - - std::unique_lock start_lock(queue->start_mutex); - queue->active_commands.swap(queue->commands); - queue->run_id++; - start_lock.unlock(); - - queue->StartThreads(); - queue->start_condition.notify_all(); - - // Do one thread ourselves: - - DrawerThread thread; - thread.core = 0; - thread.num_cores = (int)(queue->threads.size() + 1); - - for (int pass = 0; pass < queue->num_passes; pass++) - { - thread.pass_start_y = pass * queue->rows_in_pass; - thread.pass_end_y = (pass + 1) * queue->rows_in_pass; - if (pass + 1 == queue->num_passes) - thread.pass_end_y = MAX(thread.pass_end_y, MAXHEIGHT); - - size_t size = queue->active_commands.size(); - for (size_t i = 0; i < size; i++) - { - auto &command = queue->active_commands[i]; - command->Execute(&thread); - } - } - - // Wait for everyone to finish: - - std::unique_lock end_lock(queue->end_mutex); - queue->end_condition.wait(end_lock, [&]() { return queue->finished_threads == queue->threads.size(); }); - - // Clean up batch: - - for (auto &command : queue->active_commands) - command->~DrawerCommand(); - queue->active_commands.clear(); - queue->memorypool_pos = 0; - queue->finished_threads = 0; -} - -void DrawerCommandQueue::StartThreads() -{ - if (!threads.empty()) - return; - - int num_threads = std::thread::hardware_concurrency(); - if (num_threads == 0) - num_threads = 4; - - threads.resize(num_threads - 1); - - for (int i = 0; i < num_threads - 1; i++) - { - DrawerCommandQueue *queue = this; - DrawerThread *thread = &threads[i]; - thread->core = i + 1; - thread->num_cores = num_threads; - thread->thread = std::thread([=]() - { - int run_id = 0; - while (true) - { - // Wait until we are signalled to run: - std::unique_lock start_lock(queue->start_mutex); - queue->start_condition.wait(start_lock, [&]() { return queue->run_id != run_id || queue->shutdown_flag; }); - if (queue->shutdown_flag) - break; - run_id = queue->run_id; - start_lock.unlock(); - - // Do the work: - for (int pass = 0; pass < queue->num_passes; pass++) - { - thread->pass_start_y = pass * queue->rows_in_pass; - thread->pass_end_y = (pass + 1) * queue->rows_in_pass; - if (pass + 1 == queue->num_passes) - thread->pass_end_y = MAX(thread->pass_end_y, MAXHEIGHT); - - size_t size = queue->active_commands.size(); - for (size_t i = 0; i < size; i++) - { - auto &command = queue->active_commands[i]; - command->Execute(thread); - } - } - - // Notify main thread that we finished: - std::unique_lock end_lock(queue->end_mutex); - queue->finished_threads++; - end_lock.unlock(); - queue->end_condition.notify_all(); - } - }); - } -} - -void DrawerCommandQueue::StopThreads() -{ - std::unique_lock lock(start_mutex); - shutdown_flag = true; - lock.unlock(); - start_condition.notify_all(); - for (auto &thread : threads) - thread.thread.join(); - threads.clear(); - lock.lock(); - shutdown_flag = false; -} - -///////////////////////////////////////////////////////////////////////////// - class DrawSpanLLVMCommand : public DrawerCommand { protected: @@ -1294,16 +1120,6 @@ void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) ///////////////////////////////////////////////////////////////////////////// -void R_BeginDrawerCommands() -{ - DrawerCommandQueue::Begin(); -} - -void R_EndDrawerCommands() -{ - DrawerCommandQueue::End(); -} - void R_DrawColumn_rgba() { DrawerCommandQueue::QueueCommand(); diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 34b8741794..8b704c0a64 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -25,16 +25,16 @@ #include "r_draw.h" #include "v_palette.h" -#include -#include -#include -#include -#include +#include "r_thread.h" #ifndef NO_SSE #include #endif +struct FSpecialColormap; + +EXTERN_CVAR(Bool, r_mipmap) + ///////////////////////////////////////////////////////////////////////////// // Drawer functions: @@ -118,161 +118,6 @@ void tmvline4_revsubclamp_rgba(); void R_FillColumnHoriz_rgba(); void R_FillSpan_rgba(); -///////////////////////////////////////////////////////////////////////////// -// Multithreaded rendering infrastructure: - -// Redirect drawer commands to worker threads -void R_BeginDrawerCommands(); - -// Wait until all drawers finished executing -void R_EndDrawerCommands(); - -struct FSpecialColormap; -class DrawerCommandQueue; - -// Worker data for each thread executing drawer commands -class DrawerThread -{ -public: - std::thread thread; - - // Thread line index of this thread - int core = 0; - - // Number of active threads - int num_cores = 1; - - // Range of rows processed this pass - int pass_start_y = 0; - int pass_end_y = MAXHEIGHT; - - uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; - uint32_t *dc_temp_rgba; - - // Checks if a line is rendered by this thread - bool line_skipped_by_thread(int line) - { - return line < pass_start_y || line >= pass_end_y || line % num_cores != core; - } - - // The number of lines to skip to reach the first line to be rendered by this thread - int skipped_by_thread(int first_line) - { - int pass_skip = MAX(pass_start_y - first_line, 0); - int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores; - return pass_skip + core_skip; - } - - // The number of lines to be rendered by this thread - int count_for_thread(int first_line, int count) - { - int lines_until_pass_end = MAX(pass_end_y - first_line, 0); - count = MIN(count, lines_until_pass_end); - int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; - return MAX(c, 0); - } - - // Calculate the dest address for the first line to be rendered by this thread - uint32_t *dest_for_thread(int first_line, int pitch, uint32_t *dest) - { - return dest + skipped_by_thread(first_line) * pitch; - } -}; - -// Task to be executed by each worker thread -class DrawerCommand -{ -protected: - int _dest_y; - -public: - DrawerCommand() - { - _dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); - } - - virtual void Execute(DrawerThread *thread) = 0; -}; - -EXTERN_CVAR(Bool, r_multithreaded) -EXTERN_CVAR(Bool, r_mipmap) -EXTERN_CVAR(Int, r_multithreadedmax) - -// Manages queueing up commands and executing them on worker threads -class DrawerCommandQueue -{ - enum { memorypool_size = 16 * 1024 * 1024 }; - char memorypool[memorypool_size]; - size_t memorypool_pos = 0; - - std::vector commands; - - std::vector threads; - - std::mutex start_mutex; - std::condition_variable start_condition; - std::vector active_commands; - bool shutdown_flag = false; - int run_id = 0; - - std::mutex end_mutex; - std::condition_variable end_condition; - size_t finished_threads = 0; - - int threaded_render = 0; - DrawerThread single_core_thread; - int num_passes = 1; - int rows_in_pass = MAXHEIGHT; - - void StartThreads(); - void StopThreads(); - void Finish(); - - static DrawerCommandQueue *Instance(); - - DrawerCommandQueue(); - ~DrawerCommandQueue(); - -public: - // Allocate memory valid for the duration of a command execution - static void* AllocMemory(size_t size); - - // Queue command to be executed by drawer worker threads - template - static void QueueCommand(Types &&... args) - { - auto queue = Instance(); - if (queue->threaded_render == 0 || !r_multithreaded) - { - T command(std::forward(args)...); - command.Execute(&queue->single_core_thread); - } - else - { - void *ptr = AllocMemory(sizeof(T)); - if (!ptr) // Out of memory - render what we got - { - queue->Finish(); - ptr = AllocMemory(sizeof(T)); - if (!ptr) - return; - } - T *command = new (ptr)T(std::forward(args)...); - queue->commands.push_back(command); - } - } - - // Redirects all drawing commands to worker threads until End is called - // Begin/End blocks can be nested. - static void Begin(); - - // End redirection and wait until all worker threads finished executing - static void End(); - - // Waits until all worker threads finished executing - static void WaitForWorkers(); -}; - ///////////////////////////////////////////////////////////////////////////// // Drawer commands: diff --git a/src/r_thread.cpp b/src/r_thread.cpp new file mode 100644 index 0000000000..dec0b8c6ca --- /dev/null +++ b/src/r_thread.cpp @@ -0,0 +1,196 @@ + +#include +#include "templates.h" +#include "doomdef.h" +#include "i_system.h" +#include "w_wad.h" +#include "r_local.h" +#include "v_video.h" +#include "doomstat.h" +#include "st_stuff.h" +#include "g_game.h" +#include "g_level.h" +#include "r_thread.h" + +CVAR(Bool, r_multithreaded, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); + +void R_BeginDrawerCommands() +{ + DrawerCommandQueue::Begin(); +} + +void R_EndDrawerCommands() +{ + DrawerCommandQueue::End(); +} + +///////////////////////////////////////////////////////////////////////////// + +DrawerCommandQueue *DrawerCommandQueue::Instance() +{ + static DrawerCommandQueue queue; + return &queue; +} + +DrawerCommandQueue::DrawerCommandQueue() +{ +} + +DrawerCommandQueue::~DrawerCommandQueue() +{ + StopThreads(); +} + +void* DrawerCommandQueue::AllocMemory(size_t size) +{ + // Make sure allocations remain 16-byte aligned + size = (size + 15) / 16 * 16; + + auto queue = Instance(); + if (queue->memorypool_pos + size > memorypool_size) + return nullptr; + + void *data = queue->memorypool + queue->memorypool_pos; + queue->memorypool_pos += size; + return data; +} + +void DrawerCommandQueue::Begin() +{ + auto queue = Instance(); + queue->Finish(); + queue->threaded_render++; +} + +void DrawerCommandQueue::End() +{ + auto queue = Instance(); + queue->Finish(); + if (queue->threaded_render > 0) + queue->threaded_render--; +} + +void DrawerCommandQueue::WaitForWorkers() +{ + Instance()->Finish(); +} + +void DrawerCommandQueue::Finish() +{ + auto queue = Instance(); + if (queue->commands.empty()) + return; + + // Give worker threads something to do: + + std::unique_lock start_lock(queue->start_mutex); + queue->active_commands.swap(queue->commands); + queue->run_id++; + start_lock.unlock(); + + queue->StartThreads(); + queue->start_condition.notify_all(); + + // Do one thread ourselves: + + DrawerThread thread; + thread.core = 0; + thread.num_cores = (int)(queue->threads.size() + 1); + + for (int pass = 0; pass < queue->num_passes; pass++) + { + thread.pass_start_y = pass * queue->rows_in_pass; + thread.pass_end_y = (pass + 1) * queue->rows_in_pass; + if (pass + 1 == queue->num_passes) + thread.pass_end_y = MAX(thread.pass_end_y, MAXHEIGHT); + + size_t size = queue->active_commands.size(); + for (size_t i = 0; i < size; i++) + { + auto &command = queue->active_commands[i]; + command->Execute(&thread); + } + } + + // Wait for everyone to finish: + + std::unique_lock end_lock(queue->end_mutex); + queue->end_condition.wait(end_lock, [&]() { return queue->finished_threads == queue->threads.size(); }); + + // Clean up batch: + + for (auto &command : queue->active_commands) + command->~DrawerCommand(); + queue->active_commands.clear(); + queue->memorypool_pos = 0; + queue->finished_threads = 0; +} + +void DrawerCommandQueue::StartThreads() +{ + if (!threads.empty()) + return; + + int num_threads = std::thread::hardware_concurrency(); + if (num_threads == 0) + num_threads = 4; + + threads.resize(num_threads - 1); + + for (int i = 0; i < num_threads - 1; i++) + { + DrawerCommandQueue *queue = this; + DrawerThread *thread = &threads[i]; + thread->core = i + 1; + thread->num_cores = num_threads; + thread->thread = std::thread([=]() + { + int run_id = 0; + while (true) + { + // Wait until we are signalled to run: + std::unique_lock start_lock(queue->start_mutex); + queue->start_condition.wait(start_lock, [&]() { return queue->run_id != run_id || queue->shutdown_flag; }); + if (queue->shutdown_flag) + break; + run_id = queue->run_id; + start_lock.unlock(); + + // Do the work: + for (int pass = 0; pass < queue->num_passes; pass++) + { + thread->pass_start_y = pass * queue->rows_in_pass; + thread->pass_end_y = (pass + 1) * queue->rows_in_pass; + if (pass + 1 == queue->num_passes) + thread->pass_end_y = MAX(thread->pass_end_y, MAXHEIGHT); + + size_t size = queue->active_commands.size(); + for (size_t i = 0; i < size; i++) + { + auto &command = queue->active_commands[i]; + command->Execute(thread); + } + } + + // Notify main thread that we finished: + std::unique_lock end_lock(queue->end_mutex); + queue->finished_threads++; + end_lock.unlock(); + queue->end_condition.notify_all(); + } + }); + } +} + +void DrawerCommandQueue::StopThreads() +{ + std::unique_lock lock(start_mutex); + shutdown_flag = true; + lock.unlock(); + start_condition.notify_all(); + for (auto &thread : threads) + thread.thread.join(); + threads.clear(); + lock.lock(); + shutdown_flag = false; +} diff --git a/src/r_thread.h b/src/r_thread.h new file mode 100644 index 0000000000..312c5ad226 --- /dev/null +++ b/src/r_thread.h @@ -0,0 +1,157 @@ + +#pragma once + +#include "r_draw.h" +#include +#include +#include +#include +#include + +// Use multiple threads when drawing +EXTERN_CVAR(Bool, r_multithreaded) + +// Redirect drawer commands to worker threads +void R_BeginDrawerCommands(); + +// Wait until all drawers finished executing +void R_EndDrawerCommands(); + +// Worker data for each thread executing drawer commands +class DrawerThread +{ +public: + std::thread thread; + + // Thread line index of this thread + int core = 0; + + // Number of active threads + int num_cores = 1; + + // Range of rows processed this pass + int pass_start_y = 0; + int pass_end_y = MAXHEIGHT; + + uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; + uint32_t *dc_temp_rgba; + + // Checks if a line is rendered by this thread + bool line_skipped_by_thread(int line) + { + return line < pass_start_y || line >= pass_end_y || line % num_cores != core; + } + + // The number of lines to skip to reach the first line to be rendered by this thread + int skipped_by_thread(int first_line) + { + int pass_skip = MAX(pass_start_y - first_line, 0); + int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores; + return pass_skip + core_skip; + } + + // The number of lines to be rendered by this thread + int count_for_thread(int first_line, int count) + { + int lines_until_pass_end = MAX(pass_end_y - first_line, 0); + count = MIN(count, lines_until_pass_end); + int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; + return MAX(c, 0); + } + + // Calculate the dest address for the first line to be rendered by this thread + uint32_t *dest_for_thread(int first_line, int pitch, uint32_t *dest) + { + return dest + skipped_by_thread(first_line) * pitch; + } +}; + +// Task to be executed by each worker thread +class DrawerCommand +{ +protected: + int _dest_y; + +public: + DrawerCommand() + { + _dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); + } + + virtual void Execute(DrawerThread *thread) = 0; +}; + +// Manages queueing up commands and executing them on worker threads +class DrawerCommandQueue +{ + enum { memorypool_size = 16 * 1024 * 1024 }; + char memorypool[memorypool_size]; + size_t memorypool_pos = 0; + + std::vector commands; + + std::vector threads; + + std::mutex start_mutex; + std::condition_variable start_condition; + std::vector active_commands; + bool shutdown_flag = false; + int run_id = 0; + + std::mutex end_mutex; + std::condition_variable end_condition; + size_t finished_threads = 0; + + int threaded_render = 0; + DrawerThread single_core_thread; + int num_passes = 1; + int rows_in_pass = MAXHEIGHT; + + void StartThreads(); + void StopThreads(); + void Finish(); + + static DrawerCommandQueue *Instance(); + + DrawerCommandQueue(); + ~DrawerCommandQueue(); + +public: + // Allocate memory valid for the duration of a command execution + static void* AllocMemory(size_t size); + + // Queue command to be executed by drawer worker threads + template + static void QueueCommand(Types &&... args) + { + auto queue = Instance(); + if (queue->threaded_render == 0 || !r_multithreaded) + { + T command(std::forward(args)...); + command.Execute(&queue->single_core_thread); + } + else + { + void *ptr = AllocMemory(sizeof(T)); + if (!ptr) // Out of memory - render what we got + { + queue->Finish(); + ptr = AllocMemory(sizeof(T)); + if (!ptr) + return; + } + T *command = new (ptr)T(std::forward(args)...); + queue->commands.push_back(command); + } + } + + // Redirects all drawing commands to worker threads until End is called + // Begin/End blocks can be nested. + static void Begin(); + + // End redirection and wait until all worker threads finished executing + static void End(); + + // Waits until all worker threads finished executing + static void WaitForWorkers(); +};