diff --git a/src/polyrenderer/drawers/poly_triangle.cpp b/src/polyrenderer/drawers/poly_triangle.cpp index 0a58c6e60..1d5f3159c 100644 --- a/src/polyrenderer/drawers/poly_triangle.cpp +++ b/src/polyrenderer/drawers/poly_triangle.cpp @@ -375,8 +375,6 @@ void DrawPolyTrianglesCommand::Execute(DrawerThread *thread) WorkerThreadData thread_data; thread_data.core = thread->core; thread_data.num_cores = thread->num_cores; - thread_data.pass_start_y = thread->pass_start_y; - thread_data.pass_end_y = thread->pass_end_y; thread_data.FullSpans = thread->FullSpansBuffer.data(); thread_data.PartialBlocks = thread->PartialBlocksBuffer.data(); diff --git a/src/polyrenderer/drawers/screen_triangle.h b/src/polyrenderer/drawers/screen_triangle.h index cd2a6cbe0..5539be305 100644 --- a/src/polyrenderer/drawers/screen_triangle.h +++ b/src/polyrenderer/drawers/screen_triangle.h @@ -46,8 +46,6 @@ struct WorkerThreadData { int32_t core; int32_t num_cores; - int32_t pass_start_y; - int32_t pass_end_y; uint32_t *temp; // Triangle working data: diff --git a/src/polyrenderer/poly_renderer.cpp b/src/polyrenderer/poly_renderer.cpp index c53de5d62..0dd147b12 100644 --- a/src/polyrenderer/poly_renderer.cpp +++ b/src/polyrenderer/poly_renderer.cpp @@ -78,7 +78,9 @@ void PolyRenderer::RenderView(player_t *player) Thread.DrawQueue->Push(cameraLight->ShaderColormap(), screen); } - DrawerThreads::Execute({ Thread.DrawQueue }); + DrawerThreads::Execute(Thread.DrawQueue); + DrawerThreads::WaitForWorkers(); + Thread.DrawQueue->Clear(); } void PolyRenderer::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines) @@ -98,7 +100,8 @@ void PolyRenderer::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int canvas->Lock(true); RenderActorView(actor, dontmaplines); - DrawerThreads::Execute({ Thread.DrawQueue }); + DrawerThreads::Execute(Thread.DrawQueue); + DrawerThreads::WaitForWorkers(); canvas->Unlock(); diff --git a/src/swrenderer/drawers/r_thread.cpp b/src/swrenderer/drawers/r_thread.cpp index adcee2768..d8b3b3389 100644 --- a/src/swrenderer/drawers/r_thread.cpp +++ b/src/swrenderer/drawers/r_thread.cpp @@ -53,90 +53,36 @@ DrawerThreads::~DrawerThreads() StopThreads(); } -void DrawerThreads::Execute(const std::vector &queues) +void DrawerThreads::Execute(DrawerCommandQueuePtr commands) { - bool hasWork = false; - for (const auto &queue : queues) - hasWork = hasWork || !queue->commands.empty(); - if (!hasWork) + if (!commands || commands->commands.empty()) return; auto queue = Instance(); - - // Give worker threads something to do: - - std::unique_lock start_lock(queue->start_mutex); - queue->active_commands = queues; - queue->run_id++; - start_lock.unlock(); - queue->StartThreads(); - queue->start_condition.notify_all(); - - // Do one thread ourselves: - - static DrawerThread thread; - thread.core = 0; - thread.num_cores = (int)(queue->threads.size() + 1); - - struct TryCatchData - { - DrawerThreads *queue; - DrawerThread *thread; - size_t list_index; - size_t command_index; - } data; - - data.queue = queue; - data.thread = &thread; - data.list_index = 0; - data.command_index = 0; - VectoredTryCatch(&data, - [](void *data) - { - TryCatchData *d = (TryCatchData*)data; - - for (int pass = 0; pass < d->queue->num_passes; pass++) - { - d->thread->pass_start_y = pass * d->queue->rows_in_pass; - d->thread->pass_end_y = (pass + 1) * d->queue->rows_in_pass; - if (pass + 1 == d->queue->num_passes) - d->thread->pass_end_y = MAX(d->thread->pass_end_y, MAXHEIGHT); - - for (auto &list : d->queue->active_commands) - { - size_t size = list->commands.size(); - for (d->command_index = 0; d->command_index < size; d->command_index++) - { - auto &command = list->commands[d->command_index]; - command->Execute(d->thread); - } - d->list_index++; - } - } - }, - [](void *data, const char *reason, bool fatal) - { - TryCatchData *d = (TryCatchData*)data; - ReportDrawerError(d->queue->active_commands[d->list_index]->commands[d->command_index], true, reason, fatal); - }); - - // Wait for everyone to finish: + // Add to queue and awaken worker threads + std::unique_lock start_lock(queue->start_mutex); std::unique_lock end_lock(queue->end_mutex); - queue->end_condition.wait(end_lock, [&]() { return queue->finished_threads == queue->threads.size(); }); + queue->active_commands.push_back(commands); + queue->tasks_left += queue->threads.size(); + end_lock.unlock(); + start_lock.unlock(); + queue->start_condition.notify_all(); +} - if (!queue->thread_error.IsEmpty()) - { - static bool first = true; - if (queue->thread_error_fatal) - I_FatalError("%s", queue->thread_error.GetChars()); - else if (first) - Printf("%s\n", queue->thread_error.GetChars()); - first = false; - } +void DrawerThreads::WaitForWorkers() +{ + // Wait for workers to finish + auto queue = Instance(); + std::unique_lock end_lock(queue->end_mutex); + queue->end_condition.wait(end_lock, [&]() { return queue->tasks_left == 0; }); + end_lock.unlock(); - // Clean up batch: + // Clean up + std::unique_lock start_lock(queue->start_mutex); + for (auto &thread : queue->threads) + thread.current_queue = 0; for (auto &list : queue->active_commands) { @@ -145,7 +91,39 @@ void DrawerThreads::Execute(const std::vector &queues) list->Clear(); } queue->active_commands.clear(); - queue->finished_threads = 0; +} + +void DrawerThreads::WorkerMain(DrawerThread *thread) +{ + while (true) + { + // Wait until we are signalled to run: + std::unique_lock start_lock(start_mutex); + start_condition.wait(start_lock, [&]() { return thread->current_queue < active_commands.size() || shutdown_flag; }); + if (shutdown_flag) + break; + + // Grab the commands + DrawerCommandQueuePtr list = active_commands[thread->current_queue]; + thread->current_queue++; + start_lock.unlock(); + + // Do the work: + size_t size = list->commands.size(); + for (int i = 0; i < size; i++) + { + auto &command = list->commands[i]; + command->Execute(thread); + } + + // Notify main thread that we finished: + std::unique_lock end_lock(end_mutex); + tasks_left--; + bool finishedTasks = tasks_left == 0; + end_lock.unlock(); + if (finishedTasks) + end_condition.notify_all(); + } } void DrawerThreads::StartThreads() @@ -157,78 +135,15 @@ void DrawerThreads::StartThreads() if (num_threads == 0) num_threads = 4; - threads.resize(num_threads - 1); + threads.resize(num_threads); - for (int i = 0; i < num_threads - 1; i++) + for (int i = 0; i < num_threads; i++) { DrawerThreads *queue = this; DrawerThread *thread = &threads[i]; - thread->core = i + 1; + thread->core = i; thread->num_cores = num_threads; - thread->thread = std::thread([=]() - { - int run_id = 0; - while (true) - { - // Wait until we are signalled to run: - std::unique_lock start_lock(queue->start_mutex); - queue->start_condition.wait(start_lock, [&]() { return queue->run_id != run_id || queue->shutdown_flag; }); - if (queue->shutdown_flag) - break; - run_id = queue->run_id; - start_lock.unlock(); - - // Do the work: - - struct TryCatchData - { - DrawerThreads *queue; - DrawerThread *thread; - size_t list_index; - size_t command_index; - } data; - - data.queue = queue; - data.thread = thread; - data.list_index = 0; - data.command_index = 0; - VectoredTryCatch(&data, - [](void *data) - { - TryCatchData *d = (TryCatchData*)data; - - for (int pass = 0; pass < d->queue->num_passes; pass++) - { - d->thread->pass_start_y = pass * d->queue->rows_in_pass; - d->thread->pass_end_y = (pass + 1) * d->queue->rows_in_pass; - if (pass + 1 == d->queue->num_passes) - d->thread->pass_end_y = MAX(d->thread->pass_end_y, MAXHEIGHT); - - for (auto &list : d->queue->active_commands) - { - size_t size = list->commands.size(); - for (d->command_index = 0; d->command_index < size; d->command_index++) - { - auto &command = list->commands[d->command_index]; - command->Execute(d->thread); - } - d->list_index++; - } - } - }, - [](void *data, const char *reason, bool fatal) - { - TryCatchData *d = (TryCatchData*)data; - ReportDrawerError(d->queue->active_commands[d->list_index]->commands[d->command_index], true, reason, fatal); - }); - - // Notify main thread that we finished: - std::unique_lock end_lock(queue->end_mutex); - queue->finished_threads++; - end_lock.unlock(); - queue->end_condition.notify_all(); - } - }); + thread->thread = std::thread([=]() { queue->WorkerMain(thread); }); } } @@ -245,28 +160,6 @@ void DrawerThreads::StopThreads() shutdown_flag = false; } -void DrawerThreads::ReportDrawerError(DrawerCommand *command, bool worker_thread, const char *reason, bool fatal) -{ - if (worker_thread) - { - std::unique_lock end_lock(Instance()->end_mutex); - if (Instance()->thread_error.IsEmpty() || (!Instance()->thread_error_fatal && fatal)) - { - Instance()->thread_error = reason + (FString)": " + command->DebugInfo(); - Instance()->thread_error_fatal = fatal; - } - } - else - { - static bool first = true; - if (fatal) - I_FatalError("%s: %s", reason, command->DebugInfo().GetChars()); - else if (first) - Printf("%s: %s\n", reason, command->DebugInfo().GetChars()); - first = false; - } -} - #ifndef WIN32 void VectoredTryCatch(void *data, void(*tryBlock)(void *data), void(*catchBlock)(void *data, const char *reason, bool fatal)) diff --git a/src/swrenderer/drawers/r_thread.h b/src/swrenderer/drawers/r_thread.h index cc8d33eab..621128859 100644 --- a/src/swrenderer/drawers/r_thread.h +++ b/src/swrenderer/drawers/r_thread.h @@ -44,6 +44,7 @@ public: } std::thread thread; + size_t current_queue = 0; // Thread line index of this thread int core = 0; @@ -51,10 +52,6 @@ public: // Number of active threads int num_cores = 1; - // Range of rows processed this pass - int pass_start_y = 0; - int pass_end_y = MAXHEIGHT; - // Working buffer used by the tilted (sloped) span drawer const uint8_t *tiltlighting[MAXWIDTH]; @@ -65,22 +62,19 @@ public: // Checks if a line is rendered by this thread bool line_skipped_by_thread(int line) { - return line < pass_start_y || line >= pass_end_y || line % num_cores != core; + return line % num_cores != core; } // The number of lines to skip to reach the first line to be rendered by this thread int skipped_by_thread(int first_line) { - int pass_skip = MAX(pass_start_y - first_line, 0); - int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores; - return pass_skip + core_skip; + int core_skip = (num_cores - (first_line - core) % num_cores) % num_cores; + return core_skip; } // The number of lines to be rendered by this thread int count_for_thread(int first_line, int count) { - int lines_until_pass_end = MAX(pass_end_y - first_line, 0); - count = MIN(count, lines_until_pass_end); int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; return MAX(c, 0); } @@ -118,7 +112,10 @@ class DrawerThreads { public: // Runs the collected commands on worker threads - static void Execute(const std::vector &queues); + static void Execute(DrawerCommandQueuePtr queue); + + // Waits for all commands to finish executing + static void WaitForWorkers(); private: DrawerThreads(); @@ -126,6 +123,7 @@ private: void StartThreads(); void StopThreads(); + void WorkerMain(DrawerThread *thread); static DrawerThreads *Instance(); static void ReportDrawerError(DrawerCommand *command, bool worker_thread, const char *reason, bool fatal); @@ -136,18 +134,12 @@ private: std::condition_variable start_condition; std::vector active_commands; bool shutdown_flag = false; - int run_id = 0; std::mutex end_mutex; std::condition_variable end_condition; - size_t finished_threads = 0; - FString thread_error; - bool thread_error_fatal = false; + size_t tasks_left = 0; - int threaded_render = 0; DrawerThread single_core_thread; - int num_passes = 1; - int rows_in_pass = MAXHEIGHT; friend class DrawerCommandQueue; }; diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index 6ff2fd594..75460218d 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -106,23 +106,12 @@ namespace swrenderer // Apply special colormap if the target cannot do it if (CameraLight::Instance()->ShaderColormap() && viewport->RenderTarget->IsBgra() && !(r_shadercolormaps && screen->Accel2D)) { - MainThread()->DrawQueue->Push(CameraLight::Instance()->ShaderColormap(), screen); - RenderDrawQueues(); + auto queue = std::make_shared(MainThread()); + queue->Push(CameraLight::Instance()->ShaderColormap(), screen); + DrawerThreads::Execute(queue); } - } - void RenderScene::RenderDrawQueues() - { - // Use reverse order so main thread is drawn last - std::vector queues; - for (auto it = Threads.rbegin(); it != Threads.rend(); ++it) - { - queues.push_back((*it)->DrawQueue); - } - DrawerThreads::Execute(queues); - - //using namespace std::chrono_literals; - //std::this_thread::sleep_for(0.5s); + DrawerThreads::WaitForWorkers(); } void RenderScene::RenderActorView(AActor *actor, bool dontmaplines) @@ -154,8 +143,7 @@ namespace swrenderer } RenderThreadSlices(); - MainThread()->PlayerSprites->Render(); - RenderDrawQueues(); + RenderPSprites(); MainThread()->Viewport->viewpoint.camera->renderflags = savedflags; interpolator.RestoreInterpolations(); @@ -168,6 +156,16 @@ namespace swrenderer } } + void RenderScene::RenderPSprites() + { + // Player sprites needs to be rendered after all the slices because they may be hardware accelerated. + // If they are not hardware accelerated the drawers must run after all sliced drawers finished. + DrawerThreads::WaitForWorkers(); + MainThread()->DrawQueue->Clear(); + MainThread()->PlayerSprites->Render(); + DrawerThreads::Execute(MainThread()->DrawQueue); + } + void RenderScene::RenderThreadSlices() { int numThreads = std::thread::hardware_concurrency(); @@ -220,6 +218,7 @@ namespace swrenderer void RenderScene::RenderThreadSlice(RenderThread *thread) { + thread->DrawQueue->Clear(); thread->FrameMemory->Clear(); thread->Clip3D->Cleanup(); thread->Clip3D->ResetClip(); // reset clips (floor/ceiling) @@ -278,6 +277,8 @@ namespace swrenderer if (thread->MainThread) NetUpdate(); } + + DrawerThreads::Execute(thread->DrawQueue); } void RenderScene::StartThreads(size_t numThreads) @@ -344,7 +345,8 @@ namespace swrenderer viewport->SetViewport(MainThread(), width, height, MainThread()->Viewport->viewwindow.WidescreenRatio); RenderActorView(actor, dontmaplines); - + DrawerThreads::WaitForWorkers(); + viewport->RenderTarget = screen; R_ExecuteSetViewSize(MainThread()->Viewport->viewpoint, MainThread()->Viewport->viewwindow); diff --git a/src/swrenderer/scene/r_scene.h b/src/swrenderer/scene/r_scene.h index d3680d422..02c12ec2c 100644 --- a/src/swrenderer/scene/r_scene.h +++ b/src/swrenderer/scene/r_scene.h @@ -51,9 +51,9 @@ namespace swrenderer private: void RenderActorView(AActor *actor, bool dontmaplines = false); - void RenderDrawQueues(); void RenderThreadSlices(); void RenderThreadSlice(RenderThread *thread); + void RenderPSprites(); void StartThreads(size_t numThreads); void StopThreads();