/* ** Renderer multithreading framework ** Copyright (c) 2016 Magnus Norddahl ** ** This software is provided 'as-is', without any express or implied ** warranty. In no event will the authors be held liable for any damages ** arising from the use of this software. ** ** Permission is granted to anyone to use this software for any purpose, ** including commercial applications, and to alter it and redistribute it ** freely, subject to the following restrictions: ** ** 1. The origin of this software must not be misrepresented; you must not ** claim that you wrote the original software. If you use this software ** in a product, an acknowledgment in the product documentation would be ** appreciated but is not required. ** 2. Altered source versions must be plainly marked as such, and must not be ** misrepresented as being the original software. ** 3. This notice may not be removed or altered from any source distribution. ** */ #pragma once #include <vector> #include <memory> #include <thread> #include <mutex> #include <condition_variable> #include "templates.h" #include "c_cvars.h" #include "basics.h" // Use multiple threads when drawing EXTERN_CVAR(Int, r_multithreaded) class PolyTriangleThreadData; namespace swrenderer { class WallColumnDrawerArgs; } // Worker data for each thread executing drawer commands class DrawerThread { public: std::thread thread; size_t current_queue = 0; // Thread line index of this thread int core = 0; // Number of active threads int num_cores = 1; // NUMA node this thread belongs to int numa_node = 0; // Number of active NUMA nodes int num_numa_nodes = 1; // Active range for the numa block the cores are part of int numa_start_y = 0; int numa_end_y = MAXHEIGHT; // Working buffer used by the tilted (sloped) span drawer const uint8_t *tiltlighting[MAXWIDTH]; std::shared_ptr<PolyTriangleThreadData> poly; std::shared_ptr<swrenderer::WallColumnDrawerArgs> columndrawer; size_t debug_draw_pos = 0; // Checks if a line is rendered by this thread bool line_skipped_by_thread(int line) { return line < numa_start_y || line >= numa_end_y || line % num_cores != core; } // The number of lines to skip to reach the first line to be rendered by this thread int skipped_by_thread(int first_line) { int clip_first_line = MAX(first_line, numa_start_y); int core_skip = (num_cores - (clip_first_line - core) % num_cores) % num_cores; return clip_first_line + core_skip - first_line; } // The number of lines to be rendered by this thread int count_for_thread(int first_line, int count) { count = MIN(count, numa_end_y - first_line); int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; return MAX(c, 0); } // Calculate the dest address for the first line to be rendered by this thread template<typename T> T *dest_for_thread(int first_line, int pitch, T *dest) { return dest + skipped_by_thread(first_line) * pitch; } // The first line in the dc_temp buffer used this thread int temp_line_for_thread(int first_line) { return (first_line + skipped_by_thread(first_line)) / num_cores; } }; // Task to be executed by each worker thread class DrawerCommand { public: virtual ~DrawerCommand() { } virtual void Execute(DrawerThread *thread) = 0; }; // Wait for all worker threads before executing next command class GroupMemoryBarrierCommand : public DrawerCommand { public: void Execute(DrawerThread *thread); private: std::mutex mutex; std::condition_variable condition; size_t count = 0; }; // Copy finished rows to video memory class MemcpyCommand : public DrawerCommand { public: MemcpyCommand(void *dest, int destpitch, const void *src, int width, int height, int srcpitch, int pixelsize); void Execute(DrawerThread *thread); private: void *dest; const void *src; int destpitch; int width; int height; int srcpitch; int pixelsize; }; class DrawerCommandQueue; typedef std::shared_ptr<DrawerCommandQueue> DrawerCommandQueuePtr; class DrawerThreads { public: // Runs the collected commands on worker threads static void Execute(DrawerCommandQueuePtr queue); // Waits for all commands to finish executing static void WaitForWorkers(); static void ResetDebugDrawPos(); private: DrawerThreads(); ~DrawerThreads(); void StartThreads(); void StopThreads(); void WorkerMain(DrawerThread *thread); static DrawerThreads *Instance(); std::mutex threads_mutex; std::vector<DrawerThread> threads; std::mutex start_mutex; std::condition_variable start_condition; std::vector<DrawerCommandQueuePtr> active_commands; bool shutdown_flag = false; std::mutex end_mutex; std::condition_variable end_condition; size_t tasks_left = 0; size_t debug_draw_end = 0; DrawerThread single_core_thread; friend class DrawerCommandQueue; }; class RenderMemory; class DrawerCommandQueue { public: DrawerCommandQueue(RenderMemory *memoryAllocator); void Clear() { commands.clear(); } // Queue command to be executed by drawer worker threads template<typename T, typename... Types> void Push(Types &&... args) { DrawerThreads *threads = DrawerThreads::Instance(); if (r_multithreaded != 0) { void *ptr = AllocMemory(sizeof(T)); T *command = new (ptr)T(std::forward<Types>(args)...); commands.push_back(command); } else { T command(std::forward<Types>(args)...); command.Execute(&threads->single_core_thread); } } private: // Allocate memory valid for the duration of a command execution void *AllocMemory(size_t size); std::vector<DrawerCommand *> commands; RenderMemory *FrameMemory; friend class DrawerThreads; };