mirror of
https://github.com/ZDoom/Raze.git
synced 2024-11-29 23:42:08 +00:00
285 lines
7.6 KiB
C++
285 lines
7.6 KiB
C++
/*
|
|
** Renderer multithreading framework
|
|
** Copyright (c) 2016 Magnus Norddahl
|
|
**
|
|
** This software is provided 'as-is', without any express or implied
|
|
** warranty. In no event will the authors be held liable for any damages
|
|
** arising from the use of this software.
|
|
**
|
|
** Permission is granted to anyone to use this software for any purpose,
|
|
** including commercial applications, and to alter it and redistribute it
|
|
** freely, subject to the following restrictions:
|
|
**
|
|
** 1. The origin of this software must not be misrepresented; you must not
|
|
** claim that you wrote the original software. If you use this software
|
|
** in a product, an acknowledgment in the product documentation would be
|
|
** appreciated but is not required.
|
|
** 2. Altered source versions must be plainly marked as such, and must not be
|
|
** misrepresented as being the original software.
|
|
** 3. This notice may not be removed or altered from any source distribution.
|
|
**
|
|
*/
|
|
|
|
#include <stddef.h>
|
|
|
|
#include "i_system.h"
|
|
#include "filesystem.h"
|
|
#include "v_video.h"
|
|
#include "r_thread.h"
|
|
#include "r_memory.h"
|
|
#include "printf.h"
|
|
#include <chrono>
|
|
|
|
CVAR(Int, r_multithreaded, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG);
|
|
CVAR(Int, r_debug_draw, 0, 0);
|
|
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
|
|
DrawerThreads *DrawerThreads::Instance()
|
|
{
|
|
static DrawerThreads threads;
|
|
return &threads;
|
|
}
|
|
|
|
DrawerThreads::DrawerThreads()
|
|
{
|
|
}
|
|
|
|
DrawerThreads::~DrawerThreads()
|
|
{
|
|
StopThreads();
|
|
}
|
|
|
|
void DrawerThreads::Execute(DrawerCommandQueuePtr commands)
|
|
{
|
|
if (!commands || commands->commands.empty())
|
|
return;
|
|
|
|
auto queue = Instance();
|
|
|
|
queue->StartThreads();
|
|
|
|
// Add to queue and awaken worker threads
|
|
std::unique_lock<std::mutex> start_lock(queue->start_mutex);
|
|
std::unique_lock<std::mutex> end_lock(queue->end_mutex);
|
|
queue->active_commands.push_back(commands);
|
|
queue->tasks_left += queue->threads.size();
|
|
end_lock.unlock();
|
|
start_lock.unlock();
|
|
queue->start_condition.notify_all();
|
|
}
|
|
|
|
void DrawerThreads::ResetDebugDrawPos()
|
|
{
|
|
auto queue = Instance();
|
|
std::unique_lock<std::mutex> start_lock(queue->start_mutex);
|
|
bool reached_end = false;
|
|
for (auto &thread : queue->threads)
|
|
{
|
|
if (thread.debug_draw_pos + r_debug_draw * 60 * 2 < queue->debug_draw_end)
|
|
reached_end = true;
|
|
thread.debug_draw_pos = 0;
|
|
}
|
|
if (!reached_end)
|
|
queue->debug_draw_end += r_debug_draw;
|
|
else
|
|
queue->debug_draw_end = 0;
|
|
}
|
|
|
|
void DrawerThreads::WaitForWorkers()
|
|
{
|
|
using namespace std::chrono_literals;
|
|
|
|
// Wait for workers to finish
|
|
auto queue = Instance();
|
|
std::unique_lock<std::mutex> end_lock(queue->end_mutex);
|
|
if (!queue->end_condition.wait_for(end_lock, 5s, [&]() { return queue->tasks_left == 0; }))
|
|
{
|
|
I_FatalError("Drawer threads did not finish within 5 seconds!");
|
|
}
|
|
end_lock.unlock();
|
|
|
|
// Clean up
|
|
std::unique_lock<std::mutex> start_lock(queue->start_mutex);
|
|
for (auto &thread : queue->threads)
|
|
thread.current_queue = 0;
|
|
|
|
for (auto &list : queue->active_commands)
|
|
{
|
|
for (auto &command : list->commands)
|
|
command->~DrawerCommand();
|
|
list->Clear();
|
|
}
|
|
queue->active_commands.clear();
|
|
}
|
|
|
|
void DrawerThreads::WorkerMain(DrawerThread *thread)
|
|
{
|
|
while (true)
|
|
{
|
|
// Wait until we are signalled to run:
|
|
std::unique_lock<std::mutex> start_lock(start_mutex);
|
|
start_condition.wait(start_lock, [&]() { return thread->current_queue < active_commands.size() || shutdown_flag; });
|
|
if (shutdown_flag)
|
|
break;
|
|
|
|
// Grab the commands
|
|
DrawerCommandQueuePtr list = active_commands[thread->current_queue];
|
|
thread->current_queue++;
|
|
thread->numa_start_y = thread->numa_node * screen->GetHeight() / thread->num_numa_nodes;
|
|
thread->numa_end_y = (thread->numa_node + 1) * screen->GetHeight() / thread->num_numa_nodes;
|
|
start_lock.unlock();
|
|
|
|
// Do the work:
|
|
if (r_debug_draw)
|
|
{
|
|
for (auto& command : list->commands)
|
|
{
|
|
thread->debug_draw_pos++;
|
|
if (thread->debug_draw_pos < debug_draw_end)
|
|
command->Execute(thread);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (auto& command : list->commands)
|
|
{
|
|
command->Execute(thread);
|
|
}
|
|
}
|
|
|
|
// Notify main thread that we finished:
|
|
std::unique_lock<std::mutex> end_lock(end_mutex);
|
|
tasks_left--;
|
|
bool finishedTasks = tasks_left == 0;
|
|
end_lock.unlock();
|
|
if (finishedTasks)
|
|
end_condition.notify_all();
|
|
}
|
|
}
|
|
|
|
void DrawerThreads::StartThreads()
|
|
{
|
|
std::unique_lock<std::mutex> lock(threads_mutex);
|
|
|
|
int num_numathreads = 0;
|
|
for (int i = 0; i < I_GetNumaNodeCount(); i++)
|
|
num_numathreads += I_GetNumaNodeThreadCount(i);
|
|
|
|
int num_threads = num_numathreads;
|
|
if (num_threads == 0)
|
|
{
|
|
static bool firstCall = true;
|
|
if (firstCall)
|
|
{
|
|
firstCall = false;
|
|
if (r_multithreaded == 1)
|
|
Printf("Warning: Unable to determine number of CPU cores/threads for this computer. To improve performance, please type 'r_multithreaded x' in the console, where x is the number of threads to use.\n");
|
|
}
|
|
|
|
num_threads = 1;
|
|
}
|
|
|
|
if (r_multithreaded == 0)
|
|
num_threads = 1;
|
|
else if (r_multithreaded != 1)
|
|
num_threads = r_multithreaded;
|
|
|
|
if (num_threads != (int)threads.size())
|
|
{
|
|
StopThreads();
|
|
|
|
threads.resize(num_threads);
|
|
|
|
if (num_threads == num_numathreads)
|
|
{
|
|
int curThread = 0;
|
|
for (int numaNode = 0; numaNode < I_GetNumaNodeCount(); numaNode++)
|
|
{
|
|
for (int i = 0; i < I_GetNumaNodeThreadCount(numaNode); i++)
|
|
{
|
|
DrawerThreads *queue = this;
|
|
DrawerThread *thread = &threads[curThread++];
|
|
thread->core = i;
|
|
thread->num_cores = I_GetNumaNodeThreadCount(numaNode);
|
|
thread->numa_node = numaNode;
|
|
thread->num_numa_nodes = I_GetNumaNodeCount();
|
|
thread->thread = std::thread([=]() { queue->WorkerMain(thread); });
|
|
I_SetThreadNumaNode(thread->thread, numaNode);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (int i = 0; i < num_threads; i++)
|
|
{
|
|
DrawerThreads *queue = this;
|
|
DrawerThread *thread = &threads[i];
|
|
thread->core = i;
|
|
thread->num_cores = num_threads;
|
|
thread->numa_node = 0;
|
|
thread->num_numa_nodes = 1;
|
|
thread->thread = std::thread([=]() { queue->WorkerMain(thread); });
|
|
I_SetThreadNumaNode(thread->thread, 0);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void DrawerThreads::StopThreads()
|
|
{
|
|
std::unique_lock<std::mutex> lock(start_mutex);
|
|
shutdown_flag = true;
|
|
lock.unlock();
|
|
start_condition.notify_all();
|
|
for (auto &thread : threads)
|
|
thread.thread.join();
|
|
threads.clear();
|
|
lock.lock();
|
|
shutdown_flag = false;
|
|
}
|
|
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
|
|
DrawerCommandQueue::DrawerCommandQueue(RenderMemory *frameMemory) : FrameMemory(frameMemory)
|
|
{
|
|
}
|
|
|
|
void *DrawerCommandQueue::AllocMemory(size_t size)
|
|
{
|
|
return FrameMemory->AllocMemory<uint8_t>((int)size);
|
|
}
|
|
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
|
|
void GroupMemoryBarrierCommand::Execute(DrawerThread *thread)
|
|
{
|
|
std::unique_lock<std::mutex> lock(mutex);
|
|
count++;
|
|
condition.notify_all();
|
|
condition.wait(lock, [&]() { return count >= (size_t)thread->num_cores; });
|
|
}
|
|
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
|
|
MemcpyCommand::MemcpyCommand(void *dest, int destpitch, const void *src, int width, int height, int srcpitch, int pixelsize)
|
|
: dest(dest), src(src), destpitch(destpitch), width(width), height(height), srcpitch(srcpitch), pixelsize(pixelsize)
|
|
{
|
|
}
|
|
|
|
void MemcpyCommand::Execute(DrawerThread *thread)
|
|
{
|
|
int start = thread->skipped_by_thread(0);
|
|
int count = thread->count_for_thread(0, height);
|
|
int sstep = thread->num_cores * srcpitch * pixelsize;
|
|
int dstep = thread->num_cores * destpitch * pixelsize;
|
|
int size = width * pixelsize;
|
|
uint8_t *d = (uint8_t*)dest + start * destpitch * pixelsize;
|
|
const uint8_t *s = (const uint8_t*)src + start * srcpitch * pixelsize;
|
|
for (int i = 0; i < count; i++)
|
|
{
|
|
memcpy(d, s, size);
|
|
d += dstep;
|
|
s += sstep;
|
|
}
|
|
}
|