- allow drawer queues to run immediately to improve r_scene_multithreaded performance

- removed unused pass ranges in DrawerThread
This commit is contained in:
Magnus Norddahl 2017-03-14 23:03:14 +01:00
parent 6926875b21
commit 3838ec3edc
7 changed files with 94 additions and 208 deletions

View file

@ -375,8 +375,6 @@ void DrawPolyTrianglesCommand::Execute(DrawerThread *thread)
WorkerThreadData thread_data;
thread_data.core = thread->core;
thread_data.num_cores = thread->num_cores;
thread_data.pass_start_y = thread->pass_start_y;
thread_data.pass_end_y = thread->pass_end_y;
thread_data.FullSpans = thread->FullSpansBuffer.data();
thread_data.PartialBlocks = thread->PartialBlocksBuffer.data();

View file

@ -46,8 +46,6 @@ struct WorkerThreadData
{
int32_t core;
int32_t num_cores;
int32_t pass_start_y;
int32_t pass_end_y;
uint32_t *temp;
// Triangle working data:

View file

@ -78,7 +78,9 @@ void PolyRenderer::RenderView(player_t *player)
Thread.DrawQueue->Push<ApplySpecialColormapRGBACommand>(cameraLight->ShaderColormap(), screen);
}
DrawerThreads::Execute({ Thread.DrawQueue });
DrawerThreads::Execute(Thread.DrawQueue);
DrawerThreads::WaitForWorkers();
Thread.DrawQueue->Clear();
}
void PolyRenderer::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines)
@ -98,7 +100,8 @@ void PolyRenderer::RenderViewToCanvas(AActor *actor, DCanvas *canvas, int x, int
canvas->Lock(true);
RenderActorView(actor, dontmaplines);
DrawerThreads::Execute({ Thread.DrawQueue });
DrawerThreads::Execute(Thread.DrawQueue);
DrawerThreads::WaitForWorkers();
canvas->Unlock();

View file

@ -53,90 +53,36 @@ DrawerThreads::~DrawerThreads()
StopThreads();
}
void DrawerThreads::Execute(const std::vector<DrawerCommandQueuePtr> &queues)
void DrawerThreads::Execute(DrawerCommandQueuePtr commands)
{
bool hasWork = false;
for (const auto &queue : queues)
hasWork = hasWork || !queue->commands.empty();
if (!hasWork)
if (!commands || commands->commands.empty())
return;
auto queue = Instance();
// Give worker threads something to do:
std::unique_lock<std::mutex> start_lock(queue->start_mutex);
queue->active_commands = queues;
queue->run_id++;
start_lock.unlock();
queue->StartThreads();
queue->start_condition.notify_all();
// Do one thread ourselves:
static DrawerThread thread;
thread.core = 0;
thread.num_cores = (int)(queue->threads.size() + 1);
struct TryCatchData
{
DrawerThreads *queue;
DrawerThread *thread;
size_t list_index;
size_t command_index;
} data;
data.queue = queue;
data.thread = &thread;
data.list_index = 0;
data.command_index = 0;
VectoredTryCatch(&data,
[](void *data)
{
TryCatchData *d = (TryCatchData*)data;
for (int pass = 0; pass < d->queue->num_passes; pass++)
{
d->thread->pass_start_y = pass * d->queue->rows_in_pass;
d->thread->pass_end_y = (pass + 1) * d->queue->rows_in_pass;
if (pass + 1 == d->queue->num_passes)
d->thread->pass_end_y = MAX(d->thread->pass_end_y, MAXHEIGHT);
for (auto &list : d->queue->active_commands)
{
size_t size = list->commands.size();
for (d->command_index = 0; d->command_index < size; d->command_index++)
{
auto &command = list->commands[d->command_index];
command->Execute(d->thread);
}
d->list_index++;
}
}
},
[](void *data, const char *reason, bool fatal)
{
TryCatchData *d = (TryCatchData*)data;
ReportDrawerError(d->queue->active_commands[d->list_index]->commands[d->command_index], true, reason, fatal);
});
// Wait for everyone to finish:
// Add to queue and awaken worker threads
std::unique_lock<std::mutex> start_lock(queue->start_mutex);
std::unique_lock<std::mutex> end_lock(queue->end_mutex);
queue->end_condition.wait(end_lock, [&]() { return queue->finished_threads == queue->threads.size(); });
queue->active_commands.push_back(commands);
queue->tasks_left += queue->threads.size();
end_lock.unlock();
start_lock.unlock();
queue->start_condition.notify_all();
}
if (!queue->thread_error.IsEmpty())
{
static bool first = true;
if (queue->thread_error_fatal)
I_FatalError("%s", queue->thread_error.GetChars());
else if (first)
Printf("%s\n", queue->thread_error.GetChars());
first = false;
}
void DrawerThreads::WaitForWorkers()
{
// Wait for workers to finish
auto queue = Instance();
std::unique_lock<std::mutex> end_lock(queue->end_mutex);
queue->end_condition.wait(end_lock, [&]() { return queue->tasks_left == 0; });
end_lock.unlock();
// Clean up batch:
// Clean up
std::unique_lock<std::mutex> start_lock(queue->start_mutex);
for (auto &thread : queue->threads)
thread.current_queue = 0;
for (auto &list : queue->active_commands)
{
@ -145,7 +91,39 @@ void DrawerThreads::Execute(const std::vector<DrawerCommandQueuePtr> &queues)
list->Clear();
}
queue->active_commands.clear();
queue->finished_threads = 0;
}
// Main loop of a worker thread.
//
// Repeatedly waits until active_commands holds a queue this thread has not
// processed yet (tracked by thread->current_queue) or until shutdown_flag is
// set. It then runs every command of that queue with this thread's state and
// reports completion by decrementing tasks_left, waking waiters on
// end_condition once tasks_left reaches zero.
//
// thread: per-worker state that is handed to each command's Execute().
void DrawerThreads::WorkerMain(DrawerThread *thread)
{
	while (true)
	{
		// Wait until we are signalled to run:
		std::unique_lock<std::mutex> start_lock(start_mutex);
		start_condition.wait(start_lock, [&]() { return thread->current_queue < active_commands.size() || shutdown_flag; });
		if (shutdown_flag)
			break;

		// Grab the commands while start_mutex is still held; after the unlock
		// only the local copy of the queue pointer is used.
		DrawerCommandQueuePtr list = active_commands[thread->current_queue];
		thread->current_queue++;
		start_lock.unlock();

		// Do the work. The index is size_t so it matches the type of size
		// (no signed/unsigned comparison, no overflow on very large queues).
		const size_t size = list->commands.size();
		for (size_t i = 0; i < size; i++)
		{
			auto &command = list->commands[i];
			command->Execute(thread);
		}

		// Notify main thread that we finished. The condition variable is
		// signalled after end_mutex has been released.
		bool finishedTasks;
		{
			std::lock_guard<std::mutex> end_lock(end_mutex);
			tasks_left--;
			finishedTasks = tasks_left == 0;
		}
		if (finishedTasks)
			end_condition.notify_all();
	}
}
void DrawerThreads::StartThreads()
@ -157,78 +135,15 @@ void DrawerThreads::StartThreads()
if (num_threads == 0)
num_threads = 4;
threads.resize(num_threads - 1);
threads.resize(num_threads);
for (int i = 0; i < num_threads - 1; i++)
for (int i = 0; i < num_threads; i++)
{
DrawerThreads *queue = this;
DrawerThread *thread = &threads[i];
thread->core = i + 1;
thread->core = i;
thread->num_cores = num_threads;
thread->thread = std::thread([=]()
{
int run_id = 0;
while (true)
{
// Wait until we are signalled to run:
std::unique_lock<std::mutex> start_lock(queue->start_mutex);
queue->start_condition.wait(start_lock, [&]() { return queue->run_id != run_id || queue->shutdown_flag; });
if (queue->shutdown_flag)
break;
run_id = queue->run_id;
start_lock.unlock();
// Do the work:
struct TryCatchData
{
DrawerThreads *queue;
DrawerThread *thread;
size_t list_index;
size_t command_index;
} data;
data.queue = queue;
data.thread = thread;
data.list_index = 0;
data.command_index = 0;
VectoredTryCatch(&data,
[](void *data)
{
TryCatchData *d = (TryCatchData*)data;
for (int pass = 0; pass < d->queue->num_passes; pass++)
{
d->thread->pass_start_y = pass * d->queue->rows_in_pass;
d->thread->pass_end_y = (pass + 1) * d->queue->rows_in_pass;
if (pass + 1 == d->queue->num_passes)
d->thread->pass_end_y = MAX(d->thread->pass_end_y, MAXHEIGHT);
for (auto &list : d->queue->active_commands)
{
size_t size = list->commands.size();
for (d->command_index = 0; d->command_index < size; d->command_index++)
{
auto &command = list->commands[d->command_index];
command->Execute(d->thread);
}
d->list_index++;
}
}
},
[](void *data, const char *reason, bool fatal)
{
TryCatchData *d = (TryCatchData*)data;
ReportDrawerError(d->queue->active_commands[d->list_index]->commands[d->command_index], true, reason, fatal);
});
// Notify main thread that we finished:
std::unique_lock<std::mutex> end_lock(queue->end_mutex);
queue->finished_threads++;
end_lock.unlock();
queue->end_condition.notify_all();
}
});
thread->thread = std::thread([=]() { queue->WorkerMain(thread); });
}
}
@ -245,28 +160,6 @@ void DrawerThreads::StopThreads()
shutdown_flag = false;
}
void DrawerThreads::ReportDrawerError(DrawerCommand *command, bool worker_thread, const char *reason, bool fatal)
{
if (worker_thread)
{
std::unique_lock<std::mutex> end_lock(Instance()->end_mutex);
if (Instance()->thread_error.IsEmpty() || (!Instance()->thread_error_fatal && fatal))
{
Instance()->thread_error = reason + (FString)": " + command->DebugInfo();
Instance()->thread_error_fatal = fatal;
}
}
else
{
static bool first = true;
if (fatal)
I_FatalError("%s: %s", reason, command->DebugInfo().GetChars());
else if (first)
Printf("%s: %s\n", reason, command->DebugInfo().GetChars());
first = false;
}
}
#ifndef WIN32
void VectoredTryCatch(void *data, void(*tryBlock)(void *data), void(*catchBlock)(void *data, const char *reason, bool fatal))

View file

@ -44,6 +44,7 @@ public:
}
std::thread thread;
size_t current_queue = 0;
// Thread line index of this thread
int core = 0;
@ -51,10 +52,6 @@ public:
// Number of active threads
int num_cores = 1;
// Range of rows processed this pass
int pass_start_y = 0;
int pass_end_y = MAXHEIGHT;
// Working buffer used by the tilted (sloped) span drawer
const uint8_t *tiltlighting[MAXWIDTH];
@ -65,22 +62,19 @@ public:
// Checks if a line is rendered by this thread
bool line_skipped_by_thread(int line)
{
return line < pass_start_y || line >= pass_end_y || line % num_cores != core;
return line % num_cores != core;
}
// The number of lines to skip to reach the first line to be rendered by this thread
int skipped_by_thread(int first_line)
{
int pass_skip = MAX(pass_start_y - first_line, 0);
int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores;
return pass_skip + core_skip;
int core_skip = (num_cores - (first_line - core) % num_cores) % num_cores;
return core_skip;
}
// The number of lines to be rendered by this thread
int count_for_thread(int first_line, int count)
{
int lines_until_pass_end = MAX(pass_end_y - first_line, 0);
count = MIN(count, lines_until_pass_end);
int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores;
return MAX(c, 0);
}
@ -118,7 +112,10 @@ class DrawerThreads
{
public:
// Runs the collected commands on worker threads
static void Execute(const std::vector<DrawerCommandQueuePtr> &queues);
static void Execute(DrawerCommandQueuePtr queue);
// Waits for all commands to finish executing
static void WaitForWorkers();
private:
DrawerThreads();
@ -126,6 +123,7 @@ private:
void StartThreads();
void StopThreads();
void WorkerMain(DrawerThread *thread);
static DrawerThreads *Instance();
static void ReportDrawerError(DrawerCommand *command, bool worker_thread, const char *reason, bool fatal);
@ -136,18 +134,12 @@ private:
std::condition_variable start_condition;
std::vector<DrawerCommandQueuePtr> active_commands;
bool shutdown_flag = false;
int run_id = 0;
std::mutex end_mutex;
std::condition_variable end_condition;
size_t finished_threads = 0;
FString thread_error;
bool thread_error_fatal = false;
size_t tasks_left = 0;
int threaded_render = 0;
DrawerThread single_core_thread;
int num_passes = 1;
int rows_in_pass = MAXHEIGHT;
friend class DrawerCommandQueue;
};

View file

@ -106,23 +106,12 @@ namespace swrenderer
// Apply special colormap if the target cannot do it
if (CameraLight::Instance()->ShaderColormap() && viewport->RenderTarget->IsBgra() && !(r_shadercolormaps && screen->Accel2D))
{
MainThread()->DrawQueue->Push<ApplySpecialColormapRGBACommand>(CameraLight::Instance()->ShaderColormap(), screen);
RenderDrawQueues();
auto queue = std::make_shared<DrawerCommandQueue>(MainThread());
queue->Push<ApplySpecialColormapRGBACommand>(CameraLight::Instance()->ShaderColormap(), screen);
DrawerThreads::Execute(queue);
}
}
void RenderScene::RenderDrawQueues()
{
// Use reverse order so main thread is drawn last
std::vector<DrawerCommandQueuePtr> queues;
for (auto it = Threads.rbegin(); it != Threads.rend(); ++it)
{
queues.push_back((*it)->DrawQueue);
}
DrawerThreads::Execute(queues);
//using namespace std::chrono_literals;
//std::this_thread::sleep_for(0.5s);
DrawerThreads::WaitForWorkers();
}
void RenderScene::RenderActorView(AActor *actor, bool dontmaplines)
@ -154,8 +143,7 @@ namespace swrenderer
}
RenderThreadSlices();
MainThread()->PlayerSprites->Render();
RenderDrawQueues();
RenderPSprites();
MainThread()->Viewport->viewpoint.camera->renderflags = savedflags;
interpolator.RestoreInterpolations();
@ -168,6 +156,16 @@ namespace swrenderer
}
}
void RenderScene::RenderPSprites()
{
// Player sprites needs to be rendered after all the slices because they may be hardware accelerated.
// If they are not hardware accelerated the drawers must run after all sliced drawers finished.
DrawerThreads::WaitForWorkers();
MainThread()->DrawQueue->Clear();
MainThread()->PlayerSprites->Render();
DrawerThreads::Execute(MainThread()->DrawQueue);
}
void RenderScene::RenderThreadSlices()
{
int numThreads = std::thread::hardware_concurrency();
@ -220,6 +218,7 @@ namespace swrenderer
void RenderScene::RenderThreadSlice(RenderThread *thread)
{
thread->DrawQueue->Clear();
thread->FrameMemory->Clear();
thread->Clip3D->Cleanup();
thread->Clip3D->ResetClip(); // reset clips (floor/ceiling)
@ -278,6 +277,8 @@ namespace swrenderer
if (thread->MainThread)
NetUpdate();
}
DrawerThreads::Execute(thread->DrawQueue);
}
void RenderScene::StartThreads(size_t numThreads)
@ -344,7 +345,8 @@ namespace swrenderer
viewport->SetViewport(MainThread(), width, height, MainThread()->Viewport->viewwindow.WidescreenRatio);
RenderActorView(actor, dontmaplines);
DrawerThreads::WaitForWorkers();
viewport->RenderTarget = screen;
R_ExecuteSetViewSize(MainThread()->Viewport->viewpoint, MainThread()->Viewport->viewwindow);

View file

@ -51,9 +51,9 @@ namespace swrenderer
private:
void RenderActorView(AActor *actor, bool dontmaplines = false);
void RenderDrawQueues();
void RenderThreadSlices();
void RenderThreadSlice(RenderThread *thread);
void RenderPSprites();
void StartThreads(size_t numThreads);
void StopThreads();