diff --git a/src/r_draw.cpp b/src/r_draw.cpp index ec7313c4f..552e5ff13 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -39,6 +39,7 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "r_plane.h" +#include "r_draw_rgba.h" #include "gi.h" #include "stats.h" @@ -2295,34 +2296,34 @@ void R_InitColumnDrawers () domvline4_saved = domvline4; } - R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA_C; - R_DrawColumn = R_DrawColumnP_RGBA_C; - R_DrawFuzzColumn = R_DrawFuzzColumnP_RGBA_C; - R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA_C; - R_DrawShadedColumn = R_DrawShadedColumnP_RGBA_C; - R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA_C; - R_DrawSpan = R_DrawSpanP_RGBA_C; + R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA; + R_DrawColumn = R_DrawColumnP_RGBA; + R_DrawFuzzColumn = R_DrawFuzzColumnP_RGBA; + R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA; + R_DrawShadedColumn = R_DrawShadedColumnP_RGBA; + R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA; + R_DrawSpan = R_DrawSpanP_RGBA; - R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA_C; - R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA_C; - R_DrawSpanAddClamp = R_DrawSpanAddClampP_RGBA_C; - R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_RGBA_C; + R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA; + R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA; + R_DrawSpanAddClamp = R_DrawSpanAddClampP_RGBA; + R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_RGBA; R_FillColumn = R_FillColumnP_RGBA; - R_FillAddColumn = R_FillAddColumn_RGBA_C; + R_FillAddColumn = R_FillAddColumn_RGBA; R_FillAddClampColumn = R_FillAddClampColumn_RGBA; R_FillSubClampColumn = R_FillSubClampColumn_RGBA; R_FillRevSubClampColumn = R_FillRevSubClampColumn_RGBA; - R_DrawAddColumn = R_DrawAddColumnP_RGBA_C; - R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_RGBA_C; - R_DrawAddClampColumn = R_DrawAddClampColumnP_RGBA_C; - R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_RGBA_C; - R_DrawSubClampColumn = R_DrawSubClampColumnP_RGBA_C; - R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_RGBA_C; - R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_RGBA_C; - R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_RGBA_C; + R_DrawAddColumn = R_DrawAddColumnP_RGBA; + R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_RGBA; + R_DrawAddClampColumn = R_DrawAddClampColumnP_RGBA; + R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_RGBA; + R_DrawSubClampColumn = R_DrawSubClampColumnP_RGBA; + R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_RGBA; + R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_RGBA; + R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_RGBA; R_FillSpan = R_FillSpan_RGBA; R_DrawFogBoundary = R_DrawFogBoundary_RGBA; - R_FillColumnHoriz = R_FillColumnHorizP_RGBA_C; + R_FillColumnHoriz = R_FillColumnHorizP_RGBA; R_DrawFogBoundary = R_DrawFogBoundary_RGBA; R_MapTiltedPlane = R_MapTiltedPlane_RGBA; @@ -2338,30 +2339,30 @@ void R_InitColumnDrawers () tmvline1_revsubclamp = tmvline1_revsubclamp_RGBA; tmvline4_revsubclamp = tmvline4_revsubclamp_RGBA; - rt_copy1col = rt_copy1col_RGBA_c; - rt_copy4cols = rt_copy4cols_RGBA_c; - rt_map1col = rt_map1col_RGBA_c; - rt_map4cols = rt_map4cols_RGBA_c; - rt_shaded1col = rt_shaded1col_RGBA_c; - rt_shaded4cols = rt_shaded4cols_RGBA_c; - rt_add1col = rt_add1col_RGBA_c; - rt_add4cols = rt_add4cols_RGBA_c; - rt_addclamp1col = rt_addclamp1col_RGBA_c; - rt_addclamp4cols = rt_addclamp4cols_RGBA_c; - rt_subclamp1col = rt_subclamp1col_RGBA_c; - rt_revsubclamp1col = rt_revsubclamp1col_RGBA_c; - rt_tlate1col = rt_tlate1col_RGBA_c; - rt_tlateadd1col = rt_tlateadd1col_RGBA_c; - rt_tlateaddclamp1col = rt_tlateaddclamp1col_RGBA_c; - rt_tlatesubclamp1col = rt_tlatesubclamp1col_RGBA_c; - rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_RGBA_c; - rt_subclamp4cols = rt_subclamp4cols_RGBA_c; - rt_revsubclamp4cols = rt_revsubclamp4cols_RGBA_c; - rt_tlate4cols = rt_tlate4cols_RGBA_c; - rt_tlateadd4cols = rt_tlateadd4cols_RGBA_c; - rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_RGBA_c; - rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA_c; - rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; + rt_copy1col = rt_copy1col_RGBA; + rt_copy4cols = rt_copy4cols_RGBA; + rt_map1col = rt_map1col_RGBA; + rt_map4cols = rt_map4cols_RGBA; + rt_shaded1col = rt_shaded1col_RGBA; + rt_shaded4cols = rt_shaded4cols_RGBA; + rt_add1col = rt_add1col_RGBA; + rt_add4cols = rt_add4cols_RGBA; + rt_addclamp1col = rt_addclamp1col_RGBA; + rt_addclamp4cols = rt_addclamp4cols_RGBA; + rt_subclamp1col = rt_subclamp1col_RGBA; + rt_revsubclamp1col = rt_revsubclamp1col_RGBA; + rt_tlate1col = rt_tlate1col_RGBA; + rt_tlateadd1col = rt_tlateadd1col_RGBA; + rt_tlateaddclamp1col = rt_tlateaddclamp1col_RGBA; + rt_tlatesubclamp1col = rt_tlatesubclamp1col_RGBA; + rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_RGBA; + rt_subclamp4cols = rt_subclamp4cols_RGBA; + rt_revsubclamp4cols = rt_revsubclamp4cols_RGBA; + rt_tlate4cols = rt_tlate4cols_RGBA; + rt_tlateadd4cols = rt_tlateadd4cols_RGBA; + rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_RGBA; + rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA; + rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA; rt_initcols = rt_initcols_rgba; rt_span_coverage = rt_span_coverage_rgba; diff --git a/src/r_draw.h b/src/r_draw.h index d09d0ab89..cea05e469 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -24,11 +24,6 @@ #define __R_DRAW__ #include "r_defs.h" -#include -#include -#include -#include -#include // Spectre/Invisibility. #define FUZZTABLE 50 @@ -175,39 +170,6 @@ void rt_map4cols_asm1 (int sx, int yl, int yh); void rt_map4cols_asm2 (int sx, int yl, int yh); void rt_add4cols_asm (int sx, int yl, int yh); void rt_addclamp4cols_asm (int sx, int yl, int yh); - -/// - -void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_copy4cols_RGBA_c (int sx, int yl, int yh); - -void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_shaded4cols_RGBA_c (int sx, int yl, int yh); - -void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); - -void rt_tlate1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_tlateadd1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_tlateaddclamp1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_tlatesubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_tlaterevsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); - -void rt_map4cols_RGBA_c (int sx, int yl, int yh); -void rt_add4cols_RGBA_c (int sx, int yl, int yh); -void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh); -void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh); -void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh); - -void rt_tlate4cols_RGBA_c (int sx, int yl, int yh); -void rt_tlateadd4cols_RGBA_c (int sx, int yl, int yh); -void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh); -void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh); -void rt_tlaterevsubclamp4cols_RGBA_c (int sx, int yl, int yh); - } extern void (*rt_copy1col)(int hx, int sx, int yl, int yh); @@ -247,10 +209,8 @@ void rt_draw4cols (int sx); // [RH] Preps the temporary horizontal buffer. void rt_initcols_pal (BYTE *buffer); -void rt_initcols_rgba (BYTE *buffer); void rt_span_coverage_pal(int x, int start, int stop); -void rt_span_coverage_rgba(int x, int start, int stop); extern void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); @@ -280,46 +240,6 @@ void R_DrawSpanMaskedP_C (void); #endif -void R_DrawColumnHorizP_RGBA_C (void); -void R_DrawColumnP_RGBA_C (void); -void R_DrawFuzzColumnP_RGBA_C (void); -void R_DrawTranslatedColumnP_RGBA_C (void); -void R_DrawShadedColumnP_RGBA_C (void); -void R_DrawSpanP_RGBA_C (void); -void R_DrawSpanMaskedP_RGBA_C (void); - -void R_DrawSpanTranslucentP_RGBA_C(); -void R_DrawSpanMaskedTranslucentP_RGBA_C(); -void R_DrawSpanAddClampP_RGBA_C(); -void R_DrawSpanMaskedAddClampP_RGBA_C(); -void R_FillColumnP_RGBA(); -void R_FillAddColumn_RGBA_C(); -void R_FillAddClampColumn_RGBA(); -void R_FillSubClampColumn_RGBA(); -void R_FillRevSubClampColumn_RGBA(); -void R_DrawAddColumnP_RGBA_C(); -void R_DrawTlatedAddColumnP_RGBA_C(); -void R_DrawAddClampColumnP_RGBA_C(); -void R_DrawAddClampTranslatedColumnP_RGBA_C(); -void R_DrawSubClampColumnP_RGBA_C(); -void R_DrawSubClampTranslatedColumnP_RGBA_C(); -void R_DrawRevSubClampColumnP_RGBA_C(); -void R_DrawRevSubClampTranslatedColumnP_RGBA_C(); -void R_FillSpan_RGBA(); -void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip); -fixed_t tmvline1_add_RGBA(); -void tmvline4_add_RGBA(); -fixed_t tmvline1_addclamp_RGBA(); -void tmvline4_addclamp_RGBA(); -fixed_t tmvline1_subclamp_RGBA(); -void tmvline4_subclamp_RGBA(); -fixed_t tmvline1_revsubclamp_RGBA(); -void tmvline4_revsubclamp_RGBA(); -DWORD vlinec1_RGBA(); -void vlinec4_RGBA(); -DWORD mvlinec1_RGBA(); -void mvlinec4_RGBA(); - void R_DrawSpanTranslucentP_C (void); void R_DrawSpanMaskedTranslucentP_C (void); @@ -348,9 +268,6 @@ void R_FillColumnP_C (void); void R_FillColumnHorizP_C (void); void R_FillSpan_C (void); -void R_FillColumnHorizP_RGBA_C(void); -void R_FillSpan_RGBA_C(void); - #ifdef X86_ASM #define R_SetupDrawSlab R_SetupDrawSlabA #define R_DrawSlab R_DrawSlabA @@ -443,160 +360,4 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); -// Redirect drawer commands to worker threads -void R_BeginDrawerCommands(); - -// Wait until all drawers finished executing -void R_EndDrawerCommands(); - -class DrawerCommandQueue; - -class DrawerThread -{ -public: - std::thread thread; - - // Thread line index of this thread - int core = 0; - - // Number of active threads - int num_cores = 1; - - // Range of rows processed this pass - int pass_start_y = 0; - int pass_end_y = MAXHEIGHT; - - uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; - uint32_t *dc_temp_rgba; - - // Checks if a line is rendered by this thread - bool line_skipped_by_thread(int line) - { - return line < pass_start_y || line >= pass_end_y || line % num_cores != core; - } - - // The number of lines to skip to reach the first line to be rendered by this thread - int skipped_by_thread(int first_line) - { - int pass_skip = MAX(pass_start_y - first_line, 0); - int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores; - return pass_skip + core_skip; - } - - // The number of lines to be rendered by this thread - int count_for_thread(int first_line, int count) - { - int lines_until_pass_end = MAX(pass_end_y - first_line, 0); - count = MIN(count, lines_until_pass_end); - int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; - return MAX(c, 0); - } - - // Calculate the dest address for the first line to be rendered by this thread - uint32_t *dest_for_thread(int first_line, int pitch, uint32_t *dest) - { - return dest + skipped_by_thread(first_line) * pitch; - } -}; - -class DrawerCommand -{ -protected: - int dc_dest_y; - -public: - DrawerCommand() - { - dc_dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); - } - - virtual void Execute(DrawerThread *thread) = 0; -}; - -class DrawerCommandQueue -{ - enum { memorypool_size = 4 * 1024 * 1024 }; - char memorypool[memorypool_size]; - size_t memorypool_pos = 0; - - std::vector commands; - - std::vector threads; - - std::mutex start_mutex; - std::condition_variable start_condition; - std::vector active_commands; - bool shutdown_flag = false; - int run_id = 0; - - std::mutex end_mutex; - std::condition_variable end_condition; - size_t finished_threads = 0; - - int threaded_render = 0; - DrawerThread single_core_thread; - int num_passes = 2; - int rows_in_pass = 540; - - void StartThreads(); - void StopThreads(); - void Finish(); - - static DrawerCommandQueue *Instance(); - - ~DrawerCommandQueue(); - -public: - // Allocate memory valid for the duration of a command execution - static void* AllocMemory(size_t size); - - // Queue command to be executed by drawer worker threads - template - static void QueueCommand(Types &&... args) - { - auto queue = Instance(); - if (queue->threaded_render == 0) - { - T command(std::forward(args)...); - command.Execute(&queue->single_core_thread); - } - else - { - void *ptr = AllocMemory(sizeof(T)); - if (!ptr) - return; - T *command = new (ptr)T(std::forward(args)...); - queue->commands.push_back(command); - } - } - - // Redirects all drawing commands to worker threads until End is called - // Begin/End blocks can be nested. - static void Begin(); - - // End redirection and wait until all worker threads finished executing - static void End(); - - // Waits until all worker threads finished executing - static void WaitForWorkers(); -}; - -class ApplySpecialColormapRGBACommand : public DrawerCommand -{ - BYTE *buffer; - int pitch; - int width; - int height; - int start_red; - int start_green; - int start_blue; - int end_red; - int end_green; - int end_blue; - -public: - ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen); - void Execute(DrawerThread *thread) override; -}; - #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 7e9f85117..a9dd2db32 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -37,6 +37,7 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "r_plane.h" +#include "r_draw_rgba.h" #include "gi.h" #include "stats.h" @@ -3655,7 +3656,7 @@ void R_EndDrawerCommands() DrawerCommandQueue::End(); } -void R_DrawColumnP_RGBA_C() +void R_DrawColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } @@ -3665,7 +3666,7 @@ void R_FillColumnP_RGBA() DrawerCommandQueue::QueueCommand(); } -void R_FillAddColumn_RGBA_C() +void R_FillAddColumn_RGBA() { DrawerCommandQueue::QueueCommand(); } @@ -3685,88 +3686,88 @@ void R_FillRevSubClampColumn_RGBA() DrawerCommandQueue::QueueCommand(); } -void R_DrawFuzzColumnP_RGBA_C() +void R_DrawFuzzColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); fuzzpos = (fuzzpos + dc_yh - dc_yl) % FUZZTABLE; } -void R_DrawAddColumnP_RGBA_C() +void R_DrawAddColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawTranslatedColumnP_RGBA_C() +void R_DrawTranslatedColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawTlatedAddColumnP_RGBA_C() +void R_DrawTlatedAddColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawShadedColumnP_RGBA_C() +void R_DrawShadedColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawAddClampColumnP_RGBA_C() +void R_DrawAddClampColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawAddClampTranslatedColumnP_RGBA_C() +void R_DrawAddClampTranslatedColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSubClampColumnP_RGBA_C() +void R_DrawSubClampColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSubClampTranslatedColumnP_RGBA_C() +void R_DrawSubClampTranslatedColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawRevSubClampColumnP_RGBA_C() +void R_DrawRevSubClampColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawRevSubClampTranslatedColumnP_RGBA_C() +void R_DrawRevSubClampTranslatedColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanP_RGBA_C() +void R_DrawSpanP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanMaskedP_RGBA_C() +void R_DrawSpanMaskedP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanTranslucentP_RGBA_C() +void R_DrawSpanTranslucentP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanMaskedTranslucentP_RGBA_C() +void R_DrawSpanMaskedTranslucentP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanAddClampP_RGBA_C() +void R_DrawSpanAddClampP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanMaskedAddClampP_RGBA_C() +void R_DrawSpanMaskedAddClampP_RGBA() { DrawerCommandQueue::QueueCommand(); } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h new file mode 100644 index 000000000..5d7402634 --- /dev/null +++ b/src/r_draw_rgba.h @@ -0,0 +1,276 @@ +// Emacs style mode select -*- C++ -*- +//----------------------------------------------------------------------------- +// +// $Id:$ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// +// DESCRIPTION: +// System specific interface stuff. +// +//----------------------------------------------------------------------------- + + +#ifndef __R_DRAW_RGBA__ +#define __R_DRAW_RGBA__ + +#include "r_draw.h" +#include +#include +#include +#include +#include + +///////////////////////////////////////////////////////////////////////////// +// Drawer functions: + +void rt_initcols_rgba(BYTE *buffer); +void rt_span_coverage_rgba(int x, int start, int stop); + +void rt_copy1col_RGBA(int hx, int sx, int yl, int yh); +void rt_copy4cols_RGBA(int sx, int yl, int yh); +void rt_shaded1col_RGBA(int hx, int sx, int yl, int yh); +void rt_shaded4cols_RGBA(int sx, int yl, int yh); +void rt_map1col_RGBA(int hx, int sx, int yl, int yh); +void rt_add1col_RGBA(int hx, int sx, int yl, int yh); +void rt_addclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_subclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_revsubclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_tlate1col_RGBA(int hx, int sx, int yl, int yh); +void rt_tlateadd1col_RGBA(int hx, int sx, int yl, int yh); +void rt_tlateaddclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_tlatesubclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_tlaterevsubclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_map4cols_RGBA(int sx, int yl, int yh); +void rt_add4cols_RGBA(int sx, int yl, int yh); +void rt_addclamp4cols_RGBA(int sx, int yl, int yh); +void rt_subclamp4cols_RGBA(int sx, int yl, int yh); +void rt_revsubclamp4cols_RGBA(int sx, int yl, int yh); +void rt_tlate4cols_RGBA(int sx, int yl, int yh); +void rt_tlateadd4cols_RGBA(int sx, int yl, int yh); +void rt_tlateaddclamp4cols_RGBA(int sx, int yl, int yh); +void rt_tlatesubclamp4cols_RGBA(int sx, int yl, int yh); +void rt_tlaterevsubclamp4cols_RGBA(int sx, int yl, int yh); + +void R_DrawColumnHorizP_RGBA(); +void R_DrawColumnP_RGBA(); +void R_DrawFuzzColumnP_RGBA(); +void R_DrawTranslatedColumnP_RGBA(); +void R_DrawShadedColumnP_RGBA(); + +void R_FillColumnP_RGBA(); +void R_FillAddColumn_RGBA(); +void R_FillAddClampColumn_RGBA(); +void R_FillSubClampColumn_RGBA(); +void R_FillRevSubClampColumn_RGBA(); +void R_DrawAddColumnP_RGBA(); +void R_DrawTlatedAddColumnP_RGBA(); +void R_DrawAddClampColumnP_RGBA(); +void R_DrawAddClampTranslatedColumnP_RGBA(); +void R_DrawSubClampColumnP_RGBA(); +void R_DrawSubClampTranslatedColumnP_RGBA(); +void R_DrawRevSubClampColumnP_RGBA(); +void R_DrawRevSubClampTranslatedColumnP_RGBA(); + +void R_DrawSpanP_RGBA(void); +void R_DrawSpanMaskedP_RGBA(void); +void R_DrawSpanTranslucentP_RGBA(); +void R_DrawSpanMaskedTranslucentP_RGBA(); +void R_DrawSpanAddClampP_RGBA(); +void R_DrawSpanMaskedAddClampP_RGBA(); +void R_FillSpan_RGBA(); + +void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip); + +DWORD vlinec1_RGBA(); +void vlinec4_RGBA(); +DWORD mvlinec1_RGBA(); +void mvlinec4_RGBA(); +fixed_t tmvline1_add_RGBA(); +void tmvline4_add_RGBA(); +fixed_t tmvline1_addclamp_RGBA(); +void tmvline4_addclamp_RGBA(); +fixed_t tmvline1_subclamp_RGBA(); +void tmvline4_subclamp_RGBA(); +fixed_t tmvline1_revsubclamp_RGBA(); +void tmvline4_revsubclamp_RGBA(); + +void R_FillColumnHorizP_RGBA(); +void R_FillSpan_RGBA(); + +///////////////////////////////////////////////////////////////////////////// +// Multithreaded rendering infrastructure: + +// Redirect drawer commands to worker threads +void R_BeginDrawerCommands(); + +// Wait until all drawers finished executing +void R_EndDrawerCommands(); + +struct FSpecialColormap; +class DrawerCommandQueue; + +// Worker data for each thread executing drawer commands +class DrawerThread +{ +public: + std::thread thread; + + // Thread line index of this thread + int core = 0; + + // Number of active threads + int num_cores = 1; + + // Range of rows processed this pass + int pass_start_y = 0; + int pass_end_y = MAXHEIGHT; + + uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; + uint32_t *dc_temp_rgba; + + // Checks if a line is rendered by this thread + bool line_skipped_by_thread(int line) + { + return line < pass_start_y || line >= pass_end_y || line % num_cores != core; + } + + // The number of lines to skip to reach the first line to be rendered by this thread + int skipped_by_thread(int first_line) + { + int pass_skip = MAX(pass_start_y - first_line, 0); + int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores; + return pass_skip + core_skip; + } + + // The number of lines to be rendered by this thread + int count_for_thread(int first_line, int count) + { + int lines_until_pass_end = MAX(pass_end_y - first_line, 0); + count = MIN(count, lines_until_pass_end); + int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; + return MAX(c, 0); + } + + // Calculate the dest address for the first line to be rendered by this thread + uint32_t *dest_for_thread(int first_line, int pitch, uint32_t *dest) + { + return dest + skipped_by_thread(first_line) * pitch; + } +}; + +// Task to be executed by each worker thread +class DrawerCommand +{ +protected: + int dc_dest_y; + +public: + DrawerCommand() + { + dc_dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); + } + + virtual void Execute(DrawerThread *thread) = 0; +}; + +// Manages queueing up commands and executing them on worker threads +class DrawerCommandQueue +{ + enum { memorypool_size = 4 * 1024 * 1024 }; + char memorypool[memorypool_size]; + size_t memorypool_pos = 0; + + std::vector commands; + + std::vector threads; + + std::mutex start_mutex; + std::condition_variable start_condition; + std::vector active_commands; + bool shutdown_flag = false; + int run_id = 0; + + std::mutex end_mutex; + std::condition_variable end_condition; + size_t finished_threads = 0; + + int threaded_render = 0; + DrawerThread single_core_thread; + int num_passes = 2; + int rows_in_pass = 540; + + void StartThreads(); + void StopThreads(); + void Finish(); + + static DrawerCommandQueue *Instance(); + + ~DrawerCommandQueue(); + +public: + // Allocate memory valid for the duration of a command execution + static void* AllocMemory(size_t size); + + // Queue command to be executed by drawer worker threads + template + static void QueueCommand(Types &&... args) + { + auto queue = Instance(); + if (queue->threaded_render == 0) + { + T command(std::forward(args)...); + command.Execute(&queue->single_core_thread); + } + else + { + void *ptr = AllocMemory(sizeof(T)); + if (!ptr) + return; + T *command = new (ptr)T(std::forward(args)...); + queue->commands.push_back(command); + } + } + + // Redirects all drawing commands to worker threads until End is called + // Begin/End blocks can be nested. + static void Begin(); + + // End redirection and wait until all worker threads finished executing + static void End(); + + // Waits until all worker threads finished executing + static void WaitForWorkers(); +}; + +///////////////////////////////////////////////////////////////////////////// +// Drawer commands: + +class ApplySpecialColormapRGBACommand : public DrawerCommand +{ + BYTE *buffer; + int pitch; + int width; + int height; + int start_red; + int start_green; + int start_blue; + int end_red; + int end_green; + int end_blue; + +public: + ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen); + void Execute(DrawerThread *thread) override; +}; + +#endif diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index cd124ac63..32d5080c5 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -42,6 +42,7 @@ #include "r_main.h" #include "r_things.h" #include "v_video.h" +#include "r_draw_rgba.h" #ifndef NO_SSE #include #endif @@ -1628,171 +1629,171 @@ public: ///////////////////////////////////////////////////////////////////////////// // Copies one span at hx to the screen at sx. -void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_copy1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Copies all four spans to the screen starting at sx. -void rt_copy4cols_RGBA_c (int sx, int yl, int yh) +void rt_copy4cols_RGBA (int sx, int yl, int yh) { // To do: we could do this with SSE using __m128i - rt_copy1col_RGBA_c(0, sx, yl, yh); - rt_copy1col_RGBA_c(1, sx + 1, yl, yh); - rt_copy1col_RGBA_c(2, sx + 2, yl, yh); - rt_copy1col_RGBA_c(3, sx + 3, yl, yh); + rt_copy1col_RGBA(0, sx, yl, yh); + rt_copy1col_RGBA(1, sx + 1, yl, yh); + rt_copy1col_RGBA(2, sx + 2, yl, yh); + rt_copy1col_RGBA(3, sx + 3, yl, yh); } // Maps one span at hx to the screen at sx. -void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_map1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Maps all four spans to the screen starting at sx. -void rt_map4cols_RGBA_c (int sx, int yl, int yh) +void rt_map4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } -void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) +void rt_Translate1col_RGBA(const BYTE *translation, int hx, int yl, int yh) { DrawerCommandQueue::QueueCommand(translation, hx, yl, yh); } -void rt_Translate4cols_RGBA_c(const BYTE *translation, int yl, int yh) +void rt_Translate4cols_RGBA(const BYTE *translation, int yl, int yh) { DrawerCommandQueue::QueueCommand(translation, yl, yh); } // Translates one span at hx to the screen at sx. -void rt_tlate1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_tlate1col_RGBA (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); + rt_Translate1col_RGBA(dc_translation, hx, yl, yh); rt_map1col(hx, sx, yl, yh); } // Translates all four spans to the screen starting at sx. -void rt_tlate4cols_RGBA_c (int sx, int yl, int yh) +void rt_tlate4cols_RGBA (int sx, int yl, int yh) { - rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_Translate4cols_RGBA(dc_translation, yl, yh); rt_map4cols(sx, yl, yh); } // Adds one span at hx to the screen at sx without clamping. -void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_add1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx without clamping. -void rt_add4cols_RGBA_c (int sx, int yl, int yh) +void rt_add4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and adds one span at hx to the screen at sx without clamping. -void rt_tlateadd1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_tlateadd1col_RGBA (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); + rt_Translate1col_RGBA(dc_translation, hx, yl, yh); rt_add1col(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx without clamping. -void rt_tlateadd4cols_RGBA_c(int sx, int yl, int yh) +void rt_tlateadd4cols_RGBA(int sx, int yl, int yh) { - rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_Translate4cols_RGBA(dc_translation, yl, yh); rt_add4cols(sx, yl, yh); } // Shades one span at hx to the screen at sx. -void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_shaded1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Shades all four spans to the screen starting at sx. -void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) +void rt_shaded4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Adds one span at hx to the screen at sx with clamping. -void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_addclamp1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx with clamping. -void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_addclamp4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and adds one span at hx to the screen at sx with clamping. -void rt_tlateaddclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_tlateaddclamp1col_RGBA (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); - rt_addclamp1col_RGBA_c(hx, sx, yl, yh); + rt_Translate1col_RGBA(dc_translation, hx, yl, yh); + rt_addclamp1col_RGBA(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx with clamping. -void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_tlateaddclamp4cols_RGBA (int sx, int yl, int yh) { - rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_Translate4cols_RGBA(dc_translation, yl, yh); rt_addclamp4cols(sx, yl, yh); } // Subtracts one span at hx to the screen at sx with clamping. -void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_subclamp1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans to the screen starting at sx with clamping. -void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_subclamp4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and subtracts one span at hx to the screen at sx with clamping. -void rt_tlatesubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_tlatesubclamp1col_RGBA (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); - rt_subclamp1col_RGBA_c(hx, sx, yl, yh); + rt_Translate1col_RGBA(dc_translation, hx, yl, yh); + rt_subclamp1col_RGBA(hx, sx, yl, yh); } // Translates and subtracts all four spans to the screen starting at sx with clamping. -void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_tlatesubclamp4cols_RGBA (int sx, int yl, int yh) { - rt_Translate4cols_RGBA_c(dc_translation, yl, yh); - rt_subclamp4cols_RGBA_c(sx, yl, yh); + rt_Translate4cols_RGBA(dc_translation, yl, yh); + rt_subclamp4cols_RGBA(sx, yl, yh); } // Subtracts one span at hx from the screen at sx with clamping. -void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_revsubclamp1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans from the screen starting at sx with clamping. -void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_revsubclamp4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and subtracts one span at hx from the screen at sx with clamping. -void rt_tlaterevsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_tlaterevsubclamp1col_RGBA (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); - rt_revsubclamp1col_RGBA_c(hx, sx, yl, yh); + rt_Translate1col_RGBA(dc_translation, hx, yl, yh); + rt_revsubclamp1col_RGBA(hx, sx, yl, yh); } // Translates and subtracts all four spans from the screen starting at sx with clamping. -void rt_tlaterevsubclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_tlaterevsubclamp4cols_RGBA (int sx, int yl, int yh) { - rt_Translate4cols_RGBA_c(dc_translation, yl, yh); - rt_revsubclamp4cols_RGBA_c(sx, yl, yh); + rt_Translate4cols_RGBA(dc_translation, yl, yh); + rt_revsubclamp4cols_RGBA(sx, yl, yh); } // Before each pass through a rendering loop that uses these routines, @@ -1815,7 +1816,7 @@ void rt_span_coverage_rgba(int x, int start, int stop) // Stretches a column into a temporary buffer which is later // drawn to the screen along with up to three other columns. -void R_DrawColumnHorizP_RGBA_C (void) +void R_DrawColumnHorizP_RGBA (void) { if (dc_count <= 0) return; @@ -1830,7 +1831,7 @@ void R_DrawColumnHorizP_RGBA_C (void) } // [RH] Just fills a column with a given color -void R_FillColumnHorizP_RGBA_C (void) +void R_FillColumnHorizP_RGBA (void) { if (dc_count <= 0) return; diff --git a/src/r_main.cpp b/src/r_main.cpp index 4e5ff1dbd..247a98125 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -40,6 +40,7 @@ #include "r_segs.h" #include "r_3dfloors.h" #include "r_sky.h" +#include "r_draw_rgba.h" #include "st_stuff.h" #include "c_cvars.h" #include "c_dispatch.h" diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 5be41660e..fbbd65b17 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -42,6 +42,7 @@ #include "r_3dfloors.h" #include "textures/textures.h" #include "r_data/voxels.h" +#include "r_draw_rgba.h" EXTERN_CVAR(Bool, r_shadercolormaps) diff --git a/src/r_things.cpp b/src/r_things.cpp index 2dc0bdb6c..f1f29f160 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -58,6 +58,7 @@ #include "r_plane.h" #include "r_segs.h" #include "r_3dfloors.h" +#include "r_draw_rgba.h" #include "v_palette.h" #include "r_data/r_translate.h" #include "r_data/colormaps.h"