From c16506bf5977aac3e53f763d9e4feb4d120664b7 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 6 Dec 2016 15:13:43 +0100 Subject: [PATCH] Add thread awareness to the rt family of drawers --- src/r_draw_tc.cpp | 10 +++ src/r_drawt_pal.cpp | 215 ++++++++++++++++++++++---------------------- src/r_thread.h | 6 ++ 3 files changed, 125 insertions(+), 106 deletions(-) diff --git a/src/r_draw_tc.cpp b/src/r_draw_tc.cpp index 77d4500064..ec9129a89c 100644 --- a/src/r_draw_tc.cpp +++ b/src/r_draw_tc.cpp @@ -925,18 +925,28 @@ namespace swrenderer void rt_tlate1col(int hx, int sx, int yl, int yh) { if (r_swtruecolor) + { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } else + { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_map1col(hx, sx, yl, yh); + } } // Translates all four spans to the screen starting at sx. void rt_tlate4cols(int sx, int yl, int yh) { if (r_swtruecolor) + { DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } else + { DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_map4cols(sx, yl, yh); + } } // Adds one span at hx to the screen at sx without clamping. 
diff --git a/src/r_drawt_pal.cpp b/src/r_drawt_pal.cpp index b9f0c378d0..3356592d25 100644 --- a/src/r_drawt_pal.cpp +++ b/src/r_drawt_pal.cpp @@ -91,18 +91,20 @@ namespace swrenderer fixed_t fracstep; fixed_t frac; + count = thread->count_for_thread(_yl, count); if (count <= 0) return; - { - int x = _x & 3; - dest = &thread->dc_temp[x + 4 * _yl]; - } fracstep = _iscale; frac = _texturefrac; const uint8_t *source = _source; + int x = _x & 3; + dest = &thread->dc_temp[x + thread->temp_line_for_thread(_yl) * 4]; + frac += fracstep * thread->skipped_by_thread(_yl); + fracstep *= thread->num_cores; + if (count & 1) { *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; } @@ -141,13 +143,12 @@ namespace swrenderer uint8_t color = _color; uint8_t *dest; + count = thread->count_for_thread(_yl, count); if (count <= 0) return; - { - int x = _x & 3; - dest = &thread->dc_temp[x + 4 * _yl]; - } + int x = _x & 3; + dest = &thread->dc_temp[x + thread->temp_line_for_thread(_yl) * 4]; if (count & 1) { *dest = color; @@ -183,14 +184,15 @@ namespace swrenderer int count; int pitch; - count = yh-yl; - if (count < 0) - return; - count++; + count = yh - yl + 1; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4 + hx]; - pitch = _pitch; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; if (count & 1) { *dest = *source; @@ -223,14 +225,15 @@ namespace swrenderer int count; int pitch; - count = yh-yl; - if (count < 0) - return; - count++; + count = yh - yl + 1; - dest = (int *)(ylookup[yl] + sx + _destorg); - source = (int *)(&thread->dc_temp[yl*4]); - pitch = _pitch/sizeof(int); + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + dest = (int *)(ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg); + source = (int 
*)(&thread->dc_temp[thread->temp_line_for_thread(yl)*4]); + pitch = _pitch*thread->num_cores/sizeof(int); if (count & 1) { *dest = *source; @@ -256,15 +259,16 @@ namespace swrenderer int count; int pitch; - count = yh-yl; - if (count < 0) + count = yh - yl + 1; + + count = thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; colormap = _colormap; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4 + hx]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl) *4 + hx]; + pitch = _pitch*thread->num_cores; if (count & 1) { *dest = colormap[*source]; @@ -290,15 +294,16 @@ namespace swrenderer int count; int pitch; - count = yh-yl; - if (count < 0) + count = yh - yl + 1; + + count = thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; colormap = _colormap; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch*thread->num_cores; if (count & 1) { dest[0] = colormap[source[0]]; @@ -328,7 +333,11 @@ namespace swrenderer void DrawColumnRt1TranslatedPalCommand::Execute(DrawerThread *thread) { int count = yh - yl + 1; - uint8_t *source = &thread->dc_temp[yl*4 + hx]; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + uint8_t *source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; const uint8_t *translation = _translation; // Things we do to hit the compiler's optimizer with a clue bat: @@ -376,7 +385,11 @@ namespace swrenderer void DrawColumnRt4TranslatedPalCommand::Execute(DrawerThread *thread) { int count = yh - yl + 1; - uint8_t *source = &thread->dc_temp[yl*4]; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + uint8_t *source = 
&thread->dc_temp[thread->temp_line_for_thread(yl)*4]; const uint8_t *translation = _translation; int c0, c1; uint8_t b0, b1; @@ -420,19 +433,18 @@ namespace swrenderer const uint8_t *colormap; uint8_t *source; uint8_t *dest; - int count; int pitch; - count = yh-yl; - if (count < 0) + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; const uint32_t *fg2rgb = _srcblend; const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4 + hx]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; colormap = _colormap; do { @@ -453,19 +465,18 @@ namespace swrenderer const uint8_t *colormap; uint8_t *source; uint8_t *dest; - int count; int pitch; - count = yh-yl; - if (count < 0) + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; const uint32_t *fg2rgb = _srcblend; const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; colormap = _colormap; do { @@ -509,19 +520,18 @@ namespace swrenderer const uint8_t *colormap; uint8_t *source; uint8_t *dest; - int count; int pitch; - count = yh-yl; - if (count < 0) + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; fgstart = &Col2RGB8[0][_color]; colormap = _colormap; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4 + hx]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; 
do { uint32_t val = colormap[*source]; @@ -539,19 +549,18 @@ namespace swrenderer const uint8_t *colormap; uint8_t *source; uint8_t *dest; - int count; int pitch; - count = yh-yl; - if (count < 0) + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; fgstart = &Col2RGB8[0][_color]; colormap = _colormap; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; do { uint32_t val; @@ -582,19 +591,18 @@ namespace swrenderer const uint8_t *colormap; uint8_t *source; uint8_t *dest; - int count; int pitch; - count = yh-yl; - if (count < 0) + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; const uint32_t *fg2rgb = _srcblend; const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4 + hx]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; colormap = _colormap; do { @@ -617,17 +625,16 @@ namespace swrenderer const uint8_t *colormap; uint8_t *source; uint8_t *dest; - int count; int pitch; - count = yh-yl; - if (count < 0) + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; colormap = _colormap; const uint32_t *fg2rgb = _srcblend; @@ -681,19 +688,18 @@ namespace swrenderer const uint8_t *colormap; uint8_t *source; uint8_t *dest; - int count; int 
pitch; - count = yh-yl; - if (count < 0) + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; const uint32_t *fg2rgb = _srcblend; const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4 + hx]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; colormap = _colormap; do { @@ -715,19 +721,18 @@ namespace swrenderer const uint8_t *colormap; uint8_t *source; uint8_t *dest; - int count; int pitch; - count = yh-yl; - if (count < 0) + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; const uint32_t *fg2rgb = _srcblend; const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; colormap = _colormap; do { @@ -774,19 +779,18 @@ namespace swrenderer const uint8_t *colormap; uint8_t *source; uint8_t *dest; - int count; int pitch; - count = yh-yl; - if (count < 0) + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; const uint32_t *fg2rgb = _srcblend; const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4 + hx]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; colormap = _colormap; do { @@ -808,19 +812,18 @@ namespace swrenderer const uint8_t *colormap; uint8_t *source; uint8_t *dest; - int count; int pitch; - count = yh-yl; - if (count < 0) + int count = yh - yl + 1; + count = 
thread->count_for_thread(yl, count); + if (count <= 0) return; - count++; const uint32_t *fg2rgb = _srcblend; const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl] + sx + _destorg; - source = &thread->dc_temp[yl*4]; - pitch = _pitch; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; colormap = _colormap; do { diff --git a/src/r_thread.h b/src/r_thread.h index 1e48ff3074..9a8a5c1b58 100644 --- a/src/r_thread.h +++ b/src/r_thread.h @@ -89,6 +89,12 @@ public: { return dest + skipped_by_thread(first_line) * pitch; } + + // The first line in the dc_temp buffer used by this thread + int temp_line_for_thread(int first_line) + { + return (first_line + skipped_by_thread(first_line)) / num_cores; + } }; // Task to be executed by each worker thread