From 246d1625e5912f9aa9765260cf52efea631198bc Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 5 Dec 2016 13:05:05 +0100 Subject: [PATCH] Make wall drawers thread aware --- src/r_draw_pal.cpp | 176 +++++++++++++++++++++++++++++++++++++++++---- src/r_thread.h | 3 +- 2 files changed, 163 insertions(+), 16 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 66b5dce068..0c45e629e0 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -102,6 +102,15 @@ namespace swrenderer int bits = _vlinebits; int pitch = _pitch; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + do { *dest = colormap[source[frac >> bits]]; @@ -124,16 +133,32 @@ namespace swrenderer auto buf1 = _bufplce[1]; auto buf2 = _bufplce[2]; auto buf3 = _bufplce[3]; - const auto vince0 = _vince[0]; - const auto vince1 = _vince[1]; - const auto vince2 = _vince[2]; - const auto vince3 = _vince[3]; + auto vince0 = _vince[0]; + auto vince1 = _vince[1]; + auto vince2 = _vince[2]; + auto vince3 = _vince[3]; auto vplce0 = _vplce[0]; auto vplce1 = _vplce[1]; auto vplce2 = _vplce[2]; auto vplce3 = _vplce[3]; auto pitch = _pitch; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + vplce0 += vince0 * skipped; + vplce1 += vince1 * skipped; + vplce2 += vince2 * skipped; + vplce3 += vince3 * skipped; + vince0 *= thread->num_cores; + vince1 *= thread->num_cores; + vince2 *= thread->num_cores; + vince3 *= thread->num_cores; + pitch *= thread->num_cores; + do { dest[0] = pal0[buf0[(place = vplce0) >> bits]]; vplce0 = place + vince0; @@ -155,6 +180,15 @@ namespace swrenderer int bits = _mvlinebits; int pitch = _pitch; + count = 
thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + do { uint8_t pix = source[frac >> bits]; @@ -181,16 +215,32 @@ namespace swrenderer auto buf1 = _bufplce[1]; auto buf2 = _bufplce[2]; auto buf3 = _bufplce[3]; - const auto vince0 = _vince[0]; - const auto vince1 = _vince[1]; - const auto vince2 = _vince[2]; - const auto vince3 = _vince[3]; + auto vince0 = _vince[0]; + auto vince1 = _vince[1]; + auto vince2 = _vince[2]; + auto vince3 = _vince[3]; auto vplce0 = _vplce[0]; auto vplce1 = _vplce[1]; auto vplce2 = _vplce[2]; auto vplce3 = _vplce[3]; auto pitch = _pitch; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + vplce0 += vince0 * skipped; + vplce1 += vince1 * skipped; + vplce2 += vince2 * skipped; + vplce3 += vince3 * skipped; + vince0 *= thread->num_cores; + vince1 *= thread->num_cores; + vince2 *= thread->num_cores; + vince3 *= thread->num_cores; + pitch *= thread->num_cores; + do { uint8_t pix; @@ -217,6 +267,15 @@ namespace swrenderer uint32_t *fg2rgb = _srcblend; uint32_t *bg2rgb = _destblend; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + do { uint8_t pix = source[frac >> bits]; @@ -242,6 +301,21 @@ namespace swrenderer uint32_t *bg2rgb = _destblend; uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + uint32_t vince[4] = { _vince[0], _vince[1], _vince[2], _vince[3] }; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + int skipped = 
thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int i = 0; i < 4; i++) + { + vplce[i] += vince[i] * skipped; + vince[i] *= thread->num_cores; + } + pitch *= thread->num_cores; do { @@ -255,9 +329,9 @@ namespace swrenderer fg = (fg + bg) | 0x1f07c1f; dest[i] = RGB32k.All[fg & (fg >> 15)]; } - vplce[i] += _vince[i]; + vplce[i] += vince[i]; } - dest += _pitch; + dest += pitch; } while (--count); } @@ -275,6 +349,15 @@ namespace swrenderer uint32_t *fg2rgb = _srcblend; uint32_t *bg2rgb = _destblend; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + do { uint8_t pix = source[frac >> bits]; @@ -305,6 +388,21 @@ namespace swrenderer uint32_t *bg2rgb = _destblend; uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + uint32_t vince[4] = { _vince[0], _vince[1], _vince[2], _vince[3] }; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int i = 0; i < 4; i++) + { + vplce[i] += vince[i] * skipped; + vince[i] *= thread->num_cores; + } + pitch *= thread->num_cores; do { @@ -323,9 +421,9 @@ namespace swrenderer a |= b; dest[i] = RGB32k.All[a & (a >> 15)]; } - vplce[i] += _vince[i]; + vplce[i] += vince[i]; } - dest += _pitch; + dest += pitch; } while (--count); } @@ -343,6 +441,15 @@ namespace swrenderer uint32_t *fg2rgb = _srcblend; uint32_t *bg2rgb = _destblend; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + do { uint8_t pix = 
source[frac >> bits]; @@ -372,6 +479,21 @@ namespace swrenderer uint32_t *bg2rgb = _destblend; uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + uint32_t vince[4] = { _vince[0], _vince[1], _vince[2], _vince[3] }; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int i = 0; i < 4; i++) + { + vplce[i] += vince[i] * skipped; + vince[i] *= thread->num_cores; + } + pitch *= thread->num_cores; do { @@ -389,9 +511,9 @@ namespace swrenderer a |= 0x01f07c1f; dest[i] = RGB32k.All[a & (a >> 15)]; } - vplce[i] += _vince[i]; + vplce[i] += vince[i]; } - dest += _pitch; + dest += pitch; } while (--count); } @@ -409,6 +531,15 @@ namespace swrenderer uint32_t *fg2rgb = _srcblend; uint32_t *bg2rgb = _destblend; + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + do { uint8_t pix = source[frac >> bits]; @@ -438,6 +569,21 @@ namespace swrenderer uint32_t *bg2rgb = _destblend; uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + uint32_t vince[4] = { _vince[0], _vince[1], _vince[2], _vince[3] }; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int i = 0; i < 4; i++) + { + vplce[i] += vince[i] * skipped; + vince[i] *= thread->num_cores; + } + pitch *= thread->num_cores; do { @@ -455,7 +601,7 @@ namespace swrenderer a |= 0x01f07c1f; dest[i] = RGB32k.All[a & (a >> 15)]; } - vplce[i] += _vince[i]; + vplce[i] += vince[i]; } - dest += _pitch; + dest += pitch; } while (--count); diff --git a/src/r_thread.h b/src/r_thread.h index
3217e1904b..29d971ad38 100644 --- a/src/r_thread.h +++ b/src/r_thread.h @@ -84,7 +84,8 @@ public: } // Calculate the dest address for the first line to be rendered by this thread - uint32_t *dest_for_thread(int first_line, int pitch, uint32_t *dest) + template<typename T> + T *dest_for_thread(int first_line, int pitch, T *dest) { return dest + skipped_by_thread(first_line) * pitch; }