From 67b2e2f52e40fa4821405ac13a277cf15ef3189e Mon Sep 17 00:00:00 2001 From: Edoardo Prezioso Date: Thu, 8 Dec 2016 21:39:12 +0100 Subject: [PATCH 1/8] - Remove valgrind code, now that the assembly stuff is gone. --- src/CMakeLists.txt | 1 - src/valgrind.inc | 31 ------------------------------- 2 files changed, 32 deletions(-) delete mode 100644 src/valgrind.inc diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3f54e0fcf9..83192566f1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -196,7 +196,6 @@ else() else() option( NO_GTK "Disable GTK+ dialogs (Not applicable to Windows)" ) option( DYN_GTK "Load GTK+ at runtime instead of compile time" ON ) - option( VALGRIND "Add special Valgrind sequences to self-modifying code" ) set( FMOD_SEARCH_PATHS /usr/local/include diff --git a/src/valgrind.inc b/src/valgrind.inc deleted file mode 100644 index 729fe89bcd..0000000000 --- a/src/valgrind.inc +++ /dev/null @@ -1,31 +0,0 @@ -%ifdef VALGRIND_AWARE - -%define VG_USERREQ__DISCARD_TRANSLATIONS 0x1002 - -%macro selfmod 2 - pusha - sub esp,6*4 - mov dword [esp], VG_USERREQ__DISCARD_TRANSLATIONS - mov dword [esp+4], %1 - mov dword [esp+8], %2 - %1 - mov dword [esp+12], 0 - mov dword [esp+16], 0 - mov dword [esp+20], 0 - mov eax, esp - ; special instruction preamble - rol edi, 3 - rol edi, 13 - rol edi, 29 - rol edi, 19 - xchg ebx, ebx - ; restore state - add esp,6*4 - popa -%endmacro - -%else - -%macro selfmod 2 -%endmacro - -%endif From 5536184beed578e2b9df9ae673b2e965af92b2c0 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Fri, 9 Dec 2016 00:41:34 +0100 Subject: [PATCH 2/8] - got rid of ksgn. Because every bit of Build code that can be removed is a good thing. This was only used in two places, one of which could be done better, the other one in the voxel drawer setup now uses a local C-inline version. --- src/basicinlines.h | 6 ------ src/gccinlines.h | 14 -------------- src/mscinlines.h | 9 --------- src/p_user.cpp | 6 +----- src/r_things.cpp | 7 ++++++- 5 files changed, 7 insertions(+), 35 deletions(-) diff --git a/src/basicinlines.h b/src/basicinlines.h index cc562f2b8c..2f81a6c646 100644 --- a/src/basicinlines.h +++ b/src/basicinlines.h @@ -195,9 +195,3 @@ static __forceinline void clearbufshort (void *buff, unsigned int count, WORD cl } } -static __forceinline SDWORD ksgn (SDWORD a) -{ - if (a < 0) return -1; - else if (a > 0) return 1; - else return 0; -} diff --git a/src/gccinlines.h b/src/gccinlines.h index 6cad307f7c..7311e9695d 100644 --- a/src/gccinlines.h +++ b/src/gccinlines.h @@ -316,17 +316,3 @@ static inline void clearbufshort (void *buff, unsigned int count, WORD clear) :"cc"); } -static inline SDWORD ksgn (SDWORD a) -{ - SDWORD result, dummy; - - asm volatile - ("add %0,%0\n\t" - "sbb %1,%1\n\t" - "cmp %0,%1\n\t" - "adc $0,%1" - :"=r" (dummy), "=r" (result) - :"0" (a) - :"cc"); - return result; -} diff --git a/src/mscinlines.h b/src/mscinlines.h index a8dd5fea73..7befe5395f 100644 --- a/src/mscinlines.h +++ b/src/mscinlines.h @@ -339,13 +339,4 @@ __forceinline void clearbufshort (void *buff, unsigned int count, WORD clear) } } -__forceinline SDWORD ksgn (SDWORD a) -{ - __asm mov edx,a - __asm add edx,edx - __asm sbb eax,eax - __asm cmp eax,edx - __asm adc eax,0 -} - #pragma warning (default: 4035) diff --git a/src/p_user.cpp b/src/p_user.cpp index b808dd9c3d..e2d799834e 100644 --- a/src/p_user.cpp +++ b/src/p_user.cpp @@ -2609,11 +2609,7 @@ void P_PlayerThink (player_t *player) else if (cmd->ucmd.upmove != 0) { // Clamp the speed to some reasonable maximum. - int magnitude = abs (cmd->ucmd.upmove); - if (magnitude > 0x300) - { - cmd->ucmd.upmove = ksgn (cmd->ucmd.upmove) * 0x300; - } + cmd->ucmd.upmove = clamp(cmd->ucmd.upmove, -0x300, 0x300); if (player->mo->waterlevel >= 2 || (player->mo->flags2 & MF2_FLY) || (player->cheats & CF_NOCLIP2)) { player->mo->Vel.Z = player->mo->Speed * cmd->ucmd.upmove / 128.; diff --git a/src/r_things.cpp b/src/r_things.cpp index 4ba47d63d7..7ee3f4ca92 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2701,6 +2701,11 @@ void R_DrawParticle_C (vissprite_t *vis) extern double BaseYaspectMul;; +inline int sgn(int v) +{ + return v < 0 ? -1 : v > 0 ? 1 : 0; +} + void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, const FVector3 &dasprpos, DAngle dasprang, fixed_t daxscale, fixed_t dayscale, FVoxel *voxobj, @@ -2840,7 +2845,7 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, xe += xi; ye += yi; } - i = ksgn(ys-backy)+ksgn(xs-backx)*3+4; + i = sgn(ys - backy) + sgn(xs - backx) * 3 + 4; switch(i) { case 6: case 7: x1 = 0; y1 = 0; break; From 8748b9ef6defc5c8a2d9570da29afcc5dcac8bbf Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Fri, 9 Dec 2016 10:31:17 +0100 Subject: [PATCH 3/8] - removed some unused functions from m_fixed.h. - removed the Build license note from r_bsp.cpp. This note was for code in R_AddLine which had been both refactored into several subfunctions and completely replaced with a floating point version. What is left is just some basic common math without any traits that resemble anything in Build. --- src/m_fixed.h | 57 --------------------------------------------------- src/r_bsp.cpp | 6 ------ 2 files changed, 63 deletions(-) diff --git a/src/m_fixed.h b/src/m_fixed.h index 506b9702c8..4719ac4680 100644 --- a/src/m_fixed.h +++ b/src/m_fixed.h @@ -79,63 +79,6 @@ inline SDWORD SafeDivScale32 (SDWORD a, SDWORD b) #define FixedMul MulScale16 #define FixedDiv SafeDivScale16 -inline void qinterpolatedown16 (SDWORD *out, DWORD count, SDWORD val, SDWORD delta) -{ - if (count & 1) - { - out[0] = val >> 16; - val += delta; - } - count >>= 1; - while (count-- != 0) - { - int temp = val + delta; - out[0] = val >> 16; - val = temp + delta; - out[1] = temp >> 16; - out += 2; - } -} - -inline void qinterpolatedown16short (short *out, DWORD count, SDWORD val, SDWORD delta) -{ - if (count) - { - if ((size_t)out & 2) - { // align to dword boundary - *out++ = (short)(val >> 16); - count--; - val += delta; - } - DWORD *o2 = (DWORD *)out; - DWORD c2 = count>>1; - while (c2-- != 0) - { - SDWORD temp = val + delta; - *o2++ = (temp & 0xffff0000) | ((DWORD)val >> 16); - val = temp + delta; - } - if (count & 1) - { - *(short *)o2 = (short)(val >> 16); - } - } -} - - //returns num/den, dmval = num%den -inline SDWORD DivMod (SDWORD num, SDWORD den, SDWORD *dmval) -{ - *dmval = num % den; - return num / den; -} - - //returns num%den, dmval = num/den -inline SDWORD ModDiv (SDWORD num, SDWORD den, SDWORD *dmval) -{ - *dmval = num / den; - return num % den; -} - inline fixed_t FloatToFixed(double f) { return xs_Fix<16>::ToFix(f); diff --git a/src/r_bsp.cpp b/src/r_bsp.cpp index 91fb86e928..15dff6065d 100644 --- a/src/r_bsp.cpp +++ b/src/r_bsp.cpp @@ -17,12 +17,6 @@ // DESCRIPTION: // BSP traversal, handling of LineSegs for rendering. // -// This file contains some code from the Build Engine. -// -// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman -// Ken Silverman's official web site: "http://www.advsys.net/ken" -// See the included license file "BUILDLIC.TXT" for license info. -// //----------------------------------------------------------------------------- From 34d551fe1f3653cbb9447f9169d20f5255a69b9a Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Fri, 9 Dec 2016 11:49:18 +0100 Subject: [PATCH 4/8] - moved wallscan code into its own file so that it can get an appropriate copyright notice and does not inflate an already large source file even more. --- src/CMakeLists.txt | 1 + src/r_draw.h | 2 - src/r_segs.cpp | 544 +----------------------------------------- src/r_wallscan.cpp | 583 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 588 insertions(+), 542 deletions(-) create mode 100644 src/r_wallscan.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 83192566f1..b4e4c78e05 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -755,6 +755,7 @@ set( FASTMATH_PCH_SOURCES r_segs.cpp r_sky.cpp r_things.cpp + r_wallscan.cpp s_advsound.cpp s_environment.cpp s_playlist.cpp diff --git a/src/r_draw.h b/src/r_draw.h index 40b3328964..39bac67acf 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -104,8 +104,6 @@ namespace swrenderer const uint8_t *R_GetColumn(FTexture *tex, int col); void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); - void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); - void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); void rt_initcols(uint8_t *buffer = nullptr); void rt_span_coverage(int x, int start, int stop); diff --git a/src/r_segs.cpp b/src/r_segs.cpp index d4520e91de..1880273f90 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -56,7 +56,6 @@ #define WALLYREPEAT 8 -CVAR(Bool, r_np2, true, 0) CVAR(Bool, r_fogboundary, true, 0) CVAR(Bool, r_drawmirrors, true, 0) EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); @@ -65,6 +64,10 @@ namespace swrenderer { using namespace drawerargs; + void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask); + void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask); + void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat); + #define HEIGHTBITS 12 #define HEIGHTSHIFT (FRACBITS-HEIGHTBITS) @@ -140,9 +143,6 @@ static fixed_t *maskedtexturecol; static void R_RenderDecal (side_t *wall, DBaseDecal *first, drawseg_t *clipper, int pass); static void WallSpriteColumn (void (*drawfunc)(const BYTE *column, const FTexture::Span *spans)); -void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask); -static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat); -static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask); inline bool IsFogBoundary (sector_t *front, sector_t *back) { @@ -1051,542 +1051,6 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) return; } -struct WallscanSampler -{ - WallscanSampler() { } - WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); - - uint32_t uv_pos; - uint32_t uv_step; - uint32_t uv_max; - - const BYTE *source; - uint32_t height; -}; - -WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) -{ - height = texture->GetHeight(); - - int uv_fracbits = 32 - texture->HeightBits; - if (uv_fracbits != 32) - { - uv_max = height << uv_fracbits; - - // Find start uv in [0-base_height[ range. - // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. - double uv_stepd = swal * yrepeat; - double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; - v = v - floor(v); - v *= height; - v *= (1 << uv_fracbits); - - uv_pos = (uint32_t)v; - uv_step = xs_ToFixed(uv_fracbits, uv_stepd); - if (uv_step == 0) // To prevent divide by zero elsewhere - uv_step = 1; - } - else - { // Hack for one pixel tall textures - uv_pos = 0; - uv_step = 0; - uv_max = 1; - } - - source = getcol(texture, xoffset >> FRACBITS); -} - -// Draw a column with support for non-power-of-two ranges -void wallscan_drawcol1(int x, int y1, int y2, WallscanSampler &sampler, DWORD(*draw1column)()) -{ - if (sampler.uv_max == 0 || sampler.uv_step == 0) // power of two - { - int count = y2 - y1; - - dc_source = sampler.source; - dc_dest = (ylookup[y1] + x) + dc_destorg; - dc_count = count; - dc_iscale = sampler.uv_step; - dc_texturefrac = sampler.uv_pos; - draw1column(); - - uint64_t step64 = sampler.uv_step; - uint64_t pos64 = sampler.uv_pos; - sampler.uv_pos = (uint32_t)(pos64 + step64 * count); - } - else - { - uint32_t uv_pos = sampler.uv_pos; - - uint32_t left = y2 - y1; - while (left > 0) - { - uint32_t available = sampler.uv_max - uv_pos; - uint32_t next_uv_wrap = available / sampler.uv_step; - if (available % sampler.uv_step != 0) - next_uv_wrap++; - uint32_t count = MIN(left, next_uv_wrap); - - dc_source = sampler.source; - dc_dest = (ylookup[y1] + x) + dc_destorg; - dc_count = count; - dc_iscale = sampler.uv_step; - dc_texturefrac = uv_pos; - draw1column(); - - left -= count; - uv_pos += sampler.uv_step * count; - if (uv_pos >= sampler.uv_max) - uv_pos -= sampler.uv_max; - } - - sampler.uv_pos = uv_pos; - } -} - -// Draw four columns with support for non-power-of-two ranges -void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*draw4columns)()) -{ - if (sampler[0].uv_max == 0 || sampler[0].uv_step == 0) // power of two, no wrap handling needed - { - int count = y2 - y1; - for (int i = 0; i < 4; i++) - { - bufplce[i] = sampler[i].source; - vplce[i] = sampler[i].uv_pos; - vince[i] = sampler[i].uv_step; - - uint64_t step64 = sampler[i].uv_step; - uint64_t pos64 = sampler[i].uv_pos; - sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); - } - dc_dest = (ylookup[y1] + x) + dc_destorg; - dc_count = count; - draw4columns(); - } - else - { - dc_dest = (ylookup[y1] + x) + dc_destorg; - for (int i = 0; i < 4; i++) - { - bufplce[i] = sampler[i].source; - } - - uint32_t left = y2 - y1; - while (left > 0) - { - // Find which column wraps first - uint32_t count = left; - for (int i = 0; i < 4; i++) - { - uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; - uint32_t next_uv_wrap = available / sampler[i].uv_step; - if (available % sampler[i].uv_step != 0) - next_uv_wrap++; - count = MIN(next_uv_wrap, count); - } - - // Draw until that column wraps - for (int i = 0; i < 4; i++) - { - vplce[i] = sampler[i].uv_pos; - vince[i] = sampler[i].uv_step; - } - dc_count = count; - draw4columns(); - - // Wrap the uv position - for (int i = 0; i < 4; i++) - { - sampler[i].uv_pos += sampler[i].uv_step * count; - if (sampler[i].uv_pos >= sampler[i].uv_max) - sampler[i].uv_pos -= sampler[i].uv_max; - } - - left -= count; - } - } -} - -typedef DWORD(*Draw1ColumnFuncPtr)(); -typedef void(*Draw4ColumnsFuncPtr)(); - -void wallscan_any( - int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, - const BYTE *(*getcol)(FTexture *tex, int x), - void(setupwallscan(int bits, Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) -{ - if (rw_pic->UseType == FTexture::TEX_Null) - return; - - fixed_t xoffset = rw_offset; - - rw_pic->GetHeight(); // To ensure that rw_pic->HeightBits has been set - int fracbits = 32 - rw_pic->HeightBits; - if (fracbits == 32) - { // Hack for one pixel tall textures - fracbits = 0; - yrepeat = 0; - dc_texturemid = 0; - } - - DWORD(*draw1column)(); - void(*draw4columns)(); - setupwallscan(fracbits, draw1column, draw4columns); - - bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); - if (fixed) - { - palookupoffse[0] = dc_colormap; - palookupoffse[1] = dc_colormap; - palookupoffse[2] = dc_colormap; - palookupoffse[3] = dc_colormap; - } - - if (fixedcolormap) - dc_colormap = fixedcolormap; - else - dc_colormap = basecolormap->Maps; - - float light = rw_light; - - // Calculate where 4 column alignment begins and ends: - int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); - int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); - - // First unaligned columns: - for (int x = x1; x < aligned_x1; x++, light += rw_lightstep) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - if (!fixed) - dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - - WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, rw_pic, getcol); - wallscan_drawcol1(x, y1, y2, sampler, draw1column); - } - - // The aligned columns - for (int x = aligned_x1; x < aligned_x2; x += 4) - { - // Find y1, y2, light and uv values for four columns: - int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] }; - int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] }; - - float lights[4]; - for (int i = 0; i < 4; i++) - { - lights[i] = light; - light += rw_lightstep; - } - - WallscanSampler sampler[4]; - for (int i = 0; i < 4; i++) - sampler[i] = WallscanSampler(y1[i], swal[x + i], yrepeat, lwal[x + i] + xoffset, rw_pic, getcol); - - // Figure out where we vertically can start and stop drawing 4 columns in one go - int middle_y1 = y1[0]; - int middle_y2 = y2[0]; - for (int i = 1; i < 4; i++) - { - middle_y1 = MAX(y1[i], middle_y1); - middle_y2 = MIN(y2[i], middle_y2); - } - - // If we got an empty column in our set we cannot draw 4 columns in one go: - bool empty_column_in_set = false; - for (int i = 0; i < 4; i++) - { - if (y2[i] <= y1[i]) - empty_column_in_set = true; - } - - if (empty_column_in_set || middle_y2 <= middle_y1) - { - for (int i = 0; i < 4; i++) - { - if (y2[i] <= y1[i]) - continue; - - if (!fixed) - dc_colormap = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); - wallscan_drawcol1(x + i, y1[i], y2[i], sampler[i], draw1column); - } - continue; - } - - // Draw the first rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (!fixed) - dc_colormap = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); - - if (y1[i] < middle_y1) - wallscan_drawcol1(x + i, y1[i], middle_y1, sampler[i], draw1column); - } - - // Draw the area where all 4 columns are active - if (!fixed) - { - for (int i = 0; i < 4; i++) - { - palookupoffse[i] = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); - } - } - wallscan_drawcol4(x, middle_y1, middle_y2, sampler, draw4columns); - - // Draw the last rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (!fixed) - dc_colormap = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); - - if (middle_y2 < y2[i]) - wallscan_drawcol1(x + i, middle_y2, y2[i], sampler[i], draw1column); - } - } - - // The last unaligned columns: - for (int x = aligned_x2; x < x2; x++, light += rw_lightstep) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - if (!fixed) - dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - - WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, rw_pic, getcol); - wallscan_drawcol1(x, y1, y2, sampler, draw1column); - } - - NetUpdate(); -} - -void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) - { - setupvline(bits); - line1 = dovline1; - line4 = dovline4; - }); -} - -void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - if (!rw_pic->bMasked) // Textures that aren't masked can use the faster wallscan. - { - wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); - } - else - { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) - { - setupmvline(bits); - line1 = domvline1; - line4 = domvline4; - }); - } -} - -void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - static fixed_t(*tmvline1)(); - static void(*tmvline4)(); - if (!R_GetTransMaskDrawers(&tmvline1, &tmvline4)) - { - // The current translucency is unsupported, so draw with regular maskwallscan instead. - maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); - } - else - { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) - { - setuptmvline(bits); - line1 = reinterpret_cast(tmvline1); - line4 = tmvline4; - }); - } -} - -void wallscan_striped (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) -{ - FDynamicColormap *startcolormap = basecolormap; - int startshade = wallshade; - bool fogginess = foggy; - - short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; - short *up, *down; - - up = uwal; - down = most1; - - assert(WallC.sx1 <= x1); - assert(WallC.sx2 >= x2); - - // kg3D - fake floors instead of zdoom light list - for (unsigned int i = 0; i < frontsector->e->XFloor.lightlist.Size(); i++) - { - int j = WallMost (most3, frontsector->e->XFloor.lightlist[i].plane, &WallC); - if (j != 3) - { - for (int j = x1; j < x2; ++j) - { - down[j] = clamp (most3[j], up[j], dwal[j]); - } - wallscan (x1, x2, up, down, swal, lwal, yrepeat); - up = down; - down = (down == most1) ? most2 : most1; - } - - lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; - basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(fogginess, - *lit->p_lightlevel, lit->lightsource != NULL) + r_actualextralight); - } - - wallscan (x1, x2, up, dwal, swal, lwal, yrepeat); - basecolormap = startcolormap; - wallshade = startshade; -} - -static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask) -{ - if (mask) - { - if (colfunc == basecolfunc) - { - maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); - } - else - { - transmaskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); - } - } - else - { - if (fixedcolormap != NULL || fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) - { - wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); - } - else - { - wallscan_striped(x1, x2, uwal, dwal, swal, lwal, yrepeat); - } - } -} - -//============================================================================= -// -// wallscan_np2 -// -// This is a wrapper around wallscan that helps it tile textures whose heights -// are not powers of 2. It divides the wall into texture-sized strips and calls -// wallscan for each of those. Since only one repetition of the texture fits -// in each strip, wallscan will not tile. -// -//============================================================================= - -void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask) -{ - if (!r_np2) - { - call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, mask); - } - else - { - short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; - short *up, *down; - double texheight = rw_pic->GetHeight(); - double partition; - double scaledtexheight = texheight / yrepeat; - - if (yrepeat >= 0) - { // normal orientation: draw strips from top to bottom - partition = top - fmod(top - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); - if (partition == top) - { - partition -= scaledtexheight; - } - up = uwal; - down = most1; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; - while (partition > bot) - { - int j = OWallMost(most3, partition - ViewPos.Z, &WallC); - if (j != 3) - { - for (int j = x1; j < x2; ++j) - { - down[j] = clamp(most3[j], up[j], dwal[j]); - } - call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); - up = down; - down = (down == most1) ? most2 : most1; - } - partition -= scaledtexheight; - dc_texturemid -= texheight; - } - call_wallscan(x1, x2, up, dwal, swal, lwal, yrepeat, mask); - } - else - { // upside down: draw strips from bottom to top - partition = bot - fmod(bot - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); - up = most1; - down = dwal; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; - while (partition < top) - { - int j = OWallMost(most3, partition - ViewPos.Z, &WallC); - if (j != 12) - { - for (int j = x1; j < x2; ++j) - { - up[j] = clamp(most3[j], uwal[j], down[j]); - } - call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); - down = up; - up = (up == most1) ? most2 : most1; - } - partition -= scaledtexheight; - dc_texturemid -= texheight; - } - call_wallscan(x1, x2, uwal, down, swal, lwal, yrepeat, mask); - } - } -} - -static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) -{ - if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) - { - double frontcz1 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v1); - double frontfz1 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v1); - double frontcz2 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v2); - double frontfz2 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v2); - double top = MAX(frontcz1, frontcz2); - double bot = MIN(frontfz1, frontfz2); - if (fake3D & FAKE3D_CLIPTOP) - { - top = MIN(top, sclipTop); - } - if (fake3D & FAKE3D_CLIPBOTTOM) - { - bot = MAX(bot, sclipBottom); - } - wallscan_np2(x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, true); - } - else - { - call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, true); - } -} - // // R_RenderSegLoop // Draws zero, one, or two textures for walls. diff --git a/src/r_wallscan.cpp b/src/r_wallscan.cpp new file mode 100644 index 0000000000..c5bcb1d26c --- /dev/null +++ b/src/r_wallscan.cpp @@ -0,0 +1,583 @@ +/* +** Replacement for Build's wallscan free of any Build licensing issues. +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +#include +#include + +#include "doomdef.h" +#include "doomstat.h" +#include "doomdata.h" + +#include "r_local.h" +#include "r_sky.h" +#include "v_video.h" + +#include "m_swap.h" +#include "a_sharedglobal.h" +#include "d_net.h" +#include "g_level.h" +#include "r_draw.h" +#include "r_bsp.h" +#include "r_plane.h" +#include "r_segs.h" +#include "r_3dfloors.h" +#include "v_palette.h" +#include "r_data/colormaps.h" + +namespace swrenderer +{ + using namespace drawerargs; + + extern FTexture *rw_pic; + extern int wallshade; + +struct WallscanSampler +{ + WallscanSampler() { } + WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); + + uint32_t uv_pos; + uint32_t uv_step; + uint32_t uv_max; + + const BYTE *source; + uint32_t height; +}; + +WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) +{ + height = texture->GetHeight(); + + int uv_fracbits = 32 - texture->HeightBits; + if (uv_fracbits != 32) + { + uv_max = height << uv_fracbits; + + // Find start uv in [0-base_height[ range. + // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. + double uv_stepd = swal * yrepeat; + double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; + v = v - floor(v); + v *= height; + v *= (1 << uv_fracbits); + + uv_pos = (uint32_t)v; + uv_step = xs_ToFixed(uv_fracbits, uv_stepd); + if (uv_step == 0) // To prevent divide by zero elsewhere + uv_step = 1; + } + else + { // Hack for one pixel tall textures + uv_pos = 0; + uv_step = 0; + uv_max = 1; + } + + source = getcol(texture, xoffset >> FRACBITS); +} + +// Draw a column with support for non-power-of-two ranges +void wallscan_drawcol1(int x, int y1, int y2, WallscanSampler &sampler, DWORD(*draw1column)()) +{ + if (sampler.uv_max == 0 || sampler.uv_step == 0) // power of two + { + int count = y2 - y1; + + dc_source = sampler.source; + dc_dest = (ylookup[y1] + x) + dc_destorg; + dc_count = count; + dc_iscale = sampler.uv_step; + dc_texturefrac = sampler.uv_pos; + draw1column(); + + uint64_t step64 = sampler.uv_step; + uint64_t pos64 = sampler.uv_pos; + sampler.uv_pos = (uint32_t)(pos64 + step64 * count); + } + else + { + uint32_t uv_pos = sampler.uv_pos; + + uint32_t left = y2 - y1; + while (left > 0) + { + uint32_t available = sampler.uv_max - uv_pos; + uint32_t next_uv_wrap = available / sampler.uv_step; + if (available % sampler.uv_step != 0) + next_uv_wrap++; + uint32_t count = MIN(left, next_uv_wrap); + + dc_source = sampler.source; + dc_dest = (ylookup[y1] + x) + dc_destorg; + dc_count = count; + dc_iscale = sampler.uv_step; + dc_texturefrac = uv_pos; + draw1column(); + + left -= count; + uv_pos += sampler.uv_step * count; + if (uv_pos >= sampler.uv_max) + uv_pos -= sampler.uv_max; + } + + sampler.uv_pos = uv_pos; + } +} + +// Draw four columns with support for non-power-of-two ranges +void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*draw4columns)()) +{ + if (sampler[0].uv_max == 0 || sampler[0].uv_step == 0) // power of two, no wrap handling needed + { + int count = y2 - y1; + for (int i = 0; i < 4; i++) + { + bufplce[i] = sampler[i].source; + vplce[i] = sampler[i].uv_pos; + vince[i] = sampler[i].uv_step; + + uint64_t step64 = sampler[i].uv_step; + uint64_t pos64 = sampler[i].uv_pos; + sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); + } + dc_dest = (ylookup[y1] + x) + dc_destorg; + dc_count = count; + draw4columns(); + } + else + { + dc_dest = (ylookup[y1] + x) + dc_destorg; + for (int i = 0; i < 4; i++) + { + bufplce[i] = sampler[i].source; + } + + uint32_t left = y2 - y1; + while (left > 0) + { + // Find which column wraps first + uint32_t count = left; + for (int i = 0; i < 4; i++) + { + uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; + uint32_t next_uv_wrap = available / sampler[i].uv_step; + if (available % sampler[i].uv_step != 0) + next_uv_wrap++; + count = MIN(next_uv_wrap, count); + } + + // Draw until that column wraps + for (int i = 0; i < 4; i++) + { + vplce[i] = sampler[i].uv_pos; + vince[i] = sampler[i].uv_step; + } + dc_count = count; + draw4columns(); + + // Wrap the uv position + for (int i = 0; i < 4; i++) + { + sampler[i].uv_pos += sampler[i].uv_step * count; + if (sampler[i].uv_pos >= sampler[i].uv_max) + sampler[i].uv_pos -= sampler[i].uv_max; + } + + left -= count; + } + } +} + +typedef DWORD(*Draw1ColumnFuncPtr)(); +typedef void(*Draw4ColumnsFuncPtr)(); + +void wallscan_any( + int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, + const BYTE *(*getcol)(FTexture *tex, int x), + void(setupwallscan(int bits, Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) +{ + if (rw_pic->UseType == FTexture::TEX_Null) + return; + + fixed_t xoffset = rw_offset; + + rw_pic->GetHeight(); // To ensure that rw_pic->HeightBits has been set + int fracbits = 32 - rw_pic->HeightBits; + if (fracbits == 32) + { // Hack for one pixel tall textures + fracbits = 0; + yrepeat = 0; + dc_texturemid = 0; + } + + DWORD(*draw1column)(); + void(*draw4columns)(); + setupwallscan(fracbits, draw1column, draw4columns); + + bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); + if (fixed) + { + palookupoffse[0] = dc_colormap; + palookupoffse[1] = dc_colormap; + palookupoffse[2] = dc_colormap; + palookupoffse[3] = dc_colormap; + } + + if (fixedcolormap) + dc_colormap = fixedcolormap; + else + dc_colormap = basecolormap->Maps; + + float light = rw_light; + + // Calculate where 4 column alignment begins and ends: + int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); + int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); + + // First unaligned columns: + for (int x = x1; x < aligned_x1; x++, light += rw_lightstep) + { + int y1 = uwal[x]; + int y2 = dwal[x]; + if (y2 <= y1) + continue; + + if (!fixed) + dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + + WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, rw_pic, getcol); + wallscan_drawcol1(x, y1, y2, sampler, draw1column); + } + + // The aligned columns + for (int x = aligned_x1; x < aligned_x2; x += 4) + { + // Find y1, y2, light and uv values for four columns: + int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] }; + int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] }; + + float lights[4]; + for (int i = 0; i < 4; i++) + { + lights[i] = light; + light += rw_lightstep; + } + + WallscanSampler sampler[4]; + for (int i = 0; i < 4; i++) + sampler[i] = WallscanSampler(y1[i], swal[x + i], yrepeat, lwal[x + i] + xoffset, rw_pic, getcol); + + // Figure out where we vertically can start and stop drawing 4 columns in one go + int middle_y1 = y1[0]; + int middle_y2 = y2[0]; + for (int i = 1; i < 4; i++) + { + middle_y1 = MAX(y1[i], middle_y1); + middle_y2 = MIN(y2[i], middle_y2); + } + + // If we got an empty column in our set we cannot draw 4 columns in one go: + bool empty_column_in_set = false; + for (int i = 0; i < 4; i++) + { + if (y2[i] <= y1[i]) + empty_column_in_set = true; + } + + if (empty_column_in_set || middle_y2 <= middle_y1) + { + for (int i = 0; i < 4; i++) + { + if (y2[i] <= y1[i]) + continue; + + if (!fixed) + dc_colormap = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + wallscan_drawcol1(x + i, y1[i], y2[i], sampler[i], draw1column); + } + continue; + } + + // Draw the first rows where not all 4 columns are active + for (int i = 0; i < 4; i++) + { + if (!fixed) + dc_colormap = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + + if (y1[i] < middle_y1) + wallscan_drawcol1(x + i, y1[i], middle_y1, sampler[i], draw1column); + } + + // Draw the area where all 4 columns are active + if (!fixed) + { + for (int i = 0; i < 4; i++) + { + palookupoffse[i] = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + } + } + wallscan_drawcol4(x, middle_y1, middle_y2, sampler, draw4columns); + + // Draw the last rows where not all 4 columns are active + for (int i = 0; i < 4; i++) + { + if (!fixed) + dc_colormap = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + + if (middle_y2 < y2[i]) + wallscan_drawcol1(x + i, middle_y2, y2[i], sampler[i], draw1column); + } + } + + // The last unaligned columns: + for (int x = aligned_x2; x < x2; x++, light += rw_lightstep) + { + int y1 = uwal[x]; + int y2 = dwal[x]; + if (y2 <= y1) + continue; + + if (!fixed) + dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + + WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, rw_pic, getcol); + wallscan_drawcol1(x, y1, y2, sampler, draw1column); + } + + NetUpdate(); +} + +void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setupvline(bits); + line1 = dovline1; + line4 = dovline4; + }); +} + +void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) +{ + if (!rw_pic->bMasked) // Textures that aren't masked can use the faster wallscan. + { + wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); + } + else + { + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setupmvline(bits); + line1 = domvline1; + line4 = domvline4; + }); + } +} + +void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) +{ + static fixed_t(*tmvline1)(); + static void(*tmvline4)(); + if (!R_GetTransMaskDrawers(&tmvline1, &tmvline4)) + { + // The current translucency is unsupported, so draw with regular maskwallscan instead. + maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); + } + else + { + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setuptmvline(bits); + line1 = reinterpret_cast(tmvline1); + line4 = tmvline4; + }); + } +} + +void wallscan_striped (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) +{ + FDynamicColormap *startcolormap = basecolormap; + int startshade = wallshade; + bool fogginess = foggy; + + short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; + short *up, *down; + + up = uwal; + down = most1; + + assert(WallC.sx1 <= x1); + assert(WallC.sx2 >= x2); + + // kg3D - fake floors instead of zdoom light list + for (unsigned int i = 0; i < frontsector->e->XFloor.lightlist.Size(); i++) + { + int j = WallMost (most3, frontsector->e->XFloor.lightlist[i].plane, &WallC); + if (j != 3) + { + for (int j = x1; j < x2; ++j) + { + down[j] = clamp (most3[j], up[j], dwal[j]); + } + wallscan (x1, x2, up, down, swal, lwal, yrepeat); + up = down; + down = (down == most1) ? most2 : most1; + } + + lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; + basecolormap = lit->extra_colormap; + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(fogginess, + *lit->p_lightlevel, lit->lightsource != NULL) + r_actualextralight); + } + + wallscan (x1, x2, up, dwal, swal, lwal, yrepeat); + basecolormap = startcolormap; + wallshade = startshade; +} + +void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask) +{ + if (mask) + { + if (colfunc == basecolfunc) + { + maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); + } + else + { + transmaskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); + } + } + else + { + if (fixedcolormap != NULL || fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) + { + wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); + } + else + { + wallscan_striped(x1, x2, uwal, dwal, swal, lwal, yrepeat); + } + } +} + +//============================================================================= +// +// wallscan_np2 +// +// This is a wrapper around wallscan that helps it tile textures whose heights +// are not powers of 2. It divides the wall into texture-sized strips and calls +// wallscan for each of those. Since only one repetition of the texture fits +// in each strip, wallscan will not tile. +// +//============================================================================= + +void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask) +{ + short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; + short *up, *down; + double texheight = rw_pic->GetHeight(); + double partition; + double scaledtexheight = texheight / yrepeat; + + if (yrepeat >= 0) + { // normal orientation: draw strips from top to bottom + partition = top - fmod(top - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + if (partition == top) + { + partition -= scaledtexheight; + } + up = uwal; + down = most1; + dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + while (partition > bot) + { + int j = OWallMost(most3, partition - ViewPos.Z, &WallC); + if (j != 3) + { + for (int j = x1; j < x2; ++j) + { + down[j] = clamp(most3[j], up[j], dwal[j]); + } + call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); + up = down; + down = (down == most1) ? most2 : most1; + } + partition -= scaledtexheight; + dc_texturemid -= texheight; + } + call_wallscan(x1, x2, up, dwal, swal, lwal, yrepeat, mask); + } + else + { // upside down: draw strips from bottom to top + partition = bot - fmod(bot - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + up = most1; + down = dwal; + dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + while (partition < top) + { + int j = OWallMost(most3, partition - ViewPos.Z, &WallC); + if (j != 12) + { + for (int j = x1; j < x2; ++j) + { + up[j] = clamp(most3[j], uwal[j], down[j]); + } + call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); + down = up; + up = (up == most1) ? most2 : most1; + } + partition -= scaledtexheight; + dc_texturemid -= texheight; + } + call_wallscan(x1, x2, uwal, down, swal, lwal, yrepeat, mask); + } +} + +void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) +{ + if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) + { + double frontcz1 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v1); + double frontfz1 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v1); + double frontcz2 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v2); + double frontfz2 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v2); + double top = MAX(frontcz1, frontcz2); + double bot = MIN(frontfz1, frontfz2); + if (fake3D & FAKE3D_CLIPTOP) + { + top = MIN(top, sclipTop); + } + if (fake3D & FAKE3D_CLIPBOTTOM) + { + bot = MAX(bot, sclipBottom); + } + wallscan_np2(x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, true); + } + else + { + call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, true); + } +} + + +} \ No newline at end of file From f3389ff5b716d7c913c4a5fa8ee27687de520db4 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Fri, 9 Dec 2016 12:21:27 +0100 Subject: [PATCH 5/8] - let's be a bit clearer and not name non-Build stuff with Build terms. --- src/CMakeLists.txt | 2 +- src/{r_wallscan.cpp => r_walldraw.cpp} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename src/{r_wallscan.cpp => r_walldraw.cpp} (99%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b4e4c78e05..aca24181f9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -755,7 +755,7 @@ set( FASTMATH_PCH_SOURCES r_segs.cpp r_sky.cpp r_things.cpp - r_wallscan.cpp + r_walldraw.cpp s_advsound.cpp s_environment.cpp s_playlist.cpp diff --git a/src/r_wallscan.cpp b/src/r_walldraw.cpp similarity index 99% rename from src/r_wallscan.cpp rename to src/r_walldraw.cpp index c5bcb1d26c..000bc01c78 100644 --- a/src/r_wallscan.cpp +++ b/src/r_walldraw.cpp @@ -1,5 +1,5 @@ /* -** Replacement for Build's wallscan free of any Build licensing issues. +** Wall drawing stuff free of Build pollution ** Copyright (c) 2016 Magnus Norddahl ** ** This software is provided 'as-is', without any express or implied From 12a99c3f3cc75fa3b241d89a8ebe43c8d3c07809 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Fri, 9 Dec 2016 12:48:10 +0100 Subject: [PATCH 6/8] - got rid of clearbuf. This was used in only 4 places, 3 of which could easily be replaced with a memset, and the fourth, in the Strife status bar, suffering from a pointless performance optimization, rendering the code unreadable - the code spent here per frame is utterly insignificant so clarity should win here. --- src/basicinlines.h | 9 --------- src/g_strife/strife_sbar.cpp | 10 +++------- src/gccinlines.h | 13 ------------- src/mscinlines.h | 9 --------- src/p_maputl.cpp | 2 +- src/r_data/sprites.cpp | 4 ++-- 6 files changed, 6 insertions(+), 41 deletions(-) diff --git a/src/basicinlines.h b/src/basicinlines.h index 2f81a6c646..488cdf5526 100644 --- a/src/basicinlines.h +++ b/src/basicinlines.h @@ -177,15 +177,6 @@ static inline SDWORD DivScale30 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD) static inline SDWORD DivScale31 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 31) / b); } static inline SDWORD DivScale32 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 32) / b); } -static __forceinline void clearbuf (void *buff, unsigned int count, SDWORD clear) -{ - SDWORD *b2 = (SDWORD *)buff; - for (unsigned int i = 0; i != count; ++i) - { - b2[i] = clear; - } -} - static __forceinline void clearbufshort (void *buff, unsigned int count, WORD clear) { SWORD *b2 = (SWORD *)buff; diff --git a/src/g_strife/strife_sbar.cpp b/src/g_strife/strife_sbar.cpp index 973364cf7d..46dd63ff76 100644 --- a/src/g_strife/strife_sbar.cpp +++ b/src/g_strife/strife_sbar.cpp @@ -174,14 +174,10 @@ void FHealthBar::MakeTexture () void FHealthBar::FillBar (int min, int max, BYTE light, BYTE dark) { -#ifdef __BIG_ENDIAN__ - SDWORD fill = (light << 24) | (dark << 16) | (light << 8) | dark; -#else - SDWORD fill = light | (dark << 8) | (light << 16) | (dark << 24); -#endif - if (max > min) + for (int i = min*2; i < max*2; i++) { - clearbuf (&Pixels[min*4], max - min, fill); + Pixels[i * 2] = light; + Pixels[i * 2 + 1] = dark; } } diff --git a/src/gccinlines.h b/src/gccinlines.h index 7311e9695d..54a2448c70 100644 --- a/src/gccinlines.h +++ b/src/gccinlines.h @@ -291,19 +291,6 @@ static inline SDWORD DivScale32 (SDWORD a, SDWORD b) return result; } -static inline void clearbuf (void *buff, int count, SDWORD clear) -{ - int dummy1, dummy2; - asm volatile - ("rep stosl" - :"=D" (dummy1), - "=c" (dummy2) - : "D" (buff), - "c" (count), - "a" (clear) - ); -} - static inline void clearbufshort (void *buff, unsigned int count, WORD clear) { asm volatile diff --git a/src/mscinlines.h b/src/mscinlines.h index 7befe5395f..1aded4e7b7 100644 --- a/src/mscinlines.h +++ b/src/mscinlines.h @@ -321,15 +321,6 @@ __forceinline SDWORD DivScale32 (SDWORD a, SDWORD b) __asm idiv b } -__forceinline void clearbuf (void *buff, unsigned int count, SDWORD clear) -{ - SDWORD *b2 = (SDWORD *)buff; - for (unsigned int i = 0; i != count; ++i) - { - b2[i] = clear; - } -} - __forceinline void clearbufshort (void *buff, unsigned int count, WORD clear) { SWORD *b2 = (SWORD *)buff; diff --git a/src/p_maputl.cpp b/src/p_maputl.cpp index 31b4125988..fdf66e969d 100644 --- a/src/p_maputl.cpp +++ b/src/p_maputl.cpp @@ -925,7 +925,7 @@ void FBlockThingsIterator::init(const FBoundingBox &box) void FBlockThingsIterator::ClearHash() { - clearbuf(Buckets, countof(Buckets), -1); + memset(Buckets, -1, sizeof(Buckets)); NumFixedHash = 0; DynHash.Clear(); } diff --git a/src/r_data/sprites.cpp b/src/r_data/sprites.cpp index 10b63d993e..829703bb9f 100644 --- a/src/r_data/sprites.cpp +++ b/src/r_data/sprites.cpp @@ -268,7 +268,7 @@ void R_InitSpriteDefs () // Create a hash table to speed up the process smax = TexMan.NumTextures(); hashes = new Hasher[smax]; - clearbuf(hashes, sizeof(Hasher)*smax/4, -1); + memset(hashes, -1, sizeof(Hasher)*smax); for (i = 0; i < smax; ++i) { FTexture *tex = TexMan.ByIndex(i); @@ -283,7 +283,7 @@ void R_InitSpriteDefs () // Repeat, for voxels vmax = Wads.GetNumLumps(); vhashes = new VHasher[vmax]; - clearbuf(vhashes, sizeof(VHasher)*vmax/4, -1); + memset(vhashes, -1, sizeof(VHasher)*vmax); for (i = 0; i < vmax; ++i) { if (Wads.GetLumpNamespace(i) == ns_voxels) From eab06ef0866b0c9ed84f94a8745e747116e2eba7 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Fri, 9 Dec 2016 13:04:53 +0100 Subject: [PATCH 7/8] - more Build cleanup: Moved clearbufshort out of Build derived headers, removed the inline version for GCC 32 bit which was actually Build derived and renamed the C-version of this function, which does not originate from Build to 'fillshort'. - cleaned out a lot the SafeDivScale stuff in m_fixed.h. The only SafeDivScale variant still in use was #16 for FixedDiv, so all the SafeDivScale stuff has been removed and the 16 variant renamed to FixedDiv because that's the only form in which it is still being used. (2x in R_DrawVoxel and 1x in ACS's FixedDiv PCode.) - removed Build notice from m_fixed.h because aside from the inlines includes there is nothing here from Build anymore. --- src/basicinlines.h | 9 ----- src/g_shared/a_lightning.cpp | 4 +- src/gccinlines.h | 13 ------- src/m_fixed.h | 73 +++++++----------------------------- src/mscinlines.h | 9 ----- src/p_effect.cpp | 2 +- src/r_bsp.cpp | 4 +- src/r_draw.cpp | 4 +- src/r_main.cpp | 4 +- src/r_plane.cpp | 10 ++--- src/r_segs.cpp | 12 +++--- src/r_things.cpp | 4 +- src/v_draw.cpp | 4 +- 13 files changed, 37 insertions(+), 115 deletions(-) diff --git a/src/basicinlines.h b/src/basicinlines.h index 488cdf5526..8492572818 100644 --- a/src/basicinlines.h +++ b/src/basicinlines.h @@ -177,12 +177,3 @@ static inline SDWORD DivScale30 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD) static inline SDWORD DivScale31 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 31) / b); } static inline SDWORD DivScale32 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 32) / b); } -static __forceinline void clearbufshort (void *buff, unsigned int count, WORD clear) -{ - SWORD *b2 = (SWORD *)buff; - for (unsigned int i = 0; i != count; ++i) - { - b2[i] = clear; - } -} - diff --git a/src/g_shared/a_lightning.cpp b/src/g_shared/a_lightning.cpp index 3f44f2fddd..bbc6638123 100644 --- a/src/g_shared/a_lightning.cpp +++ b/src/g_shared/a_lightning.cpp @@ -23,7 +23,7 @@ DLightningThinker::DLightningThinker () NextLightningFlash = ((pr_lightning()&15)+5)*35; // don't flash at level start LightningLightLevels.Resize(numsectors); - clearbufshort(&LightningLightLevels[0], numsectors, SHRT_MAX); + fillshort(&LightningLightLevels[0], numsectors, SHRT_MAX); } DLightningThinker::~DLightningThinker () @@ -87,7 +87,7 @@ void DLightningThinker::LightningFlash () tempSec->SetLightLevel(LightningLightLevels[j]); } } - clearbufshort(&LightningLightLevels[0], numsectors, SHRT_MAX); + fillshort(&LightningLightLevels[0], numsectors, SHRT_MAX); level.flags &= ~LEVEL_SWAPSKIES; } return; diff --git a/src/gccinlines.h b/src/gccinlines.h index 54a2448c70..eac1f3e6fb 100644 --- a/src/gccinlines.h +++ b/src/gccinlines.h @@ -290,16 +290,3 @@ static inline SDWORD DivScale32 (SDWORD a, SDWORD b) : "cc"); return result; } - -static inline void clearbufshort (void *buff, unsigned int count, WORD clear) -{ - asm volatile - ("shr $1,%%ecx\n\t" - "rep stosl\n\t" - "adc %%ecx,%%ecx\n\t" - "rep stosw" - :"=D" (buff), "=c" (count) - :"D" (buff), "c" (count), "a" (clear|(clear<<16)) - :"cc"); -} - diff --git a/src/m_fixed.h b/src/m_fixed.h index 4719ac4680..33d339608a 100644 --- a/src/m_fixed.h +++ b/src/m_fixed.h @@ -1,11 +1,3 @@ -// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman -// Ken Silverman's official web site: "http://www.advsys.net/ken" -// See the included license file "BUILDLIC.TXT" for license info. -// -// This file is based on pragmas.h from Ken Silverman's original Build -// source code release and contains routines that were originally -// inline assembly but are not now. - #ifndef __M_FIXED__ #define __M_FIXED__ @@ -20,64 +12,25 @@ #include "basicinlines.h" #endif -#include "xs_Float.h" - -#define MAKESAFEDIVSCALE(x) \ - inline SDWORD SafeDivScale##x (SDWORD a, SDWORD b) \ - { \ - if ((DWORD)abs(a) >> (31-x) >= (DWORD)abs (b)) \ - return (a^b)<0 ? FIXED_MIN : FIXED_MAX; \ - return DivScale##x (a, b); \ - } - -MAKESAFEDIVSCALE(1) -MAKESAFEDIVSCALE(2) -MAKESAFEDIVSCALE(3) -MAKESAFEDIVSCALE(4) -MAKESAFEDIVSCALE(5) -MAKESAFEDIVSCALE(6) -MAKESAFEDIVSCALE(7) -MAKESAFEDIVSCALE(8) -MAKESAFEDIVSCALE(9) -MAKESAFEDIVSCALE(10) -MAKESAFEDIVSCALE(11) -MAKESAFEDIVSCALE(12) -MAKESAFEDIVSCALE(13) -MAKESAFEDIVSCALE(14) -MAKESAFEDIVSCALE(15) -MAKESAFEDIVSCALE(16) -MAKESAFEDIVSCALE(17) -MAKESAFEDIVSCALE(18) -MAKESAFEDIVSCALE(19) -MAKESAFEDIVSCALE(20) -MAKESAFEDIVSCALE(21) -MAKESAFEDIVSCALE(22) -MAKESAFEDIVSCALE(23) -MAKESAFEDIVSCALE(24) -MAKESAFEDIVSCALE(25) -MAKESAFEDIVSCALE(26) -MAKESAFEDIVSCALE(27) -MAKESAFEDIVSCALE(28) -MAKESAFEDIVSCALE(29) -MAKESAFEDIVSCALE(30) -#undef MAKESAFEDIVSCALE - -inline SDWORD SafeDivScale31 (SDWORD a, SDWORD b) +__forceinline void fillshort(void *buff, unsigned int count, WORD clear) { - if ((DWORD)abs(a) >= (DWORD)abs (b)) - return (a^b)<0 ? FIXED_MIN : FIXED_MAX; - return DivScale31 (a, b); + SWORD *b2 = (SWORD *)buff; + for (unsigned int i = 0; i != count; ++i) + { + b2[i] = clear; + } } -inline SDWORD SafeDivScale32 (SDWORD a, SDWORD b) -{ - if ((DWORD)abs(a) >= (DWORD)abs (b) >> 1) - return (a^b)<0 ? FIXED_MIN : FIXED_MAX; - return DivScale32 (a, b); +#include "xs_Float.h" + +inline SDWORD FixedDiv (SDWORD a, SDWORD b) +{ + if ((DWORD)abs(a) >> (31-16) >= (DWORD)abs (b)) + return (a^b)<0 ? FIXED_MIN : FIXED_MAX; + return DivScale16 (a, b); } #define FixedMul MulScale16 -#define FixedDiv SafeDivScale16 inline fixed_t FloatToFixed(double f) { diff --git a/src/mscinlines.h b/src/mscinlines.h index 1aded4e7b7..6af2da1de7 100644 --- a/src/mscinlines.h +++ b/src/mscinlines.h @@ -321,13 +321,4 @@ __forceinline SDWORD DivScale32 (SDWORD a, SDWORD b) __asm idiv b } -__forceinline void clearbufshort (void *buff, unsigned int count, WORD clear) -{ - SWORD *b2 = (SWORD *)buff; - for (unsigned int i = 0; i != count; ++i) - { - b2[i] = clear; - } -} - #pragma warning (default: 4035) diff --git a/src/p_effect.cpp b/src/p_effect.cpp index 42e82d7405..827a457ee2 100644 --- a/src/p_effect.cpp +++ b/src/p_effect.cpp @@ -198,7 +198,7 @@ void P_FindParticleSubsectors () ParticlesInSubsec.Reserve (numsubsectors - ParticlesInSubsec.Size()); } - clearbufshort (&ParticlesInSubsec[0], numsubsectors, NO_PARTICLE); + fillshort (&ParticlesInSubsec[0], numsubsectors, NO_PARTICLE); if (!r_particles) { diff --git a/src/r_bsp.cpp b/src/r_bsp.cpp index 15dff6065d..1451180a20 100644 --- a/src/r_bsp.cpp +++ b/src/r_bsp.cpp @@ -740,8 +740,8 @@ void R_AddLine (seg_t *line) if (line->linedef->special == Line_Horizon) { // Be aware: Line_Horizon does not work properly with sloped planes - clearbufshort (walltop+WallC.sx1, WallC.sx2 - WallC.sx1, centery); - clearbufshort (wallbottom+WallC.sx1, WallC.sx2 - WallC.sx1, centery); + fillshort (walltop+WallC.sx1, WallC.sx2 - WallC.sx1, centery); + fillshort (wallbottom+WallC.sx1, WallC.sx2 - WallC.sx1, centery); } else { diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 52f5f24e16..12aa3484bc 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1274,7 +1274,7 @@ namespace swrenderer if (b2 > t2) { - clearbufshort(spanend + t2, b2 - t2, x); + fillshort(spanend + t2, b2 - t2, x); } R_SetColorMapLight(basecolormap->Maps, (float)light, wallshade); @@ -1301,7 +1301,7 @@ namespace swrenderer if (b1 > b2) b2 = b1; if (t2 < b2) { - clearbufshort(spanend + t2, b2 - t2, x); + fillshort(spanend + t2, b2 - t2, x); } rcolormap = lcolormap; R_SetColorMapLight(basecolormap->Maps, (float)light, wallshade); diff --git a/src/r_main.cpp b/src/r_main.cpp index 4bf15d4b99..15db65d45a 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -374,7 +374,7 @@ void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, pspritexiscale = 1 / pspritexscale; // thing clipping - clearbufshort (screenheightarray, viewwidth, (short)viewheight); + fillshort (screenheightarray, viewwidth, (short)viewheight); R_InitTextureMapping (); @@ -397,7 +397,7 @@ void R_InitRenderer() { atterm(R_ShutdownRenderer); // viewwidth / viewheight are set by the defaults - clearbufshort (zeroarray, MAXWIDTH, 0); + fillshort (zeroarray, MAXWIDTH, 0); R_InitPlanes (); R_InitShadeMaps(); diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 8a5ee2263a..ac085412bf 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -317,9 +317,9 @@ void R_ClearPlanes (bool fullclear) } // opening / clipping determination - clearbufshort (floorclip, viewwidth, viewheight); + fillshort (floorclip, viewwidth, viewheight); // [RH] clip ceiling to console bottom - clearbufshort (ceilingclip, viewwidth, + fillshort (ceilingclip, viewwidth, !screen->Accel2D && ConBottom > viewwindowy && !bRenderingToCanvas ? (ConBottom - viewwindowy) : 0); @@ -492,7 +492,7 @@ visplane_t *R_FindPlane (const secplane_t &height, FTextureID picnum, int lightl check->MirrorFlags = MirrorFlags; check->CurrentSkybox = CurrentSkybox; - clearbufshort (check->top, viewwidth, 0x7fff); + fillshort (check->top, viewwidth, 0x7fff); return check; } @@ -577,7 +577,7 @@ visplane_t *R_CheckPlane (visplane_t *pl, int start, int stop) pl = new_pl; pl->left = start; pl->right = stop; - clearbufshort (pl->top, viewwidth, 0x7fff); + fillshort (pl->top, viewwidth, 0x7fff); } return pl; } @@ -1681,7 +1681,7 @@ void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1)) if (b2 > t2) { - clearbufshort (spanend+t2, b2-t2, x); + fillshort (spanend+t2, b2-t2, x); } for (--x; x >= pl->left; --x) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 1880273f90..5a016fc4cb 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -538,7 +538,7 @@ clearfog: } else { - clearbufshort(openings + ds->sprtopclip - ds->x1 + x1, x2 - x1, viewheight); + fillshort(openings + ds->sprtopclip - ds->x1 + x1, x2 - x1, viewheight); } } return; @@ -1184,8 +1184,8 @@ void R_RenderSegLoop () call_wallscan(x1, x2, walltop, wallbottom, swall, lwall, yscale, false); } } - clearbufshort (ceilingclip+x1, x2-x1, viewheight); - clearbufshort (floorclip+x1, x2-x1, 0xffff); + fillshort (ceilingclip+x1, x2-x1, viewheight); + fillshort (floorclip+x1, x2-x1, 0xffff); } else { // two sided line @@ -1708,7 +1708,7 @@ void R_StoreWallRange (int start, int stop) { ds_p->sprtopclip = R_NewOpening (stop - start); ds_p->sprbottomclip = R_NewOpening (stop - start); - clearbufshort (openings + ds_p->sprtopclip, stop-start, viewheight); + fillshort (openings + ds_p->sprtopclip, stop-start, viewheight); memset (openings + ds_p->sprbottomclip, -1, (stop-start)*sizeof(short)); ds_p->silhouette = SIL_BOTH; } @@ -1748,7 +1748,7 @@ void R_StoreWallRange (int start, int stop) if (doorclosed || (rw_backfz1 >= rw_frontcz1 && rw_backfz2 >= rw_frontcz2)) { // killough 1/17/98, 2/8/98 ds_p->sprtopclip = R_NewOpening (stop - start); - clearbufshort (openings + ds_p->sprtopclip, stop - start, viewheight); + fillshort (openings + ds_p->sprtopclip, stop - start, viewheight); ds_p->silhouette |= SIL_TOP; } } @@ -1969,7 +1969,7 @@ int WallMostAny(short *mostbuf, double z1, double z2, const FWallCoords *wallc) } else if (y1 > viewheight && y2 > viewheight) // entire line is below screen { - clearbufshort(&mostbuf[wallc->sx1], wallc->sx2 - wallc->sx1, viewheight); + fillshort(&mostbuf[wallc->sx1], wallc->sx2 - wallc->sx1, viewheight); return 12; } diff --git a/src/r_things.cpp b/src/r_things.cpp index 7ee3f4ca92..1bd2e8a025 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2331,11 +2331,11 @@ void R_DrawSprite (vissprite_t *spr) // for R_DrawVisVoxel(). if (x1 > 0) { - clearbufshort(cliptop, x1, viewheight); + fillshort(cliptop, x1, viewheight); } if (x2 < viewwidth - 1) { - clearbufshort(cliptop + x2, viewwidth - x2, viewheight); + fillshort(cliptop + x2, viewwidth - x2, viewheight); } int minvoxely = spr->gzt <= hzt ? 0 : xs_RoundToInt((spr->gzt - hzt) / spr->yscale); int maxvoxely = spr->gzb > hzb ? INT_MAX : xs_RoundToInt((spr->gzt - hzb) / spr->yscale); diff --git a/src/v_draw.cpp b/src/v_draw.cpp index f86a94bcdf..1524c7ba4a 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -235,13 +235,13 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) if (bottomclipper[0] != parms.dclip) { - clearbufshort(bottomclipper, screen->GetWidth(), (short)parms.dclip); + fillshort(bottomclipper, screen->GetWidth(), (short)parms.dclip); } if (parms.uclip != 0) { if (topclipper[0] != parms.uclip) { - clearbufshort(topclipper, screen->GetWidth(), (short)parms.uclip); + fillshort(topclipper, screen->GetWidth(), (short)parms.uclip); } mceilingclip = topclipper; } From 86909ecdb2c2a4623bb25408bd61554ac65397fc Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Fri, 9 Dec 2016 13:27:30 +0100 Subject: [PATCH 8/8] - removed several unused fixed point math functions from the inlines headers. --- src/basicinlines.h | 58 ------------------- src/gccinlines.h | 116 ------------------------------------- src/mscinlines.h | 138 --------------------------------------------- 3 files changed, 312 deletions(-) diff --git a/src/basicinlines.h b/src/basicinlines.h index 8492572818..135208b81c 100644 --- a/src/basicinlines.h +++ b/src/basicinlines.h @@ -20,11 +20,6 @@ static __forceinline SDWORD Scale (SDWORD a, SDWORD b, SDWORD c) return (SDWORD)(((SQWORD)a*b)/c); } -static __forceinline SDWORD MulScale (SDWORD a, SDWORD b, SDWORD c) -{ - return (SDWORD)(((SQWORD)a*b)>>c); -} - static __forceinline SDWORD MulScale1 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a * b) >> 1); } static __forceinline SDWORD MulScale2 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a * b) >> 2); } static __forceinline SDWORD MulScale3 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a * b) >> 3); } @@ -60,11 +55,6 @@ static __forceinline SDWORD MulScale32 (SDWORD a, SDWORD b) { return (SDWORD)((( static __forceinline DWORD UMulScale16 (DWORD a, DWORD b) { return (DWORD)(((QWORD)a * b) >> 16); } -static __forceinline SDWORD DMulScale (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD s) -{ - return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> s); -} - static __forceinline SDWORD DMulScale1 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 1); } static __forceinline SDWORD DMulScale2 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 2); } static __forceinline SDWORD DMulScale3 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 3); } @@ -98,53 +88,6 @@ static __forceinline SDWORD DMulScale30 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) static __forceinline SDWORD DMulScale31 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 31); } static __forceinline SDWORD DMulScale32 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 32); } -static __forceinline SDWORD TMulScale1 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 1); } -static __forceinline SDWORD TMulScale2 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 2); } -static __forceinline SDWORD TMulScale3 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 3); } -static __forceinline SDWORD TMulScale4 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 4); } -static __forceinline SDWORD TMulScale5 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 5); } -static __forceinline SDWORD TMulScale6 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 6); } -static __forceinline SDWORD TMulScale7 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 7); } -static __forceinline SDWORD TMulScale8 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 8); } -static __forceinline SDWORD TMulScale9 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 9); } -static __forceinline SDWORD TMulScale10 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 10); } -static __forceinline SDWORD TMulScale11 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 11); } -static __forceinline SDWORD TMulScale12 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 12); } -static __forceinline SDWORD TMulScale13 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 13); } -static __forceinline SDWORD TMulScale14 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 14); } -static __forceinline SDWORD TMulScale15 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 15); } -static __forceinline SDWORD TMulScale16 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 16); } -static __forceinline SDWORD TMulScale17 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 17); } -static __forceinline SDWORD TMulScale18 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 18); } -static __forceinline SDWORD TMulScale19 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 19); } -static __forceinline SDWORD TMulScale20 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 20); } -static __forceinline SDWORD TMulScale21 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 21); } -static __forceinline SDWORD TMulScale22 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 22); } -static __forceinline SDWORD TMulScale23 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 23); } -static __forceinline SDWORD TMulScale24 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 24); } -static __forceinline SDWORD TMulScale25 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 25); } -static __forceinline SDWORD TMulScale26 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 26); } -static __forceinline SDWORD TMulScale27 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 27); } -static __forceinline SDWORD TMulScale28 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 28); } -static __forceinline SDWORD TMulScale29 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 29); } -static __forceinline SDWORD TMulScale30 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 30); } -static __forceinline SDWORD TMulScale31 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 31); } -static __forceinline SDWORD TMulScale32 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 32); } - -static __forceinline SDWORD BoundMulScale (SDWORD a, SDWORD b, SDWORD c) -{ - SQWORD x = ((SQWORD)a * b) >> c; - return x > 0x7FFFFFFFll ? 0x7FFFFFFF : - x < -0x80000000ll ? 0x80000000 : - (SDWORD)x; -} - -static inline SDWORD DivScale (SDWORD a, SDWORD b, SDWORD c) -{ - return (SDWORD)(((SQWORD)a << c) / b); -} - -static inline SDWORD DivScale1 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 1) / b); } static inline SDWORD DivScale2 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 2) / b); } static inline SDWORD DivScale3 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 3) / b); } static inline SDWORD DivScale4 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 4) / b); } @@ -175,5 +118,4 @@ static inline SDWORD DivScale28 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD) static inline SDWORD DivScale29 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 29) / b); } static inline SDWORD DivScale30 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 30) / b); } static inline SDWORD DivScale31 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 31) / b); } -static inline SDWORD DivScale32 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 32) / b); } diff --git a/src/gccinlines.h b/src/gccinlines.h index eac1f3e6fb..925a0037a9 100644 --- a/src/gccinlines.h +++ b/src/gccinlines.h @@ -47,23 +47,6 @@ static inline SDWORD Scale (SDWORD a, SDWORD b, SDWORD c) return result; } -static inline SDWORD MulScale (SDWORD a, SDWORD b, SDWORD c) -{ - SDWORD result, dummy; - - asm volatile - ("imull %3\n\t" - "shrdl %b4,%1,%0" - : "=a,a,a,a" (result), - "=d,d,d,d" (dummy) - : "a,a,a,a" (a), - "m,r,m,r" (b), - "c,c,I,I" (c) - : "cc" - ); - return result; -} - #define MAKECONSTMulScale(s) \ static inline SDWORD MulScale##s (SDWORD a, SDWORD b) { return ((SQWORD)a * b) >> s; } @@ -143,92 +126,6 @@ MAKECONSTDMulScale(31) MAKECONSTDMulScale(32) #undef MAKECONSTDMulScale -#define MAKECONSTTMulScale(s) \ - static inline SDWORD TMulScale##s (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD ee) \ - { \ - return (((SQWORD)a * b) + ((SQWORD)c * d) + ((SQWORD)e * ee)) >> s; \ - } - -MAKECONSTTMulScale(1) -MAKECONSTTMulScale(2) -MAKECONSTTMulScale(3) -MAKECONSTTMulScale(4) -MAKECONSTTMulScale(5) -MAKECONSTTMulScale(6) -MAKECONSTTMulScale(7) -MAKECONSTTMulScale(8) -MAKECONSTTMulScale(9) -MAKECONSTTMulScale(10) -MAKECONSTTMulScale(11) -MAKECONSTTMulScale(12) -MAKECONSTTMulScale(13) -MAKECONSTTMulScale(14) -MAKECONSTTMulScale(15) -MAKECONSTTMulScale(16) -MAKECONSTTMulScale(17) -MAKECONSTTMulScale(18) -MAKECONSTTMulScale(19) -MAKECONSTTMulScale(20) -MAKECONSTTMulScale(21) -MAKECONSTTMulScale(22) -MAKECONSTTMulScale(23) -MAKECONSTTMulScale(24) -MAKECONSTTMulScale(25) -MAKECONSTTMulScale(26) -MAKECONSTTMulScale(27) -MAKECONSTTMulScale(28) -MAKECONSTTMulScale(29) -MAKECONSTTMulScale(30) -MAKECONSTTMulScale(31) -MAKECONSTTMulScale(32) -#undef MAKECONSTTMulScale - -static inline SDWORD BoundMulScale (SDWORD a, SDWORD b, SDWORD c) -{ - union { - long long big; - struct - { - int l, h; - }; - } u; - u.big = ((long long)a * b) >> c; - if ((u.h ^ u.l) < 0 || (unsigned int)(u.h+1) > 1) return (u.h >> 31) ^ 0x7fffffff; - return u.l; -} - -static inline SDWORD DivScale (SDWORD a, SDWORD b, SDWORD c) -{ - SDWORD result, dummy; - SDWORD lo = a << c; - SDWORD hi = a >> (-c); - - asm volatile - ("idivl %4" - :"=a" (result), - "=d" (dummy) - : "a" (lo), - "d" (hi), - "r" (b) - : "cc"); - return result; -} - -static inline SDWORD DivScale1 (SDWORD a, SDWORD b) -{ - SDWORD result, dummy; - - asm volatile - ("addl %%eax,%%eax\n\t" - "sbbl %%edx,%%edx\n\t" - "idivl %3" - :"=a,a" (result), - "=&d,d" (dummy) - : "a,a" (a), - "r,m" (b) - : "cc"); - return result; -} #define MAKECONSTDivScale(s) \ static inline SDWORD DivScale##s (SDWORD a, SDWORD b) \ @@ -277,16 +174,3 @@ MAKECONSTDivScale(30) MAKECONSTDivScale(31) #undef MAKECONSTDivScale -static inline SDWORD DivScale32 (SDWORD a, SDWORD b) -{ - SDWORD result = 0, dummy; - - asm volatile - ("idivl %3" - :"+a,a" (result), - "=d,d" (dummy) - : "d,d" (a), - "r,m" (b) - : "cc"); - return result; -} diff --git a/src/mscinlines.h b/src/mscinlines.h index 6af2da1de7..3d00e9cb03 100644 --- a/src/mscinlines.h +++ b/src/mscinlines.h @@ -27,14 +27,6 @@ __forceinline SDWORD Scale (SDWORD a, SDWORD b, SDWORD c) __asm idiv c } -__forceinline SDWORD MulScale (SDWORD a, SDWORD b, SDWORD c) -{ - __asm mov eax,a - __asm mov ecx,c - __asm imul b - __asm shrd eax,edx,cl -} - #define MAKECONSTMulScale(s) \ __forceinline SDWORD MulScale##s (SDWORD a, SDWORD b) \ { \ @@ -89,20 +81,6 @@ __forceinline DWORD UMulScale16(DWORD a, DWORD b) __asm shrd eax,edx,16 } -__forceinline SDWORD DMulScale (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD s) -{ - __asm mov eax,a - __asm imul b - __asm mov ebx,eax - __asm mov eax,c - __asm mov esi,edx - __asm mov ecx,s - __asm imul d - __asm add eax,ebx - __asm adc edx,esi - __asm shrd eax,edx,cl -} - #define MAKECONSTDMulScale(s) \ __forceinline SDWORD DMulScale##s (SDWORD a, SDWORD b, SDWORD c, SDWORD d) \ { \ @@ -163,115 +141,6 @@ __forceinline SDWORD DMulScale32 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) __asm mov eax,edx } -#define MAKECONSTTMulScale(s) \ - __forceinline SDWORD TMulScale##s (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) \ - { \ - __asm mov eax,a \ - __asm imul b \ - __asm mov ebx,eax \ - __asm mov eax,d \ - __asm mov ecx,edx \ - __asm imul c \ - __asm add ebx,eax \ - __asm mov eax,e \ - __asm adc ecx,edx \ - __asm imul f \ - __asm add eax,ebx \ - __asm adc edx,ecx \ - __asm shrd eax,edx,s \ - } - -MAKECONSTTMulScale(1) -MAKECONSTTMulScale(2) -MAKECONSTTMulScale(3) -MAKECONSTTMulScale(4) -MAKECONSTTMulScale(5) -MAKECONSTTMulScale(6) -MAKECONSTTMulScale(7) -MAKECONSTTMulScale(8) -MAKECONSTTMulScale(9) -MAKECONSTTMulScale(10) -MAKECONSTTMulScale(11) -MAKECONSTTMulScale(12) -MAKECONSTTMulScale(13) -MAKECONSTTMulScale(14) -MAKECONSTTMulScale(15) -MAKECONSTTMulScale(16) -MAKECONSTTMulScale(17) -MAKECONSTTMulScale(18) -MAKECONSTTMulScale(19) -MAKECONSTTMulScale(20) -MAKECONSTTMulScale(21) -MAKECONSTTMulScale(22) -MAKECONSTTMulScale(23) -MAKECONSTTMulScale(24) -MAKECONSTTMulScale(25) -MAKECONSTTMulScale(26) -MAKECONSTTMulScale(27) -MAKECONSTTMulScale(28) -MAKECONSTTMulScale(29) -MAKECONSTTMulScale(30) -MAKECONSTTMulScale(31) -#undef MAKECONSTTMulScale - -__forceinline SDWORD TMulScale32 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) -{ - __asm mov eax,a - __asm imul b - __asm mov ebx,eax - __asm mov eax,c - __asm mov ecx,edx - __asm imul d - __asm add ebx,eax - __asm mov eax,e - __asm adc ecx,edx - __asm imul f - __asm add eax,ebx - __asm adc edx,ecx - __asm mov eax,edx -} - -__forceinline SDWORD BoundMulScale (SDWORD a, SDWORD b, SDWORD c) -{ - __asm mov eax,a - __asm imul b - __asm mov ebx,edx - __asm mov ecx,c - __asm shrd eax,edx,cl - __asm sar edx,cl - __asm xor edx,eax - __asm js checkit - __asm xor edx,eax - __asm jz skipboundit - __asm cmp edx,0xffffffff - __asm je skipboundit -checkit: - __asm mov eax,ebx - __asm sar eax,31 - __asm xor eax,0x7fffffff -skipboundit: - ; -} - -__forceinline SDWORD DivScale (SDWORD a, SDWORD b, SDWORD c) -{ - __asm mov eax,a - __asm mov ecx,c - __asm shl eax,cl - __asm mov edx,a - __asm neg cl - __asm sar edx,cl - __asm idiv b -} - -__forceinline SDWORD DivScale1 (SDWORD a, SDWORD b) -{ - __asm mov eax,a - __asm add eax,eax - __asm sbb edx,edx - __asm idiv b -} - #define MAKECONSTDivScale(s) \ __forceinline SDWORD DivScale##s (SDWORD a, SDWORD b) \ { \ @@ -314,11 +183,4 @@ MAKECONSTDivScale(30) MAKECONSTDivScale(31) #undef MAKECONSTDivScale -__forceinline SDWORD DivScale32 (SDWORD a, SDWORD b) -{ - __asm mov edx,a - __asm xor eax,eax - __asm idiv b -} - #pragma warning (default: 4035)