diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2f5649e07c..b75f3559ca 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -197,7 +197,6 @@ else() else() option( NO_GTK "Disable GTK+ dialogs (Not applicable to Windows)" ) option( DYN_GTK "Load GTK+ at runtime instead of compile time" ON ) - option( VALGRIND "Add special Valgrind sequences to self-modifying code" ) set( FMOD_SEARCH_PATHS /usr/local/include @@ -816,6 +815,7 @@ set( FASTMATH_PCH_SOURCES r_segs.cpp r_sky.cpp r_things.cpp + r_walldraw.cpp s_advsound.cpp s_environment.cpp s_playlist.cpp diff --git a/src/basicinlines.h b/src/basicinlines.h index cc562f2b8c..135208b81c 100644 --- a/src/basicinlines.h +++ b/src/basicinlines.h @@ -20,11 +20,6 @@ static __forceinline SDWORD Scale (SDWORD a, SDWORD b, SDWORD c) return (SDWORD)(((SQWORD)a*b)/c); } -static __forceinline SDWORD MulScale (SDWORD a, SDWORD b, SDWORD c) -{ - return (SDWORD)(((SQWORD)a*b)>>c); -} - static __forceinline SDWORD MulScale1 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a * b) >> 1); } static __forceinline SDWORD MulScale2 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a * b) >> 2); } static __forceinline SDWORD MulScale3 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a * b) >> 3); } @@ -60,11 +55,6 @@ static __forceinline SDWORD MulScale32 (SDWORD a, SDWORD b) { return (SDWORD)((( static __forceinline DWORD UMulScale16 (DWORD a, DWORD b) { return (DWORD)(((QWORD)a * b) >> 16); } -static __forceinline SDWORD DMulScale (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD s) -{ - return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> s); -} - static __forceinline SDWORD DMulScale1 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 1); } static __forceinline SDWORD DMulScale2 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 2); } static __forceinline SDWORD DMulScale3 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) 
>> 3); } @@ -98,53 +88,6 @@ static __forceinline SDWORD DMulScale30 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) static __forceinline SDWORD DMulScale31 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 31); } static __forceinline SDWORD DMulScale32 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 32); } -static __forceinline SDWORD TMulScale1 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 1); } -static __forceinline SDWORD TMulScale2 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 2); } -static __forceinline SDWORD TMulScale3 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 3); } -static __forceinline SDWORD TMulScale4 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 4); } -static __forceinline SDWORD TMulScale5 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 5); } -static __forceinline SDWORD TMulScale6 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 6); } -static __forceinline SDWORD TMulScale7 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 7); } -static __forceinline SDWORD TMulScale8 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 8); } -static __forceinline SDWORD TMulScale9 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 9); } -static __forceinline SDWORD TMulScale10 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, 
SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 10); } -static __forceinline SDWORD TMulScale11 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 11); } -static __forceinline SDWORD TMulScale12 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 12); } -static __forceinline SDWORD TMulScale13 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 13); } -static __forceinline SDWORD TMulScale14 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 14); } -static __forceinline SDWORD TMulScale15 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 15); } -static __forceinline SDWORD TMulScale16 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 16); } -static __forceinline SDWORD TMulScale17 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 17); } -static __forceinline SDWORD TMulScale18 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 18); } -static __forceinline SDWORD TMulScale19 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 19); } -static __forceinline SDWORD TMulScale20 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 20); } -static __forceinline SDWORD TMulScale21 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 21); } -static 
__forceinline SDWORD TMulScale22 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 22); } -static __forceinline SDWORD TMulScale23 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 23); } -static __forceinline SDWORD TMulScale24 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 24); } -static __forceinline SDWORD TMulScale25 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 25); } -static __forceinline SDWORD TMulScale26 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 26); } -static __forceinline SDWORD TMulScale27 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 27); } -static __forceinline SDWORD TMulScale28 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 28); } -static __forceinline SDWORD TMulScale29 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 29); } -static __forceinline SDWORD TMulScale30 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 30); } -static __forceinline SDWORD TMulScale31 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 31); } -static __forceinline SDWORD TMulScale32 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d + (SQWORD)e*f) >> 32); } - -static __forceinline SDWORD BoundMulScale (SDWORD a, SDWORD b, SDWORD c) -{ - SQWORD x = ((SQWORD)a * b) >> c; - 
return x > 0x7FFFFFFFll ? 0x7FFFFFFF : - x < -0x80000000ll ? 0x80000000 : - (SDWORD)x; -} - -static inline SDWORD DivScale (SDWORD a, SDWORD b, SDWORD c) -{ - return (SDWORD)(((SQWORD)a << c) / b); -} - -static inline SDWORD DivScale1 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 1) / b); } static inline SDWORD DivScale2 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 2) / b); } static inline SDWORD DivScale3 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 3) / b); } static inline SDWORD DivScale4 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 4) / b); } @@ -175,29 +118,4 @@ static inline SDWORD DivScale28 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD) static inline SDWORD DivScale29 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 29) / b); } static inline SDWORD DivScale30 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 30) / b); } static inline SDWORD DivScale31 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 31) / b); } -static inline SDWORD DivScale32 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 32) / b); } -static __forceinline void clearbuf (void *buff, unsigned int count, SDWORD clear) -{ - SDWORD *b2 = (SDWORD *)buff; - for (unsigned int i = 0; i != count; ++i) - { - b2[i] = clear; - } -} - -static __forceinline void clearbufshort (void *buff, unsigned int count, WORD clear) -{ - SWORD *b2 = (SWORD *)buff; - for (unsigned int i = 0; i != count; ++i) - { - b2[i] = clear; - } -} - -static __forceinline SDWORD ksgn (SDWORD a) -{ - if (a < 0) return -1; - else if (a > 0) return 1; - else return 0; -} diff --git a/src/g_shared/a_lightning.cpp b/src/g_shared/a_lightning.cpp index 3f44f2fddd..bbc6638123 100644 --- a/src/g_shared/a_lightning.cpp +++ b/src/g_shared/a_lightning.cpp @@ -23,7 +23,7 @@ DLightningThinker::DLightningThinker () NextLightningFlash = ((pr_lightning()&15)+5)*35; // don't flash at level start LightningLightLevels.Resize(numsectors); - clearbufshort(&LightningLightLevels[0], numsectors, SHRT_MAX); 
+ fillshort(&LightningLightLevels[0], numsectors, SHRT_MAX); } DLightningThinker::~DLightningThinker () @@ -87,7 +87,7 @@ void DLightningThinker::LightningFlash () tempSec->SetLightLevel(LightningLightLevels[j]); } } - clearbufshort(&LightningLightLevels[0], numsectors, SHRT_MAX); + fillshort(&LightningLightLevels[0], numsectors, SHRT_MAX); level.flags &= ~LEVEL_SWAPSKIES; } return; diff --git a/src/g_strife/strife_sbar.cpp b/src/g_strife/strife_sbar.cpp index 973364cf7d..46dd63ff76 100644 --- a/src/g_strife/strife_sbar.cpp +++ b/src/g_strife/strife_sbar.cpp @@ -174,14 +174,10 @@ void FHealthBar::MakeTexture () void FHealthBar::FillBar (int min, int max, BYTE light, BYTE dark) { -#ifdef __BIG_ENDIAN__ - SDWORD fill = (light << 24) | (dark << 16) | (light << 8) | dark; -#else - SDWORD fill = light | (dark << 8) | (light << 16) | (dark << 24); -#endif - if (max > min) + for (int i = min*2; i < max*2; i++) { - clearbuf (&Pixels[min*4], max - min, fill); + Pixels[i * 2] = light; + Pixels[i * 2 + 1] = dark; } } diff --git a/src/gccinlines.h b/src/gccinlines.h index 6cad307f7c..925a0037a9 100644 --- a/src/gccinlines.h +++ b/src/gccinlines.h @@ -47,23 +47,6 @@ static inline SDWORD Scale (SDWORD a, SDWORD b, SDWORD c) return result; } -static inline SDWORD MulScale (SDWORD a, SDWORD b, SDWORD c) -{ - SDWORD result, dummy; - - asm volatile - ("imull %3\n\t" - "shrdl %b4,%1,%0" - : "=a,a,a,a" (result), - "=d,d,d,d" (dummy) - : "a,a,a,a" (a), - "m,r,m,r" (b), - "c,c,I,I" (c) - : "cc" - ); - return result; -} - #define MAKECONSTMulScale(s) \ static inline SDWORD MulScale##s (SDWORD a, SDWORD b) { return ((SQWORD)a * b) >> s; } @@ -143,92 +126,6 @@ MAKECONSTDMulScale(31) MAKECONSTDMulScale(32) #undef MAKECONSTDMulScale -#define MAKECONSTTMulScale(s) \ - static inline SDWORD TMulScale##s (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD ee) \ - { \ - return (((SQWORD)a * b) + ((SQWORD)c * d) + ((SQWORD)e * ee)) >> s; \ - } - -MAKECONSTTMulScale(1) -MAKECONSTTMulScale(2) 
-MAKECONSTTMulScale(3) -MAKECONSTTMulScale(4) -MAKECONSTTMulScale(5) -MAKECONSTTMulScale(6) -MAKECONSTTMulScale(7) -MAKECONSTTMulScale(8) -MAKECONSTTMulScale(9) -MAKECONSTTMulScale(10) -MAKECONSTTMulScale(11) -MAKECONSTTMulScale(12) -MAKECONSTTMulScale(13) -MAKECONSTTMulScale(14) -MAKECONSTTMulScale(15) -MAKECONSTTMulScale(16) -MAKECONSTTMulScale(17) -MAKECONSTTMulScale(18) -MAKECONSTTMulScale(19) -MAKECONSTTMulScale(20) -MAKECONSTTMulScale(21) -MAKECONSTTMulScale(22) -MAKECONSTTMulScale(23) -MAKECONSTTMulScale(24) -MAKECONSTTMulScale(25) -MAKECONSTTMulScale(26) -MAKECONSTTMulScale(27) -MAKECONSTTMulScale(28) -MAKECONSTTMulScale(29) -MAKECONSTTMulScale(30) -MAKECONSTTMulScale(31) -MAKECONSTTMulScale(32) -#undef MAKECONSTTMulScale - -static inline SDWORD BoundMulScale (SDWORD a, SDWORD b, SDWORD c) -{ - union { - long long big; - struct - { - int l, h; - }; - } u; - u.big = ((long long)a * b) >> c; - if ((u.h ^ u.l) < 0 || (unsigned int)(u.h+1) > 1) return (u.h >> 31) ^ 0x7fffffff; - return u.l; -} - -static inline SDWORD DivScale (SDWORD a, SDWORD b, SDWORD c) -{ - SDWORD result, dummy; - SDWORD lo = a << c; - SDWORD hi = a >> (-c); - - asm volatile - ("idivl %4" - :"=a" (result), - "=d" (dummy) - : "a" (lo), - "d" (hi), - "r" (b) - : "cc"); - return result; -} - -static inline SDWORD DivScale1 (SDWORD a, SDWORD b) -{ - SDWORD result, dummy; - - asm volatile - ("addl %%eax,%%eax\n\t" - "sbbl %%edx,%%edx\n\t" - "idivl %3" - :"=a,a" (result), - "=&d,d" (dummy) - : "a,a" (a), - "r,m" (b) - : "cc"); - return result; -} #define MAKECONSTDivScale(s) \ static inline SDWORD DivScale##s (SDWORD a, SDWORD b) \ @@ -277,56 +174,3 @@ MAKECONSTDivScale(30) MAKECONSTDivScale(31) #undef MAKECONSTDivScale -static inline SDWORD DivScale32 (SDWORD a, SDWORD b) -{ - SDWORD result = 0, dummy; - - asm volatile - ("idivl %3" - :"+a,a" (result), - "=d,d" (dummy) - : "d,d" (a), - "r,m" (b) - : "cc"); - return result; -} - -static inline void clearbuf (void *buff, int count, SDWORD clear) 
-{ - int dummy1, dummy2; - asm volatile - ("rep stosl" - :"=D" (dummy1), - "=c" (dummy2) - : "D" (buff), - "c" (count), - "a" (clear) - ); -} - -static inline void clearbufshort (void *buff, unsigned int count, WORD clear) -{ - asm volatile - ("shr $1,%%ecx\n\t" - "rep stosl\n\t" - "adc %%ecx,%%ecx\n\t" - "rep stosw" - :"=D" (buff), "=c" (count) - :"D" (buff), "c" (count), "a" (clear|(clear<<16)) - :"cc"); -} - -static inline SDWORD ksgn (SDWORD a) -{ - SDWORD result, dummy; - - asm volatile - ("add %0,%0\n\t" - "sbb %1,%1\n\t" - "cmp %0,%1\n\t" - "adc $0,%1" - :"=r" (dummy), "=r" (result) - :"0" (a) - :"cc"); - return result; -} diff --git a/src/m_fixed.h b/src/m_fixed.h index 506b9702c8..33d339608a 100644 --- a/src/m_fixed.h +++ b/src/m_fixed.h @@ -1,11 +1,3 @@ -// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman -// Ken Silverman's official web site: "http://www.advsys.net/ken" -// See the included license file "BUILDLIC.TXT" for license info. -// -// This file is based on pragmas.h from Ken Silverman's original Build -// source code release and contains routines that were originally -// inline assembly but are not now. - #ifndef __M_FIXED__ #define __M_FIXED__ @@ -20,121 +12,25 @@ #include "basicinlines.h" #endif -#include "xs_Float.h" - -#define MAKESAFEDIVSCALE(x) \ - inline SDWORD SafeDivScale##x (SDWORD a, SDWORD b) \ - { \ - if ((DWORD)abs(a) >> (31-x) >= (DWORD)abs (b)) \ - return (a^b)<0 ? 
FIXED_MIN : FIXED_MAX; \ - return DivScale##x (a, b); \ - } - -MAKESAFEDIVSCALE(1) -MAKESAFEDIVSCALE(2) -MAKESAFEDIVSCALE(3) -MAKESAFEDIVSCALE(4) -MAKESAFEDIVSCALE(5) -MAKESAFEDIVSCALE(6) -MAKESAFEDIVSCALE(7) -MAKESAFEDIVSCALE(8) -MAKESAFEDIVSCALE(9) -MAKESAFEDIVSCALE(10) -MAKESAFEDIVSCALE(11) -MAKESAFEDIVSCALE(12) -MAKESAFEDIVSCALE(13) -MAKESAFEDIVSCALE(14) -MAKESAFEDIVSCALE(15) -MAKESAFEDIVSCALE(16) -MAKESAFEDIVSCALE(17) -MAKESAFEDIVSCALE(18) -MAKESAFEDIVSCALE(19) -MAKESAFEDIVSCALE(20) -MAKESAFEDIVSCALE(21) -MAKESAFEDIVSCALE(22) -MAKESAFEDIVSCALE(23) -MAKESAFEDIVSCALE(24) -MAKESAFEDIVSCALE(25) -MAKESAFEDIVSCALE(26) -MAKESAFEDIVSCALE(27) -MAKESAFEDIVSCALE(28) -MAKESAFEDIVSCALE(29) -MAKESAFEDIVSCALE(30) -#undef MAKESAFEDIVSCALE - -inline SDWORD SafeDivScale31 (SDWORD a, SDWORD b) +__forceinline void fillshort(void *buff, unsigned int count, WORD clear) { - if ((DWORD)abs(a) >= (DWORD)abs (b)) - return (a^b)<0 ? FIXED_MIN : FIXED_MAX; - return DivScale31 (a, b); + SWORD *b2 = (SWORD *)buff; + for (unsigned int i = 0; i != count; ++i) + { + b2[i] = clear; + } } -inline SDWORD SafeDivScale32 (SDWORD a, SDWORD b) -{ - if ((DWORD)abs(a) >= (DWORD)abs (b) >> 1) - return (a^b)<0 ? FIXED_MIN : FIXED_MAX; - return DivScale32 (a, b); +#include "xs_Float.h" + +inline SDWORD FixedDiv (SDWORD a, SDWORD b) +{ + if ((DWORD)abs(a) >> (31-16) >= (DWORD)abs (b)) + return (a^b)<0 ? 
FIXED_MIN : FIXED_MAX; + return DivScale16 (a, b); } #define FixedMul MulScale16 -#define FixedDiv SafeDivScale16 - -inline void qinterpolatedown16 (SDWORD *out, DWORD count, SDWORD val, SDWORD delta) -{ - if (count & 1) - { - out[0] = val >> 16; - val += delta; - } - count >>= 1; - while (count-- != 0) - { - int temp = val + delta; - out[0] = val >> 16; - val = temp + delta; - out[1] = temp >> 16; - out += 2; - } -} - -inline void qinterpolatedown16short (short *out, DWORD count, SDWORD val, SDWORD delta) -{ - if (count) - { - if ((size_t)out & 2) - { // align to dword boundary - *out++ = (short)(val >> 16); - count--; - val += delta; - } - DWORD *o2 = (DWORD *)out; - DWORD c2 = count>>1; - while (c2-- != 0) - { - SDWORD temp = val + delta; - *o2++ = (temp & 0xffff0000) | ((DWORD)val >> 16); - val = temp + delta; - } - if (count & 1) - { - *(short *)o2 = (short)(val >> 16); - } - } -} - - //returns num/den, dmval = num%den -inline SDWORD DivMod (SDWORD num, SDWORD den, SDWORD *dmval) -{ - *dmval = num % den; - return num / den; -} - - //returns num%den, dmval = num/den -inline SDWORD ModDiv (SDWORD num, SDWORD den, SDWORD *dmval) -{ - *dmval = num / den; - return num % den; -} inline fixed_t FloatToFixed(double f) { diff --git a/src/mscinlines.h b/src/mscinlines.h index a8dd5fea73..3d00e9cb03 100644 --- a/src/mscinlines.h +++ b/src/mscinlines.h @@ -27,14 +27,6 @@ __forceinline SDWORD Scale (SDWORD a, SDWORD b, SDWORD c) __asm idiv c } -__forceinline SDWORD MulScale (SDWORD a, SDWORD b, SDWORD c) -{ - __asm mov eax,a - __asm mov ecx,c - __asm imul b - __asm shrd eax,edx,cl -} - #define MAKECONSTMulScale(s) \ __forceinline SDWORD MulScale##s (SDWORD a, SDWORD b) \ { \ @@ -89,20 +81,6 @@ __forceinline DWORD UMulScale16(DWORD a, DWORD b) __asm shrd eax,edx,16 } -__forceinline SDWORD DMulScale (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD s) -{ - __asm mov eax,a - __asm imul b - __asm mov ebx,eax - __asm mov eax,c - __asm mov esi,edx - __asm mov ecx,s - __asm imul d 
- __asm add eax,ebx - __asm adc edx,esi - __asm shrd eax,edx,cl -} - #define MAKECONSTDMulScale(s) \ __forceinline SDWORD DMulScale##s (SDWORD a, SDWORD b, SDWORD c, SDWORD d) \ { \ @@ -163,115 +141,6 @@ __forceinline SDWORD DMulScale32 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) __asm mov eax,edx } -#define MAKECONSTTMulScale(s) \ - __forceinline SDWORD TMulScale##s (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) \ - { \ - __asm mov eax,a \ - __asm imul b \ - __asm mov ebx,eax \ - __asm mov eax,d \ - __asm mov ecx,edx \ - __asm imul c \ - __asm add ebx,eax \ - __asm mov eax,e \ - __asm adc ecx,edx \ - __asm imul f \ - __asm add eax,ebx \ - __asm adc edx,ecx \ - __asm shrd eax,edx,s \ - } - -MAKECONSTTMulScale(1) -MAKECONSTTMulScale(2) -MAKECONSTTMulScale(3) -MAKECONSTTMulScale(4) -MAKECONSTTMulScale(5) -MAKECONSTTMulScale(6) -MAKECONSTTMulScale(7) -MAKECONSTTMulScale(8) -MAKECONSTTMulScale(9) -MAKECONSTTMulScale(10) -MAKECONSTTMulScale(11) -MAKECONSTTMulScale(12) -MAKECONSTTMulScale(13) -MAKECONSTTMulScale(14) -MAKECONSTTMulScale(15) -MAKECONSTTMulScale(16) -MAKECONSTTMulScale(17) -MAKECONSTTMulScale(18) -MAKECONSTTMulScale(19) -MAKECONSTTMulScale(20) -MAKECONSTTMulScale(21) -MAKECONSTTMulScale(22) -MAKECONSTTMulScale(23) -MAKECONSTTMulScale(24) -MAKECONSTTMulScale(25) -MAKECONSTTMulScale(26) -MAKECONSTTMulScale(27) -MAKECONSTTMulScale(28) -MAKECONSTTMulScale(29) -MAKECONSTTMulScale(30) -MAKECONSTTMulScale(31) -#undef MAKECONSTTMulScale - -__forceinline SDWORD TMulScale32 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) -{ - __asm mov eax,a - __asm imul b - __asm mov ebx,eax - __asm mov eax,c - __asm mov ecx,edx - __asm imul d - __asm add ebx,eax - __asm mov eax,e - __asm adc ecx,edx - __asm imul f - __asm add eax,ebx - __asm adc edx,ecx - __asm mov eax,edx -} - -__forceinline SDWORD BoundMulScale (SDWORD a, SDWORD b, SDWORD c) -{ - __asm mov eax,a - __asm imul b - __asm mov ebx,edx - __asm mov ecx,c - __asm shrd eax,edx,cl - __asm sar edx,cl 
- __asm xor edx,eax - __asm js checkit - __asm xor edx,eax - __asm jz skipboundit - __asm cmp edx,0xffffffff - __asm je skipboundit -checkit: - __asm mov eax,ebx - __asm sar eax,31 - __asm xor eax,0x7fffffff -skipboundit: - ; -} - -__forceinline SDWORD DivScale (SDWORD a, SDWORD b, SDWORD c) -{ - __asm mov eax,a - __asm mov ecx,c - __asm shl eax,cl - __asm mov edx,a - __asm neg cl - __asm sar edx,cl - __asm idiv b -} - -__forceinline SDWORD DivScale1 (SDWORD a, SDWORD b) -{ - __asm mov eax,a - __asm add eax,eax - __asm sbb edx,edx - __asm idiv b -} - #define MAKECONSTDivScale(s) \ __forceinline SDWORD DivScale##s (SDWORD a, SDWORD b) \ { \ @@ -314,38 +183,4 @@ MAKECONSTDivScale(30) MAKECONSTDivScale(31) #undef MAKECONSTDivScale -__forceinline SDWORD DivScale32 (SDWORD a, SDWORD b) -{ - __asm mov edx,a - __asm xor eax,eax - __asm idiv b -} - -__forceinline void clearbuf (void *buff, unsigned int count, SDWORD clear) -{ - SDWORD *b2 = (SDWORD *)buff; - for (unsigned int i = 0; i != count; ++i) - { - b2[i] = clear; - } -} - -__forceinline void clearbufshort (void *buff, unsigned int count, WORD clear) -{ - SWORD *b2 = (SWORD *)buff; - for (unsigned int i = 0; i != count; ++i) - { - b2[i] = clear; - } -} - -__forceinline SDWORD ksgn (SDWORD a) -{ - __asm mov edx,a - __asm add edx,edx - __asm sbb eax,eax - __asm cmp eax,edx - __asm adc eax,0 -} - #pragma warning (default: 4035) diff --git a/src/p_effect.cpp b/src/p_effect.cpp index 42e82d7405..827a457ee2 100644 --- a/src/p_effect.cpp +++ b/src/p_effect.cpp @@ -198,7 +198,7 @@ void P_FindParticleSubsectors () ParticlesInSubsec.Reserve (numsubsectors - ParticlesInSubsec.Size()); } - clearbufshort (&ParticlesInSubsec[0], numsubsectors, NO_PARTICLE); + fillshort (&ParticlesInSubsec[0], numsubsectors, NO_PARTICLE); if (!r_particles) { diff --git a/src/p_maputl.cpp b/src/p_maputl.cpp index 31b4125988..fdf66e969d 100644 --- a/src/p_maputl.cpp +++ b/src/p_maputl.cpp @@ -925,7 +925,7 @@ void FBlockThingsIterator::init(const 
FBoundingBox &box) void FBlockThingsIterator::ClearHash() { - clearbuf(Buckets, countof(Buckets), -1); + memset(Buckets, -1, sizeof(Buckets)); NumFixedHash = 0; DynHash.Clear(); } diff --git a/src/p_user.cpp b/src/p_user.cpp index b808dd9c3d..e2d799834e 100644 --- a/src/p_user.cpp +++ b/src/p_user.cpp @@ -2609,11 +2609,7 @@ void P_PlayerThink (player_t *player) else if (cmd->ucmd.upmove != 0) { // Clamp the speed to some reasonable maximum. - int magnitude = abs (cmd->ucmd.upmove); - if (magnitude > 0x300) - { - cmd->ucmd.upmove = ksgn (cmd->ucmd.upmove) * 0x300; - } + cmd->ucmd.upmove = clamp(cmd->ucmd.upmove, -0x300, 0x300); if (player->mo->waterlevel >= 2 || (player->mo->flags2 & MF2_FLY) || (player->cheats & CF_NOCLIP2)) { player->mo->Vel.Z = player->mo->Speed * cmd->ucmd.upmove / 128.; diff --git a/src/r_bsp.cpp b/src/r_bsp.cpp index 91fb86e928..1451180a20 100644 --- a/src/r_bsp.cpp +++ b/src/r_bsp.cpp @@ -17,12 +17,6 @@ // DESCRIPTION: // BSP traversal, handling of LineSegs for rendering. // -// This file contains some code from the Build Engine. -// -// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman -// Ken Silverman's official web site: "http://www.advsys.net/ken" -// See the included license file "BUILDLIC.TXT" for license info. 
-// //----------------------------------------------------------------------------- @@ -746,8 +740,8 @@ void R_AddLine (seg_t *line) if (line->linedef->special == Line_Horizon) { // Be aware: Line_Horizon does not work properly with sloped planes - clearbufshort (walltop+WallC.sx1, WallC.sx2 - WallC.sx1, centery); - clearbufshort (wallbottom+WallC.sx1, WallC.sx2 - WallC.sx1, centery); + fillshort (walltop+WallC.sx1, WallC.sx2 - WallC.sx1, centery); + fillshort (wallbottom+WallC.sx1, WallC.sx2 - WallC.sx1, centery); } else { diff --git a/src/r_data/sprites.cpp b/src/r_data/sprites.cpp index 3e0cb80ccf..b384211fc6 100644 --- a/src/r_data/sprites.cpp +++ b/src/r_data/sprites.cpp @@ -270,7 +270,7 @@ void R_InitSpriteDefs () // Create a hash table to speed up the process smax = TexMan.NumTextures(); hashes = new Hasher[smax]; - clearbuf(hashes, sizeof(Hasher)*smax/4, -1); + memset(hashes, -1, sizeof(Hasher)*smax); for (i = 0; i < smax; ++i) { FTexture *tex = TexMan.ByIndex(i); @@ -285,7 +285,7 @@ void R_InitSpriteDefs () // Repeat, for voxels vmax = Wads.GetNumLumps(); vhashes = new VHasher[vmax]; - clearbuf(vhashes, sizeof(VHasher)*vmax/4, -1); + memset(vhashes, -1, sizeof(VHasher)*vmax); for (i = 0; i < vmax; ++i) { if (Wads.GetLumpNamespace(i) == ns_voxels) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 52f5f24e16..12aa3484bc 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1274,7 +1274,7 @@ namespace swrenderer if (b2 > t2) { - clearbufshort(spanend + t2, b2 - t2, x); + fillshort(spanend + t2, b2 - t2, x); } R_SetColorMapLight(basecolormap->Maps, (float)light, wallshade); @@ -1301,7 +1301,7 @@ namespace swrenderer if (b1 > b2) b2 = b1; if (t2 < b2) { - clearbufshort(spanend + t2, b2 - t2, x); + fillshort(spanend + t2, b2 - t2, x); } rcolormap = lcolormap; R_SetColorMapLight(basecolormap->Maps, (float)light, wallshade); diff --git a/src/r_draw.h b/src/r_draw.h index 40b3328964..39bac67acf 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -104,8 +104,6 @@ 
namespace swrenderer const uint8_t *R_GetColumn(FTexture *tex, int col); void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); - void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); - void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); void rt_initcols(uint8_t *buffer = nullptr); void rt_span_coverage(int x, int start, int stop); diff --git a/src/r_main.cpp b/src/r_main.cpp index 4bf15d4b99..15db65d45a 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -374,7 +374,7 @@ void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, pspritexiscale = 1 / pspritexscale; // thing clipping - clearbufshort (screenheightarray, viewwidth, (short)viewheight); + fillshort (screenheightarray, viewwidth, (short)viewheight); R_InitTextureMapping (); @@ -397,7 +397,7 @@ void R_InitRenderer() { atterm(R_ShutdownRenderer); // viewwidth / viewheight are set by the defaults - clearbufshort (zeroarray, MAXWIDTH, 0); + fillshort (zeroarray, MAXWIDTH, 0); R_InitPlanes (); R_InitShadeMaps(); diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 8a5ee2263a..ac085412bf 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -317,9 +317,9 @@ void R_ClearPlanes (bool fullclear) } // opening / clipping determination - clearbufshort (floorclip, viewwidth, viewheight); + fillshort (floorclip, viewwidth, viewheight); // [RH] clip ceiling to console bottom - clearbufshort (ceilingclip, viewwidth, + fillshort (ceilingclip, viewwidth, !screen->Accel2D && ConBottom > viewwindowy && !bRenderingToCanvas ? 
(ConBottom - viewwindowy) : 0); @@ -492,7 +492,7 @@ visplane_t *R_FindPlane (const secplane_t &height, FTextureID picnum, int lightl check->MirrorFlags = MirrorFlags; check->CurrentSkybox = CurrentSkybox; - clearbufshort (check->top, viewwidth, 0x7fff); + fillshort (check->top, viewwidth, 0x7fff); return check; } @@ -577,7 +577,7 @@ visplane_t *R_CheckPlane (visplane_t *pl, int start, int stop) pl = new_pl; pl->left = start; pl->right = stop; - clearbufshort (pl->top, viewwidth, 0x7fff); + fillshort (pl->top, viewwidth, 0x7fff); } return pl; } @@ -1681,7 +1681,7 @@ void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1)) if (b2 > t2) { - clearbufshort (spanend+t2, b2-t2, x); + fillshort (spanend+t2, b2-t2, x); } for (--x; x >= pl->left; --x) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index d4520e91de..5a016fc4cb 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -56,7 +56,6 @@ #define WALLYREPEAT 8 -CVAR(Bool, r_np2, true, 0) CVAR(Bool, r_fogboundary, true, 0) CVAR(Bool, r_drawmirrors, true, 0) EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); @@ -65,6 +64,10 @@ namespace swrenderer { using namespace drawerargs; + void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask); + void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask); + void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat); + #define HEIGHTBITS 12 #define HEIGHTSHIFT (FRACBITS-HEIGHTBITS) @@ -140,9 +143,6 @@ static fixed_t *maskedtexturecol; static void R_RenderDecal (side_t *wall, DBaseDecal *first, drawseg_t *clipper, int pass); static void WallSpriteColumn (void (*drawfunc)(const BYTE *column, const FTexture::Span *spans)); -void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask); -static void 
wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat); -static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask); inline bool IsFogBoundary (sector_t *front, sector_t *back) { @@ -538,7 +538,7 @@ clearfog: } else { - clearbufshort(openings + ds->sprtopclip - ds->x1 + x1, x2 - x1, viewheight); + fillshort(openings + ds->sprtopclip - ds->x1 + x1, x2 - x1, viewheight); } } return; @@ -1051,542 +1051,6 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) return; } -struct WallscanSampler -{ - WallscanSampler() { } - WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); - - uint32_t uv_pos; - uint32_t uv_step; - uint32_t uv_max; - - const BYTE *source; - uint32_t height; -}; - -WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) -{ - height = texture->GetHeight(); - - int uv_fracbits = 32 - texture->HeightBits; - if (uv_fracbits != 32) - { - uv_max = height << uv_fracbits; - - // Find start uv in [0-base_height[ range. - // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
- double uv_stepd = swal * yrepeat; - double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; - v = v - floor(v); - v *= height; - v *= (1 << uv_fracbits); - - uv_pos = (uint32_t)v; - uv_step = xs_ToFixed(uv_fracbits, uv_stepd); - if (uv_step == 0) // To prevent divide by zero elsewhere - uv_step = 1; - } - else - { // Hack for one pixel tall textures - uv_pos = 0; - uv_step = 0; - uv_max = 1; - } - - source = getcol(texture, xoffset >> FRACBITS); -} - -// Draw a column with support for non-power-of-two ranges -void wallscan_drawcol1(int x, int y1, int y2, WallscanSampler &sampler, DWORD(*draw1column)()) -{ - if (sampler.uv_max == 0 || sampler.uv_step == 0) // power of two - { - int count = y2 - y1; - - dc_source = sampler.source; - dc_dest = (ylookup[y1] + x) + dc_destorg; - dc_count = count; - dc_iscale = sampler.uv_step; - dc_texturefrac = sampler.uv_pos; - draw1column(); - - uint64_t step64 = sampler.uv_step; - uint64_t pos64 = sampler.uv_pos; - sampler.uv_pos = (uint32_t)(pos64 + step64 * count); - } - else - { - uint32_t uv_pos = sampler.uv_pos; - - uint32_t left = y2 - y1; - while (left > 0) - { - uint32_t available = sampler.uv_max - uv_pos; - uint32_t next_uv_wrap = available / sampler.uv_step; - if (available % sampler.uv_step != 0) - next_uv_wrap++; - uint32_t count = MIN(left, next_uv_wrap); - - dc_source = sampler.source; - dc_dest = (ylookup[y1] + x) + dc_destorg; - dc_count = count; - dc_iscale = sampler.uv_step; - dc_texturefrac = uv_pos; - draw1column(); - - left -= count; - uv_pos += sampler.uv_step * count; - if (uv_pos >= sampler.uv_max) - uv_pos -= sampler.uv_max; - } - - sampler.uv_pos = uv_pos; - } -} - -// Draw four columns with support for non-power-of-two ranges -void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*draw4columns)()) -{ - if (sampler[0].uv_max == 0 || sampler[0].uv_step == 0) // power of two, no wrap handling needed - { - int count = y2 - y1; - for (int i = 0; i < 4; i++) - { - 
bufplce[i] = sampler[i].source; - vplce[i] = sampler[i].uv_pos; - vince[i] = sampler[i].uv_step; - - uint64_t step64 = sampler[i].uv_step; - uint64_t pos64 = sampler[i].uv_pos; - sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); - } - dc_dest = (ylookup[y1] + x) + dc_destorg; - dc_count = count; - draw4columns(); - } - else - { - dc_dest = (ylookup[y1] + x) + dc_destorg; - for (int i = 0; i < 4; i++) - { - bufplce[i] = sampler[i].source; - } - - uint32_t left = y2 - y1; - while (left > 0) - { - // Find which column wraps first - uint32_t count = left; - for (int i = 0; i < 4; i++) - { - uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; - uint32_t next_uv_wrap = available / sampler[i].uv_step; - if (available % sampler[i].uv_step != 0) - next_uv_wrap++; - count = MIN(next_uv_wrap, count); - } - - // Draw until that column wraps - for (int i = 0; i < 4; i++) - { - vplce[i] = sampler[i].uv_pos; - vince[i] = sampler[i].uv_step; - } - dc_count = count; - draw4columns(); - - // Wrap the uv position - for (int i = 0; i < 4; i++) - { - sampler[i].uv_pos += sampler[i].uv_step * count; - if (sampler[i].uv_pos >= sampler[i].uv_max) - sampler[i].uv_pos -= sampler[i].uv_max; - } - - left -= count; - } - } -} - -typedef DWORD(*Draw1ColumnFuncPtr)(); -typedef void(*Draw4ColumnsFuncPtr)(); - -void wallscan_any( - int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, - const BYTE *(*getcol)(FTexture *tex, int x), - void(setupwallscan(int bits, Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) -{ - if (rw_pic->UseType == FTexture::TEX_Null) - return; - - fixed_t xoffset = rw_offset; - - rw_pic->GetHeight(); // To ensure that rw_pic->HeightBits has been set - int fracbits = 32 - rw_pic->HeightBits; - if (fracbits == 32) - { // Hack for one pixel tall textures - fracbits = 0; - yrepeat = 0; - dc_texturemid = 0; - } - - DWORD(*draw1column)(); - void(*draw4columns)(); - setupwallscan(fracbits, draw1column, draw4columns); - - bool 
fixed = (fixedcolormap != NULL || fixedlightlev >= 0); - if (fixed) - { - palookupoffse[0] = dc_colormap; - palookupoffse[1] = dc_colormap; - palookupoffse[2] = dc_colormap; - palookupoffse[3] = dc_colormap; - } - - if (fixedcolormap) - dc_colormap = fixedcolormap; - else - dc_colormap = basecolormap->Maps; - - float light = rw_light; - - // Calculate where 4 column alignment begins and ends: - int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); - int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); - - // First unaligned columns: - for (int x = x1; x < aligned_x1; x++, light += rw_lightstep) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - if (!fixed) - dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - - WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, rw_pic, getcol); - wallscan_drawcol1(x, y1, y2, sampler, draw1column); - } - - // The aligned columns - for (int x = aligned_x1; x < aligned_x2; x += 4) - { - // Find y1, y2, light and uv values for four columns: - int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] }; - int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] }; - - float lights[4]; - for (int i = 0; i < 4; i++) - { - lights[i] = light; - light += rw_lightstep; - } - - WallscanSampler sampler[4]; - for (int i = 0; i < 4; i++) - sampler[i] = WallscanSampler(y1[i], swal[x + i], yrepeat, lwal[x + i] + xoffset, rw_pic, getcol); - - // Figure out where we vertically can start and stop drawing 4 columns in one go - int middle_y1 = y1[0]; - int middle_y2 = y2[0]; - for (int i = 1; i < 4; i++) - { - middle_y1 = MAX(y1[i], middle_y1); - middle_y2 = MIN(y2[i], middle_y2); - } - - // If we got an empty column in our set we cannot draw 4 columns in one go: - bool empty_column_in_set = false; - for (int i = 0; i < 4; i++) - { - if (y2[i] <= y1[i]) - empty_column_in_set = true; - } - - if (empty_column_in_set || middle_y2 <= middle_y1) - { - for (int i = 0; i < 4; i++) - { - if 
(y2[i] <= y1[i]) - continue; - - if (!fixed) - dc_colormap = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); - wallscan_drawcol1(x + i, y1[i], y2[i], sampler[i], draw1column); - } - continue; - } - - // Draw the first rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (!fixed) - dc_colormap = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); - - if (y1[i] < middle_y1) - wallscan_drawcol1(x + i, y1[i], middle_y1, sampler[i], draw1column); - } - - // Draw the area where all 4 columns are active - if (!fixed) - { - for (int i = 0; i < 4; i++) - { - palookupoffse[i] = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); - } - } - wallscan_drawcol4(x, middle_y1, middle_y2, sampler, draw4columns); - - // Draw the last rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (!fixed) - dc_colormap = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); - - if (middle_y2 < y2[i]) - wallscan_drawcol1(x + i, middle_y2, y2[i], sampler[i], draw1column); - } - } - - // The last unaligned columns: - for (int x = aligned_x2; x < x2; x++, light += rw_lightstep) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - if (!fixed) - dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - - WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, rw_pic, getcol); - wallscan_drawcol1(x, y1, y2, sampler, draw1column); - } - - NetUpdate(); -} - -void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) - { - setupvline(bits); - line1 = dovline1; - line4 = dovline4; - }); -} - -void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double 
yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - if (!rw_pic->bMasked) // Textures that aren't masked can use the faster wallscan. - { - wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); - } - else - { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) - { - setupmvline(bits); - line1 = domvline1; - line4 = domvline4; - }); - } -} - -void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - static fixed_t(*tmvline1)(); - static void(*tmvline4)(); - if (!R_GetTransMaskDrawers(&tmvline1, &tmvline4)) - { - // The current translucency is unsupported, so draw with regular maskwallscan instead. - maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); - } - else - { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) - { - setuptmvline(bits); - line1 = reinterpret_cast<DWORD(*)()>(tmvline1); - line4 = tmvline4; - }); - } -} - -void wallscan_striped (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) -{ - FDynamicColormap *startcolormap = basecolormap; - int startshade = wallshade; - bool fogginess = foggy; - - short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; - short *up, *down; - - up = uwal; - down = most1; - - assert(WallC.sx1 <= x1); - assert(WallC.sx2 >= x2); - - // kg3D - fake floors instead of zdoom light list - for (unsigned int i = 0; i < frontsector->e->XFloor.lightlist.Size(); i++) - { - int j = WallMost (most3, frontsector->e->XFloor.lightlist[i].plane, &WallC); - if (j != 3) - { - for (int j = x1; j < x2; ++j) - { - down[j] = clamp (most3[j], up[j], dwal[j]); - } - wallscan (x1, x2, up, down, swal, lwal, yrepeat); - up = down; - down = (down == most1) ? 
most2 : most1; - } - - lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; - basecolormap = lit->extra_colormap; - wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(fogginess, - *lit->p_lightlevel, lit->lightsource != NULL) + r_actualextralight); - } - - wallscan (x1, x2, up, dwal, swal, lwal, yrepeat); - basecolormap = startcolormap; - wallshade = startshade; -} - -static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask) -{ - if (mask) - { - if (colfunc == basecolfunc) - { - maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); - } - else - { - transmaskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); - } - } - else - { - if (fixedcolormap != NULL || fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) - { - wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); - } - else - { - wallscan_striped(x1, x2, uwal, dwal, swal, lwal, yrepeat); - } - } -} - -//============================================================================= -// -// wallscan_np2 -// -// This is a wrapper around wallscan that helps it tile textures whose heights -// are not powers of 2. It divides the wall into texture-sized strips and calls -// wallscan for each of those. Since only one repetition of the texture fits -// in each strip, wallscan will not tile. 
-// -//============================================================================= - -void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask) -{ - if (!r_np2) - { - call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, mask); - } - else - { - short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; - short *up, *down; - double texheight = rw_pic->GetHeight(); - double partition; - double scaledtexheight = texheight / yrepeat; - - if (yrepeat >= 0) - { // normal orientation: draw strips from top to bottom - partition = top - fmod(top - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); - if (partition == top) - { - partition -= scaledtexheight; - } - up = uwal; - down = most1; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; - while (partition > bot) - { - int j = OWallMost(most3, partition - ViewPos.Z, &WallC); - if (j != 3) - { - for (int j = x1; j < x2; ++j) - { - down[j] = clamp(most3[j], up[j], dwal[j]); - } - call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); - up = down; - down = (down == most1) ? most2 : most1; - } - partition -= scaledtexheight; - dc_texturemid -= texheight; - } - call_wallscan(x1, x2, up, dwal, swal, lwal, yrepeat, mask); - } - else - { // upside down: draw strips from bottom to top - partition = bot - fmod(bot - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); - up = most1; - down = dwal; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; - while (partition < top) - { - int j = OWallMost(most3, partition - ViewPos.Z, &WallC); - if (j != 12) - { - for (int j = x1; j < x2; ++j) - { - up[j] = clamp(most3[j], uwal[j], down[j]); - } - call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); - down = up; - up = (up == most1) ? 
most2 : most1; - } - partition -= scaledtexheight; - dc_texturemid -= texheight; - } - call_wallscan(x1, x2, uwal, down, swal, lwal, yrepeat, mask); - } - } -} - -static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) -{ - if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) - { - double frontcz1 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v1); - double frontfz1 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v1); - double frontcz2 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v2); - double frontfz2 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v2); - double top = MAX(frontcz1, frontcz2); - double bot = MIN(frontfz1, frontfz2); - if (fake3D & FAKE3D_CLIPTOP) - { - top = MIN(top, sclipTop); - } - if (fake3D & FAKE3D_CLIPBOTTOM) - { - bot = MAX(bot, sclipBottom); - } - wallscan_np2(x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, true); - } - else - { - call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, true); - } -} - // // R_RenderSegLoop // Draws zero, one, or two textures for walls. 
@@ -1720,8 +1184,8 @@ void R_RenderSegLoop () call_wallscan(x1, x2, walltop, wallbottom, swall, lwall, yscale, false); } } - clearbufshort (ceilingclip+x1, x2-x1, viewheight); - clearbufshort (floorclip+x1, x2-x1, 0xffff); + fillshort (ceilingclip+x1, x2-x1, viewheight); + fillshort (floorclip+x1, x2-x1, 0xffff); } else { // two sided line @@ -2244,7 +1708,7 @@ void R_StoreWallRange (int start, int stop) { ds_p->sprtopclip = R_NewOpening (stop - start); ds_p->sprbottomclip = R_NewOpening (stop - start); - clearbufshort (openings + ds_p->sprtopclip, stop-start, viewheight); + fillshort (openings + ds_p->sprtopclip, stop-start, viewheight); memset (openings + ds_p->sprbottomclip, -1, (stop-start)*sizeof(short)); ds_p->silhouette = SIL_BOTH; } @@ -2284,7 +1748,7 @@ void R_StoreWallRange (int start, int stop) if (doorclosed || (rw_backfz1 >= rw_frontcz1 && rw_backfz2 >= rw_frontcz2)) { // killough 1/17/98, 2/8/98 ds_p->sprtopclip = R_NewOpening (stop - start); - clearbufshort (openings + ds_p->sprtopclip, stop - start, viewheight); + fillshort (openings + ds_p->sprtopclip, stop - start, viewheight); ds_p->silhouette |= SIL_TOP; } } @@ -2505,7 +1969,7 @@ int WallMostAny(short *mostbuf, double z1, double z2, const FWallCoords *wallc) } else if (y1 > viewheight && y2 > viewheight) // entire line is below screen { - clearbufshort(&mostbuf[wallc->sx1], wallc->sx2 - wallc->sx1, viewheight); + fillshort(&mostbuf[wallc->sx1], wallc->sx2 - wallc->sx1, viewheight); return 12; } diff --git a/src/r_things.cpp b/src/r_things.cpp index 4ba47d63d7..1bd2e8a025 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2331,11 +2331,11 @@ void R_DrawSprite (vissprite_t *spr) // for R_DrawVisVoxel(). if (x1 > 0) { - clearbufshort(cliptop, x1, viewheight); + fillshort(cliptop, x1, viewheight); } if (x2 < viewwidth - 1) { - clearbufshort(cliptop + x2, viewwidth - x2, viewheight); + fillshort(cliptop + x2, viewwidth - x2, viewheight); } int minvoxely = spr->gzt <= hzt ? 
0 : xs_RoundToInt((spr->gzt - hzt) / spr->yscale); int maxvoxely = spr->gzb > hzb ? INT_MAX : xs_RoundToInt((spr->gzt - hzb) / spr->yscale); @@ -2701,6 +2701,11 @@ void R_DrawParticle_C (vissprite_t *vis) extern double BaseYaspectMul;; +inline int sgn(int v) +{ + return v < 0 ? -1 : v > 0 ? 1 : 0; +} + void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, const FVector3 &dasprpos, DAngle dasprang, fixed_t daxscale, fixed_t dayscale, FVoxel *voxobj, @@ -2840,7 +2845,7 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, xe += xi; ye += yi; } - i = ksgn(ys-backy)+ksgn(xs-backx)*3+4; + i = sgn(ys - backy) + sgn(xs - backx) * 3 + 4; switch(i) { case 6: case 7: x1 = 0; y1 = 0; break; diff --git a/src/r_walldraw.cpp b/src/r_walldraw.cpp new file mode 100644 index 0000000000..000bc01c78 --- /dev/null +++ b/src/r_walldraw.cpp @@ -0,0 +1,583 @@ +/* +** Wall drawing stuff free of Build pollution +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#include <stdlib.h> +#include <stddef.h> + +#include "doomdef.h" +#include "doomstat.h" +#include "doomdata.h" + +#include "r_local.h" +#include "r_sky.h" +#include "v_video.h" + +#include "m_swap.h" +#include "a_sharedglobal.h" +#include "d_net.h" +#include "g_level.h" +#include "r_draw.h" +#include "r_bsp.h" +#include "r_plane.h" +#include "r_segs.h" +#include "r_3dfloors.h" +#include "v_palette.h" +#include "r_data/colormaps.h" + +namespace swrenderer +{ + using namespace drawerargs; + + extern FTexture *rw_pic; + extern int wallshade; + +struct WallscanSampler +{ + WallscanSampler() { } + WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); + + uint32_t uv_pos; + uint32_t uv_step; + uint32_t uv_max; + + const BYTE *source; + uint32_t height; +}; + +WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) +{ + height = texture->GetHeight(); + + int uv_fracbits = 32 - texture->HeightBits; + if (uv_fracbits != 32) + { + uv_max = height << uv_fracbits; + + // Find start uv in [0-base_height[ range. + // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
+ double uv_stepd = swal * yrepeat; + double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; + v = v - floor(v); + v *= height; + v *= (1 << uv_fracbits); + + uv_pos = (uint32_t)v; + uv_step = xs_ToFixed(uv_fracbits, uv_stepd); + if (uv_step == 0) // To prevent divide by zero elsewhere + uv_step = 1; + } + else + { // Hack for one pixel tall textures + uv_pos = 0; + uv_step = 0; + uv_max = 1; + } + + source = getcol(texture, xoffset >> FRACBITS); +} + +// Draw a column with support for non-power-of-two ranges +void wallscan_drawcol1(int x, int y1, int y2, WallscanSampler &sampler, DWORD(*draw1column)()) +{ + if (sampler.uv_max == 0 || sampler.uv_step == 0) // power of two + { + int count = y2 - y1; + + dc_source = sampler.source; + dc_dest = (ylookup[y1] + x) + dc_destorg; + dc_count = count; + dc_iscale = sampler.uv_step; + dc_texturefrac = sampler.uv_pos; + draw1column(); + + uint64_t step64 = sampler.uv_step; + uint64_t pos64 = sampler.uv_pos; + sampler.uv_pos = (uint32_t)(pos64 + step64 * count); + } + else + { + uint32_t uv_pos = sampler.uv_pos; + + uint32_t left = y2 - y1; + while (left > 0) + { + uint32_t available = sampler.uv_max - uv_pos; + uint32_t next_uv_wrap = available / sampler.uv_step; + if (available % sampler.uv_step != 0) + next_uv_wrap++; + uint32_t count = MIN(left, next_uv_wrap); + + dc_source = sampler.source; + dc_dest = (ylookup[y1] + x) + dc_destorg; + dc_count = count; + dc_iscale = sampler.uv_step; + dc_texturefrac = uv_pos; + draw1column(); + + left -= count; + uv_pos += sampler.uv_step * count; + if (uv_pos >= sampler.uv_max) + uv_pos -= sampler.uv_max; + } + + sampler.uv_pos = uv_pos; + } +} + +// Draw four columns with support for non-power-of-two ranges +void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*draw4columns)()) +{ + if (sampler[0].uv_max == 0 || sampler[0].uv_step == 0) // power of two, no wrap handling needed + { + int count = y2 - y1; + for (int i = 0; i < 4; i++) + { + 
bufplce[i] = sampler[i].source; + vplce[i] = sampler[i].uv_pos; + vince[i] = sampler[i].uv_step; + + uint64_t step64 = sampler[i].uv_step; + uint64_t pos64 = sampler[i].uv_pos; + sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); + } + dc_dest = (ylookup[y1] + x) + dc_destorg; + dc_count = count; + draw4columns(); + } + else + { + dc_dest = (ylookup[y1] + x) + dc_destorg; + for (int i = 0; i < 4; i++) + { + bufplce[i] = sampler[i].source; + } + + uint32_t left = y2 - y1; + while (left > 0) + { + // Find which column wraps first + uint32_t count = left; + for (int i = 0; i < 4; i++) + { + uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; + uint32_t next_uv_wrap = available / sampler[i].uv_step; + if (available % sampler[i].uv_step != 0) + next_uv_wrap++; + count = MIN(next_uv_wrap, count); + } + + // Draw until that column wraps + for (int i = 0; i < 4; i++) + { + vplce[i] = sampler[i].uv_pos; + vince[i] = sampler[i].uv_step; + } + dc_count = count; + draw4columns(); + + // Wrap the uv position + for (int i = 0; i < 4; i++) + { + sampler[i].uv_pos += sampler[i].uv_step * count; + if (sampler[i].uv_pos >= sampler[i].uv_max) + sampler[i].uv_pos -= sampler[i].uv_max; + } + + left -= count; + } + } +} + +typedef DWORD(*Draw1ColumnFuncPtr)(); +typedef void(*Draw4ColumnsFuncPtr)(); + +void wallscan_any( + int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, + const BYTE *(*getcol)(FTexture *tex, int x), + void(setupwallscan(int bits, Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) +{ + if (rw_pic->UseType == FTexture::TEX_Null) + return; + + fixed_t xoffset = rw_offset; + + rw_pic->GetHeight(); // To ensure that rw_pic->HeightBits has been set + int fracbits = 32 - rw_pic->HeightBits; + if (fracbits == 32) + { // Hack for one pixel tall textures + fracbits = 0; + yrepeat = 0; + dc_texturemid = 0; + } + + DWORD(*draw1column)(); + void(*draw4columns)(); + setupwallscan(fracbits, draw1column, draw4columns); + + bool 
fixed = (fixedcolormap != NULL || fixedlightlev >= 0); + if (fixed) + { + palookupoffse[0] = dc_colormap; + palookupoffse[1] = dc_colormap; + palookupoffse[2] = dc_colormap; + palookupoffse[3] = dc_colormap; + } + + if (fixedcolormap) + dc_colormap = fixedcolormap; + else + dc_colormap = basecolormap->Maps; + + float light = rw_light; + + // Calculate where 4 column alignment begins and ends: + int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); + int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); + + // First unaligned columns: + for (int x = x1; x < aligned_x1; x++, light += rw_lightstep) + { + int y1 = uwal[x]; + int y2 = dwal[x]; + if (y2 <= y1) + continue; + + if (!fixed) + dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + + WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, rw_pic, getcol); + wallscan_drawcol1(x, y1, y2, sampler, draw1column); + } + + // The aligned columns + for (int x = aligned_x1; x < aligned_x2; x += 4) + { + // Find y1, y2, light and uv values for four columns: + int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] }; + int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] }; + + float lights[4]; + for (int i = 0; i < 4; i++) + { + lights[i] = light; + light += rw_lightstep; + } + + WallscanSampler sampler[4]; + for (int i = 0; i < 4; i++) + sampler[i] = WallscanSampler(y1[i], swal[x + i], yrepeat, lwal[x + i] + xoffset, rw_pic, getcol); + + // Figure out where we vertically can start and stop drawing 4 columns in one go + int middle_y1 = y1[0]; + int middle_y2 = y2[0]; + for (int i = 1; i < 4; i++) + { + middle_y1 = MAX(y1[i], middle_y1); + middle_y2 = MIN(y2[i], middle_y2); + } + + // If we got an empty column in our set we cannot draw 4 columns in one go: + bool empty_column_in_set = false; + for (int i = 0; i < 4; i++) + { + if (y2[i] <= y1[i]) + empty_column_in_set = true; + } + + if (empty_column_in_set || middle_y2 <= middle_y1) + { + for (int i = 0; i < 4; i++) + { + if 
(y2[i] <= y1[i]) + continue; + + if (!fixed) + dc_colormap = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + wallscan_drawcol1(x + i, y1[i], y2[i], sampler[i], draw1column); + } + continue; + } + + // Draw the first rows where not all 4 columns are active + for (int i = 0; i < 4; i++) + { + if (!fixed) + dc_colormap = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + + if (y1[i] < middle_y1) + wallscan_drawcol1(x + i, y1[i], middle_y1, sampler[i], draw1column); + } + + // Draw the area where all 4 columns are active + if (!fixed) + { + for (int i = 0; i < 4; i++) + { + palookupoffse[i] = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + } + } + wallscan_drawcol4(x, middle_y1, middle_y2, sampler, draw4columns); + + // Draw the last rows where not all 4 columns are active + for (int i = 0; i < 4; i++) + { + if (!fixed) + dc_colormap = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + + if (middle_y2 < y2[i]) + wallscan_drawcol1(x + i, middle_y2, y2[i], sampler[i], draw1column); + } + } + + // The last unaligned columns: + for (int x = aligned_x2; x < x2; x++, light += rw_lightstep) + { + int y1 = uwal[x]; + int y2 = dwal[x]; + if (y2 <= y1) + continue; + + if (!fixed) + dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + + WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, rw_pic, getcol); + wallscan_drawcol1(x, y1, y2, sampler, draw1column); + } + + NetUpdate(); +} + +void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setupvline(bits); + line1 = dovline1; + line4 = dovline4; + }); +} + +void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double 
yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) +{ + if (!rw_pic->bMasked) // Textures that aren't masked can use the faster wallscan. + { + wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); + } + else + { + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setupmvline(bits); + line1 = domvline1; + line4 = domvline4; + }); + } +} + +void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) +{ + static fixed_t(*tmvline1)(); + static void(*tmvline4)(); + if (!R_GetTransMaskDrawers(&tmvline1, &tmvline4)) + { + // The current translucency is unsupported, so draw with regular maskwallscan instead. + maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); + } + else + { + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setuptmvline(bits); + line1 = reinterpret_cast<DWORD(*)()>(tmvline1); + line4 = tmvline4; + }); + } +} + +void wallscan_striped (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) +{ + FDynamicColormap *startcolormap = basecolormap; + int startshade = wallshade; + bool fogginess = foggy; + + short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; + short *up, *down; + + up = uwal; + down = most1; + + assert(WallC.sx1 <= x1); + assert(WallC.sx2 >= x2); + + // kg3D - fake floors instead of zdoom light list + for (unsigned int i = 0; i < frontsector->e->XFloor.lightlist.Size(); i++) + { + int j = WallMost (most3, frontsector->e->XFloor.lightlist[i].plane, &WallC); + if (j != 3) + { + for (int j = x1; j < x2; ++j) + { + down[j] = clamp (most3[j], up[j], dwal[j]); + } + wallscan (x1, x2, up, down, swal, lwal, yrepeat); + up = down; + down = (down == most1) ? 
most2 : most1; + } + + lightlist_t *lit = &frontsector->e->XFloor.lightlist[i]; + basecolormap = lit->extra_colormap; + wallshade = LIGHT2SHADE(curline->sidedef->GetLightLevel(fogginess, + *lit->p_lightlevel, lit->lightsource != NULL) + r_actualextralight); + } + + wallscan (x1, x2, up, dwal, swal, lwal, yrepeat); + basecolormap = startcolormap; + wallshade = startshade; +} + +void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask) +{ + if (mask) + { + if (colfunc == basecolfunc) + { + maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); + } + else + { + transmaskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); + } + } + else + { + if (fixedcolormap != NULL || fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) + { + wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); + } + else + { + wallscan_striped(x1, x2, uwal, dwal, swal, lwal, yrepeat); + } + } +} + +//============================================================================= +// +// wallscan_np2 +// +// This is a wrapper around wallscan that helps it tile textures whose heights +// are not powers of 2. It divides the wall into texture-sized strips and calls +// wallscan for each of those. Since only one repetition of the texture fits +// in each strip, wallscan will not tile. 
+// +//============================================================================= + +void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask) +{ + short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; + short *up, *down; + double texheight = rw_pic->GetHeight(); + double partition; + double scaledtexheight = texheight / yrepeat; + + if (yrepeat >= 0) + { // normal orientation: draw strips from top to bottom + partition = top - fmod(top - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + if (partition == top) + { + partition -= scaledtexheight; + } + up = uwal; + down = most1; + dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + while (partition > bot) + { + int j = OWallMost(most3, partition - ViewPos.Z, &WallC); + if (j != 3) + { + for (int j = x1; j < x2; ++j) + { + down[j] = clamp(most3[j], up[j], dwal[j]); + } + call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); + up = down; + down = (down == most1) ? most2 : most1; + } + partition -= scaledtexheight; + dc_texturemid -= texheight; + } + call_wallscan(x1, x2, up, dwal, swal, lwal, yrepeat, mask); + } + else + { // upside down: draw strips from bottom to top + partition = bot - fmod(bot - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + up = most1; + down = dwal; + dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + while (partition < top) + { + int j = OWallMost(most3, partition - ViewPos.Z, &WallC); + if (j != 12) + { + for (int j = x1; j < x2; ++j) + { + up[j] = clamp(most3[j], uwal[j], down[j]); + } + call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); + down = up; + up = (up == most1) ? 
most2 : most1; + } + partition -= scaledtexheight; + dc_texturemid -= texheight; + } + call_wallscan(x1, x2, uwal, down, swal, lwal, yrepeat, mask); + } +} + +void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) +{ + if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) + { + double frontcz1 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v1); + double frontfz1 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v1); + double frontcz2 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v2); + double frontfz2 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v2); + double top = MAX(frontcz1, frontcz2); + double bot = MIN(frontfz1, frontfz2); + if (fake3D & FAKE3D_CLIPTOP) + { + top = MIN(top, sclipTop); + } + if (fake3D & FAKE3D_CLIPBOTTOM) + { + bot = MAX(bot, sclipBottom); + } + wallscan_np2(x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, true); + } + else + { + call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, true); + } +} + + +} \ No newline at end of file diff --git a/src/v_draw.cpp b/src/v_draw.cpp index f86a94bcdf..1524c7ba4a 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -235,13 +235,13 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) if (bottomclipper[0] != parms.dclip) { - clearbufshort(bottomclipper, screen->GetWidth(), (short)parms.dclip); + fillshort(bottomclipper, screen->GetWidth(), (short)parms.dclip); } if (parms.uclip != 0) { if (topclipper[0] != parms.uclip) { - clearbufshort(topclipper, screen->GetWidth(), (short)parms.uclip); + fillshort(topclipper, screen->GetWidth(), (short)parms.uclip); } mceilingclip = topclipper; } diff --git a/src/valgrind.inc b/src/valgrind.inc deleted file mode 100644 index 729fe89bcd..0000000000 --- a/src/valgrind.inc +++ /dev/null @@ -1,31 +0,0 @@ -%ifdef VALGRIND_AWARE - -%define VG_USERREQ__DISCARD_TRANSLATIONS 0x1002 - -%macro selfmod 2 - pusha - sub esp,6*4 - mov dword 
[esp], VG_USERREQ__DISCARD_TRANSLATIONS - mov dword [esp+4], %1 - mov dword [esp+8], %2 - %1 - mov dword [esp+12], 0 - mov dword [esp+16], 0 - mov dword [esp+20], 0 - mov eax, esp - ; special instruction preamble - rol edi, 3 - rol edi, 13 - rol edi, 29 - rol edi, 19 - xchg ebx, ebx - ; restore state - add esp,6*4 - popa -%endmacro - -%else - -%macro selfmod 2 -%endmacro - -%endif