diff --git a/src/basicinlines.h b/src/basicinlines.h deleted file mode 100644 index db03232b3..000000000 --- a/src/basicinlines.h +++ /dev/null @@ -1,40 +0,0 @@ -// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman -// Ken Silverman's official web site: "http://www.advsys.net/ken" -// See the included license file "BUILDLIC.TXT" for license info. -// -// This file is based on pragmas.h from Ken Silverman's original Build -// source code release but is meant for use with any compiler and does not -// rely on any inline assembly. -// - -#if _MSC_VER -#pragma once -#endif - -#if defined(__GNUC__) && !defined(__forceinline) -#define __forceinline __inline__ __attribute__((always_inline)) -#endif - -static __forceinline SDWORD Scale (SDWORD a, SDWORD b, SDWORD c) -{ - return (SDWORD)(((SQWORD)a*b)/c); -} - -static __forceinline SDWORD MulScale14 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a * b) >> 14); } -static __forceinline SDWORD MulScale16 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a * b) >> 16); } -static __forceinline SDWORD MulScale30 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a * b) >> 30); } -static __forceinline SDWORD MulScale32 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a * b) >> 32); } - -static __forceinline DWORD UMulScale16 (DWORD a, DWORD b) { return (DWORD)(((QWORD)a * b) >> 16); } - -static __forceinline SDWORD DMulScale3 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 3); } -static __forceinline SDWORD DMulScale6 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 6); } -static __forceinline SDWORD DMulScale10 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 10); } -static __forceinline SDWORD DMulScale18 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 18); } -static __forceinline SDWORD DMulScale32 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 32); } - -static inline SDWORD DivScale6 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 6) / b); } -static inline SDWORD DivScale16 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 16) / b); } -static inline SDWORD DivScale21 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 21) / b); } -static inline SDWORD DivScale30 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 30) / b); } - diff --git a/src/gccinlines.h b/src/gccinlines.h deleted file mode 100644 index 20506a5a8..000000000 --- a/src/gccinlines.h +++ /dev/null @@ -1,95 +0,0 @@ -// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman -// Ken Silverman's official web site: "http://www.advsys.net/ken" -// See the included license file "BUILDLIC.TXT" for license info. -// -// This file is based on pragmas.h from Ken Silverman's original Build -// source code release but is meant for use with GCC instead of Watcom C. -// -// Some of the inline assembly has been turned into C code, because -// modern compilers are smart enough to produce code at least as good as -// Ken's inline assembly. -// - -// I can come up with several different operand constraints for the -// following that work just fine when used all by themselves, but -// when I concatenate them together so that the compiler can choose -// between them, I get the following (but only if enough parameters -// are passed as the result of some operations instead of as -// variables/constants--e.g. DMulScale16 (a*2, b*2, c*2, d*2) instead of -// DMulScale16 (a, b, c, d)): -// -// `asm' operand requires impossible reload -// -// Why? - -#include - -#ifndef alloca -// MinGW does not seem to come with alloca defined. -#define alloca __builtin_alloca -#endif - -static inline SDWORD Scale (SDWORD a, SDWORD b, SDWORD c) -{ - SDWORD result, dummy; - - asm volatile - ("imull %3\n\t" - "idivl %4" - : "=a,a,a,a,a,a" (result), - "=&d,&d,&d,&d,d,d" (dummy) - : "a,a,a,a,a,a" (a), - "m,r,m,r,d,d" (b), - "r,r,m,m,r,m" (c) - : "cc" - ); - - return result; -} - -#define MAKECONSTMulScale(s) \ -static inline SDWORD MulScale##s (SDWORD a, SDWORD b) { return ((SQWORD)a * b) >> s; } - -MAKECONSTMulScale(14) -MAKECONSTMulScale(16) -MAKECONSTMulScale(30) -MAKECONSTMulScale(32) -#undef MAKECONSTMulScale - -static inline DWORD UMulScale16(DWORD a, DWORD b) { return ((QWORD)a * b) >> 16; } - -#define MAKECONSTDMulScale(s) \ - static inline SDWORD DMulScale##s (SDWORD a, SDWORD b, SDWORD c, SDWORD d) \ - { \ - return (((SQWORD)a * b) + ((SQWORD)c * d)) >> s; \ - } - -MAKECONSTDMulScale(3) -MAKECONSTDMulScale(6) -MAKECONSTDMulScale(10) -MAKECONSTDMulScale(18) -MAKECONSTDMulScale(32) -#undef MAKECONSTDMulScale - - -#define MAKECONSTDivScale(s) \ - static inline SDWORD DivScale##s (SDWORD a, SDWORD b) \ - { \ - SDWORD result, dummy; \ - asm volatile \ - ("idivl %4" \ - :"=a,a" (result), \ - "=d,d" (dummy) \ - : "a,a" (a<>(32-s)), \ - "r,m" (b) \ - : "cc"); \ - return result; \ - } - -MAKECONSTDivScale(6) -MAKECONSTDivScale(16) -MAKECONSTDivScale(21) -MAKECONSTDivScale(30) -#undef MAKECONSTDivScale - diff --git a/src/m_fixed.h b/src/m_fixed.h index 33d339608..dc7c3f4c9 100644 --- a/src/m_fixed.h +++ b/src/m_fixed.h @@ -4,14 +4,72 @@ #include #include "doomtype.h" +// Unfortunately, the Scale function still gets badly handled on 32 bit x86 platforms so it's the last remaining piece of inline assembly + +// GCC inlines #if defined(__GNUC__) && defined(__i386__) && !defined(__clang__) -#include "gccinlines.h" -#elif defined(_MSC_VER) && defined(_M_IX86) -#include "mscinlines.h" -#else -#include "basicinlines.h" +#ifndef alloca +// MinGW does not seem to come with alloca defined. +#define alloca __builtin_alloca #endif +static inline int32_t Scale(int32_t a, int32_t b, int32_t c) +{ + int32_t result, dummy; + + asm volatile + ("imull %3\n\t" + "idivl %4" + : "=a,a,a,a,a,a" (result), + "=&d,&d,&d,&d,d,d" (dummy) + : "a,a,a,a,a,a" (a), + "m,r,m,r,d,d" (b), + "r,r,m,m,r,m" (c) + : "cc" + ); + + return result; +} + +// MSVC inlines +#elif defined(_MSC_VER) && defined(_M_IX86) +#pragma warning (disable: 4035) + +__forceinline int32_t Scale(int32_t a, int32_t b, int32_t c) +{ + __asm mov eax, a + __asm imul b + __asm idiv c +} + +#pragma warning (default: 4035) +#else + +static __forceinline int32_t Scale(int32_t a, int32_t b, int32_t c) +{ + return (int32_t)(((int64_t)a*b) / c); +} + +#endif + +// Modern compilers are smart enough to do these multiplications intelligently. +__forceinline int32_t MulScale14(int32_t a, int32_t b) { return (int32_t)(((int64_t)a * b) >> 14); } // only used by R_DrawVoxel +__forceinline int32_t MulScale30(int32_t a, int32_t b) { return (int32_t)(((int64_t)a * b) >> 30); } // only used once in the node builder +__forceinline int32_t MulScale32(int32_t a, int32_t b) { return (int32_t)(((int64_t)a * b) >> 32); } // only used by R_DrawVoxel + +__forceinline uint32_t UMulScale16(uint32_t a, uint32_t b) { return (uint32_t)(((uint64_t)a * b) >> 16); } // used for sky drawing + +__forceinline int32_t DMulScale3(int32_t a, int32_t b, int32_t c, int32_t d) { return (int32_t)(((int64_t)a*b + (int64_t)c*d) >> 3); } // used for setting up slopes for Build maps +__forceinline int32_t DMulScale6(int32_t a, int32_t b, int32_t c, int32_t d) { return (int32_t)(((int64_t)a*b + (int64_t)c*d) >> 6); } // only used by R_DrawVoxel +__forceinline int32_t DMulScale10(int32_t a, int32_t b, int32_t c, int32_t d) { return (int32_t)(((int64_t)a*b + (int64_t)c*d) >> 10); } // only used by R_DrawVoxel +__forceinline int32_t DMulScale18(int32_t a, int32_t b, int32_t c, int32_t d) { return (int32_t)(((int64_t)a*b + (int64_t)c*d) >> 18); } // only used by R_DrawVoxel +__forceinline int32_t DMulScale32(int32_t a, int32_t b, int32_t c, int32_t d) { return (int32_t)(((int64_t)a*b + (int64_t)c*d) >> 32); } // used by R_PointOnSide. + +// Sadly, for divisions this is not true but these are so infrequently used that the C versions are just fine, despite not being fully optimal. +__forceinline int32_t DivScale6(int32_t a, int32_t b) { return (int32_t)(((int64_t)a << 6) / b); } // only used by R_DrawVoxel +__forceinline int32_t DivScale21(int32_t a, int32_t b) { return (int32_t)(((int64_t)a << 21) / b); } // only used by R_DrawVoxel +__forceinline int32_t DivScale30(int32_t a, int32_t b) { return (int32_t)(((int64_t)a << 30) / b); } // only used once in the node builder + __forceinline void fillshort(void *buff, unsigned int count, WORD clear) { SWORD *b2 = (SWORD *)buff; @@ -23,14 +81,18 @@ __forceinline void fillshort(void *buff, unsigned int count, WORD clear) #include "xs_Float.h" -inline SDWORD FixedDiv (SDWORD a, SDWORD b) +inline int32_t FixedDiv (int32_t a, int32_t b) { - if ((DWORD)abs(a) >> (31-16) >= (DWORD)abs (b)) + if ((uint32_t)abs(a) >> (31-16) >= (uint32_t)abs (b)) return (a^b)<0 ? FIXED_MIN : FIXED_MAX; - return DivScale16 (a, b); + + return (int32_t)(((int64_t)a << 16) / b); } -#define FixedMul MulScale16 +__forceinline int32_t FixedMul(int32_t a, int32_t b) +{ + return (int32_t)(((int64_t)a * b) >> 16); +} inline fixed_t FloatToFixed(double f) { diff --git a/src/mscinlines.h b/src/mscinlines.h deleted file mode 100644 index cc16c319d..000000000 --- a/src/mscinlines.h +++ /dev/null @@ -1,105 +0,0 @@ -// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman -// Ken Silverman's official web site: "http://www.advsys.net/ken" -// See the included license file "BUILDLIC.TXT" for license info. -// -// This file is based on pragmas.h from Ken Silverman's original Build -// source code release but is meant for use with Visual C++ instead of -// Watcom C. -// -// Some of the inline assembly has been turned into C code, because VC++ -// is smart enough to produce code at least as good as Ken's inlines. -// The more used functions are still inline assembly, because they do -// things that can't really be done in C. (I consider this a bad thing, -// because VC++ has considerably poorer support for inline assembly than -// Watcom, so it's better to rely on its C optimizer to produce fast code.) -// - - -#include -#include - -#pragma warning (disable: 4035) - -__forceinline SDWORD Scale (SDWORD a, SDWORD b, SDWORD c) -{ - __asm mov eax,a - __asm imul b - __asm idiv c -} - -#define MAKECONSTMulScale(s) \ - __forceinline SDWORD MulScale##s (SDWORD a, SDWORD b) \ - { \ - __asm mov eax,a \ - __asm imul b \ - __asm shrd eax,edx,s \ - } -MAKECONSTMulScale(14) -MAKECONSTMulScale(16) -MAKECONSTMulScale(30) -#undef MAKECONSTMulScale - -__forceinline SDWORD MulScale32 (SDWORD a, SDWORD b) -{ - __asm mov eax,a - __asm imul b - __asm mov eax,edx -} - -__forceinline DWORD UMulScale16(DWORD a, DWORD b) -{ - __asm mov eax,a - __asm mul b - __asm shrd eax,edx,16 -} - -#define MAKECONSTDMulScale(s) \ - __forceinline SDWORD DMulScale##s (SDWORD a, SDWORD b, SDWORD c, SDWORD d) \ - { \ - __asm mov eax,a \ - __asm imul b \ - __asm mov ebx,eax \ - __asm mov eax,c \ - __asm mov esi,edx \ - __asm imul d \ - __asm add eax,ebx \ - __asm adc edx,esi \ - __asm shrd eax,edx,s \ - } - -MAKECONSTDMulScale(3) -MAKECONSTDMulScale(6) -MAKECONSTDMulScale(10) -MAKECONSTDMulScale(18) -#undef MAKCONSTDMulScale - -__forceinline SDWORD DMulScale32 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) -{ - __asm mov eax,a - __asm imul b - __asm mov ebx,eax - __asm mov eax,c - __asm mov esi,edx - __asm imul d - __asm add eax,ebx - __asm adc edx,esi - __asm mov eax,edx -} - -#define MAKECONSTDivScale(s) \ - __forceinline SDWORD DivScale##s (SDWORD a, SDWORD b) \ - { \ - __asm mov edx,a \ - __asm sar edx,32-s \ - __asm mov eax,a \ - __asm shl eax,s \ - __asm idiv b \ - } - -MAKECONSTDivScale(6) -MAKECONSTDivScale(16) -MAKECONSTDivScale(21) -MAKECONSTDivScale(30) -#undef MAKECONSTDivScale - -#pragma warning (default: 4035) diff --git a/src/p_maputl.cpp b/src/p_maputl.cpp index fdf66e969..d676e966c 100644 --- a/src/p_maputl.cpp +++ b/src/p_maputl.cpp @@ -2001,8 +2001,8 @@ int P_VanillaPointOnLineSide(double x, double y, const line_t* line) auto dx = FloatToFixed(x - line->v1->fX()); auto dy = FloatToFixed(y - line->v1->fY()); - auto left = MulScale16( int(delta.Y * 256) , dx ); - auto right = MulScale16( dy , int(delta.X * 256) ); + auto left = FixedMul( int(delta.Y * 256) , dx ); + auto right = FixedMul( dy , int(delta.X * 256) ); if (right < left) return 0; // front side