- removed most inline assembly. Integer multiplication gets handled fine by all current compilers and fixed point division is too infrequently used to justify this mess.

That only leaves the Scale function which is still being used in a few places and which would create considerably worse code without assembly on 32 bit platforms. This is also far too primitive (2 or 3 assembly instructions) to claim any copyright on it, so I think m_fixed.h can now be considered free of Build-related issues. The deficated inline headers have been removed because that sole remaining function could be easily moved into m_fixed.h.
This commit is contained in:
Christoph Oelckers 2016-12-10 13:58:18 +01:00
parent 3563718049
commit 7d4a5898ac
5 changed files with 73 additions and 251 deletions

View file

@ -1,40 +0,0 @@
// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman
// Ken Silverman's official web site: "http://www.advsys.net/ken"
// See the included license file "BUILDLIC.TXT" for license info.
//
// This file is based on pragmas.h from Ken Silverman's original Build
// source code release but is meant for use with any compiler and does not
// rely on any inline assembly.
//
#if _MSC_VER
#pragma once
#endif
#if defined(__GNUC__) && !defined(__forceinline)
#define __forceinline __inline__ __attribute__((always_inline))
#endif
static __forceinline SDWORD Scale (SDWORD a, SDWORD b, SDWORD c)
{
return (SDWORD)(((SQWORD)a*b)/c);
}
static __forceinline SDWORD MulScale14 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a * b) >> 14); }
static __forceinline SDWORD MulScale16 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a * b) >> 16); }
static __forceinline SDWORD MulScale30 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a * b) >> 30); }
static __forceinline SDWORD MulScale32 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a * b) >> 32); }
static __forceinline DWORD UMulScale16 (DWORD a, DWORD b) { return (DWORD)(((QWORD)a * b) >> 16); }
static __forceinline SDWORD DMulScale3 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 3); }
static __forceinline SDWORD DMulScale6 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 6); }
static __forceinline SDWORD DMulScale10 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 10); }
static __forceinline SDWORD DMulScale18 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 18); }
static __forceinline SDWORD DMulScale32 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) { return (SDWORD)(((SQWORD)a*b + (SQWORD)c*d) >> 32); }
static inline SDWORD DivScale6 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 6) / b); }
static inline SDWORD DivScale16 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 16) / b); }
static inline SDWORD DivScale21 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 21) / b); }
static inline SDWORD DivScale30 (SDWORD a, SDWORD b) { return (SDWORD)(((SQWORD)a << 30) / b); }

View file

@ -1,95 +0,0 @@
// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman
// Ken Silverman's official web site: "http://www.advsys.net/ken"
// See the included license file "BUILDLIC.TXT" for license info.
//
// This file is based on pragmas.h from Ken Silverman's original Build
// source code release but is meant for use with GCC instead of Watcom C.
//
// Some of the inline assembly has been turned into C code, because
// modern compilers are smart enough to produce code at least as good as
// Ken's inline assembly.
//
// I can come up with several different operand constraints for the
// following that work just fine when used all by themselves, but
// when I concatenate them together so that the compiler can choose
// between them, I get the following (but only if enough parameters
// are passed as the result of some operations instead of as
// variables/constants--e.g. DMulScale16 (a*2, b*2, c*2, d*2) instead of
// DMulScale16 (a, b, c, d)):
//
// `asm' operand requires impossible reload
//
// Why?
#include <stdlib.h>
#ifndef alloca
// MinGW does not seem to come with alloca defined.
#define alloca __builtin_alloca
#endif
static inline SDWORD Scale (SDWORD a, SDWORD b, SDWORD c)
{
SDWORD result, dummy;
asm volatile
("imull %3\n\t"
"idivl %4"
: "=a,a,a,a,a,a" (result),
"=&d,&d,&d,&d,d,d" (dummy)
: "a,a,a,a,a,a" (a),
"m,r,m,r,d,d" (b),
"r,r,m,m,r,m" (c)
: "cc"
);
return result;
}
#define MAKECONSTMulScale(s) \
static inline SDWORD MulScale##s (SDWORD a, SDWORD b) { return ((SQWORD)a * b) >> s; }
MAKECONSTMulScale(14)
MAKECONSTMulScale(16)
MAKECONSTMulScale(30)
MAKECONSTMulScale(32)
#undef MAKECONSTMulScale
static inline DWORD UMulScale16(DWORD a, DWORD b) { return ((QWORD)a * b) >> 16; }
#define MAKECONSTDMulScale(s) \
static inline SDWORD DMulScale##s (SDWORD a, SDWORD b, SDWORD c, SDWORD d) \
{ \
return (((SQWORD)a * b) + ((SQWORD)c * d)) >> s; \
}
MAKECONSTDMulScale(3)
MAKECONSTDMulScale(6)
MAKECONSTDMulScale(10)
MAKECONSTDMulScale(18)
MAKECONSTDMulScale(32)
#undef MAKECONSTDMulScale
#define MAKECONSTDivScale(s) \
static inline SDWORD DivScale##s (SDWORD a, SDWORD b) \
{ \
SDWORD result, dummy; \
asm volatile \
("idivl %4" \
:"=a,a" (result), \
"=d,d" (dummy) \
: "a,a" (a<<s), \
"d,d" (a>>(32-s)), \
"r,m" (b) \
: "cc"); \
return result; \
}
MAKECONSTDivScale(6)
MAKECONSTDivScale(16)
MAKECONSTDivScale(21)
MAKECONSTDivScale(30)
#undef MAKECONSTDivScale

View file

@ -4,14 +4,72 @@
#include <stdlib.h>
#include "doomtype.h"
// Unfortunately, the Scale function still gets badly handled on 32 bit x86 platforms so it's the last remaining piece of inline assembly
// GCC inlines
#if defined(__GNUC__) && defined(__i386__) && !defined(__clang__)
#include "gccinlines.h"
#elif defined(_MSC_VER) && defined(_M_IX86)
#include "mscinlines.h"
#else
#include "basicinlines.h"
#ifndef alloca
// MinGW does not seem to come with alloca defined.
#define alloca __builtin_alloca
#endif
static inline int32_t Scale(int32_t a, int32_t b, int32_t c)
{
int32_t result, dummy;
asm volatile
("imull %3\n\t"
"idivl %4"
: "=a,a,a,a,a,a" (result),
"=&d,&d,&d,&d,d,d" (dummy)
: "a,a,a,a,a,a" (a),
"m,r,m,r,d,d" (b),
"r,r,m,m,r,m" (c)
: "cc"
);
return result;
}
// MSVC inlines
#elif defined(_MSC_VER) && defined(_M_IX86)
#pragma warning (disable: 4035)
__forceinline int32_t Scale(int32_t a, int32_t b, int32_t c)
{
__asm mov eax, a
__asm imul b
__asm idiv c
}
#pragma warning (default: 4035)
#else
static __forceinline int32_t Scale(int32_t a, int32_t b, int32_t c)
{
return (int32_t)(((int64_t)a*b) / c);
}
#endif
// Modern compilers are smart enough to do these multiplications intelligently.
__forceinline int32_t MulScale14(int32_t a, int32_t b) { return (int32_t)(((int64_t)a * b) >> 14); } // only used by R_DrawVoxel
__forceinline int32_t MulScale30(int32_t a, int32_t b) { return (int32_t)(((int64_t)a * b) >> 30); } // only used once in the node builder
__forceinline int32_t MulScale32(int32_t a, int32_t b) { return (int32_t)(((int64_t)a * b) >> 32); } // only used by R_DrawVoxel
__forceinline uint32_t UMulScale16(uint32_t a, uint32_t b) { return (uint32_t)(((uint64_t)a * b) >> 16); } // used for sky drawing
__forceinline int32_t DMulScale3(int32_t a, int32_t b, int32_t c, int32_t d) { return (int32_t)(((int64_t)a*b + (int64_t)c*d) >> 3); } // used for setting up slopes for Build maps
__forceinline int32_t DMulScale6(int32_t a, int32_t b, int32_t c, int32_t d) { return (int32_t)(((int64_t)a*b + (int64_t)c*d) >> 6); } // only used by R_DrawVoxel
__forceinline int32_t DMulScale10(int32_t a, int32_t b, int32_t c, int32_t d) { return (int32_t)(((int64_t)a*b + (int64_t)c*d) >> 10); } // only used by R_DrawVoxel
__forceinline int32_t DMulScale18(int32_t a, int32_t b, int32_t c, int32_t d) { return (int32_t)(((int64_t)a*b + (int64_t)c*d) >> 18); } // only used by R_DrawVoxel
__forceinline int32_t DMulScale32(int32_t a, int32_t b, int32_t c, int32_t d) { return (int32_t)(((int64_t)a*b + (int64_t)c*d) >> 32); } // used by R_PointOnSide.
// Sadly, for divisions this is not true but these are so infrequently used that the C versions are just fine, despite not being fully optimal.
__forceinline int32_t DivScale6(int32_t a, int32_t b) { return (int32_t)(((int64_t)a << 6) / b); } // only used by R_DrawVoxel
__forceinline int32_t DivScale21(int32_t a, int32_t b) { return (int32_t)(((int64_t)a << 21) / b); } // only used by R_DrawVoxel
__forceinline int32_t DivScale30(int32_t a, int32_t b) { return (int32_t)(((int64_t)a << 30) / b); } // only used once in the node builder
__forceinline void fillshort(void *buff, unsigned int count, WORD clear)
{
SWORD *b2 = (SWORD *)buff;
@ -23,14 +81,18 @@ __forceinline void fillshort(void *buff, unsigned int count, WORD clear)
#include "xs_Float.h"
inline SDWORD FixedDiv (SDWORD a, SDWORD b)
inline int32_t FixedDiv (int32_t a, int32_t b)
{
if ((DWORD)abs(a) >> (31-16) >= (DWORD)abs (b))
if ((uint32_t)abs(a) >> (31-16) >= (uint32_t)abs (b))
return (a^b)<0 ? FIXED_MIN : FIXED_MAX;
return DivScale16 (a, b);
return (int32_t)(((int64_t)a << 16) / b);
}
#define FixedMul MulScale16
__forceinline int32_t FixedMul(int32_t a, int32_t b)
{
return (int32_t)(((int64_t)a * b) >> 16);
}
inline fixed_t FloatToFixed(double f)
{

View file

@ -1,105 +0,0 @@
// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman
// Ken Silverman's official web site: "http://www.advsys.net/ken"
// See the included license file "BUILDLIC.TXT" for license info.
//
// This file is based on pragmas.h from Ken Silverman's original Build
// source code release but is meant for use with Visual C++ instead of
// Watcom C.
//
// Some of the inline assembly has been turned into C code, because VC++
// is smart enough to produce code at least as good as Ken's inlines.
// The more used functions are still inline assembly, because they do
// things that can't really be done in C. (I consider this a bad thing,
// because VC++ has considerably poorer support for inline assembly than
// Watcom, so it's better to rely on its C optimizer to produce fast code.)
//
#include <string.h>
#include <stddef.h>
#pragma warning (disable: 4035)
__forceinline SDWORD Scale (SDWORD a, SDWORD b, SDWORD c)
{
__asm mov eax,a
__asm imul b
__asm idiv c
}
#define MAKECONSTMulScale(s) \
__forceinline SDWORD MulScale##s (SDWORD a, SDWORD b) \
{ \
__asm mov eax,a \
__asm imul b \
__asm shrd eax,edx,s \
}
MAKECONSTMulScale(14)
MAKECONSTMulScale(16)
MAKECONSTMulScale(30)
#undef MAKECONSTMulScale
__forceinline SDWORD MulScale32 (SDWORD a, SDWORD b)
{
__asm mov eax,a
__asm imul b
__asm mov eax,edx
}
__forceinline DWORD UMulScale16(DWORD a, DWORD b)
{
__asm mov eax,a
__asm mul b
__asm shrd eax,edx,16
}
#define MAKECONSTDMulScale(s) \
__forceinline SDWORD DMulScale##s (SDWORD a, SDWORD b, SDWORD c, SDWORD d) \
{ \
__asm mov eax,a \
__asm imul b \
__asm mov ebx,eax \
__asm mov eax,c \
__asm mov esi,edx \
__asm imul d \
__asm add eax,ebx \
__asm adc edx,esi \
__asm shrd eax,edx,s \
}
MAKECONSTDMulScale(3)
MAKECONSTDMulScale(6)
MAKECONSTDMulScale(10)
MAKECONSTDMulScale(18)
#undef MAKCONSTDMulScale
__forceinline SDWORD DMulScale32 (SDWORD a, SDWORD b, SDWORD c, SDWORD d)
{
__asm mov eax,a
__asm imul b
__asm mov ebx,eax
__asm mov eax,c
__asm mov esi,edx
__asm imul d
__asm add eax,ebx
__asm adc edx,esi
__asm mov eax,edx
}
#define MAKECONSTDivScale(s) \
__forceinline SDWORD DivScale##s (SDWORD a, SDWORD b) \
{ \
__asm mov edx,a \
__asm sar edx,32-s \
__asm mov eax,a \
__asm shl eax,s \
__asm idiv b \
}
MAKECONSTDivScale(6)
MAKECONSTDivScale(16)
MAKECONSTDivScale(21)
MAKECONSTDivScale(30)
#undef MAKECONSTDivScale
#pragma warning (default: 4035)

View file

@ -2001,8 +2001,8 @@ int P_VanillaPointOnLineSide(double x, double y, const line_t* line)
auto dx = FloatToFixed(x - line->v1->fX());
auto dy = FloatToFixed(y - line->v1->fY());
auto left = MulScale16( int(delta.Y * 256) , dx );
auto right = MulScale16( dy , int(delta.X * 256) );
auto left = FixedMul( int(delta.Y * 256) , dx );
auto right = FixedMul( dy , int(delta.X * 256) );
if (right < left)
return 0; // front side