mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-22 20:21:26 +00:00
- Replaced toint/quickertoint with the portable routines from xs_Float.h. The
former used fistp, which is not portable across platforms, so they cannot be used in the play simulation. They were only suitable for the renderer. xs_Float.h also has a very fast float->fixed conversion, so FLOAT2FIXED uses that now. (And I also learned that the FPU's round to nearest is not the rounding I learned in grade school but actually Banker's Rounding. I had no idea.) (Also, also, the only thing that could have made quickertoint faster than toint was that it stored a 32-bit int. I never timed them, and I doubt in practice there was any real difference between the two.)
- Changed atan2f to atan2. Using floats is not a win, because the result is returned as a double on the x87 stack, which the caller then needs to cast down to a float using fst/fld.
SVN r1990 (trunk)
This commit is contained in:
parent
eca9b4981b
commit
344dda4a1a
13 changed files with 56 additions and 83 deletions
|
@ -1,3 +1,15 @@
|
|||
November 19, 2009
|
||||
- Replaced toint/quickertoint with the portable routines from xs_Float.h. The
|
||||
former used fistp, which is not portable across platforms, so they cannot be
|
||||
used in the play simulation. They were only suitable for the renderer.
|
||||
xs_Float.h also has a very fast float->fixed conversion, so FLOAT2FIXED
|
||||
uses that now.
|
||||
(And I also learned that the FPU's round to nearest is not the rounding I
|
||||
learned in grade school but actually Banker's Rounding. I had no idea.)
|
||||
(Also, also, the only thing that could have made quickertoint faster than
|
||||
toint was that it stored a 32-bit int. I never timed them, and I doubt in
|
||||
practice there was any real difference between the two.)
|
||||
|
||||
November 18, 2009
|
||||
- Added padding around packed textures to compensate for apparent NVidia
|
||||
texture coordinate imprecision.
|
||||
|
|
|
@ -201,13 +201,3 @@ static __forceinline SDWORD ksgn (SDWORD a)
|
|||
else if (a > 0) return 1;
|
||||
else return 0;
|
||||
}
|
||||
|
||||
static __forceinline int toint (float v)
|
||||
{
|
||||
return int(v);
|
||||
}
|
||||
|
||||
static __forceinline int quickertoint (float v)
|
||||
{
|
||||
return int(v);
|
||||
}
|
||||
|
|
|
@ -23,6 +23,10 @@ typedef int64_t SQWORD;
|
|||
typedef uint64_t QWORD;
|
||||
#endif
|
||||
|
||||
typedef SDWORD int32;
|
||||
typedef float real32;
|
||||
typedef double real64;
|
||||
|
||||
// windef.h, included by windows.h, has its own incompatible definition
|
||||
// of DWORD as a long. In files that mix Doom and Windows code, you
|
||||
// must define USE_WINDOWS_DWORD before including doomtype.h so that
|
||||
|
|
|
@ -40,6 +40,7 @@
|
|||
#include "gi.h"
|
||||
#include "templates.h"
|
||||
#include "v_font.h"
|
||||
#include "m_fixed.h"
|
||||
|
||||
TArray<FSkillInfo> AllSkills;
|
||||
int DefaultSkill = -1;
|
||||
|
|
|
@ -331,27 +331,3 @@ static inline SDWORD ksgn (SDWORD a)
|
|||
:"%cc");
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline int toint (float v)
|
||||
{
|
||||
volatile QWORD result;
|
||||
|
||||
asm volatile
|
||||
("fistpq %0"
|
||||
:"=m" (result)
|
||||
:"t" (v)
|
||||
:"%st");
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline int quickertoint (float v)
|
||||
{
|
||||
volatile int result;
|
||||
|
||||
asm volatile
|
||||
("fistpl %0"
|
||||
:"=m" (result)
|
||||
:"t" (v)
|
||||
:"%st");
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -20,6 +20,8 @@
|
|||
#include "basicinlines.h"
|
||||
#endif
|
||||
|
||||
#include "xs_Float.h"
|
||||
|
||||
#define MAKESAFEDIVSCALE(x) \
|
||||
inline SDWORD SafeDivScale##x (SDWORD a, SDWORD b) \
|
||||
{ \
|
||||
|
@ -134,4 +136,8 @@ inline SDWORD ModDiv (SDWORD num, SDWORD den, SDWORD *dmval)
|
|||
return num % den;
|
||||
}
|
||||
|
||||
|
||||
#define FLOAT2FIXED(f) xs_Fix<16>::ToFix(f)
|
||||
#define FIXED2FLOAT(f) ((f) / float(65536))
|
||||
|
||||
#endif
|
||||
|
|
|
@ -348,20 +348,4 @@ __forceinline SDWORD ksgn (SDWORD a)
|
|||
__asm adc eax,0
|
||||
}
|
||||
|
||||
__forceinline int toint (float v)
|
||||
{
|
||||
SQWORD res;
|
||||
__asm fld v;
|
||||
__asm fistp res;
|
||||
return (int)res;
|
||||
}
|
||||
|
||||
__forceinline int quickertoint (float v)
|
||||
{
|
||||
SDWORD res;
|
||||
__asm fld v;
|
||||
__asm fistp res;
|
||||
return (int)res;
|
||||
}
|
||||
|
||||
#pragma warning (default: 4035)
|
||||
|
|
|
@ -326,9 +326,7 @@ angle_t R_PointToAngle2 (fixed_t x1, fixed_t y1, fixed_t x, fixed_t y)
|
|||
else
|
||||
{
|
||||
// we have to use the slower but more precise floating point atan2 function here.
|
||||
// (use quickertoint to speed this up because the CRT's conversion is rather slow and
|
||||
// this is used in time critical code.)
|
||||
return quickertoint((float)(atan2f(float(y), float(x)) * (ANGLE_180/M_PI)));
|
||||
return xs_RoundToUInt(atan2(double(y), double(x)) * (ANGLE_180/M_PI));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -534,7 +532,7 @@ void R_SetVisibility (float vis)
|
|||
return;
|
||||
}
|
||||
|
||||
r_BaseVisibility = toint (vis * 65536.f);
|
||||
r_BaseVisibility = xs_RoundToInt(vis * 65536.f);
|
||||
|
||||
// Prevent overflow on walls
|
||||
if (r_BaseVisibility < 0 && r_BaseVisibility < -MaxVisForWall)
|
||||
|
|
|
@ -336,7 +336,7 @@ void R_MapTiltedPlane (int y, int x1)
|
|||
{
|
||||
uz = (iz + plane_sz[0]*width) * planelightfloat;
|
||||
vz = iz * planelightfloat;
|
||||
R_CalcTiltedLighting (toint (vz), toint (uz), width);
|
||||
R_CalcTiltedLighting (xs_RoundToInt(vz), xs_RoundToInt(uz), width);
|
||||
}
|
||||
|
||||
uz = plane_su[2] + plane_su[1]*(centery-y) + plane_su[0]*(x1-centerx);
|
||||
|
|
|
@ -2006,29 +2006,29 @@ void PrepWall (fixed_t *swall, fixed_t *lwall, fixed_t walxrepeat)
|
|||
x = WallSX1;
|
||||
|
||||
l = top / bot;
|
||||
swall[x] = quickertoint (l * WallDepthScale + WallDepthOrg);
|
||||
lwall[x] = quickertoint (l * xrepeat);
|
||||
swall[x] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
|
||||
lwall[x] = xs_RoundToInt(l * xrepeat);
|
||||
// As long as l is invalid, step one column at a time so that
|
||||
// we can get as many correct texture columns as possible.
|
||||
while (l > 1.0 && x+1 < WallSX2)
|
||||
{
|
||||
l = (top += WallUoverZstep) / (bot += WallInvZstep);
|
||||
x++;
|
||||
swall[x] = quickertoint (l * WallDepthScale + WallDepthOrg);
|
||||
lwall[x] = quickertoint (l * xrepeat);
|
||||
swall[x] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
|
||||
lwall[x] = xs_RoundToInt(l * xrepeat);
|
||||
}
|
||||
l *= xrepeat;
|
||||
while (x+4 < WallSX2)
|
||||
{
|
||||
top += topinc; bot += botinc;
|
||||
ol = l; l = top / bot;
|
||||
swall[x+4] = quickertoint (l * WallDepthScale + WallDepthOrg);
|
||||
lwall[x+4] = quickertoint (l *= xrepeat);
|
||||
swall[x+4] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
|
||||
lwall[x+4] = xs_RoundToInt(l *= xrepeat);
|
||||
|
||||
i = (ol+l) * 0.5f;
|
||||
lwall[x+2] = quickertoint (i);
|
||||
lwall[x+1] = quickertoint ((ol+i) * 0.5f);
|
||||
lwall[x+3] = quickertoint ((l+i) * 0.5f);
|
||||
lwall[x+2] = xs_RoundToInt(i);
|
||||
lwall[x+1] = xs_RoundToInt((ol+i) * 0.5f);
|
||||
lwall[x+3] = xs_RoundToInt((l+i) * 0.5f);
|
||||
swall[x+2] = ((swall[x]+swall[x+4])>>1);
|
||||
swall[x+1] = ((swall[x]+swall[x+2])>>1);
|
||||
swall[x+3] = ((swall[x+4]+swall[x+2])>>1);
|
||||
|
@ -2038,25 +2038,25 @@ void PrepWall (fixed_t *swall, fixed_t *lwall, fixed_t walxrepeat)
|
|||
{
|
||||
top += topinc * 0.5f; bot += botinc * 0.5f;
|
||||
ol = l; l = top / bot;
|
||||
swall[x+2] = quickertoint (l * WallDepthScale + WallDepthOrg);
|
||||
lwall[x+2] = quickertoint (l *= xrepeat);
|
||||
swall[x+2] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
|
||||
lwall[x+2] = xs_RoundToInt(l *= xrepeat);
|
||||
|
||||
lwall[x+1] = quickertoint ((l+ol)*0.5f);
|
||||
lwall[x+1] = xs_RoundToInt((l+ol)*0.5f);
|
||||
swall[x+1] = (swall[x]+swall[x+2])>>1;
|
||||
x += 2;
|
||||
}
|
||||
if (x+1 < WallSX2)
|
||||
{
|
||||
l = (top + WallUoverZstep) / (bot + WallInvZstep);
|
||||
swall[x+1] = quickertoint (l * WallDepthScale + WallDepthOrg);
|
||||
lwall[x+1] = quickertoint (l * xrepeat);
|
||||
swall[x+1] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
|
||||
lwall[x+1] = xs_RoundToInt(l * xrepeat);
|
||||
}
|
||||
/*
|
||||
for (x = WallSX1; x < WallSX2; x++)
|
||||
{
|
||||
frac = top / bot;
|
||||
lwall[x] = quickertoint (frac * xrepeat);
|
||||
swall[x] = quickertoint (frac * WallDepthScale + WallDepthOrg);
|
||||
lwall[x] = xs_RoundToInt(frac * xrepeat);
|
||||
swall[x] = xs_RoundToInt(frac * WallDepthScale + WallDepthOrg);
|
||||
top += WallUoverZstep;
|
||||
bot += WallInvZstep;
|
||||
}
|
||||
|
@ -2108,39 +2108,39 @@ void PrepLWall (fixed_t *lwall, fixed_t walxrepeat)
|
|||
x = WallSX1;
|
||||
|
||||
l = top / bot;
|
||||
lwall[x] = quickertoint (l * xrepeat);
|
||||
lwall[x] = xs_RoundToInt(l * xrepeat);
|
||||
// As long as l is invalid, step one column at a time so that
|
||||
// we can get as many correct texture columns as possible.
|
||||
while (l > 1.0 && x+1 < WallSX2)
|
||||
{
|
||||
l = (top += WallUoverZstep) / (bot += WallInvZstep);
|
||||
lwall[++x] = quickertoint (l * xrepeat);
|
||||
lwall[++x] = xs_RoundToInt(l * xrepeat);
|
||||
}
|
||||
l *= xrepeat;
|
||||
while (x+4 < WallSX2)
|
||||
{
|
||||
top += topinc; bot += botinc;
|
||||
ol = l; l = top / bot;
|
||||
lwall[x+4] = quickertoint (l *= xrepeat);
|
||||
lwall[x+4] = xs_RoundToInt(l *= xrepeat);
|
||||
|
||||
i = (ol+l) * 0.5f;
|
||||
lwall[x+2] = quickertoint (i);
|
||||
lwall[x+1] = quickertoint ((ol+i) * 0.5f);
|
||||
lwall[x+3] = quickertoint ((l+i) * 0.5f);
|
||||
lwall[x+2] = xs_RoundToInt(i);
|
||||
lwall[x+1] = xs_RoundToInt((ol+i) * 0.5f);
|
||||
lwall[x+3] = xs_RoundToInt((l+i) * 0.5f);
|
||||
x += 4;
|
||||
}
|
||||
if (x+2 < WallSX2)
|
||||
{
|
||||
top += topinc * 0.5f; bot += botinc * 0.5f;
|
||||
ol = l; l = top / bot;
|
||||
lwall[x+2] = quickertoint (l *= xrepeat);
|
||||
lwall[x+1] = quickertoint ((l+ol)*0.5f);
|
||||
lwall[x+2] = xs_RoundToInt(l *= xrepeat);
|
||||
lwall[x+1] = xs_RoundToInt((l+ol)*0.5f);
|
||||
x += 2;
|
||||
}
|
||||
if (x+1 < WallSX2)
|
||||
{
|
||||
l = (top + WallUoverZstep) / (bot + WallInvZstep);
|
||||
lwall[x+1] = quickertoint (l * xrepeat);
|
||||
lwall[x+1] = xs_RoundToInt(l * xrepeat);
|
||||
}
|
||||
|
||||
// fix for rounding errors
|
||||
|
|
|
@ -49,6 +49,7 @@
|
|||
#include "p_lnspec.h"
|
||||
#include "doomstat.h"
|
||||
#include "thingdef_exp.h"
|
||||
#include "m_fixed.h"
|
||||
|
||||
int testglobalvar = 1337; // just for having one global variable to test with
|
||||
DEFINE_GLOBAL_VARIABLE(testglobalvar)
|
||||
|
|
|
@ -1230,7 +1230,4 @@ typedef TRotator<float> FRotator;
|
|||
typedef TMatrix3x3<float> FMatrix3x3;
|
||||
typedef TAngle<float> FAngle;
|
||||
|
||||
#define FLOAT2FIXED(f) fixed_t((f) * float(65536))
|
||||
#define FIXED2FLOAT(f) ((f) / float(65536))
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1593,6 +1593,10 @@
|
|||
RelativePath=".\src\x86.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\xs_Float.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\src\zstring.h"
|
||||
>
|
||||
|
|
Loading…
Reference in a new issue