- Replaced toint/quickertoint with the portable routines from xs_Float.h. The
  former used fistp, which is not portable across platforms, so cannot be
  used in the play simulation. They were only suitable for the renderer.
  xs_Float.h also has a very fast float->fixed conversion, so FLOAT2FIXED
  uses that now.
  (And I also learned that the FPU's round to nearest is not the rounding I
  learned in grade school but actually Banker's Rounding. I had no idea.)
  (Also, also, the only thing that could have made quickertoint faster than
  toint was that it stored a 32-bit int. I never timed them, and I doubt in
  practice there was any real difference between the two.)
- Changed atan2f to atan2. Using floats is not a win, because the result is
  returned as a double on the x87 stack, which the caller then needs to cast
  down to a float using fst/fld.

SVN r1990 (trunk)
This commit is contained in:
Randy Heit 2009-11-20 05:34:20 +00:00
parent eca9b4981b
commit 344dda4a1a
13 changed files with 56 additions and 83 deletions

View file

@ -1,3 +1,15 @@
November 19, 2009
- Replaced toint/quickertoint with the portable routines from xs_Float.h. The
former used fistp, which is not portable across platforms, so cannot be
used in the play simulation. They were only suitable for the renderer.
xs_Float.h also has a very fast float->fixed conversion, so FLOAT2FIXED
uses that now.
(And I also learned that the FPU's round to nearest is not the rounding I
learned in grade school but actually Banker's Rounding. I had no idea.)
(Also, also, the only thing that could have made quickertoint faster than
toint was that it stored a 32-bit int. I never timed them, and I doubt in
practice there was any real difference between the two.)
November 18, 2009 November 18, 2009
- Added padding around packed textures to compensate for apparent NVidia - Added padding around packed textures to compensate for apparent NVidia
texture coordinate imprecision. texture coordinate imprecision.

View file

@ -201,13 +201,3 @@ static __forceinline SDWORD ksgn (SDWORD a)
else if (a > 0) return 1; else if (a > 0) return 1;
else return 0; else return 0;
} }
static __forceinline int toint (float v)
{
return int(v);
}
static __forceinline int quickertoint (float v)
{
return int(v);
}

View file

@ -23,6 +23,10 @@ typedef int64_t SQWORD;
typedef uint64_t QWORD; typedef uint64_t QWORD;
#endif #endif
typedef SDWORD int32;
typedef float real32;
typedef double real64;
// windef.h, included by windows.h, has its own incompatible definition // windef.h, included by windows.h, has its own incompatible definition
// of DWORD as a long. In files that mix Doom and Windows code, you // of DWORD as a long. In files that mix Doom and Windows code, you
// must define USE_WINDOWS_DWORD before including doomtype.h so that // must define USE_WINDOWS_DWORD before including doomtype.h so that

View file

@ -40,6 +40,7 @@
#include "gi.h" #include "gi.h"
#include "templates.h" #include "templates.h"
#include "v_font.h" #include "v_font.h"
#include "m_fixed.h"
TArray<FSkillInfo> AllSkills; TArray<FSkillInfo> AllSkills;
int DefaultSkill = -1; int DefaultSkill = -1;

View file

@ -331,27 +331,3 @@ static inline SDWORD ksgn (SDWORD a)
:"%cc"); :"%cc");
return result; return result;
} }
static inline int toint (float v)
{
volatile QWORD result;
asm volatile
("fistpq %0"
:"=m" (result)
:"t" (v)
:"%st");
return result;
}
static inline int quickertoint (float v)
{
volatile int result;
asm volatile
("fistpl %0"
:"=m" (result)
:"t" (v)
:"%st");
return result;
}

View file

@ -20,6 +20,8 @@
#include "basicinlines.h" #include "basicinlines.h"
#endif #endif
#include "xs_Float.h"
#define MAKESAFEDIVSCALE(x) \ #define MAKESAFEDIVSCALE(x) \
inline SDWORD SafeDivScale##x (SDWORD a, SDWORD b) \ inline SDWORD SafeDivScale##x (SDWORD a, SDWORD b) \
{ \ { \
@ -134,4 +136,8 @@ inline SDWORD ModDiv (SDWORD num, SDWORD den, SDWORD *dmval)
return num % den; return num % den;
} }
#define FLOAT2FIXED(f) xs_Fix<16>::ToFix(f)
#define FIXED2FLOAT(f) ((f) / float(65536))
#endif #endif

View file

@ -348,20 +348,4 @@ __forceinline SDWORD ksgn (SDWORD a)
__asm adc eax,0 __asm adc eax,0
} }
__forceinline int toint (float v)
{
SQWORD res;
__asm fld v;
__asm fistp res;
return (int)res;
}
__forceinline int quickertoint (float v)
{
SDWORD res;
__asm fld v;
__asm fistp res;
return (int)res;
}
#pragma warning (default: 4035) #pragma warning (default: 4035)

View file

@ -326,9 +326,7 @@ angle_t R_PointToAngle2 (fixed_t x1, fixed_t y1, fixed_t x, fixed_t y)
else else
{ {
// we have to use the slower but more precise floating point atan2 function here. // we have to use the slower but more precise floating point atan2 function here.
// (use quickertoint to speed this up because the CRT's conversion is rather slow and return xs_RoundToUInt(atan2(double(y), double(x)) * (ANGLE_180/M_PI));
// this is used in time critical code.)
return quickertoint((float)(atan2f(float(y), float(x)) * (ANGLE_180/M_PI)));
} }
} }
@ -534,7 +532,7 @@ void R_SetVisibility (float vis)
return; return;
} }
r_BaseVisibility = toint (vis * 65536.f); r_BaseVisibility = xs_RoundToInt(vis * 65536.f);
// Prevent overflow on walls // Prevent overflow on walls
if (r_BaseVisibility < 0 && r_BaseVisibility < -MaxVisForWall) if (r_BaseVisibility < 0 && r_BaseVisibility < -MaxVisForWall)

View file

@ -336,7 +336,7 @@ void R_MapTiltedPlane (int y, int x1)
{ {
uz = (iz + plane_sz[0]*width) * planelightfloat; uz = (iz + plane_sz[0]*width) * planelightfloat;
vz = iz * planelightfloat; vz = iz * planelightfloat;
R_CalcTiltedLighting (toint (vz), toint (uz), width); R_CalcTiltedLighting (xs_RoundToInt(vz), xs_RoundToInt(uz), width);
} }
uz = plane_su[2] + plane_su[1]*(centery-y) + plane_su[0]*(x1-centerx); uz = plane_su[2] + plane_su[1]*(centery-y) + plane_su[0]*(x1-centerx);

View file

@ -2006,29 +2006,29 @@ void PrepWall (fixed_t *swall, fixed_t *lwall, fixed_t walxrepeat)
x = WallSX1; x = WallSX1;
l = top / bot; l = top / bot;
swall[x] = quickertoint (l * WallDepthScale + WallDepthOrg); swall[x] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
lwall[x] = quickertoint (l * xrepeat); lwall[x] = xs_RoundToInt(l * xrepeat);
// As long as l is invalid, step one column at a time so that // As long as l is invalid, step one column at a time so that
// we can get as many correct texture columns as possible. // we can get as many correct texture columns as possible.
while (l > 1.0 && x+1 < WallSX2) while (l > 1.0 && x+1 < WallSX2)
{ {
l = (top += WallUoverZstep) / (bot += WallInvZstep); l = (top += WallUoverZstep) / (bot += WallInvZstep);
x++; x++;
swall[x] = quickertoint (l * WallDepthScale + WallDepthOrg); swall[x] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
lwall[x] = quickertoint (l * xrepeat); lwall[x] = xs_RoundToInt(l * xrepeat);
} }
l *= xrepeat; l *= xrepeat;
while (x+4 < WallSX2) while (x+4 < WallSX2)
{ {
top += topinc; bot += botinc; top += topinc; bot += botinc;
ol = l; l = top / bot; ol = l; l = top / bot;
swall[x+4] = quickertoint (l * WallDepthScale + WallDepthOrg); swall[x+4] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
lwall[x+4] = quickertoint (l *= xrepeat); lwall[x+4] = xs_RoundToInt(l *= xrepeat);
i = (ol+l) * 0.5f; i = (ol+l) * 0.5f;
lwall[x+2] = quickertoint (i); lwall[x+2] = xs_RoundToInt(i);
lwall[x+1] = quickertoint ((ol+i) * 0.5f); lwall[x+1] = xs_RoundToInt((ol+i) * 0.5f);
lwall[x+3] = quickertoint ((l+i) * 0.5f); lwall[x+3] = xs_RoundToInt((l+i) * 0.5f);
swall[x+2] = ((swall[x]+swall[x+4])>>1); swall[x+2] = ((swall[x]+swall[x+4])>>1);
swall[x+1] = ((swall[x]+swall[x+2])>>1); swall[x+1] = ((swall[x]+swall[x+2])>>1);
swall[x+3] = ((swall[x+4]+swall[x+2])>>1); swall[x+3] = ((swall[x+4]+swall[x+2])>>1);
@ -2038,25 +2038,25 @@ void PrepWall (fixed_t *swall, fixed_t *lwall, fixed_t walxrepeat)
{ {
top += topinc * 0.5f; bot += botinc * 0.5f; top += topinc * 0.5f; bot += botinc * 0.5f;
ol = l; l = top / bot; ol = l; l = top / bot;
swall[x+2] = quickertoint (l * WallDepthScale + WallDepthOrg); swall[x+2] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
lwall[x+2] = quickertoint (l *= xrepeat); lwall[x+2] = xs_RoundToInt(l *= xrepeat);
lwall[x+1] = quickertoint ((l+ol)*0.5f); lwall[x+1] = xs_RoundToInt((l+ol)*0.5f);
swall[x+1] = (swall[x]+swall[x+2])>>1; swall[x+1] = (swall[x]+swall[x+2])>>1;
x += 2; x += 2;
} }
if (x+1 < WallSX2) if (x+1 < WallSX2)
{ {
l = (top + WallUoverZstep) / (bot + WallInvZstep); l = (top + WallUoverZstep) / (bot + WallInvZstep);
swall[x+1] = quickertoint (l * WallDepthScale + WallDepthOrg); swall[x+1] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
lwall[x+1] = quickertoint (l * xrepeat); lwall[x+1] = xs_RoundToInt(l * xrepeat);
} }
/* /*
for (x = WallSX1; x < WallSX2; x++) for (x = WallSX1; x < WallSX2; x++)
{ {
frac = top / bot; frac = top / bot;
lwall[x] = quickertoint (frac * xrepeat); lwall[x] = xs_RoundToInt(frac * xrepeat);
swall[x] = quickertoint (frac * WallDepthScale + WallDepthOrg); swall[x] = xs_RoundToInt(frac * WallDepthScale + WallDepthOrg);
top += WallUoverZstep; top += WallUoverZstep;
bot += WallInvZstep; bot += WallInvZstep;
} }
@ -2108,39 +2108,39 @@ void PrepLWall (fixed_t *lwall, fixed_t walxrepeat)
x = WallSX1; x = WallSX1;
l = top / bot; l = top / bot;
lwall[x] = quickertoint (l * xrepeat); lwall[x] = xs_RoundToInt(l * xrepeat);
// As long as l is invalid, step one column at a time so that // As long as l is invalid, step one column at a time so that
// we can get as many correct texture columns as possible. // we can get as many correct texture columns as possible.
while (l > 1.0 && x+1 < WallSX2) while (l > 1.0 && x+1 < WallSX2)
{ {
l = (top += WallUoverZstep) / (bot += WallInvZstep); l = (top += WallUoverZstep) / (bot += WallInvZstep);
lwall[++x] = quickertoint (l * xrepeat); lwall[++x] = xs_RoundToInt(l * xrepeat);
} }
l *= xrepeat; l *= xrepeat;
while (x+4 < WallSX2) while (x+4 < WallSX2)
{ {
top += topinc; bot += botinc; top += topinc; bot += botinc;
ol = l; l = top / bot; ol = l; l = top / bot;
lwall[x+4] = quickertoint (l *= xrepeat); lwall[x+4] = xs_RoundToInt(l *= xrepeat);
i = (ol+l) * 0.5f; i = (ol+l) * 0.5f;
lwall[x+2] = quickertoint (i); lwall[x+2] = xs_RoundToInt(i);
lwall[x+1] = quickertoint ((ol+i) * 0.5f); lwall[x+1] = xs_RoundToInt((ol+i) * 0.5f);
lwall[x+3] = quickertoint ((l+i) * 0.5f); lwall[x+3] = xs_RoundToInt((l+i) * 0.5f);
x += 4; x += 4;
} }
if (x+2 < WallSX2) if (x+2 < WallSX2)
{ {
top += topinc * 0.5f; bot += botinc * 0.5f; top += topinc * 0.5f; bot += botinc * 0.5f;
ol = l; l = top / bot; ol = l; l = top / bot;
lwall[x+2] = quickertoint (l *= xrepeat); lwall[x+2] = xs_RoundToInt(l *= xrepeat);
lwall[x+1] = quickertoint ((l+ol)*0.5f); lwall[x+1] = xs_RoundToInt((l+ol)*0.5f);
x += 2; x += 2;
} }
if (x+1 < WallSX2) if (x+1 < WallSX2)
{ {
l = (top + WallUoverZstep) / (bot + WallInvZstep); l = (top + WallUoverZstep) / (bot + WallInvZstep);
lwall[x+1] = quickertoint (l * xrepeat); lwall[x+1] = xs_RoundToInt(l * xrepeat);
} }
// fix for rounding errors // fix for rounding errors

View file

@ -49,6 +49,7 @@
#include "p_lnspec.h" #include "p_lnspec.h"
#include "doomstat.h" #include "doomstat.h"
#include "thingdef_exp.h" #include "thingdef_exp.h"
#include "m_fixed.h"
int testglobalvar = 1337; // just for having one global variable to test with int testglobalvar = 1337; // just for having one global variable to test with
DEFINE_GLOBAL_VARIABLE(testglobalvar) DEFINE_GLOBAL_VARIABLE(testglobalvar)

View file

@ -1230,7 +1230,4 @@ typedef TRotator<float> FRotator;
typedef TMatrix3x3<float> FMatrix3x3; typedef TMatrix3x3<float> FMatrix3x3;
typedef TAngle<float> FAngle; typedef TAngle<float> FAngle;
#define FLOAT2FIXED(f) fixed_t((f) * float(65536))
#define FIXED2FLOAT(f) ((f) / float(65536))
#endif #endif

View file

@ -1593,6 +1593,10 @@
RelativePath=".\src\x86.h" RelativePath=".\src\x86.h"
> >
</File> </File>
<File
RelativePath=".\src\xs_Float.h"
>
</File>
<File <File
RelativePath=".\src\zstring.h" RelativePath=".\src\zstring.h"
> >