- Replaced toint/quickertoint with the portable routines from xs_Float.h. The
  former used fistp, which is not portable across platforms, so cannot be
  used in the play simulation. They were only suitable for the renderer.
  xs_Float.h also has a very fast float->fixed conversion, so FLOAT2FIXED
  uses that now.
  (And I also learned that the FPU's round to nearest is not the rounding I
  learned in grade school but actually Banker's Rounding. I had no idea.)
  (Also, also, the only thing that could have made quickertoint faster than
  toint was that it stored a 32-bit int. I never timed them, and I doubt in
  practice there was any real difference between the two.)
- Changed atan2f to atan2. Using floats is not a win, because the result is
  returned as a double on the x87 stack, which the caller then needs to cast
  down to a float using fst/fld.

SVN r1990 (trunk)
This commit is contained in:
Randy Heit 2009-11-20 05:34:20 +00:00
parent eca9b4981b
commit 344dda4a1a
13 changed files with 56 additions and 83 deletions

View file

@ -1,3 +1,15 @@
November 19, 2009
- Replaced toint/quickertoint with the portable routines from xs_Float.h. The
former used fistp, which is not portable across platforms, so cannot be
used in the play simulation. They were only suitable for the renderer.
xs_Float.h also has a very fast float->fixed conversion, so FLOAT2FIXED
uses that now.
(And I also learned that the FPU's round to nearest is not the rounding I
learned in grade school but actually Banker's Rounding. I had no idea.)
(Also, also, the only thing that could have made quickertoint faster than
toint was that it stored a 32-bit int. I never timed them, and I doubt in
practice there was any real difference between the two.)
November 18, 2009
- Added padding around packed textures to compensate for apparent NVidia
texture coordinate imprecision.

View file

@ -201,13 +201,3 @@ static __forceinline SDWORD ksgn (SDWORD a)
else if (a > 0) return 1;
else return 0;
}
// Converts a float to an int using an ordinary cast, so the fractional
// part is discarded. No FPU-specific instructions are involved here.
static __forceinline int toint (float v)
{
return int(v);
}
// Converts a float to an int using an ordinary cast. In this variant the
// body is the same as toint: the fractional part is simply discarded.
static __forceinline int quickertoint (float v)
{
return int(v);
}

View file

@ -23,6 +23,10 @@ typedef int64_t SQWORD;
typedef uint64_t QWORD;
#endif
typedef SDWORD int32;
typedef float real32;
typedef double real64;
// windef.h, included by windows.h, has its own incompatible definition
// of DWORD as a long. In files that mix Doom and Windows code, you
// must define USE_WINDOWS_DWORD before including doomtype.h so that

View file

@ -40,6 +40,7 @@
#include "gi.h"
#include "templates.h"
#include "v_font.h"
#include "m_fixed.h"
TArray<FSkillInfo> AllSkills;
int DefaultSkill = -1;

View file

@ -331,27 +331,3 @@ static inline SDWORD ksgn (SDWORD a)
:"%cc");
return result;
}
// Converts a float to an integer with the x87 fistp instruction (GCC
// inline assembly). The value is stored to memory as a 64-bit integer
// ("q" suffix) and then narrowed to int by the return statement.
// NOTE(review): rounding presumably follows the FPU's current rounding
// mode rather than C-style truncation -- confirm before relying on it.
static inline int toint (float v)
{
volatile QWORD result;
// "t" places v on the top of the x87 register stack; fistp pops that
// register, which is why %st is listed as clobbered.
asm volatile
("fistpq %0"
:"=m" (result)
:"t" (v)
:"%st");
return result;
}
// Converts a float to an integer with the x87 fistp instruction (GCC
// inline assembly). Unlike toint, the store goes straight into an int
// ("l" suffix) instead of a 64-bit temporary.
// NOTE(review): rounding presumably follows the FPU's current rounding
// mode rather than C-style truncation -- confirm before relying on it.
static inline int quickertoint (float v)
{
volatile int result;
// "t" places v on the top of the x87 register stack; fistp pops that
// register, which is why %st is listed as clobbered.
asm volatile
("fistpl %0"
:"=m" (result)
:"t" (v)
:"%st");
return result;
}

View file

@ -20,6 +20,8 @@
#include "basicinlines.h"
#endif
#include "xs_Float.h"
#define MAKESAFEDIVSCALE(x) \
inline SDWORD SafeDivScale##x (SDWORD a, SDWORD b) \
{ \
@ -134,4 +136,8 @@ inline SDWORD ModDiv (SDWORD num, SDWORD den, SDWORD *dmval)
return num % den;
}
// Conversions between floating point and fixed point with a scale factor
// of 65536 (16 fractional bits).
// FLOAT2FIXED delegates to xs_Fix<16>::ToFix from xs_Float.h.
// FIXED2FLOAT divides the fixed-point value by 65536 as a float.
#define FLOAT2FIXED(f) xs_Fix<16>::ToFix(f)
#define FIXED2FLOAT(f) ((f) / float(65536))
#endif

View file

@ -348,20 +348,4 @@ __forceinline SDWORD ksgn (SDWORD a)
__asm adc eax,0
}
// Converts a float to an integer with the x87 FPU (MSVC inline assembly).
// The value is stored into an SQWORD temporary and then cast down to int.
// NOTE(review): rounding presumably follows the FPU's current rounding
// mode rather than C-style truncation -- confirm before relying on it.
__forceinline int toint (float v)
{
SQWORD res;
// Push v onto the x87 stack, then store it as an integer and pop it.
__asm fld v;
__asm fistp res;
return (int)res;
}
// Converts a float to an integer with the x87 FPU (MSVC inline assembly).
// Differs from toint only in the temporary: the result is stored into an
// SDWORD rather than an SQWORD.
// NOTE(review): rounding presumably follows the FPU's current rounding
// mode rather than C-style truncation -- confirm before relying on it.
__forceinline int quickertoint (float v)
{
SDWORD res;
// Push v onto the x87 stack, then store it as an integer and pop it.
__asm fld v;
__asm fistp res;
return (int)res;
}
#pragma warning (default: 4035)

View file

@ -326,9 +326,7 @@ angle_t R_PointToAngle2 (fixed_t x1, fixed_t y1, fixed_t x, fixed_t y)
else
{
// we have to use the slower but more precise floating point atan2 function here.
// (use quickertoint to speed this up because the CRT's conversion is rather slow and
// this is used in time critical code.)
return quickertoint((float)(atan2f(float(y), float(x)) * (ANGLE_180/M_PI)));
return xs_RoundToUInt(atan2(double(y), double(x)) * (ANGLE_180/M_PI));
}
}
@ -534,7 +532,7 @@ void R_SetVisibility (float vis)
return;
}
r_BaseVisibility = toint (vis * 65536.f);
r_BaseVisibility = xs_RoundToInt(vis * 65536.f);
// Prevent overflow on walls
if (r_BaseVisibility < 0 && r_BaseVisibility < -MaxVisForWall)

View file

@ -336,7 +336,7 @@ void R_MapTiltedPlane (int y, int x1)
{
uz = (iz + plane_sz[0]*width) * planelightfloat;
vz = iz * planelightfloat;
R_CalcTiltedLighting (toint (vz), toint (uz), width);
R_CalcTiltedLighting (xs_RoundToInt(vz), xs_RoundToInt(uz), width);
}
uz = plane_su[2] + plane_su[1]*(centery-y) + plane_su[0]*(x1-centerx);

View file

@ -2006,29 +2006,29 @@ void PrepWall (fixed_t *swall, fixed_t *lwall, fixed_t walxrepeat)
x = WallSX1;
l = top / bot;
swall[x] = quickertoint (l * WallDepthScale + WallDepthOrg);
lwall[x] = quickertoint (l * xrepeat);
swall[x] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
lwall[x] = xs_RoundToInt(l * xrepeat);
// As long as l is invalid, step one column at a time so that
// we can get as many correct texture columns as possible.
while (l > 1.0 && x+1 < WallSX2)
{
l = (top += WallUoverZstep) / (bot += WallInvZstep);
x++;
swall[x] = quickertoint (l * WallDepthScale + WallDepthOrg);
lwall[x] = quickertoint (l * xrepeat);
swall[x] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
lwall[x] = xs_RoundToInt(l * xrepeat);
}
l *= xrepeat;
while (x+4 < WallSX2)
{
top += topinc; bot += botinc;
ol = l; l = top / bot;
swall[x+4] = quickertoint (l * WallDepthScale + WallDepthOrg);
lwall[x+4] = quickertoint (l *= xrepeat);
swall[x+4] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
lwall[x+4] = xs_RoundToInt(l *= xrepeat);
i = (ol+l) * 0.5f;
lwall[x+2] = quickertoint (i);
lwall[x+1] = quickertoint ((ol+i) * 0.5f);
lwall[x+3] = quickertoint ((l+i) * 0.5f);
lwall[x+2] = xs_RoundToInt(i);
lwall[x+1] = xs_RoundToInt((ol+i) * 0.5f);
lwall[x+3] = xs_RoundToInt((l+i) * 0.5f);
swall[x+2] = ((swall[x]+swall[x+4])>>1);
swall[x+1] = ((swall[x]+swall[x+2])>>1);
swall[x+3] = ((swall[x+4]+swall[x+2])>>1);
@ -2038,25 +2038,25 @@ void PrepWall (fixed_t *swall, fixed_t *lwall, fixed_t walxrepeat)
{
top += topinc * 0.5f; bot += botinc * 0.5f;
ol = l; l = top / bot;
swall[x+2] = quickertoint (l * WallDepthScale + WallDepthOrg);
lwall[x+2] = quickertoint (l *= xrepeat);
swall[x+2] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
lwall[x+2] = xs_RoundToInt(l *= xrepeat);
lwall[x+1] = quickertoint ((l+ol)*0.5f);
lwall[x+1] = xs_RoundToInt((l+ol)*0.5f);
swall[x+1] = (swall[x]+swall[x+2])>>1;
x += 2;
}
if (x+1 < WallSX2)
{
l = (top + WallUoverZstep) / (bot + WallInvZstep);
swall[x+1] = quickertoint (l * WallDepthScale + WallDepthOrg);
lwall[x+1] = quickertoint (l * xrepeat);
swall[x+1] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
lwall[x+1] = xs_RoundToInt(l * xrepeat);
}
/*
for (x = WallSX1; x < WallSX2; x++)
{
frac = top / bot;
lwall[x] = quickertoint (frac * xrepeat);
swall[x] = quickertoint (frac * WallDepthScale + WallDepthOrg);
lwall[x] = xs_RoundToInt(frac * xrepeat);
swall[x] = xs_RoundToInt(frac * WallDepthScale + WallDepthOrg);
top += WallUoverZstep;
bot += WallInvZstep;
}
@ -2108,39 +2108,39 @@ void PrepLWall (fixed_t *lwall, fixed_t walxrepeat)
x = WallSX1;
l = top / bot;
lwall[x] = quickertoint (l * xrepeat);
lwall[x] = xs_RoundToInt(l * xrepeat);
// As long as l is invalid, step one column at a time so that
// we can get as many correct texture columns as possible.
while (l > 1.0 && x+1 < WallSX2)
{
l = (top += WallUoverZstep) / (bot += WallInvZstep);
lwall[++x] = quickertoint (l * xrepeat);
lwall[++x] = xs_RoundToInt(l * xrepeat);
}
l *= xrepeat;
while (x+4 < WallSX2)
{
top += topinc; bot += botinc;
ol = l; l = top / bot;
lwall[x+4] = quickertoint (l *= xrepeat);
lwall[x+4] = xs_RoundToInt(l *= xrepeat);
i = (ol+l) * 0.5f;
lwall[x+2] = quickertoint (i);
lwall[x+1] = quickertoint ((ol+i) * 0.5f);
lwall[x+3] = quickertoint ((l+i) * 0.5f);
lwall[x+2] = xs_RoundToInt(i);
lwall[x+1] = xs_RoundToInt((ol+i) * 0.5f);
lwall[x+3] = xs_RoundToInt((l+i) * 0.5f);
x += 4;
}
if (x+2 < WallSX2)
{
top += topinc * 0.5f; bot += botinc * 0.5f;
ol = l; l = top / bot;
lwall[x+2] = quickertoint (l *= xrepeat);
lwall[x+1] = quickertoint ((l+ol)*0.5f);
lwall[x+2] = xs_RoundToInt(l *= xrepeat);
lwall[x+1] = xs_RoundToInt((l+ol)*0.5f);
x += 2;
}
if (x+1 < WallSX2)
{
l = (top + WallUoverZstep) / (bot + WallInvZstep);
lwall[x+1] = quickertoint (l * xrepeat);
lwall[x+1] = xs_RoundToInt(l * xrepeat);
}
// fix for rounding errors

View file

@ -49,6 +49,7 @@
#include "p_lnspec.h"
#include "doomstat.h"
#include "thingdef_exp.h"
#include "m_fixed.h"
int testglobalvar = 1337; // just for having one global variable to test with
DEFINE_GLOBAL_VARIABLE(testglobalvar)

View file

@ -1230,7 +1230,4 @@ typedef TRotator<float> FRotator;
typedef TMatrix3x3<float> FMatrix3x3;
typedef TAngle<float> FAngle;
// Conversions between floating point and fixed point with a scale factor
// of 65536 (16 fractional bits).
// FLOAT2FIXED multiplies by 65536 and converts the product to fixed_t.
// FIXED2FLOAT divides the fixed-point value by 65536 as a float.
#define FLOAT2FIXED(f) fixed_t((f) * float(65536))
#define FIXED2FLOAT(f) ((f) / float(65536))
#endif

View file

@ -1593,6 +1593,10 @@
RelativePath=".\src\x86.h"
>
</File>
<File
RelativePath=".\src\xs_Float.h"
>
</File>
<File
RelativePath=".\src\zstring.h"
>