mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-26 05:51:20 +00:00
- Replaced toint/quickertoint with the portable routines from xs_Float.h. The
former used fistp, which is not portable across platforms, so they cannot be used in the play simulation. They were only suitable for the renderer. xs_Float.h also has a very fast float->fixed conversion, so FLOAT2FIXED uses that now. (And I also learned that the FPU's round to nearest is not the rounding I learned in grade school but actually Banker's Rounding. I had no idea.) (Also, also, the only thing that could have made quickertoint faster than toint was that it stored a 32-bit int. I never timed them, and I doubt in practice there was any real difference between the two.) - Changed atan2f to atan2. Using floats is not a win, because the result is returned as a double on the x87 stack, which the caller then needs to cast down to a float using fst/fld. SVN r1990 (trunk)
This commit is contained in:
parent
eca9b4981b
commit
344dda4a1a
13 changed files with 56 additions and 83 deletions
|
@ -1,3 +1,15 @@
|
||||||
|
November 19, 2009
|
||||||
|
- Replaced toint/quickertoint with the portable routines from xs_Float.h. The
|
||||||
|
former used fistp, which is not portable across platforms, so they cannot be
|
||||||
|
used in the play simulation. They were only suitable for the renderer.
|
||||||
|
xs_Float.h also has a very fast float->fixed conversion, so FLOAT2FIXED
|
||||||
|
uses that now.
|
||||||
|
(And I also learned that the FPU's round to nearest is not the rounding I
|
||||||
|
learned in grade school but actually Banker's Rounding. I had no idea.)
|
||||||
|
(Also, also, the only thing that could have made quickertoint faster than
|
||||||
|
toint was that it stored a 32-bit int. I never timed them, and I doubt in
|
||||||
|
practice there was any real difference between the two.)
|
||||||
|
|
||||||
November 18, 2009
|
November 18, 2009
|
||||||
- Added padding around packed textures to compensate for apparent NVidia
|
- Added padding around packed textures to compensate for apparent NVidia
|
||||||
texture coordinate imprecision.
|
texture coordinate imprecision.
|
||||||
|
|
|
@ -201,13 +201,3 @@ static __forceinline SDWORD ksgn (SDWORD a)
|
||||||
else if (a > 0) return 1;
|
else if (a > 0) return 1;
|
||||||
else return 0;
|
else return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __forceinline int toint (float v)
|
|
||||||
{
|
|
||||||
return int(v);
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline int quickertoint (float v)
|
|
||||||
{
|
|
||||||
return int(v);
|
|
||||||
}
|
|
||||||
|
|
|
@ -23,6 +23,10 @@ typedef int64_t SQWORD;
|
||||||
typedef uint64_t QWORD;
|
typedef uint64_t QWORD;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
typedef SDWORD int32;
|
||||||
|
typedef float real32;
|
||||||
|
typedef double real64;
|
||||||
|
|
||||||
// windef.h, included by windows.h, has its own incompatible definition
|
// windef.h, included by windows.h, has its own incompatible definition
|
||||||
// of DWORD as a long. In files that mix Doom and Windows code, you
|
// of DWORD as a long. In files that mix Doom and Windows code, you
|
||||||
// must define USE_WINDOWS_DWORD before including doomtype.h so that
|
// must define USE_WINDOWS_DWORD before including doomtype.h so that
|
||||||
|
|
|
@ -40,6 +40,7 @@
|
||||||
#include "gi.h"
|
#include "gi.h"
|
||||||
#include "templates.h"
|
#include "templates.h"
|
||||||
#include "v_font.h"
|
#include "v_font.h"
|
||||||
|
#include "m_fixed.h"
|
||||||
|
|
||||||
TArray<FSkillInfo> AllSkills;
|
TArray<FSkillInfo> AllSkills;
|
||||||
int DefaultSkill = -1;
|
int DefaultSkill = -1;
|
||||||
|
|
|
@ -331,27 +331,3 @@ static inline SDWORD ksgn (SDWORD a)
|
||||||
:"%cc");
|
:"%cc");
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int toint (float v)
|
|
||||||
{
|
|
||||||
volatile QWORD result;
|
|
||||||
|
|
||||||
asm volatile
|
|
||||||
("fistpq %0"
|
|
||||||
:"=m" (result)
|
|
||||||
:"t" (v)
|
|
||||||
:"%st");
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int quickertoint (float v)
|
|
||||||
{
|
|
||||||
volatile int result;
|
|
||||||
|
|
||||||
asm volatile
|
|
||||||
("fistpl %0"
|
|
||||||
:"=m" (result)
|
|
||||||
:"t" (v)
|
|
||||||
:"%st");
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
|
@ -20,6 +20,8 @@
|
||||||
#include "basicinlines.h"
|
#include "basicinlines.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "xs_Float.h"
|
||||||
|
|
||||||
#define MAKESAFEDIVSCALE(x) \
|
#define MAKESAFEDIVSCALE(x) \
|
||||||
inline SDWORD SafeDivScale##x (SDWORD a, SDWORD b) \
|
inline SDWORD SafeDivScale##x (SDWORD a, SDWORD b) \
|
||||||
{ \
|
{ \
|
||||||
|
@ -134,4 +136,8 @@ inline SDWORD ModDiv (SDWORD num, SDWORD den, SDWORD *dmval)
|
||||||
return num % den;
|
return num % den;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#define FLOAT2FIXED(f) xs_Fix<16>::ToFix(f)
|
||||||
|
#define FIXED2FLOAT(f) ((f) / float(65536))
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -348,20 +348,4 @@ __forceinline SDWORD ksgn (SDWORD a)
|
||||||
__asm adc eax,0
|
__asm adc eax,0
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline int toint (float v)
|
|
||||||
{
|
|
||||||
SQWORD res;
|
|
||||||
__asm fld v;
|
|
||||||
__asm fistp res;
|
|
||||||
return (int)res;
|
|
||||||
}
|
|
||||||
|
|
||||||
__forceinline int quickertoint (float v)
|
|
||||||
{
|
|
||||||
SDWORD res;
|
|
||||||
__asm fld v;
|
|
||||||
__asm fistp res;
|
|
||||||
return (int)res;
|
|
||||||
}
|
|
||||||
|
|
||||||
#pragma warning (default: 4035)
|
#pragma warning (default: 4035)
|
||||||
|
|
|
@ -326,9 +326,7 @@ angle_t R_PointToAngle2 (fixed_t x1, fixed_t y1, fixed_t x, fixed_t y)
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// we have to use the slower but more precise floating point atan2 function here.
|
// we have to use the slower but more precise floating point atan2 function here.
|
||||||
// (use quickertoint to speed this up because the CRT's conversion is rather slow and
|
return xs_RoundToUInt(atan2(double(y), double(x)) * (ANGLE_180/M_PI));
|
||||||
// this is used in time critical code.)
|
|
||||||
return quickertoint((float)(atan2f(float(y), float(x)) * (ANGLE_180/M_PI)));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -534,7 +532,7 @@ void R_SetVisibility (float vis)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
r_BaseVisibility = toint (vis * 65536.f);
|
r_BaseVisibility = xs_RoundToInt(vis * 65536.f);
|
||||||
|
|
||||||
// Prevent overflow on walls
|
// Prevent overflow on walls
|
||||||
if (r_BaseVisibility < 0 && r_BaseVisibility < -MaxVisForWall)
|
if (r_BaseVisibility < 0 && r_BaseVisibility < -MaxVisForWall)
|
||||||
|
|
|
@ -336,7 +336,7 @@ void R_MapTiltedPlane (int y, int x1)
|
||||||
{
|
{
|
||||||
uz = (iz + plane_sz[0]*width) * planelightfloat;
|
uz = (iz + plane_sz[0]*width) * planelightfloat;
|
||||||
vz = iz * planelightfloat;
|
vz = iz * planelightfloat;
|
||||||
R_CalcTiltedLighting (toint (vz), toint (uz), width);
|
R_CalcTiltedLighting (xs_RoundToInt(vz), xs_RoundToInt(uz), width);
|
||||||
}
|
}
|
||||||
|
|
||||||
uz = plane_su[2] + plane_su[1]*(centery-y) + plane_su[0]*(x1-centerx);
|
uz = plane_su[2] + plane_su[1]*(centery-y) + plane_su[0]*(x1-centerx);
|
||||||
|
|
|
@ -2006,29 +2006,29 @@ void PrepWall (fixed_t *swall, fixed_t *lwall, fixed_t walxrepeat)
|
||||||
x = WallSX1;
|
x = WallSX1;
|
||||||
|
|
||||||
l = top / bot;
|
l = top / bot;
|
||||||
swall[x] = quickertoint (l * WallDepthScale + WallDepthOrg);
|
swall[x] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
|
||||||
lwall[x] = quickertoint (l * xrepeat);
|
lwall[x] = xs_RoundToInt(l * xrepeat);
|
||||||
// As long as l is invalid, step one column at a time so that
|
// As long as l is invalid, step one column at a time so that
|
||||||
// we can get as many correct texture columns as possible.
|
// we can get as many correct texture columns as possible.
|
||||||
while (l > 1.0 && x+1 < WallSX2)
|
while (l > 1.0 && x+1 < WallSX2)
|
||||||
{
|
{
|
||||||
l = (top += WallUoverZstep) / (bot += WallInvZstep);
|
l = (top += WallUoverZstep) / (bot += WallInvZstep);
|
||||||
x++;
|
x++;
|
||||||
swall[x] = quickertoint (l * WallDepthScale + WallDepthOrg);
|
swall[x] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
|
||||||
lwall[x] = quickertoint (l * xrepeat);
|
lwall[x] = xs_RoundToInt(l * xrepeat);
|
||||||
}
|
}
|
||||||
l *= xrepeat;
|
l *= xrepeat;
|
||||||
while (x+4 < WallSX2)
|
while (x+4 < WallSX2)
|
||||||
{
|
{
|
||||||
top += topinc; bot += botinc;
|
top += topinc; bot += botinc;
|
||||||
ol = l; l = top / bot;
|
ol = l; l = top / bot;
|
||||||
swall[x+4] = quickertoint (l * WallDepthScale + WallDepthOrg);
|
swall[x+4] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
|
||||||
lwall[x+4] = quickertoint (l *= xrepeat);
|
lwall[x+4] = xs_RoundToInt(l *= xrepeat);
|
||||||
|
|
||||||
i = (ol+l) * 0.5f;
|
i = (ol+l) * 0.5f;
|
||||||
lwall[x+2] = quickertoint (i);
|
lwall[x+2] = xs_RoundToInt(i);
|
||||||
lwall[x+1] = quickertoint ((ol+i) * 0.5f);
|
lwall[x+1] = xs_RoundToInt((ol+i) * 0.5f);
|
||||||
lwall[x+3] = quickertoint ((l+i) * 0.5f);
|
lwall[x+3] = xs_RoundToInt((l+i) * 0.5f);
|
||||||
swall[x+2] = ((swall[x]+swall[x+4])>>1);
|
swall[x+2] = ((swall[x]+swall[x+4])>>1);
|
||||||
swall[x+1] = ((swall[x]+swall[x+2])>>1);
|
swall[x+1] = ((swall[x]+swall[x+2])>>1);
|
||||||
swall[x+3] = ((swall[x+4]+swall[x+2])>>1);
|
swall[x+3] = ((swall[x+4]+swall[x+2])>>1);
|
||||||
|
@ -2038,25 +2038,25 @@ void PrepWall (fixed_t *swall, fixed_t *lwall, fixed_t walxrepeat)
|
||||||
{
|
{
|
||||||
top += topinc * 0.5f; bot += botinc * 0.5f;
|
top += topinc * 0.5f; bot += botinc * 0.5f;
|
||||||
ol = l; l = top / bot;
|
ol = l; l = top / bot;
|
||||||
swall[x+2] = quickertoint (l * WallDepthScale + WallDepthOrg);
|
swall[x+2] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
|
||||||
lwall[x+2] = quickertoint (l *= xrepeat);
|
lwall[x+2] = xs_RoundToInt(l *= xrepeat);
|
||||||
|
|
||||||
lwall[x+1] = quickertoint ((l+ol)*0.5f);
|
lwall[x+1] = xs_RoundToInt((l+ol)*0.5f);
|
||||||
swall[x+1] = (swall[x]+swall[x+2])>>1;
|
swall[x+1] = (swall[x]+swall[x+2])>>1;
|
||||||
x += 2;
|
x += 2;
|
||||||
}
|
}
|
||||||
if (x+1 < WallSX2)
|
if (x+1 < WallSX2)
|
||||||
{
|
{
|
||||||
l = (top + WallUoverZstep) / (bot + WallInvZstep);
|
l = (top + WallUoverZstep) / (bot + WallInvZstep);
|
||||||
swall[x+1] = quickertoint (l * WallDepthScale + WallDepthOrg);
|
swall[x+1] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
|
||||||
lwall[x+1] = quickertoint (l * xrepeat);
|
lwall[x+1] = xs_RoundToInt(l * xrepeat);
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
for (x = WallSX1; x < WallSX2; x++)
|
for (x = WallSX1; x < WallSX2; x++)
|
||||||
{
|
{
|
||||||
frac = top / bot;
|
frac = top / bot;
|
||||||
lwall[x] = quickertoint (frac * xrepeat);
|
lwall[x] = xs_RoundToInt(frac * xrepeat);
|
||||||
swall[x] = quickertoint (frac * WallDepthScale + WallDepthOrg);
|
swall[x] = xs_RoundToInt(frac * WallDepthScale + WallDepthOrg);
|
||||||
top += WallUoverZstep;
|
top += WallUoverZstep;
|
||||||
bot += WallInvZstep;
|
bot += WallInvZstep;
|
||||||
}
|
}
|
||||||
|
@ -2108,39 +2108,39 @@ void PrepLWall (fixed_t *lwall, fixed_t walxrepeat)
|
||||||
x = WallSX1;
|
x = WallSX1;
|
||||||
|
|
||||||
l = top / bot;
|
l = top / bot;
|
||||||
lwall[x] = quickertoint (l * xrepeat);
|
lwall[x] = xs_RoundToInt(l * xrepeat);
|
||||||
// As long as l is invalid, step one column at a time so that
|
// As long as l is invalid, step one column at a time so that
|
||||||
// we can get as many correct texture columns as possible.
|
// we can get as many correct texture columns as possible.
|
||||||
while (l > 1.0 && x+1 < WallSX2)
|
while (l > 1.0 && x+1 < WallSX2)
|
||||||
{
|
{
|
||||||
l = (top += WallUoverZstep) / (bot += WallInvZstep);
|
l = (top += WallUoverZstep) / (bot += WallInvZstep);
|
||||||
lwall[++x] = quickertoint (l * xrepeat);
|
lwall[++x] = xs_RoundToInt(l * xrepeat);
|
||||||
}
|
}
|
||||||
l *= xrepeat;
|
l *= xrepeat;
|
||||||
while (x+4 < WallSX2)
|
while (x+4 < WallSX2)
|
||||||
{
|
{
|
||||||
top += topinc; bot += botinc;
|
top += topinc; bot += botinc;
|
||||||
ol = l; l = top / bot;
|
ol = l; l = top / bot;
|
||||||
lwall[x+4] = quickertoint (l *= xrepeat);
|
lwall[x+4] = xs_RoundToInt(l *= xrepeat);
|
||||||
|
|
||||||
i = (ol+l) * 0.5f;
|
i = (ol+l) * 0.5f;
|
||||||
lwall[x+2] = quickertoint (i);
|
lwall[x+2] = xs_RoundToInt(i);
|
||||||
lwall[x+1] = quickertoint ((ol+i) * 0.5f);
|
lwall[x+1] = xs_RoundToInt((ol+i) * 0.5f);
|
||||||
lwall[x+3] = quickertoint ((l+i) * 0.5f);
|
lwall[x+3] = xs_RoundToInt((l+i) * 0.5f);
|
||||||
x += 4;
|
x += 4;
|
||||||
}
|
}
|
||||||
if (x+2 < WallSX2)
|
if (x+2 < WallSX2)
|
||||||
{
|
{
|
||||||
top += topinc * 0.5f; bot += botinc * 0.5f;
|
top += topinc * 0.5f; bot += botinc * 0.5f;
|
||||||
ol = l; l = top / bot;
|
ol = l; l = top / bot;
|
||||||
lwall[x+2] = quickertoint (l *= xrepeat);
|
lwall[x+2] = xs_RoundToInt(l *= xrepeat);
|
||||||
lwall[x+1] = quickertoint ((l+ol)*0.5f);
|
lwall[x+1] = xs_RoundToInt((l+ol)*0.5f);
|
||||||
x += 2;
|
x += 2;
|
||||||
}
|
}
|
||||||
if (x+1 < WallSX2)
|
if (x+1 < WallSX2)
|
||||||
{
|
{
|
||||||
l = (top + WallUoverZstep) / (bot + WallInvZstep);
|
l = (top + WallUoverZstep) / (bot + WallInvZstep);
|
||||||
lwall[x+1] = quickertoint (l * xrepeat);
|
lwall[x+1] = xs_RoundToInt(l * xrepeat);
|
||||||
}
|
}
|
||||||
|
|
||||||
// fix for rounding errors
|
// fix for rounding errors
|
||||||
|
|
|
@ -49,6 +49,7 @@
|
||||||
#include "p_lnspec.h"
|
#include "p_lnspec.h"
|
||||||
#include "doomstat.h"
|
#include "doomstat.h"
|
||||||
#include "thingdef_exp.h"
|
#include "thingdef_exp.h"
|
||||||
|
#include "m_fixed.h"
|
||||||
|
|
||||||
int testglobalvar = 1337; // just for having one global variable to test with
|
int testglobalvar = 1337; // just for having one global variable to test with
|
||||||
DEFINE_GLOBAL_VARIABLE(testglobalvar)
|
DEFINE_GLOBAL_VARIABLE(testglobalvar)
|
||||||
|
|
|
@ -1230,7 +1230,4 @@ typedef TRotator<float> FRotator;
|
||||||
typedef TMatrix3x3<float> FMatrix3x3;
|
typedef TMatrix3x3<float> FMatrix3x3;
|
||||||
typedef TAngle<float> FAngle;
|
typedef TAngle<float> FAngle;
|
||||||
|
|
||||||
#define FLOAT2FIXED(f) fixed_t((f) * float(65536))
|
|
||||||
#define FIXED2FLOAT(f) ((f) / float(65536))
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1593,6 +1593,10 @@
|
||||||
RelativePath=".\src\x86.h"
|
RelativePath=".\src\x86.h"
|
||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
|
<File
|
||||||
|
RelativePath=".\src\xs_Float.h"
|
||||||
|
>
|
||||||
|
</File>
|
||||||
<File
|
<File
|
||||||
RelativePath=".\src\zstring.h"
|
RelativePath=".\src\zstring.h"
|
||||||
>
|
>
|
||||||
|
|
Loading…
Reference in a new issue