diff --git a/docs/rh-log.txt b/docs/rh-log.txt index 8bf80f425..2b7bbbe9a 100644 --- a/docs/rh-log.txt +++ b/docs/rh-log.txt @@ -1,3 +1,15 @@ +November 19, 2009 +- Replaced toint/quickertoint with the portable routines from xs_Float.h. The + former used fistp, which is not portable across platforms, so cannot be + used in the play simulation. They were only suitable for the renderer. + xs_Float.h also has a very fast float->fixed conversion, so FLOAT2FIXED + uses that now. + (And I also learned that the FPU's round to nearest is not the rounding I + learned in grade school but actually Banker's Rounding. I had no idea.) + (Also, also, the only thing that could have made quickertoint faster than + toint was that it stored a 32-bit int. I never timed them, and I doubt in + practice there was any real difference between the two.) + November 18, 2009 - Added padding around packed textures to compensate for apparent NVidia texture coordinate imprecision. diff --git a/src/basicinlines.h b/src/basicinlines.h index 4bc211f99..fa6ce822b 100644 --- a/src/basicinlines.h +++ b/src/basicinlines.h @@ -201,13 +201,3 @@ static __forceinline SDWORD ksgn (SDWORD a) else if (a > 0) return 1; else return 0; } - -static __forceinline int toint (float v) -{ - return int(v); -} - -static __forceinline int quickertoint (float v) -{ - return int(v); -} diff --git a/src/basictypes.h b/src/basictypes.h index 37d7aec75..3aa4f4635 100644 --- a/src/basictypes.h +++ b/src/basictypes.h @@ -23,6 +23,10 @@ typedef int64_t SQWORD; typedef uint64_t QWORD; #endif +typedef SDWORD int32; +typedef float real32; +typedef double real64; + // windef.h, included by windows.h, has its own incompatible definition // of DWORD as a long. In files that mix Doom and Windows code, you // must define USE_WINDOWS_DWORD before including doomtype.h so that diff --git a/src/g_skill.cpp b/src/g_skill.cpp index 3d2ef9bf2..a91e38264 100644 --- a/src/g_skill.cpp +++ b/src/g_skill.cpp @@ -40,6 +40,7 @@ #include "gi.h" #include "templates.h" #include "v_font.h" +#include "m_fixed.h" TArray AllSkills; int DefaultSkill = -1; diff --git a/src/gccinlines.h b/src/gccinlines.h index 3b987895a..4f1ec7e5e 100644 --- a/src/gccinlines.h +++ b/src/gccinlines.h @@ -331,27 +331,3 @@ static inline SDWORD ksgn (SDWORD a) :"%cc"); return result; } - -static inline int toint (float v) -{ - volatile QWORD result; - - asm volatile - ("fistpq %0" - :"=m" (result) - :"t" (v) - :"%st"); - return result; -} - -static inline int quickertoint (float v) -{ - volatile int result; - - asm volatile - ("fistpl %0" - :"=m" (result) - :"t" (v) - :"%st"); - return result; -} diff --git a/src/m_fixed.h b/src/m_fixed.h index 3d7f577b7..14db59d63 100644 --- a/src/m_fixed.h +++ b/src/m_fixed.h @@ -20,6 +20,8 @@ #include "basicinlines.h" #endif +#include "xs_Float.h" + #define MAKESAFEDIVSCALE(x) \ inline SDWORD SafeDivScale##x (SDWORD a, SDWORD b) \ { \ @@ -134,4 +136,8 @@ inline SDWORD ModDiv (SDWORD num, SDWORD den, SDWORD *dmval) return num % den; } + +#define FLOAT2FIXED(f) xs_Fix<16>::ToFix(f) +#define FIXED2FLOAT(f) ((f) / float(65536)) + #endif diff --git a/src/mscinlines.h b/src/mscinlines.h index 0a5fdb798..c7f2527f4 100644 --- a/src/mscinlines.h +++ b/src/mscinlines.h @@ -348,20 +348,4 @@ __forceinline SDWORD ksgn (SDWORD a) __asm adc eax,0 } -__forceinline int toint (float v) -{ - SQWORD res; - __asm fld v; - __asm fistp res; - return (int)res; -} - -__forceinline int quickertoint (float v) -{ - SDWORD res; - __asm fld v; - __asm fistp res; - return (int)res; -} - #pragma warning (default: 4035) diff --git a/src/r_main.cpp b/src/r_main.cpp index b528373ee..789388842 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -326,9 +326,7 @@ angle_t R_PointToAngle2 (fixed_t x1, fixed_t y1, fixed_t x, fixed_t y) else { // we have to use the slower but more precise floating point atan2 function here. - // (use quickertoint to speed this up because the CRT's conversion is rather slow and - // this is used in time critical code.) - return quickertoint((float)(atan2f(float(y), float(x)) * (ANGLE_180/M_PI))); + return xs_RoundToUInt(atan2(double(y), double(x)) * (ANGLE_180/M_PI)); } } @@ -534,7 +532,7 @@ void R_SetVisibility (float vis) return; } - r_BaseVisibility = toint (vis * 65536.f); + r_BaseVisibility = xs_RoundToInt(vis * 65536.f); // Prevent overflow on walls if (r_BaseVisibility < 0 && r_BaseVisibility < -MaxVisForWall) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 7fb98e4fc..c9612c24c 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -336,7 +336,7 @@ void R_MapTiltedPlane (int y, int x1) { uz = (iz + plane_sz[0]*width) * planelightfloat; vz = iz * planelightfloat; - R_CalcTiltedLighting (toint (vz), toint (uz), width); + R_CalcTiltedLighting (xs_RoundToInt(vz), xs_RoundToInt(uz), width); } uz = plane_su[2] + plane_su[1]*(centery-y) + plane_su[0]*(x1-centerx); diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 8782f5631..06b793af4 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -2006,29 +2006,29 @@ void PrepWall (fixed_t *swall, fixed_t *lwall, fixed_t walxrepeat) x = WallSX1; l = top / bot; - swall[x] = quickertoint (l * WallDepthScale + WallDepthOrg); - lwall[x] = quickertoint (l * xrepeat); + swall[x] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg); + lwall[x] = xs_RoundToInt(l * xrepeat); // As long as l is invalid, step one column at a time so that // we can get as many correct texture columns as possible. while (l > 1.0 && x+1 < WallSX2) { l = (top += WallUoverZstep) / (bot += WallInvZstep); x++; - swall[x] = quickertoint (l * WallDepthScale + WallDepthOrg); - lwall[x] = quickertoint (l * xrepeat); + swall[x] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg); + lwall[x] = xs_RoundToInt(l * xrepeat); } l *= xrepeat; while (x+4 < WallSX2) { top += topinc; bot += botinc; ol = l; l = top / bot; - swall[x+4] = quickertoint (l * WallDepthScale + WallDepthOrg); - lwall[x+4] = quickertoint (l *= xrepeat); + swall[x+4] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg); + lwall[x+4] = xs_RoundToInt(l *= xrepeat); i = (ol+l) * 0.5f; - lwall[x+2] = quickertoint (i); - lwall[x+1] = quickertoint ((ol+i) * 0.5f); - lwall[x+3] = quickertoint ((l+i) * 0.5f); + lwall[x+2] = xs_RoundToInt(i); + lwall[x+1] = xs_RoundToInt((ol+i) * 0.5f); + lwall[x+3] = xs_RoundToInt((l+i) * 0.5f); swall[x+2] = ((swall[x]+swall[x+4])>>1); swall[x+1] = ((swall[x]+swall[x+2])>>1); swall[x+3] = ((swall[x+4]+swall[x+2])>>1); @@ -2038,25 +2038,25 @@ void PrepWall (fixed_t *swall, fixed_t *lwall, fixed_t walxrepeat) { top += topinc * 0.5f; bot += botinc * 0.5f; ol = l; l = top / bot; - swall[x+2] = quickertoint (l * WallDepthScale + WallDepthOrg); - lwall[x+2] = quickertoint (l *= xrepeat); + swall[x+2] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg); + lwall[x+2] = xs_RoundToInt(l *= xrepeat); - lwall[x+1] = quickertoint ((l+ol)*0.5f); + lwall[x+1] = xs_RoundToInt((l+ol)*0.5f); swall[x+1] = (swall[x]+swall[x+2])>>1; x += 2; } if (x+1 < WallSX2) { l = (top + WallUoverZstep) / (bot + WallInvZstep); - swall[x+1] = quickertoint (l * WallDepthScale + WallDepthOrg); - lwall[x+1] = quickertoint (l * xrepeat); + swall[x+1] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg); + lwall[x+1] = xs_RoundToInt(l * xrepeat); } /* for (x = WallSX1; x < WallSX2; x++) { frac = top / bot; - lwall[x] = quickertoint (frac * xrepeat); - swall[x] = quickertoint (frac * WallDepthScale + WallDepthOrg); + lwall[x] = xs_RoundToInt(frac * xrepeat); + swall[x] = xs_RoundToInt(frac * WallDepthScale + WallDepthOrg); top += WallUoverZstep; bot += WallInvZstep; } @@ -2108,39 +2108,39 @@ void PrepLWall (fixed_t *lwall, fixed_t walxrepeat) x = WallSX1; l = top / bot; - lwall[x] = quickertoint (l * xrepeat); + lwall[x] = xs_RoundToInt(l * xrepeat); // As long as l is invalid, step one column at a time so that // we can get as many correct texture columns as possible. while (l > 1.0 && x+1 < WallSX2) { l = (top += WallUoverZstep) / (bot += WallInvZstep); - lwall[++x] = quickertoint (l * xrepeat); + lwall[++x] = xs_RoundToInt(l * xrepeat); } l *= xrepeat; while (x+4 < WallSX2) { top += topinc; bot += botinc; ol = l; l = top / bot; - lwall[x+4] = quickertoint (l *= xrepeat); + lwall[x+4] = xs_RoundToInt(l *= xrepeat); i = (ol+l) * 0.5f; - lwall[x+2] = quickertoint (i); - lwall[x+1] = quickertoint ((ol+i) * 0.5f); - lwall[x+3] = quickertoint ((l+i) * 0.5f); + lwall[x+2] = xs_RoundToInt(i); + lwall[x+1] = xs_RoundToInt((ol+i) * 0.5f); + lwall[x+3] = xs_RoundToInt((l+i) * 0.5f); x += 4; } if (x+2 < WallSX2) { top += topinc * 0.5f; bot += botinc * 0.5f; ol = l; l = top / bot; - lwall[x+2] = quickertoint (l *= xrepeat); - lwall[x+1] = quickertoint ((l+ol)*0.5f); + lwall[x+2] = xs_RoundToInt(l *= xrepeat); + lwall[x+1] = xs_RoundToInt((l+ol)*0.5f); x += 2; } if (x+1 < WallSX2) { l = (top + WallUoverZstep) / (bot + WallInvZstep); - lwall[x+1] = quickertoint (l * xrepeat); + lwall[x+1] = xs_RoundToInt(l * xrepeat); } // fix for rounding errors diff --git a/src/thingdef/thingdef_expression.cpp b/src/thingdef/thingdef_expression.cpp index eee2b3608..2511b93a8 100644 --- a/src/thingdef/thingdef_expression.cpp +++ b/src/thingdef/thingdef_expression.cpp @@ -49,6 +49,7 @@ #include "p_lnspec.h" #include "doomstat.h" #include "thingdef_exp.h" +#include "m_fixed.h" int testglobalvar = 1337; // just for having one global variable to test with DEFINE_GLOBAL_VARIABLE(testglobalvar) diff --git a/src/vectors.h b/src/vectors.h index ea11ab611..013077042 100644 --- a/src/vectors.h +++ b/src/vectors.h @@ -1230,7 +1230,4 @@ typedef TRotator FRotator; typedef TMatrix3x3 FMatrix3x3; typedef TAngle FAngle; -#define FLOAT2FIXED(f) fixed_t((f) * float(65536)) -#define FIXED2FLOAT(f) ((f) / float(65536)) - #endif diff --git a/zdoom.vcproj b/zdoom.vcproj index 681c2ce69..f31bbbbe2 100644 --- a/zdoom.vcproj +++ b/zdoom.vcproj @@ -1593,6 +1593,10 @@ RelativePath=".\src\x86.h" > + +