- Replaced toint/quickertoint with the portable routines from xs_Float.h. The former used fistp, which is not portable across platforms, so they cannot be used in the play simulation. They were only suitable for the renderer. xs_Float.h also has a very fast float->fixed conversion, so FLOAT2FIXED uses that now. (And I also learned that the FPU's round to nearest is not the rounding I learned in grade school but actually Banker's Rounding. I had no idea.) (Also, also, the only thing that could have made quickertoint faster than toint was that it stored a 32-bit int. I never timed them, and I doubt in practice there was any real difference between the two.)
- Changed atan2f to atan2. Using floats is not a win, because the result is returned as a double on the x87 stack, which the caller then needs to cast down to a float using fst/fld.
SVN r1990 (trunk)
2025-06-01 01:20:58 +00:00 · 2009-11-20 05:34:20 +00:00 · 2009-11-20 05:34:20 +00:00 · 344dda4a1a
commit 344dda4a1a
parent eca9b4981b
13 changed files with 56 additions and 83 deletions
--- a/docs/rh-log.txt
+++ b/docs/rh-log.txt
@ -1,3 +1,15 @@
 November 19, 2009
 - Replaced toint/quickertoint with the portable routines from xs_Float.h. The
  former used fistp, which is not portable across platforms, so cannot be
  used in the play simulation. They were only suitable for the renderer.
  xs_Float.h also has a very fast float->fixed conversion, so FLOAT2FIXED
  uses that now.
  (And I also learned that the FPU's round to nearest is not the rounding I
  learned in grade school but actually Banker's Rounding. I had no idea.)
  (Also, also, the only thing that could have made quickertoint faster than
  toint was that it stored a 32-bit int. I never timed them, and I doubt in
  practice there was any real difference between the two.)
 November 18, 2009
 - Added padding around packed textures to compensate for apparent NVidia
  texture coordinate imprecision.
--- a/src/basicinlines.h
+++ b/src/basicinlines.h
@ -201,13 +201,3 @@ static __forceinline SDWORD ksgn (SDWORD a)
 	else if (a > 0) return 1;
 	else return 0;
 }
 static __forceinline int toint (float v)
 {
 	return int(v);
 }
 static __forceinline int quickertoint (float v)
 {
 	return int(v);
 }
--- a/src/basictypes.h
+++ b/src/basictypes.h
@ -23,6 +23,10 @@ typedef int64_t					SQWORD;
 typedef uint64_t				QWORD;
 #endif
 typedef SDWORD					int32;
 typedef float					real32;
 typedef double					real64;
 // windef.h, included by windows.h, has its own incompatible definition
 // of DWORD as a long. In files that mix Doom and Windows code, you
 // must define USE_WINDOWS_DWORD before including doomtype.h so that
--- a/src/g_skill.cpp
+++ b/src/g_skill.cpp
@ -40,6 +40,7 @@
 #include "gi.h"
 #include "templates.h"
 #include "v_font.h"
 #include "m_fixed.h"
 TArray<FSkillInfo> AllSkills;
 int DefaultSkill = -1;
--- a/src/gccinlines.h
+++ b/src/gccinlines.h
@ -331,27 +331,3 @@ static inline SDWORD ksgn (SDWORD a)
 		:"%cc");
 	return result;
 }
 static inline int toint (float v)
 {
 	volatile QWORD result;
 	asm volatile
 		("fistpq %0"
 		:"=m" (result)
 		:"t" (v)
 		:"%st");
 	return result;
 }
 static inline int quickertoint (float v)
 {
 	volatile int result;
 	asm volatile
 		("fistpl %0"
 		:"=m" (result)
 		:"t" (v)
 		:"%st");
 	return result;
 }
--- a/src/m_fixed.h
+++ b/src/m_fixed.h
@ -20,6 +20,8 @@
 #include "basicinlines.h"
 #endif
 #include "xs_Float.h"
 #define MAKESAFEDIVSCALE(x) \
 	inline SDWORD SafeDivScale##x (SDWORD a, SDWORD b) \
 	{ \
@ -134,4 +136,8 @@ inline SDWORD ModDiv (SDWORD num, SDWORD den, SDWORD *dmval)
 	return num % den;
 }
 #define FLOAT2FIXED(f)		xs_Fix<16>::ToFix(f)
 #define FIXED2FLOAT(f)		((f) / float(65536))
 #endif
--- a/src/mscinlines.h
+++ b/src/mscinlines.h
@ -348,20 +348,4 @@ __forceinline SDWORD ksgn (SDWORD a)
 	__asm adc eax,0
 }
 __forceinline int toint (float v)
 {
 	SQWORD res;
 	__asm fld v;
 	__asm fistp res;
 	return (int)res;
 }
 __forceinline int quickertoint (float v)
 {
 	SDWORD res;
 	__asm fld v;
 	__asm fistp res;
 	return (int)res;
 }
 #pragma warning (default: 4035)
--- a/src/r_main.cpp
+++ b/src/r_main.cpp
@ -326,9 +326,7 @@ angle_t R_PointToAngle2 (fixed_t x1, fixed_t y1, fixed_t x, fixed_t y)
 	else
 	{
 		// we have to use the slower but more precise floating point atan2 function here.
-		// (use quickertoint to speed this up because the CRT's conversion is rather slow and
-		//  this is used in time critical code.)
-		return quickertoint((float)(atan2f(float(y), float(x)) * (ANGLE_180/M_PI)));
+		return xs_RoundToUInt(atan2(double(y), double(x)) * (ANGLE_180/M_PI));
 	}
 }
@ -534,7 +532,7 @@ void R_SetVisibility (float vis)
 		return;
 	}
-	r_BaseVisibility = toint (vis * 65536.f);
+	r_BaseVisibility = xs_RoundToInt(vis * 65536.f);
 	// Prevent overflow on walls
 	if (r_BaseVisibility < 0 && r_BaseVisibility < -MaxVisForWall)
--- a/src/r_plane.cpp
+++ b/src/r_plane.cpp
@ -336,7 +336,7 @@ void R_MapTiltedPlane (int y, int x1)
 	{
 		uz = (iz + plane_sz[0]*width) * planelightfloat;
 		vz = iz * planelightfloat;
-		R_CalcTiltedLighting (toint (vz), toint (uz), width);
+		R_CalcTiltedLighting (xs_RoundToInt(vz), xs_RoundToInt(uz), width);
 	}
 	uz = plane_su[2] + plane_su[1]*(centery-y) + plane_su[0]*(x1-centerx);
--- a/src/r_segs.cpp
+++ b/src/r_segs.cpp
@ -2006,29 +2006,29 @@ void PrepWall (fixed_t *swall, fixed_t *lwall, fixed_t walxrepeat)
 	x = WallSX1;
 	l = top / bot;
-	swall[x] = quickertoint (l * WallDepthScale + WallDepthOrg);
+	swall[x] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
-	lwall[x] = quickertoint (l * xrepeat);
+	lwall[x] = xs_RoundToInt(l * xrepeat);
 	// As long as l is invalid, step one column at a time so that
 	// we can get as many correct texture columns as possible.
 	while (l > 1.0 && x+1 < WallSX2)
 	{
 		l = (top += WallUoverZstep) / (bot += WallInvZstep);
 		x++;
-		swall[x] = quickertoint (l * WallDepthScale + WallDepthOrg);
+		swall[x] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
-		lwall[x] = quickertoint (l * xrepeat);
+		lwall[x] = xs_RoundToInt(l * xrepeat);
 	}
 	l *= xrepeat;
 	while (x+4 < WallSX2)
 	{
 		top += topinc; bot += botinc;
 		ol = l; l = top / bot;
-		swall[x+4] = quickertoint (l * WallDepthScale + WallDepthOrg);
+		swall[x+4] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
-		lwall[x+4] = quickertoint (l *= xrepeat);
+		lwall[x+4] = xs_RoundToInt(l *= xrepeat);
 		i = (ol+l) * 0.5f;
-		lwall[x+2] = quickertoint (i);
+		lwall[x+2] = xs_RoundToInt(i);
-		lwall[x+1] = quickertoint ((ol+i) * 0.5f);
+		lwall[x+1] = xs_RoundToInt((ol+i) * 0.5f);
-		lwall[x+3] = quickertoint ((l+i) * 0.5f);
+		lwall[x+3] = xs_RoundToInt((l+i) * 0.5f);
 		swall[x+2] = ((swall[x]+swall[x+4])>>1);
 		swall[x+1] = ((swall[x]+swall[x+2])>>1);
 		swall[x+3] = ((swall[x+4]+swall[x+2])>>1);
@ -2038,25 +2038,25 @@ void PrepWall (fixed_t *swall, fixed_t *lwall, fixed_t walxrepeat)
 	{
 		top += topinc * 0.5f; bot += botinc * 0.5f;
 		ol = l; l = top / bot;
-		swall[x+2] = quickertoint (l * WallDepthScale + WallDepthOrg);
+		swall[x+2] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
-		lwall[x+2] = quickertoint (l *= xrepeat);
+		lwall[x+2] = xs_RoundToInt(l *= xrepeat);
-		lwall[x+1] = quickertoint ((l+ol)*0.5f);
+		lwall[x+1] = xs_RoundToInt((l+ol)*0.5f);
 		swall[x+1] = (swall[x]+swall[x+2])>>1;
 		x += 2;
 	}
 	if (x+1 < WallSX2)
 	{
 		l = (top + WallUoverZstep) / (bot + WallInvZstep);
-		swall[x+1] = quickertoint (l * WallDepthScale + WallDepthOrg);
+		swall[x+1] = xs_RoundToInt(l * WallDepthScale + WallDepthOrg);
-		lwall[x+1] = quickertoint (l * xrepeat);
+		lwall[x+1] = xs_RoundToInt(l * xrepeat);
 	}
 	/*
 	for (x = WallSX1; x < WallSX2; x++)
 	{
 		frac = top / bot;
-		lwall[x] = quickertoint (frac * xrepeat);
+		lwall[x] = xs_RoundToInt(frac * xrepeat);
-		swall[x] = quickertoint (frac * WallDepthScale + WallDepthOrg);
+		swall[x] = xs_RoundToInt(frac * WallDepthScale + WallDepthOrg);
 		top += WallUoverZstep;
 		bot += WallInvZstep;
 	}
@ -2108,39 +2108,39 @@ void PrepLWall (fixed_t *lwall, fixed_t walxrepeat)
 	x = WallSX1;
 	l = top / bot;
-	lwall[x] = quickertoint (l * xrepeat);
+	lwall[x] = xs_RoundToInt(l * xrepeat);
 	// As long as l is invalid, step one column at a time so that
 	// we can get as many correct texture columns as possible.
 	while (l > 1.0 && x+1 < WallSX2)
 	{
 		l = (top += WallUoverZstep) / (bot += WallInvZstep);
-		lwall[++x] = quickertoint (l * xrepeat);
+		lwall[++x] = xs_RoundToInt(l * xrepeat);
 	}
 	l *= xrepeat;
 	while (x+4 < WallSX2)
 	{
 		top += topinc; bot += botinc;
 		ol = l; l = top / bot;
-		lwall[x+4] = quickertoint (l *= xrepeat);
+		lwall[x+4] = xs_RoundToInt(l *= xrepeat);
 		i = (ol+l) * 0.5f;
-		lwall[x+2] = quickertoint (i);
+		lwall[x+2] = xs_RoundToInt(i);
-		lwall[x+1] = quickertoint ((ol+i) * 0.5f);
+		lwall[x+1] = xs_RoundToInt((ol+i) * 0.5f);
-		lwall[x+3] = quickertoint ((l+i) * 0.5f);
+		lwall[x+3] = xs_RoundToInt((l+i) * 0.5f);
 		x += 4;
 	}
 	if (x+2 < WallSX2)
 	{
 		top += topinc * 0.5f; bot += botinc * 0.5f;
 		ol = l; l = top / bot;
-		lwall[x+2] = quickertoint (l *= xrepeat);
+		lwall[x+2] = xs_RoundToInt(l *= xrepeat);
-		lwall[x+1] = quickertoint ((l+ol)*0.5f);
+		lwall[x+1] = xs_RoundToInt((l+ol)*0.5f);
 		x += 2;
 	}
 	if (x+1 < WallSX2)
 	{
 		l = (top + WallUoverZstep) / (bot + WallInvZstep);
-		lwall[x+1] = quickertoint (l * xrepeat);
+		lwall[x+1] = xs_RoundToInt(l * xrepeat);
 	}
 	// fix for rounding errors
--- a/src/thingdef/thingdef_expression.cpp
+++ b/src/thingdef/thingdef_expression.cpp
@ -49,6 +49,7 @@
 #include "p_lnspec.h"
 #include "doomstat.h"
 #include "thingdef_exp.h"
 #include "m_fixed.h"
 int testglobalvar = 1337;	// just for having one global variable to test with
 DEFINE_GLOBAL_VARIABLE(testglobalvar)
--- a/src/vectors.h
+++ b/src/vectors.h
@ -1230,7 +1230,4 @@ typedef TRotator<float>		FRotator;
 typedef TMatrix3x3<float>	FMatrix3x3;
 typedef TAngle<float>		FAngle;
 #define FLOAT2FIXED(f)		fixed_t((f) * float(65536))
 #define FIXED2FLOAT(f)		((f) / float(65536))
 #endif
--- a/zdoom.vcproj
+++ b/zdoom.vcproj
@ -1593,6 +1593,10 @@
 				RelativePath=".\src\x86.h"
 				>
 			</File>
 			<File
 				RelativePath=".\src\xs_Float.h"
 				>
 			</File>
 			<File
 				RelativePath=".\src\zstring.h"
 				>