- Removed the RSQRTSS code from GLSeg::Normal.

If we have to write compiler-specific code for micro-optimizations, I am out. The POSIX-compatible version nullified most of the advantage on MSVC by writing out the XMM register to memory and then reading back the float. That's not worth the hassle for an optimization that saves a few microseconds at best.
2024-11-11 07:12:16 +00:00 · 2017-03-13 13:17:15 +01:00 · 2017-03-13 13:17:15 +01:00 · feb680a4eb
commit feb680a4eb
parent 5fd86cf98c
1 changed files with 2 additions and 13 deletions
--- a/src/gl/scene/gl_wall.h
+++ b/src/gl/scene/gl_wall.h
@@ -9,9 +9,6 @@
 #include "r_data/renderstyle.h"
 #include "textures/textures.h"
 #include "gl/renderer/gl_colormap.h"
-#if defined(__amd64__) || defined(_M_X64)
-#include <xmmintrin.h>
-#endif // x64
 struct GLHorizonInfo;
 struct F3DFloor;
@@ -71,16 +68,8 @@ struct GLSeg
 		// we do not use the vector math inlines here because they are not optimized for speed but accuracy in the playsim and this is called quite frequently.
 		float x = y2 - y1;
 		float y = x1 - x2;
-#if defined(__amd64__) || defined(_M_X64)
-        __m128 v = _mm_set_ss(x*x + y*y);
-        v = _mm_rsqrt_ss(v);
-        float ilength;
-        _mm_store_ss(&ilength, v);
-        return FVector3(x * ilength, 0, y * ilength);
-#else
-		float length = sqrtf(x*x + y*y);
-		return FVector3(x / length, 0, y / length);
-#endif
+		float ilength = 1.f / sqrtf(x*x + y*y);
+		return FVector3(x * ilength, 0, y * ilength);
 	}
 };