- Removed the RSQRTSS code from GLSeg::Normal.

If we have to write compiler-specific code for micro-optimizations, I am out.
The POSIX-compatible version nullified most of the advantage on MSVC by writing the XMM register out to memory and then reading the float back.
That's not worth the hassle for an optimization that brings a few microseconds at best.
This commit is contained in:
Christoph Oelckers 2017-03-13 13:17:15 +01:00
parent 5fd86cf98c
commit feb680a4eb

View file

@ -9,9 +9,6 @@
#include "r_data/renderstyle.h" #include "r_data/renderstyle.h"
#include "textures/textures.h" #include "textures/textures.h"
#include "gl/renderer/gl_colormap.h" #include "gl/renderer/gl_colormap.h"
#if defined(__amd64__) || defined(_M_X64)
#include <xmmintrin.h>
#endif // x64
struct GLHorizonInfo; struct GLHorizonInfo;
struct F3DFloor; struct F3DFloor;
@ -71,16 +68,8 @@ struct GLSeg
// we do not use the vector math inlines here because they are not optimized for speed but accuracy in the playsim and this is called quite frequently. // we do not use the vector math inlines here because they are not optimized for speed but accuracy in the playsim and this is called quite frequently.
float x = y2 - y1; float x = y2 - y1;
float y = x1 - x2; float y = x1 - x2;
#if defined(__amd64__) || defined(_M_X64) float ilength = 1.f / sqrtf(x*x + y*y);
__m128 v = _mm_set_ss(x*x + y*y); return FVector3(x * ilength, 0, y * ilength);
v = _mm_rsqrt_ss(v);
float ilength;
_mm_store_ss(&ilength, v);
return FVector3(x * ilength, 0, y * ilength);
#else
float length = sqrtf(x*x + y*y);
return FVector3(x / length, 0, y / length);
#endif
} }
}; };