From 735157aea47e25d8d035d7ea27ac17bf61e7b631 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 25 Feb 2017 01:22:54 +0100 Subject: [PATCH] Bump minimum architecture to SSE 2 on the x86 platform (a Pentium 4 from 2001!) --- CMakeLists.txt | 31 ++++++++++--------- src/CMakeLists.txt | 1 - src/swrenderer/drawers/r_draw_rgba.h | 7 +++++ src/swrenderer/drawers/r_draw_span32_sse2.h | 10 +++--- src/swrenderer/drawers/r_draw_sprite32_sse2.h | 10 +++--- src/swrenderer/drawers/r_draw_wall32_sse2.h | 10 +++--- 6 files changed, 39 insertions(+), 30 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b52e680dd..9df5498e4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -174,20 +174,23 @@ if( MSVC ) # Disable run-time type information set( ALL_C_FLAGS "/GF /Gy /GR-" ) - if( CMAKE_SIZEOF_VOID_P MATCHES "4") - # SSE2 option (to allow x87 in 32 bit and disallow extended feature sets which have not yet been checked for precision) - option (ZDOOM_USE_SSE2 "Use SSE2 instruction set") - if (ZDOOM_USE_SSE2) - set( ALL_C_FLAGS "${ALL_C_FLAGS} /arch:SSE2") - else () - if (MSVC_VERSION GREATER 1699) - # On Visual C++ 2012 and later SSE2 is the default, so we need to switch it off explicitly - set( ALL_C_FLAGS "${ALL_C_FLAGS} /arch:IA32") - endif () - endif () - else() - set( ALL_C_FLAGS "${ALL_C_FLAGS} /arch:SSE2") - endif() + # Use SSE 2 as minimum always as the true color drawers needs it for __vectorcall + set( ALL_C_FLAGS "${ALL_C_FLAGS} /arch:SSE2") + +# if( CMAKE_SIZEOF_VOID_P MATCHES "4") +# # SSE2 option (to allow x87 in 32 bit and disallow extended feature sets which have not yet been checked for precision) +# option (ZDOOM_USE_SSE2 "Use SSE2 instruction set") +# if (ZDOOM_USE_SSE2) +# set( ALL_C_FLAGS "${ALL_C_FLAGS} /arch:SSE2") +# else () +# if (MSVC_VERSION GREATER 1699) +# # On Visual C++ 2012 and later SSE2 is the default, so we need to switch it off explicitly +# set( ALL_C_FLAGS "${ALL_C_FLAGS} /arch:IA32") +# endif () +# endif () +# else() +# set( ALL_C_FLAGS "${ALL_C_FLAGS} /arch:SSE2") +# endif() # Avoid CRT DLL dependancies in release builds, optionally generate assembly output for checking crash locations. option( ZDOOM_GENERATE_ASM "Generate assembly output." OFF ) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 897d78721..a2e2bc053 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -791,7 +791,6 @@ file( GLOB HEADER_FILES xlat/*.h swrenderer/*.h swrenderer/drawers/*.h - swrenderer/drawers/*.php swrenderer/scene/*.h swrenderer/segments/*.h swrenderer/line/*.h diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index 2eab9a185..3e95c8cb6 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -67,6 +67,13 @@ namespace swrenderer #endif #endif + // Force the compiler to use a calling convention that works for vector types + #if defined(_MSC_VER) + #define VECTORCALL __vectorcall + #else + #define VECTORCALL + #endif + class DrawFuzzColumnRGBACommand : public DrawerCommand { int _x; diff --git a/src/swrenderer/drawers/r_draw_span32_sse2.h b/src/swrenderer/drawers/r_draw_span32_sse2.h index 34daaeb97..e8ee704db 100644 --- a/src/swrenderer/drawers/r_draw_span32_sse2.h +++ b/src/swrenderer/drawers/r_draw_span32_sse2.h @@ -152,7 +152,7 @@ namespace swrenderer } template - void Loop(DrawerThread *thread, TextureData texdata, ShadeConstants shade_constants) + FORCEINLINE void VECTORCALL Loop(DrawerThread *thread, TextureData texdata, ShadeConstants shade_constants) { using namespace DrawSpan32TModes; @@ -264,7 +264,7 @@ namespace swrenderer } template - unsigned int Sample(uint32_t xbits, uint32_t ybits, uint32_t xstep, uint32_t ystep, uint32_t xfrac, uint32_t yfrac, uint32_t yshift, uint32_t xshift, uint32_t xmask, const uint32_t *source) + FORCEINLINE unsigned int VECTORCALL Sample(uint32_t xbits, uint32_t ybits, uint32_t xstep, uint32_t ystep, uint32_t xfrac, uint32_t yfrac, uint32_t yshift, uint32_t xshift, uint32_t xmask, const uint32_t *source) { using namespace DrawSpan32TModes; @@ -319,7 +319,7 @@ namespace swrenderer } template - __m128i Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, const DrawerLight *lights, int num_lights, __m128 viewpos_x) + FORCEINLINE __m128i VECTORCALL Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, const DrawerLight *lights, int num_lights, __m128 viewpos_x) { using namespace DrawSpan32TModes; @@ -351,7 +351,7 @@ namespace swrenderer return AddLights(material, fgcolor, lights, num_lights, viewpos_x); } - __m128i AddLights(__m128i material, __m128i fgcolor, const DrawerLight *lights, int num_lights, __m128 viewpos_x) + FORCEINLINE __m128i VECTORCALL AddLights(__m128i material, __m128i fgcolor, const DrawerLight *lights, int num_lights, __m128 viewpos_x) { using namespace DrawSpan32TModes; @@ -398,7 +398,7 @@ namespace swrenderer return fgcolor; } - __m128i Blend(__m128i fgcolor, __m128i bgcolor, uint32_t srcalpha, uint32_t destalpha, unsigned int ifgcolor0, unsigned int ifgcolor1) + FORCEINLINE __m128i VECTORCALL Blend(__m128i fgcolor, __m128i bgcolor, uint32_t srcalpha, uint32_t destalpha, unsigned int ifgcolor0, unsigned int ifgcolor1) { using namespace DrawSpan32TModes; diff --git a/src/swrenderer/drawers/r_draw_sprite32_sse2.h b/src/swrenderer/drawers/r_draw_sprite32_sse2.h index 0ec3e63d8..dc4a42172 100644 --- a/src/swrenderer/drawers/r_draw_sprite32_sse2.h +++ b/src/swrenderer/drawers/r_draw_sprite32_sse2.h @@ -100,7 +100,7 @@ namespace swrenderer } template - void Loop(DrawerThread *thread, ShadeConstants shade_constants) + FORCEINLINE void VECTORCALL Loop(DrawerThread *thread, ShadeConstants shade_constants) { using namespace DrawSprite32TModes; @@ -254,7 +254,7 @@ namespace swrenderer } template - unsigned int Sample(uint32_t frac, const uint32_t *source, const uint32_t *source2, const uint32_t *translation, int textureheight, uint32_t one, uint32_t texturefracx, uint32_t color, uint32_t srccolor) + FORCEINLINE unsigned int VECTORCALL Sample(uint32_t frac, const uint32_t *source, const uint32_t *source2, const uint32_t *translation, int textureheight, uint32_t one, uint32_t texturefracx, uint32_t color, uint32_t srccolor) { using namespace DrawSprite32TModes; @@ -303,7 +303,7 @@ namespace swrenderer } } - unsigned int SampleShade(uint32_t frac, const uint32_t *source, const uint8_t *colormap) + FORCEINLINE unsigned int VECTORCALL SampleShade(uint32_t frac, const uint32_t *source, const uint8_t *colormap) { using namespace DrawSprite32TModes; @@ -320,7 +320,7 @@ namespace swrenderer } template - __m128i Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, __m128i lightcontrib) + FORCEINLINE __m128i VECTORCALL Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, __m128i lightcontrib) { using namespace DrawSprite32TModes; @@ -359,7 +359,7 @@ namespace swrenderer } } - __m128i Blend(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, unsigned int ifgshade0, unsigned int ifgshade1, uint32_t srcalpha, uint32_t destalpha) + FORCEINLINE __m128i VECTORCALL Blend(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, unsigned int ifgshade0, unsigned int ifgshade1, uint32_t srcalpha, uint32_t destalpha) { using namespace DrawSprite32TModes; diff --git a/src/swrenderer/drawers/r_draw_wall32_sse2.h b/src/swrenderer/drawers/r_draw_wall32_sse2.h index 03be70e75..7c8057f1c 100644 --- a/src/swrenderer/drawers/r_draw_wall32_sse2.h +++ b/src/swrenderer/drawers/r_draw_wall32_sse2.h @@ -78,7 +78,7 @@ namespace swrenderer } template - void Loop(DrawerThread *thread, ShadeConstants shade_constants) + FORCEINLINE void VECTORCALL Loop(DrawerThread *thread, ShadeConstants shade_constants) { using namespace DrawWall32TModes; @@ -205,7 +205,7 @@ namespace swrenderer } template - unsigned int Sample(uint32_t frac, const uint32_t *source, const uint32_t *source2, int textureheight, uint32_t one, uint32_t texturefracx) + FORCEINLINE unsigned int VECTORCALL Sample(uint32_t frac, const uint32_t *source, const uint32_t *source2, int textureheight, uint32_t one, uint32_t texturefracx) { using namespace DrawWall32TModes; @@ -241,7 +241,7 @@ namespace swrenderer } template - __m128i Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, const DrawerLight *lights, int num_lights, __m128 viewpos_z) + FORCEINLINE __m128i VECTORCALL Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, const DrawerLight *lights, int num_lights, __m128 viewpos_z) { using namespace DrawWall32TModes; @@ -273,7 +273,7 @@ namespace swrenderer return AddLights(material, fgcolor, lights, num_lights, viewpos_z); } - __m128i AddLights(__m128i material, __m128i fgcolor, const DrawerLight *lights, int num_lights, __m128 viewpos_z) + FORCEINLINE __m128i VECTORCALL AddLights(__m128i material, __m128i fgcolor, const DrawerLight *lights, int num_lights, __m128 viewpos_z) { using namespace DrawWall32TModes; @@ -320,7 +320,7 @@ namespace swrenderer return fgcolor; } - __m128i Blend(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, uint32_t srcalpha, uint32_t destalpha) + FORCEINLINE __m128i VECTORCALL Blend(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, uint32_t srcalpha, uint32_t destalpha) { using namespace DrawWall32TModes;