From 4281dd6dcf43ec4ab5d6cece64847aeff7208e5e Mon Sep 17 00:00:00 2001 From: raynorpat Date: Tue, 25 May 2021 18:46:12 -0400 Subject: [PATCH 1/3] Some improvements to CMakeLists.txt: - Change tests for Clang and AppleClang to use MATCHES instead of STREQUAL - Add USE_FOLDERS global property, which helps keep the CMake default targets neat in IDEs - Sets the default startup project in MSVC to RBDoom3BFG instead of ALL_BUILD, helps for easier debugging when opening the solution for the first time - Sets the default debug directory path for MSVC, setting it to the root directory, which again helps for easier debugging --- neo/CMakeLists.txt | 76 ++++++++++++++++------------------------------ 1 file changed, 26 insertions(+), 50 deletions(-) diff --git a/neo/CMakeLists.txt b/neo/CMakeLists.txt index b9fce3b8..21ce9831 100644 --- a/neo/CMakeLists.txt +++ b/neo/CMakeLists.txt @@ -1,8 +1,14 @@ cmake_minimum_required(VERSION 3.2) -set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake) project(RBDoom3BFG) +set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake) +set_property(GLOBAL PROPERTY USE_FOLDERS ON) +if(NOT CMAKE_VERSION VERSION_LESS 3.6) + # set MSVC startup project + set_property (DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT RBDoom3BFG) +endif() + option(FORCE_COLOR_OUTPUT "Always produce ANSI-colored output (GNU/Clang only)." 
OFF) @@ -76,7 +82,6 @@ if(FFMPEG AND BINKDEC) message(FATAL_ERROR "Only one of FFMPEG and BINKDEC (or neither) can be enabled at a time") endif() - if(UNIX) set(OPENAL ON) @@ -97,14 +102,11 @@ endif() if (FORCE_COLOR_OUTPUT) if (CMAKE_COMPILER_IS_GNUCC) add_compile_options(-fdiagnostics-color=always) - # SRS - Add test for AppleClang - elseif(CMAKE_C_COMPILER_ID STREQUAL "Clang" OR CMAKE_C_COMPILER_ID STREQUAL "AppleClang") + elseif(CMAKE_C_COMPILER_ID MATCHES "Clang") add_compile_options(-fcolor-diagnostics) endif () endif () - - if(MSVC) #message(STATUS CMAKE_ROOT: ${CMAKE_ROOT}) @@ -121,8 +123,8 @@ else() message(STATUS CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}) endif() -# SRS - Add test for AppleClang -if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID STREQUAL "Clang" OR CMAKE_C_COMPILER_ID STREQUAL "AppleClang") +if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang") + add_definitions(-pipe) #add_definitions(-Wall) add_definitions(-Werror=format-security) @@ -164,8 +166,7 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID STREQUAL "Clang" OR CMAKE_C_CO #endif() add_compile_options(-Wno-pragmas -Wno-unused-variable -Wno-switch -Wno-unused-value -Winvalid-pch -Wno-multichar) - # SRS - Add test for AppleClang - if(CMAKE_C_COMPILER_ID STREQUAL "Clang" OR CMAKE_C_COMPILER_ID STREQUAL "AppleClang") + if(CMAKE_C_COMPILER_ID MATCHES "Clang") # add clang-specific settings for warnings (the second one make sure clang doesn't complain # about unknown -W flags, like -Wno-unused-but-set-variable) # SRS - Add -Wno-deprecated-register and -Wno-expansion-to-defined to list of warning settings @@ -266,7 +267,6 @@ elseif(MSVC) set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /SAFESEH:NO") set(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} /SAFESEH:NO") # endif() - add_definitions(-DWIN32 -D_WINDOWS @@ -274,9 +274,8 @@ elseif(MSVC) -D_CRT_NONSTDC_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS -D_MBCS - #-DUSE_OPENAL 
-DUSE_EXCEPTIONS) - ## Check for Version ## + if( WINRT OR WINDOWS10 ) # Windows RT add_definitions(-DUSE_WINRT) endif() @@ -297,6 +296,7 @@ elseif(MSVC) # 2: Use MFC in a Shared DLL set(CMAKE_MFC_FLAG 2) endif() + endif() #if(STANDALONE) @@ -426,8 +426,6 @@ if(USE_VULKAN) add_definitions(-DVK_USE_PLATFORM_WIN32_KHR) endif() - - else() if(USE_SYSTEM_LIBGLEW) @@ -624,7 +622,6 @@ endif() file(GLOB MINIZIP_INCLUDES libs/zlib/minizip/*.h) file(GLOB MINIZIP_SOURCES libs/zlib/minizip/*.c libs/zlib/minizip/*.cpp) - set(FREETYPE_SOURCES libs/freetype/src/autofit/autofit.c @@ -669,9 +666,6 @@ set(SOUND_INCLUDES sound/WaveFile.h) set(SOUND_SOURCES - #sound/snd_cache.cpp - #sound/snd_decoder.cpp - #sound/snd_efxfile.cpp sound/snd_emitter.cpp sound/snd_shader.cpp sound/snd_system.cpp @@ -858,9 +852,6 @@ file(GLOB COMPILER_AAS_SOURCES tools/compilers/aas/*.cpp) file(GLOB COMPILER_DMAP_INCLUDES tools/compilers/dmap/*.h) file(GLOB COMPILER_DMAP_SOURCES tools/compilers/dmap/*.cpp) -file(GLOB COMPILER_RENDERBUMP_INCLUDES tools/compilers/renderbump/*.h) -file(GLOB COMPILER_RENDERBUMP_SOURCES tools/compilers/renderbump/*.cpp) - file(GLOB COMPILER_ROQVQ_INCLUDES tools/compilers/roqvq/*.h) file(GLOB COMPILER_ROQVQ_SOURCES tools/compilers/roqvq/*.cpp) @@ -1261,9 +1252,6 @@ source_group("tools\\compilers\\aas" FILES ${COMPILER_AAS_SOURCES}) source_group("tools\\compilers\\dmap" FILES ${COMPILER_DMAP_INCLUDES}) source_group("tools\\compilers\\dmap" FILES ${COMPILER_DMAP_SOURCES}) -source_group("tools\\compilers\\renderbump" FILES ${COMPILER_RENDERBUMP_INCLUDES}) -source_group("tools\\compilers\\renderbump" FILES ${COMPILER_RENDERBUMP_SOURCES}) - source_group("tools\\compilers\\roqvq" FILES ${COMPILER_ROQVQ_INCLUDES}) source_group("tools\\compilers\\roqvq" FILES ${COMPILER_ROQVQ_SOURCES}) @@ -1460,9 +1448,8 @@ if(MSVC) list(APPEND RBDOOM3_INCLUDES ${RENDERER_VULKAN_INCLUDES}) list(APPEND RBDOOM3_SOURCES ${RENDERER_VULKAN_SOURCES}) - set(Vulkan_LIBRARIES - ${Vulkan_LIBRARY} - ) + 
set(Vulkan_LIBRARIES + ${Vulkan_LIBRARY}) if(SPIRV_SHADERC) list(APPEND Vulkan_LIBRARIES shaderc_combined) @@ -1631,13 +1618,7 @@ if(MSVC) list(APPEND RBDOOM3_SOURCES ${WIN32_RESOURCES}) add_executable(RBDoom3BFG WIN32 ${RBDOOM3_INCLUDES} ${RBDOOM3_SOURCES}) - - #add_custom_command(TARGET RBDoom3BFG - # PRE_BUILD - # COMMAND nmake ARGS /f Makefile.vc6 CFG=release - # WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/libs/curl/lib - # COMMENT "Compiling libcURL") - + add_dependencies(RBDoom3BFG idlib) target_link_libraries(RBDoom3BFG idlib @@ -1658,6 +1639,11 @@ if(MSVC) ${CMAKE_DL_LIBS} ) + # set MSVC default debug directory on executable + if(NOT CMAKE_VERSION VERSION_LESS 3.8) + set_target_properties(RBDoom3BFG PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/..") + endif() + #CMAKE_BINARY_DIR if(CMAKE_CL_64) install(TARGETS RBDoom3BFG @@ -1665,8 +1651,8 @@ if(MSVC) else() install(TARGETS RBDoom3BFG RUNTIME DESTINATION .) - endif() - + endif() + else() list(APPEND RBDOOM3_SOURCES @@ -1705,14 +1691,6 @@ else() find_package(OpenAL REQUIRED) add_definitions(-DUSE_OPENAL) - #include_directories(${CMAKE_CURRENT_SOURCE_DIR}/libs/openal-soft/include) - - #if(CMAKE_CL_64) - # link_directories(${CMAKE_CURRENT_SOURCE_DIR}/libs/openal-soft/lib/win64) - #else() - # link_directories(${CMAKE_CURRENT_SOURCE_DIR}/libs/openal-soft/lib/win32) - #endif() - list(APPEND RBDOOM3_INCLUDES ${OPENAL_INCLUDES}) list(APPEND RBDOOM3_SOURCES ${OPENAL_SOURCES} @@ -1728,11 +1706,9 @@ else() list(APPEND RBDOOM3_INCLUDES ${RENDERER_VULKAN_INCLUDES}) list(APPEND RBDOOM3_SOURCES ${RENDERER_VULKAN_SOURCES}) - set(Vulkan_LIBRARIES - ${Vulkan_LIBRARY} - ${X11_XCB_LIBRARIES} - ) - + set(Vulkan_LIBRARIES + ${Vulkan_LIBRARY}) + # SRS - Added UNIX/OSX support for shaderc if(SPIRV_SHADERC) list(APPEND Vulkan_LIBRARIES shaderc_combined) From 49e46df731f4f85d59fa8e2a353a3f7cace0ee7a Mon Sep 17 00:00:00 2001 From: raynorpat Date: Tue, 25 May 2021 20:04:41 -0400 Subject: 
[PATCH 2/3] Fix two compiler warnings in idlib/math/Vector.h - floor to floorf --- neo/idlib/math/Vector.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/neo/idlib/math/Vector.h b/neo/idlib/math/Vector.h index 362b35c1..61e87b2a 100644 --- a/neo/idlib/math/Vector.h +++ b/neo/idlib/math/Vector.h @@ -250,8 +250,8 @@ ID_INLINE void idVec2::Clamp( const idVec2& min, const idVec2& max ) ID_INLINE void idVec2::Snap() { - x = floor( x + 0.5f ); - y = floor( y + 0.5f ); + x = floorf( x + 0.5f ); + y = floorf( y + 0.5f ); } ID_INLINE void idVec2::SnapInt() @@ -838,9 +838,9 @@ ID_INLINE void idVec3::Clamp( const idVec3& min, const idVec3& max ) ID_INLINE void idVec3::Snap() { - x = floor( x + 0.5f ); - y = floor( y + 0.5f ); - z = floor( z + 0.5f ); + x = floorf( x + 0.5f ); + y = floorf( y + 0.5f ); + z = floorf( z + 0.5f ); } ID_INLINE void idVec3::SnapInt() From b91080902b2725401984ecc52677dd9af80f5570 Mon Sep 17 00:00:00 2001 From: raynorpat Date: Tue, 25 May 2021 20:06:37 -0400 Subject: [PATCH 3/3] Convert USE_INTRINSICS to USE_INTRINSICS_SSE and fix CMakeLists.txt logic surrounding its check to focus on x86 and x86_64 CPU types --- neo/CMakeLists.txt | 28 +++++------ ...make-linux-nvidia-jetson-vulkan-release.sh | 2 +- neo/cmake-linux-release-nosimd.sh | 2 +- neo/idlib/geometry/DrawVert.h | 4 +- neo/idlib/geometry/DrawVert_intrinsics.h | 6 +-- neo/idlib/geometry/RenderMatrix.cpp | 46 +++++++++---------- neo/idlib/math/Lcp.cpp | 2 +- neo/idlib/math/MatX.cpp | 2 +- neo/idlib/math/MatX.h | 2 +- neo/idlib/math/Math.cpp | 2 +- neo/idlib/math/Math.h | 10 ++-- neo/idlib/math/Simd.cpp | 4 +- neo/idlib/math/Simd_SSE.cpp | 4 +- neo/idlib/math/Simd_SSE.h | 2 +- neo/idlib/math/VecX.h | 2 +- neo/idlib/sys/sys_intrinsics.h | 8 ++-- neo/renderer/BufferObject.cpp | 2 +- neo/renderer/DXT/DXTCodec.h | 10 ++-- neo/renderer/DXT/DXTEncoder_SSE2.cpp | 4 +- neo/renderer/GLMatrix.cpp | 2 +- neo/renderer/ModelDecal.cpp | 4 +- neo/renderer/ModelOverlay.cpp | 6 +-- 
neo/renderer/Model_md5.cpp | 6 +-- neo/renderer/jobs/ShadowShared.cpp | 2 +- .../DynamicShadowVolume.cpp | 16 +++---- neo/renderer/tr_trace.cpp | 4 +- 26 files changed, 89 insertions(+), 93 deletions(-) diff --git a/neo/CMakeLists.txt b/neo/CMakeLists.txt index 21ce9831..abb24759 100644 --- a/neo/CMakeLists.txt +++ b/neo/CMakeLists.txt @@ -72,10 +72,10 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") set(CPU_OPTIMIZATION "-mmmx -msse -msse2" CACHE STRING "Which CPU specific optimitations should be used beside the compiler's default?") endif() -if(CMAKE_SYSTEM_PROCESSOR MATCHES "((powerpc|ppc)64le)|(mips64)") - option(USE_INTRINSICS "Compile using intrinsics (e.g mmx, sse, msse2)" OFF) +if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") + option(USE_INTRINSICS_SSE "Compile using SSE intrinsics (e.g mmx, sse, msse2)" ON) else() - option(USE_INTRINSICS "Compile using intrinsics (e.g mmx, sse, msse2)" ON) + option(USE_INTRINSICS_SSE "Compile using SSE intrinsics (e.g mmx, sse, msse2)" OFF) endif() if(FFMPEG AND BINKDEC) @@ -147,9 +147,6 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang") if (CPU_OPTIMIZATION) add_definitions(${CPU_OPTIMIZATION}) endif() - if (USE_INTRINSICS) - add_definitions(-DUSE_INTRINSICS) - endif() if(WIN32) # require msvcr70.dll or newer for _aligned_malloc etc # I think it is from Visual C++ .NET 2002, so it should be available on any remotely modern system. 
@@ -276,13 +273,9 @@ elseif(MSVC) -D_MBCS -DUSE_EXCEPTIONS) - if( WINRT OR WINDOWS10 ) # Windows RT - add_definitions(-DUSE_WINRT) - endif() - - if (USE_INTRINSICS) - add_definitions(-DUSE_INTRINSICS) - endif() + if( WINRT OR WINDOWS10 ) # Windows RT + add_definitions(-DUSE_WINRT) + endif() if(NOT CMAKE_CL_64) add_definitions(-D_USE_32BIT_TIME_T) @@ -299,6 +292,10 @@ elseif(MSVC) endif() +if (USE_INTRINSICS_SSE) + add_definitions(-DUSE_INTRINSICS_SSE) +endif() + #if(STANDALONE) # add_definitions(-DSTANDALONE) #endif() @@ -316,7 +313,7 @@ else (ZLIB_FOUND) endif (ZLIB_FOUND) if(USE_SYSTEM_LIBPNG) -find_package(PNG REQUIRED) + find_package(PNG REQUIRED) endif (USE_SYSTEM_LIBPNG) if (PNG_FOUND) @@ -327,9 +324,8 @@ else (PNG_FOUND) set(PNG_LIBRARY "" ) endif (PNG_FOUND) - if(USE_SYSTEM_LIBJPEG) -find_package(JPEG REQUIRED) + find_package(JPEG REQUIRED) endif(USE_SYSTEM_LIBJPEG) if (JPEG_FOUND) diff --git a/neo/cmake-linux-nvidia-jetson-vulkan-release.sh b/neo/cmake-linux-nvidia-jetson-vulkan-release.sh index 4f022002..78e1953b 100755 --- a/neo/cmake-linux-nvidia-jetson-vulkan-release.sh +++ b/neo/cmake-linux-nvidia-jetson-vulkan-release.sh @@ -2,4 +2,4 @@ cd .. rm -rf build mkdir build cd build -cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release -DSDL2=ON -DONATIVE=ON -DUSE_INTRINSICS=OFF -DUSE_VULKAN=ON -DSPIRV_SHADERC=OFF ../neo +cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release -DSDL2=ON -DONATIVE=ON -DUSE_VULKAN=ON -DSPIRV_SHADERC=OFF ../neo diff --git a/neo/cmake-linux-release-nosimd.sh b/neo/cmake-linux-release-nosimd.sh index 3be4e365..19ec0430 100755 --- a/neo/cmake-linux-release-nosimd.sh +++ b/neo/cmake-linux-release-nosimd.sh @@ -2,4 +2,4 @@ cd .. 
rm -rf build mkdir build cd build -cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release -DONATIVE=ON -DSDL2=ON -DUSE_INTRINSICS=OFF ../neo \ No newline at end of file +cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release -DONATIVE=ON -DSDL2=ON -DUSE_INTRINSICS_SSE=OFF ../neo \ No newline at end of file diff --git a/neo/idlib/geometry/DrawVert.h b/neo/idlib/geometry/DrawVert.h index 5d51d9a6..73434f46 100644 --- a/neo/idlib/geometry/DrawVert.h +++ b/neo/idlib/geometry/DrawVert.h @@ -219,7 +219,7 @@ ID_INLINE void VertexFloatToByte( const float& x, const float& y, const float& z { assert_4_byte_aligned( bval ); // for __stvebx -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) const __m128 vector_float_one = { 1.0f, 1.0f, 1.0f, 1.0f }; const __m128 vector_float_half = { 0.5f, 0.5f, 0.5f, 0.5f }; @@ -700,7 +700,7 @@ ID_INLINE void WriteDrawVerts16( idDrawVert* destVerts, const idDrawVert* localV assert_16_byte_aligned( destVerts ); assert_16_byte_aligned( localVerts ); -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) for( int i = 0; i < numVerts; i++ ) { diff --git a/neo/idlib/geometry/DrawVert_intrinsics.h b/neo/idlib/geometry/DrawVert_intrinsics.h index 2af33a05..6bef8feb 100644 --- a/neo/idlib/geometry/DrawVert_intrinsics.h +++ b/neo/idlib/geometry/DrawVert_intrinsics.h @@ -30,7 +30,7 @@ If you have questions concerning this license or the applicable additional terms #define __DRAWVERT_INTRINSICS_H__ -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) static const __m128i vector_int_f32_sign_mask = _mm_set1_epi32( 1U << IEEE_FLT_SIGN_BIT ); static const __m128i vector_int_f32_exponent_mask = _mm_set1_epi32( ( ( 1U << IEEE_FLT_EXPONENT_BITS ) - 1 ) << IEEE_FLT_MANTISSA_BITS ); static const __m128i vector_int_f32_mantissa_mask = _mm_set1_epi32( ( 1U << IEEE_FLT_MANTISSA_BITS ) - 1 ); @@ -58,7 +58,7 @@ static const __m128 vector_float_1_over_4 = { 1.0f / 4.0f, 1.0f / 4.0f, 1.0 FastF32toF16 ==================== */ -#if 
defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) ID_INLINE_EXTERN __m128i FastF32toF16( __m128i f32_bits ) { __m128i f16_sign = _mm_srli_epi32( _mm_and_si128( f32_bits, vector_int_f32_sign_mask ), f32_to_f16_sign_shift ); @@ -120,7 +120,7 @@ ID_INLINE_EXTERN halfFloat_t Scalar_FastF32toF16( float f32 ) LoadSkinnedDrawVertPosition ==================== */ -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) ID_INLINE_EXTERN __m128 LoadSkinnedDrawVertPosition( const idDrawVert& base, const idJointMat* joints ) { const idJointMat& j0 = joints[base.color[0]]; diff --git a/neo/idlib/geometry/RenderMatrix.cpp b/neo/idlib/geometry/RenderMatrix.cpp index f7dfd5e4..ca936160 100644 --- a/neo/idlib/geometry/RenderMatrix.cpp +++ b/neo/idlib/geometry/RenderMatrix.cpp @@ -109,7 +109,7 @@ SIMD constants ================================================================================================ */ -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) static const __m128i vector_int_1 = _mm_set1_epi32( 1 ); static const __m128i vector_int_4 = _mm_set1_epi32( 4 ); static const __m128i vector_int_0123 = _mm_set_epi32( 3, 2, 1, 0 ); @@ -560,7 +560,7 @@ front bits: bit 5 = pos-Z is front facing ======================== */ -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) static int GetBoxFrontBits_SSE2( const __m128& b0, const __m128& b1, const __m128& viewOrigin ) { const __m128 dir0 = _mm_sub_ps( viewOrigin, b0 ); @@ -776,7 +776,7 @@ void idRenderMatrix::OffsetScaleForBounds( const idRenderMatrix& src, const idBo { assert( &src != &out ); -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) __m128 b0 = _mm_loadu_bounds_0( bounds ); __m128 b1 = _mm_loadu_bounds_1( bounds ); @@ -862,7 +862,7 @@ void idRenderMatrix::InverseOffsetScaleForBounds( const idRenderMatrix& src, con { assert( &src != &out ); -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) __m128 b0 = _mm_loadu_bounds_0( bounds ); __m128 b1 = _mm_loadu_bounds_1( 
bounds ); @@ -930,7 +930,7 @@ void idRenderMatrix::Transpose( const idRenderMatrix& src, idRenderMatrix& out ) { assert( &src != &out ); -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) const __m128 a0 = _mm_loadu_ps( src.m + 0 * 4 ); const __m128 a1 = _mm_loadu_ps( src.m + 1 * 4 ); const __m128 a2 = _mm_loadu_ps( src.m + 2 * 4 ); @@ -978,7 +978,7 @@ idRenderMatrix::Multiply */ void idRenderMatrix::Multiply( const idRenderMatrix& a, const idRenderMatrix& b, idRenderMatrix& out ) { -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) __m128 a0 = _mm_loadu_ps( a.m + 0 * 4 ); __m128 a1 = _mm_loadu_ps( a.m + 1 * 4 ); __m128 a2 = _mm_loadu_ps( a.m + 2 * 4 ); @@ -1068,7 +1068,7 @@ can get really, really small. */ bool idRenderMatrix::Inverse( const idRenderMatrix& src, idRenderMatrix& out ) { -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) const __m128 r0 = _mm_loadu_ps( src.m + 0 * 4 ); const __m128 r1 = _mm_loadu_ps( src.m + 1 * 4 ); @@ -1382,7 +1382,7 @@ bool idRenderMatrix::InverseByDoubles( const idRenderMatrix& src, idRenderMatrix DeterminantIsNegative ======================== */ -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) void DeterminantIsNegative( bool& negativeDeterminant, const __m128& r0, const __m128& r1, const __m128& r2, const __m128& r3 ) { const __m128 r1u1 = _mm_perm_ps( r1, _MM_SHUFFLE( 2, 1, 0, 3 ) ); @@ -1464,7 +1464,7 @@ void idRenderMatrix::CopyMatrix( const idRenderMatrix& matrix, idVec4& row0, idV assert_16_byte_aligned( row2.ToFloatPtr() ); assert_16_byte_aligned( row3.ToFloatPtr() ); -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) const __m128 r0 = _mm_loadu_ps( matrix.m + 0 * 4 ); const __m128 r1 = _mm_loadu_ps( matrix.m + 1 * 4 ); const __m128 r2 = _mm_loadu_ps( matrix.m + 2 * 4 ); @@ -1506,7 +1506,7 @@ void idRenderMatrix::SetMVP( const idRenderMatrix& mvp, idVec4& row0, idVec4& ro assert_16_byte_aligned( row2.ToFloatPtr() ); assert_16_byte_aligned( row3.ToFloatPtr() ); -#if 
defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) const __m128 r0 = _mm_loadu_ps( mvp.m + 0 * 4 ); const __m128 r1 = _mm_loadu_ps( mvp.m + 1 * 4 ); const __m128 r2 = _mm_loadu_ps( mvp.m + 2 * 4 ); @@ -1553,7 +1553,7 @@ void idRenderMatrix::SetMVPForBounds( const idRenderMatrix& mvp, const idBounds& assert_16_byte_aligned( row2.ToFloatPtr() ); assert_16_byte_aligned( row3.ToFloatPtr() ); -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) __m128 b0 = _mm_loadu_bounds_0( bounds ); __m128 b1 = _mm_loadu_bounds_1( bounds ); @@ -1644,7 +1644,7 @@ void idRenderMatrix::SetMVPForInverseProject( const idRenderMatrix& mvp, const i assert_16_byte_aligned( row2.ToFloatPtr() ); assert_16_byte_aligned( row3.ToFloatPtr() ); -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) __m128 r0 = _mm_loadu_ps( mvp.m + 0 * 4 ); __m128 r1 = _mm_loadu_ps( mvp.m + 1 * 4 ); @@ -1788,7 +1788,7 @@ frustum plane, but only while also being behind another one. */ bool idRenderMatrix::CullBoundsToMVPbits( const idRenderMatrix& mvp, const idBounds& bounds, byte* outBits, bool zeroToOne ) { -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) __m128 mvp0 = _mm_loadu_ps( mvp[0] ); __m128 mvp1 = _mm_loadu_ps( mvp[1] ); __m128 mvp2 = _mm_loadu_ps( mvp[2] ); @@ -1974,7 +1974,7 @@ bool idRenderMatrix::CullExtrudedBoundsToMVPbits( const idRenderMatrix& mvp, con { assert( idMath::Fabs( extrudeDirection * clipPlane.Normal() ) >= idMath::FLT_SMALLEST_NON_DENORMAL ); -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) __m128 mvp0 = _mm_loadu_ps( mvp[0] ); __m128 mvp1 = _mm_loadu_ps( mvp[1] ); @@ -2292,7 +2292,7 @@ is W=0 clipped. 
*/ void idRenderMatrix::ProjectedBounds( idBounds& projected, const idRenderMatrix& mvp, const idBounds& bounds, bool windowSpace ) { -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) __m128 mvp0 = _mm_loadu_ps( mvp[0] ); __m128 mvp1 = _mm_loadu_ps( mvp[1] ); @@ -2540,7 +2540,7 @@ void idRenderMatrix::ProjectedNearClippedBounds( idBounds& projected, const idRe - X + */ -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) const __m128 mvp0 = _mm_loadu_ps( mvp[0] ); const __m128 mvp1 = _mm_loadu_ps( mvp[1] ); @@ -3179,7 +3179,7 @@ ClipHomogeneousPolygonToSide Clips a polygon with homogeneous coordinates to the axis aligned plane[axis] = sign * offset. ======================== */ -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) static void ClipHomogeneousPolygonToSide_SSE2( idVec4* __restrict newPoints, idVec4* __restrict points, int& numPoints, const int axis, const __m128& sign, const __m128& offset ) { @@ -3436,7 +3436,7 @@ the given bounds in which case the projected bounds should be set to fully cover */ void idRenderMatrix::ProjectedFullyClippedBounds( idBounds& projected, const idRenderMatrix& mvp, const idBounds& bounds, bool windowSpace ) { -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) const __m128 mvp0 = _mm_loadu_ps( mvp[0] ); const __m128 mvp1 = _mm_loadu_ps( mvp[1] ); @@ -3701,7 +3701,7 @@ The given bounding box is not clipped to the MVP so the depth bounds may not be */ void idRenderMatrix::DepthBoundsForBounds( float& min, float& max, const idRenderMatrix& mvp, const idBounds& bounds, bool windowSpace ) { -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) __m128 mvp2 = _mm_loadu_ps( mvp[2] ); __m128 mvp3 = _mm_loadu_ps( mvp[3] ); @@ -3833,7 +3833,7 @@ void idRenderMatrix::DepthBoundsForExtrudedBounds( float& min, float& max, const { assert( idMath::Fabs( extrudeDirection * clipPlane.Normal() ) >= idMath::FLT_SMALLEST_NON_DENORMAL ); -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) 
__m128 mvp2 = _mm_loadu_ps( mvp[2] ); __m128 mvp3 = _mm_loadu_ps( mvp[3] ); @@ -4114,7 +4114,7 @@ testing if the center of the far clipping plane is contained inside the shadow v */ void idRenderMatrix::DepthBoundsForShadowBounds( float& min, float& max, const idRenderMatrix& mvp, const idBounds& bounds, const idVec3& localLightOrigin, bool windowSpace ) { -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) const __m128 mvp0 = _mm_loadu_ps( mvp[0] ); const __m128 mvp1 = _mm_loadu_ps( mvp[1] ); @@ -4505,7 +4505,7 @@ void idRenderMatrix::GetFrustumCorners( frustumCorners_t& corners, const idRende { assert_16_byte_aligned( &corners ); -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) __m128 mvp0 = _mm_loadu_ps( frustumTransform[0] ); __m128 mvp1 = _mm_loadu_ps( frustumTransform[1] ); @@ -4619,7 +4619,7 @@ frustumCull_t idRenderMatrix::CullFrustumCornersToPlane( const frustumCorners_t& { assert_16_byte_aligned( &corners ); -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) __m128 vp = _mm_loadu_ps( plane.ToFloatPtr() ); diff --git a/neo/idlib/math/Lcp.cpp b/neo/idlib/math/Lcp.cpp index 28b69392..c9d9d4f6 100644 --- a/neo/idlib/math/Lcp.cpp +++ b/neo/idlib/math/Lcp.cpp @@ -44,7 +44,7 @@ const float LCP_DELTA_FORCE_EPSILON = 1e-9f; #define IGNORE_UNSATISFIABLE_VARIABLES -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) #define LCP_SIMD #endif diff --git a/neo/idlib/math/MatX.cpp b/neo/idlib/math/MatX.cpp index fb7a2ba7..e40197c8 100644 --- a/neo/idlib/math/MatX.cpp +++ b/neo/idlib/math/MatX.cpp @@ -201,7 +201,7 @@ void idMatX::CopyLowerToUpperTriangle() assert( ( GetNumColumns() & 3 ) == 0 ); assert( GetNumColumns() >= GetNumRows() ); -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) const int n = GetNumColumns(); const int m = GetNumRows(); diff --git a/neo/idlib/math/MatX.h b/neo/idlib/math/MatX.h index 3d221b93..5f25bafc 100644 --- a/neo/idlib/math/MatX.h +++ b/neo/idlib/math/MatX.h @@ -47,7 +47,7 @@ NOTE: due 
to the temporary memory pool idMatX cannot be used by multiple threads #define MATX_ALLOCA( n ) ( (float *) _alloca16( MATX_QUAD( n ) ) ) #define MATX_ALLOCA_CACHE_LINES( n ) ( (float *) _alloca128( ( ( n ) * sizeof( float ) + CACHE_LINE_SIZE - 1 ) & ~ ( CACHE_LINE_SIZE - 1 ) ) ) -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) #define MATX_SIMD #endif diff --git a/neo/idlib/math/Math.cpp b/neo/idlib/math/Math.cpp index 4e926051..df74654e 100644 --- a/neo/idlib/math/Math.cpp +++ b/neo/idlib/math/Math.cpp @@ -51,7 +51,7 @@ const float idMath::INFINITY = 1e30f; const float idMath::FLT_EPSILON = 1.192092896e-07f; const float idMath::FLT_SMALLEST_NON_DENORMAL = * reinterpret_cast< const float* >( & SMALLEST_NON_DENORMAL ); // 1.1754944e-038f -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) const __m128 idMath::SIMD_SP_zero = { 0.0f, 0.0f, 0.0f, 0.0f }; const __m128 idMath::SIMD_SP_255 = { 255.0f, 255.0f, 255.0f, 255.0f }; const __m128 idMath::SIMD_SP_min_char = { -128.0f, -128.0f, -128.0f, -128.0f }; diff --git a/neo/idlib/math/Math.h b/neo/idlib/math/Math.h index a4ffddce..93f7cbe0 100644 --- a/neo/idlib/math/Math.h +++ b/neo/idlib/math/Math.h @@ -462,7 +462,7 @@ public: static const float FLT_EPSILON; // smallest positive number such that 1.0+FLT_EPSILON != 1.0 static const float FLT_SMALLEST_NON_DENORMAL; // smallest non-denormal 32-bit floating point value -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) static const __m128 SIMD_SP_zero; static const __m128 SIMD_SP_255; static const __m128 SIMD_SP_min_char; @@ -1316,7 +1316,7 @@ ID_INLINE int idMath::Ftoi( float f ) // If a converted result is larger than the maximum signed doubleword integer, // the floating-point invalid exception is raised, and if this exception is masked, // the indefinite integer value (80000000H) is returned. 
-#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) __m128 x = _mm_load_ss( &f ); return _mm_cvttss_si32( x ); #elif 0 // round chop (C/C++ standard) @@ -1340,7 +1340,7 @@ idMath::Ftoi8 */ ID_INLINE char idMath::Ftoi8( float f ) { -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) __m128 x = _mm_load_ss( &f ); x = _mm_max_ss( x, SIMD_SP_min_char ); x = _mm_min_ss( x, SIMD_SP_max_char ); @@ -1367,7 +1367,7 @@ idMath::Ftoi16 */ ID_INLINE short idMath::Ftoi16( float f ) { -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) __m128 x = _mm_load_ss( &f ); x = _mm_max_ss( x, SIMD_SP_min_short ); x = _mm_min_ss( x, SIMD_SP_max_short ); @@ -1419,7 +1419,7 @@ ID_INLINE byte idMath::Ftob( float f ) // If a converted result is negative the value (0) is returned and if the // converted result is larger than the maximum byte the value (255) is returned. -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) __m128 x = _mm_load_ss( &f ); x = _mm_max_ss( x, SIMD_SP_zero ); x = _mm_min_ss( x, SIMD_SP_255 ); diff --git a/neo/idlib/math/Simd.cpp b/neo/idlib/math/Simd.cpp index a2b39401..38a9683e 100644 --- a/neo/idlib/math/Simd.cpp +++ b/neo/idlib/math/Simd.cpp @@ -77,7 +77,7 @@ void idSIMD::InitProcessor( const char* module, bool forceGeneric ) if( processor == NULL ) { -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) if( ( cpuid & CPUID_MMX ) && ( cpuid & CPUID_SSE ) ) { processor = new( TAG_MATH ) idSIMD_SSE; @@ -1389,7 +1389,7 @@ void idSIMD::Test_f( const idCmdArgs& args ) argString.Replace( " ", "" ); -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) if( idStr::Icmp( argString, "SSE" ) == 0 ) { if( !( cpuid & CPUID_MMX ) || !( cpuid & CPUID_SSE ) ) diff --git a/neo/idlib/math/Simd_SSE.cpp b/neo/idlib/math/Simd_SSE.cpp index c88648c0..5c9c6a5e 100644 --- a/neo/idlib/math/Simd_SSE.cpp +++ b/neo/idlib/math/Simd_SSE.cpp @@ -38,7 +38,7 @@ If you have questions concerning this license or the applicable additional terms // 
E //=============================================================== -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) #include @@ -974,5 +974,5 @@ void VPCALL idSIMD_SSE::UntransformJoints( idJointMat* jointMats, const int* par } } -#endif // #if defined(USE_INTRINSICS) +#endif // #if defined(USE_INTRINSICS_SSE) diff --git a/neo/idlib/math/Simd_SSE.h b/neo/idlib/math/Simd_SSE.h index bc1c5a4b..8a7ba566 100644 --- a/neo/idlib/math/Simd_SSE.h +++ b/neo/idlib/math/Simd_SSE.h @@ -38,7 +38,7 @@ If you have questions concerning this license or the applicable additional terms =============================================================================== */ -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) class idSIMD_SSE : public idSIMD_Generic { diff --git a/neo/idlib/math/VecX.h b/neo/idlib/math/VecX.h index 83438837..238c86f5 100644 --- a/neo/idlib/math/VecX.h +++ b/neo/idlib/math/VecX.h @@ -46,7 +46,7 @@ NOTE: due to the temporary memory pool idVecX cannot be used by multiple threads #define VECX_CLEAREND() int s = size; while( s < ( ( s + 3) & ~3 ) ) { p[s++] = 0.0f; } #define VECX_ALLOCA( n ) ( (float *) _alloca16( VECX_QUAD( n ) ) ) -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) #define VECX_SIMD #endif diff --git a/neo/idlib/sys/sys_intrinsics.h b/neo/idlib/sys/sys_intrinsics.h index e721e7a5..e11bf56f 100644 --- a/neo/idlib/sys/sys_intrinsics.h +++ b/neo/idlib/sys/sys_intrinsics.h @@ -29,7 +29,7 @@ If you have questions concerning this license or the applicable additional terms #ifndef __SYS_INTRIINSICS_H__ #define __SYS_INTRIINSICS_H__ -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) #include #endif /* @@ -93,7 +93,7 @@ ID_INLINE_EXTERN float __frndz( float x ) ================================================================================================ */ -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) // The code below assumes that a cache line is 64 bytes. 
// We specify the cache line size as 128 here to make the code consistent with the consoles. #define CACHE_LINE_SIZE 128 @@ -191,7 +191,7 @@ ID_INLINE_EXTERN int CACHE_LINE_CLEAR_OVERFLOW_COUNT( int size ) ================================================================================================ */ -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) /* ================================================ @@ -301,6 +301,6 @@ ID_FORCE_INLINE_EXTERN __m128 _mm_div16_ps( __m128 x, __m128 y ) // load idBounds::GetMaxs() #define _mm_loadu_bounds_1( bounds ) _mm_perm_ps( _mm_loadh_pi( _mm_load_ss( & bounds[1].x ), (__m64 *) & bounds[1].y ), _MM_SHUFFLE( 1, 3, 2, 0 ) ) -#endif // #if defined(USE_INTRINSICS) +#endif // #if defined(USE_INTRINSICS_SSE) #endif // !__SYS_INTRIINSICS_H__ diff --git a/neo/renderer/BufferObject.cpp b/neo/renderer/BufferObject.cpp index 02a9afd5..62bc8c63 100644 --- a/neo/renderer/BufferObject.cpp +++ b/neo/renderer/BufferObject.cpp @@ -54,7 +54,7 @@ bool IsWriteCombined( void* base ) } #endif -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) void CopyBuffer( byte* dst, const byte* src, int numBytes ) { diff --git a/neo/renderer/DXT/DXTCodec.h b/neo/renderer/DXT/DXTCodec.h index a33e4a63..9884db6b 100644 --- a/neo/renderer/DXT/DXTCodec.h +++ b/neo/renderer/DXT/DXTCodec.h @@ -319,7 +319,7 @@ idDxtEncoder::CompressImageDXT1Fast */ ID_INLINE void idDxtEncoder::CompressImageDXT1Fast( const byte* inBuf, byte* outBuf, int width, int height ) { -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) CompressImageDXT1Fast_SSE2( inBuf, outBuf, width, height ); #else CompressImageDXT1Fast_Generic( inBuf, outBuf, width, height ); @@ -333,7 +333,7 @@ idDxtEncoder::CompressImageDXT1AlphaFast */ ID_INLINE void idDxtEncoder::CompressImageDXT1AlphaFast( const byte* inBuf, byte* outBuf, int width, int height ) { -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) CompressImageDXT1AlphaFast_SSE2( inBuf, outBuf, width, height 
); #else CompressImageDXT1AlphaFast_Generic( inBuf, outBuf, width, height ); @@ -347,7 +347,7 @@ idDxtEncoder::CompressImageDXT5Fast */ ID_INLINE void idDxtEncoder::CompressImageDXT5Fast( const byte* inBuf, byte* outBuf, int width, int height ) { -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) CompressImageDXT5Fast_SSE2( inBuf, outBuf, width, height ); #else CompressImageDXT5Fast_Generic( inBuf, outBuf, width, height ); @@ -371,7 +371,7 @@ idDxtEncoder::CompressYCoCgDXT5Fast */ ID_INLINE void idDxtEncoder::CompressYCoCgDXT5Fast( const byte* inBuf, byte* outBuf, int width, int height ) { -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) CompressYCoCgDXT5Fast_SSE2( inBuf, outBuf, width, height ); #else CompressYCoCgDXT5Fast_Generic( inBuf, outBuf, width, height ); @@ -395,7 +395,7 @@ idDxtEncoder::CompressNormalMapDXT5Fast */ ID_INLINE void idDxtEncoder::CompressNormalMapDXT5Fast( const byte* inBuf, byte* outBuf, int width, int height ) { -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) CompressNormalMapDXT5Fast_SSE2( inBuf, outBuf, width, height ); #else CompressNormalMapDXT5Fast_Generic( inBuf, outBuf, width, height ); diff --git a/neo/renderer/DXT/DXTEncoder_SSE2.cpp b/neo/renderer/DXT/DXTEncoder_SSE2.cpp index 06390262..00f7ce47 100644 --- a/neo/renderer/DXT/DXTEncoder_SSE2.cpp +++ b/neo/renderer/DXT/DXTEncoder_SSE2.cpp @@ -33,7 +33,7 @@ If you have questions concerning this license or the applicable additional terms #include "DXTCodec_local.h" #include "DXTCodec.h" -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) //#define TEST_COMPRESSION #ifdef TEST_COMPRESSION @@ -1634,4 +1634,4 @@ void idDxtEncoder::CompressNormalMapDXT5Fast_SSE2( const byte* inBuf, byte* outB #endif } -#endif // #if defined(USE_INTRINSICS) +#endif // #if defined(USE_INTRINSICS_SSE) diff --git a/neo/renderer/GLMatrix.cpp b/neo/renderer/GLMatrix.cpp index b91edd4c..10308baa 100644 --- a/neo/renderer/GLMatrix.cpp +++ 
b/neo/renderer/GLMatrix.cpp @@ -75,7 +75,7 @@ R_MatrixMultiply */ void R_MatrixMultiply( const float a[16], const float b[16], float out[16] ) { -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) __m128 a0 = _mm_loadu_ps( a + 0 * 4 ); __m128 a1 = _mm_loadu_ps( a + 1 * 4 ); __m128 a2 = _mm_loadu_ps( a + 2 * 4 ); diff --git a/neo/renderer/ModelDecal.cpp b/neo/renderer/ModelDecal.cpp index 2abce0b9..7325ffe9 100644 --- a/neo/renderer/ModelDecal.cpp +++ b/neo/renderer/ModelDecal.cpp @@ -312,7 +312,7 @@ static void R_DecalPointCullStatic( byte* cullBits, const idPlane* planes, const assert_16_byte_aligned( cullBits ); assert_16_byte_aligned( verts ); -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 4 > vertsODS( verts, numVerts ); const __m128 vector_float_zero = _mm_setzero_ps(); @@ -680,7 +680,7 @@ static void R_CopyDecalSurface( idDrawVert* verts, int numVerts, triIndex_t* ind assert( ( ( decal->numIndexes * sizeof( triIndex_t ) ) & 15 ) == 0 ); assert_16_byte_aligned( fadeColor ); -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) const __m128i vector_int_num_verts = _mm_shuffle_epi32( _mm_cvtsi32_si128( numVerts ), 0 ); const __m128i vector_short_num_verts = _mm_packs_epi32( vector_int_num_verts, vector_int_num_verts ); diff --git a/neo/renderer/ModelOverlay.cpp b/neo/renderer/ModelOverlay.cpp index 2e5da4bc..a4759a26 100644 --- a/neo/renderer/ModelOverlay.cpp +++ b/neo/renderer/ModelOverlay.cpp @@ -117,7 +117,7 @@ static void R_OverlayPointCullStatic( byte* cullBits, halfFloat_t* texCoordS, ha assert_16_byte_aligned( texCoordT ); assert_16_byte_aligned( verts ); -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 4 > vertsODS( verts, numVerts ); const __m128 vector_float_zero = { 0.0f, 0.0f, 0.0f, 0.0f }; @@ -242,7 +242,7 @@ static void R_OverlayPointCullSkinned( byte* cullBits, halfFloat_t* texCoordS, h 
assert_16_byte_aligned( texCoordT ); assert_16_byte_aligned( verts ); -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 4 > vertsODS( verts, numVerts ); const __m128 vector_float_zero = { 0.0f, 0.0f, 0.0f, 0.0f }; @@ -567,7 +567,7 @@ static void R_CopyOverlaySurface( idDrawVert* verts, int numVerts, triIndex_t* i assert( ( ( overlay->numVerts * sizeof( idDrawVert ) ) & 15 ) == 0 ); assert( ( ( overlay->numIndexes * sizeof( triIndex_t ) ) & 15 ) == 0 ); -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) const __m128i vector_int_clear_last = _mm_set_epi32( 0, -1, -1, -1 ); const __m128i vector_int_num_verts = _mm_shuffle_epi32( _mm_cvtsi32_si128( numVerts ), 0 ); diff --git a/neo/renderer/Model_md5.cpp b/neo/renderer/Model_md5.cpp index 3cfa2c5a..cab3f3bf 100644 --- a/neo/renderer/Model_md5.cpp +++ b/neo/renderer/Model_md5.cpp @@ -34,7 +34,7 @@ If you have questions concerning this license or the applicable additional terms #include "RenderCommon.h" #include "Model_local.h" -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) static const __m128 vector_float_posInfinity = { idMath::INFINITY, idMath::INFINITY, idMath::INFINITY, idMath::INFINITY }; static const __m128 vector_float_negInfinity = { -idMath::INFINITY, -idMath::INFINITY, -idMath::INFINITY, -idMath::INFINITY }; #endif @@ -565,7 +565,7 @@ idMD5Mesh::CalculateBounds */ void idMD5Mesh::CalculateBounds( const idJointMat* entJoints, idBounds& bounds ) const { -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) __m128 minX = vector_float_posInfinity; __m128 minY = vector_float_posInfinity; @@ -1236,7 +1236,7 @@ static void TransformJoints( idJointMat* __restrict outJoints, const int numJoin assert_16_byte_aligned( inFloats1 ); assert_16_byte_aligned( inFloats2 ); -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) const __m128 mask_keep_last = __m128c( _mm_set_epi32( 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000 ) 
); diff --git a/neo/renderer/jobs/ShadowShared.cpp b/neo/renderer/jobs/ShadowShared.cpp index 0ac5c897..e8f9ff38 100644 --- a/neo/renderer/jobs/ShadowShared.cpp +++ b/neo/renderer/jobs/ShadowShared.cpp @@ -92,7 +92,7 @@ static void R_ShadowVolumeCullBits( byte* cullBits, byte& totalOr, const float r assert_16_byte_aligned( cullBits ); assert_16_byte_aligned( verts ); -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) idODSStreamedArray< idShadowVert, 16, SBT_DOUBLE, 4 > vertsODS( verts, numVerts ); const __m128 vector_float_radius = _mm_splat_ps( _mm_load_ss( &radius ), 0 ); diff --git a/neo/renderer/jobs/dynamicshadowvolume/DynamicShadowVolume.cpp b/neo/renderer/jobs/dynamicshadowvolume/DynamicShadowVolume.cpp index afd7887c..326a73f9 100644 --- a/neo/renderer/jobs/dynamicshadowvolume/DynamicShadowVolume.cpp +++ b/neo/renderer/jobs/dynamicshadowvolume/DynamicShadowVolume.cpp @@ -31,7 +31,7 @@ If you have questions concerning this license or the applicable additional terms #include "../../../idlib/sys/sys_intrinsics.h" #include "../../../idlib/geometry/DrawVert_intrinsics.h" -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) static const __m128i vector_int_neg_one = _mm_set_epi32( -1, -1, -1, -1 ); #endif @@ -40,7 +40,7 @@ If you have questions concerning this license or the applicable additional terms TriangleFacing_SSE2 ===================== */ -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) static ID_FORCE_INLINE __m128i TriangleFacing_SSE2( const __m128& vert0X, const __m128& vert0Y, const __m128& vert0Z, const __m128& vert1X, const __m128& vert1Y, const __m128& vert1Z, const __m128& vert2X, const __m128& vert2Y, const __m128& vert2Z, @@ -71,7 +71,7 @@ TriangleCulled The clip space of the 'lightProject' is assumed to be in the range [0, 1]. 
===================== */ -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) static ID_FORCE_INLINE __m128i TriangleCulled_SSE2( const __m128& vert0X, const __m128& vert0Y, const __m128& vert0Z, const __m128& vert1X, const __m128& vert1Y, const __m128& vert1Z, const __m128& vert2X, const __m128& vert2Y, const __m128& vert2Z, @@ -249,7 +249,7 @@ static int CalculateTriangleFacingCulledStatic( byte* __restrict facing, byte* _ const idVec3 lineDir = lineDelta * lineLengthRcp; const float lineLength = lineLengthSqr * lineLengthRcp; -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) idODSStreamedIndexedArray< idDrawVert, triIndex_t, 32, SBT_QUAD, 4 * 3 > indexedVertsODS( verts, numVerts, indexes, numIndexes ); @@ -449,7 +449,7 @@ static int CalculateTriangleFacingCulledSkinned( byte* __restrict facing, byte* const idVec3 lineDir = lineDelta * lineLengthRcp; const float lineLength = lineLengthSqr * lineLengthRcp; -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) idODSStreamedArray< idDrawVert, 32, SBT_DOUBLE, 1 > vertsODS( verts, numVerts ); @@ -684,7 +684,7 @@ static void StreamOut( void* dst, const void* src, int numBytes ) assert_16_byte_aligned( dst ); assert_16_byte_aligned( src ); -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) int i = 0; for( ; i + 128 <= numBytes; i += 128 ) { @@ -933,7 +933,7 @@ static void R_CreateShadowVolumeTriangles( triIndex_t* __restrict shadowIndices, numShadowIndexesTotal = numShadowIndices; -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) _mm_sfence(); #endif @@ -1122,7 +1122,7 @@ void R_CreateLightTriangles( triIndex_t* __restrict lightIndices, triIndex_t* __ numLightIndicesTotal = numLightIndices; -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) _mm_sfence(); #endif diff --git a/neo/renderer/tr_trace.cpp b/neo/renderer/tr_trace.cpp index 7f6a826c..766aff8c 100644 --- a/neo/renderer/tr_trace.cpp +++ b/neo/renderer/tr_trace.cpp @@ -45,7 +45,7 @@ static void 
R_TracePointCullStatic( byte* cullBits, byte& totalOr, const float r assert_16_byte_aligned( cullBits ); assert_16_byte_aligned( verts ); -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 4 > vertsODS( verts, numVerts ); const __m128 vector_float_radius = _mm_splat_ps( _mm_load_ss( &radius ), 0 ); @@ -230,7 +230,7 @@ static void R_TracePointCullSkinned( byte* cullBits, byte& totalOr, const float assert_16_byte_aligned( cullBits ); assert_16_byte_aligned( verts ); -#if defined(USE_INTRINSICS) +#if defined(USE_INTRINSICS_SSE) idODSStreamedArray< idDrawVert, 16, SBT_DOUBLE, 4 > vertsODS( verts, numVerts ); const __m128 vector_float_radius = _mm_splat_ps( _mm_load_ss( &radius ), 0 );