diff --git a/neo/CMakeLists.txt b/neo/CMakeLists.txt index 8bf5d306..37690a43 100644 --- a/neo/CMakeLists.txt +++ b/neo/CMakeLists.txt @@ -612,6 +612,7 @@ set(src_sys_dedicated sys/linux/dedicated.cpp) if (APPLE) set(src_sys_base + sys/cpu.cpp sys/threads.cpp sys/sys_local.cpp sys/posix/posix_net.cpp @@ -633,6 +634,7 @@ if (APPLE) ) elseif (WIN32) set(src_sys_base + sys/cpu.cpp sys/threads.cpp sys/sys_local.cpp sys/win32/win_cpu.cpp @@ -653,6 +655,7 @@ elseif (WIN32) ) else() set(src_sys_base + sys/cpu.cpp sys/threads.cpp sys/sys_local.cpp sys/posix/posix_net.cpp diff --git a/neo/idlib/math/Simd.cpp b/neo/idlib/math/Simd.cpp index b540e44d..e0482bd1 100644 --- a/neo/idlib/math/Simd.cpp +++ b/neo/idlib/math/Simd.cpp @@ -112,14 +112,9 @@ void idSIMD::InitProcessor( const char *module, bool forceGeneric ) { idLib::common->Printf( "%s using %s for SIMD processing\n", module, SIMDProcessor->GetName() ); } - if ( cpuid & CPUID_FTZ ) { + if ( cpuid & CPUID_SSE ) { idLib::sys->FPU_SetFTZ( true ); - idLib::common->Printf( "enabled Flush-To-Zero mode\n" ); - } - - if ( cpuid & CPUID_DAZ ) { idLib::sys->FPU_SetDAZ( true ); - idLib::common->Printf( "enabled Denormals-Are-Zero mode\n" ); } } diff --git a/neo/sys/cpu.cpp b/neo/sys/cpu.cpp new file mode 100644 index 00000000..96e9d1f2 --- /dev/null +++ b/neo/sys/cpu.cpp @@ -0,0 +1,175 @@ +/* +=========================================================================== + +Doom 3 GPL Source Code +Copyright (C) 1999-2011 id Software LLC, a ZeniMax Media company. + +This file is part of the Doom 3 GPL Source Code ("Doom 3 Source Code"). + +Doom 3 Source Code is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. 
+
+Doom 3 Source Code is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Doom 3 Source Code. If not, see <http://www.gnu.org/licenses/>.
+
+In addition, the Doom 3 Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 Source Code. If not, please request a copy in writing from id Software at the address below.
+
+If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
+
+===========================================================================
+*/
+
+#include <float.h>
+
+#include "sys/platform.h"
+#include "framework/Common.h"
+
+#include "sys/sys_public.h"
+
+// NO_CPUID is derived purely from the target architecture below; drop any
+// definition that came in from the build system so it cannot clash with ours.
+#ifdef NO_CPUID
+#undef NO_CPUID
+#endif
+
+#if defined(__GNUC__)
+	#if !defined(__i386__) && !defined(__x86_64__)
+		#define NO_CPUID
+	#endif
+#elif defined(_MSC_VER)
+	#if !defined(_M_IX86) && !defined(_M_X64)
+		#define NO_CPUID
+	#endif
+#else
+#error unsupported compiler
+#endif
+
+#ifdef NO_CPUID
+// Not an x86 target: there is no MXCSR register, so both setters only report
+// the misuse through common->Error().
+void Sys_FPU_SetDAZ(bool enable) {
+	common->Error("ERROR: Sys_FPU_SetDAZ not supported on this architecture\n");
+}
+
+void Sys_FPU_SetFTZ(bool enable) {
+	common->Error("ERROR: Sys_FPU_SetFTZ not supported on this architecture\n");
+}
+#else
+
+/*
+================
+CPUid
+
+Executes the CPUID instruction for leaf "index" and stores the resulting
+EAX/EBX/ECX/EDX values in *a, *b, *c and *d.
+================
+*/
+#if defined(__GNUC__)
+static inline void CPUid(int index, int *a, int *b, int *c, int *d) {
+#if __x86_64__
+#	define REG_b "rbx"
+#	define REG_S "rsi"
+#elif __i386__
+#	define REG_b "ebx"
+#	define REG_S "esi"
+#endif
+	*a = *b = *c = *d = 0;
+
+	// (e/r)bx is saved in (e/r)si around the instruction and swapped back
+	// afterwards, so the EBX result is handed out through the "S" register
+	// and bx itself is left untouched for the compiler.
+	__asm__ volatile
+		(	"mov %%" REG_b ", %%" REG_S "\n\t"
+			"cpuid\n\t"
+			"xchg %%" REG_b ", %%" REG_S
+			: "=a" (*a), "=S" (*b),
+				"=c" (*c), "=d" (*d)
+			: "0" (index));
+}
+#elif defined(_MSC_VER)
+#include <intrin.h>
+#include <xmmintrin.h>
+static inline void CPUid(int index, int *a, int *b, int *c, int *d) {
+	int info[4] = { };
+
+	// VS2005 and up
+	__cpuid(info, index);
+
+	*a = info[0];
+	*b = info[1];
+	*c = info[2];
+	*d = info[3];
+}
+#else
+#error unsupported compiler
+#endif
+
+// CPUID leaf 1, EDX bit 24: FXSAVE/FXRSTOR are available
+#define d_FXSAVE		(1 << 24)
+
+// MXCSR control bits
+#define MXCSR_DAZ		(1 << 6)
+#define MXCSR_FTZ		(1 << 15)
+
+// layout of the FXSAVE image: total size and byte offset of MXCSR_MASK
+#define FXSAVE_AREA_SIZE	512
+#define FXSAVE_MXCSR_MASK	28
+
+/*
+================
+HasDAZ
+
+Returns true if the DAZ bit of MXCSR may be set on this CPU.
+
+FXSAVE support alone is not sufficient: the bit is only writable when it is
+set in the MXCSR_MASK field of the FXSAVE image. Loading MXCSR with a bit
+outside that mask raises a general protection fault, so the mask has to be
+read before DAZ is ever enabled. A mask of zero means "default mask", which
+does not include DAZ, so the plain bit test below covers that case as well.
+================
+*/
+static inline bool HasDAZ() {
+	int a, b, c, d;
+
+	CPUid(0, &a, &b, &c, &d);
+	if (a < 1)
+		return false;
+
+	CPUid(1, &a, &b, &c, &d);
+
+	// without FXSAVE there is no way to query MXCSR_MASK
+	if ((d & d_FXSAVE) != d_FXSAVE)
+		return false;
+
+	// FXSAVE requires a zeroed, 16 byte aligned save area
+#if defined(__GNUC__)
+	unsigned char fxarea[FXSAVE_AREA_SIZE] __attribute__((aligned(16))) = { 0 };
+
+	__asm__ volatile ("fxsave (%0)" : : "r" (fxarea) : "memory");
+#elif defined(_M_IX86)
+	__declspec(align(16)) unsigned char fxarea[FXSAVE_AREA_SIZE] = { 0 };
+	unsigned char *fxptr = fxarea;
+
+	__asm {
+		mov eax, fxptr
+		fxsave [eax]
+	}
+#else
+	// MSVC has no inline assembler on x64, use the intrinsic instead
+	__declspec(align(16)) unsigned char fxarea[FXSAVE_AREA_SIZE] = { 0 };
+
+	_fxsave(fxarea);
+#endif
+
+	// MXCSR_MASK is stored little endian, so bit 6 lives in its first byte
+	return (fxarea[FXSAVE_MXCSR_MASK] & MXCSR_DAZ) == MXCSR_DAZ;
+}
+
+// Accessors for the x87 control word (FSTCW/FLDCW) and the SSE control/status
+// register (STMXCSR/LDMXCSR).
+#ifdef _MSC_VER
+// the x87 variants rely on inline assembly and are therefore 32 bit only
+#define STREFLOP_FSTCW(cw) do { short tmp; __asm { fstcw tmp }; (cw) = tmp; } while (0)
+#define STREFLOP_FLDCW(cw) do { short tmp = (cw); __asm { fclex }; __asm { fldcw tmp }; } while (0)
+// MXCSR goes through the intrinsics so that x64 builds work as well
+#define STREFLOP_STMXCSR(cw) do { (cw) = _mm_getcsr(); } while (0)
+#define STREFLOP_LDMXCSR(cw) do { _mm_setcsr(cw); } while (0)
+#else
+#define STREFLOP_FSTCW(cw) do { asm volatile ("fstcw %0" : "=m" (cw) : ); } while (0)
+#define STREFLOP_FLDCW(cw) do { asm volatile ("fclex \n fldcw %0" : : "m" (cw)); } while (0)
+#define STREFLOP_STMXCSR(cw) do { asm volatile ("stmxcsr %0" : "=m" (cw) : ); } while (0)
+#define STREFLOP_LDMXCSR(cw) do { asm volatile ("ldmxcsr %0" : : "m" (cw) ); } while (0)
+#endif
+
+/*
+================
+EnableMXCSRFlag
+
+Sets (enable == true) or clears (enable == false) the bits given in "flag"
+in the MXCSR register. "name" is only used for the console output. MXCSR is
+left untouched when the flag already has the requested state.
+================
+*/
+static void EnableMXCSRFlag(int flag, bool enable, const char *name) {
+	int sse_mode;
+
+	STREFLOP_STMXCSR(sse_mode);
+
+	if (enable && (sse_mode & flag) == flag) {
+		common->Printf("%s mode is already enabled\n", name);
+		return;
+	}
+
+	if (!enable && (sse_mode & flag) == 0) {
+		common->Printf("%s mode is already disabled\n", name);
+		return;
+	}
+
+	if (enable) {
+		common->Printf("enabling %s mode\n", name);
+		sse_mode |= flag;
+	} else {
+		common->Printf("disabling %s mode\n", name);
+		sse_mode &= ~flag;
+	}
+
+	STREFLOP_LDMXCSR(sse_mode);
+}
+
+/*
+================
+Sys_FPU_SetDAZ
+================
+*/
+void Sys_FPU_SetDAZ(bool enable) { + if (!HasDAZ()) { + common->Printf("this CPU doesn't support Denormals-Are-Zero\n"); + return; + } + + EnableMXCSRFlag(MXCSR_DAZ, enable, "Denormals-Are-Zero"); +} + +/* +================ +Sys_FPU_SetFTZ +================ +*/ +void Sys_FPU_SetFTZ(bool enable) { + EnableMXCSRFlag(MXCSR_FTZ, enable, "Flush-To-Zero"); +} +#endif diff --git a/neo/sys/linux/main.cpp b/neo/sys/linux/main.cpp index 6d002361..dab00b7d 100644 --- a/neo/sys/linux/main.cpp +++ b/neo/sys/linux/main.cpp @@ -418,52 +418,6 @@ void idSysLocal::OpenURL( const char *url, bool quit ) { */ void Sys_DoPreferences( void ) { } -/* -================ -Sys_FPU_SetDAZ -================ -*/ -void Sys_FPU_SetDAZ( bool enable ) { - /* - DWORD dwData; - - _asm { - movzx ecx, byte ptr enable - and ecx, 1 - shl ecx, 6 - STMXCSR dword ptr dwData - mov eax, dwData - and eax, ~(1<<6) // clear DAX bit - or eax, ecx // set the DAZ bit - mov dwData, eax - LDMXCSR dword ptr dwData - } - */ -} - -/* -================ -Sys_FPU_SetFTZ -================ -*/ -void Sys_FPU_SetFTZ( bool enable ) { - /* - DWORD dwData; - - _asm { - movzx ecx, byte ptr enable - and ecx, 1 - shl ecx, 15 - STMXCSR dword ptr dwData - mov eax, dwData - and eax, ~(1<<15) // clear FTZ bit - or eax, ecx // set the FTZ bit - mov dwData, eax - LDMXCSR dword ptr dwData - } - */ -} - /* =============== mem consistency stuff diff --git a/neo/sys/osx/DOOMController.mm b/neo/sys/osx/DOOMController.mm index 274be04a..f2d81260 100644 --- a/neo/sys/osx/DOOMController.mm +++ b/neo/sys/osx/DOOMController.mm @@ -475,7 +475,7 @@ int Sys_GetProcessorId( void ) { #if defined(__ppc__) cpuid |= CPUID_ALTIVEC; #elif defined(__i386__) - cpuid |= CPUID_MMX | CPUID_SSE | CPUID_SSE2 | CPUID_SSE3 | CPUID_FTZ | CPUID_DAZ; + cpuid |= CPUID_MMX | CPUID_SSE | CPUID_SSE2 | CPUID_SSE3; #endif return cpuid; } @@ -903,63 +903,3 @@ static OSErr DoRegCodeDialog( char* ioRegCode1 ) return regCodeInfo.okPressed ? 
(OSErr)noErr : (OSErr)userCanceledErr; } - -#if defined(__ppc__) - -/* - ================ - Sys_FPU_SetDAZ - ================ - */ -void Sys_FPU_SetDAZ( bool enable ) { -} - -/* - ================ - Sys_FPU_SetFTZ - ================ - */ -void Sys_FPU_SetFTZ( bool enable ) { -} - - -#elif defined(__i386__) - -#include - -/* - ================ - Sys_FPU_SetDAZ - ================ - */ -void Sys_FPU_SetDAZ( bool enable ) { - uint32_t dwData; - uint32_t enable_l = (uint32_t) enable; - - enable_l = enable_l & 1; - enable_l = enable_l << 6; - dwData = _mm_getcsr(); // store MXCSR to dwData - dwData = dwData & 0xffbf; - dwData = dwData | enable_l; - _mm_setcsr(dwData); // load MXCSR with dwData -} - -/* - ================ - Sys_FPU_SetFTZ - ================ - */ -void Sys_FPU_SetFTZ( bool enable ) { - - uint32_t dwData; - uint32_t enable_l = (uint32_t) enable; - - enable_l = enable_l & 1; - enable_l = enable_l << 15; - dwData = _mm_getcsr(); // store MXCSR to dwData - dwData = dwData & 0x7fff; - dwData = dwData | enable_l; - _mm_setcsr(dwData); // load MXCSR with dwData -} - -#endif diff --git a/neo/sys/scons/SConscript.core b/neo/sys/scons/SConscript.core index 9962c8eb..7061cae5 100644 --- a/neo/sys/scons/SConscript.core +++ b/neo/sys/scons/SConscript.core @@ -193,6 +193,7 @@ ui_list = scons_utils.BuildList( 'ui', ui_string ) sys_string = ' \ sys_local.cpp \ + cpu.cpp \ threads.cpp \ posix/posix_net.cpp \ posix/posix_main.cpp \ diff --git a/neo/sys/sys_local.h b/neo/sys/sys_local.h index 7894d9a5..c245abad 100644 --- a/neo/sys/sys_local.h +++ b/neo/sys/sys_local.h @@ -31,6 +31,7 @@ If you have questions concerning this license or the applicable additional terms #include "renderer/RenderSystem.h" #include "sound/sound.h" +#include "sys/sys_public.h" /* ============================================================== diff --git a/neo/sys/sys_public.h b/neo/sys/sys_public.h index 17af79ea..49ad389d 100644 --- a/neo/sys/sys_public.h +++ b/neo/sys/sys_public.h @@ -39,8 +39,6 
@@ typedef enum { CPUID_SSE2 = 0x00080, // Streaming SIMD Extensions 2 CPUID_SSE3 = 0x00100, // Streaming SIMD Extentions 3 aka Prescott's New Instructions CPUID_ALTIVEC = 0x00200, // AltiVec - CPUID_FTZ = 0x04000, // Flush-To-Zero mode (denormal results are flushed to zero) - CPUID_DAZ = 0x08000 // Denormals-Are-Zero mode (denormal source operands are set to zero) } cpuid_t; typedef enum { @@ -169,10 +167,10 @@ void Sys_FPU_SetPrecision( int precision ); // sets the FPU rounding mode void Sys_FPU_SetRounding( int rounding ); -// sets Flush-To-Zero mode (only available when CPUID_FTZ is set) +// sets Flush-To-Zero mode void Sys_FPU_SetFTZ( bool enable ); -// sets Denormals-Are-Zero mode (only available when CPUID_DAZ is set) +// sets Denormals-Are-Zero mode void Sys_FPU_SetDAZ( bool enable ); // returns amount of system ram diff --git a/neo/sys/win32/win_cpu.cpp b/neo/sys/win32/win_cpu.cpp index 224b4ae8..308b3814 100644 --- a/neo/sys/win32/win_cpu.cpp +++ b/neo/sys/win32/win_cpu.cpp @@ -210,40 +210,6 @@ static bool HasSSE3( void ) { return false; } -/* -================ -HasDAZ -================ -*/ -static bool HasDAZ( void ) { -#ifdef _MSC_VER - __declspec(align(16)) unsigned char FXSaveArea[512]; - unsigned char *FXArea = FXSaveArea; - DWORD dwMask = 0; - unsigned regs[4]; - - // get CPU feature bits - CPUID( 1, regs ); - - // bit 24 of EDX denotes support for FXSAVE - if ( !( regs[_REG_EDX] & ( 1 << 24 ) ) ) { - return false; - } - - memset( FXArea, 0, sizeof( FXSaveArea ) ); - - __asm { - mov eax, FXArea - FXSAVE [eax] - } - - dwMask = *(DWORD *)&FXArea[28]; // Read the MXCSR Mask - return ( ( dwMask & ( 1 << 6 ) ) == ( 1 << 6 ) ); // Return if the DAZ bit is set -#else - return false; -#endif -} - /* ================ Sys_GetCPUId @@ -270,7 +236,7 @@ int Sys_GetCPUId( void ) { // check for Streaming SIMD Extensions if ( HasSSE() ) { - flags |= CPUID_SSE | CPUID_FTZ; + flags |= CPUID_SSE; } // check for Streaming SIMD Extensions 2 @@ -283,11 +249,6 @@ int 
Sys_GetCPUId( void ) { flags |= CPUID_SSE3; } - // check for Denormals-Are-Zero mode - if ( HasDAZ() ) { - flags |= CPUID_DAZ; - } - return flags; #else return CPUID_GENERIC; @@ -611,49 +572,3 @@ void Sys_FPU_SetRounding( int rounding ) { } #endif } - -/* -================ -Sys_FPU_SetDAZ -================ -*/ -void Sys_FPU_SetDAZ( bool enable ) { -#ifdef _MSC_VER - DWORD dwData; - - _asm { - movzx ecx, byte ptr enable - and ecx, 1 - shl ecx, 6 - STMXCSR dword ptr dwData - mov eax, dwData - and eax, ~(1<<6) // clear DAX bit - or eax, ecx // set the DAZ bit - mov dwData, eax - LDMXCSR dword ptr dwData - } -#endif -} - -/* -================ -Sys_FPU_SetFTZ -================ -*/ -void Sys_FPU_SetFTZ( bool enable ) { -#ifdef _MSC_VER - DWORD dwData; - - _asm { - movzx ecx, byte ptr enable - and ecx, 1 - shl ecx, 15 - STMXCSR dword ptr dwData - mov eax, dwData - and eax, ~(1<<15) // clear FTZ bit - or eax, ecx // set the FTZ bit - mov dwData, eax - LDMXCSR dword ptr dwData - } -#endif -}