mirror of
https://github.com/dhewm/dhewm3.git
synced 2025-03-20 17:51:02 +00:00
Unify CPUID_FTZ and CPUID_DAZ
Get rid of the 2 CPUID flags and combine them with SSE in one implementation. SSE flags can now be set on all x86 and x86_64 platforms - independent of -ffast-math. Helper defines borrowed from STREFLOP.
This commit is contained in:
parent
acfe2489e4
commit
4150029322
9 changed files with 185 additions and 203 deletions
|
@ -612,6 +612,7 @@ set(src_sys_dedicated sys/linux/dedicated.cpp)
|
|||
|
||||
if (APPLE)
|
||||
set(src_sys_base
|
||||
sys/cpu.cpp
|
||||
sys/threads.cpp
|
||||
sys/sys_local.cpp
|
||||
sys/posix/posix_net.cpp
|
||||
|
@ -633,6 +634,7 @@ if (APPLE)
|
|||
)
|
||||
elseif (WIN32)
|
||||
set(src_sys_base
|
||||
sys/cpu.cpp
|
||||
sys/threads.cpp
|
||||
sys/sys_local.cpp
|
||||
sys/win32/win_cpu.cpp
|
||||
|
@ -653,6 +655,7 @@ elseif (WIN32)
|
|||
)
|
||||
else()
|
||||
set(src_sys_base
|
||||
sys/cpu.cpp
|
||||
sys/threads.cpp
|
||||
sys/sys_local.cpp
|
||||
sys/posix/posix_net.cpp
|
||||
|
|
|
@ -112,14 +112,9 @@ void idSIMD::InitProcessor( const char *module, bool forceGeneric ) {
|
|||
idLib::common->Printf( "%s using %s for SIMD processing\n", module, SIMDProcessor->GetName() );
|
||||
}
|
||||
|
||||
if ( cpuid & CPUID_FTZ ) {
|
||||
if ( cpuid & CPUID_SSE ) {
|
||||
idLib::sys->FPU_SetFTZ( true );
|
||||
idLib::common->Printf( "enabled Flush-To-Zero mode\n" );
|
||||
}
|
||||
|
||||
if ( cpuid & CPUID_DAZ ) {
|
||||
idLib::sys->FPU_SetDAZ( true );
|
||||
idLib::common->Printf( "enabled Denormals-Are-Zero mode\n" );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
175
neo/sys/cpu.cpp
Normal file
175
neo/sys/cpu.cpp
Normal file
|
@ -0,0 +1,175 @@
|
|||
/*
|
||||
===========================================================================
|
||||
|
||||
Doom 3 GPL Source Code
|
||||
Copyright (C) 1999-2011 id Software LLC, a ZeniMax Media company.
|
||||
|
||||
This file is part of the Doom 3 GPL Source Code ("Doom 3 Source Code").
|
||||
|
||||
Doom 3 Source Code is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Doom 3 Source Code is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with Doom 3 Source Code. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
In addition, the Doom 3 Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 Source Code. If not, please request a copy in writing from id Software at the address below.
|
||||
|
||||
If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
|
||||
|
||||
===========================================================================
|
||||
*/
|
||||
|
||||
#include <SDL_cpuinfo.h>
|
||||
|
||||
#include "sys/platform.h"
|
||||
#include "framework/Common.h"
|
||||
|
||||
#include "sys/sys_public.h"
|
||||
|
||||
// Make sure NO_SSE is not defined past this point. #undef on a macro that was
// never defined is a no-op, so no guard is required.
// NOTE(review): nothing else in this file tests NO_SSE - confirm it is still needed.
#undef NO_SSE

// NO_CPUID gets defined when the target is not 32 or 64 bit x86. It selects
// the stub implementations of Sys_FPU_SetDAZ/Sys_FPU_SetFTZ further down.
// Only GCC compatible compilers and MSVC are accepted.
#if defined(__GNUC__)
#	if !(defined(__i386__) || defined(__x86_64__))
#		define NO_CPUID
#	endif
#elif defined(_MSC_VER)
#	if !(defined(_M_IX86) || defined(_M_X64))
#		define NO_CPUID
#	endif
#else
#	error unsupported compiler
#endif
|
||||
|
||||
#ifdef NO_CPUID
|
||||
/*
================
Sys_FPU_SetDAZ

Stub used when NO_CPUID is defined (non-x86 target): the request can't be
honoured, so it is reported through common->Error().
================
*/
void Sys_FPU_SetDAZ(bool enable) {
	(void)enable;	// not used by the stub

	common->Error("ERROR: Sys_FPU_SetDAZ not supported on this architecture\n");
}
|
||||
|
||||
/*
================
Sys_FPU_SetFTZ

Stub used when NO_CPUID is defined (non-x86 target): the request can't be
honoured, so it is reported through common->Error().
================
*/
void Sys_FPU_SetFTZ(bool enable) {
	(void)enable;	// not used by the stub

	common->Error("ERROR: Sys_FPU_SetFTZ not supported on this architecture\n");
}
|
||||
#else
|
||||
|
||||
#if defined(__GNUC__)
|
||||
/*
================
CPUid

Executes the CPUID instruction for the given leaf (GCC compatible compilers)
and stores the resulting EAX, EBX, ECX and EDX values in *a, *b, *c and *d.

ECX is zeroed before the instruction runs, so leaves that are indexed by a
sub-leaf in ECX always report sub-leaf 0 instead of whatever happened to be
in the register. This matches the documented behavior of MSVC's __cpuid.
================
*/
static inline void CPUid(int index, int *a, int *b, int *c, int *d) {
#if __x86_64__
#	define REG_b "rbx"
#	define REG_S "rsi"
#elif __i386__
#	define REG_b "ebx"
#	define REG_S "esi"
#endif
	*a = *b = *c = *d = 0;

	// The b register is never named as an operand: it is parked in the S
	// register around the instruction and swapped back afterwards, which
	// leaves the CPUID result for EBX in the S register ("=S" -> *b).
	// NOTE(review): presumably done because EBX can be reserved by the
	// compiler (e.g. as PIC register on i386) - confirm.
	__asm__ volatile
		( "mov %%" REG_b ", %%" REG_S "\n\t"
		  "cpuid\n\t"
		  "xchg %%" REG_b ", %%" REG_S
		: "=a" (*a), "=S" (*b),
		  "=c" (*c), "=d" (*d)
		: "0" (index),	// leaf in EAX
		  "2" (0));		// sub-leaf 0 in ECX

	// the register name helpers are only meaningful inside this function
#undef REG_b
#undef REG_S
}
|
||||
#elif defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
/*
================
CPUid

MSVC flavour: queries CPUID leaf 'index' through the __cpuid intrinsic
(VS2005 and up) and hands the four result values back through *a, *b, *c
and *d, in the order the intrinsic stored them.
================
*/
static inline void CPUid(int index, int *a, int *b, int *c, int *d) {
	int regs[4] = { };
	int *const out[4] = { a, b, c, d };

	__cpuid(regs, index);

	for (int i = 0; i < 4; i++)
		*out[i] = regs[i];
}
|
||||
#else
|
||||
#error unsupported compiler
|
||||
#endif
|
||||
|
||||
#define d_FXSAVE (1 << 24)
|
||||
|
||||
static inline bool HasDAZ() {
|
||||
int a, b, c, d;
|
||||
|
||||
CPUid(0, &a, &b, &c, &d);
|
||||
if (a < 1)
|
||||
return false;
|
||||
|
||||
CPUid(1, &a, &b, &c, &d);
|
||||
|
||||
return (d & d_FXSAVE) == d_FXSAVE;
|
||||
}
|
||||
|
||||
#define MXCSR_DAZ (1 << 6)
|
||||
#define MXCSR_FTZ (1 << 15)
|
||||
|
||||
// Helper macros for reading/writing the x87 control word (FSTCW/FLDCW) and
// the SSE control/status register MXCSR (STMXCSR/LDMXCSR).
// 'cw' has to be an lvalue for the store variants.
#ifdef _MSC_VER
// x87 control word: MSVC inline assembler, which is only available when
// targeting 32 bit x86
#define STREFLOP_FSTCW(cw) do { short tmp; __asm { fstcw tmp }; (cw) = tmp; } while (0)
#define STREFLOP_FLDCW(cw) do { short tmp = (cw); __asm { fclex }; __asm { fldcw tmp }; } while (0)

// MXCSR: go through the SSE intrinsics instead of inline assembler, so that
// the macros used by this file also build for x64
#include <xmmintrin.h>
#define STREFLOP_STMXCSR(cw) do { (cw) = _mm_getcsr(); } while (0)
#define STREFLOP_LDMXCSR(cw) do { _mm_setcsr(cw); } while (0)
#else
// GCC compatible compilers: extended inline assembler with memory operands
#define STREFLOP_FSTCW(cw) do { __asm__ volatile ("fstcw %0" : "=m" (cw) : ); } while (0)
#define STREFLOP_FLDCW(cw) do { __asm__ volatile ("fclex \n fldcw %0" : : "m" (cw)); } while (0)
#define STREFLOP_STMXCSR(cw) do { __asm__ volatile ("stmxcsr %0" : "=m" (cw) : ); } while (0)
#define STREFLOP_LDMXCSR(cw) do { __asm__ volatile ("ldmxcsr %0" : : "m" (cw) ); } while (0)
#endif
|
||||
|
||||
/*
================
EnableMXCSRFlag

Sets (enable == true) or clears (enable == false) the bits of 'flag' in the
MXCSR register. 'name' is only used for the console messages. MXCSR is left
untouched when the bits already are in the requested state.
================
*/
static void EnableMXCSRFlag(int flag, bool enable, const char *name) {
	int mxcsr;

	STREFLOP_STMXCSR(mxcsr);

	const int current = mxcsr & flag;

	if (enable) {
		if (current == flag) {
			common->Printf("%s mode is already enabled\n", name);
			return;
		}

		common->Printf("enabling %s mode\n", name);
		mxcsr |= flag;
	} else {
		if (current == 0) {
			common->Printf("%s mode is already disabled\n", name);
			return;
		}

		common->Printf("disabling %s mode\n", name);
		mxcsr &= ~flag;
	}

	// write the modified value back
	STREFLOP_LDMXCSR(mxcsr);
}
|
||||
|
||||
/*
|
||||
================
|
||||
Sys_FPU_SetDAZ
|
||||
================
|
||||
*/
|
||||
void Sys_FPU_SetDAZ(bool enable) {
|
||||
if (!HasDAZ()) {
|
||||
common->Printf("this CPU doesn't support Denormals-Are-Zero\n");
|
||||
return;
|
||||
}
|
||||
|
||||
EnableMXCSRFlag(MXCSR_DAZ, enable, "Denormals-Are-Zero");
|
||||
}
|
||||
|
||||
/*
|
||||
================
|
||||
Sys_FPU_SetFTZ
|
||||
================
|
||||
*/
|
||||
void Sys_FPU_SetFTZ(bool enable) {
|
||||
EnableMXCSRFlag(MXCSR_FTZ, enable, "Flush-To-Zero");
|
||||
}
|
||||
#endif
|
|
@ -418,52 +418,6 @@ void idSysLocal::OpenURL( const char *url, bool quit ) {
|
|||
*/
|
||||
void Sys_DoPreferences( void ) { }
|
||||
|
||||
/*
|
||||
================
|
||||
Sys_FPU_SetDAZ
|
||||
================
|
||||
*/
|
||||
void Sys_FPU_SetDAZ( bool enable ) {
|
||||
/*
|
||||
DWORD dwData;
|
||||
|
||||
_asm {
|
||||
movzx ecx, byte ptr enable
|
||||
and ecx, 1
|
||||
shl ecx, 6
|
||||
STMXCSR dword ptr dwData
|
||||
mov eax, dwData
|
||||
and eax, ~(1<<6) // clear DAZ bit
|
||||
or eax, ecx // set the DAZ bit
|
||||
mov dwData, eax
|
||||
LDMXCSR dword ptr dwData
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
================
|
||||
Sys_FPU_SetFTZ
|
||||
================
|
||||
*/
|
||||
void Sys_FPU_SetFTZ( bool enable ) {
|
||||
/*
|
||||
DWORD dwData;
|
||||
|
||||
_asm {
|
||||
movzx ecx, byte ptr enable
|
||||
and ecx, 1
|
||||
shl ecx, 15
|
||||
STMXCSR dword ptr dwData
|
||||
mov eax, dwData
|
||||
and eax, ~(1<<15) // clear FTZ bit
|
||||
or eax, ecx // set the FTZ bit
|
||||
mov dwData, eax
|
||||
LDMXCSR dword ptr dwData
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
===============
|
||||
mem consistency stuff
|
||||
|
|
|
@ -475,7 +475,7 @@ int Sys_GetProcessorId( void ) {
|
|||
#if defined(__ppc__)
|
||||
cpuid |= CPUID_ALTIVEC;
|
||||
#elif defined(__i386__)
|
||||
cpuid |= CPUID_MMX | CPUID_SSE | CPUID_SSE2 | CPUID_SSE3 | CPUID_FTZ | CPUID_DAZ;
|
||||
cpuid |= CPUID_MMX | CPUID_SSE | CPUID_SSE2 | CPUID_SSE3;
|
||||
#endif
|
||||
return cpuid;
|
||||
}
|
||||
|
@ -903,63 +903,3 @@ static OSErr DoRegCodeDialog( char* ioRegCode1 )
|
|||
|
||||
return regCodeInfo.okPressed ? (OSErr)noErr : (OSErr)userCanceledErr;
|
||||
}
|
||||
|
||||
#if defined(__ppc__)
|
||||
|
||||
/*
|
||||
================
|
||||
Sys_FPU_SetDAZ
|
||||
================
|
||||
*/
|
||||
void Sys_FPU_SetDAZ( bool enable ) {
|
||||
}
|
||||
|
||||
/*
|
||||
================
|
||||
Sys_FPU_SetFTZ
|
||||
================
|
||||
*/
|
||||
void Sys_FPU_SetFTZ( bool enable ) {
|
||||
}
|
||||
|
||||
|
||||
#elif defined(__i386__)
|
||||
|
||||
#include <xmmintrin.h>
|
||||
|
||||
/*
|
||||
================
|
||||
Sys_FPU_SetDAZ
|
||||
================
|
||||
*/
|
||||
void Sys_FPU_SetDAZ( bool enable ) {
|
||||
uint32_t dwData;
|
||||
uint32_t enable_l = (uint32_t) enable;
|
||||
|
||||
enable_l = enable_l & 1;
|
||||
enable_l = enable_l << 6;
|
||||
dwData = _mm_getcsr(); // store MXCSR to dwData
|
||||
dwData = dwData & 0xffbf;
|
||||
dwData = dwData | enable_l;
|
||||
_mm_setcsr(dwData); // load MXCSR with dwData
|
||||
}
|
||||
|
||||
/*
|
||||
================
|
||||
Sys_FPU_SetFTZ
|
||||
================
|
||||
*/
|
||||
void Sys_FPU_SetFTZ( bool enable ) {
|
||||
|
||||
uint32_t dwData;
|
||||
uint32_t enable_l = (uint32_t) enable;
|
||||
|
||||
enable_l = enable_l & 1;
|
||||
enable_l = enable_l << 15;
|
||||
dwData = _mm_getcsr(); // store MXCSR to dwData
|
||||
dwData = dwData & 0x7fff;
|
||||
dwData = dwData | enable_l;
|
||||
_mm_setcsr(dwData); // load MXCSR with dwData
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -193,6 +193,7 @@ ui_list = scons_utils.BuildList( 'ui', ui_string )
|
|||
|
||||
sys_string = ' \
|
||||
sys_local.cpp \
|
||||
cpu.cpp \
|
||||
threads.cpp \
|
||||
posix/posix_net.cpp \
|
||||
posix/posix_main.cpp \
|
||||
|
|
|
@ -31,6 +31,7 @@ If you have questions concerning this license or the applicable additional terms
|
|||
|
||||
#include "renderer/RenderSystem.h"
|
||||
#include "sound/sound.h"
|
||||
#include "sys/sys_public.h"
|
||||
|
||||
/*
|
||||
==============================================================
|
||||
|
|
|
@ -39,8 +39,6 @@ typedef enum {
|
|||
CPUID_SSE2 = 0x00080, // Streaming SIMD Extensions 2
|
||||
CPUID_SSE3 = 0x00100, // Streaming SIMD Extensions 3 aka Prescott's New Instructions
|
||||
CPUID_ALTIVEC = 0x00200, // AltiVec
|
||||
CPUID_FTZ = 0x04000, // Flush-To-Zero mode (denormal results are flushed to zero)
|
||||
CPUID_DAZ = 0x08000 // Denormals-Are-Zero mode (denormal source operands are set to zero)
|
||||
} cpuid_t;
|
||||
|
||||
typedef enum {
|
||||
|
@ -169,10 +167,10 @@ void Sys_FPU_SetPrecision( int precision );
|
|||
// sets the FPU rounding mode
|
||||
void Sys_FPU_SetRounding( int rounding );
|
||||
|
||||
// sets Flush-To-Zero mode (only available when CPUID_FTZ is set)
|
||||
// sets Flush-To-Zero mode
|
||||
void Sys_FPU_SetFTZ( bool enable );
|
||||
|
||||
// sets Denormals-Are-Zero mode (only available when CPUID_DAZ is set)
|
||||
// sets Denormals-Are-Zero mode
|
||||
void Sys_FPU_SetDAZ( bool enable );
|
||||
|
||||
// returns amount of system ram
|
||||
|
|
|
@ -210,40 +210,6 @@ static bool HasSSE3( void ) {
|
|||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
================
|
||||
HasDAZ
|
||||
================
|
||||
*/
|
||||
static bool HasDAZ( void ) {
|
||||
#ifdef _MSC_VER
|
||||
__declspec(align(16)) unsigned char FXSaveArea[512];
|
||||
unsigned char *FXArea = FXSaveArea;
|
||||
DWORD dwMask = 0;
|
||||
unsigned regs[4];
|
||||
|
||||
// get CPU feature bits
|
||||
CPUID( 1, regs );
|
||||
|
||||
// bit 24 of EDX denotes support for FXSAVE
|
||||
if ( !( regs[_REG_EDX] & ( 1 << 24 ) ) ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
memset( FXArea, 0, sizeof( FXSaveArea ) );
|
||||
|
||||
__asm {
|
||||
mov eax, FXArea
|
||||
FXSAVE [eax]
|
||||
}
|
||||
|
||||
dwMask = *(DWORD *)&FXArea[28]; // Read the MXCSR Mask
|
||||
return ( ( dwMask & ( 1 << 6 ) ) == ( 1 << 6 ) ); // Return if the DAZ bit is set
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
================
|
||||
Sys_GetCPUId
|
||||
|
@ -270,7 +236,7 @@ int Sys_GetCPUId( void ) {
|
|||
|
||||
// check for Streaming SIMD Extensions
|
||||
if ( HasSSE() ) {
|
||||
flags |= CPUID_SSE | CPUID_FTZ;
|
||||
flags |= CPUID_SSE;
|
||||
}
|
||||
|
||||
// check for Streaming SIMD Extensions 2
|
||||
|
@ -283,11 +249,6 @@ int Sys_GetCPUId( void ) {
|
|||
flags |= CPUID_SSE3;
|
||||
}
|
||||
|
||||
// check for Denormals-Are-Zero mode
|
||||
if ( HasDAZ() ) {
|
||||
flags |= CPUID_DAZ;
|
||||
}
|
||||
|
||||
return flags;
|
||||
#else
|
||||
return CPUID_GENERIC;
|
||||
|
@ -611,49 +572,3 @@ void Sys_FPU_SetRounding( int rounding ) {
|
|||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
================
|
||||
Sys_FPU_SetDAZ
|
||||
================
|
||||
*/
|
||||
void Sys_FPU_SetDAZ( bool enable ) {
|
||||
#ifdef _MSC_VER
|
||||
DWORD dwData;
|
||||
|
||||
_asm {
|
||||
movzx ecx, byte ptr enable
|
||||
and ecx, 1
|
||||
shl ecx, 6
|
||||
STMXCSR dword ptr dwData
|
||||
mov eax, dwData
|
||||
and eax, ~(1<<6) // clear DAZ bit
|
||||
or eax, ecx // set the DAZ bit
|
||||
mov dwData, eax
|
||||
LDMXCSR dword ptr dwData
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
================
|
||||
Sys_FPU_SetFTZ
|
||||
================
|
||||
*/
|
||||
void Sys_FPU_SetFTZ( bool enable ) {
|
||||
#ifdef _MSC_VER
|
||||
DWORD dwData;
|
||||
|
||||
_asm {
|
||||
movzx ecx, byte ptr enable
|
||||
and ecx, 1
|
||||
shl ecx, 15
|
||||
STMXCSR dword ptr dwData
|
||||
mov eax, dwData
|
||||
and eax, ~(1<<15) // clear FTZ bit
|
||||
or eax, ecx // set the FTZ bit
|
||||
mov dwData, eax
|
||||
LDMXCSR dword ptr dwData
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue