Unify CPUID_FTZ and CPUID_DAZ

Get rid of the 2 CPUID flags and combine them with SSE in one
implementation.
SSE flags can now be set on all x86 and x86_64 platforms -
independent of -ffast-math.
Helper defines borrowed from STREFLOP.
This commit is contained in:
dhewg 2011-12-21 21:37:40 +01:00
parent acfe2489e4
commit 4150029322
9 changed files with 185 additions and 203 deletions

View file

@ -612,6 +612,7 @@ set(src_sys_dedicated sys/linux/dedicated.cpp)
if (APPLE)
set(src_sys_base
sys/cpu.cpp
sys/threads.cpp
sys/sys_local.cpp
sys/posix/posix_net.cpp
@ -633,6 +634,7 @@ if (APPLE)
)
elseif (WIN32)
set(src_sys_base
sys/cpu.cpp
sys/threads.cpp
sys/sys_local.cpp
sys/win32/win_cpu.cpp
@ -653,6 +655,7 @@ elseif (WIN32)
)
else()
set(src_sys_base
sys/cpu.cpp
sys/threads.cpp
sys/sys_local.cpp
sys/posix/posix_net.cpp

View file

@ -112,14 +112,9 @@ void idSIMD::InitProcessor( const char *module, bool forceGeneric ) {
idLib::common->Printf( "%s using %s for SIMD processing\n", module, SIMDProcessor->GetName() );
}
if ( cpuid & CPUID_FTZ ) {
if ( cpuid & CPUID_SSE ) {
idLib::sys->FPU_SetFTZ( true );
idLib::common->Printf( "enabled Flush-To-Zero mode\n" );
}
if ( cpuid & CPUID_DAZ ) {
idLib::sys->FPU_SetDAZ( true );
idLib::common->Printf( "enabled Denormals-Are-Zero mode\n" );
}
}

175
neo/sys/cpu.cpp Normal file
View file

@ -0,0 +1,175 @@
/*
===========================================================================
Doom 3 GPL Source Code
Copyright (C) 1999-2011 id Software LLC, a ZeniMax Media company.
This file is part of the Doom 3 GPL Source Code ("Doom 3 Source Code").
Doom 3 Source Code is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Doom 3 Source Code is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Doom 3 Source Code. If not, see <http://www.gnu.org/licenses/>.
In addition, the Doom 3 Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 Source Code. If not, please request a copy in writing from id Software at the address below.
If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
===========================================================================
*/
#include <SDL_cpuinfo.h>
#include "sys/platform.h"
#include "framework/Common.h"
#include "sys/sys_public.h"
#ifdef NO_SSE
#undef NO_SSE
#endif
#if defined(__GNUC__)
#if !defined(__i386__) && !defined(__x86_64__)
#define NO_CPUID
#endif
#elif defined(_MSC_VER)
#if !defined(_M_IX86) && !defined(_M_X64)
#define NO_CPUID
#endif
#else
#error unsupported compiler
#endif
#ifdef NO_CPUID
/*
================
Sys_FPU_SetDAZ

Variant compiled when NO_CPUID is defined (non-x86 targets): the request
cannot be honoured, so it is reported through common->Error.
================
*/
void Sys_FPU_SetDAZ(bool enable) {
	(void)enable;	// the requested state is irrelevant here

	common->Error("ERROR: Sys_FPU_SetDAZ not supported on this architecture\n");
}
/*
================
Sys_FPU_SetFTZ

Variant compiled when NO_CPUID is defined (non-x86 targets): the request
cannot be honoured, so it is reported through common->Error.
================
*/
void Sys_FPU_SetFTZ(bool enable) {
	(void)enable;	// the requested state is irrelevant here

	common->Error("ERROR: Sys_FPU_SetFTZ not supported on this architecture\n");
}
#else
#if defined(__GNUC__)
/*
================
CPUid

Executes the CPUID instruction for leaf "index" with sub-leaf 0 and stores
the resulting EAX, EBX, ECX and EDX values in *a, *b, *c and *d.
================
*/
static inline void CPUid(int index, int *a, int *b, int *c, int *d) {
#if __x86_64__
#	define REG_b "rbx"
#	define REG_S "rsi"
#elif __i386__
#	define REG_b "ebx"
#	define REG_S "esi"
#endif
	// (R|E)BX is not handed to the compiler as an output: it is parked in
	// (R|E)SI before cpuid and swapped back afterwards, so SI ends up holding
	// the EBX result while BX keeps its previous value (presumably because BX
	// can be reserved, e.g. as the PIC register on i386).
	// ECX is explicitly zeroed so leaves that take a sub-leaf index see 0
	// instead of whatever happened to be in the register.
	__asm__ volatile
	( "mov %%" REG_b ", %%" REG_S "\n\t"
	  "cpuid\n\t"
	  "xchg %%" REG_b ", %%" REG_S
	  : "=a" (*a), "=S" (*b),
	    "=c" (*c), "=d" (*d)
	  : "0" (index), "2" (0));
}
#elif defined(_MSC_VER)
#include <intrin.h>
/*
================
CPUid

MSVC flavour: forwards to the __cpuid intrinsic (VS2005 and up) and hands
the four result words back through *a, *b, *c and *d in that order.
================
*/
static inline void CPUid(int index, int *a, int *b, int *c, int *d) {
	int regs[4] = { };
	int *const out[4] = { a, b, c, d };

	__cpuid(regs, index);

	for (int i = 0; i < 4; i++)
		*out[i] = regs[i];
}
#else
#error unsupported compiler
#endif
#define d_FXSAVE (1 << 24)
/*
================
HasDAZ

Returns true when the MXCSR Denormals-Are-Zero bit may be set on this CPU.

FXSAVE support (CPUID.1:EDX bit 24) alone is not sufficient: the DAZ bit is
only writable when bit 6 of the MXCSR_MASK field (byte offset 28 of the
FXSAVE image) is set. A mask of 0 means the default mask applies, which
does not include DAZ, so the single bit test covers that case as well.
================
*/
static inline bool HasDAZ() {
	int a, b, c, d;

	CPUid(0, &a, &b, &c, &d);
	if (a < 1)
		return false;

	CPUid(1, &a, &b, &c, &d);
	if ((d & d_FXSAVE) != d_FXSAVE)
		return false;

	// FXSAVE requires a 16 byte aligned, 512 byte save area
#if defined(_MSC_VER)
	__declspec(align(16)) unsigned char fxArea[512] = { 0 };
#else
	unsigned char fxArea[512] __attribute__((aligned(16))) = { 0 };
#endif

#if defined(_MSC_VER) && defined(_M_X64)
	// no inline assembly on x64 targets, use the intrinsic
	_fxsave(fxArea);
#elif defined(_MSC_VER)
	unsigned char *fxPtr = fxArea;
	__asm {
		mov eax, fxPtr
		fxsave [eax]
	}
#else
	__asm__ volatile ("fxsave (%0)" : : "r" (fxArea) : "memory");
#endif

	// bit 6 of MXCSR_MASK lives in the lowest byte of the field
	return (fxArea[28] & (1 << 6)) != 0;
}
// MXCSR control bits toggled by this file:
// bit 6 = Denormals-Are-Zero, bit 15 = Flush-To-Zero
#define MXCSR_DAZ (1 << 6)
#define MXCSR_FTZ (1 << 15)
// Helper defines borrowed from STREFLOP.
// FSTCW/FLDCW store/load the x87 control word (FLDCW issues fclex first),
// STMXCSR/LDMXCSR store/load the SSE MXCSR register.
// "cw" must be an lvalue for the store variants.
#ifdef _MSC_VER
// MSVC inline assembly goes through a local temporary
// NOTE(review): __asm blocks are only accepted by MSVC for 32 bit x86
// targets - confirm how _M_X64 builds are expected to compile this
#define STREFLOP_FSTCW(cw) do { short tmp; __asm { fstcw tmp }; (cw) = tmp; } while (0)
#define STREFLOP_FLDCW(cw) do { short tmp = (cw); __asm { fclex }; __asm { fldcw tmp }; } while (0)
#define STREFLOP_STMXCSR(cw) do { int tmp; __asm { stmxcsr tmp }; (cw) = tmp; } while (0)
#define STREFLOP_LDMXCSR(cw) do { int tmp = (cw); __asm { ldmxcsr tmp }; } while (0)
#else
// GCC style extended asm operating directly on "cw" as a memory operand
#define STREFLOP_FSTCW(cw) do { asm volatile ("fstcw %0" : "=m" (cw) : ); } while (0)
#define STREFLOP_FLDCW(cw) do { asm volatile ("fclex \n fldcw %0" : : "m" (cw)); } while (0)
#define STREFLOP_STMXCSR(cw) do { asm volatile ("stmxcsr %0" : "=m" (cw) : ); } while (0)
#define STREFLOP_LDMXCSR(cw) do { asm volatile ("ldmxcsr %0" : : "m" (cw) ); } while (0)
#endif
/*
================
EnableMXCSRFlag

Sets (enable == true) or clears (enable == false) the bit(s) "flag" in the
MXCSR register. "name" is only used for the console messages. MXCSR is left
untouched when it is already in the requested state.
================
*/
static void EnableMXCSRFlag(int flag, bool enable, const char *name) {
	int mxcsr;

	STREFLOP_STMXCSR(mxcsr);

	if (enable) {
		if ((mxcsr & flag) == flag) {
			common->Printf("%s mode is already enabled\n", name);
			return;
		}

		common->Printf("enabling %s mode\n", name);
		mxcsr |= flag;
	} else {
		if ((mxcsr & flag) == 0) {
			common->Printf("%s mode is already disabled\n", name);
			return;
		}

		common->Printf("disabling %s mode\n", name);
		mxcsr &= ~flag;
	}

	STREFLOP_LDMXCSR(mxcsr);
}
/*
================
Sys_FPU_SetDAZ

Switches the MXCSR Denormals-Are-Zero bit on or off. Only prints a notice
when HasDAZ() reports that the CPU lacks the feature.
================
*/
void Sys_FPU_SetDAZ(bool enable) {
	if (HasDAZ()) {
		EnableMXCSRFlag(MXCSR_DAZ, enable, "Denormals-Are-Zero");
	} else {
		common->Printf("this CPU doesn't support Denormals-Are-Zero\n");
	}
}
/*
================
Sys_FPU_SetFTZ
================
*/
void Sys_FPU_SetFTZ(bool enable) {
EnableMXCSRFlag(MXCSR_FTZ, enable, "Flush-To-Zero");
}
#endif

View file

@ -418,52 +418,6 @@ void idSysLocal::OpenURL( const char *url, bool quit ) {
*/
void Sys_DoPreferences( void ) { }
/*
================
Sys_FPU_SetDAZ

The implementation below is commented out, so this function does nothing.
================
*/
void Sys_FPU_SetDAZ( bool enable ) {
// disabled MSVC style inline assembly that would copy "enable" into MXCSR bit 6
/*
DWORD dwData;
_asm {
movzx ecx, byte ptr enable
and ecx, 1
shl ecx, 6
STMXCSR dword ptr dwData
mov eax, dwData
and eax, ~(1<<6) // clear DAZ bit
or eax, ecx // set the DAZ bit
mov dwData, eax
LDMXCSR dword ptr dwData
}
*/
}
/*
================
Sys_FPU_SetFTZ

The implementation below is commented out, so this function does nothing.
================
*/
void Sys_FPU_SetFTZ( bool enable ) {
// disabled MSVC style inline assembly that would copy "enable" into MXCSR bit 15
/*
DWORD dwData;
_asm {
movzx ecx, byte ptr enable
and ecx, 1
shl ecx, 15
STMXCSR dword ptr dwData
mov eax, dwData
and eax, ~(1<<15) // clear FTZ bit
or eax, ecx // set the FTZ bit
mov dwData, eax
LDMXCSR dword ptr dwData
}
*/
}
/*
===============
mem consistency stuff

View file

@ -475,7 +475,7 @@ int Sys_GetProcessorId( void ) {
#if defined(__ppc__)
cpuid |= CPUID_ALTIVEC;
#elif defined(__i386__)
cpuid |= CPUID_MMX | CPUID_SSE | CPUID_SSE2 | CPUID_SSE3 | CPUID_FTZ | CPUID_DAZ;
cpuid |= CPUID_MMX | CPUID_SSE | CPUID_SSE2 | CPUID_SSE3;
#endif
return cpuid;
}
@ -903,63 +903,3 @@ static OSErr DoRegCodeDialog( char* ioRegCode1 )
return regCodeInfo.okPressed ? (OSErr)noErr : (OSErr)userCanceledErr;
}
#if defined(__ppc__)
/*
================
Sys_FPU_SetDAZ

PowerPC build (__ppc__): empty body, the call is a no-op.
================
*/
void Sys_FPU_SetDAZ( bool enable ) {
}
/*
================
Sys_FPU_SetFTZ

PowerPC build (__ppc__): empty body, the call is a no-op.
================
*/
void Sys_FPU_SetFTZ( bool enable ) {
}
#elif defined(__i386__)
#include <xmmintrin.h>
/*
================
Sys_FPU_SetDAZ

Writes "enable" into bit 6 (Denormals-Are-Zero) of MXCSR. The remaining
low 16 bits are preserved, everything above bit 15 is written as zero.
================
*/
void Sys_FPU_SetDAZ( bool enable ) {
	const uint32_t dazBit = ( (uint32_t) enable & 1 ) << 6;
	const uint32_t mxcsr = ( _mm_getcsr() & 0xffbf ) | dazBit;

	_mm_setcsr( mxcsr );
}
/*
================
Sys_FPU_SetFTZ

Writes "enable" into bit 15 (Flush-To-Zero) of MXCSR. The bits below it
are preserved, everything above bit 15 is written as zero.
================
*/
void Sys_FPU_SetFTZ( bool enable ) {
	const uint32_t ftzBit = ( (uint32_t) enable & 1 ) << 15;
	const uint32_t mxcsr = ( _mm_getcsr() & 0x7fff ) | ftzBit;

	_mm_setcsr( mxcsr );
}
#endif

View file

@ -193,6 +193,7 @@ ui_list = scons_utils.BuildList( 'ui', ui_string )
sys_string = ' \
sys_local.cpp \
cpu.cpp \
threads.cpp \
posix/posix_net.cpp \
posix/posix_main.cpp \

View file

@ -31,6 +31,7 @@ If you have questions concerning this license or the applicable additional terms
#include "renderer/RenderSystem.h"
#include "sound/sound.h"
#include "sys/sys_public.h"
/*
==============================================================

View file

@ -39,8 +39,6 @@ typedef enum {
CPUID_SSE2 = 0x00080, // Streaming SIMD Extensions 2
CPUID_SSE3 = 0x00100, // Streaming SIMD Extentions 3 aka Prescott's New Instructions
CPUID_ALTIVEC = 0x00200, // AltiVec
CPUID_FTZ = 0x04000, // Flush-To-Zero mode (denormal results are flushed to zero)
CPUID_DAZ = 0x08000 // Denormals-Are-Zero mode (denormal source operands are set to zero)
} cpuid_t;
typedef enum {
@ -169,10 +167,10 @@ void Sys_FPU_SetPrecision( int precision );
// sets the FPU rounding mode
void Sys_FPU_SetRounding( int rounding );
// sets Flush-To-Zero mode (only available when CPUID_FTZ is set)
// sets Flush-To-Zero mode
void Sys_FPU_SetFTZ( bool enable );
// sets Denormals-Are-Zero mode (only available when CPUID_DAZ is set)
// sets Denormals-Are-Zero mode
void Sys_FPU_SetDAZ( bool enable );
// returns amount of system ram

View file

@ -210,40 +210,6 @@ static bool HasSSE3( void ) {
return false;
}
/*
================
HasDAZ

Returns true when the Denormals-Are-Zero bit is present in the MXCSR mask
reported by FXSAVE. Only implemented for MSVC; every other compiler gets
false.
================
*/
static bool HasDAZ( void ) {
#ifdef _MSC_VER
// FXSAVE stores a 512 byte image and the buffer is declared 16 byte aligned
__declspec(align(16)) unsigned char FXSaveArea[512];
unsigned char *FXArea = FXSaveArea;
DWORD dwMask = 0;
unsigned regs[4];
// get CPU feature bits
CPUID( 1, regs );
// bit 24 of EDX denotes support for FXSAVE
if ( !( regs[_REG_EDX] & ( 1 << 24 ) ) ) {
return false;
}
// start from a zeroed image so a mask the CPU does not write reads as 0
memset( FXArea, 0, sizeof( FXSaveArea ) );
__asm {
mov eax, FXArea
FXSAVE [eax]
}
// the MXCSR mask is read as a 32 bit value from byte offset 28 of the image
dwMask = *(DWORD *)&FXArea[28]; // Read the MXCSR Mask
return ( ( dwMask & ( 1 << 6 ) ) == ( 1 << 6 ) ); // Return if the DAZ bit is set
#else
return false;
#endif
}
/*
================
Sys_GetCPUId
@ -270,7 +236,7 @@ int Sys_GetCPUId( void ) {
// check for Streaming SIMD Extensions
if ( HasSSE() ) {
flags |= CPUID_SSE | CPUID_FTZ;
flags |= CPUID_SSE;
}
// check for Streaming SIMD Extensions 2
@ -283,11 +249,6 @@ int Sys_GetCPUId( void ) {
flags |= CPUID_SSE3;
}
// check for Denormals-Are-Zero mode
if ( HasDAZ() ) {
flags |= CPUID_DAZ;
}
return flags;
#else
return CPUID_GENERIC;
@ -611,49 +572,3 @@ void Sys_FPU_SetRounding( int rounding ) {
}
#endif
}
/*
================
Sys_FPU_SetDAZ

Copies "enable" into bit 6 (Denormals-Are-Zero) of MXCSR using MSVC inline
assembly. Compiles to an empty function for any other compiler.
================
*/
void Sys_FPU_SetDAZ( bool enable ) {
#ifdef _MSC_VER
DWORD dwData;
_asm {
movzx ecx, byte ptr enable
and ecx, 1
shl ecx, 6
STMXCSR dword ptr dwData
mov eax, dwData
and eax, ~(1<<6) // clear DAZ bit
or eax, ecx // set the DAZ bit
mov dwData, eax
LDMXCSR dword ptr dwData
}
#endif
}
/*
================
Sys_FPU_SetFTZ

Copies "enable" into bit 15 (Flush-To-Zero) of MXCSR using MSVC inline
assembly. Compiles to an empty function for any other compiler.
================
*/
void Sys_FPU_SetFTZ( bool enable ) {
#ifdef _MSC_VER
DWORD dwData;
_asm {
movzx ecx, byte ptr enable
and ecx, 1
shl ecx, 15
STMXCSR dword ptr dwData
mov eax, dwData
and eax, ~(1<<15) // clear FTZ bit
or eax, ecx // set the FTZ bit
mov dwData, eax
LDMXCSR dword ptr dwData
}
#endif
}