doom3-bfg/neo/sys/win32/win_cpu.cpp
2012-12-13 20:37:55 +01:00

1205 lines
26 KiB
C++

/*
===========================================================================
Doom 3 BFG Edition GPL Source Code
Copyright (C) 1993-2012 id Software LLC, a ZeniMax Media company.
Copyright (C) 2012 Robert Beckebans
This file is part of the Doom 3 BFG Edition GPL Source Code ("Doom 3 BFG Edition Source Code").
Doom 3 BFG Edition Source Code is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Doom 3 BFG Edition Source Code is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Doom 3 BFG Edition Source Code. If not, see <http://www.gnu.org/licenses/>.
In addition, the Doom 3 BFG Edition Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 BFG Edition Source Code. If not, please request a copy in writing from id Software at the address below.
If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
===========================================================================
*/
#pragma hdrstop
#include "../../idlib/precompiled.h"
#include "win_local.h"
#pragma warning(disable:4740) // warning C4740: flow in or out of inline asm code suppresses global optimization
#pragma warning(disable:4731) // warning C4731: 'XXX' : frame pointer register 'ebx' modified by inline assembly code
/*
==============================================================
Clock ticks
==============================================================
*/
/*
================
Sys_GetClockTicks
================
*/
// Returns a monotonically increasing tick count as a double.
//   - Win64: the QueryPerformanceCounter value.
//   - 32-bit x86: the CPU time stamp counter, read with RDTSC after a serializing CPUID.
// The 64-bit value is assembled as lo + 2^32 * hi.
double Sys_GetClockTicks()
{
// RB begin
#if defined(_WIN64)
	LARGE_INTEGER li;

	QueryPerformanceCounter( &li );
	// QuadPart already holds the full 64-bit counter; no manual recombination needed
	return ( double ) li.QuadPart;
#else
#if defined(_MSC_VER)
	unsigned long lo, hi;

	__asm {
		push ebx
		xor eax, eax
		cpuid
		rdtsc
		mov lo, eax
		mov hi, edx
		pop ebx
	}
	// the high word is worth 2^32 (4294967296), not 0xFFFFFFFF
	return ( double ) lo + 4294967296.0 * ( double ) hi;
#elif defined(__GNUC__) && defined( __i386__ )
	unsigned long lo, hi;

	// EBX is saved/restored by hand; RDTSC leaves its result in EDX:EAX, so bind the
	// outputs directly to those registers and declare ECX (written by CPUID) as clobbered.
	__asm__ __volatile__(
		"push %%ebx\n"
		"xor %%eax,%%eax\n"
		"cpuid\n"
		"rdtsc\n"
		"pop %%ebx\n"
		: "=a"( lo ), "=d"( hi )
		:
		: "ecx", "cc" );
	return ( double ) lo + 4294967296.0 * ( double ) hi;
#else
#error unsupported CPU
#endif
#endif
// RB end
}
/*
================
Sys_ClockTicksPerSecond
================
*/
double Sys_ClockTicksPerSecond() {
static double ticks = 0;
#if 0
if ( !ticks ) {
LARGE_INTEGER li;
QueryPerformanceFrequency( &li );
ticks = li.QuadPart;
}
#else
if ( !ticks ) {
HKEY hKey;
LPBYTE ProcSpeed;
DWORD buflen, ret;
if ( !RegOpenKeyEx( HKEY_LOCAL_MACHINE, "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", 0, KEY_READ, &hKey ) ) {
ProcSpeed = 0;
buflen = sizeof( ProcSpeed );
ret = RegQueryValueEx( hKey, "~MHz", NULL, NULL, (LPBYTE) &ProcSpeed, &buflen );
// If we don't succeed, try some other spellings.
if ( ret != ERROR_SUCCESS ) {
ret = RegQueryValueEx( hKey, "~Mhz", NULL, NULL, (LPBYTE) &ProcSpeed, &buflen );
}
if ( ret != ERROR_SUCCESS ) {
ret = RegQueryValueEx( hKey, "~mhz", NULL, NULL, (LPBYTE) &ProcSpeed, &buflen );
}
RegCloseKey( hKey );
if ( ret == ERROR_SUCCESS ) {
ticks = (double) ((unsigned long)ProcSpeed) * 1000000;
}
}
}
#endif
return ticks;
}
/*
==============================================================
CPU
==============================================================
*/
/*
================
HasCPUID
================
*/
// RB: no checks on Win64
#if !defined(_WIN64)
// Returns true when software can toggle bit 21 (0x00200000, the "ID bit" per the
// comments below) of EFLAGS, which this file treats as proof that CPUID is available.
// If the bit is currently set the code tries to clear it; if it is currently clear the
// code tries to set it. Either way the flags are re-read to see whether the change stuck.
// The asm jumps straight to the C labels 'err' and 'good' below the block.
static bool HasCPUID() {
__asm
{
pushfd // save eflags
pop eax
test eax, 0x00200000 // check ID bit
jz set21 // bit 21 is not set, so jump to set21
and eax, 0xffdfffff // clear bit 21
push eax // save new value in register
popfd // store new value in flags
pushfd
pop eax
test eax, 0x00200000 // check ID bit
jz good
jmp err // cpuid not supported
set21:
or eax, 0x00200000 // set ID bit
push eax // store new value
popfd // store new value in EFLAGS
pushfd
pop eax
test eax, 0x00200000 // if bit 21 is on
jnz good
jmp err
}
err:
return false;
good:
return true;
}
#endif
// Indices into the four-element register array filled in by CPUID().
#define _REG_EAX 0
#define _REG_EBX 1
#define _REG_ECX 2
#define _REG_EDX 3
/*
================
CPUID
================
*/
// RB: no checks on Win64
#if !defined(_WIN64)
// Executes the CPUID instruction with EAX = func and stores the resulting
// EAX, EBX, ECX and EDX in regs[_REG_EAX] .. regs[_REG_EDX].
// All general purpose registers are saved with pusha and restored with popa, so the
// results are first parked in locals and copied to regs[] after the registers are restored.
// ECX is not initialized before the instruction is issued.
static void CPUID( int func, unsigned regs[4] ) {
unsigned regEAX, regEBX, regECX, regEDX;
__asm pusha
__asm mov eax, func
// the two emitted bytes 0F A2 form the CPUID opcode
__asm __emit 00fh
__asm __emit 0a2h
__asm mov regEAX, eax
__asm mov regEBX, ebx
__asm mov regECX, ecx
__asm mov regEDX, edx
__asm popa
regs[_REG_EAX] = regEAX;
regs[_REG_EBX] = regEBX;
regs[_REG_ECX] = regECX;
regs[_REG_EDX] = regEDX;
}
#endif
/*
================
IsAMD
================
*/
// RB: no checks on Win64
#if !defined(_WIN64)
static bool IsAMD() {
char pstring[16];
char processorString[13];
// get name of processor
CPUID( 0, ( unsigned int * ) pstring );
processorString[0] = pstring[4];
processorString[1] = pstring[5];
processorString[2] = pstring[6];
processorString[3] = pstring[7];
processorString[4] = pstring[12];
processorString[5] = pstring[13];
processorString[6] = pstring[14];
processorString[7] = pstring[15];
processorString[8] = pstring[8];
processorString[9] = pstring[9];
processorString[10] = pstring[10];
processorString[11] = pstring[11];
processorString[12] = 0;
if ( strcmp( processorString, "AuthenticAMD" ) == 0 ) {
return true;
}
return false;
}
#endif
/*
================
HasCMOV
================
*/
// RB: no checks on Win64
#if !defined(_WIN64)
// Returns true when bit 15 of EDX from CPUID function 1 (CMOV) is set.
static bool HasCMOV() {
	unsigned features[4];

	// function 1 returns the CPU feature bits
	CPUID( 1, features );

	return ( features[_REG_EDX] & ( 1 << 15 ) ) != 0;
}
#endif
/*
================
Has3DNow
================
*/
// RB: no checks on Win64
#if !defined(_WIN64)
// Returns true when the CPU reports 3DNow! support:
// bit 31 of EDX from extended CPUID function 0x80000001.
static bool Has3DNow() {
	unsigned regs[4];

	// Function 0x80000000 reports the highest supported extended function in EAX.
	// 0x80000001 may only be queried when that maximum is at least 0x80000001.
	CPUID( 0x80000000, regs );
	if ( regs[_REG_EAX] < 0x80000001u ) {
		return false;
	}

	// bit 31 of EDX denotes 3DNow! support (unsigned literal: 1 << 31 overflows a signed int)
	CPUID( 0x80000001, regs );
	return ( regs[_REG_EDX] & ( 1u << 31 ) ) != 0;
}
#endif
/*
================
HasMMX
================
*/
// RB: no checks on Win64
#if !defined(_WIN64)
// Returns true when bit 23 of EDX from CPUID function 1 (MMX) is set.
static bool HasMMX() {
	unsigned features[4];

	// function 1 returns the CPU feature bits
	CPUID( 1, features );

	return ( features[_REG_EDX] & ( 1 << 23 ) ) != 0;
}
#endif
/*
================
HasSSE
================
*/
// RB: no checks on Win64
#if !defined(_WIN64)
// Returns true when bit 25 of EDX from CPUID function 1 (SSE) is set.
static bool HasSSE() {
	unsigned features[4];

	// function 1 returns the CPU feature bits
	CPUID( 1, features );

	return ( features[_REG_EDX] & ( 1 << 25 ) ) != 0;
}
#endif
/*
================
HasSSE2
================
*/
// RB: no checks on Win64
#if !defined(_WIN64)
// Returns true when bit 26 of EDX from CPUID function 1 (SSE2) is set.
static bool HasSSE2() {
	unsigned features[4];

	// function 1 returns the CPU feature bits
	CPUID( 1, features );

	return ( features[_REG_EDX] & ( 1 << 26 ) ) != 0;
}
#endif
/*
================
HasSSE3
================
*/
// RB: no checks on Win64
#if !defined(_WIN64)
// Returns true when bit 0 of ECX from CPUID function 1 (SSE3) is set.
static bool HasSSE3() {
	unsigned features[4];

	// function 1 returns the CPU feature bits
	CPUID( 1, features );

	return ( features[_REG_ECX] & ( 1 << 0 ) ) != 0;
}
#endif
/*
================
LogicalProcPerPhysicalProc
================
*/
// RB: no checks on Win64
#if !defined(_WIN64)
#define NUM_LOGICAL_BITS 0x00FF0000 // EBX[23:16] Bit 16-23 in ebx contains the number of logical
// processors per physical processor when execute cpuid with
// eax set to 1
static unsigned char LogicalProcPerPhysicalProc() {
unsigned int regebx = 0;
__asm {
mov eax, 1
cpuid
mov regebx, ebx
}
return (unsigned char) ((regebx & NUM_LOGICAL_BITS) >> 16);
}
#endif
/*
================
GetAPIC_ID
================
*/
// RB: no checks on Win64
#if !defined(_WIN64)
#define INITIAL_APIC_ID_BITS 0xFF000000 // EBX[31:24] Bits 24-31 (8 bits) return the 8-bit unique
// initial APIC ID for the processor this code is running on.
// Default value = 0xff if HT is not supported
static unsigned char GetAPIC_ID() {
unsigned int regebx = 0;
__asm {
mov eax, 1
cpuid
mov regebx, ebx
}
return (unsigned char) ((regebx & INITIAL_APIC_ID_BITS) >> 24);
}
#endif
/*
================
CPUCount
logicalNum is the number of logical CPU per physical CPU
physicalNum is the total number of physical processor
returns one of the HT_* flags
================
*/
// RB: no checks on Win64
#if !defined(_WIN64)
#define HT_NOT_CAPABLE 0
#define HT_ENABLED 1
#define HT_DISABLED 2
#define HT_SUPPORTED_NOT_ENABLED 3
#define HT_CANNOT_DETECT 4
int CPUCount( int &logicalNum, int &physicalNum ) {
int statusFlag;
SYSTEM_INFO info;
physicalNum = 1;
logicalNum = 1;
statusFlag = HT_NOT_CAPABLE;
info.dwNumberOfProcessors = 0;
GetSystemInfo (&info);
// Number of physical processors in a non-Intel system
// or in a 32-bit Intel system with Hyper-Threading technology disabled
physicalNum = info.dwNumberOfProcessors;
unsigned char HT_Enabled = 0;
logicalNum = LogicalProcPerPhysicalProc();
if ( logicalNum >= 1 ) { // > 1 doesn't mean HT is enabled in the BIOS
HANDLE hCurrentProcessHandle;
DWORD dwProcessAffinity;
DWORD dwSystemAffinity;
DWORD dwAffinityMask;
// Calculate the appropriate shifts and mask based on the
// number of logical processors.
unsigned char i = 1, PHY_ID_MASK = 0xFF, PHY_ID_SHIFT = 0;
while( i < logicalNum ) {
i *= 2;
PHY_ID_MASK <<= 1;
PHY_ID_SHIFT++;
}
hCurrentProcessHandle = GetCurrentProcess();
GetProcessAffinityMask( hCurrentProcessHandle, &dwProcessAffinity, &dwSystemAffinity );
// Check if available process affinity mask is equal to the
// available system affinity mask
if ( dwProcessAffinity != dwSystemAffinity ) {
statusFlag = HT_CANNOT_DETECT;
physicalNum = -1;
return statusFlag;
}
dwAffinityMask = 1;
while ( dwAffinityMask != 0 && dwAffinityMask <= dwProcessAffinity ) {
// Check if this CPU is available
if ( dwAffinityMask & dwProcessAffinity ) {
if ( SetProcessAffinityMask( hCurrentProcessHandle, dwAffinityMask ) ) {
unsigned char APIC_ID, LOG_ID, PHY_ID;
Sleep( 0 ); // Give OS time to switch CPU
APIC_ID = GetAPIC_ID();
LOG_ID = APIC_ID & ~PHY_ID_MASK;
PHY_ID = APIC_ID >> PHY_ID_SHIFT;
if ( LOG_ID != 0 ) {
HT_Enabled = 1;
}
}
}
dwAffinityMask = dwAffinityMask << 1;
}
// Reset the processor affinity
SetProcessAffinityMask( hCurrentProcessHandle, dwProcessAffinity );
if ( logicalNum == 1 ) { // Normal P4 : HT is disabled in hardware
statusFlag = HT_DISABLED;
} else {
if ( HT_Enabled ) {
// Total physical processors in a Hyper-Threading enabled system.
physicalNum /= logicalNum;
statusFlag = HT_ENABLED;
} else {
statusFlag = HT_SUPPORTED_NOT_ENABLED;
}
}
}
return statusFlag;
}
#endif
/*
================
HasHTT
================
*/
// RB: no checks on Win64
#if !defined(_WIN64)
// Returns true when bit 28 of EDX from CPUID function 1 (HTT) is set
// and CPUCount() additionally reports HT_ENABLED.
static bool HasHTT() {
	unsigned features[4];

	// function 1 returns the CPU feature bits
	CPUID( 1, features );

	// bit 28 of EDX denotes HTT existence
	if ( ( features[_REG_EDX] & ( 1 << 28 ) ) == 0 ) {
		return false;
	}

	int logicalPerPhysical, physicalCount;
	return CPUCount( logicalPerPhysical, physicalCount ) == HT_ENABLED;
}
#endif
/*
================
HasDAZ
================
*/
// RB: no checks on Win64
#if !defined(_WIN64)
// Returns true when bit 6 of the MXCSR mask stored by FXSAVE is set, which this
// file takes to mean that Denormals-Are-Zero mode is supported.
// Returns false without executing FXSAVE when CPUID function 1 does not report
// FXSAVE support (bit 24 of EDX).
static bool HasDAZ() {
// 512-byte, 16-byte-aligned save area that FXSAVE writes into
__declspec(align(16)) unsigned char FXSaveArea[512];
unsigned char *FXArea = FXSaveArea;
DWORD dwMask = 0;
unsigned regs[4];
// get CPU feature bits
CPUID( 1, regs );
// bit 24 of EDX denotes support for FXSAVE
if ( !( regs[_REG_EDX] & ( 1 << 24 ) ) ) {
return false;
}
// zero the area first so a mask field the CPU leaves untouched reads back as 0
memset( FXArea, 0, sizeof( FXSaveArea ) );
__asm {
mov eax, FXArea
FXSAVE [eax]
}
dwMask = *(DWORD *)&FXArea[28]; // Read the MXCSR Mask
return ( ( dwMask & ( 1 << 6 ) ) == ( 1 << 6 ) ); // Return if the DAZ bit is set
}
#endif
/*
================================================================================================
CPU
================================================================================================
*/
/*
========================
CountSetBits
Helper function to count set bits in the processor mask.
========================
*/
// Returns the number of 1 bits in bitMask.
DWORD CountSetBits( ULONG_PTR bitMask ) {
	DWORD setBits = 0;

	// each iteration clears the lowest set bit, so the loop runs once per 1 bit
	for ( ULONG_PTR remaining = bitMask; remaining != 0; remaining &= remaining - 1 ) {
		setBits++;
	}

	return setBits;
}
// Function pointer type used by GetCPUInfo() to call GetLogicalProcessorInformation,
// which is looked up in kernel32 at run time with GetProcAddress.
typedef BOOL (WINAPI *LPFN_GLPI)( PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD );
// Local names for the record kinds GetCPUInfo() switches on; the Relationship field of
// each SYSTEM_LOGICAL_PROCESSOR_INFORMATION record is cast to this type, so the
// enumerator values (0..3) must line up with the values Windows reports there.
enum LOGICAL_PROCESSOR_RELATIONSHIP_LOCAL {
localRelationProcessorCore,
localRelationNumaNode,
localRelationCache,
localRelationProcessorPackage
};
// Processor topology summary filled in by GetCPUInfo().
struct cpuInfo_t {
int processorPackageCount;	// number of processor-package records seen
int processorCoreCount;		// number of processor-core records seen
int logicalProcessorCount;	// sum of the set bits in the processor mask of every core record
int numaNodeCount;			// number of NUMA-node records seen
// per-level cache description; index 0..2 corresponds to cache level 1..3
struct cacheInfo_t {
int count;					// tally maintained by GetCPUInfo() for this cache level
int associativity;			// copied from CACHE_DESCRIPTOR::Associativity
int lineSize;				// copied from CACHE_DESCRIPTOR::LineSize
int size;					// copied from CACHE_DESCRIPTOR::Size
} cacheLevel[3];
};
/*
========================
GetCPUInfo
========================
*/
// Fills cpuInfo with the processor topology reported by GetLogicalProcessorInformation.
//
// cpuInfo is zeroed first, so it holds all-zero counts whenever false is returned.
// Returns false when the API is not exported by kernel32, when the query fails, or
// when the record buffer cannot be allocated; returns true otherwise.
bool GetCPUInfo( cpuInfo_t & cpuInfo ) {
	memset( & cpuInfo, 0, sizeof( cpuInfo ) );

	LPFN_GLPI glpi = (LPFN_GLPI)GetProcAddress( GetModuleHandle( TEXT( "kernel32" ) ), "GetLogicalProcessorInformation" );
	if ( NULL == glpi ) {
		idLib::Printf( "\nGetLogicalProcessorInformation is not supported.\n" );
		return false;
	}

	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL;
	DWORD returnLength = 0;

	// The first call (NULL buffer) reports the required size; keep growing until it fits.
	while ( !glpi( buffer, &returnLength ) ) {
		// capture the error before free() gets a chance to disturb it
		const DWORD lastError = GetLastError();

		free( buffer );
		buffer = NULL;

		if ( lastError != ERROR_INSUFFICIENT_BUFFER ) {
			idLib::Printf( "Sys_CPUCount error: %d\n", (int)lastError );
			return false;
		}

		buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc( returnLength );
		if ( buffer == NULL ) {
			// retrying with a NULL buffer would report "insufficient buffer" forever
			idLib::Printf( "Sys_CPUCount error: out of memory\n" );
			return false;
		}
	}

	const DWORD recordCount = returnLength / sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION );
	for ( DWORD i = 0; i < recordCount; i++ ) {
		const SYSTEM_LOGICAL_PROCESSOR_INFORMATION & record = buffer[i];

		switch ( (LOGICAL_PROCESSOR_RELATIONSHIP_LOCAL) record.Relationship ) {
			case localRelationProcessorCore:
				cpuInfo.processorCoreCount++;
				// A hyperthreaded core supplies more than one logical processor.
				cpuInfo.logicalProcessorCount += CountSetBits( record.ProcessorMask );
				break;

			case localRelationNumaNode:
				// Non-NUMA systems report a single record of this type.
				cpuInfo.numaNodeCount++;
				break;

			case localRelationCache:
				// Cache data is in record.Cache, one CACHE_DESCRIPTOR structure for each cache.
				if ( record.Cache.Level >= 1 && record.Cache.Level <= 3 ) {
					cpuInfo_t::cacheInfo_t & slot = cpuInfo.cacheLevel[record.Cache.Level - 1];
					// the first cache seen at a level supplies the description ...
					if ( slot.count == 0 ) {
						slot.associativity = record.Cache.Associativity;
						slot.lineSize = record.Cache.LineSize;
						slot.size = record.Cache.Size;
					}
					// ... and every cache at that level is counted, including the first
					slot.count++;
				}
				break;

			case localRelationProcessorPackage:
				// Logical processors share a physical package.
				cpuInfo.processorPackageCount++;
				break;

			default:
				idLib::Printf( "Error: Unsupported LOGICAL_PROCESSOR_RELATIONSHIP value.\n" );
				break;
		}
	}

	free( buffer );

	return true;
}
/*
========================
Sys_GetCPUCacheSize
========================
*/
void Sys_GetCPUCacheSize( int level, int & count, int & size, int & lineSize ) {
assert( level >= 1 && level <= 3 );
cpuInfo_t cpuInfo;
GetCPUInfo( cpuInfo );
count = cpuInfo.cacheLevel[level - 1].count;
size = cpuInfo.cacheLevel[level - 1].size;
lineSize = cpuInfo.cacheLevel[level - 1].lineSize;
}
/*
========================
Sys_CPUCount
numLogicalCPUCores - the total number of logical processors, summed over all cores
numPhysicalCPUCores - the total number of physical processor cores
numCPUPackages - the total number of packages (physical processors)
========================
*/
void Sys_CPUCount( int & numLogicalCPUCores, int & numPhysicalCPUCores, int & numCPUPackages ) {
cpuInfo_t cpuInfo;
GetCPUInfo( cpuInfo );
numPhysicalCPUCores = cpuInfo.processorCoreCount;
numLogicalCPUCores = cpuInfo.logicalProcessorCount;
numCPUPackages = cpuInfo.processorPackageCount;
}
/*
================
Sys_GetCPUId
================
*/
// Builds the CPUID_* capability flags for the current processor.
// Win64 builds run no detection and always report CPUID_GENERIC | CPUID_SSE | CPUID_SSE2.
// 32-bit builds return CPUID_UNSUPPORTED when the CPUID instruction is unavailable,
// otherwise they combine the vendor flag with one flag per detected feature.
cpuid_t Sys_GetCPUId()
{
	// RB: we assume a modern x86 chip
#if defined(_WIN64)
	const int assumed = CPUID_GENERIC | CPUID_SSE | CPUID_SSE2;
	return (cpuid_t)assumed;
#else
	// verify we're at least a Pentium or 486 with CPUID support
	if ( !HasCPUID() ) {
		return CPUID_UNSUPPORTED;
	}

	// anything that is not an AMD is reported as Intel
	int detected = IsAMD() ? CPUID_AMD : CPUID_INTEL;

	// Multi Media Extensions
	detected |= HasMMX() ? CPUID_MMX : 0;

	// 3DNow!
	detected |= Has3DNow() ? CPUID_3DNOW : 0;

	// Streaming SIMD Extensions; also enables the Flush-To-Zero flag
	detected |= HasSSE() ? ( CPUID_SSE | CPUID_FTZ ) : 0;

	// Streaming SIMD Extensions 2
	detected |= HasSSE2() ? CPUID_SSE2 : 0;

	// Streaming SIMD Extensions 3 aka Prescott's New Instructions
	detected |= HasSSE3() ? CPUID_SSE3 : 0;

	// Hyper-Threading Technology
	detected |= HasHTT() ? CPUID_HTT : 0;

	// Conditional Move (CMOV) and fast floating point comparison (FCOMI) instructions
	detected |= HasCMOV() ? CPUID_CMOV : 0;

	// Denormals-Are-Zero mode
	detected |= HasDAZ() ? CPUID_DAZ : 0;

	return (cpuid_t)detected;
#endif
}
/*
===============================================================================
FPU
===============================================================================
*/
// Pairs a human-readable flag name with the bit position it occupies in an FPU word.
// name points at string literals, so it is declared const.
typedef struct bitFlag_s {
	const char *	name;	// display name; an empty string terminates a table
	int				bit;	// zero-based bit index
} bitFlag_t;
// fpuState is the scratch area the Sys_FPU_* routines store the FPU environment or
// control word into; statePtr is how their inline assembly addresses it.
static byte fpuState[128], *statePtr = fpuState;
// Text buffer that Sys_FPU_GetState() formats its report into and returns.
static char fpuString[2048];
// Names and bit positions of the single-bit fields of the FPU control word, as printed
// by Sys_FPU_PrintStateFlags(). The entry with an empty name terminates the table.
static bitFlag_t controlWordFlags[] = {
{ "Invalid operation", 0 },
{ "Denormalized operand", 1 },
{ "Divide-by-zero", 2 },
{ "Numeric overflow", 3 },
{ "Numeric underflow", 4 },
{ "Inexact result (precision)", 5 },
{ "Infinity control", 12 },
{ "", 0 }
};
// Display names for the 2-bit precision control field of the FPU control word,
// indexed by the field value (Sys_FPU_PrintStateFlags uses (ctrl>>8)&3).
// Declared const because the entries are string literals.
static const char * const precisionControlField[] = {
	"Single Precision (24-bits)",
	"Reserved",
	"Double Precision (53-bits)",
	"Double Extended Precision (64-bits)"
};
// Display names for the 2-bit rounding control field of the FPU control word,
// indexed by the field value (Sys_FPU_PrintStateFlags uses (ctrl>>10)&3).
// Declared const because the entries are string literals.
static const char * const roundingControlField[] = {
	"Round to nearest",
	"Round down",
	"Round up",
	"Round toward zero"
};
// Names and bit positions of the single-bit fields of the FPU status word, as printed
// by Sys_FPU_PrintStateFlags(). The entry with an empty name terminates the table.
static bitFlag_t statusWordFlags[] = {
{ "Invalid operation", 0 },
{ "Denormalized operand", 1 },
{ "Divide-by-zero", 2 },
{ "Numeric overflow", 3 },
{ "Numeric underflow", 4 },
{ "Inexact result (precision)", 5 },
{ "Stack fault", 6 },
{ "Error summary status", 7 },
{ "FPU busy", 15 },
{ "", 0 }
};
/*
===============
Sys_FPU_PrintStateFlags
===============
*/
// Formats a textual dump of the given FPU environment words into ptr.
//
// ptr  - destination buffer; no size is passed in, so the caller must supply a buffer
//        large enough for the whole report
// ctrl, stat, tags, inof, inse, opof, opse - the raw words, printed in hex first;
//        ctrl and stat are then decoded field by field
// returns the total number of characters written (excluding the terminating zero)
int Sys_FPU_PrintStateFlags( char *ptr, int ctrl, int stat, int tags, int inof, int inse, int opof, int opse ) {
	int i, length = 0;

	length += sprintf( ptr+length,	"CTRL = %08x\n"
									"STAT = %08x\n"
									"TAGS = %08x\n"
									"INOF = %08x\n"
									"INSE = %08x\n"
									"OPOF = %08x\n"
									"OPSE = %08x\n"
									"\n",
									ctrl, stat, tags, inof, inse, opof, opse );

	length += sprintf( ptr+length, "Control Word:\n" );
	for ( i = 0; controlWordFlags[i].name[0]; i++ ) {
		length += sprintf( ptr+length, " %-30s = %s\n", controlWordFlags[i].name, ( ctrl & ( 1 << controlWordFlags[i].bit ) ) ? "true" : "false" );
	}
	length += sprintf( ptr+length, " %-30s = %s\n", "Precision control", precisionControlField[(ctrl>>8)&3] );
	length += sprintf( ptr+length, " %-30s = %s\n", "Rounding control", roundingControlField[(ctrl>>10)&3] );

	length += sprintf( ptr+length, "Status Word:\n" );
	for ( i = 0; statusWordFlags[i].name[0]; i++ ) {
		// accumulate into length like every other line; advancing ptr here instead
		// would leave these characters out of the returned total
		length += sprintf( ptr+length, " %-30s = %s\n", statusWordFlags[i].name, ( stat & ( 1 << statusWordFlags[i].bit ) ) ? "true" : "false" );
	}
	length += sprintf( ptr+length, " %-30s = %d%d%d%d\n", "Condition code", (stat>>8)&1, (stat>>9)&1, (stat>>10)&1, (stat>>14)&1 );
	length += sprintf( ptr+length, " %-30s = %d\n", "Top of stack pointer", (stat>>11)&7 );

	return length;
}
/*
===============
Sys_FPU_StackIsEmpty
===============
*/
// Returns true when the low 16 bits of the dword at offset 8 of the stored FPU
// environment (the word Sys_FPU_GetState() labels "TAGS") are all ones.
// Always returns true on Win64, where the check is compiled out.
bool Sys_FPU_StackIsEmpty()
{
#if !defined(_WIN64)
__asm {
mov eax, statePtr
// store the FPU environment into the scratch buffer
fnstenv [eax]
mov eax, [eax+8]
// invert, then keep the low 16 bits: the result is zero only if they were all ones
xor eax, 0xFFFFFFFF
and eax, 0x0000FFFF
jz empty
}
return false;
empty:
#endif
return true;
}
/*
===============
Sys_FPU_ClearStack
===============
*/
// Pops values off the FPU register stack based on the tag word stored by fnstenv.
// Starting with the 2-bit tag field at bits 15:14 and moving down two bits per
// iteration, it pops one value (fstp st) for every field whose inverted bits are
// non-zero and stops at the first field that is all ones or once the mask has been
// shifted out. Does nothing on Win64.
void Sys_FPU_ClearStack()
{
#if !defined(_WIN64)
__asm {
mov eax, statePtr
fnstenv [eax]
mov eax, [eax+8]
// inverted tag word: a field of 11b becomes 00b
xor eax, 0xFFFFFFFF
// edx = mask selecting the current 2-bit tag field
mov edx, (3<<14)
emptyStack:
mov ecx, eax
and ecx, edx
jz done
fstp st
shr edx, 2
jmp emptyStack
done:
}
#endif
}
/*
===============
Sys_FPU_GetState
gets the FPU state without changing the state
===============
*/
// Returns a pointer to the static fpuString buffer holding a textual report of the FPU:
// the number of values found on the register stack, the eight stack slots as doubles
// (slots that were not read print as 0.0), and the decoded environment words produced
// by Sys_FPU_PrintStateFlags().
// On Win64 a fixed placeholder string is returned instead.
// The returned buffer is overwritten by the next call.
// NOTE(review): the FNSTENV instruction is documented as masking all x87 exceptions
// after storing the environment — confirm whether callers rely on the control word
// being left untouched.
const char *Sys_FPU_GetState()
{
#if defined(_WIN64)
return "TODO Sys_FPU_GetState()";
#else
double fpuStack[8] = { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
double *fpuStackPtr = fpuStack;
int i, numValues;
char *ptr;
// The asm below is an unrolled loop over the eight 2-bit tag fields, from bits 15:14
// downwards. For each field whose inverted bits are non-zero it swaps st(i) to the
// top (fxch), stores it without popping (fst), swaps it back and increments eax.
// It stops at the first field that is all ones.
__asm {
mov esi, statePtr
mov edi, fpuStackPtr
fnstenv [esi]
mov esi, [esi+8]
// esi = inverted tag word, edx = mask for the current tag field, eax = value count
xor esi, 0xFFFFFFFF
mov edx, (3<<14)
xor eax, eax
mov ecx, esi
and ecx, edx
jz done
fst qword ptr [edi+0]
inc eax
shr edx, 2
mov ecx, esi
and ecx, edx
jz done
fxch st(1)
fst qword ptr [edi+8]
inc eax
fxch st(1)
shr edx, 2
mov ecx, esi
and ecx, edx
jz done
fxch st(2)
fst qword ptr [edi+16]
inc eax
fxch st(2)
shr edx, 2
mov ecx, esi
and ecx, edx
jz done
fxch st(3)
fst qword ptr [edi+24]
inc eax
fxch st(3)
shr edx, 2
mov ecx, esi
and ecx, edx
jz done
fxch st(4)
fst qword ptr [edi+32]
inc eax
fxch st(4)
shr edx, 2
mov ecx, esi
and ecx, edx
jz done
fxch st(5)
fst qword ptr [edi+40]
inc eax
fxch st(5)
shr edx, 2
mov ecx, esi
and ecx, edx
jz done
fxch st(6)
fst qword ptr [edi+48]
inc eax
fxch st(6)
shr edx, 2
mov ecx, esi
and ecx, edx
jz done
fxch st(7)
fst qword ptr [edi+56]
inc eax
fxch st(7)
done:
mov numValues, eax
}
// the environment stored by fnstenv is read back as seven consecutive 32-bit words
int ctrl = *(int *)&fpuState[0];
int stat = *(int *)&fpuState[4];
int tags = *(int *)&fpuState[8];
int inof = *(int *)&fpuState[12];
int inse = *(int *)&fpuState[16];
int opof = *(int *)&fpuState[20];
int opse = *(int *)&fpuState[24];
ptr = fpuString;
ptr += sprintf( ptr,"FPU State:\n"
"num values on stack = %d\n", numValues );
for ( i = 0; i < 8; i++ ) {
ptr += sprintf( ptr, "ST%d = %1.10e\n", i, fpuStack[i] );
}
// append the decoded control/status words after the stack dump
Sys_FPU_PrintStateFlags( ptr, ctrl, stat, tags, inof, inse, opof, opse );
return fpuString;
#endif
}
/*
===============
Sys_FPU_EnableExceptions
===============
*/
// Rewrites the low six bits of the FPU control word so that exactly the bits given in
// 'exceptions' (only its low six bits are used) end up cleared and the remaining ones
// end up set; all other control word bits are preserved. Does nothing on Win64.
void Sys_FPU_EnableExceptions( int exceptions )
{
#if !defined(_WIN64)
__asm {
mov eax, statePtr
mov ecx, exceptions
// cx = ~( exceptions & 63 )
and cx, 63
not cx
// read the current control word into the scratch buffer
fnstcw word ptr [eax]
mov bx, word ptr [eax]
// set all six low bits, then clear the requested ones
or bx, 63
and bx, cx
mov word ptr [eax], bx
// load the modified control word
fldcw word ptr [eax]
}
#endif
}
/*
===============
Sys_FPU_SetPrecision
===============
*/
// Replaces bits 8-9 of the FPU control word with the value looked up in
// precisionBitTable for (precision & 3); all other bits are preserved.
// Does nothing on Win64.
// NOTE(review): the table maps precision 1 to field value 1, which the
// precisionControlField table in this file labels "Reserved" (value 2 is labelled
// "Double Precision") — confirm against the caller's precision enumeration.
void Sys_FPU_SetPrecision( int precision )
{
#if !defined(_WIN64)
short precisionBitTable[4] = { 0, 1, 3, 0 };
// new field value, already shifted into position
short precisionBits = precisionBitTable[precision & 3] << 8;
// mask that clears bits 8 and 9
short precisionMask = ~( ( 1 << 9 ) | ( 1 << 8 ) );
__asm {
mov eax, statePtr
mov cx, precisionBits
// read the current control word into the scratch buffer
fnstcw word ptr [eax]
mov bx, word ptr [eax]
and bx, precisionMask
or bx, cx
mov word ptr [eax], bx
// load the modified control word
fldcw word ptr [eax]
}
#endif
}
/*
================
Sys_FPU_SetRounding
================
*/
// Replaces bits 10-11 of the FPU control word with (rounding & 3); all other bits
// are preserved. The lookup table is the identity mapping. Does nothing on Win64.
void Sys_FPU_SetRounding( int rounding )
{
#if !defined(_WIN64)
short roundingBitTable[4] = { 0, 1, 2, 3 };
// new field value, already shifted into position
short roundingBits = roundingBitTable[rounding & 3] << 10;
// mask that clears bits 10 and 11
short roundingMask = ~( ( 1 << 11 ) | ( 1 << 10 ) );
__asm {
mov eax, statePtr
mov cx, roundingBits
// read the current control word into the scratch buffer
fnstcw word ptr [eax]
mov bx, word ptr [eax]
and bx, roundingMask
or bx, cx
mov word ptr [eax], bx
// load the modified control word
fldcw word ptr [eax]
}
#endif
}
/*
================
Sys_FPU_SetDAZ
================
*/
// Sets bit 6 (DAZ) of the MXCSR register to 'enable', leaving the other bits as read.
// Does nothing on Win64. No check for STMXCSR/LDMXCSR support is made here.
void Sys_FPU_SetDAZ( bool enable )
{
#if !defined(_WIN64)
DWORD dwData;
_asm {
// ecx = ( enable & 1 ) << 6
movzx ecx, byte ptr enable
and ecx, 1
shl ecx, 6
// read the current MXCSR value
STMXCSR dword ptr dwData
mov eax, dwData
and eax, ~(1<<6) // clear DAZ bit
or eax, ecx // set the DAZ bit
mov dwData, eax
// write the modified value back
LDMXCSR dword ptr dwData
}
#endif
}
/*
================
Sys_FPU_SetFTZ
================
*/
// Sets bit 15 (FTZ) of the MXCSR register to 'enable', leaving the other bits as read.
// Does nothing on Win64. No check for STMXCSR/LDMXCSR support is made here.
void Sys_FPU_SetFTZ( bool enable )
{
#if !defined(_WIN64)
DWORD dwData;
_asm {
// ecx = ( enable & 1 ) << 15
movzx ecx, byte ptr enable
and ecx, 1
shl ecx, 15
// read the current MXCSR value
STMXCSR dword ptr dwData
mov eax, dwData
and eax, ~(1<<15) // clear FTZ bit
or eax, ecx // set the FTZ bit
mov dwData, eax
// write the modified value back
LDMXCSR dword ptr dwData
}
#endif
}