dhewm3/neo/sys/win32/win_cpu.cpp
dhewg 736ec20d4d Untangle the epic precompiled.h mess
Don't include the lazy precompiled.h everywhere, only what's
required for the compilation unit.
platform.h needs to be included instead to provide all essential
defines and types.
All includes use the relative path to the neo or the game
specific root.
Move all idlib related includes from idlib/Lib.h to precompiled.h.
precompiled.h still exists for the MFC stuff in tools/.
Add some missing header guards.
2011-12-19 23:21:47 +01:00

978 lines
20 KiB
C++

/*
===========================================================================
Doom 3 GPL Source Code
Copyright (C) 1999-2011 id Software LLC, a ZeniMax Media company.
This file is part of the Doom 3 GPL Source Code ("Doom 3 Source Code").
Doom 3 Source Code is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Doom 3 Source Code is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Doom 3 Source Code. If not, see <http://www.gnu.org/licenses/>.
In addition, the Doom 3 Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 Source Code. If not, please request a copy in writing from id Software at the address below.
If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
===========================================================================
*/
#include "sys/platform.h"
#include "sys/win32/win_local.h"
/*
==============================================================
Clock ticks
==============================================================
*/
/*
================
Sys_GetClockTicks

Returns the current tick counter as a double.
Non-MSVC builds read QueryPerformanceCounter, MSVC builds read the
time stamp counter with rdtsc.
================
*/
double Sys_GetClockTicks( void ) {
#ifndef _MSC_VER
	LARGE_INTEGER li;

	QueryPerformanceCounter( &li );
	// QuadPart is the complete signed 64-bit counter, no need to stitch the halves together
	return (double) li.QuadPart;
#else
	unsigned long lo, hi;

	__asm {
		push ebx			// cpuid overwrites ebx
		xor eax, eax
		cpuid				// executed before rdtsc so earlier instructions have retired
		rdtsc				// edx:eax = time stamp counter
		mov lo, eax
		mov hi, edx
		pop ebx
	}
	// the high half counts units of 2^32 ticks (4294967296), not 0xFFFFFFFF
	return (double) lo + 4294967296.0 * (double) hi;
#endif
}
/*
================
Sys_ClockTicksPerSecond

Returns the number of Sys_GetClockTicks() ticks per second.
The value is determined on the first call and cached; as long as the lookup
fails 0 is returned and the lookup is retried on the next call.
Non-MSVC builds use QueryPerformanceFrequency, MSVC builds read the CPU
speed in MHz from the registry.
================
*/
double Sys_ClockTicksPerSecond( void ) {
	static double ticks = 0;

#ifndef _MSC_VER
	if ( !ticks ) {
		LARGE_INTEGER li;
		QueryPerformanceFrequency( &li );
		ticks = li.QuadPart;
	}
#else
	if ( !ticks ) {
		// spellings of the value name that are tried in order
		static const char *valueNames[] = { "~MHz", "~Mhz", "~mhz" };
		HKEY hKey;
		DWORD ProcSpeed, buflen, ret;
		int i;

		if ( !RegOpenKeyEx( HKEY_LOCAL_MACHINE, "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", 0, KEY_READ, &hKey ) ) {
			ProcSpeed = 0;
			i = 0;
			do {
				// the call may overwrite buflen, so reset it before every query
				buflen = sizeof( ProcSpeed );
				ret = RegQueryValueEx( hKey, valueNames[i], NULL, NULL, (LPBYTE) &ProcSpeed, &buflen );
			} while ( ret != ERROR_SUCCESS && ++i < 3 );

			RegCloseKey( hKey );

			if ( ret == ERROR_SUCCESS ) {
				// the registry value is in MHz
				ticks = (double) ProcSpeed * 1000000;
			}
		}
	}
#endif
	return ticks;
}
/*
==============================================================
CPU
==============================================================
*/
/*
================
HasCPUID

Reports whether the CPUID instruction is available by trying to toggle
the ID flag (bit 21, mask 0x00200000) of EFLAGS and reading it back.
Returns true if the flag could be changed, false otherwise.
Builds without _MSC_VER have no implementation and always return false.
================
*/
static bool HasCPUID( void ) {
#ifdef _MSC_VER
__asm
{
pushfd // copy EFLAGS into eax via the stack
pop eax
test eax, 0x00200000 // check ID bit
jz set21 // bit 21 is not set, so jump to set21 and try to set it
and eax, 0xffdfffff // clear bit 21
push eax // save new value in register
popfd // store new value in flags
pushfd // read the flags back
pop eax
test eax, 0x00200000 // check ID bit
jz good // the bit stayed cleared, so it can be toggled
jmp err // cpuid not supported
set21:
or eax, 0x00200000 // set ID bit
push eax // store new value
popfd // store new value in EFLAGS
pushfd // read the flags back
pop eax
test eax, 0x00200000 // if bit 21 is on
jnz good // the bit stayed set, so it can be toggled
jmp err
}
// the asm block above always leaves through one of these two labels
err:
return false;
good:
return true;
#else
return false;
#endif
}
// indices into the four element register array filled in by CPUID()
#define _REG_EAX 0
#define _REG_EBX 1
#define _REG_ECX 2
#define _REG_EDX 3
/*
================
CPUID

Executes the cpuid instruction with eax set to func and stores the
resulting eax, ebx, ecx and edx in regs[_REG_EAX] .. regs[_REG_EDX].
Builds without _MSC_VER have no implementation and zero all four entries.
================
*/
static void CPUID( int func, unsigned regs[4] ) {
#ifdef _MSC_VER
unsigned regEAX, regEBX, regECX, regEDX;
// save all general purpose registers, they are restored by the popa below
__asm pusha
__asm mov eax, func
// 0x0F 0xA2 is the opcode of the cpuid instruction
__asm __emit 00fh
__asm __emit 0a2h
__asm mov regEAX, eax
__asm mov regEBX, ebx
__asm mov regECX, ecx
__asm mov regEDX, edx
__asm popa
regs[_REG_EAX] = regEAX;
regs[_REG_EBX] = regEBX;
regs[_REG_ECX] = regECX;
regs[_REG_EDX] = regEDX;
#else
// no cpuid implementation for this compiler, report all zeros
regs[0] = 0;
regs[1] = 0;
regs[2] = 0;
regs[3] = 0;
#endif
}
/*
================
IsAMD
================
*/
static bool IsAMD( void ) {
char pstring[16];
char processorString[13];
// get name of processor
CPUID( 0, ( unsigned int * ) pstring );
processorString[0] = pstring[4];
processorString[1] = pstring[5];
processorString[2] = pstring[6];
processorString[3] = pstring[7];
processorString[4] = pstring[12];
processorString[5] = pstring[13];
processorString[6] = pstring[14];
processorString[7] = pstring[15];
processorString[8] = pstring[8];
processorString[9] = pstring[9];
processorString[10] = pstring[10];
processorString[11] = pstring[11];
processorString[12] = 0;
if ( strcmp( processorString, "AuthenticAMD" ) == 0 ) {
return true;
}
return false;
}
/*
================
HasCMOV

Returns true if bit 15 of EDX from CPUID function 1 (CMOV) is set.
================
*/
static bool HasCMOV( void ) {
	const unsigned cmovBit = 1 << 15;
	unsigned cpuRegs[4];

	// query the CPU feature bits
	CPUID( 1, cpuRegs );

	return ( cpuRegs[_REG_EDX] & cmovBit ) != 0;
}
/*
================
Has3DNow

Returns true if the extended CPUID function 0x80000001 is available
and bit 31 of its EDX result (3DNow!) is set.
================
*/
static bool Has3DNow( void ) {
	unsigned regs[4];

	// function 0x80000000 returns the highest supported extended function in EAX,
	// function 0x80000001 may only be queried if it is within that range
	CPUID( 0x80000000, regs );
	if ( regs[_REG_EAX] < 0x80000001 ) {
		return false;
	}

	// bit 31 of EDX denotes 3DNow! support
	// (unsigned constant: shifting a signed 1 into the sign bit is not portable)
	CPUID( 0x80000001, regs );
	return ( regs[_REG_EDX] & ( 1u << 31 ) ) != 0;
}
/*
================
HasMMX

Returns true if bit 23 of EDX from CPUID function 1 (MMX) is set.
================
*/
static bool HasMMX( void ) {
	unsigned cpuRegs[4];

	// query the CPU feature bits
	CPUID( 1, cpuRegs );

	const unsigned mmxBit = cpuRegs[_REG_EDX] & ( 1 << 23 );
	return mmxBit ? true : false;
}
/*
================
HasSSE

Returns true if bit 25 of EDX from CPUID function 1 (SSE) is set.
================
*/
static bool HasSSE( void ) {
	unsigned cpuRegs[4];

	// query the CPU feature bits
	CPUID( 1, cpuRegs );

	if ( ( cpuRegs[_REG_EDX] & ( 1 << 25 ) ) == 0 ) {
		return false;
	}
	return true;
}
/*
================
HasSSE2

Returns true if bit 26 of EDX from CPUID function 1 (SSE2) is set.
================
*/
static bool HasSSE2( void ) {
	const unsigned sse2Bit = 1 << 26;
	unsigned cpuRegs[4];

	// query the CPU feature bits
	CPUID( 1, cpuRegs );

	return ( cpuRegs[_REG_EDX] & sse2Bit ) == sse2Bit;
}
/*
================
HasSSE3

Returns true if bit 0 of ECX from CPUID function 1 (SSE3) is set.
================
*/
static bool HasSSE3( void ) {
	unsigned cpuRegs[4];

	// query the CPU feature bits
	CPUID( 1, cpuRegs );

	// SSE3 is reported in the lowest bit of ECX
	return ( cpuRegs[_REG_ECX] & 1 ) != 0;
}
/*
================
LogicalProcPerPhysicalProc

Returns bits 16-23 of EBX from cpuid function 1, which hold the number of
logical processors per physical processor.
Builds without _MSC_VER have no implementation and return 0.
================
*/
#define NUM_LOGICAL_BITS 0x00FF0000 // EBX[23:16] Bits 16-23 in ebx contain the number of logical
// processors per physical processor when cpuid is executed with
// eax set to 1
static unsigned char LogicalProcPerPhysicalProc( void ) {
#ifdef _MSC_VER
unsigned int regebx = 0;
__asm {
mov eax, 1
cpuid
mov regebx, ebx
}
// isolate EBX[23:16] and move it down into the low byte
return (unsigned char) ((regebx & NUM_LOGICAL_BITS) >> 16);
#else
return 0;
#endif
}
/*
================
GetAPIC_ID

Returns bits 24-31 of EBX from cpuid function 1, the initial APIC ID of the
processor this code is currently running on.
Builds without _MSC_VER have no implementation and return 0.
================
*/
#define INITIAL_APIC_ID_BITS 0xFF000000 // EBX[31:24] Bits 24-31 (8 bits) return the 8-bit unique
// initial APIC ID for the processor this code is running on.
// Default value = 0xff if HT is not supported
static unsigned char GetAPIC_ID( void ) {
#ifdef _MSC_VER
unsigned int regebx = 0;
__asm {
mov eax, 1
cpuid
mov regebx, ebx
}
// isolate EBX[31:24] and move it down into the low byte
return (unsigned char) ((regebx & INITIAL_APIC_ID_BITS) >> 24);
#else
return '\0';
#endif
}
/*
================
CPUCount
logicalNum is the number of logical CPU per physical CPU
physicalNum is the total number of physical processor
returns one of the HT_* flags
================
*/
#define HT_NOT_CAPABLE 0
#define HT_ENABLED 1
#define HT_DISABLED 2
#define HT_SUPPORTED_NOT_ENABLED 3
#define HT_CANNOT_DETECT 4
int CPUCount( int &logicalNum, int &physicalNum ) {
int statusFlag;
SYSTEM_INFO info;
physicalNum = 1;
logicalNum = 1;
statusFlag = HT_NOT_CAPABLE;
info.dwNumberOfProcessors = 0;
GetSystemInfo (&info);
// Number of physical processors in a non-Intel system
// or in a 32-bit Intel system with Hyper-Threading technology disabled
physicalNum = info.dwNumberOfProcessors;
unsigned char HT_Enabled = 0;
logicalNum = LogicalProcPerPhysicalProc();
if ( logicalNum >= 1 ) { // > 1 doesn't mean HT is enabled in the BIOS
HANDLE hCurrentProcessHandle;
DWORD dwProcessAffinity;
DWORD dwSystemAffinity;
DWORD dwAffinityMask;
// Calculate the appropriate shifts and mask based on the
// number of logical processors.
unsigned char i = 1, PHY_ID_MASK = 0xFF, PHY_ID_SHIFT = 0;
while( i < logicalNum ) {
i *= 2;
PHY_ID_MASK <<= 1;
PHY_ID_SHIFT++;
}
hCurrentProcessHandle = GetCurrentProcess();
GetProcessAffinityMask( hCurrentProcessHandle, &dwProcessAffinity, &dwSystemAffinity );
// Check if available process affinity mask is equal to the
// available system affinity mask
if ( dwProcessAffinity != dwSystemAffinity ) {
statusFlag = HT_CANNOT_DETECT;
physicalNum = -1;
return statusFlag;
}
dwAffinityMask = 1;
while ( dwAffinityMask != 0 && dwAffinityMask <= dwProcessAffinity ) {
// Check if this CPU is available
if ( dwAffinityMask & dwProcessAffinity ) {
if ( SetProcessAffinityMask( hCurrentProcessHandle, dwAffinityMask ) ) {
unsigned char APIC_ID, LOG_ID, PHY_ID;
Sleep( 0 ); // Give OS time to switch CPU
APIC_ID = GetAPIC_ID();
LOG_ID = APIC_ID & ~PHY_ID_MASK;
PHY_ID = APIC_ID >> PHY_ID_SHIFT;
if ( LOG_ID != 0 ) {
HT_Enabled = 1;
}
}
}
dwAffinityMask = dwAffinityMask << 1;
}
// Reset the processor affinity
SetProcessAffinityMask( hCurrentProcessHandle, dwProcessAffinity );
if ( logicalNum == 1 ) { // Normal P4 : HT is disabled in hardware
statusFlag = HT_DISABLED;
} else {
if ( HT_Enabled ) {
// Total physical processors in a Hyper-Threading enabled system.
physicalNum /= logicalNum;
statusFlag = HT_ENABLED;
} else {
statusFlag = HT_SUPPORTED_NOT_ENABLED;
}
}
}
return statusFlag;
}
/*
================
HasHTT

Returns true if bit 28 of EDX from CPUID function 1 (HTT) is set
and CPUCount() reports HT_ENABLED.
================
*/
static bool HasHTT( void ) {
	unsigned cpuRegs[4];

	// query the CPU feature bits
	CPUID( 1, cpuRegs );

	// without the HTT feature bit there is nothing more to check
	if ( ( cpuRegs[_REG_EDX] & ( 1 << 28 ) ) == 0 ) {
		return false;
	}

	int logicalCount, physicalCount;
	return CPUCount( logicalCount, physicalCount ) == HT_ENABLED;
}
/*
================
HasDAZ

Returns true if the MXCSR mask stored by FXSAVE has the DAZ bit (bit 6) set.
Returns false if CPUID function 1 does not report FXSAVE support (EDX bit 24).
Builds without _MSC_VER have no implementation and always return false.
================
*/
static bool HasDAZ( void ) {
#ifdef _MSC_VER
// FXSAVE target: 512 bytes, 16 byte aligned
__declspec(align(16)) unsigned char FXSaveArea[512];
unsigned char *FXArea = FXSaveArea;
DWORD dwMask = 0;
unsigned regs[4];
// get CPU feature bits
CPUID( 1, regs );
// bit 24 of EDX denotes support for FXSAVE
if ( !( regs[_REG_EDX] & ( 1 << 24 ) ) ) {
return false;
}
// start from a zeroed save area so the mask reads as 0 unless FXSAVE writes it
memset( FXArea, 0, sizeof( FXSaveArea ) );
__asm {
mov eax, FXArea
FXSAVE [eax]
}
dwMask = *(DWORD *)&FXArea[28]; // Read the MXCSR Mask
return ( ( dwMask & ( 1 << 6 ) ) == ( 1 << 6 ) ); // Return if the DAZ bit is set
#else
return false;
#endif
}
/*
================
Sys_GetCPUId

Returns the CPUID_* flags describing the processor.
CPUID_UNSUPPORTED is returned if the cpuid instruction is not available.
Builds without _MSC_VER do no detection and return CPUID_GENERIC.
================
*/
int Sys_GetCPUId( void ) {
#ifdef _MSC_VER
	// feature probes paired with the flag(s) they contribute, evaluated in this order
	static const struct {
		bool	( *probe )( void );
		int		bits;
	} featureProbes[] = {
		{ HasMMX,	CPUID_MMX },				// Multi Media Extensions
		{ Has3DNow,	CPUID_3DNOW },				// 3DNow!
		{ HasSSE,	CPUID_SSE | CPUID_FTZ },	// Streaming SIMD Extensions
		{ HasSSE2,	CPUID_SSE2 },				// Streaming SIMD Extensions 2
		{ HasSSE3,	CPUID_SSE3 },				// Streaming SIMD Extensions 3 aka Prescott's New Instructions
		{ HasHTT,	CPUID_HTT },				// Hyper-Threading Technology
		{ HasCMOV,	CPUID_CMOV },				// Conditional Move (CMOV) and fast floating point comparison (FCOMI) instructions
		{ HasDAZ,	CPUID_DAZ }					// Denormals-Are-Zero mode
	};
	const int numProbes = sizeof( featureProbes ) / sizeof( featureProbes[0] );

	// verify we're at least a Pentium or 486 with CPUID support
	if ( !HasCPUID() ) {
		return CPUID_UNSUPPORTED;
	}

	// the vendor is either AMD or assumed to be Intel
	int cpuFlags = IsAMD() ? CPUID_AMD : CPUID_INTEL;

	for ( int n = 0; n < numProbes; n++ ) {
		if ( featureProbes[n].probe() ) {
			cpuFlags |= featureProbes[n].bits;
		}
	}

	return cpuFlags;
#else
	return CPUID_GENERIC;
#endif
}
/*
===============================================================================
FPU
===============================================================================
*/
// a named single bit flag; bit is the bit position that is tested with ( 1 << bit )
typedef struct bitFlag_s {
const char *name;
int bit;
} bitFlag_t;
// scratch memory the inline assembly in this file stores the FPU environment
// and control word to; it is always addressed through statePtr
static byte fpuState[128], *statePtr = fpuState;
// buffer holding the text returned by Sys_FPU_GetState
static char fpuString[2048];
// flags printed from the control word, terminated by an entry with an empty name
static bitFlag_t controlWordFlags[] = {
{ "Invalid operation", 0 },
{ "Denormalized operand", 1 },
{ "Divide-by-zero", 2 },
{ "Numeric overflow", 3 },
{ "Numeric underflow", 4 },
{ "Inexact result (precision)", 5 },
{ "Infinity control", 12 },
{ "", 0 }
};
// names for the two bit precision control field, indexed by ( ctrl >> 8 ) & 3
static const char *precisionControlField[] = {
"Single Precision (24-bits)",
"Reserved",
"Double Precision (53-bits)",
"Double Extended Precision (64-bits)"
};
// names for the two bit rounding control field, indexed by ( ctrl >> 10 ) & 3
static const char *roundingControlField[] = {
"Round to nearest",
"Round down",
"Round up",
"Round toward zero"
};
// flags printed from the status word, terminated by an entry with an empty name
static bitFlag_t statusWordFlags[] = {
{ "Invalid operation", 0 },
{ "Denormalized operand", 1 },
{ "Divide-by-zero", 2 },
{ "Numeric overflow", 3 },
{ "Numeric underflow", 4 },
{ "Inexact result (precision)", 5 },
{ "Stack fault", 6 },
{ "Error summary status", 7 },
{ "FPU busy", 15 },
{ "", 0 }
};
/*
===============
Sys_FPU_PrintStateFlags

Writes a textual description of the given FPU environment words to ptr:
the raw values followed by the decoded control word and status word.
ptr must be large enough for the complete text, no bounds are checked.
Returns the number of characters written, not counting the terminating zero.
Builds without _MSC_VER write nothing and return 0.
===============
*/
int Sys_FPU_PrintStateFlags( char *ptr, int ctrl, int stat, int tags, int inof, int inse, int opof, int opse ) {
#ifdef _MSC_VER
	int i, length = 0;

	length += sprintf( ptr+length,	"CTRL = %08x\n"
									"STAT = %08x\n"
									"TAGS = %08x\n"
									"INOF = %08x\n"
									"INSE = %08x\n"
									"OPOF = %08x\n"
									"OPSE = %08x\n"
									"\n",
									ctrl, stat, tags, inof, inse, opof, opse );

	length += sprintf( ptr+length, "Control Word:\n" );
	for ( i = 0; controlWordFlags[i].name[0]; i++ ) {
		length += sprintf( ptr+length, " %-30s = %s\n", controlWordFlags[i].name, ( ctrl & ( 1 << controlWordFlags[i].bit ) ) ? "true" : "false" );
	}
	length += sprintf( ptr+length, " %-30s = %s\n", "Precision control", precisionControlField[(ctrl>>8)&3] );
	length += sprintf( ptr+length, " %-30s = %s\n", "Rounding control", roundingControlField[(ctrl>>10)&3] );

	length += sprintf( ptr+length, "Status Word:\n" );
	for ( i = 0; statusWordFlags[i].name[0]; i++ ) {
		// accumulate into length like everywhere else; advancing ptr here instead
		// would leave these lines out of the returned length
		length += sprintf( ptr+length, " %-30s = %s\n", statusWordFlags[i].name, ( stat & ( 1 << statusWordFlags[i].bit ) ) ? "true" : "false" );
	}
	length += sprintf( ptr+length, " %-30s = %d%d%d%d\n", "Condition code", (stat>>8)&1, (stat>>9)&1, (stat>>10)&1, (stat>>14)&1 );
	length += sprintf( ptr+length, " %-30s = %d\n", "Top of stack pointer", (stat>>11)&7 );

	return length;
#else
	return 0;
#endif
}
/*
===============
Sys_FPU_StackIsEmpty

Stores the FPU environment to the fpuState scratch buffer and returns true
if every two bit field in the low 16 bits of the tag word (offset 8) is 11,
which is the x87 tag value for an empty register.
Builds without _MSC_VER have no implementation and always return true.
===============
*/
bool Sys_FPU_StackIsEmpty( void ) {
#ifdef _MSC_VER
__asm {
mov eax, statePtr
fnstenv [eax] // store the FPU environment
mov eax, [eax+8] // load the tag word
xor eax, 0xFFFFFFFF // invert, so a tag of 11 becomes 00
and eax, 0x0000FFFF // only the low 16 bits hold tags
jz empty // all tags were 11
}
return false;
empty:
return true;
#else
return true;
#endif
}
/*
===============
Sys_FPU_ClearStack

Stores the FPU environment to the fpuState scratch buffer and then pops one
value off the FPU stack for every two bit tag field that is not 11, walking
the tag word from bits 15-14 downwards and stopping at the first field that
is 11 or once all eight fields have been visited.
Builds without _MSC_VER do nothing.
===============
*/
void Sys_FPU_ClearStack( void ) {
#ifdef _MSC_VER
__asm {
mov eax, statePtr
fnstenv [eax] // store the FPU environment
mov eax, [eax+8] // load the tag word
xor eax, 0xFFFFFFFF // invert, so a tag of 11 becomes 00
mov edx, (3<<14) // mask for the two bit field in bits 15-14
emptyStack:
mov ecx, eax
and ecx, edx // isolate the current tag field
jz done // field was 11 (or the mask has been shifted out)
fstp st // pop the top of the FPU stack
shr edx, 2 // move the mask to the next lower tag field
jmp emptyStack
done:
}
#endif
}
/*
===============
Sys_FPU_GetState
gets the FPU state without changing the state

Returns a pointer to the static fpuString buffer, which is overwritten by
every call. The text contains the number of values found on the FPU stack,
the eight stack slots as doubles (slots without a value print as 0) and
the output of Sys_FPU_PrintStateFlags.
Builds without _MSC_VER have no implementation and return an empty string.

NOTE(review): the environment is read with fnstenv, which per the x87
documentation also masks all floating point exceptions - confirm that
"without changing the state" holds for the control word.
===============
*/
const char *Sys_FPU_GetState( void ) {
#ifdef _MSC_VER
double fpuStack[8] = { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
double *fpuStackPtr = fpuStack;
int i, numValues;
char *ptr;
// The assembly below is unrolled once per stack slot: it tests the next two bit
// field of the inverted tag word (starting at bits 15-14, moving down), jumps to
// done at the first field that is zero, and otherwise copies st(n) to fpuStack[n].
// For n > 0 the value is swapped to the top with fxch, stored, and swapped back,
// so the register stack is left in its original order. eax counts the stored values.
__asm {
mov esi, statePtr
mov edi, fpuStackPtr
fnstenv [esi] // store the FPU environment
mov esi, [esi+8] // load the tag word
xor esi, 0xFFFFFFFF // invert, so a tag of 11 becomes 00
mov edx, (3<<14) // mask for the two bit field in bits 15-14
xor eax, eax // number of values stored so far
mov ecx, esi
and ecx, edx
jz done
fst qword ptr [edi+0] // st(0) is stored directly
inc eax
shr edx, 2
mov ecx, esi
and ecx, edx
jz done
fxch st(1)
fst qword ptr [edi+8]
inc eax
fxch st(1)
shr edx, 2
mov ecx, esi
and ecx, edx
jz done
fxch st(2)
fst qword ptr [edi+16]
inc eax
fxch st(2)
shr edx, 2
mov ecx, esi
and ecx, edx
jz done
fxch st(3)
fst qword ptr [edi+24]
inc eax
fxch st(3)
shr edx, 2
mov ecx, esi
and ecx, edx
jz done
fxch st(4)
fst qword ptr [edi+32]
inc eax
fxch st(4)
shr edx, 2
mov ecx, esi
and ecx, edx
jz done
fxch st(5)
fst qword ptr [edi+40]
inc eax
fxch st(5)
shr edx, 2
mov ecx, esi
and ecx, edx
jz done
fxch st(6)
fst qword ptr [edi+48]
inc eax
fxch st(6)
shr edx, 2
mov ecx, esi
and ecx, edx
jz done
fxch st(7)
fst qword ptr [edi+56]
inc eax
fxch st(7)
done:
mov numValues, eax
}
// the environment stored by fnstenv is read as consecutive 32 bit fields
int ctrl = *(int *)&fpuState[0];
int stat = *(int *)&fpuState[4];
int tags = *(int *)&fpuState[8];
int inof = *(int *)&fpuState[12];
int inse = *(int *)&fpuState[16];
int opof = *(int *)&fpuState[20];
int opse = *(int *)&fpuState[24];
// build the report, ptr always points at the end of the text written so far
ptr = fpuString;
ptr += sprintf( ptr,"FPU State:\n"
"num values on stack = %d\n", numValues );
for ( i = 0; i < 8; i++ ) {
ptr += sprintf( ptr, "ST%d = %1.10e\n", i, fpuStack[i] );
}
Sys_FPU_PrintStateFlags( ptr, ctrl, stat, tags, inof, inse, opof, opse );
return fpuString;
#else
return "";
#endif
}
/*
===============
Sys_FPU_EnableExceptions

Rewrites the six exception mask bits (bits 0-5) of the FPU control word:
a bit set in exceptions clears the corresponding mask bit, all other
mask bits are set. Only the low six bits of exceptions are used.
Builds without _MSC_VER do nothing.
===============
*/
void Sys_FPU_EnableExceptions( int exceptions ) {
#ifdef _MSC_VER
__asm {
mov eax, statePtr
mov ecx, exceptions
and cx, 63 // keep the six exception bits
not cx // requested exceptions become zero bits
fnstcw word ptr [eax] // read the current control word
mov bx, word ptr [eax]
or bx, 63 // set all six mask bits
and bx, cx // then clear the mask bits of the requested exceptions
mov word ptr [eax], bx
fldcw word ptr [eax] // load the modified control word
}
#endif
}
/*
===============
Sys_FPU_SetPrecision

Replaces bits 8-9 of the FPU control word with the value that
precisionBitTable holds for ( precision & 3 ): 0 -> 0, 1 -> 1, 2 -> 3, 3 -> 0.
Builds without _MSC_VER do nothing.
===============
*/
void Sys_FPU_SetPrecision( int precision ) {
#ifdef _MSC_VER
short precisionBitTable[4] = { 0, 1, 3, 0 };
short precisionBits = precisionBitTable[precision & 3] << 8;
// mask with bits 8 and 9 cleared
short precisionMask = ~( ( 1 << 9 ) | ( 1 << 8 ) );
__asm {
mov eax, statePtr
mov cx, precisionBits
fnstcw word ptr [eax] // read the current control word
mov bx, word ptr [eax]
and bx, precisionMask // clear the old field
or bx, cx // insert the new field
mov word ptr [eax], bx
fldcw word ptr [eax] // load the modified control word
}
#endif
}
/*
================
Sys_FPU_SetRounding

Replaces bits 10-11 of the FPU control word with ( rounding & 3 ).
Builds without _MSC_VER do nothing.
================
*/
void Sys_FPU_SetRounding( int rounding ) {
#ifdef _MSC_VER
// identity mapping from the rounding argument to the control word field
short roundingBitTable[4] = { 0, 1, 2, 3 };
short roundingBits = roundingBitTable[rounding & 3] << 10;
// mask with bits 10 and 11 cleared
short roundingMask = ~( ( 1 << 11 ) | ( 1 << 10 ) );
__asm {
mov eax, statePtr
mov cx, roundingBits
fnstcw word ptr [eax] // read the current control word
mov bx, word ptr [eax]
and bx, roundingMask // clear the old field
or bx, cx // insert the new field
mov word ptr [eax], bx
fldcw word ptr [eax] // load the modified control word
}
#endif
}
/*
================
Sys_FPU_SetDAZ

Sets (enable = true) or clears (enable = false) bit 6, the DAZ bit,
of the MXCSR register and leaves all other bits unchanged.
Builds without _MSC_VER do nothing.
================
*/
void Sys_FPU_SetDAZ( bool enable ) {
#ifdef _MSC_VER
DWORD dwData;
_asm {
movzx ecx, byte ptr enable
and ecx, 1
shl ecx, 6 // move the flag to the DAZ bit position
STMXCSR dword ptr dwData // read MXCSR
mov eax, dwData
and eax, ~(1<<6) // clear DAZ bit
or eax, ecx // set the DAZ bit
mov dwData, eax
LDMXCSR dword ptr dwData // write MXCSR back
}
#endif
}
/*
================
Sys_FPU_SetFTZ

Sets (enable = true) or clears (enable = false) bit 15, the FTZ bit,
of the MXCSR register and leaves all other bits unchanged.
Builds without _MSC_VER do nothing.
================
*/
void Sys_FPU_SetFTZ( bool enable ) {
#ifdef _MSC_VER
DWORD dwData;
_asm {
movzx ecx, byte ptr enable
and ecx, 1
shl ecx, 15 // move the flag to the FTZ bit position
STMXCSR dword ptr dwData // read MXCSR
mov eax, dwData
and eax, ~(1<<15) // clear FTZ bit
or eax, ecx // set the FTZ bit
mov dwData, eax
LDMXCSR dword ptr dwData // write MXCSR back
}
#endif
}