// mirror of https://github.com/dhewm/dhewm3.git
// synced 2025-01-22 09:11:15 +00:00
// 4150029322
// Get rid of the 2 CPUID flags and combine them with SSE in one implementation. SSE flags can now be set on all x86 and x86_64 platforms - independent of -ffast-math. Helper defines borrowed from STREFLOP.
// 574 lines, 12 KiB, C++
/*
===========================================================================

Doom 3 GPL Source Code
Copyright (C) 1999-2011 id Software LLC, a ZeniMax Media company.

This file is part of the Doom 3 GPL Source Code ("Doom 3 Source Code").

Doom 3 Source Code is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Doom 3 Source Code is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Doom 3 Source Code. If not, see <http://www.gnu.org/licenses/>.

In addition, the Doom 3 Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 Source Code. If not, please request a copy in writing from id Software at the address below.

If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.

===========================================================================
*/

#include <string.h>

#include "sys/platform.h"

#include "sys/win32/win_local.h"

/*
==============================================================

CPU

==============================================================
*/

/*
================
HasCPUID

Reports whether the CPUID instruction is available by checking whether
bit 21 (the ID bit) of EFLAGS can be toggled: if the bit is currently set
the code tries to clear it, otherwise it tries to set it, and then reads
EFLAGS back to see whether the change stuck.

Only implemented with MSVC inline assembly; with any other compiler the
function always returns false.
================
*/
static bool HasCPUID( void ) {
#ifdef _MSC_VER
	__asm
	{
		pushfd						// save eflags
		pop		eax
		test	eax, 0x00200000		// check ID bit
		jz		set21				// bit 21 is not set, so jump to set21
		and		eax, 0xffdfffff		// clear bit 21
		push	eax					// save new value in register
		popfd						// store new value in flags
		pushfd
		pop		eax
		test	eax, 0x00200000		// check ID bit
		jz		good				// the bit could be cleared
		jmp		err					// cpuid not supported
set21:
		or		eax, 0x00200000		// set ID bit
		push	eax					// store new value
		popfd						// store new value in EFLAGS
		pushfd
		pop		eax
		test	eax, 0x00200000		// if bit 21 is on
		jnz		good				// the bit could be set
		jmp		err
	}

	// the assembly above always leaves through one of these two labels
err:
	return false;
good:
	return true;
#else
	return false;
#endif
}

// Indices into the four-element register array that CPUID() fills in.
#define _REG_EAX		0
#define _REG_EBX		1
#define _REG_ECX		2
#define _REG_EDX		3

/*
================
CPUID

Executes the CPUID instruction for leaf 'func' and stores the resulting
EAX, EBX, ECX and EDX values in regs[0..3] (see the _REG_* indices).

Only implemented with MSVC inline assembly; with any other compiler all
four entries are set to zero, so every feature test built on top of this
function reports "not present".
================
*/
static void CPUID( int func, unsigned regs[4] ) {
#ifdef _MSC_VER
	unsigned regEAX, regEBX, regECX, regEDX;

	// pusha/popa preserve the general purpose registers around the
	// instruction; the two emitted bytes (0F A2) are the CPUID opcode
	__asm pusha
	__asm mov eax, func
	__asm __emit 00fh
	__asm __emit 0a2h
	__asm mov regEAX, eax
	__asm mov regEBX, ebx
	__asm mov regECX, ecx
	__asm mov regEDX, edx
	__asm popa

	regs[_REG_EAX] = regEAX;
	regs[_REG_EBX] = regEBX;
	regs[_REG_ECX] = regECX;
	regs[_REG_EDX] = regEDX;
#else
	(void)func;		// unused without inline assembly support

	regs[0] = 0;
	regs[1] = 0;
	regs[2] = 0;
	regs[3] = 0;
#endif
}

/*
|
|
================
|
|
Has3DNow
|
|
================
|
|
*/
|
|
static bool Has3DNow( void ) {
|
|
unsigned regs[4];
|
|
|
|
// check AMD-specific functions
|
|
CPUID( 0x80000000, regs );
|
|
if ( regs[_REG_EAX] < 0x80000000 ) {
|
|
return false;
|
|
}
|
|
|
|
// bit 31 of EDX denotes 3DNow! support
|
|
CPUID( 0x80000001, regs );
|
|
if ( regs[_REG_EDX] & ( 1 << 31 ) ) {
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
================
|
|
HasMMX
|
|
================
|
|
*/
|
|
static bool HasMMX( void ) {
|
|
unsigned regs[4];
|
|
|
|
// get CPU feature bits
|
|
CPUID( 1, regs );
|
|
|
|
// bit 23 of EDX denotes MMX existence
|
|
if ( regs[_REG_EDX] & ( 1 << 23 ) ) {
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
================
|
|
HasSSE
|
|
================
|
|
*/
|
|
static bool HasSSE( void ) {
|
|
unsigned regs[4];
|
|
|
|
// get CPU feature bits
|
|
CPUID( 1, regs );
|
|
|
|
// bit 25 of EDX denotes SSE existence
|
|
if ( regs[_REG_EDX] & ( 1 << 25 ) ) {
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
================
|
|
HasSSE2
|
|
================
|
|
*/
|
|
static bool HasSSE2( void ) {
|
|
unsigned regs[4];
|
|
|
|
// get CPU feature bits
|
|
CPUID( 1, regs );
|
|
|
|
// bit 26 of EDX denotes SSE2 existence
|
|
if ( regs[_REG_EDX] & ( 1 << 26 ) ) {
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
================
|
|
HasSSE3
|
|
================
|
|
*/
|
|
static bool HasSSE3( void ) {
|
|
unsigned regs[4];
|
|
|
|
// get CPU feature bits
|
|
CPUID( 1, regs );
|
|
|
|
// bit 0 of ECX denotes SSE3 existence
|
|
if ( regs[_REG_ECX] & ( 1 << 0 ) ) {
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
================
|
|
Sys_GetCPUId
|
|
================
|
|
*/
|
|
int Sys_GetCPUId( void ) {
|
|
#ifdef _MSC_VER
|
|
int flags;
|
|
|
|
// verify we're at least a Pentium or 486 with CPUID support
|
|
if ( !HasCPUID() ) {
|
|
return CPUID_UNSUPPORTED;
|
|
}
|
|
|
|
// check for Multi Media Extensions
|
|
if ( HasMMX() ) {
|
|
flags |= CPUID_MMX;
|
|
}
|
|
|
|
// check for 3DNow!
|
|
if ( Has3DNow() ) {
|
|
flags |= CPUID_3DNOW;
|
|
}
|
|
|
|
// check for Streaming SIMD Extensions
|
|
if ( HasSSE() ) {
|
|
flags |= CPUID_SSE;
|
|
}
|
|
|
|
// check for Streaming SIMD Extensions 2
|
|
if ( HasSSE2() ) {
|
|
flags |= CPUID_SSE2;
|
|
}
|
|
|
|
// check for Streaming SIMD Extensions 3 aka Prescott's New Instructions
|
|
if ( HasSSE3() ) {
|
|
flags |= CPUID_SSE3;
|
|
}
|
|
|
|
return flags;
|
|
#else
|
|
return CPUID_GENERIC;
|
|
#endif
|
|
}
|
|
|
|
|
|
/*
===============================================================================

FPU

===============================================================================
*/

// Pairs a human readable flag name with the index of the bit it describes
// inside an FPU control or status word.
typedef struct bitFlag_s {
	const char *	name;	// label printed for the flag; "" terminates a table
	int				bit;	// bit index that is tested with ( 1 << bit )
} bitFlag_t;

// Scratch memory the inline assembly stores the FPU environment / control
// word into; statePtr exists so the assembly can load the buffer address
// from a variable.
static byte fpuState[128];
static byte *statePtr = fpuState;

// Text buffer returned by Sys_FPU_GetState().
static char fpuString[2048];
// Single-bit flags of the FPU control word that get printed by name.
// The entry with an empty name terminates the table.
static const bitFlag_t controlWordFlags[] = {
	{ "Invalid operation", 0 },
	{ "Denormalized operand", 1 },
	{ "Divide-by-zero", 2 },
	{ "Numeric overflow", 3 },
	{ "Numeric underflow", 4 },
	{ "Inexact result (precision)", 5 },
	{ "Infinity control", 12 },
	{ "", 0 }
};
// Names for the four values of the two-bit precision control field
// (control word bits 8-9), indexed by the field value.
static const char *const precisionControlField[] = {
	"Single Precision (24-bits)",
	"Reserved",
	"Double Precision (53-bits)",
	"Double Extended Precision (64-bits)"
};
// Names for the four values of the two-bit rounding control field
// (control word bits 10-11), indexed by the field value.
static const char *const roundingControlField[] = {
	"Round to nearest",
	"Round down",
	"Round up",
	"Round toward zero"
};
// Single-bit flags of the FPU status word that get printed by name.
// The entry with an empty name terminates the table.
static const bitFlag_t statusWordFlags[] = {
	{ "Invalid operation", 0 },
	{ "Denormalized operand", 1 },
	{ "Divide-by-zero", 2 },
	{ "Numeric overflow", 3 },
	{ "Numeric underflow", 4 },
	{ "Inexact result (precision)", 5 },
	{ "Stack fault", 6 },
	{ "Error summary status", 7 },
	{ "FPU busy", 15 },
	{ "", 0 }
};

/*
===============
Sys_FPU_PrintStateFlags

Formats the seven words of a stored FPU environment into 'ptr': first the
raw hex values, then the decoded control word (named flags, precision and
rounding control) and the decoded status word (named flags, condition code,
top of stack pointer).

ptr		destination buffer; it is written with sprintf and is NOT bounds
		checked, so the caller has to provide enough room
ctrl	control word (decoded)
stat	status word (decoded)
tags, inof, inse, opof, opse
		remaining environment words; only printed as raw hex values

Returns the number of characters written, not counting the terminating
zero. Without MSVC nothing is written and 0 is returned.
===============
*/
int Sys_FPU_PrintStateFlags( char *ptr, int ctrl, int stat, int tags, int inof, int inse, int opof, int opse ) {
#ifdef _MSC_VER
	int i, length = 0;

	length += sprintf( ptr+length,	"CTRL = %08x\n"
									"STAT = %08x\n"
									"TAGS = %08x\n"
									"INOF = %08x\n"
									"INSE = %08x\n"
									"OPOF = %08x\n"
									"OPSE = %08x\n"
									"\n",
									ctrl, stat, tags, inof, inse, opof, opse );

	length += sprintf( ptr+length, "Control Word:\n" );
	for ( i = 0; controlWordFlags[i].name[0]; i++ ) {
		length += sprintf( ptr+length, " %-30s = %s\n", controlWordFlags[i].name, ( ctrl & ( 1 << controlWordFlags[i].bit ) ) ? "true" : "false" );
	}
	length += sprintf( ptr+length, " %-30s = %s\n", "Precision control", precisionControlField[(ctrl>>8)&3] );
	length += sprintf( ptr+length, " %-30s = %s\n", "Rounding control", roundingControlField[(ctrl>>10)&3] );

	length += sprintf( ptr+length, "Status Word:\n" );
	for ( i = 0; statusWordFlags[i].name[0]; i++ ) {
		// accumulate into length like every other line does, so the return
		// value also covers the status flag lines
		length += sprintf( ptr+length, " %-30s = %s\n", statusWordFlags[i].name, ( stat & ( 1 << statusWordFlags[i].bit ) ) ? "true" : "false" );
	}
	length += sprintf( ptr+length, " %-30s = %d%d%d%d\n", "Condition code", (stat>>8)&1, (stat>>9)&1, (stat>>10)&1, (stat>>14)&1 );
	length += sprintf( ptr+length, " %-30s = %d\n", "Top of stack pointer", (stat>>11)&7 );

	return length;
#else
	return 0;
#endif
}

/*
===============
Sys_FPU_StackIsEmpty

Stores the FPU environment into the scratch buffer and inspects the word at
offset 8 (the tag word): the stack counts as empty when all of its lower
16 bits are set, i.e. when the complemented value is zero.

Without MSVC the function always returns true.

NOTE(review): FNSTENV also masks all floating-point exceptions after storing
the environment and nothing here restores the control word - confirm the
callers do not rely on exceptions staying enabled.
===============
*/
bool Sys_FPU_StackIsEmpty( void ) {
#ifdef _MSC_VER
	__asm {
		mov			eax, statePtr
		fnstenv		[eax]				// store the FPU environment
		mov			eax, [eax+8]		// tag word
		xor			eax, 0xFFFFFFFF		// complement: all tag bits set -> 0
		and			eax, 0x0000FFFF		// only the lower 16 bits are tags
		jz			empty
	}
	return false;
empty:
	return true;
#else
	return true;
#endif
}

/*
===============
Sys_FPU_ClearStack

Pops values off the FPU register stack. The tag word is read once and
complemented; starting with the highest two-bit tag pair (mask 3<<14) and
moving down one pair per iteration, one register is popped for every pair
that is non-zero after complementing. The loop stops at the first pair that
reads as zero (a tag of 11b) or once the mask has been shifted out.

Does nothing without MSVC.
===============
*/
void Sys_FPU_ClearStack( void ) {
#ifdef _MSC_VER
	__asm {
		mov			eax, statePtr
		fnstenv		[eax]				// store the FPU environment
		mov			eax, [eax+8]		// tag word
		xor			eax, 0xFFFFFFFF		// complement the tags
		mov			edx, (3<<14)		// mask for the highest tag pair
	emptyStack:
		mov			ecx, eax
		and			ecx, edx
		jz			done				// pair reads as empty, or mask ran out
		fstp		st					// pop one value
		shr			edx, 2				// advance to the next lower tag pair
		jmp			emptyStack
	done:
	}
#endif
}

/*
===============
Sys_FPU_GetState

  gets the FPU state without changing the state

Stores the FPU environment, copies up to eight stack registers into a local
array (each register is swapped to the top with fxch, stored with fst and
swapped back, so the stack contents stay in place) and formats everything
into the static fpuString buffer.

Returns a pointer to that static buffer; the text is overwritten by the
next call. Without MSVC an empty string is returned.

NOTE(review): FNSTENV masks all floating-point exceptions after storing the
environment, so the control word may not be left exactly as it was -
confirm whether that matters for "without changing the state".
===============
*/
const char *Sys_FPU_GetState( void ) {
#ifdef _MSC_VER
	double fpuStack[8] = { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
	double *fpuStackPtr = fpuStack;
	int i, numValues;
	char *ptr;

	// esi = complemented tag word, edx = mask of the tag pair being tested,
	// eax = number of values stored so far, edi = destination array
	__asm {
		mov			esi, statePtr
		mov			edi, fpuStackPtr
		fnstenv		[esi]
		mov			esi, [esi+8]
		xor			esi, 0xFFFFFFFF
		mov			edx, (3<<14)
		xor			eax, eax
		mov			ecx, esi
		and			ecx, edx
		jz			done
		fst			qword ptr [edi+0]
		inc			eax
		shr			edx, 2
		mov			ecx, esi
		and			ecx, edx
		jz			done
		fxch		st(1)
		fst			qword ptr [edi+8]
		inc			eax
		fxch		st(1)
		shr			edx, 2
		mov			ecx, esi
		and			ecx, edx
		jz			done
		fxch		st(2)
		fst			qword ptr [edi+16]
		inc			eax
		fxch		st(2)
		shr			edx, 2
		mov			ecx, esi
		and			ecx, edx
		jz			done
		fxch		st(3)
		fst			qword ptr [edi+24]
		inc			eax
		fxch		st(3)
		shr			edx, 2
		mov			ecx, esi
		and			ecx, edx
		jz			done
		fxch		st(4)
		fst			qword ptr [edi+32]
		inc			eax
		fxch		st(4)
		shr			edx, 2
		mov			ecx, esi
		and			ecx, edx
		jz			done
		fxch		st(5)
		fst			qword ptr [edi+40]
		inc			eax
		fxch		st(5)
		shr			edx, 2
		mov			ecx, esi
		and			ecx, edx
		jz			done
		fxch		st(6)
		fst			qword ptr [edi+48]
		inc			eax
		fxch		st(6)
		shr			edx, 2
		mov			ecx, esi
		and			ecx, edx
		jz			done
		fxch		st(7)
		fst			qword ptr [edi+56]
		inc			eax
		fxch		st(7)
	done:
		mov			numValues, eax
	}

	// copy the seven environment words out of the byte buffer with memcpy
	// instead of reading the bytes through a casted int pointer
	int env[7];
	memcpy( env, fpuState, sizeof( env ) );

	int ctrl = env[0];
	int stat = env[1];
	int tags = env[2];
	int inof = env[3];
	int inse = env[4];
	int opof = env[5];
	int opse = env[6];

	ptr = fpuString;
	ptr += sprintf( ptr,"FPU State:\n"
						"num values on stack = %d\n", numValues );
	// all eight slots are printed; slots that were not stored stay 0.0
	for ( i = 0; i < 8; i++ ) {
		ptr += sprintf( ptr, "ST%d = %1.10e\n", i, fpuStack[i] );
	}

	Sys_FPU_PrintStateFlags( ptr, ctrl, stat, tags, inof, inse, opof, opse );

	return fpuString;
#else
	return "";
#endif
}

/*
===============
Sys_FPU_EnableExceptions

Rewrites the low six bits of the FPU control word: all six are set first
and then the bits given in 'exceptions' (only its low six bits are used)
are cleared again. The remaining control word bits are left untouched.

Does nothing without MSVC.
===============
*/
void Sys_FPU_EnableExceptions( int exceptions ) {
#ifdef _MSC_VER
	__asm {
		mov			eax, statePtr
		mov			ecx, exceptions
		and			cx, 63					// keep the six exception bits
		not			cx						// requested bits become zeros
		fnstcw		word ptr [eax]			// read the current control word
		mov			bx, word ptr [eax]
		or			bx, 63					// set all six bits ...
		and			bx, cx					// ... then clear the requested ones
		mov			word ptr [eax], bx
		fldcw		word ptr [eax]			// load the new control word
	}
#endif
}

/*
===============
Sys_FPU_SetPrecision

Replaces the two-bit precision control field (bits 8-9) of the FPU control
word. 'precision & 3' selects an entry of precisionBitTable and that entry
is written into the field; all other control word bits are preserved.

Does nothing without MSVC.

NOTE(review): table entry 1 writes the field value 01b, which the
precisionControlField names in this file label "Reserved", while 10b is
"Double Precision (53-bits)". If callers pass 1 to request double precision
the entry should probably be 2 - confirm against the precision enum.
===============
*/
void Sys_FPU_SetPrecision( int precision ) {
#ifdef _MSC_VER
	short precisionBitTable[4] = { 0, 1, 3, 0 };
	short precisionBits = precisionBitTable[precision & 3] << 8;
	short precisionMask = ~( ( 1 << 9 ) | ( 1 << 8 ) );

	__asm {
		mov			eax, statePtr
		mov			cx, precisionBits
		fnstcw		word ptr [eax]			// read the current control word
		mov			bx, word ptr [eax]
		and			bx, precisionMask		// clear bits 8-9
		or			bx, cx					// insert the new field value
		mov			word ptr [eax], bx
		fldcw		word ptr [eax]			// load the new control word
	}
#endif
}

/*
================
Sys_FPU_SetRounding

Replaces the two-bit rounding control field (bits 10-11) of the FPU control
word with 'rounding & 3'; all other control word bits are preserved. The
field values are named in roundingControlField.

Does nothing without MSVC.
================
*/
void Sys_FPU_SetRounding( int rounding ) {
#ifdef _MSC_VER
	short roundingBitTable[4] = { 0, 1, 2, 3 };
	short roundingBits = roundingBitTable[rounding & 3] << 10;
	short roundingMask = ~( ( 1 << 11 ) | ( 1 << 10 ) );

	__asm {
		mov			eax, statePtr
		mov			cx, roundingBits
		fnstcw		word ptr [eax]			// read the current control word
		mov			bx, word ptr [eax]
		and			bx, roundingMask		// clear bits 10-11
		or			bx, cx					// insert the new field value
		mov			word ptr [eax], bx
		fldcw		word ptr [eax]			// load the new control word
	}
#endif
}