SSE2 instruction set support is now required

This commit is contained in:
myT 2019-09-16 20:47:16 +02:00
parent 39af360fdc
commit 983b53cbbb
17 changed files with 108 additions and 592 deletions

View file

@ -1,6 +1,8 @@
DD Mmm 19 - 1.52
chg: SSE2 instruction set support is now required
chg: removed FreeType 2 and the unused R_REGISTERFONT syscalls that were using it
fix: r_monitor on Linux is now 0-based and the value doesn't change incorrectly on its own anymore

View file

@ -2108,31 +2108,27 @@ typedef struct {
int reg;
int bit;
int flag;
qbool noTest;
qbool required;
} cpuFeatureBit_t;
#if idx64
#define IS_X64 qtrue
#define BASIC_CPU_FEATURES (CPU_MMX | CPU_SSE | CPU_SSE2)
#else
#define IS_X64 qfalse
#define BASIC_CPU_FEATURES 0
#endif
static const cpuFeatureBit_t cpu_featureBits[] = {
{ " MMX", 3, 23, CPU_MMX, IS_X64 },
{ " SSE", 3, 25, CPU_SSE, IS_X64 },
{ " SSE2", 3, 26, CPU_SSE2, IS_X64 },
// the following aren't used anywhere for now:
// { " SSE3", 2, 0, CPU_SSE3, qfalse },
// { " SSSE3", 2, 9, CPU_SSSE3, qfalse },
// { " SSE4.1", 2, 19, CPU_SSE41, qfalse },
// { " SSE4.2", 2, 20, CPU_SSE42, qfalse },
#if id386
{ " MMX", 3, 23, 0, qtrue },
{ " SSE", 3, 25, 0, qtrue },
{ " SSE2", 3, 26, 0, qtrue },
#endif
{ " SSE3", 2, 0, CPU_SSE3, qfalse },
{ " SSSE3", 2, 9, CPU_SSSE3, qfalse },
{ " SSE4.1", 2, 19, CPU_SSE41, qfalse },
{ " SSE4.2", 2, 20, CPU_SSE42, qfalse }
// we want to avoid AVX for anything that isn't really super costly
// because otherwise the power management changes will be counter-productive
// { " AVX", 2, 28, CPU_AVX, qfalse }
// for AVX2 and later, you'd need to call cpuid with eax=7 and ecx=0 ("extended features")
};
int cpu_features = BASIC_CPU_FEATURES;
int cpu_features = 0;
static qbool Com_GetProcessorInfo()
{
@ -2141,18 +2137,22 @@ static qbool Com_GetProcessorInfo()
int regs[4];
const char* name = Com_ProcessorName();
if ( name == NULL || !Com_CPUID( 1, regs ) ) {
cpu_features = BASIC_CPU_FEATURES;
return qfalse;
}
char s[256] = "";
Q_strcat( s, sizeof(s), name );
int features = BASIC_CPU_FEATURES;
int features = 0;
for (int i = 0; i < ARRAY_LEN(cpu_featureBits); i++) {
const cpuFeatureBit_t* f = cpu_featureBits + i;
const qbool active = ( regs[f->reg] & ( 1 << f->bit ) ) != 0;
if ( f->noTest || (regs[f->reg] & (1 << f->bit)) ) {
if ( f->required && !active ) {
Com_Error( ERR_FATAL, "CNQ3 requires a processor with SSE2 support\n" );
}
if ( active ) {
Q_strcat( s, sizeof(s), f->s );
features |= f->flag;
}
@ -2165,9 +2165,6 @@ static qbool Com_GetProcessorInfo()
return qtrue;
}
#undef BASIC_CPU_FEATURES
#undef IS_X64
static const cmdTableItem_t com_cmds[] =
{

View file

@ -593,44 +593,8 @@ void SetPlaneSignbits (cplane_t *out) {
BoxOnPlaneSide
Returns 1, 2, or 1 + 2
// this is the slow, general version
// Classifies the axis-aligned box [emins, emaxs] against plane p.
// The result is a bit mask: bit 1 is set when dist1 >= 0, bit 2 is set
// when dist2 < 0, so a box that straddles the plane yields 1 + 2.
// No per-plane shortcut is used: both extreme corners are rebuilt on every call.
int BoxOnPlaneSide2 (vec3_t emins, vec3_t emaxs, struct cplane_s *p)
{
int i;
float dist1, dist2;
int sides;
vec3_t corners[2];
// per axis, corners[0] takes the coordinate that makes normal[i] * coord
// largest and corners[1] the one that makes it smallest
for (i=0 ; i<3 ; i++)
{
if (p->normal[i] < 0)
{
corners[0][i] = emins[i];
corners[1][i] = emaxs[i];
}
else
{
corners[1][i] = emins[i];
corners[0][i] = emaxs[i];
}
}
// offsets of the two extreme corners relative to the plane
dist1 = DotProduct (p->normal, corners[0]) - p->dist;
dist2 = DotProduct (p->normal, corners[1]) - p->dist;
sides = 0;
// corners[0] is not behind the plane
if (dist1 >= 0)
sides = 1;
// corners[1] is behind the plane
if (dist2 < 0)
sides |= 2;
return sides;
}
==================
*/
#if !id386 || defined(__GNUC__)
int BoxOnPlaneSide( const vec3_t emins, const vec3_t emaxs, const struct cplane_s* p )
{
float dist1, dist2;
@ -695,242 +659,6 @@ int BoxOnPlaneSide( const vec3_t emins, const vec3_t emaxs, const struct cplane_
return sides;
}
#else
#pragma warning( disable: 4035 )
// Hand-written x87 version of BoxOnPlaneSide for 32-bit MSVC builds.
// The function is 'naked': there is no compiler prologue/epilogue, so the
// arguments are read straight off the stack and the result is left in eax.
// Register use after the argument loads: ecx = emins, ebx = emaxs, edx = p.
// The byte at offset 17 of *p (presumably cplane_s::signbits - TODO confirm
// against the struct layout) selects one of 8 code paths that pair each of
// the three floats at p+0/p+4/p+8 with a component of emins or emaxs.
// The two resulting sums are then compared with the float at p+12
// (presumably cplane_s::dist) to build the 1 / 2 / 1 + 2 result.
__declspec( naked ) int BoxOnPlaneSide( const vec3_t emins, const vec3_t emaxs, const struct cplane_s* p )
{
static int bops_initialized;
static int Ljmptab[8];
__asm {
push ebx
// one-time setup: fill the jump table with the addresses of the 8 cases
cmp bops_initialized, 1
je initialized
mov bops_initialized, 1
mov Ljmptab[0*4], offset Lcase0
mov Ljmptab[1*4], offset Lcase1
mov Ljmptab[2*4], offset Lcase2
mov Ljmptab[3*4], offset Lcase3
mov Ljmptab[4*4], offset Lcase4
mov Ljmptab[5*4], offset Lcase5
mov Ljmptab[6*4], offset Lcase6
mov Ljmptab[7*4], offset Lcase7
initialized:
// the leading 4 in each address skips the ebx pushed above:
// edx = 3rd argument (p), ecx = 1st (emins), ebx = 2nd (emaxs)
mov edx,dword ptr[4+12+esp]
mov ecx,dword ptr[4+4+esp]
xor eax,eax
mov ebx,dword ptr[4+8+esp]
// al = selector byte at p+17; values of 8 or more go to Lerror
mov al,byte ptr[17+edx]
cmp al,8
jge Lerror
// load the float at p+0 twice (one copy per sum), then dispatch
fld dword ptr[0+edx]
fld st(0)
jmp dword ptr[Ljmptab+eax*4]
// The 8 cases share the same instruction schedule and differ only in
// whether each fmul reads from ebx (emaxs) or ecx (emins).
Lcase0:
fmul dword ptr[ebx]
fld dword ptr[0+4+edx]
fxch st(2)
fmul dword ptr[ecx]
fxch st(2)
fld st(0)
fmul dword ptr[4+ebx]
fld dword ptr[0+8+edx]
fxch st(2)
fmul dword ptr[4+ecx]
fxch st(2)
fld st(0)
fmul dword ptr[8+ebx]
fxch st(5)
faddp st(3),st(0)
fmul dword ptr[8+ecx]
fxch st(1)
faddp st(3),st(0)
fxch st(3)
faddp st(2),st(0)
jmp LSetSides
Lcase1:
fmul dword ptr[ecx]
fld dword ptr[0+4+edx]
fxch st(2)
fmul dword ptr[ebx]
fxch st(2)
fld st(0)
fmul dword ptr[4+ebx]
fld dword ptr[0+8+edx]
fxch st(2)
fmul dword ptr[4+ecx]
fxch st(2)
fld st(0)
fmul dword ptr[8+ebx]
fxch st(5)
faddp st(3),st(0)
fmul dword ptr[8+ecx]
fxch st(1)
faddp st(3),st(0)
fxch st(3)
faddp st(2),st(0)
jmp LSetSides
Lcase2:
fmul dword ptr[ebx]
fld dword ptr[0+4+edx]
fxch st(2)
fmul dword ptr[ecx]
fxch st(2)
fld st(0)
fmul dword ptr[4+ecx]
fld dword ptr[0+8+edx]
fxch st(2)
fmul dword ptr[4+ebx]
fxch st(2)
fld st(0)
fmul dword ptr[8+ebx]
fxch st(5)
faddp st(3),st(0)
fmul dword ptr[8+ecx]
fxch st(1)
faddp st(3),st(0)
fxch st(3)
faddp st(2),st(0)
jmp LSetSides
Lcase3:
fmul dword ptr[ecx]
fld dword ptr[0+4+edx]
fxch st(2)
fmul dword ptr[ebx]
fxch st(2)
fld st(0)
fmul dword ptr[4+ecx]
fld dword ptr[0+8+edx]
fxch st(2)
fmul dword ptr[4+ebx]
fxch st(2)
fld st(0)
fmul dword ptr[8+ebx]
fxch st(5)
faddp st(3),st(0)
fmul dword ptr[8+ecx]
fxch st(1)
faddp st(3),st(0)
fxch st(3)
faddp st(2),st(0)
jmp LSetSides
Lcase4:
fmul dword ptr[ebx]
fld dword ptr[0+4+edx]
fxch st(2)
fmul dword ptr[ecx]
fxch st(2)
fld st(0)
fmul dword ptr[4+ebx]
fld dword ptr[0+8+edx]
fxch st(2)
fmul dword ptr[4+ecx]
fxch st(2)
fld st(0)
fmul dword ptr[8+ecx]
fxch st(5)
faddp st(3),st(0)
fmul dword ptr[8+ebx]
fxch st(1)
faddp st(3),st(0)
fxch st(3)
faddp st(2),st(0)
jmp LSetSides
Lcase5:
fmul dword ptr[ecx]
fld dword ptr[0+4+edx]
fxch st(2)
fmul dword ptr[ebx]
fxch st(2)
fld st(0)
fmul dword ptr[4+ebx]
fld dword ptr[0+8+edx]
fxch st(2)
fmul dword ptr[4+ecx]
fxch st(2)
fld st(0)
fmul dword ptr[8+ecx]
fxch st(5)
faddp st(3),st(0)
fmul dword ptr[8+ebx]
fxch st(1)
faddp st(3),st(0)
fxch st(3)
faddp st(2),st(0)
jmp LSetSides
Lcase6:
fmul dword ptr[ebx]
fld dword ptr[0+4+edx]
fxch st(2)
fmul dword ptr[ecx]
fxch st(2)
fld st(0)
fmul dword ptr[4+ecx]
fld dword ptr[0+8+edx]
fxch st(2)
fmul dword ptr[4+ebx]
fxch st(2)
fld st(0)
fmul dword ptr[8+ecx]
fxch st(5)
faddp st(3),st(0)
fmul dword ptr[8+ebx]
fxch st(1)
faddp st(3),st(0)
fxch st(3)
faddp st(2),st(0)
jmp LSetSides
// case 7 has no trailing jmp: it falls through into LSetSides
Lcase7:
fmul dword ptr[ecx]
fld dword ptr[0+4+edx]
fxch st(2)
fmul dword ptr[ebx]
fxch st(2)
fld st(0)
fmul dword ptr[4+ecx]
fld dword ptr[0+8+edx]
fxch st(2)
fmul dword ptr[4+ebx]
fxch st(2)
fld st(0)
fmul dword ptr[8+ecx]
fxch st(5)
faddp st(3),st(0)
fmul dword ptr[8+ebx]
fxch st(1)
faddp st(3),st(0)
fxch st(3)
faddp st(2),st(0)
// Finish the sums, then compare each one with the float at p+12.
// Bit 0 of ah after fnstsw is the x87 C0 flag ("st(0) < operand").
LSetSides:
faddp st(2),st(0)
fcomp dword ptr[12+edx]
xor ecx,ecx
fnstsw ax
fcomp dword ptr[12+edx]
// first compare: contributes 1 when C0 is clear
and ah,1
xor ah,1
add cl,ah
fnstsw ax
// second compare: contributes 2 when C0 is set
and ah,1
add ah,ah
add cl,ah
pop ebx
mov eax,ecx
ret
// selector byte out of range: raise a breakpoint interrupt
Lerror:
int 3
}
}
#pragma warning( default: 4035 )
#endif
///////////////////////////////////////////////////////////////

View file

@ -29,6 +29,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#define id386 0
#define idppc 0
#define idx64 0
#define idSSE2 0
#else
@ -44,6 +45,12 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#define idx64 0
#endif
#if id386 || idx64
#define idSSE2 1
#else
#define idSSE2 0
#endif
#if (defined(powerc) || defined(powerpc) || defined(ppc) || \
defined(__ppc) || defined(__ppc__)) && !defined(C_ONLY)
#define idppc 1

View file

@ -246,12 +246,13 @@ VIRTUAL MACHINE
==============================================================
*/
typedef enum {
CPU_MMX = (1 << 0),
CPU_SSE = (1 << 1),
CPU_SSE2 = (1 << 2)
} cpuFeatureFlags_t;
typedef enum {
CPU_SSE3 = (1 << 0),
CPU_SSSE3 = (1 << 1),
CPU_SSE41 = (1 << 2),
CPU_SSE42 = (1 << 3)
} cpuFeatureFlags_t;
extern int cpu_features;
typedef struct vm_s vm_t;

View file

@ -150,9 +150,6 @@ typedef enum
FUNC_ENTR = 0,
FUNC_CALL,
FUNC_SYSC,
#if id386
FUNC_FTOL,
#endif
FUNC_BCPY,
FUNC_PSOF,
FUNC_OSOF,
@ -171,10 +168,6 @@ static instruction_t *inst = NULL;
static instruction_t *ci;
static instruction_t *ni;
#if id386
static int fp_cw[2] = { 0x0000, 0x0F7F }; // [0] - current value, [1] - round towards zero
#endif
static int ip;
static int lastConst;
static opcode_t pop1;
@ -237,14 +230,6 @@ static void VM_FreeBuffers( void )
}
#if id386
// Tells whether cpu_features has both the CPU_SSE and the CPU_SSE2 flags set.
// Callers in this file use it to choose between SSE and x87 code emission.
static qbool HasSSEFP( void )
{
	const int wanted = CPU_SSE | CPU_SSE2;

	return ( cpu_features & wanted ) == wanted;
}
#endif
static void Emit1( int v )
{
if ( code )
@ -373,14 +358,7 @@ static void EmitCommand( ELastCommand command )
case LAST_COMMAND_STORE_FLOAT_EDI:
{
const int oldOffset = compiledOfs;
#if id386
if ( HasSSEFP() )
#endif
EmitString( "f3 0f 11 07" ); // movss dword ptr [edi], xmm0
#if id386
else
EmitString( "D9 1F" ); // fstp dword ptr [edi]
#endif
EmitString( "f3 0f 11 07" ); // movss dword ptr [edi], xmm0
floatStoreInstLength = compiledOfs - oldOffset;
break;
}
@ -552,7 +530,7 @@ static void EmitCheckReg( vm_t *vm, int reg, int size )
}
static int EmitLoadFloatEDI_SSE( vm_t *vm )
static int EmitLoadFloatEDI( vm_t *vm )
{
// movss dword ptr [edi], xmm0
if ( LastCommand == LAST_COMMAND_STORE_FLOAT_EDI )
@ -569,36 +547,6 @@ static int EmitLoadFloatEDI_SSE( vm_t *vm )
}
#if id386
// x87 variant of the "load float from [edi]" step.
// Returns 0 after emitting an fld when the previous command was not the
// float store to [edi]. Otherwise returns 1 without emitting anything; when
// vm is non-NULL the 2-byte "fstp dword ptr [edi]" is rewound and LastCommand
// is reset (presumably so the value simply stays in st(0) - TODO confirm).
static int EmitLoadFloatEDI_X87( vm_t *vm )
{
	if ( LastCommand != LAST_COMMAND_STORE_FLOAT_EDI )
	{
		EmitString( "D9 07" ); // fld dword ptr [edi]
		return 0;
	}

	// the last emitted command was: fstp dword ptr [edi]
	if ( vm )
	{
		REWIND( 2 );
		LastCommand = LAST_COMMAND_NONE;
	}

	return 1;
}
#endif
// Dispatches to the SSE or the x87 "load float from [edi]" emitter and
// forwards its return value. The x87 path only exists in id386 builds and
// is taken there when HasSSEFP() reports false.
static int EmitLoadFloatEDI( vm_t *vm )
{
#if id386
	return HasSSEFP() ? EmitLoadFloatEDI_SSE( vm ) : EmitLoadFloatEDI_X87( vm );
#else
	return EmitLoadFloatEDI_SSE( vm );
#endif
}
const char *FarJumpStr( int op, int *n )
{
switch ( op )
@ -844,20 +792,6 @@ funcOffset[FUNC_SYSC] = compiledOfs;
}
#if id386
// Emits the body of the FUNC_FTOL helper routine (x87 float -> int32).
// The generated code saves the current x87 control word into fp_cw[0],
// loads the control word stored 4 bytes further (fp_cw[1], assuming 4-byte
// ints; that entry is annotated "round towards zero" at its definition),
// converts st(0) to a 32-bit integer at [edi], restores the saved control
// word and returns.
// vm is not referenced by this function.
static void EmitFTOLFunc(vm_t *vm)
{
EmitRexString( "B8" ); // mov eax, &fp_cw[0]
EmitPtr( &fp_cw[0] );
EmitString( "9B D9 38" ); // fstcw word ptr [eax] (fwait prefix 9B + fnstcw)
EmitString( "D9 68 04" ); // fldcw word ptr [eax+4]
EmitString( "DB 1F" ); // fistp dword ptr [edi]
EmitString( "D9 28" ); // fldcw word ptr [eax]
EmitString( "C3" ); // ret
}
#endif
static void EmitBCPYFunc(vm_t *vm)
{
// FIXME: range check
@ -929,61 +863,6 @@ static void EmitDATAFunc(vm_t *vm)
}
#if id386
/*
=================
EmitFCalcEDI
=================
*/
// Emits the x87 arithmetic instruction for 'op' that takes the float at
// [edi] as its memory operand. Any op other than OP_ADDF, OP_SUBF, OP_MULF
// or OP_DIVF is reported through Com_Error( ERR_DROP, ... ).
static void EmitFCalcEDI(int op)
{
	if ( op == OP_ADDF ) {
		EmitString( "D8 07" );	// fadd dword ptr [edi]
	} else if ( op == OP_SUBF ) {
		EmitString( "D8 27" );	// fsub dword ptr [edi]
	} else if ( op == OP_MULF ) {
		EmitString( "D8 0F" );	// fmul dword ptr [edi]
	} else if ( op == OP_DIVF ) {
		EmitString( "D8 37" );	// fdiv dword ptr [edi]
	} else {
		Com_Error( ERR_DROP, "bad float op" );
	}
}
/*
=================
EmitFCalcPop
=================
*/
// Emits the popping register form of the x87 arithmetic instruction for 'op'
// (faddp / fsubp / fmulp / fdivp). Any other op is reported through
// Com_Error( ERR_DROP, ... ) together with its value.
static void EmitFCalcPop(int op)
{
	if ( op == OP_ADDF ) {
		EmitString( "DE C1" );	// faddp
	} else if ( op == OP_SUBF ) {
		EmitString( "DE E9" );	// fsubp
	} else if ( op == OP_MULF ) {
		EmitString( "DE C9" );	// fmulp
	} else if ( op == OP_DIVF ) {
		EmitString( "DE F9" );	// fdivp
	} else {
		Com_Error( ERR_DROP, "bad opcode %02x", op );
	}
}
/*
=================
CommuteFloatOp
=================
*/
// Maps a float comparison opcode to the one that gives the same result with
// the two operands swapped: LEF <-> GEF and LTF <-> GTF.
// Every other opcode is returned unchanged.
static int CommuteFloatOp( int op )
{
	if ( op == OP_LEF )
		return OP_GEF;
	if ( op == OP_LTF )
		return OP_GTF;
	if ( op == OP_GEF )
		return OP_LEF;
	if ( op == OP_GTF )
		return OP_LTF;

	return op;
}
#endif
/*
=================
ConstOptimize
@ -1126,28 +1005,16 @@ static qboolean ConstOptimize(vm_t *vm)
case OP_ADDF:
case OP_SUBF:
v = ci->value;
#if id386
if ( HasSSEFP() ) {
#endif
EmitLoadFloatEDI( vm );
EmitString( "C7 45 00" ); // mov dword ptr [ebp], v
Emit4( v );
EmitString( "f3 0f 10 4d 00" ); // movss xmm1, dword ptr [ebp]
switch( op1 ) {
case OP_ADDF: EmitString( "0f 58 c1" ); break; // addps xmm0, xmm1
case OP_SUBF: EmitString( "0f 5c c1" ); break; // subps xmm0, xmm1
case OP_MULF: EmitString( "0f 59 c1" ); break; // mulps xmm0, xmm1
case OP_DIVF: EmitString( "0f 5e c1" ); break; // divps xmm0, xmm1
}
#if id386
} else {
EmitLoadFloatEDI( vm );
EmitString( "C7 45 00" ); // mov [ebp], 0x12345678
Emit4( v );
EmitString( "D9 45 00" ); // fld dword ptr [ebp]
EmitFCalcPop( op1 ); // fmulp/fdivp/faddp/fsubp
EmitLoadFloatEDI( vm );
EmitString( "C7 45 00" ); // mov dword ptr [ebp], v
Emit4( v );
EmitString( "f3 0f 10 4d 00" ); // movss xmm1, dword ptr [ebp]
switch( op1 ) {
case OP_ADDF: EmitString( "0f 58 c1" ); break; // addps xmm0, xmm1
case OP_SUBF: EmitString( "0f 5c c1" ); break; // subps xmm0, xmm1
case OP_MULF: EmitString( "0f 59 c1" ); break; // mulps xmm0, xmm1
case OP_DIVF: EmitString( "0f 5e c1" ); break; // divps xmm0, xmm1
}
#endif
EmitCommand( LAST_COMMAND_STORE_FLOAT_EDI );
ip +=1;
return qtrue;
@ -1240,32 +1107,14 @@ static qboolean ConstOptimize(vm_t *vm)
#endif
v = ci->value;
// try to inline some syscalls
#if id386
if ( HasSSEFP() ) {
#endif
// we let the C run-time handle sin/cos because they'll do better than the x87 crap
if ( v == ~TRAP_SQRT ) {
EmitString( "f3 0f 10 45 08" ); // movss xmm0, dword ptr [ebp + 8]
EmitAddEDI4( vm );
EmitString( "f3 0f 51 c0" ); // sqrtss xmm0, xmm0
EmitCommand( LAST_COMMAND_STORE_FLOAT_EDI );
ip += 1;
return qtrue;
}
#if id386
} else if ( v == ~TRAP_SIN || v == ~TRAP_COS || v == ~TRAP_SQRT ) {
EmitString( "D9 45 08" ); // fld dword ptr [ebp + 8]
switch ( v ) {
case ~TRAP_SQRT: EmitString( "D9 FA" ); break;
case ~TRAP_SIN: EmitString( "D9 FE" ); break;
case ~TRAP_COS: EmitString( "D9 FF" ); break;
}
if ( v == ~TRAP_SQRT ) {
EmitString( "f3 0f 10 45 08" ); // movss xmm0, dword ptr [ebp + 8]
EmitAddEDI4( vm );
EmitCommand( LAST_COMMAND_STORE_FLOAT_EDI ); // fstp dword ptr[edi]
EmitString( "f3 0f 51 c0" ); // sqrtss xmm0, xmm0
EmitCommand( LAST_COMMAND_STORE_FLOAT_EDI );
ip += 1;
return qtrue;
}
#endif
if ( v < 0 ) // syscall
{
@ -1292,38 +1141,18 @@ static qboolean ConstOptimize(vm_t *vm)
case OP_LEF:
case OP_GTF:
case OP_GEF:
#if id386
if ( HasSSEFP() ) {
#endif
EmitLoadFloatEDI( vm );
EmitCommand( LAST_COMMAND_SUB_DI_4 );
v = ci->value;
if ( v == 0 ) {
EmitString( "0f 57 c9" ); // xorps xmm1, xmm1
} else {
EmitString( "C7 45 00" ); // mov dword ptr [ebp], v
Emit4( v );
EmitString( "f3 0f 10 4d 00" ); // movss xmm1, dword ptr [ebp]
}
EmitString( "0f 2f c1" ); // comiss xmm0, xmm1
EmitJump( vm, ni, ni->op, ni->value );
#if id386
EmitLoadFloatEDI( vm );
EmitCommand( LAST_COMMAND_SUB_DI_4 );
v = ci->value;
if ( v == 0 ) {
EmitString( "0f 57 c9" ); // xorps xmm1, xmm1
} else {
EmitLoadFloatEDI( vm );
EmitCommand( LAST_COMMAND_SUB_DI_4 );
v = ci->value;
if ( v == 0 ) {
EmitString( "D9 EE" ); // fldz
} else {
EmitString( "C7 45 00" ); // mov [ebp], 0x12345678
Emit4( v );
EmitString( "D9 45 00" ); // fld dword ptr [ebp]
}
EmitString( "DF E9" ); // fucomip - requires a P6 class CPU (Pentium Pro)
EmitString( "DD D8" ); // fstp st(0)
EmitJump( vm, ni, CommuteFloatOp( ni->op ), ni->value );
EmitString( "C7 45 00" ); // mov dword ptr [ebp], v
Emit4( v );
EmitString( "f3 0f 10 4d 00" ); // movss xmm1, dword ptr [ebp]
}
#endif
EmitString( "0f 2f c1" ); // comiss xmm0, xmm1
EmitJump( vm, ni, ni->op, ni->value );
ip +=1;
return qtrue;
@ -2062,24 +1891,11 @@ __compile:
case OP_LEF:
case OP_GTF:
case OP_GEF:
#if id386
if ( HasSSEFP() ) {
#endif
EmitLoadFloatEDI( vm );
EmitString( "f3 0f 10 4f fc" ); // movss xmm1, dword ptr [edi-4]
EmitCommand( LAST_COMMAND_SUB_DI_8 ); // sub edi, 8
EmitString( "0f 2f c8" ); // comiss xmm1, xmm0
EmitJump( vm, ci, ci->op, ci->value );
#if id386
} else {
EmitLoadFloatEDI( vm ); // fld dword ptr [edi]
EmitCommand( LAST_COMMAND_SUB_DI_8 ); // sub edi, 8
EmitString( "D9 47 04" ); // fld dword ptr [edi+4]
EmitString( "DF E9" ); // fucomip - requires a P6 class CPU (Pentium Pro)
EmitString( "DD D8" ); // fstp st(0)
EmitJump( vm, ci, ci->op, ci->value );
}
#endif
EmitLoadFloatEDI( vm );
EmitString( "f3 0f 10 4f fc" ); // movss xmm1, dword ptr [edi-4]
EmitCommand( LAST_COMMAND_SUB_DI_8 ); // sub edi, 8
EmitString( "0f 2f c8" ); // comiss xmm1, xmm0
EmitJump( vm, ci, ci->op, ci->value );
pop1 = OP_UNDEF;
break;
@ -2191,17 +2007,9 @@ __compile:
case OP_NEGF:
EmitLoadFloatEDI( vm );
#if id386
if ( HasSSEFP() ) {
#endif
EmitString( "0f 57 c9" ); // xorps xmm1, xmm1
EmitString( "0f 5c c8" ); // subps xmm1, xmm0
EmitString( "0f 28 c1" ); // movaps xmm0, xmm1
#if id386
} else {
EmitString( "D9 E0" ); // fchs
}
#endif
EmitString( "0f 57 c9" ); // xorps xmm1, xmm1
EmitString( "0f 5c c8" ); // subps xmm1, xmm0
EmitString( "0f 28 c1" ); // movaps xmm0, xmm1
EmitCommand( LAST_COMMAND_STORE_FLOAT_EDI );
break;
@ -2209,55 +2017,27 @@ __compile:
case OP_SUBF:
case OP_DIVF:
case OP_MULF:
#if id386
if ( HasSSEFP() ) {
#endif
EmitString( "f3 0f 10 47 fc" ); // movss xmm0, dword ptr [edi-4]
EmitString( "f3 0f 10 0f" ); // movss xmm1, dword ptr [edi]
switch( ci->op ) {
case OP_ADDF: EmitString( "0f 58 c1" ); break; // addps xmm0, xmm1
case OP_SUBF: EmitString( "0f 5c c1" ); break; // subps xmm0, xmm1
case OP_MULF: EmitString( "0f 59 c1" ); break; // mulps xmm0, xmm1
case OP_DIVF: EmitString( "0f 5e c1" ); break; // divps xmm0, xmm1
}
EmitString( "f3 0f 11 47 fc" ); // movss dword ptr [edi-4], xmm0
#if id386
} else {
EmitString( "D9 47 FC" ); // fld dword ptr [edi-4]
EmitFCalcEDI( ci->op ); // fadd|fsub|fmul|fdiv dword ptr [edi]
EmitString( "D9 5F FC" ); // fstp dword ptr [edi-4]
EmitString( "f3 0f 10 47 fc" ); // movss xmm0, dword ptr [edi-4]
EmitString( "f3 0f 10 0f" ); // movss xmm1, dword ptr [edi]
switch( ci->op ) {
case OP_ADDF: EmitString( "0f 58 c1" ); break; // addps xmm0, xmm1
case OP_SUBF: EmitString( "0f 5c c1" ); break; // subps xmm0, xmm1
case OP_MULF: EmitString( "0f 59 c1" ); break; // mulps xmm0, xmm1
case OP_DIVF: EmitString( "0f 5e c1" ); break; // divps xmm0, xmm1
}
#endif
EmitString( "f3 0f 11 47 fc" ); // movss dword ptr [edi-4], xmm0
EmitCommand( LAST_COMMAND_SUB_DI_4 ); // sub edi, 4
break;
case OP_CVIF:
#if id386
if ( HasSSEFP() ) {
#endif
EmitString( "f3 0f 2a 07" ); // cvtsi2ss xmm0, dword ptr [edi]
#if id386
} else {
EmitString( "DB 07" ); // fild dword ptr [edi]
}
#endif
EmitString( "f3 0f 2a 07" ); // cvtsi2ss xmm0, dword ptr [edi]
EmitCommand( LAST_COMMAND_STORE_FLOAT_EDI );
break;
case OP_CVFI:
#if id386
if ( HasSSEFP() ) {
#endif
EmitLoadFloatEDI( vm );
EmitString( "f3 0f 2c c0" ); // cvttss2si eax, xmm0
EmitString( "89 07" ); // mov dword ptr [edi], eax
#if id386
} else {
EmitLoadFloatEDI( vm ); // fld dword ptr [edi]
// call the library conversion function
EmitCallOffset( FUNC_FTOL ); // call +FUNC_FTOL
}
#endif
EmitLoadFloatEDI( vm );
EmitString( "f3 0f 2c c0" ); // cvttss2si eax, xmm0
EmitString( "89 07" ); // mov dword ptr [edi], eax
break;
case OP_SEX8:
@ -2337,16 +2117,11 @@ __compile:
// ****************
// system functions
// ****************
EmitAlign( 4 );
funcOffset[FUNC_CALL] = compiledOfs;
EmitCallFunc( vm );
#if id386
EmitAlign( 4 );
funcOffset[FUNC_FTOL] = compiledOfs;
EmitFTOLFunc( vm );
#endif
EmitAlign( 4 );
funcOffset[FUNC_BCPY] = compiledOfs;
EmitBCPYFunc( vm );

View file

@ -19,7 +19,7 @@ ifeq ($(config),debug_x32)
INCLUDES +=
FORCE_INCLUDE +=
ALL_CPPFLAGS += $(CPPFLAGS) -MMD -MP $(DEFINES) $(INCLUDES)
ALL_CFLAGS += $(CFLAGS) $(ALL_CPPFLAGS) -m32 -g -Wno-unused-parameter -Wno-write-strings -std=c++98
ALL_CFLAGS += $(CFLAGS) $(ALL_CPPFLAGS) -m32 -g -Wno-unused-parameter -Wno-write-strings -mmmx -msse -msse2 -std=c++98
ALL_CXXFLAGS += $(CXXFLAGS) $(ALL_CFLAGS) -fno-exceptions -fno-rtti
ALL_RESFLAGS += $(RESFLAGS) $(DEFINES) $(INCLUDES)
LIBS +=
@ -73,7 +73,7 @@ ifeq ($(config),release_x32)
INCLUDES +=
FORCE_INCLUDE +=
ALL_CPPFLAGS += $(CPPFLAGS) -MMD -MP $(DEFINES) $(INCLUDES)
ALL_CFLAGS += $(CFLAGS) $(ALL_CPPFLAGS) -m32 -fomit-frame-pointer -ffast-math -Os -g -msse2 -Wno-unused-parameter -Wno-write-strings -g1 -std=c++98
ALL_CFLAGS += $(CFLAGS) $(ALL_CPPFLAGS) -m32 -fomit-frame-pointer -ffast-math -Os -g -msse2 -Wno-unused-parameter -Wno-write-strings -g1 -mmmx -msse -msse2 -std=c++98
ALL_CXXFLAGS += $(CXXFLAGS) $(ALL_CFLAGS) -fno-exceptions -fno-rtti
ALL_RESFLAGS += $(RESFLAGS) $(DEFINES) $(INCLUDES)
LIBS +=

View file

@ -19,7 +19,7 @@ ifeq ($(config),debug_x32)
INCLUDES +=
FORCE_INCLUDE +=
ALL_CPPFLAGS += $(CPPFLAGS) -MMD -MP $(DEFINES) $(INCLUDES)
ALL_CFLAGS += $(CFLAGS) $(ALL_CPPFLAGS) -m32 -g -Wno-unused-parameter -Wno-write-strings -x c++ -std=c++98
ALL_CFLAGS += $(CFLAGS) $(ALL_CPPFLAGS) -m32 -g -Wno-unused-parameter -Wno-write-strings -mmmx -msse -msse2 -x c++ -std=c++98
ALL_CXXFLAGS += $(CXXFLAGS) $(ALL_CFLAGS) -fno-exceptions -fno-rtti
ALL_RESFLAGS += $(RESFLAGS) $(DEFINES) $(INCLUDES)
LIBS += ../../.build/debug_x32/libbotlib.a -ldl -lm
@ -81,7 +81,7 @@ ifeq ($(config),release_x32)
INCLUDES +=
FORCE_INCLUDE +=
ALL_CPPFLAGS += $(CPPFLAGS) -MMD -MP $(DEFINES) $(INCLUDES)
ALL_CFLAGS += $(CFLAGS) $(ALL_CPPFLAGS) -m32 -fomit-frame-pointer -ffast-math -Os -g -msse2 -Wno-unused-parameter -Wno-write-strings -g1 -x c++ -std=c++98
ALL_CFLAGS += $(CFLAGS) $(ALL_CPPFLAGS) -m32 -fomit-frame-pointer -ffast-math -Os -g -msse2 -Wno-unused-parameter -Wno-write-strings -g1 -mmmx -msse -msse2 -x c++ -std=c++98
ALL_CXXFLAGS += $(CXXFLAGS) $(ALL_CFLAGS) -fno-exceptions -fno-rtti
ALL_RESFLAGS += $(RESFLAGS) $(DEFINES) $(INCLUDES)
LIBS += ../../.build/release_x32/libbotlib.a -ldl -lm

View file

@ -19,7 +19,7 @@ ifeq ($(config),debug_x32)
INCLUDES +=
FORCE_INCLUDE +=
ALL_CPPFLAGS += $(CPPFLAGS) -MMD -MP $(DEFINES) $(INCLUDES)
ALL_CFLAGS += $(CFLAGS) $(ALL_CPPFLAGS) -m32 -g -Wno-unused-parameter -Wno-write-strings -x c++ -std=c++98
ALL_CFLAGS += $(CFLAGS) $(ALL_CPPFLAGS) -m32 -g -Wno-unused-parameter -Wno-write-strings -mmmx -msse -msse2 -x c++ -std=c++98
ALL_CXXFLAGS += $(CXXFLAGS) $(ALL_CFLAGS) -fno-exceptions -fno-rtti
ALL_RESFLAGS += $(RESFLAGS) $(DEFINES) $(INCLUDES)
LIBS += ../../.build/debug_x32/libbotlib.a ../../.build/debug_x32/librenderer.a ../../.build/debug_x32/liblibjpeg-turbo.a -ldl -lm -lSDL2
@ -81,7 +81,7 @@ ifeq ($(config),release_x32)
INCLUDES +=
FORCE_INCLUDE +=
ALL_CPPFLAGS += $(CPPFLAGS) -MMD -MP $(DEFINES) $(INCLUDES)
ALL_CFLAGS += $(CFLAGS) $(ALL_CPPFLAGS) -m32 -fomit-frame-pointer -ffast-math -Os -g -msse2 -Wno-unused-parameter -Wno-write-strings -g1 -x c++ -std=c++98
ALL_CFLAGS += $(CFLAGS) $(ALL_CPPFLAGS) -m32 -fomit-frame-pointer -ffast-math -Os -g -msse2 -Wno-unused-parameter -Wno-write-strings -g1 -mmmx -msse -msse2 -x c++ -std=c++98
ALL_CXXFLAGS += $(CXXFLAGS) $(ALL_CFLAGS) -fno-exceptions -fno-rtti
ALL_RESFLAGS += $(RESFLAGS) $(DEFINES) $(INCLUDES)
LIBS += ../../.build/release_x32/libbotlib.a ../../.build/release_x32/librenderer.a ../../.build/release_x32/liblibjpeg-turbo.a -ldl -lm -lSDL2

View file

@ -19,7 +19,7 @@ ifeq ($(config),debug_x32)
INCLUDES += -I../../code/libjpeg-turbo -I../../code/libjpeg-turbo/simd
FORCE_INCLUDE +=
ALL_CPPFLAGS += $(CPPFLAGS) -MMD -MP $(DEFINES) $(INCLUDES)
ALL_CFLAGS += $(CFLAGS) $(ALL_CPPFLAGS) -m32 -g -Wno-unused-parameter -Wno-write-strings
ALL_CFLAGS += $(CFLAGS) $(ALL_CPPFLAGS) -m32 -g -Wno-unused-parameter -Wno-write-strings -mmmx -msse -msse2
ALL_CXXFLAGS += $(CXXFLAGS) $(ALL_CFLAGS) -fno-exceptions -fno-rtti
ALL_RESFLAGS += $(RESFLAGS) $(DEFINES) $(INCLUDES)
LIBS +=
@ -73,7 +73,7 @@ ifeq ($(config),release_x32)
INCLUDES += -I../../code/libjpeg-turbo -I../../code/libjpeg-turbo/simd
FORCE_INCLUDE +=
ALL_CPPFLAGS += $(CPPFLAGS) -MMD -MP $(DEFINES) $(INCLUDES)
ALL_CFLAGS += $(CFLAGS) $(ALL_CPPFLAGS) -m32 -fomit-frame-pointer -ffast-math -Os -g -msse2 -Wno-unused-parameter -Wno-write-strings -g1
ALL_CFLAGS += $(CFLAGS) $(ALL_CPPFLAGS) -m32 -fomit-frame-pointer -ffast-math -Os -g -msse2 -Wno-unused-parameter -Wno-write-strings -g1 -mmmx -msse -msse2
ALL_CXXFLAGS += $(CXXFLAGS) $(ALL_CFLAGS) -fno-exceptions -fno-rtti
ALL_RESFLAGS += $(RESFLAGS) $(DEFINES) $(INCLUDES)
LIBS +=

View file

@ -19,7 +19,7 @@ ifeq ($(config),debug_x32)
INCLUDES +=
FORCE_INCLUDE +=
ALL_CPPFLAGS += $(CPPFLAGS) -MMD -MP $(DEFINES) $(INCLUDES)
ALL_CFLAGS += $(CFLAGS) $(ALL_CPPFLAGS) -m32 -g -Wno-unused-parameter -Wno-write-strings -std=c++98
ALL_CFLAGS += $(CFLAGS) $(ALL_CPPFLAGS) -m32 -g -Wno-unused-parameter -Wno-write-strings -mmmx -msse -msse2 -std=c++98
ALL_CXXFLAGS += $(CXXFLAGS) $(ALL_CFLAGS) -fno-exceptions -fno-rtti
ALL_RESFLAGS += $(RESFLAGS) $(DEFINES) $(INCLUDES)
LIBS +=
@ -73,7 +73,7 @@ ifeq ($(config),release_x32)
INCLUDES +=
FORCE_INCLUDE +=
ALL_CPPFLAGS += $(CPPFLAGS) -MMD -MP $(DEFINES) $(INCLUDES)
ALL_CFLAGS += $(CFLAGS) $(ALL_CPPFLAGS) -m32 -fomit-frame-pointer -ffast-math -Os -g -msse2 -Wno-unused-parameter -Wno-write-strings -g1 -std=c++98
ALL_CFLAGS += $(CFLAGS) $(ALL_CPPFLAGS) -m32 -fomit-frame-pointer -ffast-math -Os -g -msse2 -Wno-unused-parameter -Wno-write-strings -g1 -mmmx -msse -msse2 -std=c++98
ALL_CXXFLAGS += $(CXXFLAGS) $(ALL_CFLAGS) -fno-exceptions -fno-rtti
ALL_RESFLAGS += $(RESFLAGS) $(DEFINES) $(INCLUDES)
LIBS +=

View file

@ -232,6 +232,9 @@ local function ApplyProjectSettings(outputExe)
buildoptions { "/GL" }
linkoptions { "" }
filter { "action:vs*", "platforms:x32" }
buildoptions { "/arch:SSE2" }
-- disable the "conversion from 'X' to 'Y', possible loss of data" warning
-- this should be removed once the x64 port is complete
filter { "action:vs*", "platforms:x64" }
@ -257,6 +260,9 @@ local function ApplyProjectSettings(outputExe)
buildoptions { "-g1" }
linkoptions { "" }
filter { "action:gmake", "platforms:x32" }
buildoptions { "-mmmx -msse -msse2" }
end
local function ApplyLibProjectSettings()

View file

@ -106,7 +106,7 @@
<ExceptionHandling>false</ExceptionHandling>
<RuntimeTypeInfo>false</RuntimeTypeInfo>
<TreatWChar_tAsBuiltInType>false</TreatWChar_tAsBuiltInType>
<AdditionalOptions>/Gm %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions>/Gm /arch:SSE2 %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
@ -157,7 +157,7 @@
<FloatingPointModel>Fast</FloatingPointModel>
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<AdditionalOptions>/GL %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions>/GL /arch:SSE2 %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>

View file

@ -110,7 +110,7 @@
<ExceptionHandling>false</ExceptionHandling>
<RuntimeTypeInfo>false</RuntimeTypeInfo>
<TreatWChar_tAsBuiltInType>false</TreatWChar_tAsBuiltInType>
<AdditionalOptions>/Gm %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions>/Gm /arch:SSE2 %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
@ -175,7 +175,7 @@ copy "..\..\.bin\debug_x64\cnq3-server-x64.pdb" "$(QUAKE3DIR)"</Command>
<FloatingPointModel>Fast</FloatingPointModel>
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<AdditionalOptions>/GL %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions>/GL /arch:SSE2 %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>

View file

@ -110,7 +110,7 @@
<ExceptionHandling>false</ExceptionHandling>
<RuntimeTypeInfo>false</RuntimeTypeInfo>
<TreatWChar_tAsBuiltInType>false</TreatWChar_tAsBuiltInType>
<AdditionalOptions>/Gm %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions>/Gm /arch:SSE2 %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
@ -175,7 +175,7 @@ copy "..\..\.bin\debug_x64\cnq3-x64.pdb" "$(QUAKE3DIR)"</Command>
<FloatingPointModel>Fast</FloatingPointModel>
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<AdditionalOptions>/GL %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions>/GL /arch:SSE2 %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>

View file

@ -107,7 +107,7 @@
<ExceptionHandling>false</ExceptionHandling>
<RuntimeTypeInfo>false</RuntimeTypeInfo>
<TreatWChar_tAsBuiltInType>false</TreatWChar_tAsBuiltInType>
<AdditionalOptions>/Gm %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions>/Gm /arch:SSE2 %(AdditionalOptions)</AdditionalOptions>
<CompileAs>CompileAsC</CompileAs>
</ClCompile>
<Link>
@ -162,7 +162,7 @@
<FloatingPointModel>Fast</FloatingPointModel>
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<AdditionalOptions>/GL %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions>/GL /arch:SSE2 %(AdditionalOptions)</AdditionalOptions>
<CompileAs>CompileAsC</CompileAs>
</ClCompile>
<Link>

View file

@ -106,7 +106,7 @@
<ExceptionHandling>false</ExceptionHandling>
<RuntimeTypeInfo>false</RuntimeTypeInfo>
<TreatWChar_tAsBuiltInType>false</TreatWChar_tAsBuiltInType>
<AdditionalOptions>/Gm %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions>/Gm /arch:SSE2 %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
@ -157,7 +157,7 @@
<FloatingPointModel>Fast</FloatingPointModel>
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<AdditionalOptions>/GL %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions>/GL /arch:SSE2 %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>