mirror of
https://bitbucket.org/CPMADevs/cnq3
synced 2024-11-12 23:44:29 +00:00
using the roundss SSE4.1 instruction for inlining floor and ceil syscalls when possible
fixed sharedTraps_t listing syscalls that were not actually at the same index for all 3 VMs
This commit is contained in:
parent
1115cb39b0
commit
c5192d49b7
5 changed files with 63 additions and 21 deletions
|
@ -89,6 +89,8 @@ chg: r_textureMode and r_measureOverdraw were removed
|
||||||
|
|
||||||
chg: r_speeds 1 now reports more precise timings, V-Sync status, GPU time and the selected back-end
|
chg: r_speeds 1 now reports more precise timings, V-Sync status, GPU time and the selected back-end
|
||||||
|
|
||||||
|
chg: using the roundss SSE4.1 instruction for inlining floor and ceil syscalls when possible
|
||||||
|
|
||||||
chg: improved face and grid processing performance with SSE2
|
chg: improved face and grid processing performance with SSE2
|
||||||
|
|
||||||
chg: r_lightmap is now latched again
|
chg: r_lightmap is now latched again
|
||||||
|
|
|
@ -227,6 +227,21 @@ typedef enum {
|
||||||
// 1.32
|
// 1.32
|
||||||
G_FS_SEEK,
|
G_FS_SEEK,
|
||||||
|
|
||||||
|
G_MEMSET = 100,
|
||||||
|
G_MEMCPY,
|
||||||
|
G_STRNCPY,
|
||||||
|
G_SIN,
|
||||||
|
G_COS,
|
||||||
|
G_ATAN2,
|
||||||
|
G_SQRT,
|
||||||
|
G_MATRIXMULTIPLY,
|
||||||
|
G_ANGLEVECTORS,
|
||||||
|
G_PERPENDICULARVECTOR,
|
||||||
|
G_FLOOR,
|
||||||
|
G_CEIL,
|
||||||
|
G_TESTPRINTINT,
|
||||||
|
G_TESTPRINTFLOAT,
|
||||||
|
|
||||||
BOTLIB_SETUP = 200, // ( void );
|
BOTLIB_SETUP = 200, // ( void );
|
||||||
BOTLIB_SHUTDOWN, // ( void );
|
BOTLIB_SHUTDOWN, // ( void );
|
||||||
BOTLIB_LIBVAR_SET,
|
BOTLIB_LIBVAR_SET,
|
||||||
|
|
|
@ -270,15 +270,8 @@ typedef enum {
|
||||||
TRAP_SIN,
|
TRAP_SIN,
|
||||||
TRAP_COS,
|
TRAP_COS,
|
||||||
TRAP_ATAN2,
|
TRAP_ATAN2,
|
||||||
TRAP_SQRT,
|
TRAP_SQRT
|
||||||
TRAP_MATRIXMULTIPLY,
|
// note that ceil/floor etc have different numbers across VMs
|
||||||
TRAP_ANGLEVECTORS,
|
|
||||||
TRAP_PERPENDICULARVECTOR,
|
|
||||||
TRAP_FLOOR,
|
|
||||||
TRAP_CEIL,
|
|
||||||
|
|
||||||
TRAP_TESTPRINTINT,
|
|
||||||
TRAP_TESTPRINTFLOAT
|
|
||||||
} sharedTraps_t;
|
} sharedTraps_t;
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
|
|
|
@ -863,6 +863,24 @@ static void EmitDATAFunc(vm_t *vm)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static qbool IsFloorTrap(vm_t *vm, int trap) // tells whether 'trap' is the floor syscall for this VM; the compared number depends on vm->index
|
||||||
|
{
|
||||||
|
if ( vm->index == VM_CGAME || vm->index == VM_UI )
|
||||||
|
return trap == ~107; // cgame and ui share one value; presumably syscall N is encoded as ~N and floor is 107 in both - TODO confirm against the cgame/ui trap enums
|
||||||
|
|
||||||
|
return trap == ~110; // VM_GAME: G_FLOOR evaluates to 110 (G_MEMSET = 100, G_FLOOR is 10 enumerators later)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static qbool IsCeilTrap(vm_t *vm, int trap) // tells whether 'trap' is the ceil syscall for this VM; the compared number depends on vm->index
|
||||||
|
{
|
||||||
|
if ( vm->index == VM_CGAME || vm->index == VM_UI )
|
||||||
|
return trap == ~108; // cgame and ui share one value; presumably syscall N is encoded as ~N and ceil is 108 in both - TODO confirm against the cgame/ui trap enums
|
||||||
|
|
||||||
|
return trap == ~111; // VM_GAME: G_CEIL evaluates to 111 (G_MEMSET = 100, G_CEIL is 11 enumerators later)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
=================
|
=================
|
||||||
ConstOptimize
|
ConstOptimize
|
||||||
|
@ -1114,6 +1132,20 @@ static qboolean ConstOptimize(vm_t *vm)
|
||||||
EmitCommand( LAST_COMMAND_STORE_FLOAT_EDI );
|
EmitCommand( LAST_COMMAND_STORE_FLOAT_EDI );
|
||||||
ip += 1;
|
ip += 1;
|
||||||
return qtrue;
|
return qtrue;
|
||||||
|
} else if ( IsFloorTrap( vm, v ) && ( cpu_features & CPU_SSE41 ) != 0 ) {
|
||||||
|
EmitString( "f3 0f 10 45 08" ); // movss xmm0, dword ptr [ebp + 8]
|
||||||
|
EmitAddEDI4( vm );
|
||||||
|
EmitString( "66 0f 3a 0a c0 01" ); // roundss xmm0, xmm0, 1 (exceptions not masked)
|
||||||
|
EmitCommand( LAST_COMMAND_STORE_FLOAT_EDI );
|
||||||
|
ip += 1;
|
||||||
|
return qtrue;
|
||||||
|
} else if ( IsCeilTrap( vm, v ) && ( cpu_features & CPU_SSE41 ) != 0 ) {
|
||||||
|
EmitString( "f3 0f 10 45 08" ); // movss xmm0, dword ptr [ebp + 8]
|
||||||
|
EmitAddEDI4( vm );
|
||||||
|
EmitString( "66 0f 3a 0a c0 02" ); // roundss xmm0, xmm0, 2 (exceptions not masked)
|
||||||
|
EmitCommand( LAST_COMMAND_STORE_FLOAT_EDI );
|
||||||
|
ip += 1;
|
||||||
|
return qtrue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( v < 0 ) // syscall
|
if ( v < 0 ) // syscall
|
||||||
|
|
|
@ -764,46 +764,46 @@ static intptr_t SV_GameSystemCalls( intptr_t* args )
|
||||||
case BOTLIB_AI_GENETIC_PARENTS_AND_CHILD_SELECTION:
|
case BOTLIB_AI_GENETIC_PARENTS_AND_CHILD_SELECTION:
|
||||||
return botlib_export->ai.GeneticParentsAndChildSelection(args[1], VMA(2), VMA(3), VMA(4), VMA(5));
|
return botlib_export->ai.GeneticParentsAndChildSelection(args[1], VMA(2), VMA(3), VMA(4), VMA(5));
|
||||||
|
|
||||||
case TRAP_MEMSET:
|
case G_MEMSET:
|
||||||
Com_Memset( VMA(1), args[2], args[3] );
|
Com_Memset( VMA(1), args[2], args[3] );
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
case TRAP_MEMCPY:
|
case G_MEMCPY:
|
||||||
Com_Memcpy( VMA(1), VMA(2), args[3] );
|
Com_Memcpy( VMA(1), VMA(2), args[3] );
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
case TRAP_STRNCPY:
|
case G_STRNCPY:
|
||||||
strncpy( VMA(1), VMA(2), args[3] );
|
strncpy( VMA(1), VMA(2), args[3] );
|
||||||
return args[1];
|
return args[1];
|
||||||
|
|
||||||
case TRAP_SIN:
|
case G_SIN:
|
||||||
return PASSFLOAT( sin( VMF(1) ) );
|
return PASSFLOAT( sin( VMF(1) ) );
|
||||||
|
|
||||||
case TRAP_COS:
|
case G_COS:
|
||||||
return PASSFLOAT( cos( VMF(1) ) );
|
return PASSFLOAT( cos( VMF(1) ) );
|
||||||
|
|
||||||
case TRAP_ATAN2:
|
case G_ATAN2:
|
||||||
return PASSFLOAT( atan2( VMF(1), VMF(2) ) );
|
return PASSFLOAT( atan2( VMF(1), VMF(2) ) );
|
||||||
|
|
||||||
case TRAP_SQRT:
|
case G_SQRT:
|
||||||
return PASSFLOAT( sqrt( VMF(1) ) );
|
return PASSFLOAT( sqrt( VMF(1) ) );
|
||||||
|
|
||||||
case TRAP_MATRIXMULTIPLY:
|
case G_MATRIXMULTIPLY:
|
||||||
MatrixMultiply( VMA(1), VMA(2), VMA(3) );
|
MatrixMultiply( VMA(1), VMA(2), VMA(3) );
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
case TRAP_ANGLEVECTORS:
|
case G_ANGLEVECTORS:
|
||||||
AngleVectors( VMA(1), VMA(2), VMA(3), VMA(4) );
|
AngleVectors( VMA(1), VMA(2), VMA(3), VMA(4) );
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
case TRAP_PERPENDICULARVECTOR:
|
case G_PERPENDICULARVECTOR:
|
||||||
PerpendicularVector( VMA(1), VMA(2) );
|
PerpendicularVector( VMA(1), VMA(2) );
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
case TRAP_FLOOR:
|
case G_FLOOR:
|
||||||
return PASSFLOAT( floor( VMF(1) ) );
|
return PASSFLOAT( floor( VMF(1) ) );
|
||||||
|
|
||||||
case TRAP_CEIL:
|
case G_CEIL:
|
||||||
return PASSFLOAT( ceil( VMF(1) ) );
|
return PASSFLOAT( ceil( VMF(1) ) );
|
||||||
|
|
||||||
// extensions
|
// extensions
|
||||||
|
|
Loading…
Reference in a new issue