mirror of
https://bitbucket.org/CPMADevs/cnq3
synced 2024-11-10 06:31:48 +00:00
using the roundss SSE4.1 instruction for inlining floor and ceil syscalls when possible
fixed sharedTraps_t listing syscalls that were not actually at the same index for all 3 VMs
This commit is contained in:
parent
1115cb39b0
commit
c5192d49b7
5 changed files with 63 additions and 21 deletions
|
@ -89,6 +89,8 @@ chg: r_textureMode and r_measureOverdraw were removed
|
|||
|
||||
chg: r_speeds 1 now reports more precise timings, V-Sync status, GPU time and the selected back-end
|
||||
|
||||
chg: using the roundss SSE4.1 instruction for inlining floor and ceil syscalls when possible
|
||||
|
||||
chg: improved face and grid processing performance with SSE2
|
||||
|
||||
chg: r_lightmap is now latched again
|
||||
|
|
|
@ -227,6 +227,21 @@ typedef enum {
|
|||
// 1.32
|
||||
G_FS_SEEK,
|
||||
|
||||
G_MEMSET = 100,
|
||||
G_MEMCPY,
|
||||
G_STRNCPY,
|
||||
G_SIN,
|
||||
G_COS,
|
||||
G_ATAN2,
|
||||
G_SQRT,
|
||||
G_MATRIXMULTIPLY,
|
||||
G_ANGLEVECTORS,
|
||||
G_PERPENDICULARVECTOR,
|
||||
G_FLOOR,
|
||||
G_CEIL,
|
||||
G_TESTPRINTINT,
|
||||
G_TESTPRINTFLOAT,
|
||||
|
||||
BOTLIB_SETUP = 200, // ( void );
|
||||
BOTLIB_SHUTDOWN, // ( void );
|
||||
BOTLIB_LIBVAR_SET,
|
||||
|
|
|
@ -270,15 +270,8 @@ typedef enum {
|
|||
TRAP_SIN,
|
||||
TRAP_COS,
|
||||
TRAP_ATAN2,
|
||||
TRAP_SQRT,
|
||||
TRAP_MATRIXMULTIPLY,
|
||||
TRAP_ANGLEVECTORS,
|
||||
TRAP_PERPENDICULARVECTOR,
|
||||
TRAP_FLOOR,
|
||||
TRAP_CEIL,
|
||||
|
||||
TRAP_TESTPRINTINT,
|
||||
TRAP_TESTPRINTFLOAT
|
||||
TRAP_SQRT
|
||||
// note that ceil/floor etc have different numbers across VMs
|
||||
} sharedTraps_t;
|
||||
|
||||
typedef enum {
|
||||
|
|
|
@ -863,6 +863,24 @@ static void EmitDATAFunc(vm_t *vm)
|
|||
}
|
||||
|
||||
|
||||
// Tells whether 'trap' designates the floor syscall of the given VM.
// The floor syscall does not share one index across modules:
// 107 is used for cgame and ui, 110 for every other VM (i.e. game).
// The value is compared against the bitwise complement of that index
// (presumably the negative form syscall numbers take in the bytecode - confirm against the caller).
static qbool IsFloorTrap(vm_t *vm, int trap)
{
	const int isClientModule = ( vm->index == VM_CGAME ) || ( vm->index == VM_UI );
	const int floorTrap = isClientModule ? ~107 : ~110; // second value: VM_GAME

	return trap == floorTrap;
}
|
||||
|
||||
|
||||
// Tells whether 'trap' designates the ceil syscall of the given VM.
// The ceil syscall does not share one index across modules:
// 108 is used for cgame and ui, 111 for every other VM (i.e. game).
// The value is compared against the bitwise complement of that index
// (presumably the negative form syscall numbers take in the bytecode - confirm against the caller).
static qbool IsCeilTrap(vm_t *vm, int trap)
{
	const int isClientModule = ( vm->index == VM_CGAME ) || ( vm->index == VM_UI );
	const int ceilTrap = isClientModule ? ~108 : ~111; // second value: VM_GAME

	return trap == ceilTrap;
}
|
||||
|
||||
|
||||
/*
|
||||
=================
|
||||
ConstOptimize
|
||||
|
@ -1114,6 +1132,20 @@ static qboolean ConstOptimize(vm_t *vm)
|
|||
EmitCommand( LAST_COMMAND_STORE_FLOAT_EDI );
|
||||
ip += 1;
|
||||
return qtrue;
|
||||
} else if ( IsFloorTrap( vm, v ) && ( cpu_features & CPU_SSE41 ) != 0 ) {
|
||||
EmitString( "f3 0f 10 45 08" ); // movss xmm0, dword ptr [ebp + 8]
|
||||
EmitAddEDI4( vm );
|
||||
EmitString( "66 0f 3a 0a c0 01" ); // roundss xmm0, xmm0, 1 (exceptions not masked)
|
||||
EmitCommand( LAST_COMMAND_STORE_FLOAT_EDI );
|
||||
ip += 1;
|
||||
return qtrue;
|
||||
} else if ( IsCeilTrap( vm, v ) && ( cpu_features & CPU_SSE41 ) != 0 ) {
|
||||
EmitString( "f3 0f 10 45 08" ); // movss xmm0, dword ptr [ebp + 8]
|
||||
EmitAddEDI4( vm );
|
||||
EmitString( "66 0f 3a 0a c0 02" ); // roundss xmm0, xmm0, 2 (exceptions not masked)
|
||||
EmitCommand( LAST_COMMAND_STORE_FLOAT_EDI );
|
||||
ip += 1;
|
||||
return qtrue;
|
||||
}
|
||||
|
||||
if ( v < 0 ) // syscall
|
||||
|
|
|
@ -764,46 +764,46 @@ static intptr_t SV_GameSystemCalls( intptr_t* args )
|
|||
case BOTLIB_AI_GENETIC_PARENTS_AND_CHILD_SELECTION:
|
||||
return botlib_export->ai.GeneticParentsAndChildSelection(args[1], VMA(2), VMA(3), VMA(4), VMA(5));
|
||||
|
||||
case TRAP_MEMSET:
|
||||
case G_MEMSET:
|
||||
Com_Memset( VMA(1), args[2], args[3] );
|
||||
return 0;
|
||||
|
||||
case TRAP_MEMCPY:
|
||||
case G_MEMCPY:
|
||||
Com_Memcpy( VMA(1), VMA(2), args[3] );
|
||||
return 0;
|
||||
|
||||
case TRAP_STRNCPY:
|
||||
case G_STRNCPY:
|
||||
strncpy( VMA(1), VMA(2), args[3] );
|
||||
return args[1];
|
||||
|
||||
case TRAP_SIN:
|
||||
case G_SIN:
|
||||
return PASSFLOAT( sin( VMF(1) ) );
|
||||
|
||||
case TRAP_COS:
|
||||
case G_COS:
|
||||
return PASSFLOAT( cos( VMF(1) ) );
|
||||
|
||||
case TRAP_ATAN2:
|
||||
case G_ATAN2:
|
||||
return PASSFLOAT( atan2( VMF(1), VMF(2) ) );
|
||||
|
||||
case TRAP_SQRT:
|
||||
case G_SQRT:
|
||||
return PASSFLOAT( sqrt( VMF(1) ) );
|
||||
|
||||
case TRAP_MATRIXMULTIPLY:
|
||||
case G_MATRIXMULTIPLY:
|
||||
MatrixMultiply( VMA(1), VMA(2), VMA(3) );
|
||||
return 0;
|
||||
|
||||
case TRAP_ANGLEVECTORS:
|
||||
case G_ANGLEVECTORS:
|
||||
AngleVectors( VMA(1), VMA(2), VMA(3), VMA(4) );
|
||||
return 0;
|
||||
|
||||
case TRAP_PERPENDICULARVECTOR:
|
||||
case G_PERPENDICULARVECTOR:
|
||||
PerpendicularVector( VMA(1), VMA(2) );
|
||||
return 0;
|
||||
|
||||
case TRAP_FLOOR:
|
||||
case G_FLOOR:
|
||||
return PASSFLOAT( floor( VMF(1) ) );
|
||||
|
||||
case TRAP_CEIL:
|
||||
case G_CEIL:
|
||||
return PASSFLOAT( ceil( VMF(1) ) );
|
||||
|
||||
// extensions
|
||||
|
|
Loading…
Reference in a new issue