rallyunlimited-engine/code/qcommon/vm_x86.c
/*
===========================================================================
Copyright (C) 1999-2005 Id Software, Inc.
Copyright (C) 2011-2021 Quake3e project
This file is part of Quake III Arena source code.
Quake III Arena source code is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.
Quake III Arena source code is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Quake III Arena source code; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
===========================================================================
*/
// load time compiler and execution environment for x86_64
// with dynamic register allocation and various optimizations
#include "vm_local.h"
#include "../ui/ui_public.h"
#include "../cgame/cg_public.h"
#include "../game/g_public.h"
#ifdef _WIN32
#include <windows.h>
#endif
#ifdef __FreeBSD__
#include <sys/types.h>
#endif
#ifndef _WIN32
#include <sys/mman.h> // for PROT_ stuff
#endif
/* need this on NX enabled systems (i386 with PAE kernel or noexec32=on x86_64) */
#if defined(__linux__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__APPLE__)
#define VM_X86_MMAP
#endif
#define DEBUG_VM
//#define DEBUG_INT
//#define DUMP_CODE
//#define VM_LOG_SYSCALLS
#define JUMP_OPTIMIZE 1
#if JUMP_OPTIMIZE
#define NUM_PASSES 3
#define PASS_INIT 0
#define PASS_COMPRESS 1
#define PASS_EXPAND_ONLY 2
#define NUM_COMPRESSIONS 2
#define FJUMP_THRESHOLD 48
#else
#define NUM_PASSES 1
#endif
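// Rough outline of the jump-size optimization (assumption based on the pass
// defines above and jumpSizeChanged below): the compiler is run in several
// passes; short (rel8) branch forms are preferred where the target is close
// enough, and another pass is forced whenever a branch changes size.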
#define DYN_ALLOC_RX
#define DYN_ALLOC_SX
#define CONST_CACHE_RX
#define CONST_CACHE_SX
#define REGS_OPTIMIZE
#define ADDR_OPTIMIZE
#define LOAD_OPTIMIZE
#define FPU_OPTIMIZE
#define CONST_OPTIMIZE
//#define RET_OPTIMIZE // increases code size
//#define MACRO_OPTIMIZE // slows down a bit?
// allow sharing both variables and constants in registers
#define REG_TYPE_MASK
// number of variables/memory mappings per register
#define REG_MAP_COUNT 4
#define R_PSTACK R_ESI
#define R_OPSTACK R_EDI
#define R_DATABASE R_EBX
#define R_PROCBASE R_EBP
#if idx64
#define R_DATAMASK R_R11
#define R_INSPOINTERS R_R12
#define R_SYSCALL R_R13
#define R_STACKBOTTOM R_R14
#define R_OPSTACKTOP R_R15
#endif
#define FUNC_ALIGN 4
/*
-------------
eax scratch
ebx* dataBase
ecx scratch (required for shifts)
edx scratch (required for divisions)
esi* programStack
edi* opStack
ebp* procStack ( dataBase + programStack )
-------------
rax scratch
rbx* dataBase
rcx scratch (required for shifts)
rdx scratch (required for divisions)
rsi* programStack
rdi* opStack
rbp* procStack ( dataBase + programStack )
rsp*
r8 scratch
r9 scratch
r10 scratch
r11 scratch - dataMask
r12* instructionPointers
r13* systemCall
r14* stackBottom
r15* opStackTop
xmm0 scratch
xmm1 scratch
xmm2 scratch
xmm3 scratch
xmm4 scratch
xmm5 scratch
Windows ABI: you are required to preserve the XMM6-XMM15 registers; RSI/RDI are callee-saved
System V ABI: you don't have to preserve any of the XMM registers; RSI/RDI are caller-saved
Example of how the data segment looks during vmMain execution:
| .... |
|------| vm->programStack -=36 (8+12+16) // set by vmMain
| ???? | +0 - unused, reserved for interpreter
| ???? | +4 - unused, reserved for interpreter
|------|
| arg0 | +8 \
| arg4 | +12 | - passed arguments, accessible from subroutines
| arg8 | +16 /
|------|
| loc0 | +20 \
| loc4 | +24 \ - locals, accessible only from local scope
| loc8 | +28 /
| lc12 | +32 /
|------| vm->programStack -= 24 ( 8 + MAX_VMMAIN_CALL_ARGS*4 ) // set by VM_CallCompiled()
| ???? | +0 - unused, reserved for interpreter
| ???? | +4 - unused, reserved for interpreter
| arg0 | +8 \
| arg1 | +12 \ - passed arguments, accessible from vmMain
| arg2 | +16 /
| arg3 | +20 /
|------| vm->programStack = vm->dataMask + 1 // set by VM_Create()
jump/call opStack rules:
1) opStack must be 8 before conditional jump
2) opStack must be 4 before unconditional jump
3) opStack must be >=4 before OP_CALL
4) opStack must remain the same after OP_CALL
5) you may not jump in/call locations with opStack != 0
*/
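/*
 Illustration only - a simplified sketch, not the actual VM_CallCompiled code.
 Per the layout above, the caller reserves 8 + MAX_VMMAIN_CALL_ARGS*4 bytes
 below the current programStack and writes the vmMain arguments at +8:

	int programStack = vm->programStack - ( 8 + MAX_VMMAIN_CALL_ARGS * 4 );
	int *argBase = (int *) ( vm->dataBase + programStack );
	argBase[2] = arg0;	// +8
	argBase[3] = arg1;	// +12
	// ... remaining arguments at +16, +20
*/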
typedef enum
{
FUNC_ENTR = 0,
FUNC_CALL,
FUNC_SYSC,
FUNC_BCPY,
FUNC_PSOF,
FUNC_OSOF,
FUNC_BADJ,
FUNC_ERRJ,
FUNC_DATR,
FUNC_DATW,
FUNC_LAST
} func_t;
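// FUNC_* index the shared helper blocks emitted once per VM (see funcOffset[]):
// roughly the entry stub, OP_CALL dispatcher, syscall trampoline, block-copy
// helper, and the error handlers for program-stack/opStack overflow, bad or
// illegal jumps, and out-of-bounds data reads/writes (matching the Bad*/ErrJump
// handlers defined further below).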
// macro opcode sequences
#ifdef MACRO_OPTIMIZE
typedef enum {
MOP_UNDEF = OP_MAX,
MOP_ADD,
MOP_SUB,
MOP_BAND,
MOP_BOR,
MOP_BXOR
} macro_op_t;
#endif
static byte *code;
static int compiledOfs;
static int *instructionOffsets;
static intptr_t *instructionPointers;
static instruction_t *inst = NULL;
static int ip, pass;
#if JUMP_OPTIMIZE
static int jumpSizeChanged;
#endif
static int funcOffset[ FUNC_LAST ];
static void *VM_Alloc_Compiled( vm_t *vm, int codeLength, int tableLength );
static void VM_Destroy_Compiled( vm_t *vm );
static void VM_FreeBuffers( void );
static void Emit1( int v );
static void Emit2( int16_t v );
static void Emit4( int32_t v );
#if idx64
static void Emit8( int64_t v );
#endif
#ifdef _MSC_VER
#define DROP( reason, ... ) \
do { \
VM_FreeBuffers(); \
Com_Error( ERR_DROP, "%s: " reason, __func__, __VA_ARGS__ ); \
} while(0)
#else
#define DROP( reason, args... ) \
do { \
VM_FreeBuffers(); \
Com_Error( ERR_DROP, "%s: " reason, __func__, ##args ); \
} while(0)
#endif
#define SWAP_INT( X, Y ) do { int T = X; X = Y; Y = T; } while ( 0 )
// x86 instruction encoding
typedef enum {
R_EAX = 0x00,
R_ECX = 0x01,
R_EDX = 0x02,
R_EBX = 0x03,
R_ESP = 0x04,
R_EBP = 0x05,
R_ESI = 0x06,
R_EDI = 0x07,
#if idx64
R_R8 = 0x08,
R_R9 = 0x09,
R_R10 = 0x0A,
R_R11 = 0x0B,
R_R12 = 0x0C,
R_R13 = 0x0D,
R_R14 = 0x0E,
R_R15 = 0x0F,
R_MASK = 0x0F,
R_REX = 0x10 // mask to force 64-bit operation
#else
R_MASK = 0x07,
R_REX = 0x00
#endif
} intreg_t;
#define RNN_SHIFT 3
#define REX_SHIFT 4
typedef enum {
R_XMM0 = 0x00,
R_XMM1 = 0x01,
R_XMM2 = 0x02,
R_XMM3 = 0x03,
R_XMM4 = 0x04,
R_XMM5 = 0x05
} xmmreg_t;
typedef union {
struct {
unsigned b : 1; // 1-bit extension of MODRM.r_m or SIB.base
unsigned x : 1; // 1-bit extension of SIB.index
unsigned r : 1; // 1-bit extension of MODRM.r_x
unsigned w : 1; // 64-bit operand size or default (0)
unsigned f : 4; // always 0x4 (0b0100)
} s;
uint8_t v;
} rex_t;
typedef enum {
MOD_NO_DISP = 0, // 0-byte displacement
MOD_DISP4_ONLY_RM_5 = 0, // displacement-only mode with r_m = 5 (101)
MOD_SIB_NO_DISP_RM_4 = 0, // SIB with no displacement with r_m = 4 (100)
MOD_DISP1 = 1, // 1-byte signed displacement
MOD_DISP4 = 2, // 4-byte signed displacement
MOD_REG_TO_REG = 3 // register addressing mode
} mod_t;
typedef union {
struct {
unsigned r_m : 3; // direct or indirect register operand with opt.displacement, REX.b can extend this by 1 bit
unsigned r_x : 3; // register or opcode extension, REX.r can extend this by 1 bit
unsigned mod : 2; // see mod_t
} s;
uint8_t v;
} modrm_t;
typedef union {
struct {
unsigned base : 3; // base register to use, REX.b can extend by 1 bit
unsigned index : 3; // index register to use, REX.x can extend by 1 bit
unsigned scale : 2; // 1 2 4 8
} s;
uint8_t v;
} sib_t;
#if idx64
static void emit_rex3( const uint32_t base, const uint32_t reg, const uint32_t idx )
{
rex_t rex;
rex.v = 0x40;
rex.s.b = ( base >> RNN_SHIFT ) & 1;
rex.s.r = ( reg >> RNN_SHIFT ) & 1;
rex.s.x = ( idx >> RNN_SHIFT ) & 1;
rex.s.w = ( ( base | reg | idx ) >> REX_SHIFT ) & 1;
if ( rex.v != 0x40 )
Emit1( rex.v );
}
static void emit_rex2( const uint32_t base, const uint32_t reg )
{
rex_t rex;
rex.v = 0x40;
rex.s.b = ( base >> RNN_SHIFT ) & 1;
rex.s.r = ( reg >> RNN_SHIFT ) & 1;
rex.s.w = ( ( base | reg ) >> REX_SHIFT ) & 1;
if ( rex.v != 0x40 )
Emit1( rex.v );
}
static void emit_rex1( const uint32_t base )
{
rex_t rex;
rex.v = 0x40;
rex.s.b = ( base >> RNN_SHIFT ) & 1;
rex.s.w = ( base >> REX_SHIFT ) & 1;
if ( rex.v != 0x40 )
Emit1( rex.v );
}
#endif
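/*
 Worked example (for illustration): emit_load4( R_EAX, R_EBX, 8 ), defined below,
 encodes "mov eax, [rbx+8]" as 8B 43 08 - opcode 0x8B, then modrm 0x43 =
 mod 01 (disp8), r_x 000 (eax), r_m 011 (rbx), then disp8 0x08.
 With an extended register (64-bit builds), e.g. emit_load4( R_R8, R_EBX, 8 ),
 emit_rex2() first emits the REX prefix 0x44 (REX.R set), giving
 44 8B 43 08 = "mov r8d, [rbx+8]".
*/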
// reg <-> [offset]
static void emit_modrm_offset( uint32_t reg, int32_t offset )
{
modrm_t modrm;
modrm.s.mod = MOD_DISP4_ONLY_RM_5; // displacement-only mode with ( r_m == 0x5 )
modrm.s.r_m = 5; // 101
modrm.s.r_x = reg;
Emit1( modrm.v );
Emit4( offset );
}
// reg <-> [base + offset]
static void emit_modrm_base_offset( uint32_t reg, uint32_t base, int32_t offset )
{
modrm_t modrm;
modrm.s.r_m = base;
modrm.s.r_x = reg;
if ( offset >= -127 && offset < 128 ) {
if ( offset != 0 || modrm.s.r_m == 0x5 /* 101 */ ) {
modrm.s.mod = MOD_DISP1; // 1-byte displacement
Emit1( modrm.v );
if ( modrm.s.r_m == 0x4 /* 100 */ ) {
Emit1( 0x24 ); // SIB: 00:100:100
}
Emit1( offset );
} else {
modrm.s.mod = MOD_NO_DISP; // 0-byte displacement
Emit1( modrm.v );
if ( modrm.s.r_m == 0x4 /* 100 */ ) {
Emit1( 0x24 ); // SIB: 00:100:100
}
}
} else {
modrm.s.mod = MOD_DISP4; // 4-byte displacement
Emit1( modrm.v );
if ( modrm.s.r_m == 0x4 /* 100 */ ) {
Emit1( 0x24 ); // SIB: 00:100:100
}
Emit4( offset );
}
}
// reg <-> [base + index*scale + disp]
static void emit_modrm_base_index( uint32_t reg, uint32_t base, uint32_t index, int scale, int32_t disp )
{
modrm_t modrm;
sib_t sib;
sib.s.base = base;
sib.s.index = index;
switch ( scale ) {
case 8: sib.s.scale = 3; break;
case 4: sib.s.scale = 2; break;
case 2: sib.s.scale = 1; break;
default: sib.s.scale = 0; break;
}
modrm.s.r_x = reg;
modrm.s.r_m = 4; // 100
if ( disp == 0 ) {
if ( sib.s.base == 5 /* 101 */ ) {
modrm.s.mod = MOD_DISP1; // dummy 1-byte displacement
Emit1( modrm.v );
Emit1( sib.v );
Emit1( 0x0 ); // displacement
} else {
modrm.s.mod = MOD_SIB_NO_DISP_RM_4; // SIB with no displacement ( r_m == 0x4 )
Emit1( modrm.v );
Emit1( sib.v );
}
} else {
if ( disp >= -128 && disp <= 127 ) {
modrm.s.mod = MOD_DISP1;
Emit1( modrm.v );
Emit1( sib.v );
Emit1( disp );
} else {
modrm.s.mod = MOD_DISP4;
Emit1( modrm.v );
Emit1( sib.v );
Emit4( disp );
}
}
}
// reg <-> reg
static void emit_modrm_reg( uint32_t base, uint32_t regx )
{
modrm_t modrm;
modrm.s.mod = MOD_REG_TO_REG;
modrm.s.r_m = base;
modrm.s.r_x = regx;
Emit1( modrm.v );
}
static void emit_op_reg( int prefix, int opcode, uint32_t base, uint32_t reg )
{
#if idx64
emit_rex2( base, reg );
#endif
if ( prefix != 0 )
Emit1( prefix );
Emit1( opcode );
emit_modrm_reg( base, reg );
}
// offset is RIP-related in 64-bit mode
static void emit_op_reg_offset( int prefix, int opcode, uint32_t reg, int32_t offset )
{
#if idx64
emit_rex2( 0x0, reg );
#endif
if ( prefix != 0 )
Emit1( prefix );
Emit1( opcode );
emit_modrm_offset( reg, offset );
}
static void emit_op_reg_base_offset( int prefix, int opcode, uint32_t reg, uint32_t base, int32_t offset )
{
#if idx64
emit_rex2( base, reg );
#endif
if ( prefix != 0 )
Emit1( prefix );
Emit1( opcode );
emit_modrm_base_offset( reg, base, offset );
}
static void emit_op2_reg_base_offset( int prefix, int opcode, int opcode2, uint32_t reg, uint32_t base, int32_t offset )
{
#if idx64
emit_rex2( base, reg );
#endif
if ( prefix != 0 )
Emit1( prefix );
Emit1( opcode );
Emit1( opcode2 );
emit_modrm_base_offset( reg, base, offset );
}
static void emit_op_reg_base_index( int prefix, int opcode, uint32_t reg, uint32_t base, uint32_t index, int scale, int32_t disp )
{
if ( ( index & R_MASK ) == 4 ) {
if ( scale == 1 && ( base & R_MASK ) != 4 ) {
SWAP_INT( index, base ); // swap index with base
} else {
#ifndef DEBUG_INT
DROP( "incorrect index register" );
#else
return; // R_ESP cannot be used as index register
#endif
}
} else if ( disp != 0 && ( base & 7 ) == 5 && scale == 1 && ( index & R_MASK ) != 4 ) {
SWAP_INT( index, base ); // avoid using dummy displacement with R_EBP
}
#if idx64
emit_rex3( base, reg, index );
#endif
if ( prefix != 0 )
Emit1( prefix );
Emit1( opcode );
emit_modrm_base_index( reg, base, index, scale, disp );
}
static void emit_op_reg_index_offset( int opcode, uint32_t reg, uint32_t index, int scale, int32_t offset )
{
modrm_t modrm;
sib_t sib;
if ( ( index & R_MASK ) == 4 )
return;
#if idx64
emit_rex3( 0x0, reg, index );
#endif
// modrm = 00:<reg>:100
modrm.s.mod = MOD_SIB_NO_DISP_RM_4;
modrm.s.r_x = reg;
modrm.s.r_m = 4;
// sib = <scale>:<index>:101
sib.s.base = 5; // 101 - (index*scale + disp4) mode
sib.s.index = index;
switch ( scale ) {
case 8: sib.s.scale = 3; break;
case 4: sib.s.scale = 2; break;
case 2: sib.s.scale = 1; break;
default: sib.s.scale = 0; break;
}
Emit1( opcode );
Emit1( modrm.v );
Emit1( sib.v );
Emit4( offset );
}
static void emit_lea( uint32_t reg, uint32_t base, int32_t offset )
{
emit_op_reg_base_offset( 0, 0x8D, reg, base, offset );
}
static void emit_lea_base_index( uint32_t reg, uint32_t base, uint32_t index )
{
emit_op_reg_base_index( 0, 0x8D, reg, base, index, 1, 0 );
}
#if 0
static void emit_lea_index_scale( uint32_t reg, uint32_t index, int scale, int32_t offset )
{
emit_op_reg_index_offset( 0x8D, reg, index, scale, offset );
}
#endif
static void emit_lea_base_index_offset( uint32_t reg, uint32_t base, uint32_t index, int32_t offset )
{
emit_op_reg_base_index( 0, 0x8D, reg, base, index, 1, offset );
}
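// register-to-register helpers: the first argument is the destination (left)
// operand and the second is the source, e.g. emit_mov_rx( dst, src ) emits
// "mov dst, src" and emit_sub_rx( a, b ) emits "sub a, b"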
static void emit_mov_rx( uint32_t base, uint32_t reg )
{
emit_op_reg( 0, 0x89, base, reg );
}
static void emit_sex8( uint32_t base, uint32_t reg )
{
emit_op_reg( 0x0F, 0xBE, reg, base );
}
static void emit_zex8( uint32_t base, uint32_t reg )
{
emit_op_reg( 0x0F, 0xB6, reg, base );
}
static void emit_sex16( uint32_t base, uint32_t reg )
{
emit_op_reg( 0x0F, 0xBF, reg, base );
}
static void emit_zex16( uint32_t base, uint32_t reg )
{
emit_op_reg( 0x0F, 0xB7, reg, base );
}
static void emit_test_rx( uint32_t base, uint32_t reg )
{
emit_op_reg( 0, 0x85, base, reg );
}
static void emit_test_rx_imm32( uint32_t base, int32_t imm32 )
{
if ( (base & R_MASK) == R_EAX ) {
#if idx64
emit_rex1( base );
#endif
Emit1( 0xA9 );
} else {
emit_op_reg( 0, 0xF7, base, 0x0 );
}
Emit4( imm32 );
}
static void emit_cmp_rx( uint32_t base, uint32_t reg )
{
emit_op_reg( 0, 0x39, base, reg );
}
/*static*/ void emit_cmp_rx_mem( uint32_t reg, int32_t offset )
{
emit_op_reg_offset( 0, 0x3B, reg, offset );
}
static void emit_and_rx( uint32_t base, uint32_t reg )
{
emit_op_reg( 0, 0x21, base, reg );
}
static void emit_or_rx( uint32_t base, uint32_t reg )
{
emit_op_reg( 0, 0x09, base, reg );
}
static void emit_xor_rx( uint32_t base, uint32_t reg )
{
emit_op_reg( 0, 0x31, base, reg );
}
static void emit_mov_rx_imm32( uint32_t reg, int32_t imm32 )
{
#if idx64
emit_rex1( reg & ~R_REX );
#endif
Emit1( 0xB8 + ( reg & 7 ) );
Emit4( imm32 );
}
#if idx64
static void emit_mov_rx_imm64( uint32_t reg, int64_t imm64 )
{
emit_rex1( reg | R_REX );
Emit1( 0xB8 + ( reg & 7 ) );
Emit8( imm64 );
}
#endif
// wrapper function
static void mov_rx_imm32( uint32_t reg, int32_t imm32 )
{
if ( imm32 == 0 ) {
emit_xor_rx( reg & ~R_REX, reg & ~R_REX );
} else {
emit_mov_rx_imm32( reg, imm32 );
}
}
#if idx64
// wrapper function
static void mov_rx_imm64( uint32_t reg, int64_t imm64 )
{
if ( imm64 == 0 ) {
emit_xor_rx( reg & ~R_REX, reg & ~R_REX );
} else {
if ( (uint64_t) imm64 > 0xFFFFFFFFULL ) {
// move to 64-bit register
if ( imm64 < 0 && imm64 >= -0x80000000LL ) {
// sign-extend to 64 bit
emit_rex1( reg | R_REX );
Emit1( 0xC7 );
Emit1( 0xC0 + ( reg & 7 ) ); // modrm: 11.000.reg
Emit4( (int32_t)imm64 );
} else {
// worst case
emit_mov_rx_imm64( reg, imm64 );
}
} else {
// move to 32-bit register with implicit zero-extension to 64-bits
emit_mov_rx_imm32( reg, (int32_t)imm64 );
}
}
}
#endif
static void mov_rx_ptr( uint32_t reg, const void *ptr )
{
#if idx64
mov_rx_imm64( reg, (intptr_t) ptr );
#else
mov_rx_imm32( reg, (intptr_t) ptr );
#endif
}
static void emit_not_rx( uint32_t reg )
{
modrm_t modrm;
#if idx64
emit_rex1( reg );
#endif
Emit1( 0xF7 );
modrm.v = 0xD0;
modrm.s.r_m = reg;
Emit1( modrm.v );
}
static void emit_neg_rx( uint32_t reg )
{
modrm_t modrm;
#if idx64
emit_rex1( reg );
#endif
Emit1( 0xF7 );
modrm.v = 0xD8;
modrm.s.r_m = reg;
Emit1( modrm.v );
}
static void emit_load1( uint32_t reg, uint32_t base, int32_t offset )
{
emit_op_reg_base_offset( 0x0F, 0xB6, reg, base, offset );
}
static void emit_load1_sex( uint32_t reg, uint32_t base, int32_t offset )
{
emit_op_reg_base_offset( 0x0F, 0xBE, reg, base, offset );
}
static void emit_load2( uint32_t reg, uint32_t base, int32_t offset )
{
emit_op_reg_base_offset( 0x0F, 0xB7, reg, base, offset );
}
static void emit_load2_sex( uint32_t reg, uint32_t base, int32_t offset )
{
emit_op_reg_base_offset( 0x0F, 0xBF, reg, base, offset );
}
static void emit_load4( uint32_t reg, uint32_t base, int32_t offset )
{
emit_op_reg_base_offset( 0, 0x8B, reg, base, offset );
}
/*static*/ void emit_load_rx_offset( uint32_t reg, int32_t offset )
{
emit_op_reg_offset( 0, 0x8B, reg, offset );
}
static void emit_load1_index( uint32_t reg, uint32_t base, uint32_t index )
{
emit_op_reg_base_index( 0x0F, 0xB6, reg, base, index, 1, 0 );
}
static void emit_load1_sex_index( uint32_t reg, uint32_t base, uint32_t index )
{
emit_op_reg_base_index( 0x0F, 0xBE, reg, base, index, 1, 0 );
}
static void emit_load2_index( uint32_t reg, uint32_t base, uint32_t index )
{
emit_op_reg_base_index( 0x0F, 0xB7, reg, base, index, 1, 0 );
}
static void emit_load2_sex_index( uint32_t reg, uint32_t base, uint32_t index )
{
emit_op_reg_base_index( 0x0F, 0xBF, reg, base, index, 1, 0 );
}
static void emit_load4_index( uint32_t reg, uint32_t base, uint32_t index )
{
emit_op_reg_base_index( 0, 0x8B, reg, base, index, 1, 0 );
}
#if 0
static void emit_load4_index_offset( uint32_t reg, uint32_t base, uint32_t index, int scale, int32_t offset )
{
emit_op_reg_base_index( 0, 0x8B, reg, base, index, scale, offset );
}
#endif
// R_REX prefix flag in [reg] may expand store to 8 bytes
static void emit_store_rx( uint32_t reg, uint32_t base, int32_t offset )
{
emit_op_reg_base_offset( 0, 0x89, reg, base, offset );
}
/*static*/ void emit_store_rx_offset( uint32_t reg, int32_t offset )
{
emit_op_reg_offset( 0, 0x89, reg, offset );
}
static void emit_store_imm32( int32_t imm32, uint32_t base, int32_t offset )
{
emit_op_reg_base_offset( 0, 0xC7, 0, base, offset );
Emit4( imm32 );
}
static void emit_store_imm32_index( int32_t imm32, uint32_t base, uint32_t index )
{
emit_op_reg_base_index( 0, 0xC7, 0, base, index, 1, 0 );
Emit4( imm32 );
}
static void emit_store2_rx( uint32_t reg, uint32_t base, int32_t offset )
{
Emit1( 0x66 );
emit_op_reg_base_offset( 0, 0x89, reg, base, offset );
}
static void emit_store2_imm16( int imm16, uint32_t base, int32_t offset )
{
Emit1( 0x66 );
emit_op_reg_base_offset( 0, 0xC7, 0x0, base, offset );
Emit2( imm16 );
}
static void emit_store2_imm16_index( int imm16, uint32_t base, uint32_t index )
{
Emit1( 0x66 );
emit_op_reg_base_index( 0, 0xC7, 0x0, base, index, 1, 0 );
Emit2( imm16 );
}
static void emit_store1_rx( int reg, uint32_t base, int32_t offset )
{
emit_op_reg_base_offset( 0, 0x88, reg, base, offset );
}
static void emit_store1_imm8( int imm8, uint32_t base, int32_t offset )
{
emit_op_reg_base_offset( 0, 0xC6, 0x0, base, offset );
Emit1( imm8 );
}
static void emit_store1_imm8_index( int imm8, uint32_t base, uint32_t index )
{
emit_op_reg_base_index( 0, 0xC6, 0x0, base, index, 1, 0 );
Emit1( imm8 );
}
static void emit_store4_index( uint32_t reg, uint32_t base, uint32_t index )
{
emit_op_reg_base_index( 0, 0x89, reg, base, index, 1, 0 );
}
static void emit_store2_index( uint32_t reg, uint32_t base, uint32_t index )
{
Emit1( 0x66 );
emit_op_reg_base_index( 0, 0x89, reg, base, index, 1, 0 );
}
static void emit_store1_index( uint32_t reg, uint32_t base, uint32_t index )
{
emit_op_reg_base_index( 0, 0x88, reg, base, index, 1, 0 );
}
/*static*/ void emit_jump_index( uint32_t base, uint32_t index )
{
emit_op_reg_base_index( 0, 0xFF, 0x4, base, index, sizeof( void* ), 0 );
}
/*static*/ void emit_jump_index_offset( int32_t offset, uint32_t index )
{
emit_op_reg_index_offset( 0xFF, 0x4, index, sizeof( void * ), offset );
}
void emit_call_index( uint32_t base, uint32_t index )
{
emit_op_reg_base_index( 0, 0xFF, 0x2, base, index, sizeof( void* ), 0 );
}
/*static*/ void emit_call_index_offset( int32_t offset, uint32_t index )
{
emit_op_reg_index_offset( 0xFF, 0x2, index, sizeof( void * ), offset );
}
/*static*/ void emit_call_indir( int32_t offset )
{
Emit1( 0xFF );
Emit1( 0x15 );
Emit4( offset );
}
/*static*/ void emit_call_rx( uint32_t reg )
{
emit_op_reg( 0, 0xFF, reg & ~R_REX, 0x2 );
}
static void emit_add_rx( uint32_t base, uint32_t reg )
{
emit_op_reg( 0, 0x01, base, reg );
}
/*static*/ void emit_pushad( void )
{
Emit1( 0x60 );
}
/*static*/ void emit_popad( void )
{
Emit1( 0x61 );
}
static void emit_push( uint32_t reg )
{
#if idx64
emit_rex1( reg & ~R_REX );
#endif
Emit1( 0x50 + ( reg & 7 ) );
}
static void emit_pop( uint32_t reg )
{
#if idx64
emit_rex1( reg & ~R_REX );
#endif
Emit1( 0x58 + ( reg & 7 ) );
}
static void emit_ret( void )
{
Emit1( 0xC3 );
}
static void emit_nop( void )
{
Emit1( 0x90 );
}
static void emit_brk( void )
{
Emit1( 0xCC );
}
enum {
X_ADD,
X_OR,
X_ADC,
X_SBB,
X_AND,
X_SUB,
X_XOR,
X_CMP
};
static void emit_op_rx_imm32( int xop, uint32_t reg, int32_t imm32 )
{
if ( imm32 < -128 || imm32 > 127 ) {
if ( reg == R_EAX ) {
switch ( xop ) {
case X_ADD: Emit1( 0x05 ); break;
case X_OR: Emit1( 0x0D ); break;
case X_ADC: Emit1( 0x15 ); break;
case X_SBB: Emit1( 0x1D ); break;
case X_AND: Emit1( 0x25 ); break;
case X_SUB: Emit1( 0x2D ); break;
case X_XOR: Emit1( 0x35 ); break;
case X_CMP: Emit1( 0x3D ); break;
default: DROP( "unknown xop %i", xop );
}
} else {
emit_op_reg( 0, 0x81, reg, xop );
}
Emit4( imm32 );
} else {
emit_op_reg( 0, 0x83, reg, xop );
Emit1( imm32 );
}
}
#ifdef MACRO_OPTIMIZE
static void emit_op_mem_imm( int xop, int base, int32_t offset, int32_t imm32 )
{
if ( imm32 < -128 || imm32 > 127 ) {
emit_op_reg_base_offset( 0, 0x81, xop, base, offset );
Emit4( imm32 );
} else {
emit_op_reg_base_offset( 0, 0x83, xop, base, offset );
Emit1( imm32 );
}
}
#endif
static void emit_mul_rx_imm( int reg, int32_t imm32 )
{
if ( imm32 < -128 || imm32 > 127 ) {
emit_op_reg( 0, 0x69, reg, reg );
Emit4( imm32 );
} else {
emit_op_reg( 0, 0x6B, reg, reg );
Emit1( imm32 );
}
}
static void emit_shl_rx_imm( int reg, int8_t imm8 )
{
emit_op_reg( 0, 0xC1, reg, 4 );
Emit1( imm8 );
}
static void emit_shr_rx_imm( int reg, int8_t imm8 )
{
emit_op_reg( 0, 0xC1, reg, 5 );
Emit1( imm8 );
}
static void emit_sar_rx_imm( int reg, int8_t imm8 )
{
emit_op_reg( 0, 0xC1, reg, 7 );
Emit1( imm8 );
}
static void emit_sub_rx( int base, int reg )
{
emit_op_reg( 0, 0x29, base, reg );
}
static void emit_mul_rx( int base, int reg )
{
emit_op_reg( 0x0F, 0xAF, reg, base );
}
static void emit_cdq( void )
{
Emit1( 0x99 );
}
static void emit_idiv_rx( uint32_t reg )
{
#if idx64
emit_rex1( reg );
#endif
Emit1( 0xF7 );
Emit1( 0xF8 + ( reg & 7 ) );
}
static void emit_udiv_rx( uint32_t reg )
{
#if idx64
emit_rex1( reg );
#endif
Emit1( 0xF7 );
Emit1( 0xF0 + ( reg & 7 ) );
}
static void emit_shl_rx( uint32_t reg )
{
#if idx64
emit_rex1( reg );
#endif
Emit1( 0xD3 );
Emit1( 0xE0 + ( reg & 7 ) );
}
static void emit_shr_rx( uint32_t reg )
{
#if idx64
emit_rex1( reg );
#endif
Emit1( 0xD3 );
Emit1( 0xE8 + ( reg & 7 ) );
}
static void emit_sar_rx( uint32_t reg )
{
#if idx64
emit_rex1( reg );
#endif
Emit1( 0xD3 );
Emit1( 0xF8 + ( reg & 7 ) );
}
#if 0
static void emit_xchg_rx( uint32_t reg1, uint32_t reg2 )
{
emit_op_reg( 0, 0x87, reg2, reg1 );
}
#endif
/* FPU functions */
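// note: the reg-to-reg arithmetic below uses the packed encodings
// (movaps/addps/subps/mulps/divps, no F3 prefix - one byte shorter);
// only the low scalar element carries a VM value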
static void emit_mov_sx( uint32_t dst, uint32_t src )
{
emit_op_reg( 0x0F, 0x28, src, dst );
}
static void emit_mov_sx_rx( uint32_t xmmreg, uint32_t intreg )
{
Emit1( 0x66 );
emit_op_reg( 0x0F, 0x6E, intreg, xmmreg );
}
static void emit_mov_rx_sx( uint32_t intreg, uint32_t xmmreg )
{
Emit1( 0x66 );
emit_op_reg( 0x0F, 0x7E, intreg, xmmreg );
}
static void emit_xor_sx( uint32_t dst, uint32_t src )
{
emit_op_reg( 0x0F, 0x57, src, dst );
}
static void emit_ucomiss( uint32_t base, uint32_t reg )
{
emit_op_reg( 0x0F, 0x2E, reg, base );
}
static void emit_comiss( uint32_t base, uint32_t reg )
{
emit_op_reg( 0x0F, 0x2F, reg, base );
}
static void emit_load_sx( uint32_t reg, uint32_t base, int32_t offset )
{
Emit1( 0xF3 );
emit_op_reg_base_offset( 0x0F, 0x10, reg, base, offset );
}
static void emit_load_sx_index( uint32_t reg, uint32_t base, uint32_t index )
{
Emit1( 0xF3 );
emit_op_reg_base_index( 0x0F, 0x10, reg, base, index, 1, 0 );
}
#if 0
static void emit_load_sx_index_offset( uint32_t reg, uint32_t base, uint32_t index, int scale, int32_t offset )
{
Emit1( 0xF3 );
emit_op_reg_base_index( 0x0F, 0x10, reg, base, index, scale, offset );
}
#endif
static void emit_store_sx( uint32_t reg, uint32_t base, int32_t offset )
{
Emit1( 0xF3 );
emit_op_reg_base_offset( 0x0F, 0x11, reg, base, offset );
}
static void emit_store_sx_index( uint32_t reg, uint32_t base, uint32_t index )
{
Emit1( 0xF3 );
emit_op_reg_base_index( 0x0F, 0x11, reg, base, index, 1, 0 );
}
static void emit_add_sx( uint32_t dst, uint32_t src )
{
emit_op_reg( 0x0F, 0x58, src, dst );
}
static void emit_sub_sx( uint32_t dst, uint32_t src )
{
emit_op_reg( 0x0F, 0x5C, src, dst );
}
static void emit_mul_sx( uint32_t dst, uint32_t src )
{
emit_op_reg( 0x0F, 0x59, src, dst );
}
static void emit_div_sx( uint32_t dst, uint32_t src )
{
emit_op_reg( 0x0F, 0x5E, src, dst );
}
#if 0
static void emit_add_sx_mem( uint32_t reg, uint32_t base, int32_t offset )
{
Emit1( 0xF3 );
emit_op_reg_base_offset( 0x0F, 0x58, reg, base, offset );
}
static void emit_sub_sx_mem( uint32_t reg, uint32_t base, int32_t offset )
{
Emit1( 0xF3 );
emit_op_reg_base_offset( 0x0F, 0x5C, reg, base, offset );
}
static void emit_mul_sx_mem( uint32_t reg, uint32_t base, int32_t offset )
{
Emit1( 0xF3 );
emit_op_reg_base_offset( 0x0F, 0x59, reg, base, offset );
}
static void emit_div_sx_mem( uint32_t reg, uint32_t base, int32_t offset )
{
Emit1( 0xF3 );
emit_op_reg_base_offset( 0x0F, 0x5E, reg, base, offset );
}
#endif
static void emit_cvtsi2ss( uint32_t xmmreg, uint32_t intreg )
{
Emit1( 0xF3 );
emit_op_reg( 0x0F, 0x2A, intreg, xmmreg );
}
static void emit_cvttss2si( uint32_t intreg, uint32_t xmmreg )
{
Emit1( 0xF3 );
emit_op_reg( 0x0F, 0x2C, xmmreg, intreg );
}
static void emit_sqrt( uint32_t xmmreg, uint32_t base, int32_t offset )
{
Emit1( 0xF3 );
emit_op_reg_base_offset( 0x0F, 0x51, xmmreg, base, offset );
}
static void emit_floor( uint32_t xmmreg, uint32_t base, int32_t offset )
{
Emit1( 0x66 );
emit_op2_reg_base_offset( 0x0F, 0x3A, 0x0A, xmmreg, base, offset );
Emit1( 0x01 ); // roundss imm8: round toward -inf (floor), precision exceptions not suppressed
}
static void emit_ceil( uint32_t xmmreg, uint32_t base, int32_t offset )
{
Emit1( 0x66 );
emit_op2_reg_base_offset( 0x0F, 0x3A, 0x0A, xmmreg, base, offset );
Emit1( 0x02 ); // roundss imm8: round toward +inf (ceil), precision exceptions not suppressed
}
// legacy x87 functions
static void emit_fld( uint32_t reg, int32_t offset )
{
emit_op_reg_base_offset( 0, 0xD9, 0x0, reg, offset );
}
static void emit_fstp( uint32_t reg, int32_t offset )
{
emit_op_reg_base_offset( 0, 0xD9, 0x3, reg, offset );
}
static void emit_fild( uint32_t reg, int32_t offset )
{
emit_op_reg_base_offset( 0, 0xDB, 0x0, reg, offset );
}
static void emit_fistp( uint32_t reg, int32_t offset )
{
emit_op_reg_base_offset( 0, 0xDB, 0x3, reg, offset );
}
static void emit_fadd( uint32_t reg, int32_t offset )
{
emit_op_reg_base_offset( 0, 0xD8, 0x0, reg, offset );
}
static void emit_fsub( uint32_t reg, int32_t offset )
{
emit_op_reg_base_offset( 0, 0xD8, 0x4, reg, offset );
}
static void emit_fmul( uint32_t reg, int32_t offset )
{
emit_op_reg_base_offset( 0, 0xD8, 0x1, reg, offset );
}
static void emit_fdiv( uint32_t reg, int32_t offset )
{
emit_op_reg_base_offset( 0, 0xD8, 0x6, reg, offset );
}
static void emit_fcomp( uint32_t reg, int32_t offset )
{
emit_op_reg_base_offset( 0, 0xD8, 0x3, reg, offset );
}
// -------------- virtual opStack management ---------------
static uint32_t alloc_rx( uint32_t pref );
static uint32_t alloc_rx_const( uint32_t pref, uint32_t imm );
static uint32_t alloc_rx_local( uint32_t pref, uint32_t imm );
static uint32_t alloc_sx( uint32_t pref );
// ---------------- register allocation --------------------
// register allocation preferences
#define FORCED 0x20 // load function must return specified register
#define TEMP 0x40 // hint: temporary allocation, will not be stored on opStack
#define RCONST 0x80 // register value will be not modified
#define XMASK 0x100 // exclude masked registers
#define SHIFT4 0x200 // load the item one slot below the opStack top
#define RMASK 0x0F
// array sizes for cached/meta registers
#if idx64
#define NUM_RX_REGS 11 // [EAX..R10]
#define NUM_SX_REGS 6 // [XMM0..XMM5]
#else
#define NUM_RX_REGS 3 // EAX, ECX, EDX
#define NUM_SX_REGS 6 // [XMM0..XMM5]
#endif
// general-purpose register list available for dynamic allocation
static const uint32_t rx_list_alloc[] = {
R_EAX, R_EDX, R_ECX
#if idx64
,R_R8, R_R9, R_R10
#endif
};
// FPU scalar register list available for dynamic allocation
static const uint32_t sx_list_alloc[] = {
R_XMM0, R_XMM1, R_XMM2,
R_XMM3, R_XMM4, R_XMM5
};
#ifdef CONST_CACHE_RX
static const uint32_t rx_list_cache[] = {
R_EDX, R_ECX, R_EAX
};
#endif
#ifdef CONST_CACHE_SX
static const uint32_t sx_list_cache[] = {
R_XMM0, R_XMM1, R_XMM2,
R_XMM3, R_XMM4, R_XMM5
};
#endif
// types of items on the opStack
typedef enum {
TYPE_RAW, // stored value
TYPE_CONST, // constant
TYPE_LOCAL, // address of local variable
TYPE_RX, // volatile - general-purpose register
TYPE_SX, // volatile - FPU scalar register
} opstack_value_t;
typedef enum {
RTYPE_UNUSED = 0x0,
RTYPE_CONST = 0x1,
RTYPE_VAR = 0x2
} reg_value_t;
typedef struct opstack_s {
uint32_t value;
int offset;
opstack_value_t type;
int safe_arg;
} opstack_t;
typedef struct var_addr_s {
int32_t addr; // variable address/offset
uint8_t base; // procBase or dataBase register, ranges should NOT overlap
uint8_t size; // 1,2,4
} var_addr_t;
typedef enum {
Z_NONE,
Z_EXT8,
S_EXT8,
Z_EXT16,
S_EXT16,
} ext_t;
typedef struct reg_s {
int type_mask;
struct {
uint32_t value;
} cnst;
// register value can be mapped to many memory regions
struct {
var_addr_t map[REG_MAP_COUNT];
unsigned idx; // next allocation slot
} vars;
uint32_t ip; // ip of last reference
int refcnt; // reference counter
ext_t ext; // zero/sign-extension flags
} reg_t;
static int opstack;
static opstack_t opstackv[PROC_OPSTACK_SIZE + 1];
// cached register values
static reg_t rx_regs[NUM_RX_REGS];
static reg_t sx_regs[NUM_SX_REGS];
// masked register can't be allocated or flushed to opStack on register pressure
static int32_t rx_mask[NUM_RX_REGS];
static int32_t sx_mask[NUM_SX_REGS];
static qboolean find_free_rx( void ) {
uint32_t i, n;
#if 1
for ( i = 0; i < ARRAY_LEN( rx_list_alloc ); i++ ) {
n = rx_list_alloc[i];
if ( rx_regs[n].type_mask == RTYPE_UNUSED ) {
return qtrue;
}
}
#endif
return qfalse;
}
static void wipe_reg_range( reg_t *reg, const var_addr_t *v ) {
if ( reg->type_mask & RTYPE_VAR ) {
uint32_t c, n;
for ( c = 0, n = 0; n < ARRAY_LEN( reg->vars.map ); n++ ) {
var_addr_t *var = &reg->vars.map[n];
if ( var->size != 0 ) {
c++;
if ( var->base == v->base ) {
if ( v->addr < var->addr + var->size && v->addr + v->size > var->addr ) {
memset( var, 0, sizeof( *var ) );
//var->size = 0;
c--; continue;
}
}
}
}
if ( c == 0 ) {
reg->type_mask &= ~RTYPE_VAR;
reg->ext = Z_NONE;
} else {
//reg->type_mask |= RTYPE_VAR;
}
}
}
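// drop, from every cached register, any variable mapping that overlaps the
// given address range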
static void wipe_var_range( const var_addr_t *v )
{
#ifdef LOAD_OPTIMIZE
uint32_t i;
#ifdef DEBUG_VM
if ( v->size == 0 || v->base == 0 )
DROP( "incorrect variable setup" );
#endif
// wipe all types of overlapping variables
for ( i = 0; i < ARRAY_LEN( rx_regs ); i++ ) {
wipe_reg_range( &rx_regs[i], v );
}
for ( i = 0; i < ARRAY_LEN( sx_regs ); i++ ) {
wipe_reg_range( &sx_regs[i], v );
}
#endif
}
static void set_var_map( reg_t *r, const var_addr_t *v ) {
uint32_t n;
for ( n = 0; n < ARRAY_LEN( r->vars.map ); n++ ) {
if ( r->vars.map[n].size == 0 ) {
r->vars.map[n] = *v;
r->vars.idx = ( n + 1 ) % ARRAY_LEN( r->vars.map );
return;
}
}
r->vars.map[r->vars.idx] = *v;
r->vars.idx = ( r->vars.idx + 1 ) % ARRAY_LEN( r->vars.map );
}
static void set_rx_var( uint32_t reg, const var_addr_t *v ) {
#ifdef LOAD_OPTIMIZE
if ( reg < ARRAY_LEN( rx_regs ) ) {
reg_t *r = rx_regs + reg;
#ifdef REG_TYPE_MASK
r->type_mask |= RTYPE_VAR;
#else
r->type_mask = RTYPE_VAR;
#endif
set_var_map( r, v );
r->refcnt++; // = 1;
r->ip = ip;
}
#endif
}
static void set_rx_ext( uint32_t reg, ext_t ext ) {
#ifdef LOAD_OPTIMIZE
if ( reg >= ARRAY_LEN( rx_regs ) )
DROP( "register index %i is out of range", reg );
else
rx_regs[reg].ext = ext;
#endif
}
static void set_sx_var( uint32_t reg, const var_addr_t *v ) {
#ifdef LOAD_OPTIMIZE
if ( reg < ARRAY_LEN( sx_regs ) ) {
reg_t *r = sx_regs + reg;
#ifdef REG_TYPE_MASK
r->type_mask |= RTYPE_VAR;
#else
r->type_mask = RTYPE_VAR;
#endif
set_var_map( r, v );
r->refcnt++; // = 1;
r->ip = ip;
}
#endif
}
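// search the general-purpose registers for a cached copy of the given variable;
// on a hit, bumps the reference counter, stores the register index in *reg and
// returns its metadata, otherwise returns NULL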
static reg_t *find_rx_var( uint32_t *reg, const var_addr_t *v ) {
#ifdef LOAD_OPTIMIZE
uint32_t i;
for ( i = 0; i < ARRAY_LEN( rx_regs ); i++ ) {
reg_t *r = &rx_regs[i];
if ( r->type_mask & RTYPE_VAR ) {
uint32_t n;
for ( n = 0; n < ARRAY_LEN( r->vars.map ); n++ ) {
if ( r->vars.map[n].size && r->vars.map[n].addr == v->addr && r->vars.map[n].size == v->size && r->vars.map[n].base == v->base ) {
r->refcnt++;
r->ip = ip;
*reg = i;
return r;
}
}
}
}
#endif
return NULL;
}
static qboolean find_sx_var( uint32_t *reg, const var_addr_t *v ) {
#ifdef LOAD_OPTIMIZE
uint32_t i;
for ( i = 0; i < ARRAY_LEN( sx_regs ); i++ ) {
reg_t *r = &sx_regs[i];
if ( r->type_mask & RTYPE_VAR ) {
uint32_t n;
for ( n = 0; n < ARRAY_LEN( r->vars.map ); n++ ) {
if ( r->vars.map[n].size && r->vars.map[n].addr == v->addr && r->vars.map[n].size == v->size && r->vars.map[n].base == v->base ) {
r->refcnt++;
r->ip = ip;
*reg = i;
return qtrue;
}
}
}
}
#endif // LOAD_OPTIMIZE
return qfalse;
}
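// clamp the recorded size of every mapping cached in this register to at most
// 'size' bytes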
static void reduce_map_size( reg_t *reg, uint32_t size ) {
int i;
for ( i = 0; i < ARRAY_LEN( reg->vars.map ); i++ ) {
if ( reg->vars.map[i].size > size ) {
reg->vars.map[i].size = size;
}
}
}
static reg_t *rx_on_top( void ) {
opstack_t *it = &opstackv[ opstack ];
if ( it->type == TYPE_RX ) {
return &rx_regs[ it->value ];
} else {
return NULL;
}
}
static void wipe_vars( void )
{
#ifdef LOAD_OPTIMIZE
uint32_t i;
reg_t *r;
for ( i = 0; i < ARRAY_LEN( rx_regs ); i++ ) {
r = &rx_regs[i];
memset( &r->vars, 0, sizeof( r->vars ) );
r->type_mask &= ~RTYPE_VAR;
r->ext = Z_NONE;
}
for ( i = 0; i < ARRAY_LEN( sx_regs ); i++ ) {
r = &sx_regs[i];
memset( &r->vars, 0, sizeof( r->vars ) );
r->type_mask &= ~RTYPE_VAR;
r->ext = Z_NONE;
}
#endif
}
static qboolean search_opstack( opstack_value_t type, uint32_t value ) {
int i;
for ( i = 1; i <= opstack; i++ ) {
if ( opstackv[i].type == type && opstackv[i].value == value ) {
return qtrue;
}
}
return qfalse;
}
static void wipe_rx_meta( uint32_t reg )
{
#ifdef DEBUG_VM
if ( reg >= ARRAY_LEN( rx_regs ) )
DROP( "incorrect register index %i", reg );
#endif
memset( &rx_regs[reg], 0, sizeof( rx_regs[0] ) );
//rx_regs[reg].type_mask = RTYPE_UNUSED;
}
static void wipe_sx_meta( uint32_t reg )
{
#ifdef DEBUG_VM
if ( reg >= ARRAY_LEN( sx_regs ) )
DROP( "incorrect register index %i", reg );
#endif
memset( &sx_regs[reg], 0, sizeof( sx_regs[0] ) );
//sx_regs[reg].type_mask = RTYPE_UNUSED;
}
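// mask/unmask pin a register so it cannot be re-allocated or flushed while in
// use by the current emitter; calls nest, so every mask_*() needs a matching
// unmask_*()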
static void mask_rx( uint32_t reg )
{
rx_mask[reg]++;
}
static void mask_sx( uint32_t reg )
{
sx_mask[reg]++;
}
static void unmask_rx( uint32_t reg )
{
#ifdef DEBUG_VM
if ( rx_mask[reg] <= 0 ) {
DROP( "register R%i is already unmasked", reg );
}
#endif
rx_mask[reg]--;
}
static void unmask_sx( uint32_t reg )
{
#ifdef DEBUG_VM
if ( sx_mask[reg] <= 0 ) {
DROP( "register S%i is already unmasked", reg );
}
#endif
sx_mask[reg]--;
}
static void mov_sx_imm( uint32_t reg, uint32_t imm32 )
{
if ( imm32 == 0 ) {
emit_xor_sx( reg, reg );
} else {
uint32_t rx = alloc_rx_const( R_ECX | TEMP, imm32 ); // ecx = imm32
emit_mov_sx_rx( reg, rx ); // xmmX = ecx
unmask_rx( rx );
}
}
static void set_local_address( uint32_t reg, const uint32_t addr )
{
emit_lea( reg, R_PSTACK, addr ); // reg = programStack + addr
}
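// flush_item: materialize a virtual opStack entry into its real opStack slot
// (emit the deferred store) and mark the entry as TYPE_RAW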
static void flush_item( opstack_t *it )
{
switch ( it->type ) {
case TYPE_RX:
if ( it->offset >= 0 )
emit_store_rx( it->value, R_OPSTACK, it->offset ); // opStack[ it->offset ] = eax
break;
case TYPE_SX:
emit_store_sx( it->value, R_OPSTACK, it->offset ); // opStack[ it->offset ] = xmm0
break;
case TYPE_CONST:
if ( it->value == 0 ) {
uint32_t rx = alloc_rx_const( R_EAX, it->value ); // mov eax, const
emit_store_rx( rx, R_OPSTACK, it->offset ); // opStack[ it->offset ] = eax
unmask_rx( rx );
} else {
emit_store_imm32( it->value, R_OPSTACK, it->offset ); // opStack[ it->offset ] = const
}
break;
case TYPE_LOCAL: {
uint32_t rx = alloc_rx_local( R_EAX, it->value ); // lea eax, [pStack + address]
emit_store_rx( rx, R_OPSTACK, it->offset ); // opStack[ it->offset ] = eax
unmask_rx( rx );
break;
}
default:
break;
}
it->type = TYPE_RAW;
it->safe_arg = 0;
}
static void flush_items( opstack_value_t type, uint32_t value ) {
int i;
for ( i = 0; i <= opstack; i++ ) {
opstack_t *it = opstackv + i;
if ( it->type == type && it->value == value ) {
flush_item( it );
}
}
}
static void init_opstack( void )
{
opstack = 0;
Com_Memset( &rx_mask[0], 0, sizeof( rx_mask ) );
Com_Memset( &sx_mask[0], 0, sizeof( sx_mask ) );
Com_Memset( &opstackv[0], 0, sizeof( opstackv ) );
Com_Memset( &rx_regs[0], 0, sizeof( rx_regs ) );
Com_Memset( &sx_regs[0], 0, sizeof( sx_regs ) );
}
static qboolean scalar_on_top( void )
{
#ifdef DEBUG_VM
if ( opstack >= PROC_OPSTACK_SIZE || opstack <= 0 )
DROP( "bad opstack %i", opstack * 4 );
#endif
#ifdef FPU_OPTIMIZE
if ( opstackv[ opstack ].type == TYPE_SX )
return qtrue;
#endif
return qfalse;
}
static qboolean addr_on_top( var_addr_t *addr )
{
#ifdef DEBUG_VM
if ( opstack >= PROC_OPSTACK_SIZE || opstack <= 0 )
DROP( "bad opstack %i", opstack * 4 );
#endif
#ifdef ADDR_OPTIMIZE
if ( opstackv[ opstack ].type == TYPE_CONST ) {
addr->addr = opstackv[opstack].value;
addr->base = R_DATABASE;
addr->size = 0;
return qtrue;
}
if ( opstackv[ opstack ].type == TYPE_LOCAL ) {
addr->addr = opstackv[opstack].value;
addr->base = R_PROCBASE;
addr->size = 0;
return qtrue;
}
#endif
return qfalse;
}
static qboolean const_on_top( void )
{
#ifdef DEBUG_VM
if ( opstack >= PROC_OPSTACK_SIZE || opstack <= 0 )
DROP( "bad opstack %i", opstack * 4 );
#endif
#ifdef ADDR_OPTIMIZE
if ( opstackv[ opstack ].type == TYPE_CONST )
return qtrue;
#endif
return qfalse;
}
static int32_t top_value( void )
{
opstack_t *it = &opstackv[ opstack ];
return it->value;
}
static void discard_top( void )
{
opstack_t *it = &opstackv[ opstack ];
it->type = TYPE_RAW;
it->safe_arg = 0;
}
#if 0
static int is_safe_arg( void )
{
#ifdef DEBUG_VM
if ( opstack >= PROC_OPSTACK_SIZE || opstack <= 0 )
DROP( "bad opstack %i", opstack * 4 );
#endif
return opstackv[ opstack ].safe_arg;
}
#endif
static void inc_opstack( void )
{
#ifdef DEBUG_VM
if ( opstack >= PROC_OPSTACK_SIZE )
DROP( "opstack overflow - %i", opstack * 4 );
#endif
opstack += 1;
#ifdef DEBUG_VM
if ( opstackv[ opstack ].type != TYPE_RAW )
DROP( "bad item type %i at opstack %i", opstackv[ opstack ].type, opstack * 4 );
#endif
}
static void dec_opstack( void )
{
#ifdef DEBUG_VM
opstack_t *it;
if ( opstack <= 0 )
DROP( "opstack underflow - %i", opstack * 4 );
it = &opstackv[ opstack ];
if ( it->type != TYPE_RAW )
DROP( "opstack[%i]: item type %i is not consumed", opstack * 4, it->type );
#endif
opstack -= 1;
}
static void dec_opstack_discard( void )
{
opstack_t *it;
it = &opstackv[ opstack ];
#ifdef DEBUG_VM
if ( opstack <= 0 )
DROP( "opstack underflow - %i", opstack * 4 );
if ( it->type != TYPE_RAW && ( it->type != TYPE_RX || it->offset >= 0 ) )
DROP( "opstack[%i]: item type %i is not consumed", opstack * 4, it->type );
#endif
it->type = TYPE_RAW; // discard value
it->safe_arg = 0;
opstack -= 1;
}
// returns bitmask of registers present on opstack
static uint32_t build_opstack_mask( opstack_value_t reg_type )
{
uint32_t mask = 0;
int i;
for ( i = 0; i <= opstack; i++ ) {
opstack_t *it = opstackv + i;
if ( it->type == reg_type ) {
mask |= ( 1 << it->value );
}
}
return mask;
}
static uint32_t build_rx_mask( void )
{
uint32_t i, mask = 0;
for ( i = 0; i < ARRAY_LEN( rx_mask ); i++ ) {
if ( rx_mask[i] ) {
mask |= 1 << i;
}
}
return mask;
}
static uint32_t build_sx_mask( void )
{
uint32_t i, mask = 0;
for ( i = 0; i < ARRAY_LEN( sx_mask ); i++ ) {
if ( sx_mask[i] ) {
mask |= 1 << i;
}
}
return mask;
}
// allocate register with local address value
static uint32_t alloc_rx_local( uint32_t pref, uint32_t imm )
{
uint32_t rx = alloc_rx( pref );
set_local_address( rx, imm );
return rx;
}
// returns the register caching the specified constant (compared under 'mask'), or NULL if not found
static reg_t *find_rx_const( uint32_t imm, uint32_t mask )
{
#ifdef CONST_CACHE_RX
uint32_t i, n;
reg_t *r;
for ( i = 0; i < ARRAY_LEN( rx_list_cache ); i++ ) {
n = rx_list_cache[ i ];
r = &rx_regs[ n ];
if ( r->type_mask & RTYPE_CONST && ( r->cnst.value & mask ) == imm ) {
return r;
}
}
#endif
return NULL;
}
// allocate integer register with constant value
static uint32_t alloc_rx_const( uint32_t pref, uint32_t imm )
{
#ifdef CONST_CACHE_RX
reg_t *r;
#endif
uint32_t rx;
#ifdef CONST_CACHE_RX
#ifdef DYN_ALLOC_RX
if ( ( pref & FORCED ) == 0 ) {
// support only dynamic allocation mode
const uint32_t mask = build_rx_mask() | build_opstack_mask( TYPE_RX );
int min_ref = MAX_QINT;
int min_ip = MAX_QINT;
int idx = -1;
int i, n;
if ( ( pref & XMASK ) == 0 ) {
// we can select from already masked registers
for ( n = 0; n < ARRAY_LEN( rx_regs ); n++ ) {
r = &rx_regs[n];
if ( r->type_mask & RTYPE_CONST && r->cnst.value == imm ) {
r->refcnt++;
r->ip = ip;
mask_rx( n );
return n;
}
}
}
for ( i = 0; i < ARRAY_LEN( rx_list_cache ); i++ ) {
n = rx_list_cache[i];
if ( mask & ( 1 << n ) ) {
// target register must be unmasked and not present on the opStack
continue;
}
r = &rx_regs[n];
if ( r->type_mask & RTYPE_CONST && r->cnst.value == imm ) {
// exact match, re-use this register
r->refcnt++; // increase reference count
r->ip = ip; // update address too
mask_rx( n );
return n;
}
if ( r->type_mask == RTYPE_UNUSED ) {
idx = n;
break;
}
if ( ( r->refcnt < min_ref ) || ( r->refcnt == min_ref && r->ip < min_ip ) ) {
// update least referenced item index
min_ref = r->refcnt;
min_ip = r->ip;
idx = n;
continue;
}
}
if ( idx != -1 ) {
r = &rx_regs[ idx ];
memset( &r->vars, 0, sizeof( r->vars ) );
r->type_mask = RTYPE_CONST;
r->cnst.value = imm;
r->refcnt = 1;
r->ip = ip;
r->ext = Z_NONE;
mov_rx_imm32( idx, imm );
mask_rx( idx );
return idx;
}
// else go to usual allocation to handle register spilling
}
#endif // DYN_ALLOC_RX
#endif // CONST_CACHE_RX
rx = alloc_rx( pref );
mov_rx_imm32( rx, imm );
#ifdef CONST_CACHE_RX
r = &rx_regs[ rx ];
//memset( &r->vars, 0, sizeof( r->vars ) );
r->type_mask = RTYPE_CONST;
r->cnst.value = imm;
r->refcnt = 1;
r->ip = ip;
//r->ext = Z_NONE;
#endif
return rx;
}
// allocate scalar register with constant value
static uint32_t alloc_sx_const( uint32_t pref, uint32_t imm )
{
#ifdef CONST_CACHE_SX
reg_t *r;
#endif
uint32_t sx;
#ifdef CONST_CACHE_SX
#ifdef DYN_ALLOC_SX
if ( ( pref & FORCED ) == 0 ) {
// support only dynamic allocation mode
const uint32_t mask = build_sx_mask() | build_opstack_mask( TYPE_SX );
int min_ref = MAX_QINT;
int min_ip = MAX_QINT;
int idx = -1;
int i, n;
if ( ( pref & XMASK ) == 0 ) {
// we can select from already masked registers
for ( n = 0; n < ARRAY_LEN( sx_regs ); n++ ) {
r = &sx_regs[n];
if ( r->type_mask & RTYPE_CONST && r->cnst.value == imm ) {
r->refcnt++;
r->ip = ip;
mask_sx( n );
return n;
}
}
}
for ( i = 0; i < ARRAY_LEN( sx_list_cache ); i++ ) {
n = sx_list_cache[i];
if ( mask & ( 1 << n ) ) {
// target register must be unmasked and not present on the opStack
continue;
}
r = &sx_regs[n];
if ( r->type_mask & RTYPE_CONST && r->cnst.value == imm ) {
// exact match, re-use this register
r->refcnt++; // increase reference count
r->ip = ip; // update address too
mask_sx( n );
return n;
}
if ( r->type_mask == RTYPE_UNUSED ) {
idx = n;
break;
}
if ( ( r->refcnt < min_ref ) || ( r->refcnt == min_ref && r->ip < min_ip ) ) {
// update least referenced item index
min_ref = r->refcnt;
min_ip = r->ip;
idx = n;
continue;
}
}
if ( idx != -1 ) {
r = &sx_regs[ idx ];
memset( &r->vars, 0, sizeof( r->vars ) );
r->type_mask = RTYPE_CONST;
r->cnst.value = imm;
r->refcnt = 1;
r->ip = ip;
r->ext = Z_NONE;
mov_sx_imm( idx, imm );
mask_sx( idx );
return idx;
}
// else go to usual allocation to handle register spilling
}
#endif // DYN_ALLOC_SX
#endif // CONST_CACHE_SX
sx = alloc_sx( pref );
mov_sx_imm( sx, imm );
#ifdef CONST_CACHE_SX
r = &sx_regs[sx];
//memset( &r->vars, 0, sizeof( r->vars ) );
r->type_mask = RTYPE_CONST;
r->cnst.value = imm;
r->refcnt = 1;
r->ip = ip;
//r->ext = Z_NONE;
#endif
return sx;
}
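// dyn_alloc_rx: pick a general-purpose register - prefer a completely unused
// one, otherwise recycle the least-referenced register that only holds
// metadata, otherwise spill the lowest unmasked opStack item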
static uint32_t dyn_alloc_rx( uint32_t pref )
{
const uint32_t _rx_mask = build_rx_mask();
const uint32_t mask = _rx_mask | build_opstack_mask( TYPE_RX );
const reg_t *reg, *used = NULL;
uint32_t i, n;
// try to bypass registers with metadata
for ( i = 0; i < ARRAY_LEN( rx_list_alloc ); i++ ) {
n = rx_list_alloc[i];
if ( mask & ( 1 << n ) ) {
continue;
}
reg = &rx_regs[n];
if ( reg->type_mask != RTYPE_UNUSED ) {
// mark least used item
if ( !used || reg->refcnt < used->refcnt || ( reg->refcnt == used->refcnt && reg->ip < used->ip ) ) {
used = reg;
}
continue;
}
wipe_rx_meta( n );
mask_rx( n );
return n;
}
if ( used ) {
// no free slots but something occupied by metadata
uint32_t idx = used - rx_regs;
wipe_rx_meta( idx );
mask_rx( idx );
return idx;
}
// no free registers, flush bottom of the opStack
for ( i = 0; i <= opstack; i++ ) {
opstack_t *it = opstackv + i;
if ( it->type == TYPE_RX ) {
n = it->value;
// skip masked registers
if ( _rx_mask & ( 1 << n ) ) {
continue;
}
flush_item( it );
flush_items( TYPE_RX, n ); // flush cloned registers too
wipe_rx_meta( n );
mask_rx( n );
return n;
}
}
return ~0U;
}
// integer register allocation
static uint32_t alloc_rx( uint32_t pref )
{
uint32_t reg;
#ifdef DYN_ALLOC_RX
if ( ( pref & FORCED ) == 0 ) {
uint32_t v = dyn_alloc_rx( pref );
if ( v == ~0U ) {
DROP( "no free registers at ip %i, pref %x, opStack %i, mask %04x", ip, pref, opstack * 4, build_rx_mask() );
}
return v;
}
#endif
reg = pref & RMASK;
#ifdef DEBUG_VM
if ( reg >= ARRAY_LEN( rx_mask ) )
DROP( "forced register R%i index overflowed!", reg );
else if ( rx_mask[reg] )
DROP( "forced register R%i is already masked!", reg );
#endif
// FORCED option: find and flush target register
flush_items( TYPE_RX, reg );
wipe_rx_meta( reg );
mask_rx( reg );
return reg;
}
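// dyn_alloc_sx: scalar (XMM) counterpart of dyn_alloc_rx, same selection strategy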
static uint32_t dyn_alloc_sx( uint32_t pref )
{
const uint32_t _sx_mask = build_sx_mask();
const uint32_t mask = _sx_mask | build_opstack_mask( TYPE_SX );
const reg_t *reg, *used = NULL;
uint32_t i, n;
// try to bypass registers with metadata
for ( i = 0; i < ARRAY_LEN( sx_list_alloc ); i++ ) {
n = sx_list_alloc[i];
if ( mask & ( 1 << n ) ) {
continue;
}
reg = &sx_regs[n];
if ( reg->type_mask != RTYPE_UNUSED ) {
// mark least used item
if ( !used || reg->refcnt < used->refcnt || ( reg->refcnt == used->refcnt && reg->ip < used->ip ) ) {
used = reg;
}
continue;
}
wipe_sx_meta( n );
mask_sx( n );
return n;
}
if ( used ) {
// no free slots but something occupied by metadata
uint32_t idx = used - sx_regs;
wipe_sx_meta( idx );
mask_sx( idx );
return idx;
}
// no free registers, flush bottom of the opStack
for ( i = 0; i <= opstack; i++ ) {
opstack_t *it = opstackv + i;
if ( it->type == TYPE_SX ) {
n = it->value;
// skip masked registers
if ( _sx_mask & ( 1 << n ) ) {
continue;
}
flush_item( it );
flush_items( TYPE_SX, n ); // flush cloned registers too
wipe_sx_meta( n );
mask_sx( n );
return n;
}
}
return ~0U;
}
// scalar register allocation
static uint32_t alloc_sx( uint32_t pref )
{
uint32_t reg;
#ifdef DYN_ALLOC_SX
if ( ( pref & FORCED ) == 0 ) {
uint32_t v = dyn_alloc_sx( pref );
if ( v == ~0U ) {
DROP( "no free registers at ip %i, pref %x, opStack %i, mask %04x", ip, pref, opstack * 4, build_sx_mask() );
}
return v;
}
#endif
reg = pref & RMASK;
#ifdef DEBUG_VM
if ( reg >= ARRAY_LEN( sx_mask ) )
DROP( "forced register S%i index overflowed!", reg );
else if ( sx_mask[reg] )
DROP( "forced register S%i is already masked!", reg );
#endif
// FORCED option: find and flush target register
flush_items( TYPE_SX, reg );
wipe_sx_meta( reg );
mask_sx( reg );
return reg;
}
/*
==============
flush_volatile
flush any cached register/address/constant to opstack and reset meta (constants mapping)
this MUST be called before any unconditional jump, return or function call
==============
*/
static void flush_volatile( void )
{
int i;
for ( i = 0; i <= opstack; i++ ) {
opstack_t *it = opstackv + i;
if ( it->type == TYPE_RX || it->type == TYPE_SX ) {
flush_item( it );
}
}
// wipe all constants metadata
Com_Memset( &rx_regs[0], 0, sizeof( rx_regs ) );
Com_Memset( &sx_regs[0], 0, sizeof( sx_regs ) );
}
static void flush_opstack( void )
{
int i;
for ( i = 0; i <= opstack; i++ ) {
opstack_t *it = opstackv + i;
flush_item( it );
}
// wipe all constants metadata
Com_Memset( &rx_regs[0], 0, sizeof( rx_regs ) );
Com_Memset( &sx_regs[0], 0, sizeof( sx_regs ) );
}
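// store_rx_opstack/store_sx_opstack record that the current opStack top lives
// in the given register; the write to opStack memory is deferred until
// flush_item() needs it (store_syscall_opstack marks the EAX result with
// offset -1 so it is never written back)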
static void store_rx_opstack( uint32_t reg )
{
opstack_t *it = opstackv + opstack;
#ifdef DEBUG_VM
if ( opstack <= 0 )
DROP( "bad opstack %i", opstack * 4 );
if ( it->type != TYPE_RAW )
DROP( "bad type %i at opstack %i", it->type, opstack * 4 );
#endif
it->type = TYPE_RX;
it->offset = opstack * sizeof( int32_t );
it->value = reg;
it->safe_arg = 0;
unmask_rx( reg ); // so it can be flushed on demand
}
static void store_syscall_opstack( void )
{
opstack_t *it = opstackv + opstack;
#ifdef DEBUG_VM
if ( opstack <= 0 )
DROP( "bad opstack %i", opstack * 4 );
if ( it->type != TYPE_RAW )
DROP( "bad type %i at opstack %i", it->type, opstack * 4 );
#endif
it->type = TYPE_RX;
it->offset = -1; // opstack * sizeof( int32_t )
it->value = R_EAX;
it->safe_arg = 0;
wipe_rx_meta( it->value );
unmask_rx( it->value ); // so it can be flushed on demand
}
static void store_sx_opstack( uint32_t reg )
{
opstack_t *it = opstackv + opstack;
#ifdef DEBUG_VM
if ( opstack <= 0 )
DROP( "bad opstack %i", opstack * 4 );
if ( it->type != TYPE_RAW )
DROP( "bad type %i at opstack %i", it->type, opstack * 4 );
#endif
it->type = TYPE_SX;
it->offset = opstack * sizeof( int32_t );
it->value = reg;
it->safe_arg = 0;
unmask_sx( reg ); // so it can be flushed on demand
}
static void store_item_opstack( instruction_t *ins )
{
opstack_t *it = opstackv + opstack;
#ifdef DEBUG_VM
if ( it->type != TYPE_RAW )
DROP( "bad type %i at opstack %i", it->type, opstack * 4 );
#endif
switch ( ins->op ) {
case OP_CONST: it->type = TYPE_CONST; break;
case OP_LOCAL: it->type = TYPE_LOCAL; break;
default: DROP( "incorrect opcode %i", ins->op );
}
it->offset = opstack * sizeof( int32_t );
it->value = ins->value;
it->safe_arg = ins->safe;
}
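// finish_rx/finish_sx: make a register just taken from the opStack safe to
// modify - return it unchanged when the caller only reads it (RCONST); if other
// opStack slots still alias it, either flush them (FORCED) or hand back a
// private copy instead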
static uint32_t finish_rx( uint32_t pref, uint32_t reg ) {
if ( pref & RCONST ) {
// non-destructive operation
return reg;
}
if ( search_opstack( TYPE_RX, reg ) ) {
// another instance is present on opStack
if ( pref & FORCED ) {
// nothing should be left on the opStack for a FORCED register
flush_items( TYPE_RX, reg );
} else {
// copy it
int rx = alloc_rx( R_ECX );
emit_mov_rx( rx, reg );
unmask_rx( reg );
return rx;
}
}
wipe_rx_meta( reg );
return reg;
}
/*
===========
load_rx_opstack
loads current opstack value into specified register
returns masked register number, must be unmasked manually if not stored on the opstack
output register is very likely to be modified unless the RCONST preference is specified
===========
*/
static uint32_t load_rx_opstack( uint32_t pref )
{
opstack_t *it;
uint32_t opsv;
uint32_t reg;
if ( pref & SHIFT4 ) {
opsv = opstack - 1;
} else {
opsv = opstack;
}
#ifdef DEBUG_VM
if ( opsv <= 0 )
DROP( "bad opstack %i", opsv * 4 );
#endif
it = &opstackv[ opsv ];
reg = pref & RMASK;
if ( it->type == TYPE_RX ) {
#ifdef DYN_ALLOC_RX
if ( !( pref & FORCED ) ) {
mask_rx( it->value );
it->type = TYPE_RAW;
return finish_rx( pref, it->value ); // return current register
}
#endif
// FORCED flag: return exact target register
if ( it->value == reg ) {
mask_rx( it->value );
it->type = TYPE_RAW;
return finish_rx( pref, reg );
} else {
// allocate target register
reg = alloc_rx( pref );
// copy source to target
emit_mov_rx( reg, it->value );
it->type = TYPE_RAW;
return reg;
}
} // it->type == TYPE_RX
// scalar register on the stack
if ( it->type == TYPE_SX ) {
// move from scalar to general-purpose register
reg = alloc_rx( pref );
emit_mov_rx_sx( reg, it->value ); // intreg <- xmmreg
it->type = TYPE_RAW;
return reg;
}
if ( ( pref & RCONST ) == 0 ) {
pref |= XMASK;
} // else we can search for constants in masked registers
if ( it->type == TYPE_CONST ) {
// move constant to general-purpose register
reg = alloc_rx_const( pref, it->value );
it->type = TYPE_RAW;
return finish_rx( pref, reg );
}
if ( it->type == TYPE_LOCAL ) {
reg = alloc_rx_local( pref, it->value );
it->type = TYPE_RAW;
return finish_rx( pref, reg );
}
// default raw type, explicit load from opStack
reg = alloc_rx( pref );
emit_load4( reg, R_OPSTACK, opsv * sizeof( int32_t ) ); // reg32 = *opstack
it->type = TYPE_RAW;
return reg;
}
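// load_rx_opstack2: fetch the opStack top for a two-operand operation - *src
// receives the current value, *dst receives a register that is safe to
// overwrite (a freshly allocated one when the source must be preserved)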
static void load_rx_opstack2( uint32_t *dst, uint32_t dst_pref, uint32_t *src, uint32_t src_pref )
{
#if 0
*dst = *src = load_rx_opstack( src_pref & ~RCONST ); // source, target = *opstack
#else
*dst = *src = load_rx_opstack( src_pref | RCONST ); // source, target = *opstack
if ( search_opstack( TYPE_RX, *src ) || find_free_rx() ) {
// *src is duplicated on opStack or there is a free register
*dst = alloc_rx( dst_pref & ~RCONST ); // allocate new register for the target
} else {
// will be overwritten, wipe metadata
wipe_rx_meta( *dst );
}
#endif
}
static uint32_t finish_sx( uint32_t pref, uint32_t reg ) {
if ( pref & RCONST ) {
// non-destructive operation
return reg;
}
if ( search_opstack( TYPE_SX, reg ) ) {
// another instance is present on opStack
if ( pref & FORCED ) {
// nothing should be left on the opStack for a FORCED register
flush_items( TYPE_SX, reg );
} else {
// must be copied
int sx = alloc_sx( R_XMM2 );
emit_mov_sx( sx, reg );
unmask_sx( reg );
return sx;
}
}
wipe_sx_meta( reg );
return reg;
}
static void flush_opstack_top( void )
{
#ifdef DEBUG_VM
if ( opstack <= 0 )
DROP( "bad opstack %i", opstack * 4 );
#endif
flush_item( &opstackv[ opstack ] );
}
// we must unmask register manually after allocation/loading
static uint32_t load_sx_opstack( uint32_t pref )
{
opstack_t *it;
uint32_t reg;
uint32_t opsv;
if ( pref & SHIFT4 ) {
opsv = opstack - 1;
} else {
opsv = opstack;
}
#ifdef DEBUG_VM
if ( opsv <= 0 )
DROP( "bad opstack %i", opsv * 4 );
#endif
it = &opstackv[ opsv ];
reg = pref & RMASK;
// scalar register on the stack
if ( it->type == TYPE_SX ) {
#ifdef DYN_ALLOC_SX
if ( !( pref & FORCED ) ) {
mask_sx( it->value );
it->type = TYPE_RAW;
return finish_sx( pref, it->value );
}
#endif
// FORCED flag: return exact target register
if ( it->value == reg ) {
mask_sx( it->value );
it->type = TYPE_RAW;
return finish_sx( pref, reg );
} else {
// allocate target register
reg = alloc_sx( pref );
// copy source to target
emit_mov_sx( reg, it->value );
it->type = TYPE_RAW;
return reg;
}
}
// integer register on the stack
if ( it->type == TYPE_RX ) {
// move from general-purpose to scalar register
// should never happen with FPU type promotion, except syscalls
reg = alloc_sx( pref );
emit_mov_sx_rx( reg, it->value );
it->type = TYPE_RAW;
return reg;
}
if ( ( pref & RCONST ) == 0 ) {
pref |= XMASK;
} // else we can search for constants in masked registers
if ( it->type == TYPE_CONST ) {
// move constant to scalar register
reg = alloc_sx_const( pref, it->value );
it->type = TYPE_RAW;
return finish_sx( pref, reg );
}
if ( it->type == TYPE_LOCAL ) {
uint32_t rx;
// bogus case: local address cast to float
reg = alloc_sx( pref );
rx = alloc_rx_local( R_ECX | RCONST, it->value );
emit_mov_sx_rx( reg, rx ); // move from integer to scalar
unmask_rx( rx );
it->type = TYPE_RAW;
return reg;
}
// default raw type, explicit load from opStack
reg = alloc_sx( pref );
emit_load_sx( reg, R_OPSTACK, opsv * sizeof( int32_t ) ); // xmm_reg = *opstack
it->type = TYPE_RAW;
return reg;
}
static void ErrJump( void )
{
//Com_Error( ERR_NOTDROP, "program tried to execute code outside VM" );
}
static void BadJump( void )
{
//Com_Error( ERR_NOTDROP, "program tried to execute code at bad location inside VM" );
}
static void BadStack( void )
{
//Com_Error( ERR_NOTDROP, "program tried to overflow program stack" );
}
static void BadOpStack( void )
{
//Com_Error( ERR_NOTDROP, "program tried to overflow opcode stack" );
}
static void BadDataRead( void )
{
//Com_Error( ERR_NOTDROP, "program tried to read out of data segment" );
}
static void BadDataWrite( void )
{
//Com_Error( ERR_NOTDROP, "program tried to write out of data segment" );
}
static void( *const errJumpPtr )( void ) = ErrJump;
static void( *const badJumpPtr )( void ) = BadJump;
static void( *const badStackPtr )( void ) = BadStack;
static void( *const badOpStackPtr )( void ) = BadOpStack;
static void( *const badDataReadPtr )( void ) = BadDataRead;
static void( *const badDataWritePtr )( void ) = BadDataWrite;
static void VM_FreeBuffers( void )
{
// should be freed in reversed allocation order
Z_Free( instructionOffsets );
Z_Free( inst );
}
static ID_INLINE qboolean HasFCOM( void )
{
#if id386
return ( CPU_Flags & CPU_FCOM );
#else
return qtrue; // assume idx64
#endif
}
static ID_INLINE qboolean HasSSEFP( void )
{
#if id386
return ( CPU_Flags & CPU_SSE );
#else
return qtrue; // assume idx64
#endif
}
static void Emit1( int v )
{
if ( code )
{
code[ compiledOfs ] = v;
}
compiledOfs++;
}
static void Emit2( int16_t v )
{
Emit1( v & 255 );
Emit1( ( v >> 8 ) & 255 );
}
static void Emit4( int32_t v )
{
Emit1( v & 255 );
Emit1( ( v >> 8 ) & 255 );
Emit1( ( v >> 16 ) & 255 );
Emit1( ( v >> 24 ) & 255 );
}
void Emit8( int64_t v )
{
Emit1( ( v >> 0 ) & 255 );
Emit1( ( v >> 8 ) & 255 );
Emit1( ( v >> 16 ) & 255 );
Emit1( ( v >> 24 ) & 255 );
Emit1( ( v >> 32 ) & 255 );
Emit1( ( v >> 40 ) & 255 );
Emit1( ( v >> 48 ) & 255 );
Emit1( ( v >> 56 ) & 255 );
}
static int Hex( int c )
{
if ( c >= '0' && c <= '9' ) {
return c - '0';
}
if ( c >= 'A' && c <= 'F' ) {
return 10 + c - 'A';
}
if ( c >= 'a' && c <= 'f' ) {
return 10 + c - 'a';
}
VM_FreeBuffers();
Com_Error( ERR_DROP, "Hex: bad char '%c'", c );
return 0;
}
static void EmitString( const char *string )
{
int c1, c2;
int v;
while ( 1 ) {
c1 = string[0];
c2 = string[1];
v = ( Hex( c1 ) << 4 ) | Hex( c2 );
Emit1( v );
if ( !string[2] ) {
break;
}
string += 3;
}
}
static void EmitAlign( int align )
{
int i, n;
n = ( align - ( compiledOfs & ( align - 1 ) ) ) & ( align - 1 ); // nop bytes needed to reach the next align boundary
for ( i = 0; i < n; i++ )
emit_nop();
}
#if JUMP_OPTIMIZE
static const char *NearJumpStr( int op )
{
switch ( op )
{
case OP_EQF:
case OP_EQ: return "74"; // je
case OP_NEF:
case OP_NE: return "75"; // jne
case OP_LTI: return "7C"; // jl
case OP_LEI: return "7E"; // jle
case OP_GTI: return "7F"; // jg
case OP_GEI: return "7D"; // jge
case OP_LTF:
case OP_LTU: return "72"; // jb
case OP_LEF:
case OP_LEU: return "76"; // jbe
case OP_GTF:
case OP_GTU: return "77"; // ja
case OP_GEF:
case OP_GEU: return "73"; // jae
case OP_JUMP: return "EB"; // jmp
//default:
// Com_Error( ERR_DROP, "Bad opcode %i", op );
};
return NULL;
}
#endif
static const char *FarJumpStr( int op, int *n )
{
switch ( op )
{
case OP_EQF:
case OP_EQ: *n = 2; return "0F 84"; // je
case OP_NEF:
case OP_NE: *n = 2; return "0F 85"; // jne
case OP_LTI: *n = 2; return "0F 8C"; // jl
case OP_LEI: *n = 2; return "0F 8E"; // jle
case OP_GTI: *n = 2; return "0F 8F"; // jg
case OP_GEI: *n = 2; return "0F 8D"; // jge
case OP_LTF:
case OP_LTU: *n = 2; return "0F 82"; // jb
case OP_LEF:
case OP_LEU: *n = 2; return "0F 86"; // jbe
case OP_GTF:
case OP_GTU: *n = 2; return "0F 87"; // ja
case OP_GEF:
case OP_GEU: *n = 2; return "0F 83"; // jae
case OP_JUMP: *n = 1; return "E9"; // jmp
};
return NULL;
}
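// EmitJump: emit a conditional or unconditional branch to VM instruction `addr`.
// With JUMP_OPTIMIZE the short (rel8) form is used whenever the displacement fits,
// and i->njump tracks that choice across passes so offsets stay consistent.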
static void EmitJump( instruction_t *i, int op, int addr )
{
const char *str;
int v, jump_size = 0;
qboolean shouldNaNCheck = qfalse;
v = instructionOffsets[addr] - compiledOfs;
if ( HasFCOM() ) {
// EQF, LTF and LEF use je/jb/jbe for the conditional branch; je/jb/jbe branch when
// ZF/CF is set. comiss/fucomip is used to perform the compare, so if either operand
// is NaN then ZF, CF and PF are all set and je/jb/jbe would branch.
// However, according to IEEE 754 these comparisons must yield false when an operand
// is NaN, so we emit `jp` before je/jb/jbe to skip the branch when the result
// is unordered.
if ( op == OP_EQF || op == OP_LTF || op == OP_LEF ) {
shouldNaNCheck = qtrue;
}
} else {
// Similar to the above, NaN needs to be accounted for. When HasFCOM() is false,
// fcomp is used to perform the compare and EmitFloatJump is called, which in turn
// preserves C2 when masking and calls EmitJump with OP_NE. When either operand is
// NaN, C2 and C0/C3 (whichever was also masked) are set, so as in the previous
// case PF can be used to skip the branch when the result is unordered.
if ( op == OP_EQF || op == OP_LTF || op == OP_LEF ) {
shouldNaNCheck = qtrue;
}
}
if ( shouldNaNCheck ) {
v -= 2; // 2 bytes needed to account for NaN
Emit1( 0x7A ); // jp, target will be filled once we know if next inst is a near or far jump
}
#if JUMP_OPTIMIZE
if ( i->njump ) {
// expansion, can happen
if ( pass != PASS_INIT && ( v < -126 || v > 129 ) ) {
str = FarJumpStr( op, &jump_size );
if ( shouldNaNCheck ) {
Emit1( jump_size + 4 ); // target for NaN branch
}
EmitString( str );
Emit4( v - 4 - jump_size );
i->njump = 0;
jumpSizeChanged++;
return;
}
if ( shouldNaNCheck ) {
Emit1( 0x02 ); // target for NaN branch
}
EmitString( NearJumpStr( op ) );
Emit1( v - 2 );
return;
}
if ( pass == PASS_COMPRESS || ( pass == PASS_INIT && addr < ip ) ) {
if ( v >= -126 && v <= 129 ) {
if ( shouldNaNCheck ) {
Emit1( 0x02 ); // target for NaN branch
}
EmitString( NearJumpStr( op ) );
Emit1( v - 2 );
if ( !i->njump )
jumpSizeChanged++;
i->njump = 1;
return;
}
}
#endif
str = FarJumpStr( op, &jump_size );
if ( jump_size == 0 ) {
Com_Error( ERR_DROP, "VM_CompileX86 error: %s\n", "bad jump size" );
return;
}
if ( shouldNaNCheck ) {
Emit1( jump_size + 4 ); // target for NaN branch
}
EmitString( str );
Emit4( v - 4 - jump_size );
}
static void EmitCallAddr( vm_t *vm, int addr )
{
const int v = instructionOffsets[ addr ] - compiledOfs;
EmitString( "E8" );
Emit4( v - 5 );
}
static void EmitCallOffset( func_t Func )
{
const int v = funcOffset[ Func ] - compiledOfs;
EmitString( "E8" ); // call +funcOffset[ Func ]
Emit4( v - 5 );
}
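// emit_CheckReg: sanitize a data-segment address held in `reg`: either clamp it with
// the data mask (checks disabled or forceDataMask set) or compare it against the mask
// and branch to the given error handler on overflow.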
static void emit_CheckReg( vm_t *vm, uint32_t reg, func_t func )
{
if ( vm->forceDataMask || !( vm_rtChecks->integer & VM_RTCHECK_DATA ) )
{
#if idx64
emit_and_rx( reg, R_DATAMASK ); // reg = reg & dataMask
#else
emit_op_rx_imm32( X_AND, reg, vm->dataMask ); // reg = reg & vm->dataMask
#endif
return;
}
#if idx64
emit_cmp_rx( reg, R_DATAMASK ); // cmp reg, dataMask
#else
emit_op_rx_imm32( X_CMP, reg, vm->dataMask ); // cmp reg, vm->dataMask
#endif
// error reporting
EmitString( "0F 87" ); // ja +errorFunction
Emit4( funcOffset[ func ] - compiledOfs - 4 ); // compiledOfs already includes the 2-byte opcode
}
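// emit_CheckJump: validate a computed jump target: inside a known procedure the target
// must stay within [proc_base, proc_base + proc_len], otherwise it must be below
// vm->instructionCount; out-of-range targets branch to the error handlers.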
static void emit_CheckJump( vm_t *vm, uint32_t reg, int32_t proc_base, int32_t proc_len )
{
if ( ( vm_rtChecks->integer & VM_RTCHECK_JUMP ) == 0 ) {
return;
}
if ( proc_base != -1 ) {
uint32_t rx;
// allow jump within local function scope only
// check if (reg - proc_base) > proc_len
rx = alloc_rx( R_EDX | TEMP );
emit_lea( rx, reg, -proc_base ); // lea edx, [reg - procBase]
emit_op_rx_imm32( X_CMP, rx, proc_len ); // cmp edx, proc_len
unmask_rx( rx );
EmitString( "0F 87" ); // ja +funcOffset[FUNC_BADJ]
Emit4( funcOffset[ FUNC_BADJ ] - compiledOfs - 4 );
} else {
// check if reg >= instructionCount
emit_op_rx_imm32( X_CMP, reg, vm->instructionCount ); // cmp reg, vm->instructionCount
EmitString( "0F 83" ); // jae +funcOffset[ FUNC_ERRJ ]
Emit4( funcOffset[ FUNC_ERRJ ] - compiledOfs - 4 );
}
}
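// emit_CheckProc: emitted at procedure entry; traps program stack overflow
// (programStack dropping below the stack bottom) and opStack overflow against
// opStackTop, using the maximum opStack depth recorded for this procedure.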
static void emit_CheckProc( vm_t *vm, instruction_t *ins )
{
// programStack overflow check
if ( vm_rtChecks->integer & VM_RTCHECK_PSTACK ) {
#if idx64
emit_cmp_rx( R_PSTACK, R_STACKBOTTOM ); // cmp programStack, stackBottom
#else
emit_op_rx_imm32( X_CMP, R_PSTACK, vm->stackBottom ); // cmp programStack, vm->stackBottom
#endif
EmitString( "0F 8C" ); // jl +funcOffset[ FUNC_PSOF ]
Emit4( funcOffset[ FUNC_PSOF ] - compiledOfs - 4 );
}
// opStack overflow check
if ( vm_rtChecks->integer & VM_RTCHECK_OPSTACK ) {
uint32_t rx = alloc_rx( R_EDX | TEMP );
// proc->opStack carries max.used opStack value
emit_lea( rx | R_REX, R_OPSTACK, ins->opStack ); // rdx = opStack + max.opStack
// check if rdx > opstackTop
#if idx64
emit_cmp_rx( rx | R_REX, R_OPSTACKTOP ); // cmp rdx, opStackTop
#else
emit_cmp_rx_mem( rx, (intptr_t) &vm->opStackTop ); // cmp edx, [&vm->opStackTop]
#endif
EmitString( "0F 87" ); // ja +funcOffset[FUNC_OSOF]
Emit4( funcOffset[ FUNC_OSOF ] - compiledOfs - 4 );
unmask_rx( rx );
}
}
#ifdef _WIN32
#define SHADOW_BASE 40
#else // linux/*BSD ABI
#define SHADOW_BASE 8
#endif
#define PUSH_STACK 32
#define PARAM_STACK 128
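// EmitCallFunc: common dispatcher for OP_CALL. On entry eax holds either a VM
// instruction index (>= 0, dispatched through instructionPointers) or a negated
// syscall number (< 0, forwarded to vm->systemCall with the VM arguments copied
// out of the program stack).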
static void EmitCallFunc( vm_t *vm )
{
static int sysCallOffset = 0;
init_opstack(); // to avoid any side-effects on emit_CheckJump()
emit_test_rx( R_EAX, R_EAX ); // test eax, eax
EmitString( "7C" ); // jl +offset (SystemCall)
Emit1( sysCallOffset ); // will be valid after first pass
sysCallOffset = compiledOfs;
// jump target range check
mask_rx( R_EAX );
emit_CheckJump( vm, R_EAX, -1, 0 );
unmask_rx( R_EAX );
// save procBase and programStack
//emit_push( R_PROCBASE ); // procBase
//emit_push( R_PSTACK ); // programStack
// calling another vm function
#if idx64
emit_call_index( R_INSPOINTERS, R_EAX ); // call qword ptr [instructionPointers+rax*8]
#else
emit_call_index_offset( (intptr_t)instructionPointers, R_EAX ); // call dword ptr [instructionPointers + eax*4]
#endif
// restore proc base and programStack so there is
// no need to validate programStack anymore
//emit_pop( R_PSTACK ); // pop rsi // programStack
//emit_pop( R_PROCBASE ); // pop rbp // procBase
emit_ret(); // ret
sysCallOffset = compiledOfs - sysCallOffset;
// systemCall:
// convert negative num to system call number
// and store right before the first arg
emit_not_rx( R_EAX ); // not eax
// we may jump here from ConstOptimize() also
funcOffset[FUNC_SYSC] = compiledOfs;
#if idx64
// allocate stack for shadow(win32)+parameters
emit_op_rx_imm32( X_SUB, R_ESP | R_REX, SHADOW_BASE + PUSH_STACK + PARAM_STACK ); // sub rsp, 200
emit_lea( R_EDX | R_REX, R_ESP, SHADOW_BASE ); // lea rdx, [ rsp + SHADOW_BASE ]
// save scratch registers
emit_store_rx( R_ESI | R_REX, R_EDX, 0 ); // mov [rdx+00], rsi
emit_store_rx( R_EDI | R_REX, R_EDX, 8 ); // mov [rdx+08], rdi
emit_store_rx( R_R11 | R_REX, R_EDX, 16 ); // mov [rdx+16], r11 - dataMask
// ecx = &int64_params[0]
emit_lea( R_ECX | R_REX, R_ESP, SHADOW_BASE + PUSH_STACK ); // lea rcx, [rsp+SHADOW_BASE+PUSH_STACK]
// save 64-bit syscallNum
emit_store_rx( R_EAX | R_REX, R_ECX, 0 ); // mov [rcx], rax
// vm->programStack = programStack - 4; // or 8
mov_rx_ptr( R_EDX, &vm->programStack ); // mov rdx, &vm->programStack
emit_lea( R_EAX, R_PSTACK, -8 ); // lea eax, [programStack-8]
emit_store_rx( R_EAX, R_EDX, 0 ); // mov [rdx], eax
// params = procBase + 8
emit_lea( R_ESI | R_REX, R_PROCBASE, 8 ); // lea rsi, [procBase + 8]
// rcx = &int64_params[1]
emit_op_rx_imm32( X_ADD, R_ECX | R_REX, 8 ); // add rcx, 8
// dest_params[1-15] = params[1-15];
emit_xor_rx( R_EDX, R_EDX ); // xor edx, edx
// loop
EmitString( "48 63 04 96" ); // movsxd rax, dword [rsi+rdx*4]
EmitString( "48 89 04 D1" ); // mov qword ptr[rcx+rdx*8], rax
EmitString( "83 C2 01" ); // add edx, 1
EmitString( "83 FA" ); // cmp edx, 15
Emit1( ( PARAM_STACK / 8 ) - 1 );
EmitString( "7C F0" ); // jl -16
#ifdef _WIN32
// rcx = &int64_params[0]
emit_op_rx_imm32( X_SUB, R_ECX | R_REX, 8 ); // sub rcx, 8
#else // linux/*BSD ABI
// rdi = &int64_params[0]
emit_lea( R_EDI | R_REX, R_ECX, -8 ); // lea rdi, [rcx-8]
#endif
// currentVm->systemCall( param );
emit_call_rx( R_SYSCALL ); // call r12
// restore registers
emit_lea( R_EDX | R_REX, R_ESP, SHADOW_BASE ); // lea rdx, [rsp + SHADOW_BASE]
emit_load4( R_ESI | R_REX, R_EDX, 0 ); // mov rsi, [rdx+00]
emit_load4( R_EDI | R_REX, R_EDX, 8 ); // mov rdi, [rdx+08]
emit_load4( R_R11 | R_REX, R_EDX, 16 ); // mov r11, [rdx+16]
// store result in opStack[4]
emit_store_rx( R_EAX, R_OPSTACK, 4 ); // [opStack + 4] = eax (syscall result)
// return stack
emit_op_rx_imm32( X_ADD, R_ESP | R_REX, SHADOW_BASE + PUSH_STACK + PARAM_STACK ); // add rsp, 200
emit_ret(); // ret
#else // id386
// params = (int *)((byte *)currentVM->dataBase + programStack + 4);
emit_lea( R_ECX, R_EBP, 4 ); // lea ecx, [ebp+4]
// function prologue
emit_push( R_EBP ); // push ebp
emit_mov_rx( R_EBP, R_ESP ); // mov ebp, esp
emit_op_rx_imm32( X_SUB, R_ESP, 4 ); // sub esp, 4
// align stack before call
emit_op_rx_imm32( X_AND, R_ESP, -16 ); // and esp, -16
// ABI note: esi/edi must not change during call!
// currentVM->programStack = programStack - 4;
emit_lea( R_EDX, R_PSTACK, -8 ); // lea edx, [esi-8]
emit_store_rx_offset( R_EDX, (intptr_t) &vm->programStack ); // mov[ &vm->programStack ], edx
// params[0] = syscallNum
emit_store_rx( R_EAX, R_ECX, 0 ); // mov [ecx], eax
// cdecl - set params
emit_store_rx( R_ECX, R_ESP, 0 ); // mov [esp], ecx
// currentVm->systemCall( param );
emit_call_indir( (intptr_t) &vm->systemCall ); // call dword ptr [&currentVM->systemCall]
// store result in opStack[4]
emit_store_rx( R_EAX, R_OPSTACK, 4 ); // [opStack + 4] = eax (syscall result)
// function epilogue
emit_mov_rx( R_ESP, R_EBP ); // mov esp, ebp
emit_pop( R_EBP ); // pop ebp
emit_ret(); // ret
#endif
}
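// EmitBCPYFunc: helper for OP_BLOCK_COPY. Expects edx = source offset, eax = destination
// offset and ecx = dword count; when data checks are enabled both offsets and the count
// are clamped with the data mask before the `rep movsd` copy.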
static void EmitBCPYFunc( vm_t *vm )
{
emit_push( R_ESI ); // push esi
emit_push( R_EDI ); // push edi
emit_mov_rx( R_ESI, R_EDX ); // mov esi, edx // top of opstack
emit_mov_rx( R_EDI, R_EAX ); // mov edi, eax // bottom of opstack
if ( vm_rtChecks->integer & VM_RTCHECK_DATA )
{
mov_rx_imm32( R_EAX, vm->dataMask ); // mov eax, datamask
emit_and_rx( R_ESI, R_EAX ); // and esi, eax
emit_and_rx( R_EDI, R_EAX ); // and edi, eax
emit_lea_base_index( R_EDX, R_ESI, R_ECX ); // lea edx, [esi + ecx]
emit_and_rx( R_EDX, R_EAX ); // and edx, eax - apply data mask
emit_sub_rx( R_EDX, R_ESI ); // sub edx, esi - source-adjusted counter
emit_add_rx( R_EDX, R_EDI ); // add edx, edi
emit_and_rx( R_EDX, R_EAX ); // and edx, eax - apply data mask
emit_sub_rx( R_EDX, R_EDI ); // sub edx, edi - destination-adjusted counter
emit_mov_rx( R_ECX, R_EDX ); // mov ecx, edx
}
emit_add_rx( R_ESI | R_REX, R_EBX ); // add rsi, rbx
emit_add_rx( R_EDI | R_REX, R_EBX ); // add rdi, rbx
EmitString( "F3 A5" ); // rep movsd
emit_pop( R_EDI ); // pop edi
emit_pop( R_ESI ); // pop esi
emit_ret(); // ret
}
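// EmitFloatJump: x87 fallback for float comparisons. After `fnstsw ax` the condition
// bits sit in AH (C0 = 0x01, C2 = 0x04, C3 = 0x40); masking AH selects the bits that
// matter for each comparison and the resulting ZF drives a plain je/jne.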
static void EmitFloatJump( instruction_t *i, int op, int addr )
{
switch ( op ) {
case OP_EQF:
EmitString( "80 E4 44" ); // and ah,0x44 (preserve C2 too)
EmitJump( i, OP_NE, addr );
break;
case OP_NEF:
EmitString( "80 E4 40" ); // and ah,0x40
EmitJump( i, OP_EQ, addr );
break;
case OP_LTF:
EmitString( "80 E4 05" ); // and ah,0x05 (preserve C2 too)
EmitJump( i, OP_NE, addr );
break;
case OP_LEF:
EmitString( "80 E4 45" ); // and ah,0x45 (preserve C2 too)
EmitJump( i, OP_NE, addr );
break;
case OP_GTF:
EmitString( "80 E4 41" ); // and ah,0x41
EmitJump( i, OP_EQ, addr );
break;
case OP_GEF:
EmitString( "80 E4 01" ); // and ah,0x01
EmitJump( i, OP_EQ, addr );
break;
};
}
static void EmitPSOFFunc( vm_t *vm )
{
mov_rx_ptr( R_EAX, &badStackPtr ); // mov eax, &badStackPtr
EmitString( "FF 10" ); // call [eax]
emit_ret(); // ret
}
static void EmitOSOFFunc( vm_t *vm )
{
mov_rx_ptr( R_EAX, &badOpStackPtr ); // mov eax, &badOpStackPtr
EmitString( "FF 10" ); // call [eax]
emit_ret(); // ret
}
static void EmitBADJFunc( vm_t *vm )
{
mov_rx_ptr( R_EAX, &badJumpPtr ); // mov eax, &badJumpPtr
EmitString( "FF 10" ); // call [eax]
emit_ret(); // ret
}
static void EmitERRJFunc( vm_t *vm )
{
mov_rx_ptr( R_EAX, &errJumpPtr ); // mov eax, &errJumpPtr
EmitString( "FF 10" ); // call [eax]
emit_ret(); // ret
}
static void EmitDATRFunc( vm_t *vm )
{
mov_rx_ptr( R_EAX, &badDataReadPtr ); // mov eax, &badDataReadPtr
EmitString( "FF 10" ); // call [eax]
emit_ret(); // ret
}
static void EmitDATWFunc( vm_t *vm )
{
mov_rx_ptr( R_EAX, &badDataWritePtr ); // mov eax, &badDataWritePtr
EmitString( "FF 10" ); // call [eax]
emit_ret(); // ret
}
#ifdef CONST_OPTIMIZE
static qboolean IsFloorTrap( const vm_t *vm, const int trap )
{
if ( trap == ~CG_FLOOR && vm->index == VM_CGAME )
return qtrue;
if ( trap == ~UI_FLOOR && vm->index == VM_UI )
return qtrue;
if ( trap == ~G_FLOOR && vm->index == VM_GAME )
return qtrue;
return qfalse;
}
static qboolean IsCeilTrap( const vm_t *vm, const int trap )
{
if ( trap == ~CG_CEIL && vm->index == VM_CGAME )
return qtrue;
if ( trap == ~UI_CEIL && vm->index == VM_UI )
return qtrue;
if ( trap == ~G_CEIL && vm->index == VM_GAME )
return qtrue;
return qfalse;
}
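// NextLoad: returns qtrue when the instructions that follow simply re-load the
// address we are about to store to, in which case it is cheaper to keep the value
// in a register instead of emitting an immediate store.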
static qboolean NextLoad( const var_addr_t *v, const instruction_t *i, int op )
{
if ( i->jused ) {
return qfalse;
}
if ( v->addr == i->value ) {
if ( i->op == OP_CONST ) {
if ( v->base == R_DATABASE && (i+1)->op == op ) {
return qtrue;
}
}
if ( i->op == OP_LOCAL ) {
if ( v->base == R_PROCBASE && (i+1)->op == op ) {
return qtrue;
}
}
}
return qfalse;
}
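// ConstOptimize: fold an OP_CONST (ci) directly into the following instruction (ni)
// using immediate forms of stores, ALU ops, compares, calls and jumps. Returns qtrue
// when the pair was handled and ip was advanced past the consumed instructions.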
static qboolean ConstOptimize( vm_t *vm, instruction_t *ci, instruction_t *ni )
{
var_addr_t var;
switch ( ni->op ) {
case OP_STORE4: {
if ( ci->value == 0 ) {
// "xor eax, eax" + non-const path is shorter
return qfalse;
}
if ( addr_on_top( &var ) ) {
if ( NextLoad( &var, ni + 1, OP_LOAD4 ) ) {
return qfalse; // store value in a register
}
discard_top(); dec_opstack(); // v = *opstack; opstack -= 4
emit_store_imm32( ci->value, var.base, var.addr ); // (dword*)base_reg[ v ] = 0x12345678
var.size = 4;
wipe_var_range( &var );
} else {
int rx = load_rx_opstack( R_EAX | RCONST ); dec_opstack(); // eax = *opstack; opstack -= 4
emit_CheckReg( vm, rx, FUNC_DATW );
emit_store_imm32_index( ci->value, R_DATABASE, rx ); // (dword*)dataBase[ eax ] = 0x12345678
unmask_rx( rx );
wipe_vars();
}
ip += 1; // OP_STORE4
return qtrue;
}
case OP_STORE2: {
if ( addr_on_top( &var ) ) {
if ( NextLoad( &var, ni + 1, OP_LOAD2 ) || find_rx_const( ci->value, 0xFFFF ) ) {
return qfalse; // store value in a register
}
discard_top(); dec_opstack(); // v = *opstack; opstack -= 4
emit_store2_imm16( ci->value, var.base, var.addr ); // (short*)var.base[ v ] = 0x1234
var.size = 2;
wipe_var_range( &var );
} else {
int rx = load_rx_opstack( R_EAX | RCONST ); dec_opstack(); // eax = *opstack; opstack -= 4
emit_CheckReg( vm, rx, FUNC_DATW );
emit_store2_imm16_index( ci->value, R_DATABASE, rx ); // (word*)dataBase[ eax ] = 0x1234
unmask_rx( rx );
wipe_vars();
}
ip += 1; // OP_STORE2
return qtrue;
}
case OP_STORE1: {
if ( addr_on_top( &var ) ) {
if ( NextLoad( &var, ni + 1, OP_LOAD1 ) || find_rx_const( ci->value, 0xFF ) ) {
return qfalse; // store value in a register
}
discard_top(); dec_opstack(); // v = *opstack; opstack -= 4
emit_store1_imm8( ci->value, var.base, var.addr ); // (byte*)base_reg[ v ] = 0x12
var.size = 1;
wipe_var_range( &var );
} else {
int rx = load_rx_opstack( R_EAX | RCONST ); dec_opstack(); // eax = *opstack; opstack -= 4
emit_CheckReg( vm, rx, FUNC_DATW );
emit_store1_imm8_index( ci->value, R_DATABASE, rx ); // (byte*)dataBase[ eax ] = 0x12
unmask_rx( rx );
wipe_vars();
}
ip += 1; // OP_STORE1
return qtrue;
}
case OP_ADD: {
int rx = load_rx_opstack( R_EAX ); // eax = *opstack
if ( ci->value == 128 ) {
// small trick to use 1-byte immediate value :P
emit_op_rx_imm32( X_SUB, rx, -128 ); // sub eax, -128
} else {
emit_op_rx_imm32( X_ADD, rx, ci->value ); // add eax, 0x12345678
}
store_rx_opstack( rx ); // *opstack = eax
ip += 1; // OP_ADD
return qtrue;
}
case OP_SUB: {
int rx = load_rx_opstack( R_EAX ); // eax = *opstack
emit_op_rx_imm32( X_SUB, rx, ci->value ); // sub eax, 0x12345678
store_rx_opstack( rx ); // *opstack = eax
ip += 1; // OP_SUB
return qtrue;
}
case OP_MULI:
case OP_MULU: {
int rx = load_rx_opstack( R_EAX ); // eax = *opstack
emit_mul_rx_imm( rx, ci->value ); // imul eax, eax, 0x12345678
store_rx_opstack( rx ); // *opstack = eax
ip += 1; // OP_MUL
return qtrue;
}
case OP_BAND: {
int rx = load_rx_opstack( R_EAX ); // eax = *opstack
if ( !(ni+1)->jused && (ni+1)->op == OP_CONST && (ni+1)->value == 0 && ops[(ni+2)->op].flags & JUMP ) {
if ( (ni+2)->op == OP_EQ || (ni+2)->op == OP_NE ) {
dec_opstack();
emit_test_rx_imm32( rx, ci->value );// test eax, mask
EmitJump( ni+2, (ni+2)->op, (ni+2)->value ); // jcc
unmask_rx( rx );
ip += 3; // OP_BAND + OP_CONST + OP_EQ/OP_NE
return qtrue;
}
}
emit_op_rx_imm32( X_AND, rx, ci->value ); // and eax, 0x12345678
store_rx_opstack( rx ); // *opstack = eax
ip += 1; // OP_BAND
return qtrue;
}
case OP_BOR: {
int rx = load_rx_opstack( R_EAX ); // eax = *opstack
emit_op_rx_imm32( X_OR, rx, ci->value ); // or eax, 0x12345678
store_rx_opstack( rx ); // *opstack = eax
ip += 1; // OP_BOR
return qtrue;
}
case OP_BXOR: {
int rx = load_rx_opstack( R_EAX ); // eax = *opstack
emit_op_rx_imm32( X_XOR, rx, ci->value ); // xor eax, 0x12345678
store_rx_opstack( rx ); // *opstack = eax
ip += 1; // OP_BXOR
return qtrue;
}
case OP_LSH:
if ( ci->value < 0 || ci->value > 31 )
break; // undefined behavior
if ( ci->value ) {
int rx = load_rx_opstack( R_EAX ); // eax = *opstack
emit_shl_rx_imm( rx, ci->value ); // eax = (unsigned)eax << x
store_rx_opstack( rx ); // *opstack = eax
}
ip += 1; // OP_LSH
return qtrue;
case OP_RSHI:
if ( ci->value < 0 || ci->value > 31 )
break; // undefined behavior
if ( ci->value ) {
int rx = load_rx_opstack( R_EAX ); // eax = *opstack
emit_sar_rx_imm( rx, ci->value ); // eax = eax >> x
store_rx_opstack( rx ); // *opstack = eax
}
ip += 1; // OP_RSHI
return qtrue;
case OP_RSHU:
if ( ci->value < 0 || ci->value > 31 )
break; // undefined behavior
if ( ci->value ) {
int rx = load_rx_opstack( R_EAX ); // eax = *opstack
emit_shr_rx_imm( rx, ci->value ); // eax = (unsigned)eax >> x
store_rx_opstack( rx ); // *opstack = eax
}
ip += 1; // OP_RSHU
return qtrue;
case OP_CALL: {
inc_opstack(); // opstack += 4
if ( HasSSEFP() ) {
if ( ci->value == ~TRAP_SQRT ) {
int sx = alloc_sx( R_XMM0 );
emit_sqrt( sx, R_PROCBASE, 8 ); // sqrtss xmm0, dword ptr [ebp + 8]
store_sx_opstack( sx ); // *opstack = xmm0
ip += 1; // OP_CALL
return qtrue;
}
if ( IsFloorTrap( vm, ci->value ) && ( CPU_Flags & CPU_SSE41 ) ) {
int sx = alloc_sx( R_XMM0 );
emit_floor( sx, R_PROCBASE, 8 ); // roundss xmm0, dword ptr [ebp + 8], 1
store_sx_opstack( sx ); // *opstack = xmm0
ip += 1; // OP_CALL
return qtrue;
}
if ( IsCeilTrap( vm, ci->value ) && ( CPU_Flags & CPU_SSE41 ) ) {
int sx = alloc_sx( R_XMM0 );
emit_ceil( sx, R_PROCBASE, 8 ); // roundss xmm0, dword ptr [ebp + 8], 2
store_sx_opstack( sx ); // *opstack = xmm0
ip += 1; // OP_CALL
return qtrue;
}
} else {
// legacy x87 path
}
flush_volatile();
if ( ci->value < 0 ) { // syscall
mask_rx( R_EAX );
mov_rx_imm32( R_EAX, ~ci->value ); // eax - syscall number
if ( opstack != 1 ) {
emit_op_rx_imm32( X_ADD, R_OPSTACK | R_REX, (opstack-1) * sizeof( int32_t ) );
EmitCallOffset( FUNC_SYSC );
emit_op_rx_imm32( X_SUB, R_OPSTACK | R_REX, (opstack-1) * sizeof( int32_t ) );
} else {
EmitCallOffset( FUNC_SYSC );
}
ip += 1; // OP_CALL
store_syscall_opstack();
return qtrue;
}
emit_push( R_OPSTACK ); // push edi
if ( opstack != 1 ) {
emit_op_rx_imm32( X_ADD, R_OPSTACK | R_REX, (opstack-1) * sizeof( int32_t ) ); // add rdi, opstack-4
}
EmitCallAddr( vm, ci->value ); // call +addr
emit_pop( R_OPSTACK ); // pop edi
ip += 1; // OP_CALL
return qtrue;
}
case OP_JUMP:
flush_volatile();
EmitJump( ni, ni->op, ci->value );
ip += 1; // OP_JUMP
return qtrue;
case OP_EQ:
case OP_NE:
case OP_GEI:
case OP_GTI:
case OP_GTU:
case OP_GEU:
case OP_LTU:
case OP_LEU:
case OP_LEI:
case OP_LTI: {
int rx = load_rx_opstack( R_EAX | RCONST ); dec_opstack(); // eax = *opstack; opstack -= 4
if ( ci->value == 0 && ( ni->op == OP_EQ || ni->op == OP_NE ) ) {
emit_test_rx( rx, rx ); // test eax, eax
} else {
emit_op_rx_imm32( X_CMP, rx, ci->value ); // cmp eax, 0x12345678
}
unmask_rx( rx );
EmitJump( ni, ni->op, ni->value ); // jcc
ip += 1; // OP_cond
return qtrue;
}
}
return qfalse;
}
#endif
/*
=================
VM_FindMOps
Search for known macro-op sequences
=================
*/
static void VM_FindMOps( instruction_t *buf, int instructionCount )
{
instruction_t *i;
int n;
i = buf;
n = 0;
while ( n < instructionCount )
{
if ( i->op == OP_LOCAL ) {
#ifdef MACRO_OPTIMIZE
// OP_LOCAL + OP_LOCAL + OP_LOAD4 + OP_CONST + OP_XXX + OP_STORE4
if ( ( i + 1 )->op == OP_LOCAL && i->value == ( i + 1 )->value && ( i + 2 )->op == OP_LOAD4 && ( i + 3 )->op == OP_CONST && ( i + 4 )->op != OP_UNDEF && ( i + 5 )->op == OP_STORE4 ) {
int v = ( i + 4 )->op;
if ( v == OP_ADD ) {
i->op = MOP_ADD;
i += 6; n += 6;
continue;
}
if ( v == OP_SUB ) {
i->op = MOP_SUB;
i += 6; n += 6;
continue;
}
if ( v == OP_BAND ) {
i->op = MOP_BAND;
i += 6; n += 6;
continue;
}
if ( v == OP_BOR ) {
i->op = MOP_BOR;
i += 6; n += 6;
continue;
}
if ( v == OP_BXOR ) {
i->op = MOP_BXOR;
i += 6; n += 6;
continue;
}
}
#endif
if ( (i+1)->op == OP_CONST && (i+2)->op == OP_CALL && (i+3)->op == OP_STORE4 && (i+4)->op == OP_LOCAL && (i+5)->op == OP_LOAD4 && (i+6)->op == OP_LEAVE ) {
if ( i->value == (i+4)->value && !(i+4)->jused ) {
(i+0)->op = OP_IGNORE; (i+0)->value = 0;
(i+3)->op = OP_IGNORE; (i+3)->value = 0;
(i+4)->op = OP_IGNORE; (i+4)->value = 0;
(i+5)->op = OP_IGNORE; (i+5)->value = 0;
i += 7;
n += 7;
continue;
}
}
}
i++;
n++;
}
}
#ifdef MACRO_OPTIMIZE
/*
=================
EmitMOPs
=================
*/
static qboolean EmitMOPs( vm_t *vm, instruction_t *ci, macro_op_t op )
{
uint32_t reg_base;
int n;
if ( (ci + 1 )->op == OP_LOCAL )
reg_base = R_PROCBASE;
else
reg_base = R_DATABASE;
switch ( op )
{
//[local] += CONST
case MOP_ADD:
n = inst[ ip + 2 ].value;
emit_op_mem_imm( X_ADD, R_PROCBASE, ci->value, n );
ip += 5;
return qtrue;
//[local] -= CONST
case MOP_SUB:
n = inst[ ip + 2 ].value;
emit_op_mem_imm( X_SUB, R_PROCBASE, ci->value, n );
ip += 5;
return qtrue;
//[local] &= CONST
case MOP_BAND:
n = inst[ ip + 2 ].value;
emit_op_mem_imm( X_AND, R_PROCBASE, ci->value, n );
ip += 5;
return qtrue;
//[local] |= CONST
case MOP_BOR:
n = inst[ ip + 2 ].value;
emit_op_mem_imm( X_OR, R_PROCBASE, ci->value, n );
ip += 5;
return qtrue;
//[local] ^= CONST
case MOP_BXOR:
n = inst[ ip + 2 ].value;
emit_op_mem_imm( X_XOR, R_PROCBASE, ci->value, n );
ip += 5;
return qtrue;
}
return qfalse;
}
#endif // MACRO_OPTIMIZE
#ifdef DUMP_CODE
static void dump_code( const char *vmname, uint8_t *c, int32_t code_len )
{
const char *filename = va( "vm-%s.hex", vmname );
fileHandle_t fh = FS_FOpenFileWrite( filename );
if ( fh != FS_INVALID_HANDLE ) {
while ( code_len >= 8 ) {
FS_Printf( fh, "%02x %02x %02x %02x %02x %02x %02x %02x\n", c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7] );
code_len -= 8;
c += 8;
}
while ( code_len > 0 ) {
FS_Printf( fh, "%02x", c[0] );
if ( code_len > 1 )
FS_Write( " ", 1, fh );
code_len -= 1;
c += 1;
}
FS_FCloseFile( fh );
}
}
#endif
/*
=================
VM_Compile
=================
*/
qboolean VM_Compile( vm_t *vm, vmHeader_t *header ) {
const char *errMsg;
int instructionCount;
instruction_t *ci;
int i, n;
uint32_t rx[3];
uint32_t sx[2];
int proc_base;
int proc_len;
#ifdef RET_OPTIMIZE
int proc_end;
#endif
var_addr_t var;
opcode_t sign_extend;
int var_size;
reg_t *reg;
#if JUMP_OPTIMIZE
int num_compress;
#endif
inst = (instruction_t*)Z_Malloc( (header->instructionCount + 8 ) * sizeof( instruction_t ) );
instructionOffsets = (int*)Z_Malloc( header->instructionCount * sizeof( int ) );
errMsg = VM_LoadInstructions( (byte *) header + header->codeOffset, header->codeLength, header->instructionCount, inst );
if ( !errMsg ) {
errMsg = VM_CheckInstructions( inst, vm->instructionCount, vm->jumpTableTargets, vm->numJumpTableTargets, vm->exactDataLength );
}
if ( errMsg ) {
VM_FreeBuffers();
Com_Printf( "VM_CompileX86 error: %s\n", errMsg );
return qfalse;
}
VM_ReplaceInstructions( vm, inst );
VM_FindMOps( inst, vm->instructionCount );
#if JUMP_OPTIMIZE
for ( i = 0; i < header->instructionCount; i++ ) {
if ( ops[inst[i].op].flags & JUMP ) {
int d = inst[i].value - i;
// we can correctly calculate backward jump offsets even at initial pass
// but for forward jumps we do some estimation
// too low threshold will reduce compression
// too high threshold may invoke extra expansion passes
if ( d > 0 && d <= FJUMP_THRESHOLD ) {
inst[i].njump = 1;
}
}
}
num_compress = 0;
#endif
code = NULL; // we will allocate memory later, after last defined pass
instructionPointers = NULL;
memset( funcOffset, 0, sizeof( funcOffset ) );
instructionCount = header->instructionCount;
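// multi-pass translation: the initial pass estimates jump sizes, the compression
// pass(es) shrink eligible branches to the rel8 form, and a final expand-only pass
// re-grows any branch that no longer fits; once sizes are stable the code is emitted
// once more into the executable buffer allocated below.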
for( pass = 0; pass < NUM_PASSES; pass++ )
{
__compile:
// translate all instructions
ip = 0;
compiledOfs = 0;
#if JUMP_OPTIMIZE
jumpSizeChanged = 0;
#endif
proc_base = -1;
proc_len = 0;
#ifdef RET_OPTIMIZE
proc_end = 0;
#endif
init_opstack();
#ifdef DEBUG_INT
emit_brk();
#endif
#if idx64
emit_push( R_EBP ); // push rbp
emit_push( R_EBX ); // push rbx
emit_push( R_ESI ); // push rsi
emit_push( R_EDI ); // push rdi
emit_push( R_R12 ); // push r12
emit_push( R_R13 ); // push r13
emit_push( R_R14 ); // push r14
emit_push( R_R15 ); // push r15
mov_rx_ptr( R_DATABASE, vm->dataBase ); // mov rbx, vm->dataBase
// do not use wrapper, force constant size there
emit_mov_rx_imm64( R_INSPOINTERS, (intptr_t) instructionPointers ); // mov r12, instructionPointers
mov_rx_imm32( R_DATAMASK, vm->dataMask ); // mov r11d, vm->dataMask
mov_rx_imm32( R_STACKBOTTOM, vm->stackBottom ); // mov r14d, vm->stackBottom
mov_rx_ptr( R_EAX, &vm->opStack ); // mov rax, &vm->opStack
emit_load4( R_OPSTACK | R_REX, R_EAX, 0 ); // mov rdi, [rax]
mov_rx_ptr( R_SYSCALL, vm->systemCall ); // mov r13, vm->systemCall
mov_rx_ptr( R_EAX, &vm->programStack ); // mov rax, &vm->programStack
emit_load4( R_PSTACK, R_EAX, 0 ); // mov esi, dword ptr [rax]
emit_lea( R_OPSTACKTOP | R_REX, R_OPSTACK, sizeof( int32_t ) * ( MAX_OPSTACK_SIZE - 1 ) ); // lea r15, [opStack + opStackSize - 4]
EmitCallOffset( FUNC_ENTR );
#ifdef DEBUG_VM
mov_rx_ptr( R_EAX, &vm->programStack ); // mov rax, &vm->programStack
emit_store_rx( R_PSTACK, R_EAX, 0 ); // mov [rax], esi
#endif
emit_pop( R_R15 ); // pop r15
emit_pop( R_R14 ); // pop r14
emit_pop( R_R13 ); // pop r13
emit_pop( R_R12 ); // pop r12
emit_pop( R_EDI ); // pop rdi
emit_pop( R_ESI ); // pop rsi
emit_pop( R_EBX ); // pop rbx
emit_pop( R_EBP ); // pop rbp
emit_ret(); // ret
#else // id386
emit_pushad(); // pushad
mov_rx_ptr( R_DATABASE, vm->dataBase ); // mov ebx, vm->dataBase
emit_load_rx_offset( R_PSTACK, (intptr_t) &vm->programStack ); // mov esi, [&vm->programStack]
emit_load_rx_offset( R_OPSTACK, (intptr_t) &vm->opStack ); // mov edi, [&vm->opStack]
EmitCallOffset( FUNC_ENTR );
#ifdef DEBUG_VM
emit_store_rx_offset( R_PSTACK, (intptr_t) &vm->programStack ); // mov [&vm->programStack], esi
#endif
// emit_store_rx_offset( R_OPSTACK, (intptr_t) &vm->opStack ); // // [&vm->opStack], edi
emit_popad(); // popad
emit_ret(); // ret
#endif // id386
EmitAlign( FUNC_ALIGN );
// main function entry offset
funcOffset[FUNC_ENTR] = compiledOfs;
while ( ip < instructionCount ) {
ci = &inst[ip + 0];
#ifdef REGS_OPTIMIZE
if ( ci->jused )
#endif
{
// register state can be carried over between instructions only if we are 100% sure
// that the current instruction is not a jump target, so flush volatile registers here
flush_volatile();
}
instructionOffsets[ ip++ ] = compiledOfs;
switch ( ci->op ) {
case OP_UNDEF:
emit_brk(); // int 3
break;
case OP_IGNORE:
break;
case OP_BREAK:
emit_brk(); // int 3
break;
case OP_ENTER:
EmitAlign( FUNC_ALIGN );
instructionOffsets[ ip-1 ] = compiledOfs;
proc_base = ip; // this points on next instruction after OP_ENTER
// locate endproc
for ( proc_len = -1, i = ip; i < header->instructionCount; i++ ) {
if ( inst[ i ].op == OP_PUSH && inst[ i + 1 ].op == OP_LEAVE ) {
proc_len = i - proc_base;
#ifdef RET_OPTIMIZE
proc_end = i + 1;
#endif
break;
}
}
if ( proc_len == 0 ) {
// empty function, just return
emit_ret();
ip += 2; // OP_PUSH + OP_LEAVE
break;
}
emit_push( R_PROCBASE ); // procBase
emit_push( R_PSTACK ); // programStack
emit_op_rx_imm32( X_SUB, R_PSTACK, ci->value ); // sub programStack, 0x12
emit_lea_base_index( R_PROCBASE | R_REX, R_DATABASE, R_PSTACK ); // procBase = dataBase + programStack
emit_CheckProc( vm, ci );
break;
case OP_LEAVE:
flush_opstack();
dec_opstack(); // opstack -= 4
#ifdef DEBUG_VM
if ( opstack != 0 )
DROP( "opStack corrupted on OP_LEAVE" );
#endif
#ifdef RET_OPTIMIZE
if ( !ci->endp && proc_base >= 0 ) {
// jump to last OP_LEAVE instruction in this function
if ( inst[ ip + 0 ].op == OP_PUSH && inst[ ip + 1 ].op == OP_LEAVE ) {
// next instruction is proc_end
} else {
EmitJump( ci, OP_JUMP, proc_end );
}
break;
}
#endif
emit_pop( R_PSTACK ); // pop rsi // programStack
emit_pop( R_PROCBASE ); // pop rbp // procBase
emit_ret(); // ret
break;
case OP_CALL:
rx[0] = load_rx_opstack( R_EAX | FORCED ); // eax = *opstack
flush_volatile();
if ( opstack != 1 ) {
emit_op_rx_imm32( X_ADD, R_OPSTACK | R_REX, ( opstack - 1 ) * sizeof( int32_t ) );
EmitCallOffset( FUNC_CALL ); // call +FUNC_CALL
emit_op_rx_imm32( X_SUB, R_OPSTACK | R_REX, ( opstack - 1 ) * sizeof( int32_t ) );
} else {
EmitCallOffset( FUNC_CALL ); // call +FUNC_CALL
}
unmask_rx( rx[0] );
break;
case OP_PUSH:
inc_opstack(); // opstack += 4
if ( (ci + 1)->op == OP_LEAVE ) {
proc_base = -1;
}
break;
case OP_POP:
dec_opstack_discard(); // opstack -= 4
break;
case OP_CONST:
#ifdef CONST_OPTIMIZE
if ( ConstOptimize( vm, ci + 0, ci + 1 ) )
break;
#endif
inc_opstack(); // opstack += 4
store_item_opstack( ci );
break;
case OP_LOCAL:
inc_opstack(); // opstack += 4
store_item_opstack( ci );
break;
case OP_JUMP:
rx[0] = load_rx_opstack( R_EAX | RCONST ); dec_opstack(); // eax = *opstack; opstack -= 4
flush_volatile();
emit_CheckJump( vm, rx[0], proc_base, proc_len ); // check if eax is within current proc
#if idx64
emit_jump_index( R_INSPOINTERS, rx[0] ); // jmp qword ptr [instructionPointers + rax*8]
#else
emit_jump_index_offset( (intptr_t) instructionPointers, rx[0] ); // jmp dword ptr [instructionPointers + eax*4]
#endif
unmask_rx( rx[0] );
break;
case OP_EQ:
case OP_NE:
case OP_LTI:
case OP_LEI:
case OP_GTI:
case OP_GEI:
case OP_LTU:
case OP_LEU:
case OP_GTU:
case OP_GEU: {
rx[0] = load_rx_opstack( R_EAX | RCONST ); dec_opstack(); // eax = *opstack; opstack -= 4
rx[1] = load_rx_opstack( R_EDX | RCONST ); dec_opstack(); // edx = *opstack; opstack -= 4
emit_cmp_rx( rx[1], rx[0] ); // cmp edx, eax
unmask_rx( rx[0] );
unmask_rx( rx[1] );
EmitJump( ci, ci->op, ci->value );
break;
}
case OP_EQF:
case OP_NEF:
case OP_LTF:
case OP_LEF:
case OP_GTF:
case OP_GEF: {
if ( HasSSEFP() ) {
sx[0] = load_sx_opstack( R_XMM0 | RCONST ); dec_opstack(); // xmm0 = *opstack; opstack -= 4
sx[1] = load_sx_opstack( R_XMM1 | RCONST ); dec_opstack(); // xmm1 = *opstack; opstack -= 4
if ( ci->op == OP_EQF || ci->op == OP_NEF ) {
emit_ucomiss( sx[1], sx[0] ); // ucomiss xmm1, xmm0
} else {
emit_comiss( sx[1], sx[0] ); // comiss xmm1, xmm0
}
unmask_sx( sx[0] );
unmask_sx( sx[1] );
EmitJump( ci, ci->op, ci->value );
break;
} else {
// legacy x87 path
flush_opstack_top(); dec_opstack();
flush_opstack_top(); dec_opstack();
if ( HasFCOM() ) {
emit_fld( R_OPSTACK, 8 ); // fld dword ptr [opStack+8]
emit_fld( R_OPSTACK, 4 ); // fld dword ptr [opStack+4]
EmitString( "DF E9" ); // fucomip
EmitString( "DD D8" ); // fstp st(0)
EmitJump( ci, ci->op, ci->value );
} else {
alloc_rx( R_EAX | FORCED );
emit_fld( R_OPSTACK, 4 ); // fld dword ptr [opStack+4]
emit_fcomp( R_OPSTACK, 8 ); // fcomp dword ptr [opStack+8]
EmitString( "DF E0" ); // fnstsw ax
EmitFloatJump( ci, ci->op, ci->value );
unmask_rx( R_EAX );
}
break;
}
}
case OP_LOAD1:
case OP_LOAD2:
case OP_LOAD4:
#ifdef FPU_OPTIMIZE
if ( ci->op == OP_LOAD4 && ci->fpu && HasSSEFP() ) {
if ( addr_on_top( &var ) ) {
// address specified by CONST/LOCAL
discard_top();
var.size = 4;
if ( find_sx_var( &sx[0], &var ) ) {
// already cached in some register
mask_sx( sx[0] );
} else {
// not cached, perform load
sx[0] = alloc_sx( R_XMM0 );
emit_load_sx( sx[0], var.base, var.addr ); // xmm0 = var.base[var.addr]
set_sx_var( sx[0], &var );
}
} else {
// address stored in register
rx[0] = load_rx_opstack( R_EAX | RCONST ); // eax = *opstack
emit_CheckReg( vm, rx[0], FUNC_DATR );
sx[0] = alloc_sx( R_XMM0 );
emit_load_sx_index( sx[0], R_DATABASE, rx[0] ); // xmm0 = dataBase[eax]
unmask_rx( rx[0] );
}
store_sx_opstack( sx[0] ); // *opstack = xmm0
break;
}
#endif
switch ( ci->op ) {
case OP_LOAD1: var_size = 1; sign_extend = OP_SEX8; break;
case OP_LOAD2: var_size = 2; sign_extend = OP_SEX16; break;
default: var_size = 4; sign_extend = OP_UNDEF; break;
}
// integer path
if ( addr_on_top( &var ) ) {
// address specified by CONST/LOCAL
discard_top();
var.size = var_size;
if ( ( reg = find_rx_var( &rx[0], &var ) ) != NULL ) {
// already cached in some register
// do zero extension if needed
switch ( ci->op ) {
case OP_LOAD1:
if ( reg->ext != Z_EXT8 ) {
emit_zex8( rx[0], rx[0] ); // movzx eax, al
// invalidate any mappings that overlaps with high [8..31] bits
//var.addr += 1; var.size = 3;
//wipe_reg_range( rx_regs + rx[0], &var );
// TODO: just reduce mapping size?
reduce_map_size( reg, 1 );
// modify constant
reg->cnst.value &= 0xFF;
reg->ext = Z_EXT8;
}
break;
case OP_LOAD2:
if ( reg->ext != Z_EXT16 ) {
emit_zex16( rx[0], rx[0] ); // movzx eax, ax
// invalidate any mappings that overlaps with high [16..31] bits
//var.addr += 2; var.size = 2;
//wipe_reg_range( rx_regs + rx[0], &var );
reduce_map_size( reg, 2 );
// modify constant
reg->cnst.value &= 0xFFFF;
reg->ext = Z_EXT16;
}
break;
case OP_LOAD4:
reg->ext = Z_NONE;
break;
}
mask_rx( rx[0] );
} else {
// not cached, perform load
rx[0] = alloc_rx( R_EAX ); // allocate new register, wipe its metadata
if ( (ci+1)->op == sign_extend && sign_extend != OP_UNDEF ) {
// merge with following sign-extension instruction
switch ( ci->op ) {
case OP_LOAD1: emit_load1_sex( rx[0], var.base, var.addr ); var.size = 1; set_rx_ext( rx[0], S_EXT8 ); break; // eax = (signed byte)var.base[var.addr]
case OP_LOAD2: emit_load2_sex( rx[0], var.base, var.addr ); var.size = 2; set_rx_ext( rx[0], S_EXT16 ); break; // eax = (signed short)var.base[var.addr]
}
ip += 1; // OP_SEX8/OP_SEX16
} else {
// usual load with zero-extension
switch ( ci->op ) {
case OP_LOAD1: emit_load1( rx[0], var.base, var.addr ); var.size = 1; set_rx_ext( rx[0], Z_EXT8 ); break; // eax = (unsigned byte)var.base[var.addr]
case OP_LOAD2: emit_load2( rx[0], var.base, var.addr ); var.size = 2; set_rx_ext( rx[0], Z_EXT16 ); break; // eax = (unsigned short)var.base[var.addr]
case OP_LOAD4: emit_load4( rx[0], var.base, var.addr ); var.size = 4; set_rx_ext( rx[0], Z_NONE ); break; // eax = (dword)var.base[var.addr]
}
} // load with zero-extension
set_rx_var( rx[0], &var );
} // not cached, perform load
} else {
// address stored in register
// rx[0] = rx[1] = load_rx_opstack( R_EAX ); // target, address = *opstack
load_rx_opstack2( &rx[0], R_EDX, &rx[1], R_EAX ); // target, address = *opstack
emit_CheckReg( vm, rx[1], FUNC_DATR ); // check address bounds
if ( (ci+1)->op == sign_extend && sign_extend != OP_UNDEF ) {
// merge with following sign-extension instruction
switch ( ci->op ) {
case OP_LOAD1: emit_load1_sex_index( rx[0], R_DATABASE, rx[1] ); set_rx_ext( rx[0], S_EXT8 ); break; // target = (signed byte)[dataBase + address]
case OP_LOAD2: emit_load2_sex_index( rx[0], R_DATABASE, rx[1] ); set_rx_ext( rx[0], S_EXT16 ); break; // target = (signed short)[dataBase + address]
}
ip += 1; // OP_SEX8/OP_SEX16
} else {
// usual load with zero-extension
switch ( ci->op ) {
case OP_LOAD1: emit_load1_index( rx[0], R_DATABASE, rx[1] ); set_rx_ext( rx[0], Z_EXT8 ); break; // target = (unsigned byte)[dataBase + address]
case OP_LOAD2: emit_load2_index( rx[0], R_DATABASE, rx[1] ); set_rx_ext( rx[0], Z_EXT16 ); break; // target = (unsigned short)[dataBase + address]
default: emit_load4_index( rx[0], R_DATABASE, rx[1] ); set_rx_ext( rx[0], Z_NONE ); break; // target = (dword)[dataBase + address]
}
}
if ( rx[1] != rx[0] ) {
unmask_rx( rx[1] );
}
}
store_rx_opstack( rx[0] ); // *opstack = target
break;
case OP_STORE1:
case OP_STORE2:
case OP_STORE4:
if ( scalar_on_top() && ci->op == OP_STORE4 && HasSSEFP() ) {
sx[0] = load_sx_opstack( R_XMM0 | RCONST ); dec_opstack(); // xmm0 = *opstack; opstack -= 4
if ( addr_on_top( &var ) ) {
// address specified by CONST/LOCAL
discard_top(); dec_opstack();
emit_store_sx( sx[0], var.base, var.addr ); // baseReg[n] = xmm0
var.size = 4;
wipe_var_range( &var );
set_sx_var( sx[0], &var ); // update metadata
} else {
rx[1] = load_rx_opstack( R_EDX | RCONST ); dec_opstack(); // edx = *opstack; opstack -= 4
emit_CheckReg( vm, rx[1], FUNC_DATW );
emit_store_sx_index( sx[0], R_DATABASE, rx[1] ); // dataBase[edx] = xmm0
unmask_rx( rx[1] );
wipe_vars(); // unknown/dynamic address, wipe all register mappings
}
unmask_sx( sx[0] );
} else {
// integer path
rx[0] = load_rx_opstack( R_EAX | RCONST ); dec_opstack(); // eax = *opstack; opstack -= 4
if ( addr_on_top( &var ) ) {
// address specified by CONST/LOCAL
discard_top(); dec_opstack();
switch ( ci->op ) {
case OP_STORE1: emit_store1_rx( rx[0], var.base, var.addr ); var.size = 1; break; // (byte*)var.base[var.addr] = al
case OP_STORE2: emit_store2_rx( rx[0], var.base, var.addr ); var.size = 2; break; // (short*)var.base[var.addr] = ax
default: emit_store_rx( rx[0], var.base, var.addr ); var.size = 4; break; // (dword*)var.base[var.addr] = eax
}
wipe_var_range( &var );
set_rx_var( rx[0], &var ); // update metadata
} else {
rx[1] = load_rx_opstack( R_EDX | RCONST ); dec_opstack(); // edx = *opstack; opstack -= 4
emit_CheckReg( vm, rx[1], FUNC_DATW );
switch ( ci->op ) {
case OP_STORE1: emit_store1_index( rx[0], R_DATABASE, rx[1] ); break; // (byte*)dataBase[edx] = al
case OP_STORE2: emit_store2_index( rx[0], R_DATABASE, rx[1] ); break; // (short*)dataBase[edx] = ax
default: emit_store4_index( rx[0], R_DATABASE, rx[1] ); break; // (dword*)dataBase[edx] = eax
}
unmask_rx( rx[1] );
wipe_vars(); // unknown/dynamic address, wipe all register mappings
}
unmask_rx( rx[0] );
}
break;
case OP_ARG:
var.base = R_PROCBASE;
var.addr = ci->value;
var.size = 4;
wipe_var_range( &var );
if ( scalar_on_top() && HasSSEFP() ) {
sx[0] = load_sx_opstack( R_XMM0 | RCONST ); dec_opstack(); // xmm0 = *opstack; opstack -=4
emit_store_sx( sx[0], var.base, var.addr ); // [procBase + v] = xmm0
unmask_sx( sx[0] );
} else {
if ( const_on_top() && top_value() != 0 ) {
n = top_value(); discard_top(); dec_opstack();
emit_store_imm32( n, var.base, var.addr ); // [procBase + v] = n
} else {
rx[0] = load_rx_opstack( R_EAX | RCONST ); dec_opstack(); // eax = *opstack; opstack -=4
emit_store_rx( rx[0], var.base, var.addr ); // [procBase + v] = eax
unmask_rx( rx[0] );
}
}
break;
case OP_BLOCK_COPY:
rx[0] = load_rx_opstack( R_EDX | FORCED ); dec_opstack(); // edx - src
rx[1] = load_rx_opstack( R_EAX | FORCED ); dec_opstack(); // eax - dst
rx[2] = alloc_rx( R_ECX | FORCED ); // flush and reserve ecx register
mov_rx_imm32( rx[2], ci->value >> 2 ); // mov ecx, 0x12345678 / 4
EmitCallOffset( FUNC_BCPY );
unmask_rx( rx[2] );
unmask_rx( rx[1] );
unmask_rx( rx[0] );
wipe_vars();
break;
case OP_SEX8:
case OP_SEX16:
case OP_NEGI:
case OP_BCOM:
if ( ci->op == OP_SEX8 || ci->op == OP_SEX16 ) {
// skip sign-extension for `if ( var == 0 )` tests if we already zero-extended
reg = rx_on_top();
if ( reg && (ci+1)->op == OP_CONST && (ci+1)->value == 0 && ( (ci+2)->op == OP_EQ || (ci+2)->op == OP_NE ) ) {
if ( !(ci+1)->jused && !(ci+2)->jused ) {
if ( ci->op == OP_SEX8 && reg->ext == Z_EXT8 ) {
break;
}
if ( ci->op == OP_SEX16 && reg->ext == Z_EXT16 ) {
break;
}
}
}
}
rx[0] = load_rx_opstack( R_EAX ); // eax = *opstack
switch ( ci->op ) {
case OP_SEX8: emit_sex8( rx[0], rx[0] ); break; // movsx eax, al
case OP_SEX16: emit_sex16( rx[0], rx[0] ); break; // movsx eax, ax
case OP_NEGI: emit_neg_rx( rx[0] ); break; // neg eax
case OP_BCOM: emit_not_rx( rx[0] ); break; // not eax
}
store_rx_opstack( rx[0] ); // *opstack = eax
break;
case OP_ADD:
rx[0] = load_rx_opstack( R_EAX | RCONST ); dec_opstack(); // eax = *opstack
rx[1] = load_rx_opstack( R_ECX ); // opstack-=4; ecx = *opstack
#ifdef CONST_OPTIMIZE
// optimize OP_ADD + OP_CONST + OP_ADD
if ( !(ci+1)->jused && (ci+1)->op == OP_CONST && (ci+2)->op == OP_ADD ) {
emit_lea_base_index_offset( rx[1], rx[1], rx[0], (ci+1)->value ); // lea ecx, [ecx + eax + const]
ip += 2; // OP_CONST + OP_ADD
} else
#endif
emit_add_rx( rx[1], rx[0] ); // add ecx, eax
unmask_rx( rx[0] );
store_rx_opstack( rx[1] ); // *opstack = ecx
break;
//case OP_ADD:
case OP_SUB:
case OP_MULI:
case OP_MULU:
case OP_BAND:
case OP_BOR:
case OP_BXOR:
rx[0] = load_rx_opstack( R_EAX | RCONST ); dec_opstack(); // eax = *opstack
rx[1] = load_rx_opstack( R_ECX ); // opstack-=4; ecx = *opstack
switch ( ci->op ) {
//case OP_ADD: emit_add_rx( rx[1], rx[0] ); break; // add ecx, eax
case OP_SUB: emit_sub_rx( rx[1], rx[0] ); break; // sub ecx, eax
case OP_MULI: emit_mul_rx( rx[1], rx[0] ); break; // imul ecx, eax
case OP_MULU: emit_mul_rx( rx[1], rx[0] ); break; // imul ecx, eax
case OP_BAND: emit_and_rx( rx[1], rx[0] ); break; // and ecx, eax
case OP_BOR: emit_or_rx( rx[1], rx[0] ); break; // or ecx, eax
case OP_BXOR: emit_xor_rx( rx[1], rx[0] ); break; // xor ecx, eax
}
unmask_rx( rx[0] );
store_rx_opstack( rx[1] ); // *opstack = ecx
break;
case OP_LSH:
case OP_RSHU:
case OP_RSHI:
rx[0] = load_rx_opstack( R_ECX | FORCED | RCONST ); dec_opstack(); // ecx = *opstack
rx[1] = load_rx_opstack( R_EAX ); // opstack-=4; eax = *opstack
switch ( ci->op ) {
case OP_LSH: emit_shl_rx( rx[1] ); break; // shl eax, cl
case OP_RSHU: emit_shr_rx( rx[1] ); break; // shr eax, cl
case OP_RSHI: emit_sar_rx( rx[1] ); break; // sar eax, cl
}
unmask_rx( rx[0] );
store_rx_opstack( rx[1] ); // *opstack = eax
break;
case OP_DIVI:
case OP_DIVU:
case OP_MODI:
case OP_MODU:
rx[1] = load_rx_opstack( R_EAX | FORCED | SHIFT4 ); // eax = *(opstack-4)
rx[2] = alloc_rx( R_EDX | FORCED ); // flush and reserve edx register
rx[0] = load_rx_opstack( R_ECX | RCONST | XMASK ); dec_opstack(); // ecx = *opstack; opstack -= 4
if ( rx[0] == rx[2] || rx[1] == rx[2] )
DROP( "incorrect register setup, rx_mask=%04x", build_rx_mask() );
if ( ci->op == OP_DIVI || ci->op == OP_MODI ) {
emit_cdq(); // cdq
emit_idiv_rx( rx[0] ); // idiv eax, ecx
} else {
emit_xor_rx( rx[2], rx[2] ); // xor edx, edx
emit_udiv_rx( rx[0] ); // div ecx
}
unmask_rx( rx[0] );
if ( ci->op == OP_DIVI || ci->op == OP_DIVU ) {
unmask_rx( rx[2] );
store_rx_opstack( rx[1] ); // *opstack = eax
} else {
unmask_rx( rx[1] );
store_rx_opstack( rx[2] ); // *opstack = edx
}
break;
case OP_ADDF:
case OP_SUBF:
case OP_MULF:
case OP_DIVF:
if ( HasSSEFP() ) {
sx[0] = load_sx_opstack( R_XMM0 | RCONST ); dec_opstack(); // xmm0 = *opstack
sx[1] = load_sx_opstack( R_XMM1 ); // opstack -= 4; xmm1 = *opstack
switch ( ci->op ) {
case OP_ADDF: emit_add_sx( sx[1], sx[0] ); break; // xmm1 = xmm1 + xmm0
case OP_SUBF: emit_sub_sx( sx[1], sx[0] ); break; // xmm1 = xmm1 - xmm0
case OP_MULF: emit_mul_sx( sx[1], sx[0] ); break; // xmm1 = xmm1 * xmm0
case OP_DIVF: emit_div_sx( sx[1], sx[0] ); break; // xmm1 = xmm1 / xmm0
}
unmask_sx( sx[0] );
store_sx_opstack( sx[1] ); // *opstack = xmm1
} else {
// legacy x87 path
flush_opstack_top(); dec_opstack(); // value
flush_opstack_top(); // target
emit_fld( R_OPSTACK, opstack * sizeof( int32_t ) );
switch ( ci->op ) {
case OP_ADDF: emit_fadd( R_OPSTACK, ( opstack + 1 ) * sizeof( int32_t ) ); break;
case OP_SUBF: emit_fsub( R_OPSTACK, ( opstack + 1 ) * sizeof( int32_t ) ); break;
case OP_MULF: emit_fmul( R_OPSTACK, ( opstack + 1 ) * sizeof( int32_t ) ); break;
case OP_DIVF: emit_fdiv( R_OPSTACK, ( opstack + 1 ) * sizeof( int32_t ) ); break;
}
emit_fstp( R_OPSTACK, opstack * sizeof( int32_t ) );
}
break;
case OP_NEGF:
if ( HasSSEFP() ) {
sx[0] = load_sx_opstack( R_XMM0 | RCONST ); // xmm0 = *opstack
sx[1] = alloc_sx( R_XMM1 );
emit_xor_sx( sx[1], sx[1] ); // xorps xmm1, xmm1
emit_sub_sx( sx[1], sx[0] ); // subps xmm1, xmm0
unmask_sx( sx[0] );
store_sx_opstack( sx[1] ); // *opstack = xmm1
} else {
// legacy x87 path
flush_opstack_top();
emit_fld( R_OPSTACK, opstack * sizeof( int32_t ) ); // fld dword ptr [opStack]
EmitString( "D9 E0" ); // fchs
emit_fstp( R_OPSTACK, opstack * sizeof( int32_t ) ); // fstp dword ptr [opStack]
}
break;
case OP_CVIF:
if ( HasSSEFP() ) {
sx[0] = alloc_sx( R_XMM0 );
rx[0] = load_rx_opstack( R_EAX | RCONST ); // eax = *opstack
emit_cvtsi2ss( sx[0], rx[0] ); // cvtsi2ss xmm0, eax
unmask_rx( rx[0] );
store_sx_opstack( sx[0] ); // *opstack = xmm0
} else {
flush_opstack_top();
emit_fild( R_OPSTACK, opstack * sizeof( int32_t ) ); // fild dword ptr [opStack]
emit_fstp( R_OPSTACK, opstack * sizeof( int32_t ) ); // fstp dword ptr [opStack]
}
break;
case OP_CVFI:
if ( HasSSEFP() ) {
rx[0] = alloc_rx( R_EAX );
sx[0] = load_sx_opstack( R_XMM0 | RCONST ); // xmm0 = *opstack
emit_cvttss2si( rx[0], sx[0] ); // cvttss2si eax, xmm0
unmask_sx( sx[0] );
store_rx_opstack( rx[0] ); // *opstack = eax
} else {
static int32_t fp_cw[2] = { 0x0000, 0x0F7F }; // [0] - current value, [1] - round towards zero
flush_opstack_top();
alloc_rx( R_EAX | FORCED );
emit_fld( R_OPSTACK, opstack * sizeof( int32_t ) ); // fld dword ptr [opStack]
mov_rx_ptr( R_EAX, &fp_cw );
EmitString( "9B D9 38" ); // fnstcw word ptr [eax]
EmitString( "D9 68 04" ); // fldcw word ptr [eax+4]
emit_fistp( R_OPSTACK, opstack * sizeof( int32_t ) ); // fistp dword ptr [opStack]
EmitString( "D9 28" ); // fldcw word ptr [eax]
unmask_rx( R_EAX );
}
break;
#ifdef MACRO_OPTIMIZE
case MOP_ADD:
case MOP_SUB:
case MOP_BAND:
case MOP_BOR:
case MOP_BXOR:
if ( !EmitMOPs( vm, ci, ci->op ) )
Com_Error( ERR_FATAL, "VM_CompileX86: bad opcode %02X", ci->op );
break;
#endif
default:
Com_Error( ERR_FATAL, "VM_CompileX86: bad opcode %02X", ci->op );
VM_FreeBuffers();
return qfalse;
}
}
// ****************
// system functions
// ****************
EmitAlign( FUNC_ALIGN );
funcOffset[FUNC_CALL] = compiledOfs;
EmitCallFunc( vm );
EmitAlign( FUNC_ALIGN );
funcOffset[FUNC_BCPY] = compiledOfs;
EmitBCPYFunc( vm );
// ***************
// error functions
// ***************
// bad jump
EmitAlign( FUNC_ALIGN );
funcOffset[FUNC_BADJ] = compiledOfs;
EmitBADJFunc( vm );
// error jump
EmitAlign( FUNC_ALIGN );
funcOffset[FUNC_ERRJ] = compiledOfs;
EmitERRJFunc( vm );
// programStack overflow
EmitAlign( FUNC_ALIGN );
funcOffset[FUNC_PSOF] = compiledOfs;
EmitPSOFFunc( vm );
// opStack overflow
EmitAlign( FUNC_ALIGN );
funcOffset[FUNC_OSOF] = compiledOfs;
EmitOSOFFunc( vm );
// read access range violation
EmitAlign( FUNC_ALIGN );
funcOffset[ FUNC_DATR ] = compiledOfs;
EmitDATRFunc( vm );
// write access range violation
EmitAlign( FUNC_ALIGN );
funcOffset[ FUNC_DATW ] = compiledOfs;
EmitDATWFunc( vm );
EmitAlign( sizeof( intptr_t ) ); // for instructionPointers
#if JUMP_OPTIMIZE
if ( pass == PASS_COMPRESS && ++num_compress < NUM_COMPRESSIONS && jumpSizeChanged ) {
pass = PASS_COMPRESS;
goto __compile;
}
if ( jumpSizeChanged ) {
if ( pass == PASS_EXPAND_ONLY ) {
pass = PASS_EXPAND_ONLY;
goto __compile;
}
}
#endif
} // for( pass = 0; pass < n; pass++ )
n = header->instructionCount * sizeof( intptr_t );
if ( code == NULL ) {
code = (byte*)VM_Alloc_Compiled( vm, PAD(compiledOfs,8), n );
if ( code == NULL ) {
return qfalse;
}
instructionPointers = (intptr_t*)(byte*)(code + PAD(compiledOfs,8));
//vm->instructionPointers = instructionPointers; // for debug purposes?
pass = NUM_PASSES-1; // repeat last pass
goto __compile;
}
#ifdef DUMP_CODE
dump_code( vm->name, code, compiledOfs );
#endif
// offset all the instruction pointers for the new location
for ( i = 0; i < header->instructionCount; i++ ) {
if ( !inst[i].jused ) {
instructionPointers[ i ] = (intptr_t)badJumpPtr;
continue;
}
instructionPointers[ i ] = (intptr_t)vm->codeBase.ptr + instructionOffsets[ i ];
}
VM_FreeBuffers();
#ifdef VM_X86_MMAP
if ( mprotect( vm->codeBase.ptr, vm->codeSize, PROT_READ|PROT_EXEC ) ) {
VM_Destroy_Compiled( vm );
Com_Printf( S_COLOR_YELLOW "VM_CompileX86: mprotect failed\n" );
return qfalse;
}
#elif _WIN32
{
DWORD oldProtect = 0;
// remove write permissions.
if ( !VirtualProtect( vm->codeBase.ptr, vm->codeSize, PAGE_EXECUTE_READ, &oldProtect ) ) {
VM_Destroy_Compiled( vm );
Com_Printf( S_COLOR_YELLOW "VM_CompileX86: VirtualProtect failed\n" );
return qfalse;
}
}
#endif
vm->destroy = VM_Destroy_Compiled;
Com_Printf( "VM file %s compiled to %i bytes of code\n", vm->name, compiledOfs );
return qtrue;
}
/*
=================
VM_Alloc_Compiled
=================
*/
static void *VM_Alloc_Compiled( vm_t *vm, int codeLength, int tableLength )
{
void *ptr;
int length;
length = codeLength + tableLength;
#ifdef VM_X86_MMAP
ptr = mmap( NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0 );
if ( ptr == MAP_FAILED ) {
Com_Error( ERR_FATAL, "VM_CompileX86: mmap failed" );
return NULL;
}
#elif _WIN32
// allocate memory with EXECUTE permissions under windows.
ptr = VirtualAlloc( NULL, length, MEM_COMMIT, PAGE_EXECUTE_READWRITE );
if ( !ptr ) {
Com_Error( ERR_FATAL, "VM_CompileX86: VirtualAlloc failed" );
return NULL;
}
#else
ptr = malloc( length );
if ( !ptr ) {
Com_Error( ERR_FATAL, "VM_CompileX86: malloc failed" );
return NULL;
}
#endif
vm->codeBase.ptr = (byte*)ptr;
vm->codeLength = codeLength;
vm->codeSize = length;
return vm->codeBase.ptr;
}
/*
==============
VM_Destroy_Compiled
==============
*/
static void VM_Destroy_Compiled( vm_t* vm )
{
#ifdef VM_X86_MMAP
munmap( vm->codeBase.ptr, vm->codeSize );
#elif _WIN32
VirtualFree( vm->codeBase.ptr, 0, MEM_RELEASE );
#else
free( vm->codeBase.ptr );
#endif
vm->codeBase.ptr = NULL;
}
/*
==============
VM_CallCompiled
Sets up the stack image and enters the generated code
==============
*/
int32_t VM_CallCompiled( vm_t *vm, int nargs, int32_t *args )
{
int32_t opStack[MAX_OPSTACK_SIZE];
int32_t stackOnEntry;
int32_t *image;
#if id386
int32_t *oldOpTop;
#endif
int i;
// we might be called recursively, so this might not be the very top
stackOnEntry = vm->programStack;
#if id386
oldOpTop = vm->opStackTop;
#endif
vm->programStack -= ( MAX_VMMAIN_CALL_ARGS + 2 ) * sizeof( int32_t );
// set up the stack frame
image = (int32_t*) ( vm->dataBase + vm->programStack );
for ( i = 0; i < nargs; i++ ) {
image[i + 2] = args[i];
}
// these only needed for interpreter:
// image[1] = 0; // return stack
// image[0] = -1; // will terminate loop on return
#ifdef DEBUG_VM
opStack[0] = 0xDEADC0DE;
#endif
opStack[1] = 0;
vm->opStack = opStack;
#if id386
vm->opStackTop = opStack + ARRAY_LEN( opStack ) - 1;
#endif
vm->codeBase.func(); // go into generated code
#ifdef DEBUG_VM
if ( opStack[0] != 0xDEADC0DE ) {
Com_Error( ERR_DROP, "%s(%s): opStack corrupted in compiled code", __func__, vm->name );
}
if ( vm->programStack != (int32_t)( stackOnEntry - ( MAX_VMMAIN_CALL_ARGS + 2 ) * sizeof( int32_t ) ) ) {
Com_Error( ERR_DROP, "%s(%s): programStack corrupted in compiled code", __func__, vm->name );
}
#endif
vm->programStack = stackOnEntry;
#if id386
vm->opStackTop = oldOpTop;
#endif
return opStack[1];
}