/*
===========================================================================
Copyright (C) 1999-2005 Id Software, Inc.
Copyright (C) 2020-2021 Quake3e project

This file is part of Quake III Arena source code.

Quake III Arena source code is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.

Quake III Arena source code is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Quake III Arena source code; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
===========================================================================
*/

// load time compiler and execution environment for ARM aarch64
// with dynamic register allocation and various optimizations

#ifdef _WIN32
#include <windows.h>
#pragma warning( disable : 4245 ) // conversion from int to XXX, signed/unsigned mismatch
#pragma warning( disable : 4146 ) // unary minus operator applied to unsigned type, result still unsigned
#else
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <math.h>
#endif

#include "vm_local.h"
#define NUM_PASSES 1

// additional integrity checks
#define DEBUG_VM

// various definitions to enable/disable particular optimizations

// use dynamic allocation of integer/scalar registers
#define DYN_ALLOC_RX
#define DYN_ALLOC_SX

// re-use constants previously stored in scratch registers
#define CONST_CACHE_RX
#define CONST_CACHE_SX

#define REGS_OPTIMIZE
#define FPU_OPTIMIZE
#define CONST_OPTIMIZE
#define ADDR_OPTIMIZE
#define LOAD_OPTIMIZE
#define RET_OPTIMIZE
#define USE_LITERAL_POOL

// allow sharing both variables and constants in registers
#define REG_TYPE_MASK
// number of variable/memory mappings per register
#define REG_MAP_COUNT 4

#define FUNC_ALIGN 16

//#define DUMP_CODE

// register map

// general-purpose registers:
// R0..R17 can be used as scratch registers
// R18 must not be used, especially on Windows
// R19..R29 and R31 must be preserved

// FPU scalar registers:
// S0..S7 can be used as scratch registers
// S8..S15 must be preserved
// S16..S31 can be used as scratch registers
#define R0 0 // scratch, return value
#define R1 1 // scratch
#define R2 2 // scratch
#define R3 3 // scratch
#define R4 4 // scratch
#define R5 5 // scratch
#define R6 6 // scratch
#define R7 7 // scratch
#define R8 8 // scratch, indirect return value
#define R9 9 // scratch
#define R10 10 // scratch
#define R11 11 // scratch
#define R12 12 // scratch
#define R13 13 // scratch
#define R14 14 // scratch
#define R15 15 // scratch
#define R16 16 // intra-procedure-call scratch
#define R17 17 // intra-procedure-call scratch - opStack shift
#define R18 18 // ! platform-specific, do not use
#define R19 19 // * litBase
#define R20 20 // * vmBase
#define R21 21 // * opStack
#define R22 22 // * opStackTop
#define R23 23 // * instructionPointers
#define R24 24 // * programStack
#define R25 25 // * programStackBottom
#define R26 26 // * dataBase
#define R27 27 // * dataMask
#define R28 28 // * procBase
#define R29 29 // * FP
#define R30 30 // link register
#define R31 31 // stack or zero

#define FP R29
#define LR R30
#define SP R31

#define rLITBASE R19
#define rVMBASE R20
#define rOPSTACK R21
#define rOPSTACKTOP R22
#define rINSPOINTERS R23
#define rPSTACK R24
#define rPSTACKBOTTOM R25
#define rDATABASE R26
#define rDATAMASK R27
#define rPROCBASE R28

#define S0 0
#define S1 1
#define S2 2

typedef enum
{
	FUNC_ENTR,
	FUNC_BCPY,
	FUNC_CALL,
	FUNC_SYSC,
	FUNC_SYSF,
	FUNC_PSOF,
	FUNC_OSOF,
	FUNC_BADJ,
	FUNC_OUTJ,
	FUNC_BADR,
	FUNC_BADW,
	OFFSET_T_LAST
} offset_t;


static uint32_t *code;
static uint32_t compiledOfs;

static instruction_t *inst = NULL;

static uint32_t ip;
static uint32_t pass;
static uint32_t savedOffset[ OFFSET_T_LAST ];
// literal pool
#ifdef USE_LITERAL_POOL

#define MAX_LITERALS 4096
#define LIT_HASH_SIZE 512
#define LIT_HASH_FUNC(v) ((v*157)&(LIT_HASH_SIZE-1))

typedef struct literal_s {
	struct literal_s *next;
	uint32_t value;
} literal_t;

static uint32_t numLiterals;
static literal_t *litHash[ LIT_HASH_SIZE ];
static literal_t litList[ MAX_LITERALS ];

static void VM_InitLiterals( void )
{
	Com_Memset( litHash, 0, sizeof( litHash ) );
	Com_Memset( litList, 0, sizeof( litList ) );
	numLiterals = 0;
}

static int VM_SearchLiteral( const uint32_t value )
{
	uint32_t h = LIT_HASH_FUNC( value );
	literal_t *lt = litHash[ h ];

	while ( lt ) {
		if ( lt->value == value ) {
			return (lt - &litList[0]);
		}
		lt = lt->next;
	}

	if ( numLiterals >= ARRAY_LEN( litList ) ) {
		return -1;
	}

	lt = &litList[ numLiterals ];
	lt->next = litHash[ h ];
	lt->value = value;
	litHash[ h ] = lt;

	return numLiterals++;
}
#endif // USE_LITERAL_POOL
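// Note (illustrative): VM_SearchLiteral() returns an index into litList; the
// emitter later turns that index into a load from the literal pool base, e.g.
// emit( VLDRi( reg, rLITBASE, litIndex * 4 ) ) as done in emit_MOVSi() below,
// so repeated constants share a single pool slot instead of inline sequences.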
#ifdef _MSC_VER
#define DROP( reason, ... ) \
	do { \
		VM_FreeBuffers(); \
		Com_Error( ERR_DROP, "%s: " reason, __func__, __VA_ARGS__ ); \
	} while(0)
#else
#define DROP( reason, args... ) \
	do { \
		VM_FreeBuffers(); \
		Com_Error( ERR_DROP, "%s: " reason, __func__, ##args ); \
	} while(0)
#endif


static void VM_FreeBuffers( void )
{
	// should be freed in reversed allocation order
	//if ( instructionOffsets ) {
	//	Z_Free( instructionOffsets );
	//	instructionOffsets = NULL;
	//}

	if ( inst ) {
		Z_Free( inst );
		inst = NULL;
	}
}


static void VM_Destroy_Compiled( vm_t *vm )
{
	if ( vm->codeBase.ptr )
	{
#ifdef _WIN32
		VirtualFree( vm->codeBase.ptr, 0, MEM_RELEASE );
#else
		if ( munmap( vm->codeBase.ptr, vm->codeLength ) )
			Com_Printf( S_COLOR_RED "%s(): memory unmap failed, possible memory leak!\n", __func__ );
#endif
	}

	vm->codeBase.ptr = NULL;
}


static void __attribute__((__noreturn__)) OutJump( void )
{
	//Com_Error( ERR_NOTDROP, "program tried to execute code outside VM" );
}


static void __attribute__((__noreturn__)) BadJump( void )
{
	//Com_Error( ERR_NOTDROP, "program tried to execute code at bad location inside VM" );
}


static void __attribute__((__noreturn__)) ErrBadProgramStack( void )
{
	//Com_Error( ERR_NOTDROP, "program tried to overflow programStack" );
}


static void __attribute__((__noreturn__)) ErrBadOpStack( void )
{
	//Com_Error( ERR_NOTDROP, "program tried to overflow opStack" );
}


static void __attribute__( ( __noreturn__ ) ) ErrBadDataRead( void )
{
	//Com_Error( ERR_NOTDROP, "program tried to read out of data segment" );
}


static void __attribute__( ( __noreturn__ ) ) ErrBadDataWrite( void )
{
	//Com_Error( ERR_NOTDROP, "program tried to write out of data segment" );
}


static void emit( uint32_t isn )
{
	if ( code )
	{
		code[ compiledOfs >> 2 ] = isn;
	}

	compiledOfs += 4;
}
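// Note (illustrative): emit() appends one 32-bit AArch64 opcode built from the
// macros below, e.g. emit( MOV32( R0, R1 ) ). When 'code' is NULL (presumably
// during a sizing pass) nothing is written but compiledOfs still advances, so
// the same generator code can both measure and fill the output buffer.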
// conditions
#define EQ (0b0000) // equal/equals zero
#define NE (0b0001) // not equal
#define CS (0b0010) // unsigned higher or same
#define HS CS // unsigned higher or same
#define CC (0b0011) // unsigned lower
#define LO CC // unsigned lower
#define MI (0b0100) // minus/negative
#define PL (0b0101) // plus/positive or zero
#define VS (0b0110) // overflow
#define VC (0b0111) // no overflow
#define HI (0b1000) // unsigned higher
#define LS (0b1001) // unsigned lower or same
#define GE (0b1010) // signed greater or equal
#define LT (0b1011) // signed less than
#define GT (0b1100) // signed greater than
#define LE (0b1101) // signed less than or equal
#define AL (0b1110) // always
#define NV (0b1111) // never

#define WZR 0b11111
#define XZR 0b11111

#define NOP ( (0b1101010100<<22) | (0b000011<<16) | (0b00100000<<8) | 0b00011111 )
#define BRK(imm16) ( (0b11010100001<<21) | (imm16<<5) )
#define RET(Rn) ( (0b1101011<<25) | (0b0010<<21) | (0b11111<<16) | (0b000000<<10) | (Rn<<5) | 0b00000 /*Rm*/ )

#define MOVZ32(Rd,imm16) ( (0<<31) /*sf*/ | (0b10100101<<23) | (0b00<<21) | (((imm16)&0xFFFF)<<5) | Rd )
#define MOVZ32_16(Rd,imm16) ( (0<<31) /*sf*/ | (0b10100101<<23) | (0b01<<21) | (((imm16)&0xFFFF)<<5) | Rd )
#define MOVZ64(Rd,imm16) ( (1<<31) /*sf*/ | (0b10100101<<23) | (0b00<<21) | (((imm16)&0xFFFF)<<5) | Rd )

#define MOVK32_16(Rd,imm16) ( (0<<31) /*sf*/ | (0b11100101<<23) | (0b01<<21) | (((imm16)&0xFFFF)<<5) | Rd )
#define MOVK64_16(Rd,imm16) ( (1<<31) /*sf*/ | (0b11100101<<23) | (0b01<<21) | (((imm16)&0xFFFF)<<5) | Rd )
#define MOVK64_32(Rd,imm16) ( (1<<31) /*sf*/ | (0b11100101<<23) | (0b10<<21) | (((imm16)&0xFFFF)<<5) | Rd )
#define MOVK64_48(Rd,imm16) ( (1<<31) /*sf*/ | (0b11100101<<23) | (0b11<<21) | (((imm16)&0xFFFF)<<5) | Rd )

#define MOVN32(Rd,imm16) ( (0<<31) /*sf*/ | (0b00100101<<23) | (0b00<<21) | ((imm16&0xFFFF)<<5) | Rd )
#define MOVN32_16(Rd,imm16) ( (0<<31) /*sf*/ | (0b00100101<<23) | (0b01<<21) | ((imm16&0xFFFF)<<5) | Rd )
#define MOVN64(Rd,imm16) ( (1<<31) /*sf*/ | (0b00100101<<23) | (0b00<<21) | ((imm16&0xFFFF)<<5) | Rd )
#define MOVN64_16(Rd,imm16) ( (1<<31) /*sf*/ | (0b00100101<<23) | (0b01<<21) | ((imm16&0xFFFF)<<5) | Rd )
#define MOVN64_32(Rd,imm16) ( (1<<31) /*sf*/ | (0b00100101<<23) | (0b10<<21) | ((imm16&0xFFFF)<<5) | Rd )
#define MOVN64_48(Rd,imm16) ( (1<<31) /*sf*/ | (0b00100101<<23) | (0b11<<21) | ((imm16&0xFFFF)<<5) | Rd )

#define ORR32(Rd, Rn, Rm) ( (0<<31) /*sf*/ | 0b0101010 << 24 | 0b00<<22 /*shift*/ | (0<<21) /*N*/ | (Rm<<16) | 0b000000<<10 /*imm6*/ | (Rn<<5) | Rd )
#define ORR64(Rd, Rn, Rm) ( (1<<31) /*sf*/ | 0b0101010 << 24 | 0b00<<22 /*shift*/ | (0<<21) /*N*/ | (Rm<<16) | 0b000000<<10 /*imm6*/ | (Rn<<5) | Rd )

#define EOR32(Rd, Rn, Rm) ( (0<<31) /*sf*/ | (0b1001010<<24) | 0b00<<22 /*shift*/ | (0<<21) /*N*/ | (Rm<<16) | 0b000000<<10 /*imm6*/ | (Rn<<5) | Rd )
#define EOR64(Rd, Rn, Rm) ( (1<<31) /*sf*/ | (0b1001010<<24) | 0b00<<22 /*shift*/ | (0<<21) /*N*/ | (Rm<<16) | 0b000000<<10 /*imm6*/ | (Rn<<5) | Rd )

#define AND32(Rd, Rn, Rm) ( (0<<31) /*sf*/ | (0b0001010<<24) | 0b00<<22 /*shift*/ | (0<<21) /*N*/ | (Rm<<16) | 0b000000<<10 /*imm6*/ | (Rn<<5) | Rd )
#define AND64(Rd, Rn, Rm) ( (1<<31) /*sf*/ | (0b0001010<<24) | 0b00<<22 /*shift*/ | (0<<21) /*N*/ | (Rm<<16) | 0b000000<<10 /*imm6*/ | (Rn<<5) | Rd )

#define AND32i(Rd, Rn, immrs) ( (0<<31) /*sf*/ | (0b00<<29) | (0b100100 << 23) | ((immrs) << 10) | ((Rn)<<5) | (Rd) )
#define ORR32i(Rd, Rn, immrs) ( (0<<31) /*sf*/ | (0b01<<29) | (0b100100 << 23) | ((immrs) << 10) | ((Rn)<<5) | (Rd) )
#define EOR32i(Rd, Rn, immrs) ( (0<<31) /*sf*/ | (0b10<<29) | (0b100100 << 23) | ((immrs) << 10) | ((Rn)<<5) | (Rd) )

#define MOV32(Rd, Rm) ORR32(Rd, WZR, Rm)
#define MOV64(Rd, Rm) ORR64(Rd, XZR, Rm)

#define MOV32i(Rd, immrs) ORR32i(Rd, WZR, immrs)

// MUL, alias for MADD
#define MUL32(Rd, Rn, Rm) ( (0<<31) | (0b00<<29) | (0b11011<<24) | (0b000<<21) | (Rm<<16) | (0<<15) | (WZR<<10) /*Ra*/ | (Rn<<5) | Rd )

// ADD (shifted register)
#define ADD32(Rd, Rn, Rm) ( (0<<31) | (0b0001011000<<21) | (Rm<<16) | (0b000000<<10) /*imm6*/ | (Rn<<5) | Rd )
#define ADD64(Rd, Rn, Rm) ( (1<<31) | (0b0001011000<<21) | (Rm<<16) | (0b000000<<10) /*imm6*/ | (Rn<<5) | Rd )

// ADD (immediate)
#define ADD32i(Rd, Rn, pimm12) ( (0<<31) | (0b00100010<<23) | (0<<22) /*sh*/ | ((pimm12)<<10) | (Rn<<5) | Rd )
#define ADD64i(Rd, Rn, pimm12) ( (1<<31) | (0b00100010<<23) | (0<<22) /*sh*/ | ((pimm12)<<10) | (Rn<<5) | Rd )

// SUB (shifted register)
#define SUB32(Rd, Rn, Rm) ( (0<<31) | 0b1001011000<<21 | (Rm<<16) | 0b000000<<10 /*imm6*/ | (Rn<<5) | Rd )
#define SUB64(Rd, Rn, Rm) ( (1<<31) | 0b1001011000<<21 | (Rm<<16) | 0b000000<<10 /*imm6*/ | (Rn<<5) | Rd )

// SUB (immediate)
#define SUB32i(Rd, Rn, pimm12) ( (0<<31) | (0b10100010<<23) | (0<<22) /*sh*/ | ((pimm12)<<10) | (Rn<<5) | Rd )
#define SUB64i(Rd, Rn, pimm12) ( (1<<31) | (0b10100010<<23) | (0<<22) /*sh*/ | ((pimm12)<<10) | (Rn<<5) | Rd )

#define SDIV32(Rd, Rn, Rm) ( (0<<31) | (0b00<<29) | (0b11010110<<21) | (Rm<<16) | (0b00001<<11) | (1<<10) | (Rn<<5) | Rd )
#define UDIV32(Rd, Rn, Rm) ( (0<<31) | (0b00<<29) | (0b11010110<<21) | (Rm<<16) | (0b00001<<11) | (0<<10) | (Rn<<5) | Rd )

#define MSUB32(Rd, Rn, Rm, Ra) ( (0<<31) | (0b00<<29) | (0b11011<<24) | (0b000<<21) | (Rm<<16) | (1<<15) | (Ra<<10) | (Rn<<5) | Rd )

// MVN, alias for ORN (shifted register)
#define MVN32(Rd, Rm) ( (0<<31) | (0b01<<29) | (0b01010<<24) | (0b001<<21) | (Rm<<16) | (0b000000<<10) | (0b11111<<5) | Rd )

// NEG (shifted register), alias for SUB (shifted register)
#define NEG32(Rd, Rm) SUB32(Rd, WZR, Rm)
//#define NEG64(Rd, Rm) SUB64(Rd, XZR, Rm)

// LSL (register)
#define LSL32(Rd, Rn, Rm) ( (0<<31) | (0b00<<29) | (0b11010110<<21) | (Rm<<16) | (0b0010<<12) | (0b00<<10) | (Rn<<5) | Rd )

// LSR (register)
#define LSR32(Rd, Rn, Rm) ( (0<<31) | (0b00<<29) | (0b11010110<<21) | (Rm<<16) | (0b0010<<12) | (0b01<<10) | (Rn<<5) | Rd )

// ASR (register)
#define ASR32(Rd, Rn, Rm) ( (0<<31) | (0b00<<29) | (0b11010110<<21) | (Rm<<16) | (0b0010<<12) | (0b10<<10) | (Rn<<5) | Rd )

// LSL (immediate in range 1..31)
#define LSL32i(Rd, Rn, shift) ( (0<<31) | (0b10<<29) | (0b100110<<23) | (0<<22) | (((-(shift))&31)<<16) | ((31-(shift))<<10) | ((Rn)<<5) | Rd )

// LSR (immediate in range 1..31)
#define LSR32i(Rd, Rn, shift) ( (0<<31) | (0b10<<29) | (0b100110<<23) | (0<<22) | ((shift)<<16) | (31<<10) | ((Rn)<<5) | Rd )

// ASR (immediate in range 1..31)
#define ASR32i(Rd, Rn, shift) ( (0<<31) | (0b00<<29) | (0b100110<<23) | (0<<22) | ((shift)<<16) | (31<<10) | ((Rn)<<5) | Rd )


// LDP - load pair of registers with signed offset
#define LDP32(Rt1,Rt2,Rn,simm7) ( 0b00<<30 | 0b101<<27 | 0<<26 | 0b010<<23 | 1<<22 /*L*/ | ((((simm7)>>2)&0x7F)<<15) | Rt2<<10 | Rn<<5 | Rt1 )
#define LDP64(Rt1,Rt2,Rn,simm7) ( 0b10<<30 | 0b101<<27 | 0<<26 | 0b010<<23 | 1<<22 /*L*/ | ((((simm7)>>3)&0x7F)<<15) | Rt2<<10 | Rn<<5 | Rt1 )

// LDP - load pair of registers with post-index
#define LDP32post(Rt1,Rt2,Rn,simm7) ( 0b00<<30 | 0b101<<27 | 0<<26 | 0b001<<23 | 1<<22 /*L*/ | ((((simm7)>>2)&0x7F)<<15) | Rt2<<10 | Rn<<5 | Rt1 )
#define LDP64post(Rt1,Rt2,Rn,simm7) ( 0b10<<30 | 0b101<<27 | 0<<26 | 0b001<<23 | 1<<22 /*L*/ | ((((simm7)>>3)&0x7F)<<15) | Rt2<<10 | Rn<<5 | Rt1 )

// LDP - load pair of registers with pre-index
#define LDP32pre(Rt1,Rt2,Rn,simm7) ( 0b00<<30 | 0b101<<27 | 0<<26 | 0b011<<23 | 1<<22 /*L*/ | ((((simm7)>>2)&0x7F)<<15) | Rt2<<10 | Rn<<5 | Rt1 )
#define LDP64pre(Rt1,Rt2,Rn,simm7) ( 0b10<<30 | 0b101<<27 | 0<<26 | 0b011<<23 | 1<<22 /*L*/ | ((((simm7)>>3)&0x7F)<<15) | Rt2<<10 | Rn<<5 | Rt1 )

// STP - store pair of registers with signed offset
#define STP32(Rt1,Rt2,Rn,simm7) ( 0b00<<30 | 0b101<<27 | 0<<26 | 0b010<<23 | 0<<22 /*L*/ | ((((simm7)>>2)&0x7F)<<15) | ((Rt2)<<10) | ((Rn)<<5) | (Rt1) )
#define STP64(Rt1,Rt2,Rn,simm7) ( 0b10<<30 | 0b101<<27 | 0<<26 | 0b010<<23 | 0<<22 /*L*/ | ((((simm7)>>3)&0x7F)<<15) | ((Rt2)<<10) | ((Rn)<<5) | (Rt1) )

// STP - store pair of registers with post-index
#define STP32post(Rt1,Rt2,Rn,simm7) ( 0b00<<30 | 0b101<<27 | 0<<26 | 0b001<<23 | 0<<22 /*L*/ | ((((simm7)>>2)&0x7F)<<15) | Rt2<<10 | Rn<<5 | Rt1 )
#define STP64post(Rt1,Rt2,Rn,simm7) ( 0b10<<30 | 0b101<<27 | 0<<26 | 0b001<<23 | 0<<22 /*L*/ | ((((simm7)>>3)&0x7F)<<15) | Rt2<<10 | Rn<<5 | Rt1 )

// STP - store pair of registers with pre-index
#define STP32pre(Rt1,Rt2,Rn,simm7) ( 0b00<<30 | 0b101<<27 | 0<<26 | 0b011<<23 | 0<<22 /*L*/ | ((((simm7)>>2)&0x7F)<<15) | Rt2<<10 | Rn<<5 | Rt1 )
#define STP64pre(Rt1,Rt2,Rn,simm7) ( 0b10<<30 | 0b101<<27 | 0<<26 | 0b011<<23 | 0<<22 /*L*/ | ((((simm7)>>3)&0x7F)<<15) | Rt2<<10 | Rn<<5 | Rt1 )
#define LDR32iwpost(Rt, Rn, simm9) ( (0b10<<30) | 0b111000010<<21 | ((simm9&511) << 12) | (0b01 << 10) | (Rn << 5) | Rt )
#define LDR32iwpre(Rt, Rn, simm9) ( (0b10<<30) | 0b111000010<<21 | ((simm9&511) << 12) | (0b11 << 10) | (Rn << 5) | Rt )
#define LDR32ipre(Rt, Rn, simm9) ( (0b10<<30) | 0b111000010<<21 | ((simm9&511) << 12) | (0b00 << 10) | (Rn << 5) | Rt )

#define LDR32(Rt, Rn, Rm) ( (0b10<<30) | (0b111000011<<21) | (Rm<<16) | (0b010<<13) /*UXTW*/ | (0<<12) /*#0*/ | (0b10<<10) | (Rn << 5) | Rt )
#define LDRH32(Rt, Rn, Rm) ( (0b01<<30) | (0b111000011<<21) | (Rm<<16) | (0b010<<13) /*UXTW*/ | (0<<12) /*#0*/ | (0b10<<10) | (Rn << 5) | Rt )
#define LDRB32(Rt, Rn, Rm) ( (0b00<<30) | (0b111000011<<21) | (Rm<<16) | (0b010<<13) /*UXTW*/ | (0<<12) /*#0*/ | (0b10<<10) | (Rn << 5) | Rt )

#define LDR32i(Rt, Rn, imm12) ( (0b10<<30) | (0b11100101<<22) | (imm12_scale((imm12),2) << 10) | (Rn << 5) | Rt )
#define LDRH32i(Rt, Rn, imm12) ( (0b01<<30) | (0b11100101<<22) | (imm12_scale((imm12),1) << 10) | (Rn << 5) | Rt )
#define LDRB32i(Rt, Rn, imm12) ( (0b00<<30) | (0b11100101<<22) | (imm12_scale((imm12),0) << 10) | (Rn << 5) | Rt )
#define LDRB32iwpost(Rt, Rn, simm9) ( (0b00<<30) | (0b111000010<<21) | (((simm9)&511)<<12) | (0b01<<10) | ((Rn)<<5) | (Rt) )

#define LDRSB32(Rt, Rn, Rm) ( (0b00<<30) | (0b111000<<24) | (0b11<<22) /*opc*/ | (1<<21) | (Rm<<16) | (0b010<<13) /*UXTW*/ | (0<<12) /*S*/ | (0b10<<10) | (Rn<<5) | Rt )
#define LDRSH32(Rt, Rn, Rm) ( (0b01<<30) | (0b111000<<24) | (0b11<<22) /*opc*/ | (1<<21) | (Rm<<16) | (0b010<<13) /*UXTW*/ | (0<<12) /*S*/ | (0b10<<10) | (Rn<<5) | Rt )

#define LDRSB32i(Rt, Rn, imm12) ( (0b00<<30) | (0b111001<<24) | (0b11<<22) | (imm12_scale(imm12,0)<<10) | (Rn<<5) | Rt )
#define LDRSH32i(Rt, Rn, imm12) ( (0b01<<30) | (0b111001<<24) | (0b11<<22) | (imm12_scale(imm12,1)<<10) | (Rn<<5) | Rt )

#define LDRSWi(Rt, Rn, imm12) ( (0b10<<30) | (0b111001<<24) | (0b10<<22) | (imm12_scale(imm12,2)<<10) | ((Rn)<<5) | (Rt) )

//#define LDR32_4(Rt, Rn, Rm) ( (0b10<<30) | (0b111000011<<21) | (Rm<<16) | (0b011<<13) /*LSL*/ | (1<<12) /*#2*/ | (0b10<<10) | (Rn << 5) | Rt )
#define LDR64_8(Rt, Rn, Rm) ( (0b11<<30) | (0b111000011<<21) | (Rm<<16) | (0b011<<13) /*LSL*/ | (1<<12) /*#3*/ | (0b10<<10) | (Rn << 5) | Rt )

#define LDR64iwpost(Rt, Rn, simm9) ( (0b11<<30) | (0b111000010<<21) | ((simm9&511) << 12) | (0b01 << 10) | (Rn << 5) | Rt )
#define LDR64iwpre(Rt, Rn, simm9) ( (0b11<<30) | (0b111000010<<21) | ((simm9&511) << 12) | (0b11 << 10) | (Rn << 5) | Rt )
#define LDR64i(Rt, Rn, imm12) ( (0b11<<30) | (0b11100101<<22) | (imm12_scale(imm12,3) << 10) | (Rn << 5) | Rt )

#define STR32iwpost(Rt, Rn, simm9) ( (0b10<<30) | (0b111000000<<21) | ((simm9&511) << 12) | (0b01<<10) | (Rn<<5) | Rt )
#define STR32iwpre(Rt, Rn, simm9) ( (0b10<<30) | (0b111000000<<21) | ((simm9&511) << 12) | (0b11<<10) | (Rn<<5) | Rt )
#define STR32ipre(Rt, Rn, simm9) ( (0b10<<30) | (0b111000000<<21) | ((simm9&511) << 12) | (0b00<<10) | (Rn<<5) | Rt )

#define STRB32i(Rt, Rn, imm12) ( (0b00<<30) | (0b11100100<<22) | (imm12_scale((imm12),0) << 10) | (Rn << 5) | Rt )
#define STRH32i(Rt, Rn, imm12) ( (0b01<<30) | (0b11100100<<22) | (imm12_scale((imm12),1) << 10) | (Rn << 5) | Rt )
#define STR32i(Rt, Rn, imm12) ( (0b10<<30) | (0b11100100<<22) | (imm12_scale((imm12),2) << 10) | (Rn << 5) | Rt )

#define STR64iwpost(Rt, Rn, simm9) ( (0b11<<30) | (0b111000000<<21) | ((simm9&511) << 12) | (0b01<<10) | (Rn<<5) | Rt )
#define STR64iwpre(Rt, Rn, simm9) ( (0b11<<30) | (0b111000000<<21) | ((simm9&511) << 12) | (0b11<<10) | (Rn<<5) | Rt )
#define STR64i(Rt, Rn, imm12) ( (0b11<<30) | (0b11100100<<22) | (imm12_scale(imm12,3) << 10) | (Rn << 5) | Rt )

//#define STR32(Rt, Rn, Rm) ( (0b10<<30) | (0b111000001<<21) | (Rm<<16) | (0b011<<13) /*LSL*/ | (0<<12) /*#0*/ | (0b10<<10) | (Rn<<5) | Rt )
#define STR32(Rt, Rn, Rm) ( (0b10<<30) | (0b111000001<<21) | (Rm<<16) | (0b010<<13) /*UXTW*/ | (0<<12) /*#0*/ | (0b10<<10) | (Rn<<5) | Rt )
#define STRH32(Rt, Rn, Rm) ( (0b01<<30) | (0b111000001<<21) | (Rm<<16) | (0b010<<13) /*UXTW*/ | (0<<12) /*#0*/ | (0b10<<10) | (Rn<<5) | Rt )
#define STRB32(Rt, Rn, Rm) ( (0b00<<30) | (0b111000001<<21) | (Rm<<16) | (0b010<<13) /*UXTW*/ | (0<<12) /*#0*/ | (0b10<<10) | (Rn<<5) | Rt )
#define STRB32iwpost(Rt, Rn, simm9) ( (0b00<<30) | (0b111000000<<21) | (((simm9)&511)<<12) | (0b01<<10) | ((Rn)<<5) | (Rt) )

// LDR (literal) - PC-relative load
#define LDR32lit(Rt,simm19) ( (0b00<<30) | (0b011<<27) | (0<<26) | (0b00<<24) | (encode_offset19(simm19)<<5) | Rt )
#define LDR64lit(Rt,simm19) ( (0b01<<30) | (0b011<<27) | (0<<26) | (0b00<<24) | (encode_offset19(simm19)<<5) | Rt )

//#define STR32_4(Rt, Rn, Rm) ( (0b10<<30) | 0b111000001<<21 | Rm << 16 | 0b011<<13 /*LSL*/ | 1<<12 /*#2*/ | 0b10 << 10 | (Rn << 5) | Rt )
//#define STR64_8(Rt, Rn, Rm) ( (0b11<<30) | 0b111000001<<21 | Rm << 16 | 0b011<<13 /*LSL*/ | 1<<12 /*#3*/ | 0b10 << 10 | (Rn << 5) | Rt )

#define SXTB(Rd, Rn) ( (0<<31) | (0b00<<29) | (0b100110<<23) | (0<<22) /*N*/ | (0b000000<<16) /*immr*/ | (0b000111<<10) /*imms*/ | (Rn<<5) | Rd )
#define UXTB(Rd, Rn) ( (0<<31) | (0b10<<29) | (0b100110<<23) | (0<<22) /*N*/ | (0b000000<<16) /*immr*/ | (0b000111<<10) /*imms*/ | (Rn<<5) | Rd )
#define SXTH(Rd, Rn) ( (0<<31) | (0b00<<29) | (0b100110<<23) | (0<<22) /*N*/ | (0b000000<<16) /*immr*/ | (0b001111<<10) /*imms*/ | (Rn<<5) | Rd )
#define UXTH(Rd, Rn) ( (0<<31) | (0b10<<29) | (0b100110<<23) | (0<<22) /*N*/ | (0b000000<<16) /*immr*/ | (0b001111<<10) /*imms*/ | (Rn<<5) | Rd )

// CMP (immediate)
#define CMP32i(Rn, imm12) ( (0<<31) | (0b11<<29) | (0b100010<<23) | (0<<22) /*sh*/ | (imm12) << 10 | (Rn<<5) | WZR /*Rd*/ )
#define CMP64i(Rn, imm12) ( (1<<31) | (0b11<<29) | (0b100010<<23) | (0<<22) /*sh*/ | (imm12) << 10 | (Rn<<5) | XZR /*Rd*/ )

// CMP (shifted register)
#define CMP32(Rn, Rm) ( (0<<31) | (0b11<<29) | (0b01011<<24) | (0b00<<22) /*sh*/ | (0<<21) | (Rm<<16) | (0b000000<<10) /*imm6*/ | (Rn<<5) | WZR /*Rd*/ )
#define CMP64(Rn, Rm) ( (1<<31) | (0b11<<29) | (0b01011<<24) | (0b00<<22) /*sh*/ | (0<<21) | (Rm<<16) | (0b000000<<10) /*imm6*/ | (Rn<<5) | XZR /*Rd*/ )

// CBZ - Compare and Branch on Zero
#define CBZ32(Rt, simm19) ( (0<<31) | (0b011010<<25) | (0<<24) /*op*/ | (encode_offset19(simm19)<<5) | Rt )

// CBNZ - Compare and Branch on Nonzero
#define CBNZ32(Rt, simm19) ( (0<<31) | (0b011010<<25) | (1<<24) /*op*/ | (encode_offset19(simm19)<<5) | Rt )

// conditional branch within +/-1M
#define Bcond(cond, simm19) ( (0b0101010<<25) | (0<<24) | (encode_offset19(simm19)<<5) | (0<<4) | cond )

// unconditional branch within +/-128M
#define B(simm26) ( (0<<31) | (0b00101<<26) | encode_offset26(simm26) )

// branch with link within +/-128M
#define BL(simm26) ( (1<<31) | (0b00101<<26) | encode_offset26(simm26) )

// branch to register
#define BR(Rn) ( (0b1101011<<25) | (0<<24) | (0<<23) | (0b00<<21) | (0b11111<<16) | (0b0000<<12) | (0<<11) /*A*/ | (0<<10) /*M*/ | (Rn<<5) | 0b00000 /*Rm*/ )

// branch with link to register
#define BLR(Rn) ( (0b1101011<<25) | (0<<24) | (0<<23) | (0b01<<21) | (0b11111<<16) | (0b0000<<12) | (0<<11) /*A*/ | (0<<10) /*M*/ | (Rn<<5) | 0b00000 /*Rm*/ )

// Prefetch Memory (immediate)
#define PRFMi(Rt, Rn, imm12) ( (0b11111<<27) | (0b00110<<22) | (((imm12>>3)&0xFFF)<<10) | ((Rn)<<5) | (Rt) )
// Rt register fields:
// policy
#define KEEP 0
#define STRM 1 // non-temporal hint
// target cache
#define L1 (0b00<<1)
#define L2 (0b01<<1)
#define L3 (0b10<<1)
// type
#define PLD (0b00<<3) // prefetch for load
#define PLI (0b01<<3) // preload instructions
#define PST (0b10<<3) // prefetch for store


#define FABS(Sd, Sn) ( (0b000<<29) | (0b11110<<24) | (0b00<<22) | (1<<21) | (0b00000110000<<10) | (Sn<<5) | Sd )
#define FSQRT(Sd, Sn) ( (0b000<<29) | (0b11110<<24) | (0b00<<22) | (1<<21) | (0b00001110000<<10) | (Sn<<5) | Sd )
#define FNEG(Sd, Sn) ( (0b000<<29) | (0b11110<<24) | (0b00<<22) | (1<<21) | (0b0000<<17) | (0b10<<15) | (0b10000<<10) | (Sn<<5) | Sd )
#define FADD(Sd, Sn, Sm) ( (0b000<<29) | (0b11110<<24) | (0b00<<22) | (1<<21) | (Sm<<16) | (0b001<<13) | (0<<12) /*op*/ | (0b10<<10) | (Sn<<5) | Sd )
#define FSUB(Sd, Sn, Sm) ( (0b000<<29) | (0b11110<<24) | (0b00<<22) | (1<<21) | (Sm<<16) | (0b001<<13) | (1<<12) /*op*/ | (0b10<<10) | (Sn<<5) | Sd )
#define FMUL(Sd, Sn, Sm) ( (0b000<<29) | (0b11110<<24) | (0b00<<22) | (1<<21) | (Sm<<16) | (0<<15) /*op*/ | (0b000<<12) | (0b10<<10) | (Sn<<5) | Sd )
#define FDIV(Sd, Sn, Sm) ( (0b000<<29) | (0b11110<<24) | (0b00<<22) | (1<<21) | (Sm<<16) | (0b0001<<12) | (0b10<<10) | (Sn<<5) | Sd )

#define FCMP(Sn, Sm) ( (0b000<<29) | (0b11110<<24) | (0b00<<22) | (1<<21) | (Sm<<16) | (0b00<<14) | (0b1000<<10) | (Sn<<5) | (0b00<<3) /*opc*/ | 0b000 )
#define FCMP0(Sn) ( (0b000<<29) | (0b11110<<24) | (0b00<<22) | (1<<21) | (0<<16) | (0b00<<14) | (0b1000<<10) | (Sn<<5) | (0b01<<3) /*opc*/ | 0b000 )

// single precision to signed integer
#define FCVTZS(Rd, Sn) ( (0<<31) | (0b00<<29) | (0b11110<<24) | (0b00<<22) | (1<<21) | (0b11<<19) /*rmode*/ | (0b000<<16) /*opcode*/ | (0b000000<<10) | (Sn<<5) | Rd )
// signed integer to single precision
#define SCVTF(Sd, Rn) ( (0<<31) | (0b00<<29) | (0b11110<<24) | (0b00<<22) | (1<<21) | (0b00<<19) /*rmode*/ | (0b010<<16) /*opcode*/ | (0b000000<<10) | (Rn<<5) | Sd )

// move scalar to scalar
#define FMOV(Sd, Sn) ( (0<<31) | (0b00<<29) | (0b11110<<24) | (0b00<<22) | (1<<21) | (0b00<<19) /*rmode*/ | (0b000<<16) /*opcode*/ | (0b010000<<10) | (Sn<<5) | Sd )
// move scalar to general
#define FMOVgs(Rd, Sn) ( (0<<31) | (0b00<<29) | (0b11110<<24) | (0b00<<22) | (1<<21) | (0b00<<19) /*rmode*/ | (0b110<<16) /*opcode*/ | (0b000000<<10) | (Sn<<5) | Rd )
// move general to scalar
#define FMOVsg(Sd, Rn) ( (0<<31) | (0b00<<29) | (0b11110<<24) | (0b00<<22) | (1<<21) | (0b00<<19) /*rmode*/ | (0b111<<16) /*opcode*/ | (0b000000<<10) | (Rn<<5) | Sd )
// move immediate to scalar
#define FMOVi(Sd, imm8) ( (0<<31) | (0b00<<29) | (0b11110<<24) | (0b00<<22) | (1<<21) | ((imm8)<<13) | (0b100<<10) | (0b00000<<5) | Sd )

#define VLDR(St, Rn, Rm) ( (0b10<<30) | (0b111<<27) | (1<<26) | (0b00<<24) | (0b01<<22) /*opc*/ | (1<<21) | (Rm<<16) | (0b010<<13) /*UXTW*/ | (0<<12) /*S*/ | (0b10<<10) | (Rn<<5) | St )
#define VSTR(St, Rn, Rm) ( (0b10<<30) | (0b111<<27) | (1<<26) | (0b00<<24) | (0b00<<22) /*opc*/ | (1<<21) | (Rm<<16) | (0b010<<13) /*UXTW*/ | (0<<12) /*S*/ | (0b10<<10) | (Rn<<5) | St )

#define VLDRi(St, Rn, imm12) ( (0b10<<30) | (0b111<<27) | (1<<26) | (0b01<<24) | (0b01<<22) /*opc*/ | (imm12_scale(imm12,2) << 10) | (Rn<<5) | St )
#define VSTRi(St, Rn, imm12) ( (0b10<<30) | (0b111<<27) | (1<<26) | (0b01<<24) | (0b00<<22) /*opc*/ | (imm12_scale(imm12,2) << 10) | (Rn<<5) | St )


static qboolean can_encode_imm12( const uint32_t imm12, const uint32_t scale )
{
	const uint32_t mask = (1<<scale) - 1;

	if ( imm12 & mask || imm12 >= 4096 * (1 << scale) )
		return qfalse;

	return qtrue;
}


static uint32_t imm12_scale( const uint32_t imm12, const uint32_t scale )
{
	const uint32_t mask = (1<<scale) - 1;

	if ( imm12 & mask || imm12 >= 4096 * (1 << scale) )
		DROP( "can't encode offset %i with scale %i", imm12, (1 << scale) );

	return imm12 >> scale;
}
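// Note (illustrative): loads/stores built with imm12_scale() require offsets
// that are aligned multiples of the access size, e.g. STR32i( reg, rOPSTACK, 8 )
// encodes a word store at byte offset 8 (scale 2 -> imm12 field 2), while an
// offset of 6, or anything >= 16384, would be rejected with DROP().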
static qboolean encode_arith_imm( const uint32_t imm, uint32_t *res ) {

	if ( imm <= 0xFFF ) {
		*res = imm;
		return qtrue;
	}

	if ( (imm >> 12) <= 0xFFF && (imm & 0xFFF) == 0 ) {
		*res = (1 << 12) | (imm >> 12);
		return qtrue;
	}

	return qfalse;
}


static int shifted_mask( const uint64_t v ) {
	const uint64_t m = v - 1;
	return ( ( ( m | v ) + 1 ) & m ) == 0;
}


static qboolean encode_logic_imm( const uint64_t v, uint32_t reg_size, uint32_t *res ) {
	uint64_t mask, imm;
	uint32_t size, len;
	uint32_t N, immr, imms;

	// determine element size
	if ( reg_size == 64 ) {
		mask = 0xFFFFFFFF;
		size = 32;
	} else {
		if ( v > 0xFFFFFFFF ) {
			return qfalse;
		}
		mask = 0xFFFF;
		size = 16;
	}
	for ( ;; ) {
		if ( ( v & mask ) != ( (v >> size) & mask ) || size == 1 ) {
			mask |= mask << size;
			size <<= 1;
			break;
		}
		size >>= 1;
		mask >>= size;
	}

	imm = v & mask;

	// early reject
	if ( !shifted_mask( imm ) && !shifted_mask( ~( imm | ~mask ) ) ) {
		return qfalse;
	}

	// rotate right to set leading zero and trailing one
	mask = 1ULL << ( size - 1 ) | 1;
	for ( immr = 0; immr < size; immr++ ) {
		if ( ( imm & mask ) == 1 ) {
			break;
		}
		imm = ( ( imm & 1 ) << ( size - 1 ) ) | ( imm >> 1 );
	}

	if ( immr == size ) {
		// all ones/zeros, unsupported
		return qfalse;
	}

	// count trailing bits set
	for ( len = 0; len < size; len++ ) {
		if ( ( ( imm >> len ) & 1 ) == 0 ) {
			break;
		}
	}

	//if ( len == size || ( imm >> len ) != 0 ) {
	//	return qfalse;
	//}

	N = ( size >> 6 ) & 1;
	imms = (63 & (64 - size*2)) | (len - 1);

	*res = ( N << 12 ) | ( (size - immr) << 6 ) | imms;

	return qtrue;
}
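// Note (illustrative): encode_logic_imm() accepts only the "repeating run of
// ones" patterns that AArch64 logical immediates can express. For example
// 0x00FF00FF (eight ones repeated every 16 bits) encodes, while 0x12345678
// does not and the caller falls back to building the value in a register
// (see emit_MOVRi() below).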
// check if we can encode single-precision scalar immediate
static qboolean can_encode_f32_imm( const uint32_t v )
{
	uint32_t exp3 = (v >> 25) & ((1<<6)-1);

	if ( exp3 != 0x20 && exp3 != 0x1F )
		return qfalse;

	if ( v & ((1<<19)-1) )
		return qfalse;

	return qtrue;
}


static uint32_t encode_f32_imm( const uint32_t v )
{
	return (((v >> 31) & 0x1) << 7) | (((v >> 23) & 0x7) << 4) | ((v >> 19) & 0xF);
}
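// Note (illustrative): only floats whose bit pattern fits the FMOV imm8 form
// pass this check, e.g. 1.0f (0x3F800000) and 2.0f (0x40000000) can be moved
// directly, while 0.1f (0x3DCCCCCD) cannot and goes through the literal pool
// or a general-purpose register instead (see emit_MOVSi()).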
static void emit_MOVXi( uint32_t reg, uint64_t imm )
{
	emit( MOVZ64( reg, imm & 0xFFFF ) );

	if ( imm <= 0xFFFF )
		return;

	emit( MOVK64_16( reg, (imm >> 16)&0xFFFF ) );

	if ( imm <= 0xFFFFFFFF )
		return;

	emit( MOVK64_32( reg, (imm >> 32)&0xFFFF ) );

	if ( imm <= 0xFFFFFFFFFFFF )
		return;

	emit( MOVK64_48( reg, (imm >> 48)&0xFFFF ) );
}
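// Note (illustrative): the sequence is only as long as the value requires,
// e.g. emit_MOVXi( R16, 0x123456789ABCULL ) emits MOVZ plus two MOVKs (three
// instructions) because the top 16 bits are zero.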
static void emit_MOVRi( uint32_t reg, uint32_t imm )
{
	uint32_t immrs;

	if ( imm <= 0xFFFF ) {
		emit( MOVZ32( reg, imm ) );
		return;
	}

	if ( ( imm & 0xFFFF ) == 0 ) {
		emit( MOVZ32_16( reg, (imm >> 16)&0xFFFF ) );
		return;
	}

	if ( ~imm <= 0xFFFF ) {
		emit( MOVN32( reg, ~imm ) );
		return;
	}

	if ( encode_logic_imm( imm, 32, &immrs ) ) {
		emit( MOV32i( reg, immrs ) );
		return;
	}

	emit( MOVZ32( reg, imm & 0xFFFF ) );
	emit( MOVK32_16( reg, (imm >> 16)&0xFFFF ) );
}


static uint32_t alloc_rx( uint32_t pref );

static qboolean find_rx_const( uint32_t imm );
static uint32_t alloc_rx_const( uint32_t pref, uint32_t imm );
static uint32_t alloc_rx_local( uint32_t pref, uint32_t imm );

static uint32_t alloc_sx( uint32_t pref );


// ---------------- register allocation --------------------

// register allocation preferences

#define FORCED 0x20 // load function must return specified register
#define TEMP 0x40   // hint: temporary allocation, will not be stored on opStack
#define RCONST 0x80 // hint: register value will not be modified
#define XMASK 0x100 // exclude masked registers

#define RMASK 0x1F

// array sizes for cached/meta registers
#define NUM_RX_REGS 18 // [R0..R17]
#define NUM_SX_REGS 32 // [S0..S31]

// general-purpose register list available for dynamic allocation
static const uint32_t rx_list_alloc[] = {
	R0, R1, R2, R3, // R0-R3 are the required minimum
	R4, R5, R6, R7,
	R8, R9, R10, R11,
	R12, R13, R14, R15,
	R16, R17
};

// FPU scalar register list available for dynamic allocation
static const uint32_t sx_list_alloc[] = {
	S0, S1, 2, 3, 4, 5, 6, 7, // S0 and S1 are the required minimum
	// S8..S15 must be preserved
	16, 17, 18, 19, 20, 21, 22, 23,
	24, 25, 26, 27, 28, 29, 30, 31
};

#ifdef CONST_CACHE_RX
static const uint32_t rx_list_cache[] = {
	//R0, R1,
	R2, R3,
	R4, R5, R6, R7,
	R8, R9, R10, R11,
	R12, R13, R14, R15,
	R16, R17,
};
#endif

#ifdef CONST_CACHE_SX
static const uint32_t sx_list_cache[] = {
	S0, S1, 2, 3, 4, 5, 6, 7,
	// S8..S15 must be preserved
	16, 17, 18, 19, 20, 21, 22, 23,
	24, 25, 26, 27, 28, 29, 30, 31
};
#endif

// types of items on the opStack
typedef enum {
	TYPE_RAW,   // stored value
	TYPE_CONST, // constant
	TYPE_LOCAL, // address of local variable
	TYPE_RX,    // volatile - general-purpose register
	TYPE_SX,    // volatile - FPU scalar register
} opstack_value_t;

typedef enum {
	RTYPE_UNUSED = 0x0,
	RTYPE_CONST = 0x1,
	RTYPE_VAR = 0x2
} reg_value_t;

typedef struct opstack_s {
	uint32_t value;
	int offset;
	opstack_value_t type;
	int safe_arg;
} opstack_t;

typedef struct var_addr_s {
	int32_t addr; // variable address/offset
	uint8_t base; // procBase or dataBase register, ranges should NOT overlap
	uint8_t size; // 1,2,4
} var_addr_t;

typedef enum {
	Z_NONE,
	Z_EXT8,
	S_EXT8,
	Z_EXT16,
	S_EXT16,
} ext_t;

typedef struct reg_s {
	int type_mask;
	struct {
		uint32_t value;
	} cnst;
	// register value can be mapped to many memory regions
	struct {
		var_addr_t map[REG_MAP_COUNT];
		unsigned idx; // next allocation slot
	} vars;
	uint32_t ip; // ip of last reference
	int refcnt;  // reference counter
	ext_t ext;   // zero/sign-extension flags
} reg_t;
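// Note (illustrative): with REG_TYPE_MASK defined, a reg_t may carry a cached
// constant and up to REG_MAP_COUNT variable mappings at the same time, so a
// register that was loaded from one local and stored to another can satisfy
// later reads of either address without a reload.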
static int opstack;
static opstack_t opstackv[PROC_OPSTACK_SIZE + 1];

// cached register values

static reg_t rx_regs[NUM_RX_REGS];
static reg_t sx_regs[NUM_SX_REGS];

// masked register can't be allocated or flushed to opStack on register pressure

static int32_t rx_mask[NUM_RX_REGS];
static int32_t sx_mask[NUM_SX_REGS];


static qboolean find_free_rx( void ) {
	uint32_t i, n;

	for ( i = 0; i < ARRAY_LEN( rx_list_alloc ); i++ ) {
		n = rx_list_alloc[i];
		if ( rx_regs[n].type_mask == RTYPE_UNUSED ) {
			return qtrue;
		}
	}

	return qfalse;
}


static qboolean find_free_sx( void ) {
	uint32_t i, n;

	for ( i = 0; i < ARRAY_LEN( sx_list_alloc ); i++ ) {
		n = sx_list_alloc[i];
		if ( sx_regs[n].type_mask == RTYPE_UNUSED ) {
			return qtrue;
		}
	}

	return qfalse;
}


static void wipe_reg_range( reg_t *reg, const var_addr_t *v ) {
	if ( reg->type_mask & RTYPE_VAR ) {
		uint32_t c, n;
		for ( c = 0, n = 0; n < ARRAY_LEN( reg->vars.map ); n++ ) {
			var_addr_t *var = &reg->vars.map[n];
			if ( var->size != 0 ) {
				c++;
				if ( var->base == v->base ) {
					if ( v->addr < var->addr + var->size && v->addr + v->size > var->addr ) {
						memset( var, 0, sizeof( *var ) );
						//var->size = 0;
						c--; continue;
					}
				}
			}
		}
		if ( c == 0 ) {
			reg->type_mask &= ~RTYPE_VAR;
			reg->ext = Z_NONE;
		} else {
			//reg->type_mask |= RTYPE_VAR;
		}
	}
}


static void wipe_var_range( const var_addr_t *v )
{
#ifdef LOAD_OPTIMIZE
	uint32_t i;
#ifdef DEBUG_VM
	if ( v->size == 0 || v->base == 0 )
		DROP( "incorrect variable setup" );
#endif

	// wipe all types of overlapping variables
	for ( i = 0; i < ARRAY_LEN( rx_regs ); i++ ) {
		wipe_reg_range( &rx_regs[i], v );
	}
	for ( i = 0; i < ARRAY_LEN( sx_regs ); i++ ) {
		wipe_reg_range( &sx_regs[i], v );
	}
#endif
}
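// Note (illustrative): the overlap test above is byte-granular, e.g. a 4-byte
// store to procBase+16 invalidates any cached mapping touching bytes 16..19
// (a 1-byte variable at 18, a 4-byte one at 14, etc.), while a mapping at
// procBase+20 survives the wipe.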
static void set_var_map( reg_t *r, const var_addr_t *v ) {
	uint32_t n;
	for ( n = 0; n < ARRAY_LEN( r->vars.map ); n++ ) {
		if ( r->vars.map[n].size == 0 ) {
			r->vars.map[n] = *v;
			r->vars.idx = ( n + 1 ) % ARRAY_LEN( r->vars.map );
			return;
		}
	}
	r->vars.map[r->vars.idx] = *v;
	r->vars.idx = ( r->vars.idx + 1 ) % ARRAY_LEN( r->vars.map );
}


static void set_rx_var( uint32_t reg, const var_addr_t *v ) {
#ifdef LOAD_OPTIMIZE
	if ( reg < ARRAY_LEN( rx_regs ) ) {
		reg_t *r = rx_regs + reg;
#ifdef REG_TYPE_MASK
		r->type_mask |= RTYPE_VAR;
#else
		r->type_mask = RTYPE_VAR;
#endif
		set_var_map( r, v );
		r->refcnt++; // = 1;
		r->ip = ip;
	}
#endif
}

static void set_rx_ext( uint32_t reg, ext_t ext ) {
#ifdef LOAD_OPTIMIZE
	if ( reg >= ARRAY_LEN( rx_regs ) )
		DROP( "register value %i is out of range", reg );
	rx_regs[reg].ext = ext;
#endif
}


static void set_sx_var( uint32_t reg, const var_addr_t *v ) {
#ifdef LOAD_OPTIMIZE
	if ( reg < ARRAY_LEN( sx_regs ) ) {
		reg_t *r = sx_regs + reg;
#ifdef REG_TYPE_MASK
		r->type_mask |= RTYPE_VAR;
#else
		r->type_mask = RTYPE_VAR;
#endif
		set_var_map( r, v );
		r->refcnt++; // = 1;
		r->ip = ip;
	}
#endif
}


static reg_t *find_rx_var( uint32_t *reg, const var_addr_t *v ) {
#ifdef LOAD_OPTIMIZE
	uint32_t i;
	for ( i = 0; i < ARRAY_LEN( rx_regs ); i++ ) {
		reg_t *r = &rx_regs[i];
		if ( r->type_mask & RTYPE_VAR ) {
			uint32_t n;
			for ( n = 0; n < ARRAY_LEN( r->vars.map ); n++ ) {
				if ( r->vars.map[n].size && r->vars.map[n].addr == v->addr && r->vars.map[n].size == v->size && r->vars.map[n].base == v->base ) {
					r->refcnt++;
					r->ip = ip;
					*reg = i;
					return r;
				}
			}
		}
	}
#endif
	return NULL;
}


static qboolean find_sx_var( uint32_t *reg, const var_addr_t *v ) {
#ifdef LOAD_OPTIMIZE
	uint32_t i;
	for ( i = 0; i < ARRAY_LEN( sx_regs ); i++ ) {
		reg_t *r = &sx_regs[i];
		if ( r->type_mask & RTYPE_VAR ) {
			uint32_t n;
			for ( n = 0; n < ARRAY_LEN( r->vars.map ); n++ ) {
				if ( r->vars.map[n].size && r->vars.map[n].addr == v->addr && r->vars.map[n].size == v->size && r->vars.map[n].base == v->base ) {
					r->refcnt++;
					r->ip = ip;
					*reg = i;
					return qtrue;
				}
			}
		}
	}
#endif // LOAD_OPTIMIZE
	return qfalse;
}


static void reduce_map_size( reg_t *reg, uint32_t size ) {
	int i;
	for ( i = 0; i < ARRAY_LEN( reg->vars.map ); i++ ) {
		if ( reg->vars.map[i].size > size ) {
			reg->vars.map[i].size = size;
		}
	}
}


static reg_t *rx_on_top( void ) {
	opstack_t *it = &opstackv[ opstack ];
	if ( it->type == TYPE_RX ) {
		return &rx_regs[ it->value ];
	} else {
		return NULL;
	}
}


static void wipe_vars( void )
{
#ifdef LOAD_OPTIMIZE
	uint32_t i;
	reg_t *r;

	for ( i = 0; i < ARRAY_LEN( rx_regs ); i++ ) {
		r = &rx_regs[i];
		memset( &r->vars, 0, sizeof( r->vars ) );
		r->type_mask &= ~RTYPE_VAR;
		r->ext = Z_NONE;
	}
	for ( i = 0; i < ARRAY_LEN( sx_regs ); i++ ) {
		r = &sx_regs[i];
		memset( &r->vars, 0, sizeof( r->vars ) );
		r->type_mask &= ~RTYPE_VAR;
		r->ext = Z_NONE;
	}
#endif
}


static qboolean search_opstack( opstack_value_t type, uint32_t value ) {
	int i;
	for ( i = 1; i <= opstack; i++ ) {
		if ( opstackv[i].type == type && opstackv[i].value == value ) {
			return qtrue;
		}
	}
	return qfalse;
}
static void wipe_rx_meta( uint32_t reg )
{
#ifdef DEBUG_VM
	if ( reg >= ARRAY_LEN( rx_regs ) )
		DROP( "incorrect register index %i", reg );
#endif
	memset( &rx_regs[reg], 0, sizeof( rx_regs[0] ) );
	//rx_regs[reg].type_mask = RTYPE_UNUSED;
}


static void wipe_sx_meta( uint32_t reg )
{
#ifdef DEBUG_VM
	if ( reg >= ARRAY_LEN( sx_regs ) )
		DROP( "incorrect register index %i", reg );
#endif
	memset( &sx_regs[reg], 0, sizeof( sx_regs[0] ) );
	//sx_regs[reg].type_mask = RTYPE_UNUSED;
}


static void mask_rx( uint32_t reg )
{
	rx_mask[reg]++;
}


static void mask_sx( uint32_t reg )
{
	sx_mask[reg]++;
}


static void unmask_rx( uint32_t reg )
{
#ifdef DEBUG_VM
	if ( rx_mask[reg] <= 0 ) {
		DROP( "register R%i is already unmasked", reg );
	}
#endif
	rx_mask[reg]--;
}


static void unmask_sx( uint32_t reg )
{
#ifdef DEBUG_VM
	if ( sx_mask[reg] <= 0 ) {
		DROP( "register S%i is already unmasked", reg );
	}
#endif
	sx_mask[reg]--;
}


static void emit_MOVSi( uint32_t reg, uint32_t imm )
{
	uint32_t rx;

#ifdef USE_LITERAL_POOL
	int litIndex;
#endif
	if ( imm == 0 ) {
		emit( FMOVsg( reg, WZR ) );
		return;
	}

	if ( can_encode_f32_imm( imm ) ) {
		emit( FMOVi( reg, encode_f32_imm( imm ) ) );
		return;
	}

#ifdef USE_LITERAL_POOL
	litIndex = VM_SearchLiteral( imm );
	if ( litIndex >= 0 ) {
		emit( VLDRi( reg, rLITBASE, (litIndex*4) ) );
		return;
	}
#endif

	rx = alloc_rx_const( R2, imm ); // rx = imm
	emit( FMOVsg( reg, rx ) );      // sX = rX
	unmask_rx( rx );
}


static void set_local_address( uint32_t reg, uint32_t addr )
{
	uint32_t imm;
	if ( encode_arith_imm( addr, &imm ) ) {
		emit( ADD32i( reg, rPSTACK, imm ) ); // reg = pstack + addr
	} else {
		if ( find_rx_const( addr ) ) {
			uint32_t rx = alloc_rx_const( R3, addr ); // rx = const
			emit( ADD32( reg, rPSTACK, rx ) );        // reg = pstack + const
			unmask_rx( rx );
		} else {
			emit_MOVRi( reg, addr );            // reg = addr
			emit( ADD32( reg, rPSTACK, reg ) ); // reg = pstack + reg
		}
	}
}


static void flush_item( opstack_t *it )
{
	uint32_t rx;

	switch ( it->type ) {

		case TYPE_RX:
			if ( it->offset >= 0 )
				emit( STR32i( it->value, rOPSTACK, it->offset ) ); // *opstack = rX
			break;

		case TYPE_SX:
			emit( VSTRi( it->value, rOPSTACK, it->offset ) ); // *opstack = sX
			break;

		case TYPE_CONST:
			rx = alloc_rx_const( R2, it->value );
			emit( STR32i( rx, rOPSTACK, it->offset ) ); // *opstack = r2
			unmask_rx( rx );
			break;

		case TYPE_LOCAL:
			rx = alloc_rx_local( R2 | TEMP, it->value );
			emit( STR32i( rx, rOPSTACK, it->offset ) ); // *opstack = r2
			unmask_rx( rx );
			break;

		default:
			break;
	}

	it->type = TYPE_RAW;
	it->safe_arg = 0;
}


static void flush_items( opstack_value_t type, uint32_t value ) {
	int i;

	for ( i = 0; i <= opstack; i++ ) {
		opstack_t *it = opstackv + i;
		if ( it->type == type && it->value == value ) {
			flush_item( it );
		}
	}
}


static void init_opstack( void )
{
	opstack = 0;

	Com_Memset( &rx_mask[0], 0, sizeof( rx_mask ) );
	Com_Memset( &sx_mask[0], 0, sizeof( sx_mask ) );

	Com_Memset( &opstackv[0], 0, sizeof( opstackv ) );

	Com_Memset( &rx_regs[0], 0, sizeof( rx_regs ) );
	Com_Memset( &sx_regs[0], 0, sizeof( sx_regs ) );
}


static qboolean scalar_on_top( void )
{
#ifdef DEBUG_VM
	if ( opstack >= PROC_OPSTACK_SIZE || opstack <= 0 )
		DROP( "bad opstack %i", opstack * 4 );
#endif
#ifdef FPU_OPTIMIZE
	if ( opstackv[ opstack ].type == TYPE_SX )
		return qtrue;
#endif
	return qfalse;
}
static qboolean addr_on_top( var_addr_t *addr )
{
#ifdef DEBUG_VM
	if ( opstack >= PROC_OPSTACK_SIZE || opstack <= 0 )
		DROP( "bad opstack %i", opstack * 4 );
#endif
#ifdef ADDR_OPTIMIZE
	if ( opstackv[ opstack ].type == TYPE_CONST ) {
		addr->addr = opstackv[opstack].value;
		addr->base = rDATABASE;
		addr->size = 0;
		return qtrue;
	}
	if ( opstackv[ opstack ].type == TYPE_LOCAL ) {
		addr->addr = opstackv[opstack].value;
		addr->base = rPROCBASE;
		addr->size = 0;
		return qtrue;
	}
#endif
	return qfalse;
}


static void discard_top( void )
{
	opstack_t *it = &opstackv[ opstack ];
	it->type = TYPE_RAW;
	it->safe_arg = 0;
}

#if 1
static int is_safe_arg( void )
{
#ifdef DEBUG_VM
	if ( opstack >= PROC_OPSTACK_SIZE || opstack <= 0 )
		DROP( "bad opstack %i", opstack * 4 );
#endif
	return opstackv[ opstack ].safe_arg;
}
#endif

static void inc_opstack( void )
{
#ifdef DEBUG_VM
	if ( opstack >= PROC_OPSTACK_SIZE )
		DROP( "opstack overflow - %i", opstack * 4 );
#endif

	opstack += 1;

#ifdef DEBUG_VM
	if ( opstackv[ opstack ].type != TYPE_RAW )
		DROP( "bad item type %i at opstack %i", opstackv[ opstack ].type, opstack * 4 );
#endif
}


static void dec_opstack( void )
{
#ifdef DEBUG_VM
	opstack_t *it;

	if ( opstack <= 0 )
		DROP( "opstack underflow - %i", opstack * 4 );

	it = &opstackv[ opstack ];
	if ( it->type != TYPE_RAW )
		DROP( "opstack[%i]: item type %i is not consumed", opstack * 4, it->type );
#endif
	opstack -= 1;
}


static void dec_opstack_discard( void )
{
	opstack_t *it;

	it = &opstackv[ opstack ];
#ifdef DEBUG_VM
	if ( opstack <= 0 )
		DROP( "opstack underflow - %i", opstack * 4 );

	if ( it->type != TYPE_RAW && ( it->type != TYPE_RX || it->offset >= 0 ) )
		DROP( "opstack[%i]: item type %i is not consumed", opstack * 4, it->type );
#endif

	it->type = TYPE_RAW; // discard value
	it->safe_arg = 0;

	opstack -= 1;
}


// returns bitmask of registers present on opstack
static uint32_t build_opstack_mask( opstack_value_t reg_type )
{
	uint32_t mask = 0;
	int i;
	for ( i = 0; i <= opstack; i++ ) {
		opstack_t *it = opstackv + i;
		if ( it->type == reg_type ) {
			mask |= ( 1 << it->value );
		}
	}
	return mask;
}


static uint32_t build_rx_mask( void )
{
	uint32_t i, mask = 0;
	for ( i = 0; i < ARRAY_LEN( rx_mask ); i++ ) {
		if ( rx_mask[i] ) {
			mask |= 1 << i;
		}
	}
	return mask;
}


static uint32_t build_sx_mask( void )
{
	uint32_t i, mask = 0;
	for ( i = 0; i < ARRAY_LEN( sx_mask ); i++ ) {
		if ( sx_mask[i] ) {
			mask |= 1 << i;
		}
	}
	return mask;
}


// allocate register with local address value
static uint32_t alloc_rx_local( uint32_t pref, uint32_t imm )
{
	uint32_t rx = alloc_rx( pref );
	set_local_address( rx, imm );

	return rx;
}


// returns qtrue if specified constant is found or there is a free register to store it
static qboolean find_rx_const( uint32_t imm )
{
#ifdef CONST_CACHE_RX
	uint32_t mask = build_rx_mask() | build_opstack_mask( TYPE_RX );
	int i;

	for ( i = 0; i < ARRAY_LEN( rx_list_cache ); i++ ) {
		reg_t *r;
		uint32_t n = rx_list_cache[ i ];
		if ( mask & ( 1 << n ) ) {
			// target register must be unmasked
			continue;
		}
		r = &rx_regs[ n ];
		if ( r->type_mask & RTYPE_CONST && r->cnst.value == imm ) {
			return qtrue;
		}
		if ( r->type_mask == RTYPE_UNUSED ) {
			return qtrue;
		}
	}
#endif
	return qfalse;
}


// allocate integer register with constant value
static uint32_t alloc_rx_const( uint32_t pref, uint32_t imm )
{
#ifdef CONST_CACHE_RX
	reg_t *r;
#endif
	uint32_t rx;

#ifdef CONST_CACHE_RX
#ifdef DYN_ALLOC_RX
	if ( ( pref & FORCED ) == 0 ) {
		// supported only in dynamic allocation mode
		const uint32_t mask = build_rx_mask() | build_opstack_mask( TYPE_RX );
		int min_ref = MAX_QINT;
		int min_ip = MAX_QINT;
		int idx = -1;
		int i, n;

		if ( ( pref & XMASK ) == 0 ) {
			// we can select from already masked registers
			for ( n = 0; n < ARRAY_LEN( rx_regs ); n++ ) {
				r = &rx_regs[n];
				if ( r->type_mask & RTYPE_CONST && r->cnst.value == imm ) {
					r->refcnt++;
					r->ip = ip;
					mask_rx( n );
					return n;
				}
			}
		}

		for ( i = 0; i < ARRAY_LEN( rx_list_cache ); i++ ) {
			n = rx_list_cache[i];
			if ( mask & ( 1 << n ) ) {
				// target register must be unmasked and not present on the opStack
				continue;
			}
			r = &rx_regs[n];
			if ( r->type_mask & RTYPE_CONST && r->cnst.value == imm ) {
				// exact match, re-use this register
				r->refcnt++; // increase reference count
				r->ip = ip;  // update address too
				mask_rx( n );
				return n;
			}
			if ( r->type_mask == RTYPE_UNUSED ) {
				idx = n;
				break;
			}
			if ( ( r->refcnt < min_ref ) || ( r->refcnt == min_ref && r->ip < min_ip ) ) {
				// update least referenced item index
				min_ref = r->refcnt;
				min_ip = r->ip;
				idx = n;
				continue;
			}
		}
		if ( idx != -1 ) {
			r = &rx_regs[ idx ];
			memset( &r->vars, 0, sizeof( r->vars ) );
			r->type_mask = RTYPE_CONST;
			r->cnst.value = imm;
			r->refcnt = 1;
			r->ip = ip;
			r->ext = Z_NONE;
			emit_MOVRi( idx, imm );
			mask_rx( idx );
			return idx;
		}
		// else go to usual allocation to handle register spilling
	}
#endif // DYN_ALLOC_RX
#endif // CONST_CACHE_RX

	rx = alloc_rx( pref );
	emit_MOVRi( rx, imm );

#ifdef CONST_CACHE_RX
	r = &rx_regs[ rx ];
	//memset( &r->vars, 0, sizeof( r->vars ) );
	r->type_mask = RTYPE_CONST;
	r->cnst.value = imm;
	r->refcnt = 1;
	r->ip = ip;
	//r->ext = Z_NONE;
#endif

	return rx;
}
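// Note (illustrative): the constant cache above evicts the entry with the
// lowest refcnt (ties broken by the oldest ip), so a constant that keeps
// being re-requested stays resident while one-shot values are replaced first.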
// allocate scalar register with constant value
|
|
static uint32_t alloc_sx_const( uint32_t pref, uint32_t imm )
|
|
{
|
|
#ifdef CONST_CACHE_SX
|
|
reg_t *r;
|
|
#endif
|
|
uint32_t sx;
|
|
|
|
#ifdef CONST_CACHE_SX
|
|
#ifdef DYN_ALLOC_SX
|
|
if ( ( pref & FORCED ) == 0 ) {
|
|
// support only dynamic allocation mode
|
|
const uint32_t mask = build_sx_mask() | build_opstack_mask( TYPE_SX );
|
|
int min_ref = MAX_QINT;
|
|
int min_ip = MAX_QINT;
|
|
int idx = -1;
|
|
int i, n;
|
|
|
|
if ( ( pref & XMASK ) == 0 ) {
|
|
// we can select from already masked registers
|
|
for ( n = 0; n < ARRAY_LEN( sx_regs ); n++ ) {
|
|
r = &sx_regs[n];
|
|
if ( r->type_mask & RTYPE_CONST && r->cnst.value == imm ) {
|
|
r->refcnt++;
|
|
r->ip = ip;
|
|
mask_sx( n );
|
|
return n;
|
|
}
|
|
}
|
|
}
|
|
|
|
for ( i = 0; i < ARRAY_LEN( sx_list_cache ); i++ ) {
|
|
n = sx_list_cache[i];
|
|
if ( mask & ( 1 << n ) ) {
|
|
// target register must be unmasked and not present on the opStack
|
|
continue;
|
|
}
|
|
r = &sx_regs[n];
|
|
if ( r->type_mask & RTYPE_CONST && r->cnst.value == imm ) {
|
|
// exact match, re-use this register
|
|
r->refcnt++; // increase reference count
|
|
r->ip = ip; // update address too
|
|
mask_sx( n );
|
|
return n;
|
|
}
|
|
if ( r->type_mask == RTYPE_UNUSED ) {
|
|
idx = n;
|
|
break;
|
|
}
|
|
if ( ( r->refcnt < min_ref ) || ( r->refcnt == min_ref && r->ip < min_ip ) ) {
|
|
// update least referenced item index
|
|
min_ref = r->refcnt;
|
|
min_ip = r->ip;
|
|
idx = n;
|
|
continue;
|
|
}
|
|
}
|
|
if ( idx != -1 ) {
|
|
r = &sx_regs[ idx ];
|
|
memset( &r->vars, 0, sizeof( r->vars ) );
|
|
r->type_mask = RTYPE_CONST;
|
|
r->cnst.value = imm;
|
|
r->refcnt = 1;
|
|
r->ip = ip;
|
|
r->ext = Z_NONE;
|
|
emit_MOVSi( idx, imm );
|
|
mask_sx( idx );
|
|
return idx;
|
|
}
|
|
// else go to usual allocation to handle register spilling
|
|
}
|
|
#endif // DYN_ALLOC_SX
|
|
#endif // CONST_CACHE_SX
|
|
|
|
sx = alloc_sx( pref );
|
|
emit_MOVSi( sx, imm );
|
|
|
|
#ifdef CONST_CACHE_SX
|
|
r = &sx_regs[sx];
|
|
//memset( &r->vars, 0, sizeof( r->vars ) );
|
|
r->type_mask = RTYPE_CONST;
|
|
r->cnst.value = imm;
|
|
r->refcnt = 1;
|
|
r->ip = ip;
|
|
//r->ext = Z_NONE;
|
|
#endif
|
|
|
|
return sx;
|
|
}
|
|
|
|
|
|
static uint32_t dyn_alloc_rx( uint32_t pref )
|
|
{
|
|
const uint32_t _rx_mask = build_rx_mask();
|
|
const uint32_t mask = _rx_mask | build_opstack_mask( TYPE_RX );
|
|
const reg_t *reg, *used = NULL;
|
|
uint32_t i, n;
|
|
|
|
// try to bypass registers with metadata
|
|
for ( i = 0; i < ARRAY_LEN( rx_list_alloc ); i++ ) {
|
|
n = rx_list_alloc[i];
|
|
if ( mask & ( 1 << n ) ) {
|
|
continue;
|
|
}
|
|
reg = &rx_regs[n];
|
|
if ( reg->type_mask != RTYPE_UNUSED ) {
|
|
// mark least used item
|
|
if ( !used || reg->refcnt < used->refcnt || ( reg->refcnt == used->refcnt && reg->ip < used->ip ) ) {
|
|
used = reg;
|
|
}
|
|
continue;
|
|
}
|
|
wipe_rx_meta( n );
|
|
mask_rx( n );
|
|
return n;
|
|
}
|
|
|
|
if ( used ) {
|
|
// no free slots but something occupied by metadata
|
|
uint32_t idx = used - rx_regs;
|
|
wipe_rx_meta( idx );
|
|
mask_rx( idx );
|
|
return idx;
|
|
}
|
|
|
|
// no free registers, flush bottom of the opStack
|
|
for ( i = 0; i <= opstack; i++ ) {
|
|
opstack_t *it = opstackv + i;
|
|
if ( it->type == TYPE_RX ) {
|
|
n = it->value;
|
|
// skip masked registers
|
|
if ( _rx_mask & ( 1 << n ) ) {
|
|
continue;
|
|
}
|
|
flush_item( it );
|
|
flush_items( TYPE_RX, n ); // flush cloned registers too
|
|
wipe_rx_meta( n );
|
|
mask_rx( n );
|
|
return n;
|
|
}
|
|
}
|
|
|
|
return ~0U;
|
|
}
|
|
|
|
|
|
// integer register allocation
|
|
static uint32_t alloc_rx( uint32_t pref )
|
|
{
|
|
uint32_t reg;
|
|
|
|
#ifdef DYN_ALLOC_RX
|
|
if ( ( pref & FORCED ) == 0 ) {
|
|
uint32_t v = dyn_alloc_rx( pref );
|
|
if ( v == ~0U ) {
|
|
DROP( "no free registers at ip %i, pref %x, opStack %i, mask %04x", ip, pref, opstack * 4, build_rx_mask() );
|
|
}
|
|
return v;
|
|
}
|
|
#endif
|
|
|
|
reg = pref & RMASK;
|
|
|
|
#ifdef DEBUG_VM
|
|
if ( reg >= ARRAY_LEN( rx_mask ) )
|
|
DROP( "forced register R%i index overflowed!", reg );
|
|
else if ( rx_mask[reg] )
|
|
DROP( "forced register R%i is already masked!", reg );
|
|
#endif
|
|
|
|
// FORCED option: find and flush target register
|
|
flush_items( TYPE_RX, reg );
|
|
|
|
wipe_rx_meta( reg );
|
|
mask_rx( reg );
|
|
return reg;
|
|
}
|
|
|
|
|
|
static uint32_t dyn_alloc_sx( uint32_t pref )
|
|
{
|
|
const uint32_t _sx_mask = build_sx_mask();
|
|
const uint32_t mask = _sx_mask | build_opstack_mask( TYPE_SX );
|
|
const reg_t *reg, *used = NULL;
|
|
uint32_t i, n;
|
|
|
|
// try to bypass registers with metadata
|
|
for ( i = 0; i < ARRAY_LEN( sx_list_alloc ); i++ ) {
|
|
n = sx_list_alloc[i];
|
|
if ( mask & ( 1 << n ) ) {
|
|
continue;
|
|
}
|
|
reg = &sx_regs[n];
|
|
if ( reg->type_mask != RTYPE_UNUSED ) {
|
|
// mark least used item
|
|
if ( !used || reg->refcnt < used->refcnt || ( reg->refcnt == used->refcnt && reg->ip < used->ip ) ) {
|
|
used = reg;
|
|
}
|
|
continue;
|
|
}
|
|
wipe_sx_meta( n );
|
|
mask_sx( n );
|
|
return n;
|
|
}
|
|
|
|
if ( used ) {
|
|
// no free slots but something occupied by metadata
|
|
uint32_t idx = used - sx_regs;
|
|
wipe_sx_meta( idx );
|
|
mask_sx( idx );
|
|
return idx;
|
|
}
|
|
|
|
// no free registers, flush bottom of the opStack
|
|
for ( i = 0; i <= opstack; i++ ) {
|
|
opstack_t *it = opstackv + i;
|
|
if ( it->type == TYPE_SX ) {
|
|
n = it->value;
|
|
// skip masked registers
|
|
if ( _sx_mask & ( 1 << n ) ) {
|
|
continue;
|
|
}
|
|
flush_item( it );
|
|
flush_items( TYPE_SX, n ); // flush cloned registers too
|
|
wipe_sx_meta( n );
|
|
mask_sx( n );
|
|
return n;
|
|
}
|
|
}
|
|
|
|
return ~0U;
|
|
}
|
|
|
|
|
|
// scalar register allocation
|
|
static uint32_t alloc_sx( uint32_t pref )
|
|
{
|
|
uint32_t reg;
|
|
|
|
#ifdef DYN_ALLOC_SX
|
|
if ( ( pref & FORCED ) == 0 ) {
|
|
uint32_t v = dyn_alloc_sx( pref );
|
|
if ( v == ~0U ) {
|
|
DROP( "no free registers at ip %i, pref %x, opStack %i, mask %04x", ip, pref, opstack * 4, build_sx_mask() );
|
|
}
|
|
return v;
|
|
}
|
|
#endif
|
|
|
|
reg = pref & RMASK;
|
|
|
|
#ifdef DEBUG_VM
|
|
if ( reg >= ARRAY_LEN( sx_mask ) )
|
|
DROP( "forced register S%i index overflowed!", reg );
|
|
else if ( sx_mask[reg] )
|
|
DROP( "forced register S%i is already masked!", reg );
|
|
#endif
|
|
|
|
// FORCED option: find and flush target register
|
|
flush_items( TYPE_SX, reg );
|
|
|
|
wipe_sx_meta( reg );
|
|
mask_sx( reg );
|
|
return reg;
|
|
}
|
|
|
|
|
|
/*
|
|
==============
|
|
flush_volatile
|
|
|
|
flush any cached register/address/constant to the opStack and reset metadata (constant mappings)
|
|
this MUST be called before any unconditional jump, return or function call
|
|
==============
|
|
*/
|
|
static void flush_volatile( void )
|
|
{
|
|
int i;
|
|
|
|
for ( i = 0; i <= opstack; i++ ) {
|
|
opstack_t *it = opstackv + i;
|
|
if ( it->type == TYPE_RX || it->type == TYPE_SX ) {
|
|
flush_item( it );
|
|
}
|
|
}
|
|
|
|
// wipe all constants metadata
|
|
Com_Memset( &rx_regs[0], 0, sizeof( rx_regs ) );
|
|
Com_Memset( &sx_regs[0], 0, sizeof( sx_regs ) );
|
|
}
|
|
|
|
|
|
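// spill every opStack item to memory and drop all cached register metadata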
static void flush_opstack( void )
|
|
{
|
|
int i;
|
|
|
|
for ( i = 0; i <= opstack; i++ ) {
|
|
opstack_t *it = opstackv + i;
|
|
flush_item( it );
|
|
}
|
|
|
|
// wipe all constants metadata
|
|
Com_Memset( &rx_regs[0], 0, sizeof( rx_regs ) );
|
|
Com_Memset( &sx_regs[0], 0, sizeof( sx_regs ) );
|
|
}
|
|
|
|
|
|
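// record that the value on top of the opStack now lives in general-purpose
// register 'reg'; no store is emitted here - the spill to memory happens
// lazily when the item is flushed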
static void store_rx_opstack( uint32_t reg )
|
|
{
|
|
opstack_t *it = opstackv + opstack;
|
|
|
|
#ifdef DEBUG_VM
|
|
if ( opstack <= 0 )
|
|
DROP( "bad opstack %i", opstack * 4 );
|
|
|
|
if ( it->type != TYPE_RAW )
|
|
DROP( "bad type %i at opstack %i", it->type, opstack * 4 );
|
|
#endif
|
|
|
|
it->type = TYPE_RX;
|
|
it->offset = opstack * sizeof( int32_t );
|
|
it->value = reg;
|
|
it->safe_arg = 0;
|
|
|
|
unmask_rx( reg ); // so it can be flushed on demand
|
|
}
|
|
|
|
|
|
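// variant of store_rx_opstack() used after a syscall: the return value is in R0
// and has already been written to memory by the syscall trampoline, so the item
// is tagged with offset -1 instead of the usual opstack * sizeof( int32_t )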
static void store_syscall_opstack( void )
|
|
{
|
|
opstack_t *it = opstackv + opstack;
|
|
|
|
#ifdef DEBUG_VM
|
|
if ( opstack <= 0 )
|
|
DROP( "bad opstack %i", opstack * 4 );
|
|
|
|
if ( it->type != TYPE_RAW )
|
|
DROP( "bad type %i at opstack %i", it->type, opstack * 4 );
|
|
#endif
|
|
|
|
it->type = TYPE_RX;
|
|
it->offset = -1; // instead of the usual opstack * sizeof( int32_t )
|
|
it->value = R0;
|
|
it->safe_arg = 0;
|
|
|
|
wipe_rx_meta( it->value );
|
|
|
|
unmask_rx( it->value ); // so it can be flushed on demand
|
|
}
|
|
|
|
|
|
static void store_sx_opstack( uint32_t reg )
|
|
{
|
|
opstack_t *it = opstackv + opstack;
|
|
|
|
#ifdef DEBUG_VM
|
|
if ( opstack <= 0 )
|
|
DROP( "bad opstack %i", opstack * 4 );
|
|
|
|
if ( it->type != TYPE_RAW )
|
|
DROP( "bad type %i at opstack %i", it->type, opstack * 4 );
|
|
#endif
|
|
|
|
it->type = TYPE_SX;
|
|
it->offset = opstack * sizeof( int32_t );
|
|
it->value = reg;
|
|
it->safe_arg = 0;
|
|
|
|
unmask_sx( reg ); // so it can be flushed on demand
|
|
}
|
|
|
|
|
|
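// put an OP_CONST/OP_LOCAL operand on the opStack without emitting any code;
// materialization is deferred until the value is actually consumed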
static void store_item_opstack( instruction_t *ins )
|
|
{
|
|
opstack_t *it = opstackv + opstack;
|
|
|
|
#ifdef DEBUG_VM
|
|
if ( it->type != TYPE_RAW )
|
|
DROP( "bad type %i at opstack %i", it->type, opstack * 4 );
|
|
#endif
|
|
switch ( ins->op ) {
|
|
case OP_CONST: it->type = TYPE_CONST; break;
|
|
case OP_LOCAL: it->type = TYPE_LOCAL; break;
|
|
default: DROP( "incorrect opcode %i", ins->op );
|
|
}
|
|
|
|
it->offset = opstack * sizeof( int32_t );
|
|
it->value = ins->value;
|
|
it->safe_arg = ins->safe;
|
|
}
|
|
|
|
|
|
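// prepare a register obtained from the opStack for modification:
// - RCONST: caller will not modify it, return as-is
// - if other opStack items still reference it, either flush them (FORCED)
//   or return a fresh copy so the shared value is preserved
// - otherwise wipe its metadata since the cached value is about to change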
static uint32_t finish_rx( uint32_t pref, uint32_t reg ) {
|
|
|
|
if ( pref & RCONST ) {
|
|
// non-destructive operation
|
|
return reg;
|
|
}
|
|
|
|
if ( search_opstack( TYPE_RX, reg ) ) {
|
|
// another instance is present on opStack
|
|
if ( pref & FORCED ) {
|
|
// nothing should be left for a FORCED register
|
|
flush_items( TYPE_RX, reg );
|
|
} else {
|
|
// copy it
|
|
int rx = alloc_rx( R2 );
|
|
emit(MOV32(rx, reg));
|
|
unmask_rx( reg );
|
|
return rx;
|
|
}
|
|
}
|
|
|
|
wipe_rx_meta( reg );
|
|
return reg;
|
|
}
|
|
|
|
|
|
/*
|
|
===========
|
|
load_rx_opstack
|
|
|
|
loads current opstack value into specified register
|
|
returns masked register number, must be unmasked manually if not stored on the opstack
|
|
output register is very likely to be modified unless the RCONST preference is specified
|
|
===========
|
|
*/
|
|
static uint32_t load_rx_opstack( uint32_t pref )
|
|
{
|
|
opstack_t *it = opstackv + opstack;
|
|
uint32_t reg = pref & RMASK;
|
|
|
|
#ifdef DEBUG_VM
|
|
if ( opstack <= 0 )
|
|
DROP( "bad opstack %i", opstack*4 );
|
|
#endif
|
|
|
|
if ( it->type == TYPE_RX ) {
|
|
#ifdef DYN_ALLOC_RX
|
|
if ( !( pref & FORCED ) ) {
|
|
mask_rx( it->value );
|
|
it->type = TYPE_RAW;
|
|
return finish_rx( pref, it->value ); // return current register
|
|
}
|
|
#endif
|
|
// FORCED flag: return exact target register
|
|
if ( it->value == reg ) {
|
|
mask_rx( it->value );
|
|
it->type = TYPE_RAW;
|
|
return finish_rx( pref, reg );
|
|
} else {
|
|
// allocate target register
|
|
reg = alloc_rx( pref );
|
|
|
|
// copy source to target
|
|
emit(MOV32(reg, it->value));
|
|
|
|
it->type = TYPE_RAW;
|
|
return reg;
|
|
}
|
|
} // it->type == TYPE_RX
|
|
|
|
// scalar register on the stack
|
|
if ( it->type == TYPE_SX ) {
|
|
// move from scalar to general-purpose register
|
|
reg = alloc_rx( pref );
|
|
|
|
emit(FMOVgs(reg, it->value));
|
|
|
|
it->type = TYPE_RAW;
|
|
return reg;
|
|
}
|
|
|
|
if ( ( pref & RCONST ) == 0 ) {
|
|
pref |= XMASK;
|
|
} // else we can search for constants in masked registers
|
|
|
|
if ( it->type == TYPE_CONST ) {
|
|
// move constant to general-purpose register
|
|
reg = alloc_rx_const( pref, it->value );
|
|
it->type = TYPE_RAW;
|
|
return finish_rx( pref, reg );
|
|
}
|
|
|
|
if ( it->type == TYPE_LOCAL ) {
|
|
reg = alloc_rx_local( pref, it->value );
|
|
it->type = TYPE_RAW;
|
|
return finish_rx( pref, reg );
|
|
}
|
|
|
|
// default raw type, explicit load from opStack
|
|
reg = alloc_rx( pref );
|
|
|
|
emit(LDR32i(reg, rOPSTACK, opstack * sizeof(int32_t))); // rX = *opstack
|
|
it->type = TYPE_RAW;
|
|
return reg;
|
|
}
|
|
|
|
|
|
static uint32_t finish_sx( uint32_t pref, uint32_t reg ) {
|
|
|
|
if ( pref & RCONST ) {
|
|
// non-destructive operation
|
|
return reg;
|
|
}
|
|
|
|
if ( search_opstack( TYPE_SX, reg ) ) {
|
|
// another instance is present on opStack
|
|
if ( pref & FORCED ) {
|
|
// nothing should be left for a FORCED register
|
|
flush_items( TYPE_SX, reg );
|
|
} else {
|
|
// must be copied
|
|
int sx = alloc_sx( S2 );
|
|
emit(FMOV(sx, reg));
|
|
unmask_sx( reg );
|
|
return sx;
|
|
}
|
|
}
|
|
|
|
wipe_sx_meta( reg );
|
|
return reg;
|
|
}
|
|
|
|
|
|
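// load the opStack top for a read-modify-write operation: *src receives the
// (possibly cached) source register, *dst the register the result should go to;
// a separate destination is allocated only when the source is still duplicated
// on the opStack or a free register is available, otherwise the source is reused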
static void load_rx_opstack2( uint32_t *dst, uint32_t dst_pref, uint32_t *src, uint32_t src_pref )
|
|
{
|
|
#if 0
|
|
*dst = *src = load_rx_opstack( src_pref &= ~RCONST ); // source, target = *opstack
|
|
#else
|
|
*dst = *src = load_rx_opstack( src_pref | RCONST ); // source, target = *opstack
|
|
if ( search_opstack( TYPE_RX, *src ) || find_free_rx() ) {
|
|
// *src is duplicated on opStack or there is a free register
|
|
*dst = alloc_rx( dst_pref & ~RCONST ); // allocate new register for the target
|
|
} else {
|
|
// will be overwritten, wipe metadata
|
|
wipe_rx_meta( *dst );
|
|
}
|
|
#endif
|
|
}
|
|
|
|
|
|
// we must unmask register manually after allocation/loading
|
|
static uint32_t load_sx_opstack( uint32_t pref )
|
|
{
|
|
opstack_t *it = opstackv + opstack;
|
|
uint32_t reg = pref & RMASK;
|
|
|
|
#ifdef DEBUG_VM
|
|
if ( opstack <= 0 )
|
|
DROP( "bad opstack %i", opstack*4 );
|
|
#endif
|
|
|
|
// scalar register on the stack
|
|
if ( it->type == TYPE_SX ) {
|
|
#ifdef DYN_ALLOC_SX
|
|
if ( !( pref & FORCED ) ) {
|
|
mask_sx( it->value );
|
|
it->type = TYPE_RAW;
|
|
return finish_sx( pref, it->value );
|
|
}
|
|
#endif
|
|
// FORCED flag: return exact target register
|
|
if ( it->value == reg ) {
|
|
mask_sx( it->value );
|
|
it->type = TYPE_RAW;
|
|
return finish_sx( pref, reg );
|
|
} else {
|
|
// allocate target register
|
|
reg = alloc_sx( pref );
|
|
|
|
// copy source to target
|
|
emit(FMOV(reg, it->value));
|
|
|
|
it->type = TYPE_RAW;
|
|
return reg;
|
|
}
|
|
}
|
|
|
|
// integer register on the stack
|
|
if ( it->type == TYPE_RX ) {
|
|
// move from general-purpose to scalar register
|
|
// should never happen with FPU type promotion, except syscalls
|
|
reg = alloc_sx( pref );
|
|
|
|
emit( FMOVsg( reg, it->value ) );
|
|
|
|
it->type = TYPE_RAW;
|
|
return reg;
|
|
}
|
|
|
|
if ( ( pref & RCONST ) == 0 ) {
|
|
pref |= XMASK;
|
|
} // else we can search for constants in masked registers
|
|
|
|
if ( it->type == TYPE_CONST ) {
|
|
// move constant to scalar register
|
|
reg = alloc_sx_const( pref, it->value );
|
|
it->type = TYPE_RAW;
|
|
return finish_sx( pref, reg );
|
|
}
|
|
|
|
if ( it->type == TYPE_LOCAL ) {
|
|
uint32_t rx;
|
|
|
|
// bogus case: local address cast to float
|
|
reg = alloc_sx( pref );
|
|
rx = alloc_rx_local( R2 | RCONST, it->value );
|
|
emit( FMOVsg( reg, rx ) );
|
|
unmask_rx( rx );
|
|
|
|
it->type = TYPE_RAW;
|
|
return reg;
|
|
}
|
|
|
|
// default raw type, explicit load from opStack
|
|
reg = alloc_sx( pref );
|
|
emit( VLDRi( reg, rOPSTACK, opstack * sizeof( int32_t ) ) ); // sX = *opstack
|
|
it->type = TYPE_RAW;
|
|
return reg;
|
|
}
|
|
|
|
|
|
static void load_sx_opstack2( uint32_t *dst, uint32_t dst_pref, uint32_t *src, uint32_t src_pref )
|
|
{
|
|
#if 0
|
|
*dst = *src = load_sx_opstack( src_pref ); // source, target = *opstack
|
|
#else
|
|
*dst = *src = load_sx_opstack( src_pref | RCONST ); // source, target = *opstack
|
|
if ( search_opstack( TYPE_SX, *src ) || find_free_sx() ) {
|
|
// *src is duplicated on opStack or there is a free register
|
|
*dst = alloc_sx( dst_pref & ~RCONST ); // allocate new register for the target
|
|
} else {
|
|
// will be overwritten, wipe metadata
|
|
wipe_sx_meta( *dst );
|
|
}
|
|
#endif
|
|
}
|
|
|
|
|
|
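// translate a VM comparison opcode into the corresponding AArch64 condition code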
static uint32_t get_comp( int op )
|
|
{
|
|
switch ( op ) {
|
|
case OP_EQ: return EQ;
|
|
case OP_NE: return NE;
|
|
case OP_LTI: return LT;
|
|
case OP_LEI: return LE;
|
|
case OP_GTI: return GT;
|
|
case OP_GEI: return GE;
|
|
case OP_LTU: return LO;
|
|
case OP_LEU: return LS;
|
|
case OP_GTU: return HI;
|
|
case OP_GEU: return HS;
|
|
case OP_EQF: return EQ;
|
|
case OP_NEF: return NE;
|
|
case OP_LTF: return MI;
|
|
case OP_LEF: return LS;
|
|
case OP_GTF: return GT;
|
|
case OP_GEF: return GE;
|
|
default: DROP( "unexpected op %i", op );
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
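// encode a branch byte offset into the 26-bit word-offset field used by B/BL;
// out-of-range or unaligned offsets are only rejected on the final pass,
// once all instruction offsets are known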
static uint32_t encode_offset26( uint32_t ofs )
|
|
{
|
|
const uint32_t x = ofs >> 2;
|
|
const uint32_t t = x >> 26;
|
|
|
|
if ( ( ( t != 0x0F && t != 0x00 ) || ( ofs & 3 ) ) && pass != 0 )
|
|
DROP( "can't encode %i", ofs );
|
|
|
|
return x & 0x03FFFFFF;
|
|
}
|
|
|
|
|
|
static uint32_t encode_offset19( uint32_t ofs )
|
|
{
|
|
const uint32_t x = ofs >> 2;
|
|
const uint32_t t = x >> 19;
|
|
|
|
if ( ( ( t != 0x7FF && t != 0x00 ) || ( ofs & 3 ) ) && pass != 0 )
|
|
DROP( "can't encode %i", ofs );
|
|
|
|
return x & 0x7FFFF;
|
|
}
|
|
|
|
|
|
static void emitAlign( const uint32_t align )
|
|
{
|
|
while ( compiledOfs & (align-1) )
|
|
emit(NOP);
|
|
}
|
|
|
|
|
|
static void emitFuncOffset( vm_t *vm, offset_t func )
|
|
{
|
|
uint32_t offset = savedOffset[ func ] - compiledOfs;
|
|
|
|
emit( BL( offset ) );
|
|
}
|
|
|
|
|
|
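// emit a runtime check for the data address in 'reg': if forceDataMask is set
// or the VM_RTCHECK_DATA check is disabled the address is simply clamped with
// rDATAMASK, otherwise an out-of-range address branches to the error handler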
static void emit_CheckReg( vm_t *vm, uint32_t reg, offset_t func )
|
|
{
|
|
if ( vm->forceDataMask || !( vm_rtChecks->integer & VM_RTCHECK_DATA ) ) {
|
|
emit( AND32( reg, rDATAMASK, reg ) ); // rN = rN & rDATAMASK
|
|
return;
|
|
}
|
|
|
|
emit( CMP32( reg, rDATAMASK ) );
|
|
emit( Bcond( LO, +8 ) );
|
|
emitFuncOffset( vm, func ); // error function
|
|
}
|
|
|
|
|
|
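// emit a runtime check for a computed jump target held in 'reg':
// with a known proc_base the target must stay inside the current procedure,
// otherwise it is checked against vm->instructionCount;
// violations branch to the FUNC_OUTJ error handler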
static void emit_CheckJump( vm_t *vm, uint32_t reg, int proc_base, int proc_len )
|
|
{
|
|
uint32_t rx[2], imm;
|
|
|
|
if ( ( vm_rtChecks->integer & VM_RTCHECK_JUMP ) == 0 ) {
|
|
return;
|
|
}
|
|
|
|
if ( proc_base != -1 ) {
|
|
// allow jump within local function scope only
|
|
rx[0] = alloc_rx( R2 | TEMP );
|
|
if ( encode_arith_imm( proc_base, &imm ) )
|
|
emit(SUB32i(rx[0], reg, imm)); // r2 = ip - procBase
|
|
else {
|
|
emit_MOVRi(rx[0], proc_base); // r2 = procBase
|
|
emit(SUB32(rx[0], reg, rx[0])); // r2 = ip - R2
|
|
}
|
|
// (ip > proc_len) ?
|
|
if ( encode_arith_imm( proc_len, &imm ) ) {
|
|
emit(CMP32i(rx[0], imm)); // cmp r2, proclen
|
|
} else {
|
|
rx[1] = alloc_rx_const( R1, proc_len ); // r1 = procLen
|
|
emit(CMP32(rx[0], rx[1])); // cmp r2, r1
|
|
unmask_rx( rx[1] );
|
|
}
|
|
unmask_rx( rx[0] );
|
|
emit(Bcond(LS, +8)); // jump over if unsigned less or same
|
|
emitFuncOffset(vm, FUNC_OUTJ);
|
|
} else {
|
|
// check if reg >= header->instructionCount
|
|
rx[0] = alloc_rx( R2 | TEMP );
|
|
emit(LDR32i(rx[0], rVMBASE, offsetof(vm_t, instructionCount))); // r2 = vm->instructionCount
|
|
emit(CMP32(reg, rx[0])); // cmp reg, r2
|
|
emit(Bcond(LO, +8)); // jump over if unsigned less
|
|
emitFuncOffset(vm, FUNC_OUTJ); // error function
|
|
unmask_rx( rx[0] );
|
|
}
|
|
}
|
|
|
|
|
|
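// emit the optional procedure prologue checks: programStack overflow
// (the stack grows down, so pStack is compared against vm->stackBottom) and
// opStack overflow (compared against vm->opStackTop), each branching to its
// error handler when the corresponding vm_rtChecks bit is set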
static void emit_CheckProc( vm_t *vm, instruction_t *ins )
|
|
{
|
|
uint32_t imm;
|
|
|
|
// programStack overflow check
|
|
if ( vm_rtChecks->integer & VM_RTCHECK_PSTACK ) {
|
|
emit(CMP32(rPSTACK, rPSTACKBOTTOM)); // check if pStack < vm->stackBottom
|
|
emit(Bcond(GE, +8)); // jump over if signed greater or equal
|
|
emitFuncOffset( vm, FUNC_PSOF ); // error function
|
|
}
|
|
|
|
// opStack overflow check
|
|
if ( vm_rtChecks->integer & VM_RTCHECK_OPSTACK ) {
|
|
uint32_t n = ins->opStack; // proc->opStack carries max.used opStack value
|
|
uint32_t rx = alloc_rx( R2 | TEMP );
|
|
if ( encode_arith_imm( n, &imm ) ) {
|
|
emit(ADD64i(rx, rOPSTACK, imm));// r2 = opstack + max.opStack
|
|
} else {
|
|
emit_MOVRi(rx, n); // r2 = max.opStack
|
|
emit(ADD64(rx, rOPSTACK, rx)); // r2 = opStack + r2
|
|
}
|
|
emit(CMP64(rx, rOPSTACKTOP)); // check if r2 > vm->opstackTop
|
|
emit(Bcond(LS, +8)); // jump over if unsigned less or equal
|
|
emitFuncOffset( vm, FUNC_OSOF );
|
|
unmask_rx( rx );
|
|
}
|
|
}
|
|
|
|
|
|
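// emit the shared call trampoline (FUNC_CALL/FUNC_SYSC/FUNC_SYSF):
// non-negative indices in R0 are dispatched through vm->instructionPointers[],
// negative values are syscalls - the arguments from [procBase+8..] are
// sign-extended to 64 bits, vm->systemCall is invoked and its return value
// is stored back on the opStack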
static void emitCallFunc( vm_t *vm )
|
|
{
|
|
int i;
|
|
init_opstack(); // to avoid any side-effects on emit_CheckJump()
|
|
|
|
savedOffset[ FUNC_CALL ] = compiledOfs; // to jump from OP_CALL
|
|
|
|
emit(CMP32i(R0, 0)); // check if syscall
|
|
emit(Bcond(LT, savedOffset[ FUNC_SYSC ] - compiledOfs));
|
|
|
|
// check if R0 >= header->instructionCount
|
|
mask_rx( R0 );
|
|
emit_CheckJump( vm, R0, -1, 0 );
|
|
unmask_rx( R0 );
|
|
|
|
// local function call
|
|
emit(LDR64_8(R16, rINSPOINTERS, R0)); // r16 = instructionPointers[ r0 ]
|
|
emit(BR(R16));
|
|
emit(BRK(0));
|
|
|
|
// syscall
|
|
savedOffset[ FUNC_SYSC ] = compiledOfs; // to jump from OP_CALL
|
|
|
|
emit(MVN32(R0, R0)); // r0 = ~r0
|
|
|
|
savedOffset[ FUNC_SYSF ] = compiledOfs; // to jump from ConstOptimize()
|
|
|
|
emit(SUB64i(SP, SP, 128+16)); // SP -= (128 + 16)
|
|
|
|
// save LR (and FP) because LR will be clobbered by the BLR instruction
|
|
emit(STP64(LR, FP, SP, 128)); // SP[128] = { LR, FP }
|
|
|
|
// modify VM stack pointer for recursive VM entry
|
|
|
|
//currentVM->programStack = pstack - 8;
|
|
emit(SUB32i(R1, rPSTACK, 8)); // r1 = pstack - 8
|
|
emit(STR32i(R1, rVMBASE, offsetof(vm_t, programStack))); // vm->programStack = r1
|
|
|
|
// sign-extend arguments starting from [procBase+8]
|
|
// R0 is already zero-extended
|
|
emit(LDRSWi(R1, rPROCBASE, 8));
|
|
emit(STP64(R0, R1, SP, 0));
|
|
for ( i = 2; i < 16; i += 2 ) {
|
|
emit(LDRSWi(R0, rPROCBASE, 4+(i+0)*4));
|
|
emit(LDRSWi(R1, rPROCBASE, 4+(i+1)*4));
|
|
emit(STP64(R0, R1, SP, (i/2)*16));
|
|
}
|
|
|
|
emit(ADD64i(R0, SP, 0)); // r0 = sp
|
|
|
|
//ret = currentVM->systemCall( args );
|
|
emit(LDR64i(R16, rVMBASE, offsetof(vm_t,systemCall))); // r16 = vm->systemCall
|
|
emit(BLR(R16)); // call [r16]( r0 )
|
|
|
|
// restore LR, FP
|
|
emit(LDP64(LR, FP, SP, 128)); // { LR, FP } = SP[ 128 ]
|
|
emit(ADD64i(SP, SP, 128+16)); // SP += 128 + 16
|
|
|
|
// store return value
|
|
emit(STR32i(R0, rOPSTACK, 4)); // *(opstack+4) = r0
|
|
|
|
emit(RET(LR));
|
|
}
|
|
|
|
|
|
// R0 - src, R1 - dst, R2 - count, R3 - scratch
|
|
static void emitBlockCopyFunc( vm_t *vm )
|
|
{
|
|
// adjust R2 if needed
|
|
emit(AND32(R0, R0, rDATAMASK)); // r0 &= dataMask
|
|
emit(AND32(R1, R1, rDATAMASK)); // r1 &= dataMask
|
|
|
|
emit(ADD32(R3, R0, R2)); // r3 = r0 + r2
|
|
emit(AND32(R3, R3, rDATAMASK)); // r3 &= dataMask
|
|
emit(SUB32(R2, R3, R0)); // r2 = r3 - r0
|
|
|
|
emit(ADD32(R3, R1, R2)); // r3 = r1 + r2
|
|
emit(AND32(R3, R3, rDATAMASK)); // r3 &= dataMask
|
|
emit(SUB32(R2, R3, R1)); // r2 = r3 - r1
|
|
|
|
emit(ADD64(R0, R0, rDATABASE)); // r0 += dataBase
|
|
emit(ADD64(R1, R1, rDATABASE)); // r1 += dataBase
|
|
|
|
emitAlign( 16 );
|
|
emit(CMP32i(R2, 8));
|
|
emit(Bcond(LT, +20)); // jump to next block if R2 is less than 8
|
|
emit(LDR64iwpost(R3, R0, 8)); // r3 = [r0]; r0 += 8
|
|
emit(STR64iwpost(R3, R1, 8)); // [r1] = r3; r1 += 8
|
|
emit(SUB32i(R2, R2, 8)); // r2 -= 8
|
|
emit(B(-20));
|
|
|
|
emitAlign( 16 );
|
|
emit(CMP32i(R2, 4));
|
|
emit(Bcond(LT, +20)); // jump to next block if R2 is less than 4
|
|
emit(LDR32iwpost(R3, R0, 4)); // r3 = [r0]; r0 += 4
|
|
emit(STR32iwpost(R3, R1, 4)); // [r1] = r3; r1 += 4
|
|
emit(SUB32i(R2, R2, 4)); // r2 -= 4
|
|
emit(B(-20));
|
|
|
|
emitAlign( 16 );
|
|
emit(CMP32i(R2, 1));
|
|
emit(Bcond(LT, +20)); // jump to next block if R2 is less than 1
|
|
emit(LDRB32iwpost(R3, R0, 1)); // r3 = [r0]; r0 += 1
|
|
emit(STRB32iwpost(R3, R1, 1)); // [r1] = r3; r1 += 1
|
|
emit(SUB32i(R2, R2, 1)); // r2 -= 1
|
|
emit(B(-20));
|
|
|
|
emit(RET(LR));
|
|
}
|
|
|
|
|
|
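// emit code for OP_BLOCK_COPY: the common 12-byte (3d vector) case is inlined
// as an 8-byte plus a 4-byte copy, anything else goes through the generic
// FUNC_BCPY helper with src/dst/count passed in R0/R1/R2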
static void emitBlockCopy( vm_t *vm, const uint32_t count )
|
|
{
|
|
if ( count == 12 ) // most common case - 3d vector copy
|
|
{
|
|
uint32_t rx[3];
|
|
int safe_arg[2];
|
|
|
|
rx[0] = load_rx_opstack( R0 ); // src: r0 = *opstack;
|
|
safe_arg[0] = is_safe_arg();
|
|
dec_opstack(); // opstack -= 4
|
|
|
|
rx[1] = load_rx_opstack( R1 ); // dst: r1 = *opstack
|
|
safe_arg[1] = is_safe_arg();
|
|
dec_opstack(); // opstack -= 4
|
|
|
|
if ( !safe_arg[0] )
|
|
emit(AND32(rx[0], rx[0], rDATAMASK)); // r0 &= dataMask
|
|
|
|
if ( !safe_arg[1] )
|
|
emit(AND32(rx[1], rx[1], rDATAMASK)); // r1 &= dataMask
|
|
|
|
emit(ADD64(rx[0], rx[0], rDATABASE));
|
|
emit(ADD64(rx[1], rx[1], rDATABASE));
|
|
|
|
rx[2] = alloc_rx( R2 | TEMP );
|
|
|
|
// load/store double word
|
|
emit(LDR64i(rx[2], rx[0], 0));
|
|
emit(STR64i(rx[2], rx[1], 0));
|
|
|
|
// load/store word
|
|
emit(LDR32i(rx[2], rx[0], 8));
|
|
emit(STR32i(rx[2], rx[1], 8));
|
|
|
|
unmask_rx( rx[2] );
|
|
|
|
unmask_rx( rx[1] );
|
|
unmask_rx( rx[0] );
|
|
return;
|
|
}
|
|
|
|
load_rx_opstack( R0 | FORCED ); dec_opstack(); // src: r0 = *opstack; opstack -= 4
|
|
load_rx_opstack( R1 | FORCED ); dec_opstack(); // dst: r1 = *opstack; opstack -= 4
|
|
alloc_rx( R2 | FORCED ); // r2 - count
|
|
alloc_rx( R3 | FORCED ); // r3 - scratch
|
|
emit_MOVRi(R2, count);
|
|
emitFuncOffset( vm, FUNC_BCPY );
|
|
unmask_rx( R3 );
|
|
unmask_rx( R2 );
|
|
unmask_rx( R1 );
|
|
unmask_rx( R0 );
|
|
}
|
|
|
|
|
|
#ifdef CONST_OPTIMIZE
|
|
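// peephole optimization for OP_CONST: try to fold the constant directly into
// the following instruction (arithmetic/logical immediates, shifts, jumps,
// calls and comparisons) instead of materializing it on the opStack;
// returns qtrue when both instructions were consumed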
static qboolean ConstOptimize( vm_t *vm, instruction_t *ci, instruction_t *ni )
|
|
{
|
|
uint32_t immrs;
|
|
uint32_t rx[3];
|
|
uint32_t sx[2];
|
|
uint32_t x, imm;
|
|
|
|
switch ( ni->op ) {
|
|
|
|
case OP_ADD:
|
|
case OP_SUB:
|
|
if ( encode_arith_imm( ci->value, &imm ) ) {
|
|
//rx[1] = rx[0] = load_rx_opstack( R0 ); // r0 = *opstack
|
|
load_rx_opstack2( &rx[1], R1, &rx[0], R0 ); // r1 = r0 = *opstack
|
|
switch ( ni->op ) {
|
|
case OP_ADD: emit( ADD32i( rx[1], rx[0], imm ) ); break; // r1 = r0 + imm
|
|
case OP_SUB: emit( SUB32i( rx[1], rx[0], imm ) ); break; // r1 = r0 - imm
|
|
}
|
|
if ( rx[0] != rx[1] ) {
|
|
unmask_rx( rx[0] );
|
|
}
|
|
store_rx_opstack( rx[1] ); // *opstack = r1
|
|
ip += 1; // OP_ADD | OP_SUB
|
|
return qtrue;
|
|
}
|
|
break;
|
|
|
|
case OP_BAND:
|
|
case OP_BOR:
|
|
case OP_BXOR:
|
|
x = ci->value;
|
|
if ( encode_logic_imm( x, 32, &immrs ) ) {
|
|
//rx[1] = rx[0] = load_rx_opstack( R0 ); // r1 = r0 = *opstack
|
|
load_rx_opstack2( &rx[1], R1, &rx[0], R0 ); // r1 = r0 = *opstack
|
|
switch ( ni->op ) {
|
|
case OP_BAND: emit( AND32i( rx[1], rx[0], immrs ) ); break; // r1 = r0 & const
|
|
case OP_BOR: emit( ORR32i( rx[1], rx[0], immrs ) ); break; // r1 = r0 | const
|
|
case OP_BXOR: emit( EOR32i( rx[1], rx[0], immrs ) ); break; // r1 = r0 ^ const
|
|
}
|
|
if ( rx[0] != rx[1] ) {
|
|
unmask_rx( rx[0] );
|
|
}
|
|
store_rx_opstack( rx[1] ); // *opstack = r1
|
|
ip += 1; // OP_BAND | OP_BOR | OP_BXOR
|
|
return qtrue;
|
|
}
|
|
break;
|
|
|
|
case OP_LSH:
|
|
case OP_RSHI:
|
|
case OP_RSHU:
|
|
if ( ci->value < 0 || ci->value > 31 )
|
|
break; // undefined behavior
|
|
if ( ci->value ) {
|
|
//rx[1] = rx[0] = load_rx_opstack( R0 ); // r1 = r0 = *opstack
|
|
load_rx_opstack2( &rx[1], R1, &rx[0], R0 ); // r1 = r0 = *opstack
|
|
switch ( ni->op ) {
|
|
case OP_LSH: emit( LSL32i( rx[1], rx[0], ci->value ) ); break; // r1 = r0 << x
|
|
case OP_RSHI: emit( ASR32i( rx[1], rx[0], ci->value ) ); break; // r1 = r0 >> x
|
|
case OP_RSHU: emit( LSR32i( rx[1], rx[0], ci->value ) ); break; // r1 = (unsigned)r0 >> x
|
|
}
|
|
if ( rx[0] != rx[1] ) {
|
|
unmask_rx( rx[0] );
|
|
}
|
|
store_rx_opstack( rx[1] ); // *opstack = r1
|
|
}
|
|
ip += 1; // OP_LSH | OP_RSHI | OP_RSHU
|
|
return qtrue;
|
|
|
|
case OP_JUMP:
|
|
flush_volatile();
|
|
emit(B(vm->instructionPointers[ ci->value ] - compiledOfs));
|
|
ip += 1; // OP_JUMP
|
|
return qtrue;
|
|
|
|
case OP_CALL:
|
|
inc_opstack(); // opstack += 4
|
|
if ( ci->value == ~TRAP_SQRT ) {
|
|
sx[0] = alloc_sx( S0 );
|
|
emit(VLDRi(sx[0], rPROCBASE, 8)); // s0 = [procBase + 8]
|
|
emit(FSQRT(sx[0], sx[0])); // s0 = sqrtf( s0 )
|
|
store_sx_opstack( sx[0] ); // *opstack = s0
|
|
ip += 1; // OP_CALL
|
|
return qtrue;
|
|
}
|
|
flush_volatile();
|
|
if ( ci->value == ~TRAP_SIN || ci->value == ~TRAP_COS ) {
|
|
sx[0] = S0; mask_sx( sx[0] );
|
|
rx[0] = alloc_rx( R16 );
|
|
emit(VLDRi(sx[0], rPROCBASE, 8)); // s0 = [procBase + 8]
|
|
if ( ci->value == ~TRAP_SIN )
|
|
emit_MOVXi(rx[0], (intptr_t)sinf);
|
|
else
|
|
emit_MOVXi(rx[0], (intptr_t)cosf);
|
|
emit(BLR(rx[0]));
|
|
unmask_rx( rx[0] );
|
|
store_sx_opstack( sx[0] ); // *opstack = s0
|
|
ip += 1; // OP_CALL
|
|
return qtrue;
|
|
}
|
|
if ( ci->value < 0 ) // syscall
|
|
{
|
|
alloc_rx( R0 | FORCED );
|
|
emit_MOVRi(R0, ~ci->value); // r0 = syscall number
|
|
if ( opstack != 1 ) {
|
|
emit( ADD64i( rOPSTACK, rOPSTACK, ( opstack - 1 ) * sizeof( int32_t ) ) );
|
|
emitFuncOffset( vm, FUNC_SYSF );
|
|
emit( SUB64i( rOPSTACK, rOPSTACK, ( opstack - 1 ) * sizeof( int32_t ) ) );
|
|
} else {
|
|
emitFuncOffset( vm, FUNC_SYSF );
|
|
}
|
|
ip += 1; // OP_CALL;
|
|
store_syscall_opstack();
|
|
return qtrue;
|
|
}
|
|
if ( opstack != 1 ) {
|
|
emit( ADD64i( rOPSTACK, rOPSTACK, ( opstack - 1 ) * sizeof( int32_t ) ) );
|
|
emit( BL( vm->instructionPointers[ci->value] - compiledOfs ) );
|
|
emit( SUB64i( rOPSTACK, rOPSTACK, ( opstack - 1 ) * sizeof( int32_t ) ) );
|
|
} else {
|
|
emit( BL( vm->instructionPointers[ci->value] - compiledOfs ) );
|
|
}
|
|
ip += 1; // OP_CALL;
|
|
return qtrue;
|
|
|
|
case OP_EQ:
|
|
case OP_NE:
|
|
case OP_GEI:
|
|
case OP_GTI:
|
|
case OP_GTU:
|
|
case OP_GEU:
|
|
case OP_LTU:
|
|
case OP_LEU:
|
|
case OP_LEI:
|
|
case OP_LTI: {
|
|
uint32_t comp = get_comp( ni->op );
|
|
rx[0] = load_rx_opstack( R0 | RCONST ); dec_opstack(); // r0 = *opstack; opstack -= 4
|
|
x = ci->value;
|
|
if ( x == 0 && ( ni->op == OP_EQ || ni->op == OP_NE ) ) {
|
|
if ( ni->op == OP_EQ )
|
|
emit(CBZ32(rx[0], vm->instructionPointers[ ni->value ] - compiledOfs));
|
|
else
|
|
emit(CBNZ32(rx[0], vm->instructionPointers[ ni->value ] - compiledOfs));
|
|
} else {
|
|
if ( encode_arith_imm( x, &imm ) ) {
|
|
emit(CMP32i(rx[0], imm));
|
|
} else {
|
|
rx[1] = alloc_rx_const( R2, x );
|
|
emit(CMP32(rx[0], rx[1]));
|
|
unmask_rx( rx[1] );
|
|
}
|
|
emit(Bcond(comp, vm->instructionPointers[ ni->value ] - compiledOfs));
|
|
}
|
|
}
|
|
unmask_rx( rx[0] );
|
|
ip += 1; // OP_cond
|
|
return qtrue;
|
|
|
|
case OP_EQF:
|
|
case OP_NEF:
|
|
case OP_LTF:
|
|
case OP_LEF:
|
|
case OP_GTF:
|
|
case OP_GEF:
|
|
if ( ci->value == 0 ) {
|
|
uint32_t comp = get_comp( ni->op );
|
|
sx[0] = load_sx_opstack( S0 | RCONST ); dec_opstack(); // s0 = *opstack; opstack -= 4
|
|
emit( FCMP0( sx[0] ) );
|
|
emit( Bcond( comp, vm->instructionPointers[ni->value] - compiledOfs ) );
|
|
unmask_sx( sx[0] );
|
|
ip += 1; // OP_cond
|
|
return qtrue;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return qfalse;
|
|
}
|
|
#endif // CONST_OPTIMIZE
|
|
|
|
|
|
#ifdef DUMP_CODE
|
|
static void dump_code( const char *vmname, uint32_t *code, int32_t code_len )
|
|
{
|
|
const char *filename = va( "vm-%s.hex", vmname );
|
|
fileHandle_t fh = FS_FOpenFileWrite( filename );
|
|
if ( fh != FS_INVALID_HANDLE ) {
|
|
uint32_t i;
|
|
for ( i = 0; i < code_len; i++ ) {
|
|
FS_Printf( fh, "%02x %02x %02x %02x\n", ( code[i] >> 0 ) & 0xFF, ( code[i] >> 8 ) & 0xFF, ( code[i] >> 16 ) & 0xFF, ( code[i] >> 24 ) & 0xFF );
|
|
}
|
|
FS_FCloseFile( fh );
|
|
}
|
|
}
|
|
#endif
|
|
|
|
|
|
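/*
=================
VM_Compile

translate the loaded QVM bytecode into native AArch64 code: the instructions
are verified, compiled twice (the first pass only measures the code size),
the literal pool is appended and the buffer is finally made executable
=================
*/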
qboolean VM_Compile( vm_t *vm, vmHeader_t *header )
|
|
{
|
|
instruction_t *ci;
|
|
const char *errMsg;
|
|
uint32_t *litBase;
|
|
uint32_t rx[4], imm;
|
|
uint32_t sx[3];
|
|
int proc_base;
|
|
int proc_len;
|
|
#ifdef RET_OPTIMIZE
|
|
int proc_end;
|
|
#endif
|
|
var_addr_t var;
|
|
reg_t *reg;
|
|
int i;
|
|
|
|
inst = (instruction_t*)Z_Malloc( (header->instructionCount + 8 ) * sizeof( instruction_t ) );
|
|
//instructionOffsets = (uint32_t*)Z_Malloc( header->instructionCount * sizeof( uint32_t ) );
|
|
|
|
errMsg = VM_LoadInstructions( (byte *) header + header->codeOffset, header->codeLength, header->instructionCount, inst );
|
|
if ( !errMsg ) {
|
|
errMsg = VM_CheckInstructions( inst, vm->instructionCount, vm->jumpTableTargets, vm->numJumpTableTargets, vm->exactDataLength );
|
|
}
|
|
|
|
if ( errMsg ) {
|
|
VM_FreeBuffers();
|
|
Com_Printf( S_COLOR_YELLOW "%s(%s) error: %s\n", __func__, vm->name, errMsg );
|
|
return qfalse;
|
|
}
|
|
|
|
if ( !vm->instructionPointers ) {
|
|
vm->instructionPointers = Hunk_Alloc( header->instructionCount * sizeof(vm->instructionPointers[0]), h_high );
|
|
}
|
|
|
|
VM_ReplaceInstructions( vm, inst );
|
|
|
|
litBase = NULL;
|
|
#ifdef USE_LITERAL_POOL
|
|
VM_InitLiterals();
|
|
#endif
|
|
|
|
memset( savedOffset, 0, sizeof( savedOffset ) );
|
|
|
|
code = NULL;
|
|
vm->codeBase.ptr = NULL;
|
|
|
|
for ( pass = 0; pass < NUM_PASSES; pass++ ) {
|
|
__recompile:
|
|
|
|
// translate all instructions
|
|
ip = 0;
|
|
compiledOfs = 0;
|
|
|
|
proc_base = -1;
|
|
proc_len = 0;
|
|
#ifdef RET_OPTIMIZE
|
|
proc_end = 0;
|
|
#endif
|
|
|
|
init_opstack();
|
|
|
|
emit(SUB64i(SP, SP, 96)); // SP -= 96
|
|
|
|
emit(STP64(R20, R21, SP, 0));
|
|
emit(STP64(R22, R23, SP, 16));
|
|
emit(STP64(R24, R25, SP, 32));
|
|
emit(STP64(R26, R27, SP, 48));
|
|
emit(STP64(R28, R29, SP, 64));
|
|
emit(STP64(R19, LR, SP, 80));
|
|
|
|
emit_MOVXi(rLITBASE, (intptr_t)litBase );
|
|
emit_MOVXi(rVMBASE, (intptr_t)vm );
|
|
emit_MOVXi(rINSPOINTERS, (intptr_t)vm->instructionPointers );
|
|
emit_MOVXi(rDATABASE, (intptr_t)vm->dataBase );
|
|
|
|
emit_MOVRi(rDATAMASK, vm->dataMask);
|
|
emit_MOVRi(rPSTACKBOTTOM, vm->stackBottom);
|
|
|
|
// these are volatile variables
|
|
emit(LDR64i(rOPSTACK, rVMBASE, offsetof(vm_t, opStack)));
|
|
emit(LDR64i(rOPSTACKTOP, rVMBASE, offsetof(vm_t, opStackTop)));
|
|
emit(LDR32i(rPSTACK, rVMBASE, offsetof(vm_t, programStack)));
|
|
|
|
emitFuncOffset( vm, FUNC_ENTR ); // call vmMain()
|
|
|
|
#ifdef DEBUG_VM
|
|
emit(STR32i(rPSTACK, rVMBASE, offsetof(vm_t, programStack))); // vm->programStack = rPSTACK;
|
|
#endif
|
|
|
|
emit(LDP64(R20, R21, SP, 0));
|
|
emit(LDP64(R22, R23, SP, 16));
|
|
emit(LDP64(R24, R25, SP, 32));
|
|
emit(LDP64(R26, R27, SP, 48));
|
|
emit(LDP64(R28, R29, SP, 64));
|
|
emit(LDP64(R19, LR, SP, 80));
|
|
|
|
emit(ADD64i(SP, SP, 96)); // SP += 96
|
|
|
|
emit(RET(LR));
|
|
|
|
#ifdef FUNC_ALIGN
|
|
emitAlign( FUNC_ALIGN );
|
|
#endif
|
|
|
|
savedOffset[ FUNC_ENTR ] = compiledOfs; // offset to vmMain() entry point
|
|
|
|
while ( ip < header->instructionCount ) {
|
|
|
|
ci = &inst[ ip + 0 ];
|
|
|
|
#ifdef REGS_OPTIMIZE
|
|
if ( ci->jused )
|
|
#endif
|
|
{
|
|
// register optimizations can be performed safely only if
|
|
// we are certain that the current instruction is not a jump target
|
|
flush_volatile();
|
|
}
|
|
|
|
vm->instructionPointers[ ip++ ] = compiledOfs;
|
|
|
|
switch ( ci->op )
|
|
{
|
|
case OP_UNDEF:
|
|
emit( BRK( 0 ) );
|
|
break;
|
|
|
|
case OP_IGNORE:
|
|
break;
|
|
|
|
case OP_BREAK:
|
|
emit( BRK( 3 ) );
|
|
break;
|
|
|
|
case OP_ENTER:
|
|
#ifdef FUNC_ALIGN
|
|
emitAlign( FUNC_ALIGN );
|
|
#endif
|
|
vm->instructionPointers[ ip - 1 ] = compiledOfs;
|
|
|
|
proc_base = ip; // this points to the next instruction after OP_ENTER
|
|
// locate endproc
|
|
for ( proc_len = -1, i = ip; i < header->instructionCount; i++ ) {
|
|
if ( inst[ i ].op == OP_PUSH && inst[ i + 1 ].op == OP_LEAVE ) {
|
|
proc_len = i - proc_base;
|
|
#ifdef RET_OPTIMIZE
|
|
proc_end = i + 1;
|
|
#endif
|
|
break;
|
|
}
|
|
}
|
|
|
|
if ( proc_len == 0 ) {
|
|
// empty function, just return
|
|
emit( RET( LR ) );
|
|
ip += 2; // OP_PUSH + OP_LEAVE
|
|
break;
|
|
}
|
|
|
|
// save opStack, LR
|
|
emit(STP64pre(LR, rOPSTACK, SP, -16));
|
|
|
|
// save programStack, procBase
|
|
emit(STP64pre(rPSTACK, rPROCBASE, SP, -16));
|
|
|
|
if ( encode_arith_imm( ci->value, &imm ) ) {
|
|
emit(SUB32i(rPSTACK, rPSTACK, imm)); // pstack -= arg
|
|
} else {
|
|
rx[0] = alloc_rx_const( R2, ci->value ); // r2 = arg
|
|
emit(SUB32(rPSTACK, rPSTACK, rx[0])); // pstack -= r2
|
|
unmask_rx( rx[0] );
|
|
}
|
|
|
|
emit_CheckProc( vm, ci );
|
|
emit(ADD64(rPROCBASE, rPSTACK, rDATABASE)); // procBase = programStack + dataBase
|
|
break;
|
|
|
|
case OP_LEAVE:
|
|
flush_opstack();
|
|
dec_opstack(); // opstack -= 4
|
|
#ifdef DEBUG_VM
|
|
if ( opstack != 0 )
|
|
DROP( "opStack corrupted on OP_LEAVE" );
|
|
#endif
|
|
|
|
#ifdef RET_OPTIMIZE
|
|
if ( !ci->endp && proc_base >= 0 ) {
|
|
// jump to last OP_LEAVE instruction in this function
|
|
if ( inst[ ip + 0 ].op == OP_PUSH && inst[ ip + 1 ].op == OP_LEAVE ) {
|
|
// next instruction is proc_end
|
|
} else {
|
|
emit( B( vm->instructionPointers[ proc_end ] - compiledOfs ) );
|
|
}
|
|
break;
|
|
}
|
|
#endif
|
|
// restore programStack, procBase
|
|
emit( LDP64post( rPSTACK, rPROCBASE, SP, 16 ) );
|
|
// restore LR, opStack
|
|
emit( LDP64post( LR, rOPSTACK, SP, 16 ) );
|
|
// return to caller
|
|
emit( RET( LR ) );
|
|
break;
|
|
|
|
case OP_CALL:
|
|
rx[0] = load_rx_opstack( R0 | FORCED ); // r0 = *opstack
|
|
flush_volatile();
|
|
if ( opstack != 1 ) {
|
|
emit( ADD64i( rOPSTACK, rOPSTACK, ( opstack - 1 ) * sizeof( int32_t ) ) );
|
|
emitFuncOffset( vm, FUNC_CALL );
|
|
emit( SUB64i( rOPSTACK, rOPSTACK, ( opstack - 1 ) * sizeof( int32_t ) ) );
|
|
} else {
|
|
emitFuncOffset( vm, FUNC_CALL );
|
|
}
|
|
unmask_rx( rx[0] );
|
|
break;
|
|
|
|
case OP_PUSH:
|
|
inc_opstack(); // opstack += 4
|
|
if ( (ci + 1)->op == OP_LEAVE ) {
|
|
proc_base = -1;
|
|
}
|
|
break;
|
|
|
|
case OP_POP:
|
|
dec_opstack_discard(); // opstack -= 4
|
|
break;
|
|
|
|
case OP_CONST:
|
|
#ifdef CONST_OPTIMIZE
|
|
if ( ConstOptimize( vm, ci + 0, ci + 1 ) )
|
|
break;
|
|
#endif
|
|
inc_opstack(); // opstack += 4
|
|
store_item_opstack( ci );
|
|
break;
|
|
|
|
case OP_LOCAL:
|
|
inc_opstack(); // opstack += 4
|
|
store_item_opstack( ci );
|
|
break;
|
|
|
|
case OP_JUMP:
|
|
rx[0] = load_rx_opstack( R0 | RCONST ); dec_opstack(); // r0 = *opstack; opstack -= 4
|
|
flush_volatile();
|
|
emit_CheckJump( vm, rx[0], proc_base, proc_len ); // check if r0 is within current proc
|
|
rx[1] = alloc_rx( R16 );
|
|
emit(LDR64_8(rx[1], rINSPOINTERS, rx[0])); // r16 = instructionPointers[ r0 ]
|
|
emit(BR(rx[1]));
|
|
unmask_rx( rx[1] );
|
|
unmask_rx( rx[0] );
|
|
wipe_vars();
|
|
break;
|
|
|
|
case OP_EQ:
|
|
case OP_NE:
|
|
case OP_LTI:
|
|
case OP_LEI:
|
|
case OP_GTI:
|
|
case OP_GEI:
|
|
case OP_LTU:
|
|
case OP_LEU:
|
|
case OP_GTU:
|
|
case OP_GEU: {
|
|
uint32_t comp = get_comp( ci->op );
|
|
rx[0] = load_rx_opstack( R0 | RCONST ); dec_opstack(); // r0 = *opstack; opstack -= 4
|
|
rx[1] = load_rx_opstack( R1 | RCONST ); dec_opstack(); // r1 = *opstack; opstack -= 4
|
|
unmask_rx( rx[0] );
|
|
unmask_rx( rx[1] );
|
|
emit(CMP32(rx[1], rx[0]));
|
|
emit(Bcond(comp, vm->instructionPointers[ci->value] - compiledOfs));
|
|
}
|
|
break;
|
|
|
|
case OP_EQF:
|
|
case OP_NEF:
|
|
case OP_LTF:
|
|
case OP_LEF:
|
|
case OP_GTF:
|
|
case OP_GEF: {
|
|
uint32_t comp = get_comp( ci->op );
|
|
sx[0] = load_sx_opstack( S0 | RCONST ); dec_opstack(); // s0 = *opstack; opstack -= 4
|
|
sx[1] = load_sx_opstack( S1 | RCONST ); dec_opstack(); // s1 = *opstack; opstack -= 4
|
|
unmask_sx( sx[0] );
|
|
unmask_sx( sx[1] );
|
|
emit(FCMP(sx[1], sx[0]));
|
|
emit(Bcond(comp, vm->instructionPointers[ci->value] - compiledOfs));
|
|
}
|
|
break;
|
|
|
|
case OP_LOAD1:
|
|
case OP_LOAD2:
|
|
case OP_LOAD4:
|
|
#ifdef FPU_OPTIMIZE
|
|
if ( ci->fpu && ci->op == OP_LOAD4 ) {
|
|
// fpu path
|
|
if ( addr_on_top( &var ) ) {
|
|
// address specified by CONST/LOCAL
|
|
discard_top();
|
|
var.size = 4;
|
|
if ( find_sx_var( &sx[0], &var ) ) {
|
|
// already cached in some register
|
|
mask_sx( sx[0] );
|
|
} else {
|
|
// not cached, perform load
|
|
sx[0] = alloc_sx( S0 );
|
|
if ( can_encode_imm12( var.addr, 2 ) ) {
|
|
emit( VLDRi( sx[0], var.base, var.addr ) ); // s0 = var.base[var.addr]
|
|
} else {
|
|
rx[0] = alloc_rx_const( R2, var.addr ); // r2 = var.addr
|
|
emit( VLDR( sx[0], var.base, rx[0] ) ); // s0 = var.base[r2]
|
|
unmask_rx( rx[0] );
|
|
}
|
|
set_sx_var( sx[0], &var ); // update metadata, this may wipe constant
|
|
}
|
|
} else {
|
|
rx[0] = load_rx_opstack( R0 ); // r0 = *opstack
|
|
emit_CheckReg( vm, rx[0], FUNC_BADR );
|
|
sx[0] = alloc_sx( S0 );
|
|
emit( VLDR( sx[0], rDATABASE, rx[0] ) ); // s0 = dataBase[r0]
|
|
unmask_rx( rx[0] );
|
|
}
|
|
store_sx_opstack( sx[0] ); // *opstack = s0
|
|
break;
|
|
}
|
|
#endif // FPU_OPTIMIZE
|
|
if ( addr_on_top( &var ) ) {
|
|
// address specified by CONST/LOCAL
|
|
opcode_t sign_extend;
|
|
int scale;
|
|
switch ( ci->op ) {
|
|
case OP_LOAD1: var.size = 1; scale = 0; sign_extend = OP_SEX8; break;
|
|
case OP_LOAD2: var.size = 2; scale = 1; sign_extend = OP_SEX16; break;
|
|
default: var.size = 4; scale = 2; sign_extend = OP_UNDEF; break;
|
|
}
|
|
discard_top();
|
|
if ( ( reg = find_rx_var( &rx[0], &var ) ) != NULL ) {
|
|
// already cached in some register
|
|
// do zero extension if needed
|
|
switch ( ci->op ) {
|
|
case OP_LOAD1:
|
|
if ( reg->ext != Z_EXT8 ) {
|
|
emit( UXTB( rx[0], rx[0] ) ); // r0 = (unsigned byte) r0
|
|
// invalidate any mappings that overlap with the high [8..31] bits
|
|
//var.addr += 1; var.size = 3;
|
|
//wipe_reg_range( rx_regs + rx[0], &var );
|
|
reduce_map_size( reg, 1 );
|
|
// modify constant
|
|
reg->cnst.value &= 0xFF;
|
|
reg->ext = Z_EXT8;
|
|
}
|
|
break;
|
|
case OP_LOAD2:
|
|
if ( reg->ext != Z_EXT16 ) {
|
|
emit( UXTH( rx[0], rx[0] ) ); // r0 = (unsigned short) r0
|
|
// invalidate any mappings that overlap with the high [16..31] bits
|
|
//var.addr += 2; var.size = 2;
|
|
//wipe_reg_range( rx_regs + rx[0], &var );
|
|
reduce_map_size( reg, 2 );
|
|
// modify constant
|
|
reg->cnst.value &= 0xFFFF;
|
|
reg->ext = Z_EXT16;
|
|
}
|
|
break;
|
|
case OP_LOAD4:
|
|
reg->ext = Z_NONE;
|
|
break;
|
|
}
|
|
mask_rx( rx[0] );
|
|
} else {
|
|
// not cached, perform load
|
|
rx[0] = alloc_rx( R0 ); // allocate new register
|
|
if ( can_encode_imm12( var.addr, scale ) ) {
|
|
// short encoding
|
|
if ( ( ci + 1 )->op == sign_extend && sign_extend != OP_UNDEF ) {
|
|
// merge with following sign-extension instruction
|
|
switch ( ci->op ) {
|
|
case OP_LOAD1: emit( LDRSB32i( rx[0], var.base, var.addr ) ); set_rx_ext( rx[0], S_EXT8 ); break;
|
|
case OP_LOAD2: emit( LDRSH32i( rx[0], var.base, var.addr ) ); set_rx_ext( rx[0], S_EXT16 ); break;
|
|
}
|
|
ip += 1; // OP_SEX8/OP_SEX16
|
|
} else {
|
|
switch ( ci->op ) {
|
|
case OP_LOAD1: emit( LDRB32i( rx[0], var.base, var.addr ) ); set_rx_ext( rx[0], Z_EXT8 ); break;
|
|
case OP_LOAD2: emit( LDRH32i( rx[0], var.base, var.addr ) ); set_rx_ext( rx[0], Z_EXT16 ); break;
|
|
case OP_LOAD4: emit( LDR32i( rx[0], var.base, var.addr ) ); set_rx_ext( rx[0], Z_NONE ); break;
|
|
}
|
|
}
|
|
} else {
|
|
// long encoding
|
|
rx[1] = alloc_rx_const( R2, var.addr );
|
|
switch ( ci->op ) {
|
|
case OP_LOAD1: var.size = 1; break; // r0 = (unsigned char)var.base[r2]
|
|
case OP_LOAD2: var.size = 2; break; // r0 = (unsigned short)var.base[r2]
|
|
default: var.size = 4; break; // r0 = (unsigned word)var.base[r2]
|
|
}
|
|
if ( ( ci + 1 )->op == sign_extend && sign_extend != OP_UNDEF ) {
|
|
// merge with following sign-extension instruction
|
|
switch ( ci->op ) {
|
|
case OP_LOAD1: emit( LDRSB32( rx[0], var.base, rx[1] ) ); set_rx_ext( rx[0], S_EXT8 ); break; // r0 = (signed char)var.base[r2]
|
|
case OP_LOAD2: emit( LDRSH32( rx[0], var.base, rx[1] ) ); set_rx_ext( rx[0], S_EXT16 ); break; // r0 = (signed short)var.base[r2]
|
|
}
|
|
} else {
|
|
switch ( ci->op ) {
|
|
case OP_LOAD1: emit( LDRB32( rx[0], var.base, rx[1] ) ); set_rx_ext( rx[0], Z_EXT8 ); break; // r0 = (unsigned char)var.base[r2]
|
|
case OP_LOAD2: emit( LDRH32( rx[0], var.base, rx[1] ) ); set_rx_ext( rx[0], Z_EXT16 ); break; // r0 = (unsigned short)var.base[r2]
|
|
default: emit( LDR32( rx[0], var.base, rx[1] ) ); set_rx_ext( rx[0], Z_NONE ); break; // r0 = (unsigned word)var.base[r2]
|
|
}
|
|
}
|
|
unmask_rx( rx[1] );
|
|
} // long encoding
|
|
set_rx_var( rx[0], &var ); // update metadata
|
|
}
|
|
} else {
|
|
// address specified by register
|
|
opcode_t sign_extend;
|
|
switch ( ci->op ) {
|
|
case OP_LOAD1: sign_extend = OP_SEX8; break;
|
|
case OP_LOAD2: sign_extend = OP_SEX16; break;
|
|
default: sign_extend = OP_UNDEF; break;
|
|
}
|
|
|
|
load_rx_opstack2( &rx[0], R1, &rx[1], R0 );
|
|
// rx[0] = rx[1] = load_rx_opstack( R0 ); // target, address = *opstack
|
|
|
|
emit_CheckReg( vm, rx[1], FUNC_BADR );
|
|
if ( ( ci + 1 )->op == sign_extend && sign_extend != OP_UNDEF ) {
|
|
// merge with following sign-extension instruction
|
|
switch ( ci->op ) {
|
|
case OP_LOAD1: emit( LDRSB32( rx[0], rDATABASE, rx[1] ) ); set_rx_ext( rx[0], S_EXT8 ); break; // target = (signed char)dataBase[address]
|
|
case OP_LOAD2: emit( LDRSH32( rx[0], rDATABASE, rx[1] ) ); set_rx_ext( rx[0], S_EXT16 ); break; // target = (signed short)dataBase[address]
|
|
}
|
|
ip += 1; // OP_SEX8/OP_SEX16
|
|
} else {
|
|
switch ( ci->op ) {
|
|
case OP_LOAD1: emit( LDRB32( rx[0], rDATABASE, rx[1] ) ); set_rx_ext( rx[0], Z_EXT8 ); break; // target = (unsigned char)dataBase[address]
|
|
case OP_LOAD2: emit( LDRH32( rx[0], rDATABASE, rx[1] ) ); set_rx_ext( rx[0], Z_EXT16 ); break; // target = (unsigned short)dataBase[address]
|
|
default: emit( LDR32( rx[0], rDATABASE, rx[1] ) ); set_rx_ext( rx[0], Z_NONE ); break; // target = (unsigned word)dataBase[address]
|
|
}
|
|
}
|
|
if ( rx[1] != rx[0] ) {
|
|
unmask_rx( rx[1] );
|
|
}
|
|
}
|
|
store_rx_opstack( rx[0] ); // *opstack = target
|
|
break;
|
|
|
|
case OP_STORE1:
|
|
case OP_STORE2:
|
|
case OP_STORE4:
|
|
if ( scalar_on_top() && ci->op == OP_STORE4 ) {
|
|
sx[0] = load_sx_opstack( S0 | RCONST ); dec_opstack(); // s0 = *opstack; opstack -= 4
|
|
if ( addr_on_top( &var ) ) {
|
|
// address specified by CONST/LOCAL
|
|
discard_top(); dec_opstack();
|
|
var.size = 4;
|
|
if ( can_encode_imm12( var.addr, 2 ) ) {
|
|
emit( VSTRi( sx[0], var.base, var.addr ) ); // var.base[var.addr] = s0
|
|
} else {
|
|
rx[0] = alloc_rx_const( R1, var.addr ); // r1 = var.addr
|
|
emit( VSTR( sx[0], var.base, rx[0] ) ); // var.base[r1] = s0
|
|
unmask_rx( rx[0] );
|
|
}
|
|
wipe_var_range( &var );
|
|
set_sx_var( sx[0], &var ); // update metadata
|
|
} else {
|
|
rx[1] = load_rx_opstack( R1 | RCONST ); dec_opstack(); // r1 = *opstack; opstack -= 4
|
|
emit_CheckReg( vm, rx[1], FUNC_BADW );
|
|
emit( VSTR( sx[0], rDATABASE, rx[1] ) ); // database[r1] = s0
|
|
unmask_rx( rx[1] );
|
|
wipe_vars(); // unknown/dynamic address, wipe all register mappings
|
|
}
|
|
unmask_sx( sx[0] );
|
|
} else {
|
|
// integer path
|
|
rx[0] = load_rx_opstack( R0 | RCONST ); dec_opstack(); // r0 = *opstack; opstack -= 4
|
|
if ( addr_on_top( &var ) ) {
|
|
// address specified by CONST/LOCAL
|
|
int scale;
|
|
discard_top(); dec_opstack();
|
|
switch ( ci->op ) {
|
|
case OP_STORE1: var.size = 1; scale = 0; break;
|
|
case OP_STORE2: var.size = 2; scale = 1; break;
|
|
default: var.size = 4; scale = 2; break;
|
|
}
|
|
if ( can_encode_imm12( var.addr, scale ) ) {
|
|
switch ( ci->op ) {
|
|
case OP_STORE1: emit( STRB32i( rx[0], var.base, var.addr ) ); break; // (byte*)var.base[var.addr] = r0
|
|
case OP_STORE2: emit( STRH32i( rx[0], var.base, var.addr ) ); break; // (short*)var.base[var.addr] = r0
|
|
default: emit( STR32i( rx[0], var.base, var.addr ) ); break; // (word*)var.base[var.addr] = r0
|
|
}
|
|
} else {
|
|
rx[1] = alloc_rx_const( R1, var.addr );
|
|
switch ( ci->op ) {
|
|
case OP_STORE1: emit( STRB32( rx[0], var.base, rx[1] ) ); break; // (byte*)var.base[r1] = r0
|
|
case OP_STORE2: emit( STRH32( rx[0], var.base, rx[1] ) ); break; // (short*)var.base[r1] = r0
|
|
default: emit( STR32( rx[0], var.base, rx[1] ) ); break; // (word*)var.base[r1] = r0
|
|
}
|
|
unmask_rx( rx[1] );
|
|
}
|
|
wipe_var_range( &var );
|
|
set_rx_var( rx[0], &var ); // update metadata
|
|
} else {
|
|
// address specified by register
|
|
rx[1] = load_rx_opstack( R1 | RCONST ); dec_opstack(); // r1 = *opstack; opstack -= 4
|
|
emit_CheckReg( vm, rx[1], FUNC_BADW );
|
|
switch ( ci->op ) {
|
|
case OP_STORE1: emit( STRB32( rx[0], rDATABASE, rx[1] ) ); break; // (byte*)database[r1] = r0
|
|
case OP_STORE2: emit( STRH32( rx[0], rDATABASE, rx[1] ) ); break; // (short*)database[r1] = r0
|
|
default: emit( STR32( rx[0], rDATABASE, rx[1] ) ); break; // database[r1] = r0
|
|
}
|
|
unmask_rx( rx[1] );
|
|
wipe_vars(); // unknown/dynamic address, wipe all register mappings
|
|
}
|
|
unmask_rx( rx[0] );
|
|
}
|
|
break;
|
|
|
|
case OP_ARG:
|
|
var.base = rPROCBASE;
|
|
var.addr = ci->value;
|
|
var.size = 4;
|
|
wipe_var_range( &var );
|
|
if ( scalar_on_top() ) {
|
|
sx[0] = load_sx_opstack( S0 | RCONST ); dec_opstack(); // s0 = *opstack; opstack -=4
|
|
emit( VSTRi( sx[0], var.base, var.addr ) ); // [procBase + v] = s0
|
|
unmask_sx( sx[0] );
|
|
} else {
|
|
rx[0] = load_rx_opstack( R0 | RCONST ); dec_opstack(); // r0 = *opstack; opstack -=4
|
|
emit( STR32i( rx[0], var.base, var.addr ) ); // [procBase + v] = r0
|
|
unmask_rx( rx[0] );
|
|
}
|
|
break;
|
|
|
|
case OP_BLOCK_COPY:
|
|
emitBlockCopy( vm, ci->value );
|
|
wipe_vars();
|
|
break;
|
|
|
|
case OP_SEX8:
|
|
case OP_SEX16:
|
|
case OP_NEGI:
|
|
case OP_BCOM:
|
|
if ( ci->op == OP_SEX8 || ci->op == OP_SEX16 ) {
|
|
// skip sign-extension for `if ( var == 0 )` tests if we already zero-extended
|
|
reg = rx_on_top();
|
|
if ( reg && (ci+1)->op == OP_CONST && (ci+1)->value == 0 && ( (ci+2)->op == OP_EQ || (ci+2)->op == OP_NE ) ) {
|
|
if ( !(ci+1)->jused && !(ci+2)->jused ) {
|
|
if ( ci->op == OP_SEX8 && reg->ext == Z_EXT8 ) {
|
|
break;
|
|
}
|
|
if ( ci->op == OP_SEX16 && reg->ext == Z_EXT16 ) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
rx[1] = rx[0] = load_rx_opstack( R0 ); // r0 = *opstack
|
|
//load_rx_opstack2( &rx[1], R1, &rx[0], R0 );
|
|
switch ( ci->op ) {
|
|
case OP_SEX8: emit(SXTB(rx[1], rx[0])); break; // r1 = sign extend byte r0
|
|
case OP_SEX16: emit(SXTH(rx[1], rx[0])); break; // r1 = sign extend short r0
|
|
case OP_NEGI: emit(NEG32(rx[1], rx[0])); break; // r1 = -r0
|
|
case OP_BCOM: emit(MVN32(rx[1], rx[0])); break; // r1 = ~r0
|
|
}
|
|
if ( rx[0] != rx[1] ) {
|
|
unmask_rx( rx[0] );
|
|
}
|
|
store_rx_opstack( rx[1] ); // *opstack = r1
|
|
break;
|
|
|
|
case OP_ADD:
|
|
case OP_SUB:
|
|
case OP_MULI:
|
|
case OP_MULU:
|
|
case OP_DIVI:
|
|
case OP_DIVU:
|
|
case OP_MODI:
|
|
case OP_MODU:
|
|
case OP_BAND:
|
|
case OP_BOR:
|
|
case OP_BXOR:
|
|
case OP_LSH:
|
|
case OP_RSHI:
|
|
case OP_RSHU:
|
|
|
|
load_rx_opstack2( &rx[3], R0, &rx[0], R3 ); dec_opstack();
|
|
//rx[3] = rx[0] = load_rx_opstack( R0 ); dec_opstack(); // source, target = *opstack
|
|
|
|
rx[1] = load_rx_opstack( R1 | RCONST ); // opstack-=4; r1 = *opstack
|
|
switch ( ci->op ) {
|
|
case OP_ADD: emit(ADD32(rx[3], rx[1], rx[0])); break; // r3 = r1 + r0
|
|
case OP_SUB: emit(SUB32(rx[3], rx[1], rx[0])); break; // r3 = r1 - r0
|
|
case OP_MULI:
|
|
case OP_MULU: emit(MUL32(rx[3], rx[1], rx[0])); break; // r3 = r1 * r0
|
|
case OP_DIVI: emit(SDIV32(rx[3], rx[1], rx[0])); break; // r3 = r1 / r0
|
|
case OP_DIVU: emit(UDIV32(rx[3], rx[1], rx[0])); break; // r3 = (unsigned)r1 / r0
|
|
case OP_BAND: emit(AND32(rx[3], rx[1], rx[0])); break; // r3 = r1 & r0
|
|
case OP_BOR: emit(ORR32(rx[3], rx[1], rx[0])); break; // r3 = r1 | r0
|
|
case OP_BXOR: emit(EOR32(rx[3], rx[1], rx[0])); break; // r3 = r1 ^ r0
|
|
case OP_LSH: emit(LSL32(rx[3], rx[1], rx[0])); break; // r3 = r1 << r0
|
|
case OP_RSHI: emit(ASR32(rx[3], rx[1], rx[0])); break; // r3 = r1 >> r0
|
|
case OP_RSHU: emit(LSR32(rx[3], rx[1], rx[0])); break; // r3 = (unsigned)r1 >> r0
|
|
case OP_MODI:
|
|
case OP_MODU:
|
|
rx[2] = alloc_rx( R2 | TEMP );
|
|
if ( ci->op == OP_MODI )
|
|
emit(SDIV32(rx[2], rx[1], rx[0])); // r2 = r1 / r0
|
|
else
|
|
emit(UDIV32(rx[2], rx[1], rx[0])); // r2 = (unsigned)r1 / r0
|
|
emit(MSUB32(rx[3], rx[0], rx[2], rx[1])); // r3 = r1 - r0 * r2
|
|
unmask_rx( rx[2] );
|
|
break;
|
|
}
|
|
if ( rx[0] != rx[3] ) {
|
|
unmask_rx( rx[0] );
|
|
}
|
|
unmask_rx( rx[1] );
|
|
store_rx_opstack( rx[3] ); // *opstack = r3
|
|
break;
|
|
|
|
case OP_ADDF:
|
|
case OP_SUBF:
|
|
case OP_MULF:
|
|
case OP_DIVF:
|
|
//sx[2] = sx[0] = load_sx_opstack( S0 ); dec_opstack(); // s0 = *opstack
|
|
load_sx_opstack2( &sx[2], S2, &sx[0], S0 ); dec_opstack();
|
|
sx[1] = load_sx_opstack( S1 | RCONST ); // opstack -= 4; s1 = *opstack
|
|
switch ( ci->op ) {
|
|
case OP_ADDF: emit(FADD(sx[2], sx[1], sx[0])); break; // s2 = s1 + s0
|
|
case OP_SUBF: emit(FSUB(sx[2], sx[1], sx[0])); break; // s2 = s1 - s0
|
|
case OP_MULF: emit(FMUL(sx[2], sx[1], sx[0])); break; // s2 = s1 * s0
|
|
case OP_DIVF: emit(FDIV(sx[2], sx[1], sx[0])); break; // s2 = s1 / s0
|
|
}
|
|
if ( sx[0] != sx[2] ) {
|
|
unmask_sx( sx[0] );
|
|
}
|
|
unmask_sx( sx[1] );
|
|
store_sx_opstack( sx[2] ); // *opstack = s2
|
|
break;
|
|
|
|
case OP_NEGF:
|
|
load_sx_opstack2( &sx[1], S1, &sx[0], S0 );
|
|
//sx[1] = sx[0] = load_sx_opstack( S0 ); // s0 = *opstack
|
|
emit(FNEG(sx[1], sx[0])); // s1 = -s0
|
|
if ( sx[0] != sx[1] ) {
|
|
unmask_sx( sx[0] );
|
|
}
|
|
store_sx_opstack( sx[1] ); // *opstack = s1
|
|
break;
|
|
|
|
case OP_CVIF:
|
|
sx[0] = alloc_sx( S0 );
|
|
rx[0] = load_rx_opstack( R0 | RCONST ); // r0 = *opstack
|
|
emit(SCVTF(sx[0], rx[0])); // s0 = (float)r0
|
|
unmask_rx( rx[0] );
|
|
store_sx_opstack( sx[0] ); // *opstack = s0
|
|
break;
|
|
|
|
case OP_CVFI:
|
|
rx[0] = alloc_rx( R0 );
|
|
sx[0] = load_sx_opstack( S0 | RCONST ); // s0 = *opstack
|
|
emit(FCVTZS(rx[0], sx[0])); // r0 = (int)s0
|
|
unmask_sx( sx[0] );
|
|
store_rx_opstack( rx[0] ); // *opstack = r0;
|
|
break;
|
|
|
|
} // switch op
|
|
} // ip
|
|
#ifdef FUNC_ALIGN
|
|
emitAlign( FUNC_ALIGN );
|
|
#endif
|
|
// it will set multiple offsets
|
|
emitCallFunc( vm );
|
|
#ifdef FUNC_ALIGN
|
|
emitAlign( FUNC_ALIGN );
|
|
#endif
|
|
savedOffset[ FUNC_BCPY ] = compiledOfs;
|
|
emitBlockCopyFunc( vm );
|
|
|
|
savedOffset[ FUNC_BADJ ] = compiledOfs;
|
|
emit_MOVXi(R16, (intptr_t)BadJump);
|
|
emit(BLR(R16));
|
|
|
|
savedOffset[ FUNC_OUTJ ] = compiledOfs;
|
|
emit_MOVXi(R16, (intptr_t)OutJump);
|
|
emit(BLR(R16));
|
|
|
|
savedOffset[ FUNC_OSOF ] = compiledOfs;
|
|
emit_MOVXi(R16, (intptr_t)ErrBadOpStack);
|
|
emit(BLR(R16));
|
|
|
|
savedOffset[ FUNC_PSOF ] = compiledOfs;
|
|
emit_MOVXi(R16, (intptr_t)ErrBadProgramStack);
|
|
emit(BLR(R16));
|
|
|
|
savedOffset[ FUNC_BADR ] = compiledOfs;
|
|
emit_MOVXi( R16, (intptr_t) ErrBadDataRead );
|
|
emit( BLR( R16 ) );
|
|
|
|
savedOffset[ FUNC_BADW ] = compiledOfs;
|
|
emit_MOVXi( R16, (intptr_t) ErrBadDataWrite );
|
|
emit( BLR( R16 ) );
|
|
|
|
} // pass
|
|
|
|
if ( vm->codeBase.ptr == NULL ) {
|
|
#ifdef USE_LITERAL_POOL
|
|
uint32_t allocSize = compiledOfs + numLiterals * sizeof( uint32_t );
|
|
#else
|
|
uint32_t allocSize = compiledOfs;
|
|
#endif
|
|
|
|
#ifdef _WIN32
|
|
vm->codeBase.ptr = VirtualAlloc( NULL, allocSize, MEM_COMMIT, PAGE_EXECUTE_READWRITE );
|
|
if ( !vm->codeBase.ptr ) {
|
|
VM_FreeBuffers();
|
|
Com_Printf( S_COLOR_YELLOW "%s(%s): VirtualAlloc failed\n", __func__, vm->name );
|
|
return qfalse;
|
|
}
|
|
#else
|
|
vm->codeBase.ptr = mmap( NULL, allocSize, PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0 );
|
|
if ( vm->codeBase.ptr == MAP_FAILED ) {
|
|
VM_FreeBuffers();
|
|
Com_Printf( S_COLOR_YELLOW "%s(%s): mmap failed\n", __func__, vm->name );
|
|
return qfalse;
|
|
}
|
|
#endif
|
|
|
|
vm->codeLength = allocSize; // code + literals
|
|
vm->codeSize = compiledOfs;
|
|
code = (uint32_t*)vm->codeBase.ptr;
|
|
litBase = (uint32_t*)(vm->codeBase.ptr + compiledOfs);
|
|
goto __recompile;
|
|
}
|
|
|
|
#ifdef USE_LITERAL_POOL
|
|
// append literals to the code
|
|
if ( numLiterals ) {
|
|
uint32_t *lp = litBase;
|
|
for ( i = 0; i < numLiterals; i++, lp++ ) {
|
|
*lp = litList[ i ].value;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
#ifdef DUMP_CODE
|
|
dump_code( vm->name, code, compiledOfs / 4 );
|
|
#endif
|
|
|
|
// offset all the instruction pointers for the new location
|
|
for ( i = 0; i < header->instructionCount; i++ ) {
|
|
if ( !inst[i].jused ) {
|
|
vm->instructionPointers[ i ] = (intptr_t)BadJump;
|
|
continue;
|
|
}
|
|
vm->instructionPointers[ i ] += (intptr_t)vm->codeBase.ptr;
|
|
}
|
|
|
|
VM_FreeBuffers();
|
|
|
|
#ifdef _WIN32
|
|
{
|
|
DWORD oldProtect = 0;
|
|
// remove write permissions
|
|
if ( !VirtualProtect( vm->codeBase.ptr, vm->codeLength, PAGE_EXECUTE_READ, &oldProtect ) ) {
|
|
VM_Destroy_Compiled( vm );
|
|
Com_Printf( S_COLOR_YELLOW "%s(%s): VirtualProtect failed\n", __func__, vm->name );
|
|
return qfalse;
|
|
}
|
|
}
|
|
#else
|
|
if ( mprotect( vm->codeBase.ptr, vm->codeLength, PROT_READ | PROT_EXEC ) ) {
|
|
VM_Destroy_Compiled( vm );
|
|
Com_Printf( S_COLOR_YELLOW "%s(%s): mprotect failed\n", __func__, vm->name );
|
|
return qfalse;
|
|
}
|
|
|
|
// clear icache, http://blogs.arm.com/software-enablement/141-caches-and-self-modifying-code/
|
|
__clear_cache( vm->codeBase.ptr, vm->codeBase.ptr + vm->codeLength );
|
|
#endif
|
|
|
|
vm->destroy = VM_Destroy_Compiled;
|
|
|
|
Com_Printf( "VM file %s compiled to %i bytes of code\n", vm->name, vm->codeLength );
|
|
|
|
return qtrue;
|
|
}
|
|
|
|
|
|
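/*
=================
VM_CallCompiled

set up the VM program stack frame, copy the call arguments into VM memory,
jump into the generated code and return the value left on the opStack
=================
*/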
int32_t VM_CallCompiled( vm_t *vm, int nargs, int32_t *args )
|
|
{
|
|
int32_t opStack[ MAX_OPSTACK_SIZE ];
|
|
int32_t stackOnEntry;
|
|
int32_t *image;
|
|
int i;
|
|
|
|
// we might be called recursively, so this might not be the very top
|
|
stackOnEntry = vm->programStack;
|
|
|
|
vm->programStack -= ( MAX_VMMAIN_CALL_ARGS + 2 ) * sizeof( int32_t );
|
|
|
|
// set up the stack frame
|
|
image = (int32_t*) ( vm->dataBase + vm->programStack );
|
|
for ( i = 0; i < nargs; i++ ) {
|
|
image[i + 2] = args[i];
|
|
}
|
|
|
|
// these only needed for interpreter:
|
|
// image[1] = 0; // return stack
|
|
// image[0] = -1; // will terminate loop on return
|
|
|
|
#ifdef DEBUG_VM
|
|
opStack[0] = 0xDEADC0DE;
|
|
#endif
|
|
opStack[1] = 0;
|
|
|
|
vm->opStack = opStack;
|
|
vm->opStackTop = opStack + ARRAY_LEN( opStack ) - 1;
|
|
|
|
vm->codeBase.func(); // go into generated code
|
|
|
|
#ifdef DEBUG_VM
|
|
if ( opStack[0] != 0xDEADC0DE ) {
|
|
Com_Error( ERR_DROP, "%s(%s): opStack corrupted in compiled code", __func__, vm->name );
|
|
}
|
|
|
|
if ( vm->programStack != stackOnEntry - ( MAX_VMMAIN_CALL_ARGS + 2 ) * sizeof( int32_t ) ) {
|
|
Com_Error( ERR_DROP, "%s(%s): programStack corrupted in compiled code", __func__, vm->name );
|
|
}
|
|
#endif
|
|
|
|
vm->programStack = stackOnEntry;
|
|
|
|
return opStack[1];
|
|
}
|