diff --git a/Makefile b/Makefile
index bff70210..3876d27b 100644
--- a/Makefile
+++ b/Makefile
@@ -350,6 +350,9 @@ ifneq (,$(findstring "$(PLATFORM)", "linux" "gnu_kfreebsd" "kfreebsd-gnu" "gnu")
     OPTIMIZEVM += -mtune=ultrasparc3 -mv8plus
     HAVE_VM_COMPILED=true
   endif
+  ifeq ($(ARCH),armv7l)
+    HAVE_VM_COMPILED=true
+  endif
   ifeq ($(ARCH),alpha)
     # According to http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=410555
     # -ffast-math will cause the client to die with SIGFPE on Alpha
@@ -1277,6 +1280,7 @@ targets: makedirs
 	@echo "  VERSION: $(VERSION)"
 	@echo "  COMPILE_PLATFORM: $(COMPILE_PLATFORM)"
 	@echo "  COMPILE_ARCH: $(COMPILE_ARCH)"
+	@echo "  HAVE_VM_COMPILED: $(HAVE_VM_COMPILED)"
 	@echo "  CC: $(CC)"
 ifeq ($(PLATFORM),mingw32)
 	@echo "  WINDRES: $(WINDRES)"
@@ -2047,6 +2051,9 @@ ifeq ($(HAVE_VM_COMPILED),true)
   ifeq ($(ARCH),sparc)
     Q3OBJ += $(B)/client/vm_sparc.o
   endif
+  ifeq ($(ARCH),armv7l)
+    Q3OBJ += $(B)/client/vm_armv7l.o
+  endif
 endif
 
 ifdef MINGW
@@ -2215,6 +2222,9 @@ ifeq ($(HAVE_VM_COMPILED),true)
   ifeq ($(ARCH),sparc)
     Q3DOBJ += $(B)/ded/vm_sparc.o
   endif
+  ifeq ($(ARCH),armv7l)
+    Q3DOBJ += $(B)/ded/vm_armv7l.o
+  endif
 endif
 
 ifdef MINGW
diff --git a/code/qcommon/vm_armv7l.c b/code/qcommon/vm_armv7l.c
new file mode 100644
index 00000000..dd63f5c7
--- /dev/null
+++ b/code/qcommon/vm_armv7l.c
@@ -0,0 +1,1210 @@
+/*
+===========================================================================
+Copyright (C) 2009 David S. Miller <davem@davemloft.net>
+Copyright (C) 2013,2014 SUSE Linux Products GmbH
+
+This file is part of Quake III Arena source code.
+
+Quake III Arena source code is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the License,
+or (at your option) any later version.
+
+Quake III Arena source code is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Quake III Arena source code; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+===========================================================================
+
+ARMv7l VM by Ludwig Nussel
+
+TODO: optimization
+
+Docu:
+http://www.coranac.com/tonc/text/asm.htm
+http://www.heyrick.co.uk/armwiki/Category:Opcodes
+ARMv7-A_ARMv7-R_DDI0406_2007.pdf
+*/
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "vm_local.h"
+
+/* libgcc EABI helpers referenced by the generated OP_DIV*/OP_MOD* code */
+int __aeabi_idiv(int, int);
+unsigned __aeabi_uidiv(unsigned, unsigned);
+void __aeabi_idivmod(void);
+void __aeabi_uidivmod(void);
+void __clear_cache(void *, void *);
+
+#define R0	0
+#define R1	1
+#define R2	2
+#define R3	3
+#define R4	4
+
+#define R12	12
+
+#define FP	11
+#define SP	13
+#define LR	14
+#define PC	15
+
+#define APSR_nzcv	15
+
+#define S14	14
+#define S15	15
+
+#define rOPSTACK	5
+#define rOPSTACKBASE	6
+#define rCODEBASE	7
+#define rPSTACK	8
+#define rDATABASE	9
+#define rDATAMASK	10
+
+#define bit(x) (1<<x)
+
+/* exit() won't be called but use it because it is marked with noreturn */
+#define DIE( reason, args... ) \
+	do { \
+		Com_Error(ERR_DROP, "vm_arm compiler error: " reason, ##args); \
+		exit(1); \
+	} while(0)
+
+/*
+ * opcode information: how many bytes of immediate follow each opcode.
+ * (The 256-entry lookup table vm_opInfo[] and the opnames[] string table
+ * used by the debug build are elided in this excerpt.)
+ */
+#define opImm0	0x0000 /* no immediate */
+#define opImm1	0x0001 /* 1 byte immediate value after opcode */
+#define opImm4	0x0002 /* 4 byte immediate value after opcode */
+
+#ifdef DEBUG_VM
+#define NOTIMPL(x) \
+	do { Com_Error(ERR_DROP, "instruction not implemented: %s", opnames[x]); } while(0)
+#else
+#define NOTIMPL(x) \
+	do { Com_Printf(S_COLOR_RED "instruction not implemented: %x\n", x); vm->compiled = qfalse; return; } while(0)
+#endif
+
+static void VM_Destroy_Compiled(vm_t *vm)
+{
+	if (vm->codeBase) {
+		if (munmap(vm->codeBase, vm->codeLength))
+			Com_Printf(S_COLOR_RED "Memory unmap failed, possible memory leak\n");
+	}
+	vm->codeBase = NULL;
+}
+
+/*
+=================
+ErrJump
+Error handler for jump/call to invalid instruction number
+=================
+*/
+
+static void __attribute__((__noreturn__)) ErrJump(unsigned num)
+{
+	Com_Error(ERR_DROP, "program tried to execute code outside VM (%x)", num);
+}
+
+static int asmcall(int call, int pstack)
+{
+	// save currentVM so as to allow for recursive VM entry
+	vm_t *savedVM = currentVM;
+	int i, ret;
+
+	// modify VM stack pointer for recursive VM entry
+	currentVM->programStack = pstack - 4;
+
+	if (sizeof(intptr_t) == sizeof(int)) {
+		intptr_t *argPosition = (intptr_t *)((byte *)currentVM->dataBase + pstack + 4);
+		argPosition[0] = -1 - call;
+		ret = currentVM->systemCall(argPosition);
+	} else {
+		intptr_t args[MAX_VMSYSCALL_ARGS];
+
+		args[0] = -1 - call;
+		int *argPosition = (int *)((byte *)currentVM->dataBase + pstack + 4);
+		for( i = 1; i < ARRAY_LEN(args); i++ )
+			args[i] = argPosition[i];
+
+		ret = currentVM->systemCall(args);
+	}
+
+	currentVM = savedVM;
+
+	return ret;
+}
+
+void _emit(vm_t *vm, unsigned isn, int pass)
+{
+#if 0
+	static int fd = -2;
+	if (fd == -2)
+		fd = open("code.bin", O_TRUNC|O_WRONLY|O_CREAT, 0644);
+	if (fd > 0)
+		write(fd, &isn, 4);
+#endif
+
+	if (pass)
+		memcpy(vm->codeBase+vm->codeLength, &isn, 4);
+	vm->codeLength+=4;
+}
+
+#define emit(isn) _emit(vm, isn, pass)
+
+static unsigned char off8(unsigned val)
+{
+	if (val&3)
+		DIE("offset must be multiple of four");
+	if (val > 1020)
+		DIE("offset too large");
+	return val>>2;
+}
+
+// ARM is really crazy ...
+static unsigned short rimm(unsigned val)
+{
+	unsigned shift = 0;
+	if (val < 256)
+		return val;
+	// rotate the value until it fits
+	while (shift < 16 && (val>255 || !(val&3))) {
+		val = (val&3)<<30 | val>>2;
+		++shift;
+	}
+	if (shift > 15 || val > 255) {
+		DIE("immediate cannot be encoded (%d, %d)\n", shift, val);
+	}
+	return (16-shift)<<8 | val;
+}
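[Reviewer note, not part of the patch: rimm() implements ARM's "modified immediate" rule -- an operand2 immediate is an 8-bit value rotated right by twice the 4-bit rotation field. A standalone sketch that round-trips a few constants through the same algorithm:]

#include <stdio.h>

/* same algorithm as rimm()/can_encode(); returns 0 if not encodable */
static unsigned encode_op2(unsigned val)
{
	unsigned shift = 0;
	if (val < 256)
		return val;
	while (shift < 16 && (val > 255 || !(val & 3))) {
		val = (val & 3) << 30 | val >> 2;   /* rotate right by 2 */
		++shift;
	}
	if (shift > 15 || val > 255)
		return 0;
	return (16 - shift) << 8 | val;
}

/* invert the encoding: imm8 rotated right by 2 * rot */
static unsigned decode_op2(unsigned op2)
{
	unsigned imm8 = op2 & 0xFF;
	unsigned rot = 2 * (op2 >> 8);
	return rot ? (imm8 >> rot) | (imm8 << (32 - rot)) : imm8;
}

int main(void)
{
	unsigned tests[] = { 4, 0x3F0, 0xFF000000, 0x101 };
	int i;
	for (i = 0; i < 4; i++) {
		unsigned op2 = encode_op2(tests[i]);
		if (op2)
			printf("0x%08X -> op2 0x%03X -> 0x%08X\n",
			       tests[i], op2, decode_op2(op2));
		else
			printf("0x%08X is not encodable\n", tests[i]);
	}
	return 0;
}

[0x101 is rejected because its two set bits are nine bits apart and can never fit the 8-bit window, no matter the rotation.]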
+
+// same as rimm but doesn't die, returns 0 if not encodable so don't call with zero as argument!
+static unsigned short can_encode(unsigned val)
+{
+	unsigned shift = 0;
+	if (!val)
+		DIE("can_encode: invalid argument");
+	if (val < 256)
+		return val;
+	// rotate the value until it fits
+	while (shift < 16 && (val>255 || !(val&3))) {
+		val = (val&3)<<30 | val>>2;
+		++shift;
+	}
+	if (shift > 15 || val > 255) {
+		return 0;
+	}
+	return (16-shift)<<8 | val;
+}
+
+#define PREINDEX (1<<24)
+
+#define rASR(i, reg) (0b10<<5 | ((i&31)<<7) | reg)
+#define rLSL(i, reg) (0b00<<5 | ((i&31)<<7) | reg)
+#define rLSR(i, reg) (0b01<<5 | ((i&31)<<7) | reg)
+#define rROR(i, reg) (0b11<<5 | ((i&31)<<7) | reg)
+
+// conditions
+#define EQ (0b0000<<28)
+#define NE (0b0001<<28)
+#define CS (0b0010<<28)
+#define HS CS
+#define CC (0b0011<<28)
+#define LO CC
+#define MI (0b0100<<28)
+#define PL (0b0101<<28)
+#define VS (0b0110<<28)
+#define VC (0b0111<<28)
+#define HI (0b1000<<28)
+#define LS (0b1001<<28)
+#define GE (0b1010<<28)
+#define LT (0b1011<<28)
+#define GT (0b1100<<28)
+#define LE (0b1101<<28)
+#define AL (0b1110<<28)
+#define cond(what, op) (what | (op&~AL))
+
+// XXX: v not correctly computed
+#define BKPT(v) (AL | 0b10010<<20 | ((v&~0xF)<<4) | 0b0111<<4 | (v&0xF))
+
+#define YIELD (0b110010<<20 | 0b1111<<12 | 1)
+#define NOP cond(AL, YIELD)
+
+// immediate value must fit in 0xFF!
+#define ANDi(dst, src, i) (AL | (0b001<<25) | (0b00000<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define EORi(dst, src, i) (AL | (0b001<<25) | (0b00010<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define SUBi(dst, src, i) (AL | (0b001<<25) | (0b00100<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define RSBi(dst, src, i) (AL | (0b001<<25) | (0b00110<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define ADDi(dst, src, i) (AL | (0b001<<25) | (0b01000<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define ADCi(dst, src, i) (AL | (0b001<<25) | (0b01010<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define SBCi(dst, src, i) (AL | (0b001<<25) | (0b01100<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define RSCi(dst, src, i) (AL | (0b001<<25) | (0b01110<<20) | (src<<16) | (dst<<12) | rimm(i))
+
+#define ORRi(dst, src, i) (AL | (0b001<<25) | (0b11000<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define MOVi(dst, i)      (AL | (0b001<<25) | (0b11010<<20) | (dst<<12) | rimm(i))
+#define BICi(dst, src, i) (AL | (0b001<<25) | (0b11100<<20) | (src<<16) | (dst<<12) | rimm(i))
+#define MVNi(dst, i)      (AL | (0b001<<25) | (0b11110<<20) | (dst<<12) | rimm(i))
+
+#define MOVW(dst, i) (AL | (0b11<<24) | ((((i)>>12)&0xF)<<16) | (dst<<12) | ((i)&((1<<12)-1)))
+#define MOVT(dst, i) (AL | (0b11<<24) | (0b0100<<20) | ((((i)>>12)&0xF)<<16) | (dst<<12) | ((i)&((1<<12)-1)))
+
+#define TSTi( src, i) (AL | (0b001<<25) | (0b10001<<20) | (src<<16) | rimm(i))
+#define TEQi( src, i) (AL | (0b001<<25) | (0b10011<<20) | (src<<16) | rimm(i))
+#define CMPi( src, i) (AL | (0b001<<25) | (0b10101<<20) | (src<<16) | rimm(i))
+#define CMNi( src, i) (AL | (0b001<<25) | (0b10111<<20) | (src<<16) | rimm(i))
+
+#define ANDSi(dst, src, i) (ANDi(dst, src, i) | (1<<20))
+#define EORSi(dst, src, i) (EORi(dst, src, i) | (1<<20))
+#define SUBSi(dst, src, i) (SUBi(dst, src, i) | (1<<20))
+#define RSBSi(dst, src, i) (RSBi(dst, src, i) | (1<<20))
+#define ADDSi(dst, src, i) (ADDi(dst, src, i) | (1<<20))
+#define ADCSi(dst, src, i) (ADCi(dst, src, i) | (1<<20))
+#define SBCSi(dst, src, i) (SBCi(dst, src, i) | (1<<20))
+#define RSCSi(dst, src, i) (RSCi(dst, src, i) | (1<<20))
+
+#define ORRSi(dst, src, i) (ORRi(dst, src, i) | (1<<20))
+#define MOVSi(dst, i)      (MOVi(dst, i) | (1<<20))
+#define BICSi(dst, src, i) (BICi(dst, src, i) | (1<<20))
+#define MVNSi(dst, i)      (MVNi(dst, i) | (1<<20))
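[Reviewer note, not part of the patch: since these macros build raw instruction words, they can be spot-checked against hand-assembled encodings. The expected words below were assembled by hand from the ARM ARM, so cross-check with a real assembler:]

#include <stdio.h>

#define AL (0xEu << 28)
#define rimm(v) (v)   /* sufficient here: both immediates fit in 8 bits */
#define ADDi(dst, src, i) (AL | (0b001<<25) | (0b01000<<20) | ((src)<<16) | ((dst)<<12) | rimm(i))
#define CMPi(src, i)      (AL | (0b001<<25) | (0b10101<<20) | ((src)<<16) | rimm(i))

int main(void)
{
	printf("%08X (add r0, r1, #4 -> expect E2810004)\n", ADDi(0, 1, 4));
	printf("%08X (cmp r4, #255  -> expect E35400FF)\n", CMPi(4, 255));
	return 0;
}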
+
+#define AND(dst, src, reg) (AL | (0b000<<25) | (0b00000<<20) | (src<<16) | (dst<<12) | reg)
+#define EOR(dst, src, reg) (AL | (0b000<<25) | (0b00010<<20) | (src<<16) | (dst<<12) | reg)
+#define SUB(dst, src, reg) (AL | (0b000<<25) | (0b00100<<20) | (src<<16) | (dst<<12) | reg)
+#define RSB(dst, src, reg) (AL | (0b000<<25) | (0b00110<<20) | (src<<16) | (dst<<12) | reg)
+#define ADD(dst, src, reg) (AL | (0b000<<25) | (0b01000<<20) | (src<<16) | (dst<<12) | reg)
+#define ADC(dst, src, reg) (AL | (0b000<<25) | (0b01010<<20) | (src<<16) | (dst<<12) | reg)
+#define SBC(dst, src, reg) (AL | (0b000<<25) | (0b01100<<20) | (src<<16) | (dst<<12) | reg)
+#define RSC(dst, src, reg) (AL | (0b000<<25) | (0b01110<<20) | (src<<16) | (dst<<12) | reg)
+
+#define ORR(dst, src, reg) (AL | (0b000<<25) | (0b11000<<20) | (src<<16) | (dst<<12) | reg)
+#define MOV(dst, src)      (AL | (0b000<<25) | (0b11010<<20) | (dst<<12) | src)
+
+#define LSL(dst, src, reg) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (reg<<8) | (0b0001<<4) | src)
+#define LSR(dst, src, reg) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (reg<<8) | (0b0011<<4) | src)
+#define ASR(dst, src, reg) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (reg<<8) | (0b0101<<4) | src)
+#define ROR(dst, src, reg) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (reg<<8) | (0b0111<<4) | src)
+
+#define LSLi(dst, src, i) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | ((i&0x1F)<<7) | (0b000<<4) | src)
+#define LSRi(dst, src, i) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | ((i&0x1F)<<7) | (0b010<<4) | src)
+#define ASRi(dst, src, i) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | ((i&0x1F)<<7) | (0b100<<4) | src)
+#define RORi(dst, src, i) (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | ((i&0x1F)<<7) | (0b110<<4) | src)
+#define RRX(dst, src)     (AL | (0b000<<25) | (0b1101<<21) | (0<<20) | (dst<<12) | (0b110<<4) | src)
+
+#define BIC(dst, src, reg) (AL | (0b000<<25) | (0b11100<<20) | (src<<16) | (dst<<12) | reg)
+#define MVN(dst, reg)      (AL | (0b000<<25) | (0b11110<<20) | (dst<<12) | reg)
+
+#define TST( src, reg) (AL | (0b000<<25) | (0b10001<<20) | (src<<16) | reg)
+#define TEQ( src, reg) (AL | (0b000<<25) | (0b10011<<20) | (src<<16) | reg)
+#define CMP( src, reg) (AL | (0b000<<25) | (0b10101<<20) | (src<<16) | reg)
+#define CMN( src, reg) (AL | (0b000<<25) | (0b10111<<20) | (src<<16) | reg)
+
+#define LDRa(dst, base, off)   (AL | (0b011<<25) | (0b1100<<21) | (1<<20) | base<<16 | dst<<12 | off)
+#define LDRx(dst, base, off)   (AL | (0b011<<25) | (0b1000<<21) | (1<<20) | base<<16 | dst<<12 | off)
+
+#define LDRai(dst, base, off)  (AL | (0b010<<25) | (0b1100<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
+#define LDRxi(dst, base, off)  (AL | (0b010<<25) | (0b1000<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
+#define LDRxiw(dst, base, off) (AL | (0b010<<25) | (0b1001<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
+
+#define LDRTa(dst, base, off)  (AL | (0b011<<25) | (0b0101<<21) | (1<<20) | base<<16 | dst<<12 | off)
+#define LDRTx(dst, base, off)  (AL | (0b011<<25) | (0b0001<<21) | (1<<20) | base<<16 | dst<<12 | off)
+#define LDRTai(dst, base, off) (AL | (0b010<<25) | (0b0101<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
+#define LDRTxi(dst, base, off) (AL | (0b010<<25) | (0b0001<<21) | (1<<20) | base<<16 | dst<<12 | rimm(off))
+
+#define LDRBa(dst, base, off)  (AL | (0b011<<25) | (0b1110<<21) | (1<<20) | base<<16 | dst<<12 | off)
+#define LDRSBai(dst, base, off) (AL | (0b000<<25) | (0b0110<<21) | (1<<20) | base<<16 | dst<<12 | ((off&0xF0)<<4)|0b1101<<4|(off&0x0F))
+#define STRBa(dst, base, off)   (AL | (0b011<<25) | (0b1110<<21) | (0<<20) | base<<16 | dst<<12 | off)
+
+#define LDRHa(dst, base, off)   (AL | (0b000<<25) | (0b1100<<21) | (1<<20) | base<<16 | dst<<12 | (0b1011<<4) | off)
+#define LDRSHai(dst, base, off) (AL | (0b000<<25) | (0b1110<<21) | (1<<20) | base<<16 | dst<<12 | ((off&0xF0)<<4)|0b1111<<4|(off&0x0F))
+#define STRHa(dst, base, off)   (AL | (0b000<<25) | (0b1100<<21) | (0<<20) | base<<16 | dst<<12 | (0b1011<<4) | off)
+
+#define STRa(dst, base, off)   (AL | (0b011<<25) | (0b1100<<21) | (0<<20) | base<<16 | dst<<12 | off)
+#define STRx(dst, base, off)   (AL | (0b011<<25) | (0b1000<<21) | (0<<20) | base<<16 | dst<<12 | off)
+#define STRai(dst, base, off)  (AL | (0b010<<25) | (0b1100<<21) | (0<<20) | base<<16 | dst<<12 | rimm(off))
+#define STRxi(dst, base, off)  (AL | (0b010<<25) | (0b1000<<21) | (0<<20) | base<<16 | dst<<12 | rimm(off))
+#define STRaiw(dst, base, off) (AL | (0b010<<25) | (0b1101<<21) | (0<<20) | base<<16 | dst<<12 | rimm(off))
+#define STRxiw(dst, base, off) (AL | (0b010<<25) | (0b1001<<21) | (0<<20) | base<<16 | dst<<12 | rimm(off))
+
+// load with post-increment
+#define POP1(reg)  (AL | (0b010<<25) | (0b0100<<21) | (1<<20) | SP<<16 | reg<<12 | reg)
+// store with post-increment
+#define PUSH1(reg) (AL | (0b010<<25) | (0b1001<<21) | (0<<20) | SP<<16 | reg<<12 | 4)
+
+// branch to target address (for small jumps)
+#define Bi(i) \
+	(AL | (0b10)<<26 | (1<<25) /*I*/ | (0<<24) /*L*/ | (i))
+// call subroutine
+#define BLi(i) \
+	(AL | (0b10)<<26 | (1<<25) /*I*/ | (1<<24) /*L*/ | (i))
+// branch and exchange (register)
+#define BX(reg) \
+	(AL | 0b00010010<<20 | 0b1111<<16 | 0b1111<<12 | 0b1111<<8| 0b0001<<4 | reg)
+// call subroutine (register)
+#define BLX(reg) \
+	(AL | 0b00010010<<20 | 0b1111<<16 | 0b1111<<12 | 0b1111<<8| 0b0011<<4 | reg)
+
+#define PUSH(mask)    (AL | (0b100100<<22) | (0b10<<20) | (0b1101<<16) | mask)
+#define PUSH2(r1, r2) (AL | (0b100100<<22) | (0b10<<20) | (0b1101<<16) | 1<<r1 | 1<<r2)
+
+// pop, ldmia sp!, {mask}
+#define POP(mask) (0xe8bd0000 | mask)
+
+// multiply, dst = op1 * op2 (dst and op1 must be different registers)
+#define MUL(dst, op1, op2) \
+	(AL | (dst<<16) | (op2<<8) | (0b1001<<4) | op1)
+
+// puts integer in R0
+#define emit_MOVR0i(arg) emit_MOVRxi(R0, arg)
+
+// puts integer arg in register reg
+#define emit_MOVRxi(reg, arg) do { \
+	emit(MOVW(reg, (arg&0xFFFF))); \
+	if (arg > 0xFFFF) \
+		emit(MOVT(reg, (((arg>>16)&0xFFFF)))); \
+	} while(0)
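[Reviewer note, not part of the patch: emit_MOVRxi() materializes a 32-bit constant as a MOVW for the low halfword plus, when needed, a MOVT for the high halfword; the _or_NOP variant defined next always emits two words so both compile passes produce identically sized code. The pair produced for 0xDEADBEEF, with hand-assembled expectations:]

#include <stdio.h>

#define AL (0xEu << 28)
#define MOVW(dst, i) (AL | (0b11<<24) | ((((i)>>12)&0xF)<<16) | ((dst)<<12) | ((i)&((1<<12)-1)))
#define MOVT(dst, i) (AL | (0b11<<24) | (0b0100<<20) | ((((i)>>12)&0xF)<<16) | ((dst)<<12) | ((i)&((1<<12)-1)))

int main(void)
{
	unsigned arg = 0xDEADBEEF;
	/* movw r12, #0xBEEF ; movt r12, #0xDEAD */
	printf("%08X (expect E30BCEEF)\n", MOVW(12, arg & 0xFFFF));
	printf("%08X (expect E34DCEAD)\n", MOVT(12, (arg >> 16) & 0xFFFF));
	return 0;
}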
+
+// puts integer arg in register reg. adds nop if only one instr is needed to
+// make size constant
+#define emit_MOVRxi_or_NOP(reg, arg) do { \
+	emit(MOVW(reg, (arg&0xFFFF))); \
+	if (arg > 0xFFFF) \
+		emit(MOVT(reg, (((arg>>16)&0xFFFF)))); \
+	else \
+		emit(NOP); \
+	} while(0)
+
+// arm core register -> single precision register
+#define VMOVass(Vn, Rt) (AL|(0b1110<<24)|(0b000<<21)|(0<<20)| ((Vn>>1)<<16) | (Rt<<12) | (0b1010<<8) | ((Vn&1)<<7) | (1<<4))
+// single precision register -> arm core register
+#define VMOVssa(Rt, Vn) (AL|(0b1110<<24)|(0b000<<21)|(1<<20)| ((Vn>>1)<<16) | (Rt<<12) | (0b1010<<8) | ((Vn&1)<<7) | (1<<4))
+
+#define _VCVT_F(Vd, Vm, opc2, op) \
+	(AL|(0b11101<<23)|((Vd&1)<<22)|(0b111<<19)|(opc2<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|(op<<7)|(1<<6)|((Vm&1)<<5)|(Vm>>1))
+#define VCVT_F32_U32(Sd, Sm) _VCVT_F(Sd, Sm, 0b000, 0 /* unsigned */)
+#define VCVT_U32_F32(Sd, Sm) _VCVT_F(Sd, Sm, 0b100, 1 /* round zero */)
+#define VCVT_F32_S32(Sd, Sm) _VCVT_F(Sd, Sm, 0b000, 1 /* signed */)
+#define VCVT_S32_F32(Sd, Sm) _VCVT_F(Sd, Sm, 0b101, 1 /* round zero */)
+
+#define VLDRa(Vd, Rn, i) (AL|(0b1101<<24)|1<<23|((Vd&1)<<22)|1<<20|(Rn<<16)|((Vd>>1)<<12)|(0b1010<<8)|off8(i))
+#define VSTRa(Vd, Rn, i) (AL|(0b1101<<24)|1<<23|((Vd&1)<<22)|0<<20|(Rn<<16)|((Vd>>1)<<12)|(0b1010<<8)|off8(i))
+
+#define VNEG_F32(Vd, Vm) \
+	(AL|(0b11101<<23)|((Vd&1)<<22)|(0b11<<20)|(1<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|(1<<6)|((Vm&1)<<5)|(Vm>>1))
+
+#define VADD_F32(Vd, Vn, Vm) \
+	(AL|(0b11100<<23)|((Vd&1)<<22)|(0b11<<20)|((Vn>>1)<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|((Vn&1)<<7)|(0<<6)|((Vm&1)<<5)|(Vm>>1))
+#define VSUB_F32(Vd, Vn, Vm) \
+	(AL|(0b11100<<23)|((Vd&1)<<22)|(0b11<<20)|((Vn>>1)<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|((Vn&1)<<7)|(1<<6)|((Vm&1)<<5)|(Vm>>1))
+#define VMUL_F32(Vd, Vn, Vm) \
+	(AL|(0b11100<<23)|((Vd&1)<<22)|(0b10<<20)|((Vn>>1)<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|((Vn&1)<<7)|(0<<6)|((Vm&1)<<5)|(Vm>>1))
+#define VDIV_F32(Vd, Vn, Vm) \
+	(AL|(0b11101<<23)|((Vd&1)<<22)|(0b00<<20)|((Vn>>1)<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|((Vn&1)<<7)|(0<<6)|((Vm&1)<<5)|(Vm>>1))
+
+#define _VCMP_F32(Vd, Vm, E) \
+	(AL|(0b11101<<23)|((Vd&1)<<22)|(0b11<<20)|((0b0100)<<16)|((Vd>>1)<<12)|(0b101<<9)|(0<<8)|(E<<7)|(1<<6)|((Vm&1)<<5)|(Vm>>1))
+#define VCMP_F32(Vd, Vm) _VCMP_F32(Vd, Vm, 0)
+
+#define VMRS(Rt) \
+	(AL|(0b11101111<<20)|(0b0001<<16)|(Rt<<12)|(0b1010<<8)|(1<<4))
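[Reviewer note, not part of the patch: single-precision VFP registers are 5-bit numbers that these macros split into a 4-bit field (Vd>>1) plus a separate low bit (Vd&1), which is why S14/S15 show up shifted and masked. A spot check of VLDRa against the hand-assembled word for "vldr s14, [r5, #4]":]

#include <stdio.h>

#define AL (0xEu << 28)
#define off8(i) ((i) >> 2)   /* byte offset -> word offset, as above */
#define VLDRa(Vd, Rn, i) (AL|(0b1101<<24)|1<<23|(((Vd)&1)<<22)|1<<20|((Rn)<<16)|(((Vd)>>1)<<12)|(0b1010<<8)|off8(i))

int main(void)
{
	printf("%08X (vldr s14, [r5, #4] -> expect ED957A01)\n", VLDRa(14, 5, 4));
	return 0;
}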
+
+// check if instruction in R0 is within range. Clobbers R1, R12
+#define CHECK_JUMP do { \
+	static int bytes_to_skip = -1; \
+	static unsigned branch = -1; \
+	emit_MOVRxi(R1, (unsigned)vm->instructionCount); \
+	emit(CMP(R0, R1)); \
+	if (branch == -1) \
+		branch = vm->codeLength; \
+	emit(cond(LT, Bi(j_rel(bytes_to_skip)))); \
+	emit_MOVRxi_or_NOP(R12, (unsigned)ErrJump); \
+	emit(BLX(R12)); \
+	if (bytes_to_skip == -1) \
+		bytes_to_skip = vm->codeLength - branch; \
+} while(0)
+
+//#define CONST_OPTIMIZE
+#ifdef CONST_OPTIMIZE
+#define MAYBE_EMIT_CONST() \
+	if (got_const) \
+	{ \
+		got_const = 0; \
+		vm->instructionPointers[instruction-1] = assembler_get_code_size(); \
+		STACK_PUSH(4); \
+		emit("movl $%d, (%%r9, %%rbx, 4)", const_value); \
+	}
+#else
+#define MAYBE_EMIT_CONST()
+#endif
+
+// optimize: use load multiple
+#define IJ(comparator) do { \
+	MAYBE_EMIT_CONST(); \
+	emit_MOVRxi(R0, arg.i); \
+	CHECK_JUMP; \
+	emit(LDRTxi(R0, rOPSTACK, 4)); \
+	emit(LDRTxi(R1, rOPSTACK, 4)); \
+	emit(CMP(R1, R0)); \
+	emit(cond(comparator, Bi(j_rel(vm->instructionPointers[arg.i]-vm->codeLength)))); \
+} while (0)
+
+#define FJ(comparator) do { \
+	emit_MOVRxi(R0, arg.i); \
+	CHECK_JUMP; \
+	emit(SUBi(rOPSTACK, rOPSTACK, 8)); \
+	emit(VLDRa(S15, rOPSTACK, 4)); \
+	emit(VLDRa(S14, rOPSTACK, 8)); \
+	emit(VCMP_F32(S15, S14)); \
+	emit(VMRS(APSR_nzcv)); \
+	emit(cond(comparator, Bi(j_rel(vm->instructionPointers[arg.i]-vm->codeLength)))); \
+} while (0)
+
+#define printreg(reg) emit(PUSH1(R3)); emit(BLX(reg)); emit(POP1(R3));
+
+static inline unsigned _j_rel(int x, int pc)
+{
+	if (x&3) goto err;
+	x = (x>>2)-2;
+	if (x < 0)
+	{
+		if ((x&(0xFF<<24)) != 0xFF<<24)
+			goto err;
+		x &= ~(0xFF<<24);
+	}
+	else if (x&(0xFF<<24))
+		goto err;
+	return x;
+err:
+	DIE("jump %d out of range at %d", x, pc);
+}
+
+void VM_Compile(vm_t *vm, vmHeader_t *header)
+{
+	unsigned char *code;
+	int i_count, pc = 0;
+	int pass;
+	int codeoffsets[1024];
+
+#define j_rel(x) (pass?_j_rel(x, pc):0xBAD)
+#define OFFSET(i) (pass?(j_rel(codeoffsets[i]-vm->codeLength)):(0xF000000F))
+#define new_offset() (offsidx++)
+#define get_offset(i) (codeoffsets[i])
+#define save_offset(i) (codeoffsets[i] = vm->codeLength)
+#define OFF_CODE 0
+#define OFF_IMMEDIATES 1
+
+	vm->compiled = qfalse;
+
+	vm->codeBase = NULL;
+	vm->codeLength = 0;
+
+	for (pass = 0; pass < 2; ++pass) {
+
+	int offsidx = 0;
+
+	// const optimization
+	unsigned got_const = 0, const_value = 0;
+
+	if(pass)
+	{
+		vm->codeBase = mmap(NULL, vm->codeLength, PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
+		if(vm->codeBase == MAP_FAILED)
+			Com_Error(ERR_FATAL, "VM_CompileARM: can't mmap memory");
+		vm->codeLength = 0;
+	}
+
+	//int (*entry)(vm_t*, int*, int*);
+	emit(PUSH((((1<<8)-1)<<4)|(1<<14))); // push R4-R11, LR
+	emit(SUBi(SP, SP, 12)); // align stack!
+	emit(LDRai(rCODEBASE, R0, offsetof(vm_t, codeBase)));
+	emit(LDRai(rDATABASE, R0, offsetof(vm_t, dataBase)));
+	emit(LDRai(rDATAMASK, R0, offsetof(vm_t, dataMask)));
+	emit(LDRai(rPSTACK, R1, 0));
+	emit(MOV(rOPSTACK, R2)); // TODO: reverse opstack to avoid writing to return address
+	emit(MOV(rOPSTACKBASE, rOPSTACK));
+
+	emit(BLi(OFFSET(OFF_CODE)));
+
+	// save return value in r0
+	emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
+
+	emit(ADDi(SP, SP, 12)); // align stack!
+	emit(POP((((1<<8)-1)<<4)|(1<<15))); // pop R4-R11, LR -> PC
+
+	/* save some immediates here */
+	emit(BKPT(0));
+	emit(BKPT(0));
+	save_offset(OFF_IMMEDIATES);
+//	emit((unsigned)whatever);
+	emit(BKPT(0));
+	emit(BKPT(0));
+
+	save_offset(OFF_CODE);
+	offsidx = OFF_IMMEDIATES+1;
+
+	code = (unsigned char *) header + header->codeOffset;
+	pc = 0;
+
+	for (i_count = 0; i_count < header->instructionCount; i_count++) {
+		union {
+			unsigned char b[4];
+			unsigned int i;
+		} arg;
+		unsigned char op = code[pc++];
+
+		vm->instructionPointers[i_count] = vm->codeLength;
+
+		if (vm_opInfo[op] & opImm4)
+		{
+			memcpy(arg.b, &code[pc], 4);
+			pc += 4;
+#ifdef EXCESSIVE_DEBUG
+			Com_Printf("%d: instruction %d (%s %d), offset %d\n", pass, i_count, opnames[op], arg.i, vm->codeLength);
+#endif
+		}
+		else if (vm_opInfo[op] & opImm1)
+		{
+			arg.b[0] = code[pc];
+			++pc;
+#ifdef EXCESSIVE_DEBUG
+			Com_Printf("%d: instruction %d (%s %hhd), offset %d\n", pass, i_count, opnames[op], arg.i, vm->codeLength);
+#endif
+		}
+		else
+		{
+#ifdef EXCESSIVE_DEBUG
+			Com_Printf("%d: instruction %d (%s), offset %d\n", pass, i_count, opnames[op], vm->codeLength);
+#endif
+		}
+
+		// TODO: for debug only
+		//emit_MOVRxi(R4, i_count);
+
+		switch ( op )
+		{
+			case OP_UNDEF:
+				break;
+
+			case OP_IGNORE:
+				NOTIMPL(op);
+				break;
+
+			case OP_BREAK:
+				emit(BKPT(0));
+				break;
+
+			case OP_ENTER:
+				MAYBE_EMIT_CONST();
+				emit(PUSH1(LR));
+				emit(SUBi(SP, SP, 12)); // align stack
+				if (arg.i == 0 || can_encode(arg.i))
+				{
+					emit(SUBi(rPSTACK, rPSTACK, arg.i)); // pstack -= arg
+				}
+				else
+				{
+					emit_MOVR0i(arg.i);
+					emit(SUB(rPSTACK, rPSTACK, R0)); // pstack -= arg
+				}
+				break;
+
+			case OP_LEAVE:
+				if (arg.i == 0 || can_encode(arg.i))
+				{
+					emit(ADDi(rPSTACK, rPSTACK, arg.i)); // pstack += arg
+				}
+				else
+				{
+					emit_MOVR0i(arg.i);
+					emit(ADD(rPSTACK, rPSTACK, R0)); // pstack += arg
+				}
+				emit(ADDi(SP, SP, 12));
+				emit(0xe49df004); // pop pc
+				break;
+
+			case OP_CALL:
+#if 0
+				// save next instruction
+				emit_MOVR0i(i_count);
+				emit(STRa(R0, rDATABASE, rPSTACK)); // dataBase[pstack] = r0
+#endif
+				if (got_const) {
+					NOTIMPL(op);
+				} else {
+					static int bytes_to_skip = -1;
+					static unsigned start_block = -1;
+					MAYBE_EMIT_CONST();
+					// get instruction nr from stack
+					emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
+					emit(CMPi(R0, 0)); // check if syscall
+					if (start_block == -1)
+						start_block = vm->codeLength;
+					emit(cond(LT, Bi(j_rel(bytes_to_skip))));
+					CHECK_JUMP;
+					emit_MOVRxi_or_NOP(R1, (unsigned)vm->instructionPointers);
+					emit(LDRa(R0, R1, rLSL(2, R0))); // r0 = ((int*)r1)[r0]
+					emit(ADD(R0, rCODEBASE, R0)); // r0 = codeBase+r0
+					emit(BLX(R0));
+					emit(Bi(j_rel(vm->instructionPointers[i_count+1]-vm->codeLength)));
+					if (bytes_to_skip == -1)
+						bytes_to_skip = vm->codeLength - start_block;
+					emit(MOV(R1, rPSTACK));
+					emit_MOVRxi(R12, (unsigned)asmcall);
+					emit(BLX(R12));
+					// store return value
+					emit(STRaiw(R0, rOPSTACK, 4)); // opstack+=4; *opstack = r0
+				}
+				break;
+
+			case OP_PUSH:
+				MAYBE_EMIT_CONST();
+				emit(ADDi(rOPSTACK, rOPSTACK, 4));
+				break;
+
+			case OP_POP:
+				MAYBE_EMIT_CONST();
+				emit(SUBi(rOPSTACK, rOPSTACK, 4));
+				break;
+
+			case OP_CONST:
+				MAYBE_EMIT_CONST();
+				emit_MOVR0i(arg.i);
+				emit(STRaiw(R0, rOPSTACK, 4)); // opstack+=4; *opstack = r0
+				break;
+
+			case OP_LOCAL:
+				MAYBE_EMIT_CONST();
+				if (arg.i == 0 || can_encode(arg.i))
+				{
+					emit(ADDi(R0, rPSTACK, arg.i)); // r0 = pstack+arg
+				}
+				else
+				{
+					emit_MOVR0i(arg.i);
+					emit(ADD(R0, rPSTACK, R0)); // r0 = pstack+arg
+				}
+				emit(STRaiw(R0, rOPSTACK, 4)); // opstack+=4; *opstack = r0
+				break;
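+
+			/* the computed branches below first run CHECK_JUMP on the
+			   instruction number and then look the target up in
+			   vm->instructionPointers, so a VM program cannot jump
+			   outside its own generated code */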
+
+			case OP_JUMP:
+				if(got_const) {
+					NOTIMPL(op);
+				} else {
+					emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
+					CHECK_JUMP;
+					emit_MOVRxi(R1, (unsigned)vm->instructionPointers);
+					emit(LDRa(R0, R1, rLSL(2, R0))); // r0 = ((int*)r1)[r0]
+					emit(ADD(R0, rCODEBASE, R0)); // r0 = codeBase+r0
+					emit(BLX(R0));
+				}
+				break;
+
+			case OP_EQ:
+				IJ(EQ);
+				break;
+
+			case OP_NE:
+				IJ(NE);
+				break;
+
+			case OP_LTI:
+				IJ(LT);
+				break;
+
+			case OP_LEI:
+				IJ(LE);
+				break;
+
+			case OP_GTI:
+				IJ(GT);
+				break;
+
+			case OP_GEI:
+				IJ(GE);
+				break;
+
+			case OP_LTU:
+				IJ(LO);
+				break;
+
+			case OP_LEU:
+				IJ(LS);
+				break;
+
+			case OP_GTU:
+				IJ(HI);
+				break;
+
+			case OP_GEU:
+				IJ(HS);
+				break;
+
+			case OP_EQF:
+				FJ(EQ);
+				break;
+
+			case OP_NEF:
+				FJ(NE);
+				break;
+
+			case OP_LTF:
+				FJ(LT);
+				break;
+
+			case OP_LEF:
+				FJ(LE);
+				break;
+
+			case OP_GTF:
+				FJ(GT);
+				break;
+
+			case OP_GEF:
+				FJ(GE);
+				break;
+
+			case OP_LOAD1:
+				MAYBE_EMIT_CONST();
+				emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+				emit(AND(R0, rDATAMASK, R0)); // r0 = r0 & rDATAMASK
+				emit(LDRBa(R0, rDATABASE, R0)); // r0 = (unsigned char)dataBase[r0]
+				emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+				break;
+
+			case OP_LOAD2:
+				MAYBE_EMIT_CONST();
+				emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+				emit(AND(R0, rDATAMASK, R0)); // r0 = r0 & rDATAMASK
+				emit(LDRHa(R0, rDATABASE, R0)); // r0 = (unsigned short)dataBase[r0]
+				emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+				break;
+
+			case OP_LOAD4:
+				MAYBE_EMIT_CONST();
+				emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+				emit(AND(R0, rDATAMASK, R0)); // r0 = r0 & rDATAMASK
+				emit(LDRa(R0, rDATABASE, R0)); // r0 = dataBase[r0]
+				emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+				break;
+
+			case OP_STORE1:
+				MAYBE_EMIT_CONST();
+				emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
+				emit(LDRTxi(R1, rOPSTACK, 4)); // r1 = *opstack; rOPSTACK -= 4
+				emit(AND(R1, rDATAMASK, R1)); // r1 = r1 & rDATAMASK
+				emit(STRBa(R0, rDATABASE, R1)); // database[r1] = r0
+				break;
+
+			case OP_STORE2:
+				MAYBE_EMIT_CONST();
+				emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
+				emit(LDRTxi(R1, rOPSTACK, 4)); // r1 = *opstack; rOPSTACK -= 4
+				emit(AND(R1, rDATAMASK, R1)); // r1 = r1 & rDATAMASK
+				emit(STRHa(R0, rDATABASE, R1)); // database[r1] = r0
+				break;
+
+			case OP_STORE4:
+				MAYBE_EMIT_CONST();
+				// optimize: use load multiple
+				// value
+				emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
+				// pointer
+				emit(LDRTxi(R1, rOPSTACK, 4)); // r1 = *opstack; rOPSTACK -= 4
+				emit(AND(R1, rDATAMASK, R1)); // r1 = r1 & rDATAMASK
+				// store value at pointer
+				emit(STRa(R0, rDATABASE, R1)); // database[r1] = r0
+				break;
+
+			case OP_ARG:
+				MAYBE_EMIT_CONST();
+				emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
+				emit(ADDi(R1, rPSTACK, arg.b[0])); // r1 = programStack+arg
+				emit(AND(R1, rDATAMASK, R1)); // r1 = r1 & rDATAMASK
+				emit(STRa(R0, rDATABASE, R1)); // dataBase[r1] = r0
+				break;
+
+			case OP_BLOCK_COPY:
+				MAYBE_EMIT_CONST();
+				emit(LDRTxi(R1, rOPSTACK, 4)); // r1 = *opstack; rOPSTACK -= 4
+				emit(LDRTxi(R0, rOPSTACK, 4)); // r0 = *opstack; rOPSTACK -= 4
+				emit_MOVRxi(R2, arg.i);
+				emit_MOVRxi(R12, (unsigned)VM_BlockCopy);
+				emit(BLX(R12));
+				break;
+
+			case OP_SEX8:
+				MAYBE_EMIT_CONST();
+				emit(LDRSBai(R0, rOPSTACK, 0)); // sign extend *opstack
+				emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+				break;
+
+			case OP_SEX16:
+				MAYBE_EMIT_CONST();
+				emit(LDRSHai(R0, rOPSTACK, 0)); // sign extend *opstack
+				emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+				break;
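+
+			/* the ALU ops below load the top of the opstack into r0 (and
+			   the value beneath it into r1 for two-operand ops) and write
+			   the result back to the new top of stack */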
+
+			case OP_NEGI:
+				MAYBE_EMIT_CONST();
+				emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+				emit(RSBi(R0, R0, 0)); // r0 = -r0
+				emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+				break;
+
+			case OP_ADD:
+				MAYBE_EMIT_CONST();
+				emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+				emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+				emit(ADD(R0, R1, R0)); // r0 = r1 + r0
+				emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+				break;
+
+			case OP_SUB:
+				MAYBE_EMIT_CONST();
+				emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+				emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+				emit(SUB(R0, R1, R0)); // r0 = r1 - r0
+				emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+				break;
+
+			case OP_DIVI:
+			case OP_DIVU:
+				MAYBE_EMIT_CONST();
+				emit(LDRai(R1, rOPSTACK, 0)); // r1 = *opstack
+				emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r0 = *opstack
+				if ( op == OP_DIVI )
+					emit_MOVRxi(R12, (unsigned)__aeabi_idiv);
+				else
+					emit_MOVRxi(R12, (unsigned)__aeabi_uidiv);
+				emit(BLX(R12));
+				emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+				break;
+
+			case OP_MODI:
+			case OP_MODU:
+				MAYBE_EMIT_CONST();
+				emit(LDRai(R1, rOPSTACK, 0)); // r1 = *opstack
+				emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r0 = *opstack
+				if ( op == OP_MODI )
+					emit_MOVRxi(R12, (unsigned)__aeabi_idivmod);
+				else
+					emit_MOVRxi(R12, (unsigned)__aeabi_uidivmod);
+				emit(BLX(R12));
+				emit(STRai(R1, rOPSTACK, 0)); // *opstack = r1
+				break;
+
+			case OP_MULI:
+			case OP_MULU:
+				MAYBE_EMIT_CONST();
+				emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+				emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+				emit(MUL(R0, R1, R0)); // r0 = r1 * r0
+				emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+				break;
+
+			case OP_BAND:
+				MAYBE_EMIT_CONST();
+				emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+				emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+				emit(AND(R0, R1, R0)); // r0 = r1 & r0
+				emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+				break;
+
+			case OP_BOR:
+				MAYBE_EMIT_CONST();
+				emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+				emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+				emit(ORR(R0, R1, R0)); // r0 = r1 | r0
+				emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+				break;
+
+			case OP_BXOR:
+				MAYBE_EMIT_CONST();
+				emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+				emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+				emit(EOR(R0, R1, R0)); // r0 = r1 ^ r0
+				emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+				break;
+
+			case OP_BCOM:
+				MAYBE_EMIT_CONST();
+				emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+				emit(MVN(R0, R0)); // r0 = ~r0
+				emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+				break;
+
+			case OP_LSH:
+				MAYBE_EMIT_CONST();
+				emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+				emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+				emit(LSL(R0, R1, R0)); // r0 = r1 << r0
+				emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+				break;
+
+			case OP_RSHI:
+				MAYBE_EMIT_CONST();
+				emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+				emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+				emit(ASR(R0, R1, R0)); // r0 = r1 >> r0
+				emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+				break;
+
+			case OP_RSHU:
+				MAYBE_EMIT_CONST();
+				emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+				emit(LDRxiw(R1, rOPSTACK, 4)); // opstack-=4; r1 = *opstack
+				emit(LSR(R0, R1, R0)); // r0 = (unsigned)r1 >> r0
+				emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+				break;
+
+			case OP_NEGF:
+				MAYBE_EMIT_CONST();
+				emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
+				emit(VNEG_F32(S14, S14)); // s14 = -s14
+				emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s14
+				break;
+
+			case OP_ADDF:
+				MAYBE_EMIT_CONST();
+				emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
+				// vldr can't modify rOPSTACK so
+				// we'd either need to change it
+				// with sub or use regular ldr+vmov
+				emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r0 = *opstack
+				emit(VMOVass(S15,R0)); // s15 = r0
+				emit(VADD_F32(S14, S15, S14)); // s14 = s15 + s14
+				emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s14
+				break;
+
+			case OP_SUBF:
+				emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
+				// see OP_ADDF
+				emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r0 = *opstack
+				emit(VMOVass(S15,R0)); // s15 = r0
+				emit(VSUB_F32(S14, S15, S14)); // s14 = s15 - s14
+				emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s14
+				break;
+
+			case OP_DIVF:
+				emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
+				// see OP_ADDF
+				emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r0 = *opstack
+				emit(VMOVass(S15,R0)); // s15 = r0
+				emit(VDIV_F32(S14, S15, S14)); // s14 = s15 / s14
+				emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s14
+				break;
+
+			case OP_MULF:
+				emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
+				// see OP_ADDF
+				emit(LDRxiw(R0, rOPSTACK, 4)); // opstack-=4; r0 = *opstack
+				emit(VMOVass(S15,R0)); // s15 = r0
+				emit(VMUL_F32(S14, S15, S14)); // s14 = s15 * s14
+				emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s14
+				break;
+
+			case OP_CVIF:
+				MAYBE_EMIT_CONST();
+				emit(LDRai(R0, rOPSTACK, 0)); // r0 = *opstack
+				emit(VMOVass(S14,R0)); // s14 = r0
+				emit(VCVT_F32_S32(S14, S14)); // s14 = (float)s14
+				emit(VSTRa(S14, rOPSTACK, 0)); // *((float*)opstack) = s14
+				break;
+
+			case OP_CVFI:
+				MAYBE_EMIT_CONST();
+				emit(VLDRa(S14, rOPSTACK, 0)); // s14 = *((float*)opstack)
+				emit(VCVT_S32_F32(S14, S14)); // s14 = (int)s14
+				emit(VMOVssa(R0,S14)); // r0 = s14
+				emit(STRai(R0, rOPSTACK, 0)); // *opstack = r0
+				break;
+		}
+	}
+
+	// never reached
+	emit(BKPT(0));
+	} // pass
+
+	if (mprotect(vm->codeBase, vm->codeLength, PROT_READ|PROT_EXEC/* |PROT_WRITE */)) {
+		VM_Destroy_Compiled(vm);
+		DIE("mprotect failed");
+	}
+
+	// clear icache, http://blogs.arm.com/software-enablement/141-caches-and-self-modifying-code/
+	__clear_cache(vm->codeBase, vm->codeBase+vm->codeLength);
+
+	vm->destroy = VM_Destroy_Compiled;
+	vm->compiled = qtrue;
+}
+
+int VM_CallCompiled(vm_t *vm, int *args)
+{
+	byte stack[OPSTACK_SIZE + 15];
+	int *opStack;
+	int programStack = vm->programStack;
+	int stackOnEntry = programStack;
+	byte *image = vm->dataBase;
+	int *argPointer;
+	int retVal;
+
+	currentVM = vm;
+
+	vm->currentlyInterpreting = qtrue;
+
+	programStack -= ( 8 + 4 * MAX_VMMAIN_ARGS );
+	argPointer = (int *)&image[ programStack + 8 ];
+	memcpy( argPointer, args, 4 * MAX_VMMAIN_ARGS );
+	argPointer[-1] = 0;
+	argPointer[-2] = -1;
+
+	opStack = PADP(stack, 16);
+	*opStack = 0xDEADBEEF;
+
+#if 0
+	Com_Printf("r5 opStack:\t\t%p\n", opStack);
+	Com_Printf("r7 codeBase:\t\t%p\n", vm->codeBase);
+	Com_Printf("r8 programStack:\t0x%x\n", programStack);
+	Com_Printf("r9 dataBase:\t\t%p\n", vm->dataBase);
+#endif
+
+	/* call generated code */
+	{
+		//int (*entry)(void *, int, void *, int);
+		int (*entry)(vm_t*, int*, int*);
+
+		entry = (void *)(vm->codeBase);
+		//__asm__ volatile("bkpt");
+		//retVal = entry(vm->codeBase, programStack, vm->dataBase, vm->dataMask);
+		retVal = entry(vm, &programStack, opStack);
+	}
+
+	if(*opStack != 0xDEADBEEF)
+	{
+		Com_Error(ERR_DROP, "opStack corrupted in compiled code");
+	}
+
+	if(programStack != stackOnEntry - (8 + 4 * MAX_VMMAIN_ARGS))
+		Com_Error(ERR_DROP, "programStack corrupted in compiled code");
+
+	vm->programStack = stackOnEntry;
+	vm->currentlyInterpreting = qfalse;
+
+	return retVal;
+}
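[Reviewer note, not part of the patch: once VM_Compile() succeeds it sets vm->compiled, and the VM layer then routes calls through VM_CallCompiled() instead of the interpreter. A hypothetical dispatch wrapper for illustration -- the names here are made up; the real routing lives in vm.c:]

#include "vm_local.h"

/* illustrative only: show how the compiled entry point would be chosen */
int VM_DemoDispatch(vm_t *vm)
{
	int args[MAX_VMMAIN_ARGS] = { 0 };     /* arguments for vmMain() */

	if (vm->compiled)
		return VM_CallCompiled(vm, args);   /* JIT path from this file */
	return VM_CallInterpreted(vm, args);    /* interpreter fallback */
}

[The 0xDEADBEEF sentinel written before entry and checked afterwards is what catches a miscompiled qvm that scribbles over the opstack.]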