mirror of
https://github.com/DrBeef/ioq3quest.git
synced 2024-11-30 15:51:53 +00:00
01dca75d9f
rename VM_Malloc, VM_Free to PPC_Malloc, PPC_Free safer constant optimization handling Patch by Przemysław Iskra (#3796)
2170 lines
54 KiB
C
2170 lines
54 KiB
C
/*
|
|
===========================================================================
|
|
Copyright (C) 2008 Przemyslaw Iskra <sparky@pld-linux.org>
|
|
|
|
This file is part of Quake III Arena source code.
|
|
|
|
Quake III Arena source code is free software; you can redistribute it
|
|
and/or modify it under the terms of the GNU General Public License as
|
|
published by the Free Software Foundation; either version 2 of the License,
|
|
or (at your option) any later version.
|
|
|
|
Quake III Arena source code is distributed in the hope that it will be
|
|
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with Quake III Arena source code; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
===========================================================================
|
|
*/
|
|
|
|
#include <sys/types.h> /* needed by sys/mman.h on OSX */
|
|
#include <sys/mman.h>
|
|
#include <sys/time.h>
|
|
#include <time.h>
|
|
#include <stddef.h>
|
|
|
|
#ifndef MAP_ANONYMOUS
|
|
# define MAP_ANONYMOUS MAP_ANON
|
|
#endif
|
|
|
|
#include "vm_local.h"
|
|
#include "vm_powerpc_asm.h"
|
|
|
|
/*
|
|
* VM_TIMES enables showing information about time spent inside
|
|
* and outside generated code
|
|
*/
|
|
//#define VM_TIMES
|
|
#ifdef VM_TIMES
|
|
#include <sys/times.h>
|
|
static clock_t time_outside_vm = 0;
|
|
static clock_t time_total_vm = 0;
|
|
#endif
|
|
|
|
/* exit() won't be called but use it because it is marked with noreturn */
|
|
#define DIE( reason ) \
|
|
do { \
|
|
Com_Error(ERR_DROP, "vm_powerpc compiler error: " reason "\n"); \
|
|
exit(1); \
|
|
} while(0)
|
|
|
|
/*
|
|
* vm_powerpc uses large quantities of memory during compilation,
|
|
* Z_Malloc memory may not be enough for some big qvm files
|
|
*/
|
|
|
|
//#define VM_SYSTEM_MALLOC
|
|
#ifdef VM_SYSTEM_MALLOC
|
|
static inline void *
|
|
PPC_Malloc( size_t size )
|
|
{
|
|
void *mem = malloc( size );
|
|
if ( ! mem )
|
|
DIE( "Not enough memory" );
|
|
|
|
return mem;
|
|
}
|
|
# define PPC_Free free
|
|
#else
|
|
# define PPC_Malloc Z_Malloc
|
|
# define PPC_Free Z_Free
|
|
#endif
|
|
|
|
/*
|
|
* optimizations:
|
|
* - hole: bubble optimization (OP_CONST+instruction)
|
|
* - copy: inline OP_BLOCK_COPY for lengths under 16/32 bytes
|
|
* - mask: use rlwinm instruction as dataMask
|
|
*/
|
|
|
|
#ifdef __OPTIMIZE__
|
|
# define OPTIMIZE_HOLE 1
|
|
# define OPTIMIZE_COPY 1
|
|
# define OPTIMIZE_MASK 1
|
|
#else
|
|
# define OPTIMIZE_HOLE 0
|
|
# define OPTIMIZE_COPY 0
|
|
# define OPTIMIZE_MASK 0
|
|
#endif
|
|
|
|
/*
|
|
* SUPPORTED TARGETS:
|
|
* - Linux 32 bits
|
|
* ( http://refspecs.freestandards.org/elf/elfspec_ppc.pdf )
|
|
* * LR at r0 + 4
|
|
* * Local variable space not needed
|
|
* -> store caller safe regs at 16+
|
|
*
|
|
* - Linux 64 bits (not fully conformant)
|
|
* ( http://www.ibm.com/developerworks/linux/library/l-powasm4.html )
|
|
* * needs "official procedure descriptors" (only first function has one)
|
|
* * LR at r0 + 16
|
|
* * local variable space required, min 64 bytes, starts at 48
|
|
* -> store caller safe regs at 128+
|
|
*
|
|
* - OS X 32 bits
|
|
* ( http://developer.apple.com/documentation/DeveloperTools/Conceptual/LowLevelABI/Articles/32bitPowerPC.html )
|
|
* * LR at r0 + 8
|
|
* * local variable space required, min 32 bytes (?), starts at 24
|
|
* -> store caller safe regs at 64+
|
|
*
|
|
* - OS X 64 bits (completely untested)
|
|
* ( http://developer.apple.com/documentation/DeveloperTools/Conceptual/LowLevelABI/Articles/64bitPowerPC.html )
|
|
* * LR at r0 + 16
|
|
* * local variable space required, min 64 bytes (?), starts at 48
|
|
* -> store caller safe regs at 128+
|
|
*/
|
|
|
|
/* Select Length - first value on 32 bits, second on 64 */
|
|
#ifdef __PPC64__
|
|
# define SL( a, b ) (b)
|
|
#else
|
|
# define SL( a, b ) (a)
|
|
#endif
|
|
|
|
/* Select ABI - first for ELF, second for OS X */
|
|
#ifdef __ELF__
|
|
# define SA( a, b ) (a)
|
|
#else
|
|
# define SA( a, b ) (b)
|
|
#endif
|
|
|
|
#define ELF32 SL( SA( 1, 0 ), 0 )
|
|
#define ELF64 SL( 0, SA( 1, 0 ) )
|
|
#define OSX32 SL( SA( 0, 1 ), 0 )
|
|
#define OSX64 SL( 0, SA( 0, 1 ) )
|
|
|
|
/* native length load/store instructions ( L stands for long ) */
|
|
#define iSTLU SL( iSTWU, iSTDU )
|
|
#define iSTL SL( iSTW, iSTD )
|
|
#define iLL SL( iLWZ, iLD )
|
|
#define iLLX SL( iLWZX, iLDX )
|
|
|
|
/* register length */
|
|
#define GPRLEN SL( 4, 8 )
|
|
#define FPRLEN (8)
|
|
/* shift that many bits to obtain value miltiplied by GPRLEN */
|
|
#define GPRLEN_SHIFT SL( 2, 3 )
|
|
|
|
/* Link register position */
|
|
#define STACK_LR SL( SA( 4, 8 ), 16 )
|
|
/* register save position */
|
|
#define STACK_SAVE SL( SA( 16, 64 ), 128 )
|
|
/* temporary space, for float<->int exchange */
|
|
#define STACK_TEMP SL( SA( 8, 24 ), 48 )
|
|
/* red zone temporary space, used instead of STACK_TEMP if stack isn't
|
|
* prepared properly */
|
|
#define STACK_RTEMP (-16)
|
|
|
|
#if ELF64
|
|
/*
|
|
* Official Procedure Descriptor
|
|
* we need to prepare one for generated code if we want to call it
|
|
* as function
|
|
*/
|
|
typedef struct {
|
|
void *function;
|
|
void *toc;
|
|
void *env;
|
|
} opd_t;
|
|
#endif
|
|
|
|
|
|
/*
|
|
* opcode information table:
|
|
* - length of immediate value
|
|
* - returned register type
|
|
* - required register(s) type
|
|
*/
|
|
#define opImm0 0x0000 /* no immediate */
|
|
#define opImm1 0x0001 /* 1 byte immadiate value after opcode */
|
|
#define opImm4 0x0002 /* 4 bytes immediate value after opcode */
|
|
|
|
#define opRet0 0x0000 /* returns nothing */
|
|
#define opRetI 0x0004 /* returns integer */
|
|
#define opRetF 0x0008 /* returns float */
|
|
#define opRetIF (opRetI | opRetF) /* returns integer or float */
|
|
|
|
#define opArg0 0x0000 /* requires nothing */
|
|
#define opArgI 0x0010 /* requires integer(s) */
|
|
#define opArgF 0x0020 /* requires float(s) */
|
|
#define opArgIF (opArgI | opArgF) /* requires integer or float */
|
|
|
|
#define opArg2I 0x0040 /* requires second argument, integer */
|
|
#define opArg2F 0x0080 /* requires second argument, float */
|
|
#define opArg2IF (opArg2I | opArg2F) /* requires second argument, integer or float */
|
|
|
|
static const unsigned char vm_opInfo[256] =
|
|
{
|
|
[OP_UNDEF] = opImm0,
|
|
[OP_IGNORE] = opImm0,
|
|
[OP_BREAK] = opImm0,
|
|
[OP_ENTER] = opImm4,
|
|
/* OP_LEAVE has to accept floats, they will be converted to ints */
|
|
[OP_LEAVE] = opImm4 | opRet0 | opArgIF,
|
|
/* only STORE4 and POP use values from OP_CALL,
|
|
* no need to convert floats back */
|
|
[OP_CALL] = opImm0 | opRetI | opArgI,
|
|
[OP_PUSH] = opImm0 | opRetIF,
|
|
[OP_POP] = opImm0 | opRet0 | opArgIF,
|
|
[OP_CONST] = opImm4 | opRetIF,
|
|
[OP_LOCAL] = opImm4 | opRetI,
|
|
[OP_JUMP] = opImm0 | opRet0 | opArgI,
|
|
|
|
[OP_EQ] = opImm4 | opRet0 | opArgI | opArg2I,
|
|
[OP_NE] = opImm4 | opRet0 | opArgI | opArg2I,
|
|
[OP_LTI] = opImm4 | opRet0 | opArgI | opArg2I,
|
|
[OP_LEI] = opImm4 | opRet0 | opArgI | opArg2I,
|
|
[OP_GTI] = opImm4 | opRet0 | opArgI | opArg2I,
|
|
[OP_GEI] = opImm4 | opRet0 | opArgI | opArg2I,
|
|
[OP_LTU] = opImm4 | opRet0 | opArgI | opArg2I,
|
|
[OP_LEU] = opImm4 | opRet0 | opArgI | opArg2I,
|
|
[OP_GTU] = opImm4 | opRet0 | opArgI | opArg2I,
|
|
[OP_GEU] = opImm4 | opRet0 | opArgI | opArg2I,
|
|
[OP_EQF] = opImm4 | opRet0 | opArgF | opArg2F,
|
|
[OP_NEF] = opImm4 | opRet0 | opArgF | opArg2F,
|
|
[OP_LTF] = opImm4 | opRet0 | opArgF | opArg2F,
|
|
[OP_LEF] = opImm4 | opRet0 | opArgF | opArg2F,
|
|
[OP_GTF] = opImm4 | opRet0 | opArgF | opArg2F,
|
|
[OP_GEF] = opImm4 | opRet0 | opArgF | opArg2F,
|
|
|
|
[OP_LOAD1] = opImm0 | opRetI | opArgI,
|
|
[OP_LOAD2] = opImm0 | opRetI | opArgI,
|
|
[OP_LOAD4] = opImm0 | opRetIF| opArgI,
|
|
[OP_STORE1] = opImm0 | opRet0 | opArgI | opArg2I,
|
|
[OP_STORE2] = opImm0 | opRet0 | opArgI | opArg2I,
|
|
[OP_STORE4] = opImm0 | opRet0 | opArgIF| opArg2I,
|
|
[OP_ARG] = opImm1 | opRet0 | opArgIF,
|
|
[OP_BLOCK_COPY] = opImm4 | opRet0 | opArgI | opArg2I,
|
|
|
|
[OP_SEX8] = opImm0 | opRetI | opArgI,
|
|
[OP_SEX16] = opImm0 | opRetI | opArgI,
|
|
[OP_NEGI] = opImm0 | opRetI | opArgI,
|
|
[OP_ADD] = opImm0 | opRetI | opArgI | opArg2I,
|
|
[OP_SUB] = opImm0 | opRetI | opArgI | opArg2I,
|
|
[OP_DIVI] = opImm0 | opRetI | opArgI | opArg2I,
|
|
[OP_DIVU] = opImm0 | opRetI | opArgI | opArg2I,
|
|
[OP_MODI] = opImm0 | opRetI | opArgI | opArg2I,
|
|
[OP_MODU] = opImm0 | opRetI | opArgI | opArg2I,
|
|
[OP_MULI] = opImm0 | opRetI | opArgI | opArg2I,
|
|
[OP_MULU] = opImm0 | opRetI | opArgI | opArg2I,
|
|
[OP_BAND] = opImm0 | opRetI | opArgI | opArg2I,
|
|
[OP_BOR] = opImm0 | opRetI | opArgI | opArg2I,
|
|
[OP_BXOR] = opImm0 | opRetI | opArgI | opArg2I,
|
|
[OP_BCOM] = opImm0 | opRetI | opArgI,
|
|
[OP_LSH] = opImm0 | opRetI | opArgI | opArg2I,
|
|
[OP_RSHI] = opImm0 | opRetI | opArgI | opArg2I,
|
|
[OP_RSHU] = opImm0 | opRetI | opArgI | opArg2I,
|
|
[OP_NEGF] = opImm0 | opRetF | opArgF,
|
|
[OP_ADDF] = opImm0 | opRetF | opArgF | opArg2F,
|
|
[OP_SUBF] = opImm0 | opRetF | opArgF | opArg2F,
|
|
[OP_DIVF] = opImm0 | opRetF | opArgF | opArg2F,
|
|
[OP_MULF] = opImm0 | opRetF | opArgF | opArg2F,
|
|
[OP_CVIF] = opImm0 | opRetF | opArgI,
|
|
[OP_CVFI] = opImm0 | opRetI | opArgF,
|
|
};
|
|
|
|
/*
|
|
* source instruction data
|
|
*/
|
|
typedef struct source_instruction_s source_instruction_t;
|
|
struct source_instruction_s {
|
|
// opcode
|
|
unsigned long int op;
|
|
|
|
// number of instruction
|
|
unsigned long int i_count;
|
|
|
|
// immediate value (if any)
|
|
union {
|
|
unsigned int i;
|
|
signed int si;
|
|
signed short ss[2];
|
|
unsigned short us[2];
|
|
unsigned char b;
|
|
} arg;
|
|
|
|
// required and returned registers
|
|
unsigned char regA1;
|
|
unsigned char regA2;
|
|
unsigned char regR;
|
|
unsigned char regPos;
|
|
|
|
// next instruction
|
|
source_instruction_t *next;
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
* read-only data needed by the generated code
|
|
*/
|
|
typedef struct VM_Data {
|
|
// length of this struct + data
|
|
size_t dataLength;
|
|
// compiled code size (in bytes)
|
|
// it only is code size, without the data
|
|
size_t codeLength;
|
|
|
|
// function pointers, no use to waste registers for them
|
|
long int (* AsmCall)( int, int );
|
|
void (* BlockCopy )( unsigned int, unsigned int, unsigned int );
|
|
|
|
// instruction pointers, rarely used so don't waste register
|
|
ppc_instruction_t *iPointers;
|
|
|
|
// data mask for load and store, not used if optimized
|
|
unsigned int dataMask;
|
|
|
|
// fixed number used to convert from integer to float
|
|
unsigned int floatBase; // 0x59800004
|
|
|
|
#if ELF64
|
|
// official procedure descriptor
|
|
opd_t opd;
|
|
#endif
|
|
|
|
// additional constants, for floating point OP_CONST
|
|
// this data has dynamic length, thus '0' here
|
|
unsigned int data[0];
|
|
} vm_data_t;
|
|
|
|
#ifdef offsetof
|
|
# define VM_Data_Offset( field ) offsetof( vm_data_t, field )
|
|
#else
|
|
# define OFFSET( structName, field ) \
|
|
( (void *)&(((structName *)NULL)->field) - NULL )
|
|
# define VM_Data_Offset( field ) OFFSET( vm_data_t, field )
|
|
#endif
|
|
|
|
|
|
/*
|
|
* functions used by generated code
|
|
*/
|
|
static long int
|
|
VM_AsmCall( int callSyscallInvNum, int callProgramStack )
|
|
{
|
|
vm_t *savedVM = currentVM;
|
|
long int i, ret;
|
|
#ifdef VM_TIMES
|
|
struct tms start_time, stop_time;
|
|
clock_t saved_time = time_outside_vm;
|
|
times( &start_time );
|
|
#endif
|
|
|
|
// save the stack to allow recursive VM entry
|
|
currentVM->programStack = callProgramStack - 4;
|
|
|
|
// we need to convert ints to longs on 64bit powerpcs
|
|
if ( sizeof( intptr_t ) == sizeof( int ) ) {
|
|
intptr_t *argPosition = (intptr_t *)((byte *)currentVM->dataBase + callProgramStack + 4);
|
|
|
|
// generated code does not invert syscall number
|
|
argPosition[ 0 ] = -1 - callSyscallInvNum;
|
|
|
|
ret = currentVM->systemCall( argPosition );
|
|
} else {
|
|
intptr_t args[11];
|
|
|
|
// generated code does not invert syscall number
|
|
args[0] = -1 - callSyscallInvNum;
|
|
|
|
int *argPosition = (int *)((byte *)currentVM->dataBase + callProgramStack + 4);
|
|
for( i = 1; i < 11; i++ )
|
|
args[ i ] = argPosition[ i ];
|
|
|
|
ret = currentVM->systemCall( args );
|
|
}
|
|
|
|
currentVM = savedVM;
|
|
|
|
#ifdef VM_TIMES
|
|
times( &stop_time );
|
|
time_outside_vm = saved_time + ( stop_time.tms_utime - start_time.tms_utime );
|
|
#endif
|
|
|
|
return ret;
|
|
}
|
|
|
|
static void
|
|
VM_BlockCopy( unsigned int dest, unsigned int src, unsigned int count )
|
|
{
|
|
unsigned dataMask = currentVM->dataMask;
|
|
|
|
if ( (dest & dataMask) != dest
|
|
|| (src & dataMask) != src
|
|
|| ((dest+count) & dataMask) != dest + count
|
|
|| ((src+count) & dataMask) != src + count)
|
|
{
|
|
DIE( "OP_BLOCK_COPY out of range!");
|
|
}
|
|
|
|
memcpy( currentVM->dataBase+dest, currentVM->dataBase+src, count );
|
|
}
|
|
|
|
|
|
/*
|
|
* code-block descriptors
|
|
*/
|
|
typedef struct dest_instruction dest_instruction_t;
|
|
typedef struct symbolic_jump symbolic_jump_t;
|
|
|
|
struct symbolic_jump {
|
|
// number of source instruction it has to jump to
|
|
unsigned long int jump_to;
|
|
|
|
// jump condition true/false, (4*cr7+(eq|gt..))
|
|
long int bo, bi;
|
|
|
|
// extensions / modifiers (branch-link)
|
|
unsigned long ext;
|
|
|
|
// dest_instruction refering to this jump
|
|
dest_instruction_t *parent;
|
|
|
|
// next jump
|
|
symbolic_jump_t *nextJump;
|
|
};
|
|
|
|
struct dest_instruction {
|
|
// position in the output chain
|
|
unsigned long int count;
|
|
|
|
// source instruction number
|
|
unsigned long int i_count;
|
|
|
|
// exact (for instructins), or maximum (for jump) length
|
|
unsigned short length;
|
|
|
|
dest_instruction_t *next;
|
|
|
|
// if the instruction is a jump than jump will be non NULL
|
|
symbolic_jump_t *jump;
|
|
|
|
// if jump is NULL than all the instructions will be here
|
|
ppc_instruction_t code[0];
|
|
};
|
|
|
|
// first and last instruction,
|
|
// di_first is a dummy instruction
|
|
static dest_instruction_t *di_first = NULL, *di_last = NULL;
|
|
// number of instructions
|
|
static unsigned long int di_count = 0;
|
|
// pointers needed to compute local jumps, those aren't pointers to
|
|
// actual instructions, just used to check how long the jump is going
|
|
// to be and whether it is positive or negative
|
|
static dest_instruction_t **di_pointers = NULL;
|
|
|
|
// output instructions which does not come from source code
|
|
// use false i_count value
|
|
#define FALSE_ICOUNT 0xffffffff
|
|
|
|
|
|
/*
|
|
* append specified instructions at the end of instruction chain
|
|
*/
|
|
static void
|
|
PPC_Append(
|
|
dest_instruction_t *di_now,
|
|
unsigned long int i_count
|
|
)
|
|
{
|
|
di_now->count = di_count++;
|
|
di_now->i_count = i_count;
|
|
di_now->next = NULL;
|
|
|
|
di_last->next = di_now;
|
|
di_last = di_now;
|
|
|
|
if ( i_count != FALSE_ICOUNT ) {
|
|
if ( ! di_pointers[ i_count ] )
|
|
di_pointers[ i_count ] = di_now;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* make space for instructions and append
|
|
*/
|
|
static void
|
|
PPC_AppendInstructions(
|
|
unsigned long int i_count,
|
|
size_t num_instructions,
|
|
const ppc_instruction_t *is
|
|
)
|
|
{
|
|
if ( num_instructions < 0 )
|
|
num_instructions = 0;
|
|
size_t iBytes = sizeof( ppc_instruction_t ) * num_instructions;
|
|
dest_instruction_t *di_now = PPC_Malloc( sizeof( dest_instruction_t ) + iBytes );
|
|
|
|
di_now->length = num_instructions;
|
|
di_now->jump = NULL;
|
|
|
|
if ( iBytes > 0 )
|
|
memcpy( &(di_now->code[0]), is, iBytes );
|
|
|
|
PPC_Append( di_now, i_count );
|
|
}
|
|
|
|
/*
|
|
* create symbolic jump and append
|
|
*/
|
|
static symbolic_jump_t *sj_first = NULL, *sj_last = NULL;
|
|
static void
|
|
PPC_PrepareJump(
|
|
unsigned long int i_count,
|
|
unsigned long int dest,
|
|
long int bo,
|
|
long int bi,
|
|
unsigned long int ext
|
|
)
|
|
{
|
|
dest_instruction_t *di_now = PPC_Malloc( sizeof( dest_instruction_t ) );
|
|
symbolic_jump_t *sj = PPC_Malloc( sizeof( symbolic_jump_t ) );
|
|
|
|
sj->jump_to = dest;
|
|
sj->bo = bo;
|
|
sj->bi = bi;
|
|
sj->ext = ext;
|
|
sj->parent = di_now;
|
|
sj->nextJump = NULL;
|
|
|
|
sj_last->nextJump = sj;
|
|
sj_last = sj;
|
|
|
|
di_now->length = (bo == branchAlways ? 1 : 2);
|
|
di_now->jump = sj;
|
|
|
|
PPC_Append( di_now, i_count );
|
|
}
|
|
|
|
/*
|
|
* simplyfy instruction emission
|
|
*/
|
|
#define emitStart( i_cnt ) \
|
|
unsigned long int i_count = i_cnt; \
|
|
size_t num_instructions = 0; \
|
|
long int force_emit = 0; \
|
|
ppc_instruction_t instructions[50];
|
|
|
|
#define pushIn( inst ) \
|
|
(instructions[ num_instructions++ ] = inst)
|
|
#define in( inst, args... ) pushIn( IN( inst, args ) )
|
|
|
|
#define emitEnd() \
|
|
do{ \
|
|
if ( num_instructions || force_emit ) \
|
|
PPC_AppendInstructions( i_count, num_instructions, instructions );\
|
|
num_instructions = 0; \
|
|
} while(0)
|
|
|
|
#define emitJump( dest, bo, bi, ext ) \
|
|
do { \
|
|
emitEnd(); \
|
|
PPC_PrepareJump( i_count, dest, bo, bi, ext ); \
|
|
} while(0)
|
|
|
|
|
|
/*
|
|
* definitions for creating .data section,
|
|
* used in cases where constant float is needed
|
|
*/
|
|
#define LOCAL_DATA_CHUNK 50
|
|
typedef struct local_data_s local_data_t;
|
|
struct local_data_s {
|
|
// number of data in this structure
|
|
long int count;
|
|
|
|
// data placeholder
|
|
unsigned int data[ LOCAL_DATA_CHUNK ];
|
|
|
|
// next chunk, if this one wasn't enough
|
|
local_data_t *next;
|
|
};
|
|
|
|
// first data chunk
|
|
static local_data_t *data_first = NULL;
|
|
// total number of data
|
|
static long int data_acc = 0;
|
|
|
|
/*
|
|
* append the data and return its offset
|
|
*/
|
|
static size_t
|
|
PPC_PushData( unsigned int datum )
|
|
{
|
|
local_data_t *d_now = data_first;
|
|
long int accumulated = 0;
|
|
|
|
// check whether we have this one already
|
|
do {
|
|
long int i;
|
|
for ( i = 0; i < d_now->count; i++ ) {
|
|
if ( d_now->data[ i ] == datum ) {
|
|
accumulated += i;
|
|
return VM_Data_Offset( data[ accumulated ] );
|
|
}
|
|
}
|
|
if ( !d_now->next )
|
|
break;
|
|
|
|
accumulated += d_now->count;
|
|
d_now = d_now->next;
|
|
} while (1);
|
|
|
|
// not found, need to append
|
|
accumulated += d_now->count;
|
|
|
|
// last chunk is full, create new one
|
|
if ( d_now->count >= LOCAL_DATA_CHUNK ) {
|
|
d_now->next = PPC_Malloc( sizeof( local_data_t ) );
|
|
d_now = d_now->next;
|
|
d_now->count = 0;
|
|
d_now->next = NULL;
|
|
}
|
|
|
|
d_now->data[ d_now->count ] = datum;
|
|
d_now->count += 1;
|
|
|
|
data_acc = accumulated + 1;
|
|
|
|
return VM_Data_Offset( data[ accumulated ] );
|
|
}
|
|
|
|
/*
|
|
* find leading zeros in dataMask to implement it with
|
|
* "rotate and mask" instruction
|
|
*/
|
|
static long int fastMaskHi = 0, fastMaskLo = 31;
|
|
static void
|
|
PPC_MakeFastMask( int mask )
|
|
{
|
|
#if defined( __GNUC__ ) && ( __GNUC__ >= 4 || ( __GNUC__ == 3 && __GNUC_MINOR__ >= 4 ) )
|
|
/* count leading zeros */
|
|
fastMaskHi = __builtin_clz( mask );
|
|
|
|
/* count trailing zeros */
|
|
fastMaskLo = 31 - __builtin_ctz( mask );
|
|
#else
|
|
fastMaskHi = 0;
|
|
while ( ( mask & ( 0x80000000 >> fastMaskHi ) ) == 0 )
|
|
fastMaskHi++;
|
|
|
|
fastMaskLo = 31;
|
|
while ( ( mask & ( 0x80000000 >> fastMaskLo ) ) == 0 )
|
|
fastMaskLo--;
|
|
#endif
|
|
}
|
|
|
|
|
|
/*
|
|
* register definitions
|
|
*/
|
|
|
|
/* registers which are global for generated code */
|
|
|
|
// pointer to VM_Data (constant)
|
|
#define rVMDATA r14
|
|
// vm->dataBase (constant)
|
|
#define rDATABASE r15
|
|
// programStack (variable)
|
|
#define rPSTACK r16
|
|
|
|
/*
|
|
* function local registers,
|
|
*
|
|
* normally only volatile registers are used, but if there aren't enough
|
|
* or function has to preserve some value while calling annother one
|
|
* then caller safe registers are used as well
|
|
*/
|
|
static const long int gpr_list[] = {
|
|
/* caller safe registers, normally only one is used */
|
|
r24, r23, r22, r21,
|
|
r20, r19, r18, r17,
|
|
/* volatile registers (preferred),
|
|
* normally no more than 5 is used */
|
|
r3, r4, r5, r6,
|
|
r7, r8, r9, r10,
|
|
};
|
|
static const long int gpr_vstart = 8; /* position of first volatile register */
|
|
static const long int gpr_total = sizeof( gpr_list ) / sizeof( gpr_list[0] );
|
|
|
|
static const long int fpr_list[] = {
|
|
/* static registers, normally none is used */
|
|
f20, f21, f19, f18,
|
|
f17, f16, f15, f14,
|
|
/* volatile registers (preferred),
|
|
* normally no more than 7 is used */
|
|
f0, f1, f2, f3,
|
|
f4, f5, f6, f7,
|
|
f8, f9, f10, f11,
|
|
f12, f13,
|
|
};
|
|
static const long int fpr_vstart = 8;
|
|
static const long int fpr_total = sizeof( fpr_list ) / sizeof( fpr_list[0] );
|
|
|
|
/*
|
|
* prepare some dummy structures and emit init code
|
|
*/
|
|
static void
|
|
PPC_CompileInit( void )
|
|
{
|
|
di_first = di_last = PPC_Malloc( sizeof( dest_instruction_t ) );
|
|
di_first->count = 0;
|
|
di_first->next = NULL;
|
|
di_first->jump = NULL;
|
|
|
|
sj_first = sj_last = PPC_Malloc( sizeof( symbolic_jump_t ) );
|
|
sj_first->nextJump = NULL;
|
|
|
|
data_first = PPC_Malloc( sizeof( local_data_t ) );
|
|
data_first->count = 0;
|
|
data_first->next = NULL;
|
|
|
|
/*
|
|
* init function:
|
|
* saves old values of global registers and sets our values
|
|
* function prototype is:
|
|
* int begin( void *data, int programStack, void *vm->dataBase )
|
|
*/
|
|
|
|
/* first instruction must not be placed on instruction list */
|
|
emitStart( FALSE_ICOUNT );
|
|
|
|
long int stack = STACK_SAVE + 4 * GPRLEN;
|
|
|
|
in( iMFLR, r0 );
|
|
in( iSTLU, r1, -stack, r1 );
|
|
in( iSTL, rVMDATA, STACK_SAVE + 0 * GPRLEN, r1 );
|
|
in( iSTL, rPSTACK, STACK_SAVE + 1 * GPRLEN, r1 );
|
|
in( iSTL, rDATABASE, STACK_SAVE + 2 * GPRLEN, r1 );
|
|
in( iSTL, r0, stack + STACK_LR, r1 );
|
|
in( iMR, rVMDATA, r3 );
|
|
in( iMR, rPSTACK, r4 );
|
|
in( iMR, rDATABASE, r5 );
|
|
in( iBL, +4*8 ); // LINK JUMP: first generated instruction | XXX jump !
|
|
in( iLL, rVMDATA, STACK_SAVE + 0 * GPRLEN, r1 );
|
|
in( iLL, rPSTACK, STACK_SAVE + 1 * GPRLEN, r1 );
|
|
in( iLL, rDATABASE, STACK_SAVE + 2 * GPRLEN, r1 );
|
|
in( iLL, r0, stack + STACK_LR, r1 );
|
|
in( iMTLR, r0 );
|
|
in( iADDI, r1, r1, stack );
|
|
in( iBLR );
|
|
|
|
emitEnd();
|
|
}
|
|
|
|
// rFIRST is the copy of the top value on the opstack
|
|
#define rFIRST (gpr_list[ gpr_pos - 1])
|
|
// second value on the opstack
|
|
#define rSECOND (gpr_list[ gpr_pos - 2 ])
|
|
// temporary registers, not on the opstack
|
|
#define rTEMP(x) (gpr_list[ gpr_pos + x ])
|
|
#define rTMP rTEMP(0)
|
|
|
|
#define fFIRST (fpr_list[ fpr_pos - 1 ])
|
|
#define fSECOND (fpr_list[ fpr_pos - 2 ])
|
|
#define fTEMP(x) (fpr_list[ fpr_pos + x ])
|
|
#define fTMP fTEMP(0)
|
|
|
|
// register types
|
|
#define rTYPE_STATIC 0x01
|
|
#define rTYPE_FLOAT 0x02
|
|
|
|
// what type should this opcode return
|
|
#define RET_INT ( !(i_now->regR & rTYPE_FLOAT) )
|
|
#define RET_FLOAT ( i_now->regR & rTYPE_FLOAT )
|
|
// what type should it accept
|
|
#define ARG_INT ( ! i_now->regA1 )
|
|
#define ARG_FLOAT ( i_now->regA1 )
|
|
#define ARG2_INT ( ! i_now->regA2 )
|
|
#define ARG2_FLOAT ( i_now->regA2 )
|
|
|
|
/*
|
|
* emit OP_CONST, called if nothing has used the const value directly
|
|
*/
|
|
static void
|
|
PPC_EmitConst( source_instruction_t * const i_const )
|
|
{
|
|
emitStart( i_const->i_count );
|
|
|
|
if ( !(i_const->regR & rTYPE_FLOAT) ) {
|
|
// gpr_pos needed for "rFIRST" to work
|
|
long int gpr_pos = i_const->regPos;
|
|
|
|
if ( i_const->arg.si >= -0x8000 && i_const->arg.si < 0x8000 ) {
|
|
in( iLI, rFIRST, i_const->arg.si );
|
|
} else if ( i_const->arg.i < 0x10000 ) {
|
|
in( iLI, rFIRST, 0 );
|
|
in( iORI, rFIRST, rFIRST, i_const->arg.i );
|
|
} else {
|
|
in( iLIS, rFIRST, i_const->arg.ss[ 0 ] );
|
|
if ( i_const->arg.us[ 1 ] != 0 )
|
|
in( iORI, rFIRST, rFIRST, i_const->arg.us[ 1 ] );
|
|
}
|
|
|
|
} else {
|
|
// fpr_pos needed for "fFIRST" to work
|
|
long int fpr_pos = i_const->regPos;
|
|
|
|
// there's no good way to generate the data,
|
|
// just read it from data section
|
|
in( iLFS, fFIRST, PPC_PushData( i_const->arg.i ), rVMDATA );
|
|
}
|
|
|
|
emitEnd();
|
|
}
|
|
#define MAYBE_EMIT_CONST() if ( i_const ) PPC_EmitConst( i_const )
|
|
|
|
/*
|
|
* emit empty instruction, just sets the needed pointers
|
|
*/
|
|
static inline void
|
|
PPC_EmitNull( source_instruction_t * const i_null )
|
|
{
|
|
PPC_AppendInstructions( i_null->i_count, 0, NULL );
|
|
}
|
|
#define EMIT_FALSE_CONST() PPC_EmitNull( i_const )
|
|
|
|
|
|
/*
|
|
* analize function for register usage and whether it needs stack (r1) prepared
|
|
*/
|
|
static void
|
|
VM_AnalyzeFunction(
|
|
source_instruction_t * const i_first,
|
|
long int *prepareStack,
|
|
long int *gpr_start_pos,
|
|
long int *fpr_start_pos
|
|
)
|
|
{
|
|
source_instruction_t *i_now = i_first;
|
|
|
|
source_instruction_t *value_provider[20] = { NULL };
|
|
unsigned long int opstack_depth = 0;
|
|
|
|
/*
|
|
* first step:
|
|
* remember what codes returned some value and mark the value type
|
|
* when we get to know what it should be
|
|
*/
|
|
while ( (i_now = i_now->next) ) {
|
|
unsigned long int op = i_now->op;
|
|
unsigned long int opi = vm_opInfo[ op ];
|
|
|
|
if ( opi & opArgIF ) {
|
|
assert( opstack_depth > 0 );
|
|
|
|
opstack_depth--;
|
|
source_instruction_t *vp = value_provider[ opstack_depth ];
|
|
unsigned long int vpopi = vm_opInfo[ vp->op ];
|
|
|
|
if ( (opi & opArgI) && (vpopi & opRetI) ) {
|
|
// instruction accepts integer, provider returns integer
|
|
//vp->regR |= rTYPE_INT;
|
|
//i_now->regA1 = rTYPE_INT;
|
|
} else if ( (opi & opArgF) && (vpopi & opRetF) ) {
|
|
// instruction accepts float, provider returns float
|
|
vp->regR |= rTYPE_FLOAT; // use OR here - could be marked as static
|
|
i_now->regA1 = rTYPE_FLOAT;
|
|
} else {
|
|
// instruction arg type does not agree with
|
|
// provider return type
|
|
DIE( "unrecognized instruction combination" );
|
|
}
|
|
|
|
}
|
|
if ( opi & opArg2IF ) {
|
|
assert( opstack_depth > 0 );
|
|
|
|
opstack_depth--;
|
|
source_instruction_t *vp = value_provider[ opstack_depth ];
|
|
unsigned long int vpopi = vm_opInfo[ vp->op ];
|
|
|
|
if ( (opi & opArg2I) && (vpopi & opRetI) ) {
|
|
// instruction accepts integer, provider returns integer
|
|
//vp->regR |= rTYPE_INT;
|
|
//i_now->regA2 = rTYPE_INT;
|
|
} else if ( (opi & opArg2F) && (vpopi & opRetF) ) {
|
|
// instruction accepts float, provider returns float
|
|
vp->regR |= rTYPE_FLOAT; // use OR here - could be marked as static
|
|
i_now->regA2 = rTYPE_FLOAT;
|
|
} else {
|
|
// instruction arg type does not agree with
|
|
// provider return type
|
|
DIE( "unrecognized instruction combination" );
|
|
}
|
|
}
|
|
|
|
|
|
if (
|
|
( op == OP_CALL )
|
|
||
|
|
( op == OP_BLOCK_COPY && ( i_now->arg.i > SL( 16, 32 ) || !OPTIMIZE_COPY ) )
|
|
) {
|
|
long int i;
|
|
*prepareStack = 1;
|
|
// force caller safe registers so we won't have to save them
|
|
for ( i = 0; i < opstack_depth; i++ ) {
|
|
source_instruction_t *vp = value_provider[ i ];
|
|
vp->regR |= rTYPE_STATIC;
|
|
}
|
|
}
|
|
|
|
|
|
if ( opi & opRetIF ) {
|
|
value_provider[ opstack_depth ] = i_now;
|
|
opstack_depth++;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* second step:
|
|
* now that we know register types; compute exactly how many registers
|
|
* of each type we need
|
|
*/
|
|
|
|
i_now = i_first;
|
|
long int needed_reg[4] = {0,0,0,0}, max_reg[4] = {0,0,0,0};
|
|
opstack_depth = 0;
|
|
while ( (i_now = i_now->next) ) {
|
|
unsigned long int op = i_now->op;
|
|
unsigned long int opi = vm_opInfo[ op ];
|
|
|
|
if ( opi & opArgIF ) {
|
|
assert( opstack_depth > 0 );
|
|
opstack_depth--;
|
|
source_instruction_t *vp = value_provider[ opstack_depth ];
|
|
|
|
needed_reg[ ( vp->regR & 2 ) ] -= 1;
|
|
if ( vp->regR & 1 ) // static
|
|
needed_reg[ ( vp->regR & 3 ) ] -= 1;
|
|
}
|
|
if ( opi & opArg2IF ) {
|
|
assert( opstack_depth > 0 );
|
|
opstack_depth--;
|
|
source_instruction_t *vp = value_provider[ opstack_depth ];
|
|
|
|
needed_reg[ ( vp->regR & 2 ) ] -= 1;
|
|
if ( vp->regR & 1 ) // static
|
|
needed_reg[ ( vp->regR & 3 ) ] -= 1;
|
|
}
|
|
|
|
if ( opi & opRetIF ) {
|
|
long int i;
|
|
value_provider[ opstack_depth ] = i_now;
|
|
opstack_depth++;
|
|
|
|
i = i_now->regR & 2;
|
|
needed_reg[ i ] += 1;
|
|
if ( max_reg[ i ] < needed_reg[ i ] )
|
|
max_reg[ i ] = needed_reg[ i ];
|
|
|
|
i = i_now->regR & 3;
|
|
if ( i & 1 ) {
|
|
needed_reg[ i ] += 1;
|
|
if ( max_reg[ i ] < needed_reg[ i ] )
|
|
max_reg[ i ] = needed_reg[ i ];
|
|
}
|
|
}
|
|
}
|
|
|
|
long int gpr_start = gpr_vstart;
|
|
const long int gpr_volatile = gpr_total - gpr_vstart;
|
|
if ( max_reg[ 1 ] > 0 || max_reg[ 0 ] > gpr_volatile ) {
|
|
// max_reg[ 0 ] - all gprs needed
|
|
// max_reg[ 1 ] - static gprs needed
|
|
long int max = max_reg[ 0 ] - gpr_volatile;
|
|
if ( max_reg[ 1 ] > max )
|
|
max = max_reg[ 1 ];
|
|
if ( max > gpr_vstart ) {
|
|
/* error */
|
|
DIE( "Need more GPRs" );
|
|
}
|
|
|
|
gpr_start -= max;
|
|
|
|
// need stack to save caller safe registers
|
|
*prepareStack = 1;
|
|
}
|
|
*gpr_start_pos = gpr_start;
|
|
|
|
long int fpr_start = fpr_vstart;
|
|
const long int fpr_volatile = fpr_total - fpr_vstart;
|
|
if ( max_reg[ 3 ] > 0 || max_reg[ 2 ] > fpr_volatile ) {
|
|
// max_reg[ 2 ] - all fprs needed
|
|
// max_reg[ 3 ] - static fprs needed
|
|
long int max = max_reg[ 2 ] - fpr_volatile;
|
|
if ( max_reg[ 3 ] > max )
|
|
max = max_reg[ 3 ];
|
|
if ( max > fpr_vstart ) {
|
|
/* error */
|
|
DIE( "Need more FPRs" );
|
|
}
|
|
|
|
fpr_start -= max;
|
|
|
|
// need stack to save caller safe registers
|
|
*prepareStack = 1;
|
|
}
|
|
*fpr_start_pos = fpr_start;
|
|
}
|
|
|
|
/*
|
|
* translate opcodes to ppc instructions,
|
|
* it works on functions, not on whole code at once
|
|
*/
|
|
static void
|
|
VM_CompileFunction( source_instruction_t * const i_first )
|
|
{
|
|
long int prepareStack = 0;
|
|
long int gpr_start_pos, fpr_start_pos;
|
|
|
|
VM_AnalyzeFunction( i_first, &prepareStack, &gpr_start_pos, &fpr_start_pos );
|
|
|
|
long int gpr_pos = gpr_start_pos, fpr_pos = fpr_start_pos;
|
|
|
|
// OP_CONST combines well with many opcodes so we treat it in a special way
|
|
source_instruction_t *i_const = NULL;
|
|
source_instruction_t *i_now = i_first;
|
|
|
|
// how big the stack has to be
|
|
long int save_space = STACK_SAVE;
|
|
{
|
|
if ( gpr_start_pos < gpr_vstart )
|
|
save_space += (gpr_vstart - gpr_start_pos) * GPRLEN;
|
|
save_space = ( save_space + 15 ) & ~0x0f;
|
|
|
|
if ( fpr_start_pos < fpr_vstart )
|
|
save_space += (fpr_vstart - fpr_start_pos) * FPRLEN;
|
|
save_space = ( save_space + 15 ) & ~0x0f;
|
|
}
|
|
|
|
long int stack_temp = prepareStack ? STACK_TEMP : STACK_RTEMP;
|
|
|
|
while ( (i_now = i_now->next) ) {
|
|
emitStart( i_now->i_count );
|
|
|
|
switch ( i_now->op )
|
|
{
|
|
default:
|
|
case OP_UNDEF:
|
|
case OP_IGNORE:
|
|
MAYBE_EMIT_CONST();
|
|
in( iNOP );
|
|
break;
|
|
|
|
case OP_BREAK:
|
|
MAYBE_EMIT_CONST();
|
|
// force SEGV
|
|
in( iLWZ, r0, 0, r0 );
|
|
break;
|
|
|
|
case OP_ENTER:
|
|
if ( i_const )
|
|
DIE( "Weird opcode order" );
|
|
|
|
// don't prepare stack if not needed
|
|
if ( prepareStack ) {
|
|
long int i, save_pos = STACK_SAVE;
|
|
|
|
in( iMFLR, r0 );
|
|
in( iSTLU, r1, -save_space, r1 );
|
|
in( iSTL, r0, save_space + STACK_LR, r1 );
|
|
|
|
/* save registers */
|
|
for ( i = gpr_start_pos; i < gpr_vstart; i++ ) {
|
|
in( iSTL, gpr_list[ i ], save_pos, r1 );
|
|
save_pos += GPRLEN;
|
|
}
|
|
save_pos = ( save_pos + 15 ) & ~0x0f;
|
|
|
|
for ( i = fpr_start_pos; i < fpr_vstart; i++ ) {
|
|
in( iSTFD, fpr_list[ i ], save_pos, r1 );
|
|
save_pos += FPRLEN;
|
|
}
|
|
prepareStack = 2;
|
|
}
|
|
|
|
in( iADDI, rPSTACK, rPSTACK, - i_now->arg.si );
|
|
break;
|
|
|
|
case OP_LEAVE:
|
|
if ( i_const ) {
|
|
EMIT_FALSE_CONST();
|
|
|
|
if ( i_const->regR & rTYPE_FLOAT)
|
|
DIE( "constant float in OP_LEAVE" );
|
|
|
|
if ( i_const->arg.si >= -0x8000 && i_const->arg.si < 0x8000 ) {
|
|
in( iLI, r3, i_const->arg.si );
|
|
} else if ( i_const->arg.i < 0x10000 ) {
|
|
in( iLI, r3, 0 );
|
|
in( iORI, r3, r3, i_const->arg.i );
|
|
} else {
|
|
in( iLIS, r3, i_const->arg.ss[ 0 ] );
|
|
if ( i_const->arg.us[ 1 ] != 0 )
|
|
in( iORI, r3, r3, i_const->arg.us[ 1 ] );
|
|
}
|
|
gpr_pos--;
|
|
} else {
|
|
MAYBE_EMIT_CONST();
|
|
|
|
/* place return value in r3 */
|
|
if ( ARG_INT ) {
|
|
if ( rFIRST != r3 )
|
|
in( iMR, r3, rFIRST );
|
|
gpr_pos--;
|
|
} else {
|
|
in( iSTFS, fFIRST, stack_temp, r1 );
|
|
in( iLWZ, r3, stack_temp, r1 );
|
|
fpr_pos--;
|
|
}
|
|
}
|
|
|
|
// don't undo stack if not prepared
|
|
if ( prepareStack >= 2 ) {
|
|
long int i, save_pos = STACK_SAVE;
|
|
|
|
in( iLL, r0, save_space + STACK_LR, r1 );
|
|
|
|
|
|
/* restore registers */
|
|
for ( i = gpr_start_pos; i < gpr_vstart; i++ ) {
|
|
in( iLL, gpr_list[ i ], save_pos, r1 );
|
|
save_pos += GPRLEN;
|
|
}
|
|
save_pos = ( save_pos + 15 ) & ~0x0f;
|
|
for ( i = fpr_start_pos; i < fpr_vstart; i++ ) {
|
|
in( iLFD, fpr_list[ i ], save_pos, r1 );
|
|
save_pos += FPRLEN;
|
|
}
|
|
|
|
in( iMTLR, r0 );
|
|
in( iADDI, r1, r1, save_space );
|
|
}
|
|
in( iADDI, rPSTACK, rPSTACK, i_now->arg.si);
|
|
in( iBLR );
|
|
assert( gpr_pos == gpr_start_pos );
|
|
assert( fpr_pos == fpr_start_pos );
|
|
break;
|
|
|
|
case OP_CALL:
|
|
if ( i_const ) {
|
|
EMIT_FALSE_CONST();
|
|
|
|
if ( i_const->arg.si >= 0 ) {
|
|
emitJump(
|
|
i_const->arg.i,
|
|
branchAlways, 0, branchExtLink
|
|
);
|
|
} else {
|
|
/* syscall */
|
|
in( iLL, r0, VM_Data_Offset( AsmCall ), rVMDATA );
|
|
|
|
in( iLI, r3, i_const->arg.si ); // negative value
|
|
in( iMR, r4, rPSTACK ); // push PSTACK on argument list
|
|
|
|
in( iMTCTR, r0 );
|
|
in( iBCTRL );
|
|
}
|
|
if ( rFIRST != r3 )
|
|
in( iMR, rFIRST, r3 );
|
|
} else {
|
|
MAYBE_EMIT_CONST();
|
|
|
|
in( iCMPWI, cr7, rFIRST, 0 );
|
|
in( iBLTm, cr7, +4*5 /* syscall */ ); // XXX jump !
|
|
/* instruction call */
|
|
|
|
// get instruction address
|
|
in( iLL, r0, VM_Data_Offset( iPointers ), rVMDATA );
|
|
in( iRLWINM, rFIRST, rFIRST, GPRLEN_SHIFT, 0, 31-GPRLEN_SHIFT ); // mul * GPRLEN
|
|
in( iLLX, r0, rFIRST, r0 ); // load pointer
|
|
|
|
in( iB, +4*(3 + (rFIRST != r3 ? 1 : 0) ) ); // XXX jump !
|
|
|
|
/* syscall */
|
|
in( iLL, r0, VM_Data_Offset( AsmCall ), rVMDATA ); // get asmCall pointer
|
|
/* rFIRST can be r3 or some static register */
|
|
if ( rFIRST != r3 )
|
|
in( iMR, r3, rFIRST ); // push OPSTACK top value on argument list
|
|
in( iMR, r4, rPSTACK ); // push PSTACK on argument list
|
|
|
|
/* common code */
|
|
in( iMTCTR, r0 );
|
|
in( iBCTRL );
|
|
|
|
if ( rFIRST != r3 )
|
|
in( iMR, rFIRST, r3 ); // push return value on the top of the opstack
|
|
}
|
|
break;
|
|
|
|
case OP_PUSH:
|
|
MAYBE_EMIT_CONST();
|
|
if ( RET_INT )
|
|
gpr_pos++;
|
|
else
|
|
fpr_pos++;
|
|
/* no instructions here */
|
|
force_emit = 1;
|
|
break;
|
|
|
|
case OP_POP:
|
|
MAYBE_EMIT_CONST();
|
|
if ( ARG_INT )
|
|
gpr_pos--;
|
|
else
|
|
fpr_pos--;
|
|
/* no instructions here */
|
|
force_emit = 1;
|
|
break;
|
|
|
|
case OP_CONST:
|
|
MAYBE_EMIT_CONST();
|
|
/* nothing here */
|
|
break;
|
|
|
|
case OP_LOCAL:
|
|
MAYBE_EMIT_CONST();
|
|
{
|
|
signed long int hi, lo;
|
|
hi = i_now->arg.ss[ 0 ];
|
|
lo = i_now->arg.ss[ 1 ];
|
|
if ( lo < 0 )
|
|
hi += 1;
|
|
|
|
gpr_pos++;
|
|
if ( hi == 0 ) {
|
|
in( iADDI, rFIRST, rPSTACK, lo );
|
|
} else {
|
|
in( iADDIS, rFIRST, rPSTACK, hi );
|
|
if ( lo != 0 )
|
|
in( iADDI, rFIRST, rFIRST, lo );
|
|
}
|
|
}
|
|
break;
|
|
|
|
case OP_JUMP:
|
|
if ( i_const ) {
|
|
EMIT_FALSE_CONST();
|
|
|
|
emitJump(
|
|
i_const->arg.i,
|
|
branchAlways, 0, 0
|
|
);
|
|
} else {
|
|
MAYBE_EMIT_CONST();
|
|
|
|
in( iLL, r0, VM_Data_Offset( iPointers ), rVMDATA );
|
|
in( iRLWINM, rFIRST, rFIRST, GPRLEN_SHIFT, 0, 31-GPRLEN_SHIFT ); // mul * GPRLEN
|
|
in( iLLX, r0, rFIRST, r0 ); // load pointer
|
|
in( iMTCTR, r0 );
|
|
in( iBCTR );
|
|
}
|
|
gpr_pos--;
|
|
break;
|
|
|
|
case OP_EQ:
|
|
case OP_NE:
|
|
if ( i_const && i_const->arg.si >= -0x8000 && i_const->arg.si < 0x10000 ) {
|
|
EMIT_FALSE_CONST();
|
|
if ( i_const->arg.si >= 0x8000 )
|
|
in( iCMPLWI, cr7, rSECOND, i_const->arg.i );
|
|
else
|
|
in( iCMPWI, cr7, rSECOND, i_const->arg.si );
|
|
} else {
|
|
MAYBE_EMIT_CONST();
|
|
in( iCMPW, cr7, rSECOND, rFIRST );
|
|
}
|
|
emitJump(
|
|
i_now->arg.i,
|
|
(i_now->op == OP_EQ ? branchTrue : branchFalse),
|
|
4*cr7+eq, 0
|
|
);
|
|
gpr_pos -= 2;
|
|
break;
|
|
|
|
case OP_LTI:
|
|
case OP_GEI:
|
|
if ( i_const && i_const->arg.si >= -0x8000 && i_const->arg.si < 0x8000 ) {
|
|
EMIT_FALSE_CONST();
|
|
in( iCMPWI, cr7, rSECOND, i_const->arg.si );
|
|
} else {
|
|
MAYBE_EMIT_CONST();
|
|
in( iCMPW, cr7, rSECOND, rFIRST );
|
|
}
|
|
emitJump(
|
|
i_now->arg.i,
|
|
( i_now->op == OP_LTI ? branchTrue : branchFalse ),
|
|
4*cr7+lt, 0
|
|
);
|
|
gpr_pos -= 2;
|
|
break;
|
|
|
|
case OP_GTI:
|
|
case OP_LEI:
|
|
if ( i_const && i_const->arg.si >= -0x8000 && i_const->arg.si < 0x8000 ) {
|
|
EMIT_FALSE_CONST();
|
|
in( iCMPWI, cr7, rSECOND, i_const->arg.si );
|
|
} else {
|
|
MAYBE_EMIT_CONST();
|
|
in( iCMPW, cr7, rSECOND, rFIRST );
|
|
}
|
|
emitJump(
|
|
i_now->arg.i,
|
|
( i_now->op == OP_GTI ? branchTrue : branchFalse ),
|
|
4*cr7+gt, 0
|
|
);
|
|
gpr_pos -= 2;
|
|
break;
|
|
|
|
case OP_LTU:
|
|
case OP_GEU:
|
|
if ( i_const && i_const->arg.i < 0x10000 ) {
|
|
EMIT_FALSE_CONST();
|
|
in( iCMPLWI, cr7, rSECOND, i_const->arg.i );
|
|
} else {
|
|
MAYBE_EMIT_CONST();
|
|
in( iCMPLW, cr7, rSECOND, rFIRST );
|
|
}
|
|
emitJump(
|
|
i_now->arg.i,
|
|
( i_now->op == OP_LTU ? branchTrue : branchFalse ),
|
|
4*cr7+lt, 0
|
|
);
|
|
gpr_pos -= 2;
|
|
break;
|
|
|
|
case OP_GTU:
|
|
case OP_LEU:
|
|
if ( i_const && i_const->arg.i < 0x10000 ) {
|
|
EMIT_FALSE_CONST();
|
|
in( iCMPLWI, cr7, rSECOND, i_const->arg.i );
|
|
} else {
|
|
MAYBE_EMIT_CONST();
|
|
in( iCMPLW, cr7, rSECOND, rFIRST );
|
|
}
|
|
emitJump(
|
|
i_now->arg.i,
|
|
( i_now->op == OP_GTU ? branchTrue : branchFalse ),
|
|
4*cr7+gt, 0
|
|
);
|
|
gpr_pos -= 2;
|
|
break;
|
|
|
|
case OP_EQF:
|
|
case OP_NEF:
|
|
MAYBE_EMIT_CONST();
|
|
in( iFCMPU, cr7, fSECOND, fFIRST );
|
|
emitJump(
|
|
i_now->arg.i,
|
|
( i_now->op == OP_EQF ? branchTrue : branchFalse ),
|
|
4*cr7+eq, 0
|
|
);
|
|
fpr_pos -= 2;
|
|
break;
|
|
|
|
case OP_LTF:
|
|
case OP_GEF:
|
|
MAYBE_EMIT_CONST();
|
|
in( iFCMPU, cr7, fSECOND, fFIRST );
|
|
emitJump(
|
|
i_now->arg.i,
|
|
( i_now->op == OP_LTF ? branchTrue : branchFalse ),
|
|
4*cr7+lt, 0
|
|
);
|
|
fpr_pos -= 2;
|
|
break;
|
|
|
|
case OP_GTF:
|
|
case OP_LEF:
|
|
MAYBE_EMIT_CONST();
|
|
in( iFCMPU, cr7, fSECOND, fFIRST );
|
|
emitJump(
|
|
i_now->arg.i,
|
|
( i_now->op == OP_GTF ? branchTrue : branchFalse ),
|
|
4*cr7+gt, 0
|
|
);
|
|
fpr_pos -= 2;
|
|
break;
|
|
|
|
case OP_LOAD1:
|
|
MAYBE_EMIT_CONST();
|
|
#if OPTIMIZE_MASK
|
|
in( iRLWINM, rFIRST, rFIRST, 0, fastMaskHi, fastMaskLo );
|
|
#else
|
|
in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA );
|
|
in( iAND, rFIRST, rFIRST, r0 );
|
|
#endif
|
|
in( iLBZX, rFIRST, rFIRST, rDATABASE );
|
|
break;
|
|
|
|
case OP_LOAD2:
|
|
MAYBE_EMIT_CONST();
|
|
#if OPTIMIZE_MASK
|
|
in( iRLWINM, rFIRST, rFIRST, 0, fastMaskHi, fastMaskLo );
|
|
#else
|
|
in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA );
|
|
in( iAND, rFIRST, rFIRST, r0 );
|
|
#endif
|
|
in( iLHZX, rFIRST, rFIRST, rDATABASE );
|
|
break;
|
|
|
|
case OP_LOAD4:
|
|
MAYBE_EMIT_CONST();
|
|
#if OPTIMIZE_MASK
|
|
in( iRLWINM, rFIRST, rFIRST, 0, fastMaskHi, fastMaskLo );
|
|
#else
|
|
in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA );
|
|
in( iAND, rFIRST, rFIRST, r0 );
|
|
#endif
|
|
if ( RET_INT ) {
|
|
in( iLWZX, rFIRST, rFIRST, rDATABASE );
|
|
} else {
|
|
fpr_pos++;
|
|
in( iLFSX, fFIRST, rFIRST, rDATABASE );
|
|
gpr_pos--;
|
|
}
|
|
break;
|
|
|
|
case OP_STORE1:
|
|
MAYBE_EMIT_CONST();
|
|
#if OPTIMIZE_MASK
|
|
in( iRLWINM, rSECOND, rSECOND, 0, fastMaskHi, fastMaskLo );
|
|
#else
|
|
in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA );
|
|
in( iAND, rSECOND, rSECOND, r0 );
|
|
#endif
|
|
in( iSTBX, rFIRST, rSECOND, rDATABASE );
|
|
gpr_pos -= 2;
|
|
break;
|
|
|
|
case OP_STORE2:
|
|
MAYBE_EMIT_CONST();
|
|
#if OPTIMIZE_MASK
|
|
in( iRLWINM, rSECOND, rSECOND, 0, fastMaskHi, fastMaskLo );
|
|
#else
|
|
in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA );
|
|
in( iAND, rSECOND, rSECOND, r0 );
|
|
#endif
|
|
in( iSTHX, rFIRST, rSECOND, rDATABASE );
|
|
gpr_pos -= 2;
|
|
break;
|
|
|
|
case OP_STORE4:
|
|
MAYBE_EMIT_CONST();
|
|
if ( ARG_INT ) {
|
|
#if OPTIMIZE_MASK
|
|
in( iRLWINM, rSECOND, rSECOND, 0, fastMaskHi, fastMaskLo );
|
|
#else
|
|
in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA );
|
|
in( iAND, rSECOND, rSECOND, r0 );
|
|
#endif
|
|
|
|
in( iSTWX, rFIRST, rSECOND, rDATABASE );
|
|
gpr_pos--;
|
|
} else {
|
|
#if OPTIMIZE_MASK
|
|
in( iRLWINM, rFIRST, rFIRST, 0, fastMaskHi, fastMaskLo );
|
|
#else
|
|
in( iLWZ, r0, VM_Data_Offset( dataMask ), rVMDATA );
|
|
in( iAND, rFIRST, rFIRST, r0 );
|
|
#endif
|
|
|
|
in( iSTFSX, fFIRST, rFIRST, rDATABASE );
|
|
fpr_pos--;
|
|
}
|
|
gpr_pos--;
|
|
break;
|
|
|
|
case OP_ARG:
|
|
MAYBE_EMIT_CONST();
|
|
in( iADDI, r0, rPSTACK, i_now->arg.b );
|
|
if ( ARG_INT ) {
|
|
in( iSTWX, rFIRST, rDATABASE, r0 );
|
|
gpr_pos--;
|
|
} else {
|
|
in( iSTFSX, fFIRST, rDATABASE, r0 );
|
|
fpr_pos--;
|
|
}
|
|
break;
|
|
|
|
case OP_BLOCK_COPY:
|
|
MAYBE_EMIT_CONST();
|
|
#if OPTIMIZE_COPY
|
|
if ( i_now->arg.i <= SL( 16, 32 ) ) {
|
|
/* block is very short so copy it in-place */
|
|
|
|
unsigned int len = i_now->arg.i;
|
|
unsigned int copied = 0, left = len;
|
|
|
|
in( iADD, rFIRST, rFIRST, rDATABASE );
|
|
in( iADD, rSECOND, rSECOND, rDATABASE );
|
|
|
|
if ( len >= GPRLEN ) {
|
|
long int i, words = len / GPRLEN;
|
|
in( iLL, r0, 0, rFIRST );
|
|
for ( i = 1; i < words; i++ )
|
|
in( iLL, rTEMP( i - 1 ), GPRLEN * i, rFIRST );
|
|
|
|
in( iSTL, r0, 0, rSECOND );
|
|
for ( i = 1; i < words; i++ )
|
|
in( iSTL, rTEMP( i - 1 ), GPRLEN * i, rSECOND );
|
|
|
|
copied += words * GPRLEN;
|
|
left -= copied;
|
|
}
|
|
|
|
if ( SL( 0, left >= 4 ) ) {
|
|
in( iLWZ, r0, copied+0, rFIRST );
|
|
in( iSTW, r0, copied+0, rSECOND );
|
|
copied += 4;
|
|
left -= 4;
|
|
}
|
|
if ( left >= 4 ) {
|
|
DIE("Bug in OP_BLOCK_COPY");
|
|
}
|
|
if ( left == 3 ) {
|
|
in( iLHZ, r0, copied+0, rFIRST );
|
|
in( iLBZ, rTMP, copied+2, rFIRST );
|
|
in( iSTH, r0, copied+0, rSECOND );
|
|
in( iSTB, rTMP, copied+2, rSECOND );
|
|
} else if ( left == 2 ) {
|
|
in( iLHZ, r0, copied+0, rFIRST );
|
|
in( iSTH, r0, copied+0, rSECOND );
|
|
} else if ( left == 1 ) {
|
|
in( iLBZ, r0, copied+0, rFIRST );
|
|
in( iSTB, r0, copied+0, rSECOND );
|
|
}
|
|
} else
|
|
#endif
|
|
{
|
|
unsigned long int r5_ori = 0;
|
|
if ( i_now->arg.si >= -0x8000 && i_now->arg.si < 0x8000 ) {
|
|
in( iLI, r5, i_now->arg.si );
|
|
} else if ( i_now->arg.i < 0x10000 ) {
|
|
in( iLI, r5, 0 );
|
|
r5_ori = i_now->arg.i;
|
|
} else {
|
|
in( iLIS, r5, i_now->arg.ss[ 0 ] );
|
|
r5_ori = i_now->arg.us[ 1 ];
|
|
}
|
|
|
|
in( iLL, r0, VM_Data_Offset( BlockCopy ), rVMDATA ); // get blockCopy pointer
|
|
|
|
if ( r5_ori )
|
|
in( iORI, r5, r5, r5_ori );
|
|
|
|
in( iMTCTR, r0 );
|
|
|
|
if ( rFIRST != r4 )
|
|
in( iMR, r4, rFIRST );
|
|
if ( rSECOND != r3 )
|
|
in( iMR, r3, rSECOND );
|
|
|
|
in( iBCTRL );
|
|
}
|
|
|
|
gpr_pos -= 2;
|
|
break;
|
|
|
|
case OP_SEX8:
|
|
MAYBE_EMIT_CONST();
|
|
in( iEXTSB, rFIRST, rFIRST );
|
|
break;
|
|
|
|
case OP_SEX16:
|
|
MAYBE_EMIT_CONST();
|
|
in( iEXTSH, rFIRST, rFIRST );
|
|
break;
|
|
|
|
case OP_NEGI:
|
|
MAYBE_EMIT_CONST();
|
|
in( iNEG, rFIRST, rFIRST );
|
|
break;
|
|
|
|
case OP_ADD:
|
|
if ( i_const ) {
|
|
EMIT_FALSE_CONST();
|
|
|
|
signed short int hi, lo;
|
|
hi = i_const->arg.ss[ 0 ];
|
|
lo = i_const->arg.ss[ 1 ];
|
|
if ( lo < 0 )
|
|
hi += 1;
|
|
|
|
if ( hi != 0 )
|
|
in( iADDIS, rSECOND, rSECOND, hi );
|
|
if ( lo != 0 )
|
|
in( iADDI, rSECOND, rSECOND, lo );
|
|
|
|
// if both are zero no instruction will be written
|
|
if ( hi == 0 && lo == 0 )
|
|
force_emit = 1;
|
|
} else {
|
|
MAYBE_EMIT_CONST();
|
|
in( iADD, rSECOND, rSECOND, rFIRST );
|
|
}
|
|
gpr_pos--;
|
|
break;
|
|
|
|
case OP_SUB:
|
|
MAYBE_EMIT_CONST();
|
|
in( iSUB, rSECOND, rSECOND, rFIRST );
|
|
gpr_pos--;
|
|
break;
|
|
|
|
case OP_DIVI:
|
|
MAYBE_EMIT_CONST();
|
|
in( iDIVW, rSECOND, rSECOND, rFIRST );
|
|
gpr_pos--;
|
|
break;
|
|
|
|
case OP_DIVU:
|
|
MAYBE_EMIT_CONST();
|
|
in( iDIVWU, rSECOND, rSECOND, rFIRST );
|
|
gpr_pos--;
|
|
break;
|
|
|
|
case OP_MODI:
|
|
MAYBE_EMIT_CONST();
|
|
in( iDIVW, r0, rSECOND, rFIRST );
|
|
in( iMULLW, r0, r0, rFIRST );
|
|
in( iSUB, rSECOND, rSECOND, r0 );
|
|
gpr_pos--;
|
|
break;
|
|
|
|
case OP_MODU:
|
|
MAYBE_EMIT_CONST();
|
|
in( iDIVWU, r0, rSECOND, rFIRST );
|
|
in( iMULLW, r0, r0, rFIRST );
|
|
in( iSUB, rSECOND, rSECOND, r0 );
|
|
gpr_pos--;
|
|
break;
|
|
|
|
case OP_MULI:
|
|
case OP_MULU:
|
|
MAYBE_EMIT_CONST();
|
|
in( iMULLW, rSECOND, rSECOND, rFIRST );
|
|
gpr_pos--;
|
|
break;
|
|
|
|
case OP_BAND:
|
|
MAYBE_EMIT_CONST();
|
|
in( iAND, rSECOND, rSECOND, rFIRST );
|
|
gpr_pos--;
|
|
break;
|
|
|
|
case OP_BOR:
|
|
MAYBE_EMIT_CONST();
|
|
in( iOR, rSECOND, rSECOND, rFIRST );
|
|
gpr_pos--;
|
|
break;
|
|
|
|
case OP_BXOR:
|
|
MAYBE_EMIT_CONST();
|
|
in( iXOR, rSECOND, rSECOND, rFIRST );
|
|
gpr_pos--;
|
|
break;
|
|
|
|
case OP_BCOM:
|
|
MAYBE_EMIT_CONST();
|
|
in( iNOT, rFIRST, rFIRST );
|
|
break;
|
|
|
|
case OP_LSH:
|
|
MAYBE_EMIT_CONST();
|
|
in( iSLW, rSECOND, rSECOND, rFIRST );
|
|
gpr_pos--;
|
|
break;
|
|
|
|
case OP_RSHI:
|
|
MAYBE_EMIT_CONST();
|
|
in( iSRAW, rSECOND, rSECOND, rFIRST );
|
|
gpr_pos--;
|
|
break;
|
|
|
|
case OP_RSHU:
|
|
MAYBE_EMIT_CONST();
|
|
in( iSRW, rSECOND, rSECOND, rFIRST );
|
|
gpr_pos--;
|
|
break;
|
|
|
|
case OP_NEGF:
|
|
MAYBE_EMIT_CONST();
|
|
in( iFNEG, fFIRST, fFIRST );
|
|
break;
|
|
|
|
case OP_ADDF:
|
|
MAYBE_EMIT_CONST();
|
|
in( iFADDS, fSECOND, fSECOND, fFIRST );
|
|
fpr_pos--;
|
|
break;
|
|
|
|
case OP_SUBF:
|
|
MAYBE_EMIT_CONST();
|
|
in( iFSUBS, fSECOND, fSECOND, fFIRST );
|
|
fpr_pos--;
|
|
break;
|
|
|
|
case OP_DIVF:
|
|
MAYBE_EMIT_CONST();
|
|
in( iFDIVS, fSECOND, fSECOND, fFIRST );
|
|
fpr_pos--;
|
|
break;
|
|
|
|
case OP_MULF:
|
|
MAYBE_EMIT_CONST();
|
|
in( iFMULS, fSECOND, fSECOND, fFIRST );
|
|
fpr_pos--;
|
|
break;
|
|
|
|
case OP_CVIF:
|
|
MAYBE_EMIT_CONST();
|
|
fpr_pos++;
|
|
in( iXORIS, rFIRST, rFIRST, 0x8000 );
|
|
in( iLIS, r0, 0x4330 );
|
|
in( iSTW, rFIRST, stack_temp + 4, r1 );
|
|
in( iSTW, r0, stack_temp, r1 );
|
|
in( iLFS, fTMP, VM_Data_Offset( floatBase ), rVMDATA );
|
|
in( iLFD, fFIRST, stack_temp, r1 );
|
|
in( iFSUB, fFIRST, fFIRST, fTMP );
|
|
in( iFRSP, fFIRST, fFIRST );
|
|
gpr_pos--;
|
|
break;
|
|
|
|
case OP_CVFI:
|
|
MAYBE_EMIT_CONST();
|
|
gpr_pos++;
|
|
in( iFCTIWZ, fFIRST, fFIRST );
|
|
in( iSTFD, fFIRST, stack_temp, r1 );
|
|
in( iLWZ, rFIRST, stack_temp + 4, r1 );
|
|
fpr_pos--;
|
|
break;
|
|
}
|
|
|
|
i_const = NULL;
|
|
|
|
if ( i_now->op != OP_CONST ) {
|
|
// emit the instructions if it isn't OP_CONST
|
|
emitEnd();
|
|
} else {
|
|
// mark in what register the value should be saved
|
|
if ( RET_INT )
|
|
i_now->regPos = ++gpr_pos;
|
|
else
|
|
i_now->regPos = ++fpr_pos;
|
|
|
|
#if OPTIMIZE_HOLE
|
|
i_const = i_now;
|
|
#else
|
|
PPC_EmitConst( i_now );
|
|
#endif
|
|
}
|
|
}
|
|
if ( i_const )
|
|
DIE( "left (unused) OP_CONST" );
|
|
|
|
{
|
|
// free opcode information, don't free first dummy one
|
|
source_instruction_t *i_next = i_first->next;
|
|
while ( i_next ) {
|
|
i_now = i_next;
|
|
i_next = i_now->next;
|
|
PPC_Free( i_now );
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* check which jumps are short enough to use signed 16bit immediate branch
|
|
*/
|
|
static void
|
|
PPC_ShrinkJumps( void )
|
|
{
|
|
symbolic_jump_t *sj_now = sj_first;
|
|
while ( (sj_now = sj_now->nextJump) ) {
|
|
if ( sj_now->bo == branchAlways )
|
|
// non-conditional branch has 26bit immediate
|
|
sj_now->parent->length = 1;
|
|
|
|
else {
|
|
dest_instruction_t *di = di_pointers[ sj_now->jump_to ];
|
|
dest_instruction_t *ji = sj_now->parent;
|
|
long int jump_length = 0;
|
|
if ( ! di )
|
|
DIE( "No instruction to jump to" );
|
|
if ( ji->count > di->count ) {
|
|
do {
|
|
jump_length += di->length;
|
|
} while ( ( di = di->next ) != ji );
|
|
} else {
|
|
jump_length = 1;
|
|
while ( ( ji = ji->next ) != di )
|
|
jump_length += ji->length;
|
|
}
|
|
if ( jump_length < 0x2000 )
|
|
// jump is short, use normal instruction
|
|
sj_now->parent->length = 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* puts all the data in one place, it consists of many different tasks
|
|
*/
|
|
static void
|
|
PPC_ComputeCode( vm_t *vm )
|
|
{
|
|
dest_instruction_t *di_now = di_first;
|
|
|
|
unsigned long int codeInstructions = 0;
|
|
// count total instruciton number
|
|
while ( (di_now = di_now->next ) )
|
|
codeInstructions += di_now->length;
|
|
|
|
size_t codeLength = sizeof( vm_data_t )
|
|
+ sizeof( unsigned int ) * data_acc
|
|
+ sizeof( ppc_instruction_t ) * codeInstructions;
|
|
|
|
// get the memory for the generated code, smarter ppcs need the
|
|
// mem to be marked as executable (whill change later)
|
|
unsigned char *dataAndCode = mmap( NULL, codeLength,
|
|
PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0 );
|
|
|
|
if ( ! dataAndCode )
|
|
DIE( "Not enough memory" );
|
|
|
|
ppc_instruction_t *codeNow, *codeBegin;
|
|
codeNow = codeBegin = (ppc_instruction_t *)( dataAndCode + VM_Data_Offset( data[ data_acc ] ) );
|
|
|
|
ppc_instruction_t nop = IN( iNOP );
|
|
|
|
// copy instructions to the destination
|
|
// fills the jump instructions with nops
|
|
// saves pointers of all instructions
|
|
di_now = di_first;
|
|
while ( (di_now = di_now->next ) ) {
|
|
unsigned long int i_count = di_now->i_count;
|
|
if ( i_count != FALSE_ICOUNT ) {
|
|
if ( ! di_pointers[ i_count ] )
|
|
di_pointers[ i_count ] = (void *) codeNow;
|
|
}
|
|
|
|
if ( di_now->jump == NULL ) {
|
|
memcpy( codeNow, &(di_now->code[0]), di_now->length * sizeof( ppc_instruction_t ) );
|
|
codeNow += di_now->length;
|
|
} else {
|
|
long int i;
|
|
symbolic_jump_t *sj;
|
|
for ( i = 0; i < di_now->length; i++ )
|
|
codeNow[ i ] = nop;
|
|
codeNow += di_now->length;
|
|
|
|
sj = di_now->jump;
|
|
// save position of jumping instruction
|
|
sj->parent = (void *)(codeNow - 1);
|
|
}
|
|
}
|
|
|
|
// compute the jumps and write corresponding instructions
|
|
symbolic_jump_t *sj_now = sj_first;
|
|
while ( (sj_now = sj_now->nextJump ) ) {
|
|
ppc_instruction_t *jumpFrom = (void *) sj_now->parent;
|
|
ppc_instruction_t *jumpTo = (void *) di_pointers[ sj_now->jump_to ];
|
|
signed long int jumpLength = jumpTo - jumpFrom;
|
|
|
|
// if jump is short, just write it
|
|
if ( jumpLength >= - 8192 && jumpLength < 8192 ) {
|
|
powerpc_iname_t branchConditional = sj_now->ext & branchExtLink ? iBCL : iBC;
|
|
*jumpFrom = IN( branchConditional, sj_now->bo, sj_now->bi, jumpLength * 4 );
|
|
continue;
|
|
}
|
|
|
|
// jump isn't short so write it as two instructions
|
|
//
|
|
// the letter one is a non-conditional branch instruction which
|
|
// accepts immediate values big enough (26 bits)
|
|
*jumpFrom = IN( (sj_now->ext & branchExtLink ? iBL : iB), jumpLength * 4 );
|
|
if ( sj_now->bo == branchAlways )
|
|
continue;
|
|
|
|
// there should have been additional space prepared for this case
|
|
if ( jumpFrom[ -1 ] != nop )
|
|
DIE( "additional space for long jump not prepared" );
|
|
|
|
// invert instruction condition
|
|
long int bo = 0;
|
|
switch ( sj_now->bo ) {
|
|
case branchTrue:
|
|
bo = branchFalse;
|
|
break;
|
|
case branchFalse:
|
|
bo = branchTrue;
|
|
break;
|
|
default:
|
|
DIE( "unrecognized branch type" );
|
|
break;
|
|
}
|
|
|
|
// the former instruction is an inverted conditional branch which
|
|
// jumps over the non-conditional one
|
|
jumpFrom[ -1 ] = IN( iBC, bo, sj_now->bi, +2*4 );
|
|
}
|
|
|
|
vm->codeBase = dataAndCode;
|
|
vm->codeLength = codeLength;
|
|
|
|
vm_data_t *data = (vm_data_t *)dataAndCode;
|
|
|
|
#if ELF64
|
|
// prepare Official Procedure Descriptor for the generated code
|
|
// and retrieve real function pointer for helper functions
|
|
|
|
opd_t *ac = (void *)VM_AsmCall, *bc = (void *)VM_BlockCopy;
|
|
data->opd.function = codeBegin;
|
|
// trick it into using the same TOC
|
|
// this way we won't have to switch TOC before calling AsmCall or BlockCopy
|
|
data->opd.toc = ac->toc;
|
|
data->opd.env = ac->env;
|
|
|
|
data->AsmCall = ac->function;
|
|
data->BlockCopy = bc->function;
|
|
#else
|
|
data->AsmCall = VM_AsmCall;
|
|
data->BlockCopy = VM_BlockCopy;
|
|
#endif
|
|
|
|
data->dataMask = vm->dataMask;
|
|
data->iPointers = (ppc_instruction_t *)vm->instructionPointers;
|
|
data->dataLength = VM_Data_Offset( data[ data_acc ] );
|
|
data->codeLength = ( codeNow - codeBegin ) * sizeof( ppc_instruction_t );
|
|
data->floatBase = 0x59800004;
|
|
|
|
|
|
/* write dynamic data (float constants) */
|
|
{
|
|
local_data_t *d_next, *d_now = data_first;
|
|
long int accumulated = 0;
|
|
|
|
do {
|
|
long int i;
|
|
for ( i = 0; i < d_now->count; i++ )
|
|
data->data[ accumulated + i ] = d_now->data[ i ];
|
|
|
|
accumulated += d_now->count;
|
|
d_next = d_now->next;
|
|
PPC_Free( d_now );
|
|
|
|
if ( !d_next )
|
|
break;
|
|
d_now = d_next;
|
|
} while (1);
|
|
data_first = NULL;
|
|
}
|
|
|
|
/* free most of the compilation memory */
|
|
{
|
|
di_now = di_first->next;
|
|
PPC_Free( di_first );
|
|
PPC_Free( sj_first );
|
|
|
|
while ( di_now ) {
|
|
di_first = di_now->next;
|
|
if ( di_now->jump )
|
|
PPC_Free( di_now->jump );
|
|
PPC_Free( di_now );
|
|
di_now = di_first;
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
static void
|
|
VM_Destroy_Compiled( vm_t *self )
|
|
{
|
|
if ( self->codeBase ) {
|
|
if ( munmap( self->codeBase, self->codeLength ) )
|
|
Com_Printf( S_COLOR_RED "Memory unmap failed, possible memory leak\n" );
|
|
}
|
|
self->codeBase = NULL;
|
|
}
|
|
|
|
void
|
|
VM_Compile( vm_t *vm, vmHeader_t *header )
|
|
{
|
|
long int pc = 0;
|
|
unsigned long int i_count;
|
|
char* code;
|
|
struct timeval tvstart = {0, 0};
|
|
source_instruction_t *i_first /* dummy */, *i_last = NULL, *i_now;
|
|
|
|
vm->compiled = qfalse;
|
|
|
|
gettimeofday(&tvstart, NULL);
|
|
|
|
PPC_MakeFastMask( vm->dataMask );
|
|
|
|
i_first = PPC_Malloc( sizeof( source_instruction_t ) );
|
|
i_first->next = NULL;
|
|
|
|
// realloc instructionPointers with correct size
|
|
// use Z_Malloc so vm.c will be able to free the memory
|
|
if ( sizeof( void * ) != sizeof( int ) ) {
|
|
Z_Free( vm->instructionPointers );
|
|
vm->instructionPointers = Z_Malloc( header->instructionCount * sizeof( void * ) );
|
|
}
|
|
di_pointers = (void *)vm->instructionPointers;
|
|
memset( di_pointers, 0, header->instructionCount * sizeof( void * ) );
|
|
|
|
|
|
PPC_CompileInit();
|
|
|
|
/*
|
|
* read the input program
|
|
* divide it into functions and send each function to compiler
|
|
*/
|
|
code = (char *)header + header->codeOffset;
|
|
for ( i_count = 0; i_count < header->instructionCount; ++i_count )
|
|
{
|
|
unsigned char op = code[ pc++ ];
|
|
|
|
if ( op == OP_ENTER ) {
|
|
if ( i_first->next )
|
|
VM_CompileFunction( i_first );
|
|
i_first->next = NULL;
|
|
i_last = i_first;
|
|
}
|
|
|
|
i_now = PPC_Malloc( sizeof( source_instruction_t ) );
|
|
i_now->op = op;
|
|
i_now->i_count = i_count;
|
|
i_now->arg.i = 0;
|
|
i_now->regA1 = 0;
|
|
i_now->regA2 = 0;
|
|
i_now->regR = 0;
|
|
i_now->regPos = 0;
|
|
i_now->next = NULL;
|
|
|
|
if ( vm_opInfo[op] & opImm4 ) {
|
|
union {
|
|
unsigned char b[4];
|
|
unsigned int i;
|
|
} c = { { code[ pc + 3 ], code[ pc + 2 ], code[ pc + 1 ], code[ pc + 0 ] }, };
|
|
|
|
i_now->arg.i = c.i;
|
|
pc += 4;
|
|
} else if ( vm_opInfo[op] & opImm1 ) {
|
|
i_now->arg.b = code[ pc++ ];
|
|
}
|
|
|
|
i_last->next = i_now;
|
|
i_last = i_now;
|
|
}
|
|
VM_CompileFunction( i_first );
|
|
PPC_Free( i_first );
|
|
|
|
PPC_ShrinkJumps();
|
|
memset( di_pointers, 0, header->instructionCount * sizeof( void * ) );
|
|
PPC_ComputeCode( vm );
|
|
|
|
/* check for uninitialized pointers */
|
|
#ifdef DEBUG_VM
|
|
long int i;
|
|
for ( i = 0; i < header->instructionCount; i++ )
|
|
if ( di_pointers[ i ] == 0 )
|
|
Com_Printf( S_COLOR_RED "Pointer %ld not initialized !\n", i );
|
|
#endif
|
|
|
|
/* mark memory as executable and not writeable */
|
|
if ( mprotect( vm->codeBase, vm->codeLength, PROT_READ|PROT_EXEC ) ) {
|
|
|
|
// it has failed, make sure memory is unmapped before throwing the error
|
|
VM_Destroy_Compiled( vm );
|
|
DIE( "mprotect failed" );
|
|
}
|
|
|
|
vm->destroy = VM_Destroy_Compiled;
|
|
vm->compiled = qtrue;
|
|
|
|
{
|
|
struct timeval tvdone = {0, 0};
|
|
struct timeval dur = {0, 0};
|
|
Com_Printf( "VM file %s compiled to %i bytes of code (%p - %p)\n",
|
|
vm->name, vm->codeLength, vm->codeBase, vm->codeBase+vm->codeLength );
|
|
|
|
gettimeofday(&tvdone, NULL);
|
|
timersub(&tvdone, &tvstart, &dur);
|
|
Com_Printf( "compilation took %lu.%06lu seconds\n",
|
|
(long unsigned int)dur.tv_sec, (long unsigned int)dur.tv_usec );
|
|
}
|
|
}
|
|
|
|
int
|
|
VM_CallCompiled( vm_t *vm, int *args )
|
|
{
|
|
int retVal;
|
|
int *argPointer;
|
|
|
|
vm_data_t *vm_dataAndCode = (void *)( vm->codeBase );
|
|
int programStack = vm->programStack;
|
|
int stackOnEntry = programStack;
|
|
|
|
byte *image = vm->dataBase;
|
|
|
|
currentVM = vm;
|
|
|
|
vm->currentlyInterpreting = qtrue;
|
|
|
|
programStack -= 48;
|
|
argPointer = (int *)&image[ programStack + 8 ];
|
|
memcpy( argPointer, args, 4 * 9 );
|
|
argPointer[ -1 ] = 0;
|
|
argPointer[ -2 ] = -1;
|
|
|
|
#ifdef VM_TIMES
|
|
struct tms start_time, stop_time;
|
|
clock_t time_diff;
|
|
|
|
times( &start_time );
|
|
time_outside_vm = 0;
|
|
#endif
|
|
|
|
/* call generated code */
|
|
{
|
|
int ( *entry )( void *, int, void * );
|
|
#ifdef __PPC64__
|
|
entry = (void *)&(vm_dataAndCode->opd);
|
|
#else
|
|
entry = (void *)(vm->codeBase + vm_dataAndCode->dataLength);
|
|
#endif
|
|
retVal = entry( vm->codeBase, programStack, vm->dataBase );
|
|
}
|
|
|
|
#ifdef VM_TIMES
|
|
times( &stop_time );
|
|
time_diff = stop_time.tms_utime - start_time.tms_utime;
|
|
time_total_vm += time_diff - time_outside_vm;
|
|
if ( time_diff > 100 ) {
|
|
printf( "App clock: %ld, vm total: %ld, vm this: %ld, vm real: %ld, vm out: %ld\n"
|
|
"Inside VM %f%% of app time\n",
|
|
stop_time.tms_utime,
|
|
time_total_vm,
|
|
time_diff,
|
|
time_diff - time_outside_vm,
|
|
time_outside_vm,
|
|
(double)100 * time_total_vm / stop_time.tms_utime );
|
|
}
|
|
#endif
|
|
|
|
vm->programStack = stackOnEntry;
|
|
vm->currentlyInterpreting = qfalse;
|
|
|
|
return retVal;
|
|
}
|