f33873ddb7
some minor changes. Mostly bug fixes and internal reorganisation. Added code to provide an activex control as part of the npfte.dll plugin. If the dll is registered the regsvr32 way, the plugin can be used with IE as well. fisheye/panoramic view enable is now controlled by rulesets instead of serverinfo. server will list all pak files it has loaded. client will probably do the wrong thing and still needs fixing properly. git-svn-id: https://svn.code.sf.net/p/fteqw/code/trunk@3910 fc73d0e0-1445-4013-8a0c-d673dee63da5
1445 lines
38 KiB
C
1445 lines
38 KiB
C
/*
|
|
when I say JIT, I mean load time, not execution time.
|
|
|
|
notes:
|
|
qc jump offsets are all constants. we have no variable offset jumps (other than function calls/returns)
|
|
field remapping... fields are in place, and cannot be adjusted. if a field is not set to 0, it's assumed to be a constant.
|
|
|
|
optimisations:
|
|
none at the moment...
|
|
instructions need to be chained. stuff that writes to C should be cacheable, etc. maybe we don't even need to do the write to C
|
|
it should also be possible to fold in eq+ifnot, so none of this silly storing of floats in equality tests
|
|
|
|
this means that we need to track which vars are cached and in what form: fpreg, ireg+floatasint, ireg+float.
|
|
certain qccx hacks can use fpu operations on ints, so do what the instruction says, rather than considering an add an add regardless of types.
|
|
|
|
OP_AND_F, OP_OR_F etc will generally result in ints, and we should be able to keep them as ints if they combine with other ints.
|
|
|
|
some instructions are jump sites. any cache must be flushed before the start of the instruction.
|
|
some variables are locals, and will only ever be written by a single instruction, then read by the following instruction. such temps do not need to be written, or are overwritten later in the function anyway.
|
|
such locals need to be calculated PER FUNCTION as (fte)qcc can overlap locals making multiple distinct locals on a single offset.
|
|
|
|
store locals on a proper stack instead of the current absurd mechanism.
|
|
|
|
eax - tmp
|
|
ebx - prinst->edicttable
|
|
ecx - tmp
|
|
edx - tmp
|
|
esi - debug opcode number
|
|
edi - tmp (because it's preserved by subfunctions)
|
|
ebp -
|
|
|
|
to use gas to provide binary opcodes:
|
|
vim -N blob.s && as blob.s && objdump.exe -d a.out
|
|
|
|
|
|
notable mods to test:
|
|
prydon gate, due to fpu mangling to carry values between maps
|
|
*/
|
|
|
|
#define PROGSUSED
|
|
#include "progsint.h"
|
|
|
|
#ifdef QCJIT
|
|
|
|
#ifndef _WIN32
|
|
#include <sys/mman.h>
|
|
#endif
|
|
|
|
/*scratch slots whose addresses are baked into the generated code (see the fistp/fild pairs emitted for the bitwise float ops)*/
static float ta;
static float tb;
/*constant zero; its address is the operand of the emitted fcomps instructions*/
static float nullfloat = 0;
|
|
|
|
/*
 Everything the code generator tracks while translating one progs.
 Allocated by PR_GenerateJit and released by PR_CloseJit.
*/
struct jitstate
{
	/*pending jump fixups, three entries each: code offset of the rel32 field, target statement, bias. ([MAX_STATEMENTS*3])*/
	unsigned int *statementjumps;
	/*address of the first generated byte of each statement. ([MAX_STATEMENTS])*/
	unsigned char **statementoffsets;
	/*number of entries used in statementjumps (three per recorded jump)*/
	unsigned int numjumps;
	/*output buffer holding the generated machine code*/
	unsigned char *code;
	/*number of bytes written to code so far*/
	unsigned int codesize;
	/*statement count; the code buffer is sized as jitstatements*500 bytes*/
	unsigned int jitstatements;

	/*base of the globals that the GCache_* helpers address*/
	float *glob;
	/*global offset currently held in a register by the (disabled) cache*/
	unsigned int cachedglobal;
	/*REG_* holding cachedglobal. NOTE(review): the disabled cache code spells this 'cachedreg'*/
	unsigned int cachereg;
};
|
|
|
|
static void EmitByte(struct jitstate *jit, unsigned char byte)
|
|
{
|
|
jit->code[jit->codesize++] = byte;
|
|
}
|
|
static void Emit4Byte(struct jitstate *jit, unsigned int value)
|
|
{
|
|
jit->code[jit->codesize++] = (value>> 0)&0xff;
|
|
jit->code[jit->codesize++] = (value>> 8)&0xff;
|
|
jit->code[jit->codesize++] = (value>>16)&0xff;
|
|
jit->code[jit->codesize++] = (value>>24)&0xff;
|
|
}
|
|
/*
 Appends an absolute address as a 32bit immediate.
 The pointer is converted to unsigned int, so only 32 bits of it are emitted.
*/
static void EmitAdr(struct jitstate *jit, void *value)
{
	unsigned int addr32 = (unsigned int)value;

	Emit4Byte(jit, addr32);
}
|
|
/*
 Appends the raw bit pattern of a float as a 32bit immediate.
*/
static void EmitFloat(struct jitstate *jit, float value)
{
	/*reinterpret the float's bits as an integer without converting the value*/
	union
	{
		float f;
		unsigned int i;
	} bits;

	bits.f = value;
	Emit4Byte(jit, bits.i);
}
|
|
static void Emit2Byte(struct jitstate *jit, unsigned short value)
|
|
{
|
|
jit->code[jit->codesize++] = (value>> 0)&0xff;
|
|
jit->code[jit->codesize++] = (value>> 8)&0xff;
|
|
}
|
|
|
|
static void EmitFOffset(struct jitstate *jit, void *func, int bias)
|
|
{
|
|
union {void *f; unsigned int i;} u;
|
|
u.f = func;
|
|
u.i -= (unsigned int)&jit->code[jit->codesize+bias];
|
|
Emit4Byte(jit, u.i);
|
|
}
|
|
|
|
static void Emit4ByteJump(struct jitstate *jit, int statementnum, int offset)
|
|
{
|
|
jit->statementjumps[jit->numjumps++] = jit->codesize;
|
|
jit->statementjumps[jit->numjumps++] = statementnum;
|
|
jit->statementjumps[jit->numjumps++] = offset;
|
|
|
|
//the offset is filled in later
|
|
jit->codesize += 4;
|
|
}
|
|
|
|
/*
 Register identifiers. The first eight values are shifted straight into
 modrm bytes by the emit macros, so their numeric values matter.
*/
enum
{
	REG_EAX = 0,
	REG_ECX = 1,
	REG_EDX = 2,
	REG_EBX = 3,
	REG_ESP = 4,
	REG_EBP = 5,
	REG_ESI = 6,
	REG_EDI = 7,

	/*top of the fpu stack. S1 is deliberately not listed, as that makes things too awkward*/
	REG_S0 = 8,
	/*no register at all*/
	REG_NONE = 9
};
|
|
/*
 Instruction emitting macros.
 These rely on the one-argument EmitByte/EmitAdr/Emit4Byte/EmitFloat wrappers
 and on a variable named 'jit' being in scope where they are expanded.
 Each expands to several statements and supplies its own trailing semicolon
 (some call sites omit theirs), so do not use them as the unbraced body of an if/else.
*/
//xor %sr,%dr
#define XOR(sr,dr) EmitByte(0x31);EmitByte(0xc0 | ((sr)<<3) | (dr));
//xor %reg,%reg
#define CLEARREG(reg) XOR(reg,reg)
//mov addr,%reg (eax uses its short absolute-address form)
#define LOADREG(addr, reg) if ((reg) == REG_EAX) {EmitByte(0xa1);} else {EmitByte(0x8b); EmitByte(((reg)<<3) | 0x05);} EmitAdr(addr);
//mov %reg,addr (eax uses its short absolute-address form)
#define STOREREG(reg, addr) if ((reg) == REG_EAX) {EmitByte(0xa3);} else {EmitByte(0x89); EmitByte(((reg)<<3) | 0x05);} EmitAdr(addr);
//movl $f,addr (f is written as its float bit pattern)
#define STOREF(f, addr) EmitByte(0xc7);EmitByte(0x05); EmitAdr(addr);EmitFloat(f);
//movl $val,addr
#define STOREI(val, addr) EmitByte(0xc7);EmitByte(0x05); EmitAdr(addr);Emit4Byte(val);
//mov $val,%reg (0xb8+reg; this used to emit 0xbe regardless of reg, which is only right for esi)
#define SETREGI(val,reg) EmitByte(0xb8 | (reg));Emit4Byte(val);

//the register parameters must not be called a/b/c, or the preprocessor would also replace the .a/.b/.c member names
#define ARGREGS(ra,rb,rc) GCache_Load(jit, op[i].a, ra, op[i].b, rb, op[i].c, rc)
#define RESULTREG(r) GCache_Store(jit, op[i].c, r)
|
|
|
|
//for the purposes of the cache, 'temp' offsets are only read when they have been written only within the preceding control block.
|
|
//if they were read at any other time, then we must write them out in full.
|
|
//this logic applies only to locals of a function.
|
|
//#define USECACHE
|
|
|
|
static void GCache_Load(struct jitstate *jit, int ao, int ar, int bo, int br, int co, int cr)
|
|
{
|
|
#if USECACHE
|
|
if (jit->cachedreg != REG_NONE)
|
|
{
|
|
/*something is cached, if its one of the input offsets then can chain the instruction*/
|
|
|
|
if (jit->cachedglobal === ao && ar != REG_NONE)
|
|
{
|
|
if (jit->cachedreg == ar)
|
|
ar = REG_NONE;
|
|
}
|
|
if (jit->cachedglobal === bo && br != REG_NONE)
|
|
{
|
|
if (jit->cachedreg == br)
|
|
br = REG_NONE;
|
|
}
|
|
if (jit->cachedglobal === co && cr != REG_NONE)
|
|
{
|
|
if (jit->cachedreg == cr)
|
|
cr = REG_NONE;
|
|
}
|
|
|
|
if (!istemp(ao))
|
|
{
|
|
/*purge the old cache*/
|
|
switch(jit->cachedreg)
|
|
{
|
|
case REG_NONE:
|
|
break;
|
|
case REG_S0:
|
|
//fstps glob[C]
|
|
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(jit->glob + jit->cachedglobal);
|
|
break;
|
|
default:
|
|
STOREREG(jit->cachedreg, jit->glob + jit->cachedglobal);
|
|
break;
|
|
}
|
|
jit->cachedglobal = -1;
|
|
jit->cachedreg = REG_NONE;
|
|
}
|
|
|
|
#endif
|
|
switch(ar)
|
|
{
|
|
case REG_NONE:
|
|
break;
|
|
case REG_S0:
|
|
//flds glob[A]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(jit->glob + op[i].a);
|
|
break;
|
|
default:
|
|
LOADREG(jit->glob + ao, ar);
|
|
break;
|
|
}
|
|
|
|
switch(br)
|
|
{
|
|
case REG_NONE:
|
|
break;
|
|
case REG_S0:
|
|
//flds glob[A]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(jit->glob + op[i].b);
|
|
break;
|
|
default:
|
|
LOADREG(jit->glob + bo, br);
|
|
break;
|
|
}
|
|
|
|
switch(cr)
|
|
{
|
|
case REG_NONE:
|
|
break;
|
|
case REG_S0:
|
|
//flds glob[A]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(jit->glob + op[i].c);
|
|
break;
|
|
default:
|
|
LOADREG(jit->glob + co, cr);
|
|
break;
|
|
}
|
|
}
|
|
static void GCache_Store(struct jitstate *jit, int ofs, int reg)
|
|
{
|
|
#if USECACHE
|
|
jit->cachedglobal = ofs;
|
|
jit->cachedreg = reg;
|
|
#else
|
|
switch(reg)
|
|
{
|
|
case REG_NONE:
|
|
break;
|
|
case REG_S0:
|
|
//fstps glob[C]
|
|
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(jit->glob + ofs);
|
|
break;
|
|
default:
|
|
STOREREG(reg, jit->glob + ofs);
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static void *LocalLoc(struct jitstate *jit)
|
|
{
|
|
return &jit->code[jit->codesize];
|
|
}
|
|
static void *LocalJmp(struct jitstate *jit, int cond)
|
|
{
|
|
/*floating point ops don't set the sign flag, thus we use the 'above/below' instructions instead of 'greater/less' instructions*/
|
|
if (cond == OP_GOTO)
|
|
EmitByte(jit, 0xeb); //jmp
|
|
else if (cond == OP_LE_F)
|
|
EmitByte(jit, 0x76); //jbe
|
|
else if (cond == OP_GE_F)
|
|
EmitByte(jit, 0x73); //jae
|
|
else if (cond == OP_LT_F)
|
|
EmitByte(jit, 0x72); //jb
|
|
else if (cond == OP_GT_F)
|
|
EmitByte(jit, 0x77); //ja
|
|
else if (cond == OP_LE_I)
|
|
EmitByte(jit, 0x7e); //jle
|
|
else if (cond == OP_LT_I)
|
|
EmitByte(jit, 0x7c); //jl
|
|
else if ((cond >= OP_NE_F && cond <= OP_NE_FNC) || cond == OP_NE_I)
|
|
EmitByte(jit, 0x75); //jne
|
|
else if ((cond >= OP_EQ_F && cond <= OP_EQ_FNC) || cond == OP_EQ_I)
|
|
EmitByte(jit, 0x74); //je
|
|
#if defined(DEBUG) && defined(_WIN32)
|
|
else
|
|
{
|
|
OutputDebugString("oh noes!\n");
|
|
return NULL;
|
|
}
|
|
#endif
|
|
|
|
EmitByte(jit, 0);
|
|
|
|
return LocalLoc(jit);
|
|
}
|
|
/*
 Patches a short jump created by LocalJmp so that it lands on 'loc'.
 'jmp' is the address LocalJmp returned (the byte after the displacement),
 so the displacement is loc-jmp and is stored in the byte before 'jmp'.

 The displacement must fit in a signed byte (-128 to 127). DEBUG win32 builds
 check this, and replace a jump that cannot reach with the two bytes 0xcd 0xcc
 (int $0xcc). Other builds store the truncated value unchecked.
*/
static void LocalJmpLoc(void *jmp, void *loc)
{
	unsigned char *a = jmp;
	int offs = (int)((char *)loc - (char *)jmp);

#if defined(DEBUG) && defined(_WIN32)
	//-128 is still a valid 8bit displacement, so only reject values below it
	if (offs > 127 || offs < -128)
	{
		OutputDebugStringA("bad jump\n");
		a[-2] = 0xcd;
		a[-1] = 0xcc;
		return;
	}
#endif
	a[-1] = (unsigned char)offs;
}
|
|
|
|
static void FixupJumps(struct jitstate *jit)
|
|
{
|
|
unsigned int j;
|
|
unsigned char *codesrc;
|
|
unsigned char *codedst;
|
|
unsigned int offset;
|
|
|
|
unsigned int v;
|
|
|
|
for (j = 0; j < jit->numjumps;)
|
|
{
|
|
v = jit->statementjumps[j++];
|
|
codesrc = &jit->code[v];
|
|
|
|
v = jit->statementjumps[j++];
|
|
codedst = jit->statementoffsets[v];
|
|
|
|
v = jit->statementjumps[j++];
|
|
offset = (int)(codedst - (codesrc-v)); //3rd term because the jump is relative to the instruction start, not the instruction's offset
|
|
|
|
codesrc[0] = (offset>> 0)&0xff;
|
|
codesrc[1] = (offset>> 8)&0xff;
|
|
codesrc[2] = (offset>>16)&0xff;
|
|
codesrc[3] = (offset>>24)&0xff;
|
|
}
|
|
}
|
|
|
|
int ASMCALL PR_LeaveFunction (progfuncs_t *progfuncs);
|
|
int ASMCALL PR_EnterFunction (progfuncs_t *progfuncs, dfunction_t *f, int progsnum);
|
|
|
|
void PR_CloseJit(struct jitstate *jit)
|
|
{
|
|
if (jit)
|
|
{
|
|
free(jit->statementjumps);
|
|
free(jit->statementoffsets);
|
|
#ifndef _WIN32
|
|
munmap(jit->code, jit->jitstatements * 500);
|
|
#else
|
|
free(jit->code);
|
|
#endif
|
|
free(jit)
|
|
}
|
|
}
|
|
|
|
/*
 From this point on the emit helpers are used without naming the jit state:
 each wrapper passes the local variable 'jit' as the first argument.
 A macro is not re-expanded inside its own expansion, so these forward to
 the real functions of the same name.
*/
#define EmitByte(b) EmitByte(jit, b)
#define EmitAdr(ptr) EmitAdr(jit, ptr)
#define EmitFOffset(func,bias) EmitFOffset(jit, func, bias)
#define Emit4ByteJump(stnum,bias) Emit4ByteJump(jit, stnum, bias)
#define Emit4Byte(val) Emit4Byte(jit, val)
#define EmitFloat(val) EmitFloat(jit, val)
#define LocalJmp(cond) LocalJmp(jit, cond)
#define LocalLoc() LocalLoc(jit)
|
|
|
|
|
|
struct jitstate *PR_GenerateJit(progfuncs_t *progfuncs)
|
|
{
|
|
struct jitstate *jit;
|
|
|
|
void *j0, *l0;
|
|
void *j1, *l1;
|
|
void *j2, *l2;
|
|
unsigned int i;
|
|
dstatement16_t *op = (dstatement16_t*)current_progstate->statements;
|
|
unsigned int numstatements = current_progstate->progs->numstatements;
|
|
int *glob = (int*)current_progstate->globals;
|
|
|
|
if (current_progstate->numbuiltins)
|
|
return NULL;
|
|
jit = malloc(sizeof(*jit));
|
|
jit->jitstatements = numstatements;
|
|
|
|
jit->statementjumps = malloc(numstatements*12);
|
|
jit->statementoffsets = malloc(numstatements*4);
|
|
#ifndef _WIN32
|
|
jit->code = mmap(NULL, numstatements*500, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
|
|
#else
|
|
jit->code = malloc(numstatements*500);
|
|
#endif
|
|
if (!jit->code)
|
|
return NULL;
|
|
|
|
jit->numjumps = 0;
|
|
jit->codesize = 0;
|
|
|
|
|
|
|
|
for (i = 0; i < numstatements; i++)
|
|
{
|
|
jit->statementoffsets[i] = &jit->code[jit->codesize];
|
|
|
|
/*DEBUG*/
|
|
SETREGI(op[i].op, REG_ESI);
|
|
|
|
switch(op[i].op)
|
|
{
|
|
//jumps
|
|
case OP_IF_I:
|
|
//integer compare
|
|
//if a, goto b
|
|
|
|
//cmpl $0,glob[A]
|
|
EmitByte(0x83);EmitByte(0x3d);EmitAdr(glob + op[i].a);EmitByte(0x0);
|
|
//jne B
|
|
EmitByte(0x0f);EmitByte(0x85);Emit4ByteJump(i + (signed short)op[i].b, -4);
|
|
break;
|
|
|
|
case OP_IFNOT_I:
|
|
//integer compare
|
|
//if !a, goto b
|
|
|
|
//cmpl $0,glob[A]
|
|
EmitByte(0x83);EmitByte(0x3d);EmitAdr(glob + op[i].a);EmitByte(0x0);
|
|
//je B
|
|
EmitByte(0x0f);EmitByte(0x84);Emit4ByteJump(i + (signed short)op[i].b, -4);
|
|
break;
|
|
|
|
case OP_GOTO:
|
|
EmitByte(0xE9);Emit4ByteJump(i + (signed short)op[i].a, -4);
|
|
break;
|
|
|
|
//function returns
|
|
case OP_DONE:
|
|
case OP_RETURN:
|
|
//done and return are the same
|
|
|
|
//part 1: store A into OFS_RETURN
|
|
|
|
if (!op[i].a)
|
|
{
|
|
//assumption: anything that returns address 0 is a void or zero return.
|
|
//thus clear eax and copy that to the return vector.
|
|
CLEARREG(REG_EAX);
|
|
STOREREG(REG_EAX, glob + OFS_RETURN+0);
|
|
STOREREG(REG_EAX, glob + OFS_RETURN+1);
|
|
STOREREG(REG_EAX, glob + OFS_RETURN+2);
|
|
}
|
|
else
|
|
{
|
|
LOADREG(glob + op[i].a+0, REG_EAX);
|
|
LOADREG(glob + op[i].a+1, REG_EDX);
|
|
LOADREG(glob + op[i].a+2, REG_ECX);
|
|
STOREREG(REG_EAX, glob + OFS_RETURN+0);
|
|
STOREREG(REG_EDX, glob + OFS_RETURN+1);
|
|
STOREREG(REG_ECX, glob + OFS_RETURN+2);
|
|
}
|
|
|
|
//call leavefunction to get the return address
|
|
|
|
// pushl progfuncs
|
|
EmitByte(0x68);EmitAdr(progfuncs);
|
|
// call PR_LeaveFunction
|
|
EmitByte(0xe8);EmitFOffset(PR_LeaveFunction, 4);
|
|
// add $4,%esp
|
|
EmitByte(0x83);EmitByte(0xc4);EmitByte(0x04);
|
|
// movl pr_depth,%edx
|
|
EmitByte(0x8b);EmitByte(0x15);EmitAdr(&pr_depth);
|
|
// cmp prinst->exitdepth,%edx
|
|
EmitByte(0x3b);EmitByte(0x15);EmitAdr(&prinst->exitdepth);
|
|
// je returntoc
|
|
j1 = LocalJmp(OP_EQ_E);
|
|
// mov statementoffsets[%eax*4],%eax
|
|
EmitByte(0x8b);EmitByte(0x04);EmitByte(0x85);EmitAdr(jit->statementoffsets+1);
|
|
// jmp *eax
|
|
EmitByte(0xff);EmitByte(0xe0);
|
|
// returntoc:
|
|
l1 = LocalLoc();
|
|
// ret
|
|
EmitByte(0xc3);
|
|
|
|
LocalJmpLoc(j1,l1);
|
|
break;
|
|
|
|
//function calls
|
|
case OP_CALL0:
|
|
case OP_CALL1:
|
|
case OP_CALL2:
|
|
case OP_CALL3:
|
|
case OP_CALL4:
|
|
case OP_CALL5:
|
|
case OP_CALL6:
|
|
case OP_CALL7:
|
|
case OP_CALL8:
|
|
//FIXME: the size of this instruction is going to hurt cache performance if every single function call is expanded into this HUGE CHUNK of gibberish!
|
|
//FIXME: consider the feasability of just calling a C function and just jumping to the address it returns.
|
|
|
|
//save the state in place the rest of the engine can cope with
|
|
//movl $i, pr_xstatement
|
|
EmitByte( 0xc7);EmitByte(0x05);EmitAdr(&pr_xstatement);Emit4Byte(i);
|
|
//movl $(op[i].op-OP_CALL0), pr_argc
|
|
EmitByte( 0xc7);EmitByte(0x05);EmitAdr(&pr_argc);Emit4Byte(op[i].op-OP_CALL0);
|
|
|
|
//figure out who we're calling, and what that involves
|
|
//%eax = glob[A]
|
|
LOADREG(glob + op[i].a, REG_EAX);
|
|
//eax is now the func num
|
|
|
|
//mov %eax,%ecx
|
|
EmitByte(0x89); EmitByte(0xc1);
|
|
//shr $24,%ecx
|
|
EmitByte(0xc1); EmitByte(0xe9); EmitByte(0x18);
|
|
//ecx is now the progs num for the new func
|
|
|
|
//cmp %ecx,pr_typecurrent
|
|
EmitByte(0x39); EmitByte(0x0d); EmitAdr(&pr_typecurrent);
|
|
//je sameprogs
|
|
j1 = LocalJmp(OP_EQ_I);
|
|
{
|
|
//can't handle switching progs
|
|
|
|
//FIXME: recurse though PR_ExecuteProgram
|
|
//push eax
|
|
//push progfuncs
|
|
//call PR_ExecuteProgram
|
|
//add $8,%esp
|
|
//remember to change the je above
|
|
|
|
//err... exit depth? no idea
|
|
EmitByte(0xcd);EmitByte(op[i].op); //int $X
|
|
|
|
|
|
//ret
|
|
EmitByte(0xc3);
|
|
}
|
|
//sameprogs:
|
|
l1 = LocalLoc();
|
|
LocalJmpLoc(j1,l1);
|
|
|
|
//andl $0x00ffffff, %eax
|
|
EmitByte(0x25);Emit4Byte(0x00ffffff);
|
|
|
|
//mov $sizeof(dfunction_t),%edx
|
|
EmitByte(0xba);Emit4Byte(sizeof(dfunction_t));
|
|
//mul %edx
|
|
EmitByte(0xf7); EmitByte(0xe2);
|
|
//add pr_functions,%eax
|
|
EmitByte(0x05); EmitAdr(pr_functions);
|
|
|
|
//eax is now the dfunction_t to be called
|
|
//edx is clobbered.
|
|
|
|
//mov (%eax),%edx
|
|
EmitByte(0x8b);EmitByte(0x10);
|
|
//edx is now the first statement number
|
|
//cmp $0,%edx
|
|
EmitByte(0x83);EmitByte(0xfa);EmitByte(0x00);
|
|
//jl isabuiltin
|
|
j1 = LocalJmp(OP_LT_I);
|
|
{
|
|
/* call the function*/
|
|
//push %ecx
|
|
EmitByte(0x51);
|
|
//push %eax
|
|
EmitByte(0x50);
|
|
//pushl progfuncs
|
|
EmitByte(0x68);EmitAdr(progfuncs);
|
|
//call PR_EnterFunction
|
|
EmitByte(0xe8);EmitFOffset(PR_EnterFunction, 4);
|
|
//sub $12,%esp
|
|
EmitByte(0x83);EmitByte(0xc4);EmitByte(0xc);
|
|
//eax is now the next statement number (first of the new function, usually equal to ecx, but not always)
|
|
|
|
//jmp statementoffsets[%eax*4]
|
|
EmitByte(0xff);EmitByte(0x24);EmitByte(0x85);EmitAdr(jit->statementoffsets+1);
|
|
}
|
|
/*its a builtin, figure out which, and call it*/
|
|
//isabuiltin:
|
|
l1 = LocalLoc();
|
|
LocalJmpLoc(j1,l1);
|
|
|
|
//push current_progstate->globals
|
|
EmitByte(0x68);EmitAdr(current_progstate->globals);
|
|
//push progfuncs
|
|
EmitByte(0x68);EmitAdr(progfuncs);
|
|
//neg %edx
|
|
EmitByte(0xf7);EmitByte(0xda);
|
|
//call externs->globalbuiltins[%edx,4]
|
|
//FIXME: make sure this dereferences
|
|
EmitByte(0xff);EmitByte(0x14);EmitByte(0x95);EmitAdr(externs->globalbuiltins);
|
|
//add $8,%esp
|
|
EmitByte(0x83);EmitByte(0xc4);EmitByte(0x8);
|
|
|
|
//but that builtin might have been Abort()
|
|
|
|
LOADREG(&prinst->continuestatement, REG_EAX);
|
|
//cmp $-1,%eax
|
|
EmitByte(0x83);EmitByte(0xf8);EmitByte(0xff);
|
|
//je donebuiltincall
|
|
j1 = LocalJmp(OP_EQ_I);
|
|
{
|
|
//mov $-1,prinst->continuestatement
|
|
EmitByte(0xc7);EmitByte(0x05);EmitAdr(&prinst->continuestatement);Emit4Byte((unsigned int)-1);
|
|
|
|
//jmp statementoffsets[%eax*4]
|
|
EmitByte(0xff);EmitByte(0x24);EmitByte(0x85);EmitAdr(jit->statementoffsets);
|
|
}
|
|
//donebuiltincall:
|
|
l1 = LocalLoc();
|
|
LocalJmpLoc(j1,l1);
|
|
break;
|
|
|
|
case OP_MUL_F:
|
|
//flds glob[A]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);
|
|
//fmuls glob[B]
|
|
EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b);
|
|
//fstps glob[C]
|
|
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);
|
|
break;
|
|
case OP_DIV_F:
|
|
//flds glob[A]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);
|
|
//fdivs glob[B]
|
|
EmitByte(0xd8);EmitByte(0x35);EmitAdr(glob + op[i].b);
|
|
//fstps glob[C]
|
|
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);
|
|
break;
|
|
case OP_ADD_F:
|
|
//flds glob[A]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);
|
|
//fadds glob[B]
|
|
EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b);
|
|
//fstps glob[C]
|
|
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);
|
|
break;
|
|
case OP_SUB_F:
|
|
//flds glob[A]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);
|
|
//fsubs glob[B]
|
|
EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b);
|
|
//fstps glob[C]
|
|
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);
|
|
break;
|
|
|
|
case OP_NOT_F:
|
|
//fldz
|
|
EmitByte(0xd9);EmitByte(0xee);
|
|
//fcomps glob[A]
|
|
EmitByte(0xd8); EmitByte(0x1d); EmitAdr(glob + op[i].a);
|
|
//fnstsw %ax
|
|
EmitByte(0xdf);EmitByte(0xe0);
|
|
//testb 0x40,%ah
|
|
EmitByte(0xf6);EmitByte(0xc4);EmitByte(0x40);
|
|
|
|
j1 = LocalJmp(OP_NE_F);
|
|
{
|
|
STOREF(0.0f, glob + op[i].c);
|
|
j2 = LocalJmp(OP_GOTO);
|
|
}
|
|
{
|
|
//noteq:
|
|
l1 = LocalLoc();
|
|
STOREF(1.0f, glob + op[i].c);
|
|
}
|
|
//end:
|
|
l2 = LocalLoc();
|
|
LocalJmpLoc(j1,l1);
|
|
LocalJmpLoc(j2,l2);
|
|
break;
|
|
|
|
case OP_STORE_F:
|
|
case OP_STORE_S:
|
|
case OP_STORE_ENT:
|
|
case OP_STORE_FLD:
|
|
case OP_STORE_FNC:
|
|
LOADREG(glob + op[i].a, REG_EAX);
|
|
STOREREG(REG_EAX, glob + op[i].b);
|
|
break;
|
|
|
|
case OP_STORE_V:
|
|
LOADREG(glob + op[i].a+0, REG_EAX);
|
|
LOADREG(glob + op[i].a+1, REG_EDX);
|
|
LOADREG(glob + op[i].a+2, REG_ECX);
|
|
STOREREG(REG_EAX, glob + op[i].b+0);
|
|
STOREREG(REG_EDX, glob + op[i].b+1);
|
|
STOREREG(REG_ECX, glob + op[i].b+2);
|
|
break;
|
|
|
|
case OP_LOAD_F:
|
|
case OP_LOAD_S:
|
|
case OP_LOAD_ENT:
|
|
case OP_LOAD_FLD:
|
|
case OP_LOAD_FNC:
|
|
case OP_LOAD_V:
|
|
//a is the ent number, b is the field
|
|
//c is the dest
|
|
|
|
LOADREG(glob + op[i].a, REG_EAX);
|
|
LOADREG(glob + op[i].b, REG_ECX);
|
|
|
|
//FIXME: bound eax (ent number)
|
|
//FIXME: bound ecx (field index)
|
|
//mov (ebx,eax,4).%eax
|
|
EmitByte(0x8b); EmitByte(0x04); EmitByte(0x83);
|
|
//eax is now an edictrun_t
|
|
//mov fields(,%eax,4),%edx
|
|
EmitByte(0x8b);EmitByte(0x50);EmitByte((int)&((edictrun_t*)NULL)->fields);
|
|
//edx is now the field array for that ent
|
|
|
|
//mov fieldajust(%edx,%ecx,4),%eax
|
|
EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(progfuncs->fieldadjust*4);
|
|
|
|
STOREREG(REG_EAX, glob + op[i].c)
|
|
|
|
if (op[i].op == OP_LOAD_V)
|
|
{
|
|
//mov fieldajust+4(%edx,%ecx,4),%eax
|
|
EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(4+progfuncs->fieldadjust*4);
|
|
STOREREG(REG_EAX, glob + op[i].c+1)
|
|
|
|
//mov fieldajust+8(%edx,%ecx,4),%eax
|
|
EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(8+progfuncs->fieldadjust*4);
|
|
STOREREG(REG_EAX, glob + op[i].c+2)
|
|
}
|
|
break;
|
|
|
|
case OP_ADDRESS:
|
|
//a is the ent number, b is the field
|
|
//c is the dest
|
|
|
|
LOADREG(glob + op[i].a, REG_EAX);
|
|
LOADREG(glob + op[i].b, REG_ECX);
|
|
|
|
//FIXME: bound eax (ent number)
|
|
//FIXME: bound ecx (field index)
|
|
//mov (ebx,eax,4).%eax
|
|
EmitByte(0x8b); EmitByte(0x04); EmitByte(0x83);
|
|
//eax is now an edictrun_t
|
|
//mov fields(,%eax,4),%edx
|
|
EmitByte(0x8b);EmitByte(0x50);EmitByte((int)&((edictrun_t*)NULL)->fields);
|
|
//edx is now the field array for that ent
|
|
//mov fieldajust(%edx,%ecx,4),%eax //offset = progfuncs->fieldadjust
|
|
//EmitByte(0x8d); EmitByte(0x84); EmitByte(0x8a); EmitByte(progfuncs->fieldadjust*4);
|
|
EmitByte(0x8d); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(progfuncs->fieldadjust*4);
|
|
STOREREG(REG_EAX, glob + op[i].c);
|
|
break;
|
|
|
|
case OP_STOREP_F:
|
|
case OP_STOREP_S:
|
|
case OP_STOREP_ENT:
|
|
case OP_STOREP_FLD:
|
|
case OP_STOREP_FNC:
|
|
LOADREG(glob + op[i].a, REG_EAX);
|
|
LOADREG(glob + op[i].b, REG_ECX);
|
|
//mov %eax,(%ecx)
|
|
EmitByte(0x89);EmitByte(0x01);
|
|
break;
|
|
|
|
case OP_STOREP_V:
|
|
LOADREG(glob + op[i].b, REG_ECX);
|
|
|
|
LOADREG(glob + op[i].a+0, REG_EAX);
|
|
//mov %eax,0(%ecx)
|
|
EmitByte(0x89);EmitByte(0x01);
|
|
|
|
LOADREG(glob + op[i].a+1, REG_EAX);
|
|
//mov %eax,4(%ecx)
|
|
EmitByte(0x89);EmitByte(0x41);EmitByte(0x04);
|
|
|
|
LOADREG(glob + op[i].a+2, REG_EAX);
|
|
//mov %eax,8(%ecx)
|
|
EmitByte(0x89);EmitByte(0x41);EmitByte(0x08);
|
|
break;
|
|
|
|
case OP_NE_I:
|
|
case OP_NE_E:
|
|
case OP_NE_FNC:
|
|
case OP_EQ_I:
|
|
case OP_EQ_E:
|
|
case OP_EQ_FNC:
|
|
//integer equality
|
|
LOADREG(glob + op[i].a, REG_EAX);
|
|
|
|
//cmp glob[B],%eax
|
|
EmitByte(0x3b); EmitByte(0x04); EmitByte(0x25); EmitAdr(glob + op[i].b);
|
|
j1 = LocalJmp(op[i].op);
|
|
{
|
|
STOREF(0.0f, glob + op[i].c);
|
|
j2 = LocalJmp(OP_GOTO);
|
|
}
|
|
{
|
|
l1 = LocalLoc();
|
|
STOREF(1.0f, glob + op[i].c);
|
|
}
|
|
l2 = LocalLoc();
|
|
LocalJmpLoc(j1,l1);
|
|
LocalJmpLoc(j2,l2);
|
|
break;
|
|
|
|
case OP_NOT_I:
|
|
case OP_NOT_ENT:
|
|
case OP_NOT_FNC:
|
|
//cmp glob[B],$0
|
|
EmitByte(0x83); EmitByte(0x3d); EmitAdr(glob + op[i].a); EmitByte(0x00);
|
|
j1 = LocalJmp(OP_NE_I);
|
|
{
|
|
STOREF(1.0f, glob + op[i].c);
|
|
j2 = LocalJmp(OP_GOTO);
|
|
}
|
|
{
|
|
l1 = LocalLoc();
|
|
STOREF(0.0f, glob + op[i].c);
|
|
}
|
|
l2 = LocalLoc();
|
|
LocalJmpLoc(j1,l1);
|
|
LocalJmpLoc(j2,l2);
|
|
break;
|
|
|
|
case OP_BITOR_F: //floats...
|
|
//flds glob[A]
|
|
EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);
|
|
//flds glob[B]
|
|
EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);
|
|
//fistp tb
|
|
EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&tb);
|
|
//fistp ta
|
|
EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);
|
|
LOADREG(&ta, REG_EAX)
|
|
//or %eax,tb
|
|
EmitByte(0x09); EmitByte(0x05);EmitAdr(&tb);
|
|
//fild tb
|
|
EmitByte(0xdb); EmitByte(0x05);EmitAdr(&tb);
|
|
//fstps glob[C]
|
|
EmitByte(0xd9); EmitByte(0x1d);EmitAdr(glob + op[i].c);
|
|
break;
|
|
|
|
case OP_BITAND_F:
|
|
//flds glob[A]
|
|
EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);
|
|
//flds glob[B]
|
|
EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);
|
|
//fistp tb
|
|
EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&tb);
|
|
//fistp ta
|
|
EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);
|
|
/*two args are now at ta and tb*/
|
|
LOADREG(&ta, REG_EAX)
|
|
//and tb,%eax
|
|
EmitByte(0x21); EmitByte(0x05);EmitAdr(&tb);
|
|
/*we just wrote the int value to tb, convert that to a float and store it at c*/
|
|
//fild tb
|
|
EmitByte(0xdb); EmitByte(0x05);EmitAdr(&tb);
|
|
//fstps glob[C]
|
|
EmitByte(0xd9); EmitByte(0x1d);EmitAdr(glob + op[i].c);
|
|
break;
|
|
|
|
case OP_AND_F:
|
|
//test floats properly, so we don't get confused with -0.0
|
|
//FIXME: is it feasable to grab the value as an int and test it against 0x7fffffff?
|
|
|
|
//flds glob[A]
|
|
EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].a);
|
|
//fcomps nullfloat
|
|
EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);
|
|
//fnstsw %ax
|
|
EmitByte(0xdf); EmitByte(0xe0);
|
|
//test $0x40,%ah
|
|
EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);
|
|
//jz onefalse
|
|
EmitByte(0x75); EmitByte(0x1f);
|
|
|
|
//flds glob[B]
|
|
EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].b);
|
|
//fcomps nullfloat
|
|
EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);
|
|
//fnstsw %ax
|
|
EmitByte(0xdf); EmitByte(0xe0);
|
|
//test $0x40,%ah
|
|
EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);
|
|
//jnz onefalse
|
|
EmitByte(0x75); EmitByte(0x0c);
|
|
|
|
//mov float0,glob[C]
|
|
EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(1.0f);
|
|
//jmp done
|
|
EmitByte(0xeb); EmitByte(0x0a);
|
|
|
|
//onefalse:
|
|
//mov float1,glob[C]
|
|
EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(0.0f);
|
|
//done:
|
|
break;
|
|
case OP_OR_F:
|
|
//test floats properly, so we don't get confused with -0.0
|
|
|
|
//flds glob[A]
|
|
EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].a);
|
|
//fcomps nullfloat
|
|
EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);
|
|
//fnstsw %ax
|
|
EmitByte(0xdf); EmitByte(0xe0);
|
|
//test $0x40,%ah
|
|
EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);
|
|
//je onetrue
|
|
EmitByte(0x74); EmitByte(0x1f);
|
|
|
|
//flds glob[B]
|
|
EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].b);
|
|
//fcomps nullfloat
|
|
EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);
|
|
//fnstsw %ax
|
|
EmitByte(0xdf); EmitByte(0xe0);
|
|
//test $0x40,%ah
|
|
EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);
|
|
//je onetrue
|
|
EmitByte(0x74); EmitByte(0x0c);
|
|
|
|
//mov float0,glob[C]
|
|
EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(0.0f);
|
|
//jmp done
|
|
EmitByte(0xeb); EmitByte(0x0a);
|
|
|
|
//onetrue:
|
|
//mov float1,glob[C]
|
|
EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(1.0f);
|
|
//done:
|
|
break;
|
|
|
|
case OP_EQ_S:
|
|
case OP_NE_S:
|
|
{
|
|
//put a in ecx
|
|
LOADREG(glob + op[i].a, REG_ECX);
|
|
//put b in edi
|
|
LOADREG(glob + op[i].b, REG_EDI);
|
|
/*
|
|
//early out if they're equal
|
|
//cmp %ecx,%edi
|
|
EmitByte(0x39); EmitByte(0xc0 | (REG_EDI<<3) | REG_ECX);
|
|
j1c = LocalJmp(OP_EQ_S);
|
|
|
|
//if a is 0, check if b is ""
|
|
//jecxz ais0
|
|
EmitByte(0xe3); EmitByte(0x1a);
|
|
|
|
//if b is 0, check if a is ""
|
|
//cmp $0,%edi
|
|
EmitByte(0x83); EmitByte(0xff); EmitByte(0x00);
|
|
//jne bnot0
|
|
EmitByte(0x75); EmitByte(0x2a);
|
|
{
|
|
//push a
|
|
EmitByte(0x51);
|
|
//push progfuncs
|
|
EmitByte(0x68); EmitAdr(progfuncs);
|
|
//call PR_StringToNative
|
|
EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);
|
|
//add $8,%esp
|
|
EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);
|
|
//cmpb $0,(%eax)
|
|
EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);
|
|
j1b = LocalJmp(OP_EQ_S);
|
|
j0b = LocalJmp(OP_GOTO);
|
|
}
|
|
|
|
//ais0:
|
|
{
|
|
//push edi
|
|
EmitByte(0x57);
|
|
//push progfuncs
|
|
EmitByte(0x68); EmitAdr(progfuncs);
|
|
//call PR_StringToNative
|
|
EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);
|
|
//add $8,%esp
|
|
EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);
|
|
//cmpb $0,(%eax)
|
|
EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);
|
|
//je _true
|
|
EmitByte(0x74); EmitByte(0x36);
|
|
//jmp _false
|
|
EmitByte(0xeb); EmitByte(0x28);
|
|
}
|
|
//bnot0:
|
|
*/
|
|
LOADREG(glob + op[i].a, REG_ECX);
|
|
//push ecx
|
|
EmitByte(0x51);
|
|
//push progfuncs
|
|
EmitByte(0x68); EmitAdr(progfuncs);
|
|
//call PR_StringToNative
|
|
EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);
|
|
//push %eax
|
|
EmitByte(0x50);
|
|
|
|
LOADREG(glob + op[i].b, REG_EDI);
|
|
//push %edi
|
|
EmitByte(0x57);
|
|
//push progfuncs
|
|
EmitByte(0x68); EmitAdr(progfuncs);
|
|
//call PR_StringToNative
|
|
EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);
|
|
//add $8,%esp
|
|
EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);
|
|
|
|
|
|
//push %eax
|
|
EmitByte(0x50);
|
|
//call strcmp
|
|
EmitByte(0xe8); EmitFOffset(strcmp,4);
|
|
//add $16,%esp
|
|
EmitByte(0x83); EmitByte(0xc4); EmitByte(0x10);
|
|
|
|
//cmp $0,%eax
|
|
EmitByte(0x83); EmitByte(0xf8); EmitByte(0x00);
|
|
j1 = LocalJmp(OP_EQ_S);
|
|
{
|
|
l0 = LocalLoc();
|
|
STOREF((op[i].op == OP_NE_S)?1.0f:0.0f, glob + op[i].c);
|
|
j2 = LocalJmp(OP_GOTO);
|
|
}
|
|
{
|
|
l1 = LocalLoc();
|
|
STOREF((op[i].op == OP_NE_S)?0.0f:1.0f, glob + op[i].c);
|
|
}
|
|
l2 = LocalLoc();
|
|
|
|
// LocalJmpLoc(j0b, l0);
|
|
LocalJmpLoc(j1, l1);
|
|
// LocalJmpLoc(j1b, l1);
|
|
LocalJmpLoc(j2, l2);
|
|
}
|
|
break;
|
|
|
|
case OP_NOT_S:
|
|
LOADREG(glob + op[i].a, REG_EAX)
|
|
|
|
//cmp $0,%eax
|
|
EmitByte(0x83); EmitByte(0xf8); EmitByte(0x00);
|
|
j2 = LocalJmp(OP_EQ_S);
|
|
|
|
//push %eax
|
|
EmitByte(0x50);
|
|
//push progfuncs
|
|
EmitByte(0x68); EmitAdr(progfuncs);
|
|
//call PR_StringToNative
|
|
EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);
|
|
//add $8,%esp
|
|
EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);
|
|
|
|
//cmpb $0,(%eax)
|
|
EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);
|
|
j1 = LocalJmp(OP_EQ_S);
|
|
{
|
|
STOREF(0.0f, glob + op[i].c);
|
|
j0 = LocalJmp(OP_GOTO);
|
|
}
|
|
{
|
|
l1 = LocalLoc();
|
|
STOREF(1.0f, glob + op[i].c);
|
|
}
|
|
l2 = LocalLoc();
|
|
LocalJmpLoc(j2, l1);
|
|
LocalJmpLoc(j1, l1);
|
|
LocalJmpLoc(j0, l2);
|
|
break;
|
|
|
|
case OP_ADD_V:
|
|
//flds glob[A]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);
|
|
//fadds glob[B]
|
|
EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b+0);
|
|
//fstps glob[C]
|
|
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+0);
|
|
|
|
//flds glob[A]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);
|
|
//fadds glob[B]
|
|
EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b+1);
|
|
//fstps glob[C]
|
|
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+1);
|
|
|
|
//flds glob[A]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);
|
|
//fadds glob[B]
|
|
EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b+2);
|
|
//fstps glob[C]
|
|
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+2);
|
|
break;
|
|
case OP_SUB_V:
|
|
//flds glob[A]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);
|
|
//fsubs glob[B]
|
|
EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b+0);
|
|
//fstps glob[C]
|
|
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+0);
|
|
|
|
//flds glob[A]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);
|
|
//fsubs glob[B]
|
|
EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b+1);
|
|
//fstps glob[C]
|
|
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+1);
|
|
|
|
//flds glob[A]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);
|
|
//fsubs glob[B]
|
|
EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b+2);
|
|
//fstps glob[C]
|
|
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+2);
|
|
break;
|
|
|
|
case OP_MUL_V:
|
|
//this is actually a dotproduct
|
|
//flds glob[A]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);
|
|
//fmuls glob[B]
|
|
EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b+0);
|
|
|
|
//flds glob[A]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);
|
|
//fmuls glob[B]
|
|
EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b+1);
|
|
|
|
//flds glob[A]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);
|
|
//fmuls glob[B]
|
|
EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b+2);
|
|
|
|
//faddp
|
|
EmitByte(0xde);EmitByte(0xc1);
|
|
//faddp
|
|
EmitByte(0xde);EmitByte(0xc1);
|
|
|
|
//fstps glob[C]
|
|
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);
|
|
break;
|
|
|
|
case OP_EQ_F:
|
|
case OP_NE_F:
|
|
case OP_LE_F:
|
|
case OP_GE_F:
|
|
case OP_LT_F:
|
|
case OP_GT_F:
|
|
//flds glob[B]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b);
|
|
//flds glob[A]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);
|
|
//fcomip %st(1),%st
|
|
EmitByte(0xdf);EmitByte(0xe9);
|
|
//fstp %st(0) (aka: pop)
|
|
EmitByte(0xdd);EmitByte(0xd8);
|
|
|
|
j1 = LocalJmp(op[i].op);
|
|
{
|
|
STOREF(0.0f, glob + op[i].c);
|
|
j2 = LocalJmp(OP_GOTO);
|
|
}
|
|
{
|
|
l1 = LocalLoc();
|
|
STOREF(1.0f, glob + op[i].c);
|
|
}
|
|
l2 = LocalLoc();
|
|
LocalJmpLoc(j1,l1);
|
|
LocalJmpLoc(j2,l2);
|
|
break;
|
|
|
|
case OP_MUL_FV:
|
|
case OP_MUL_VF:
|
|
//
|
|
{
|
|
int v;
|
|
int f;
|
|
if (op[i].op == OP_MUL_FV)
|
|
{
|
|
f = op[i].a;
|
|
v = op[i].b;
|
|
}
|
|
else
|
|
{
|
|
v = op[i].a;
|
|
f = op[i].b;
|
|
}
|
|
|
|
//flds glob[F]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + f);
|
|
|
|
//flds glob[V0]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+0);
|
|
//fmul st(1)
|
|
EmitByte(0xd8);EmitByte(0xc9);
|
|
//fstps glob[C]
|
|
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+0);
|
|
|
|
//flds glob[V0]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+1);
|
|
//fmul st(1)
|
|
EmitByte(0xd8);EmitByte(0xc9);
|
|
//fstps glob[C]
|
|
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+1);
|
|
|
|
//flds glob[V0]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+2);
|
|
//fmul st(1)
|
|
EmitByte(0xd8);EmitByte(0xc9);
|
|
//fstps glob[C]
|
|
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+2);
|
|
|
|
//fstp %st(0) (aka: pop)
|
|
EmitByte(0xdd);EmitByte(0xd8);
|
|
}
|
|
break;
|
|
|
|
case OP_STATE:
|
|
//externs->stateop(progfuncs, OPA->_float, OPB->function);
|
|
//push b
|
|
EmitByte(0xff);EmitByte(0x35);EmitAdr(glob + op[i].b);
|
|
//push a
|
|
EmitByte(0xff);EmitByte(0x35);EmitAdr(glob + op[i].a);
|
|
//push $progfuncs
|
|
EmitByte(0x68); EmitAdr(progfuncs);
|
|
//call externs->stateop
|
|
EmitByte(0xe8); EmitFOffset(externs->stateop, 4);
|
|
//add $12,%esp
|
|
EmitByte(0x83); EmitByte(0xc4); EmitByte(0x0c);
|
|
break;
|
|
#if 1
|
|
/* case OP_NOT_V:
|
|
//flds 0
|
|
//flds glob[A+0]
|
|
//fcomip %st(1),%st
|
|
//jne _true
|
|
//flds glob[A+1]
|
|
//fcomip %st(1),%st
|
|
//jne _true
|
|
//flds glob[A+2]
|
|
//fcomip %st(1),%st
|
|
//jne _true
|
|
//mov 1,C
|
|
//jmp done
|
|
//_true:
|
|
//mov 0,C
|
|
//done:
|
|
break;
|
|
*/
|
|
|
|
case OP_NOT_V:
|
|
EmitByte(0xcd);EmitByte(op[i].op);
|
|
printf("QCJIT: instruction %i is not implemented\n", op[i].op);
|
|
break;
|
|
#endif
|
|
case OP_NE_V:
|
|
case OP_EQ_V:
|
|
{
|
|
void *f0, *f1, *f2, *floc;
|
|
//compare v[0]
|
|
//flds glob[A]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);
|
|
//flds glob[B]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b+0);
|
|
//fcomip %st(1),%st
|
|
EmitByte(0xdf);EmitByte(0xe9);
|
|
//fstp %st(0) (aka: pop)
|
|
EmitByte(0xdd);EmitByte(0xd8);
|
|
|
|
/*if the condition is true, don't fail*/
|
|
j1 = LocalJmp(op[i].op);
|
|
{
|
|
STOREF(0.0f, glob + op[i].c);
|
|
f0 = LocalJmp(OP_GOTO);
|
|
}
|
|
l1 = LocalLoc();
|
|
LocalJmpLoc(j1,l1);
|
|
|
|
//compare v[1]
|
|
//flds glob[A]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);
|
|
//flds glob[B]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b+1);
|
|
//fcomip %st(1),%st
|
|
EmitByte(0xdf);EmitByte(0xe9);
|
|
//fstp %st(0) (aka: pop)
|
|
EmitByte(0xdd);EmitByte(0xd8);
|
|
|
|
/*if the condition is true, don't fail*/
|
|
j1 = LocalJmp(op[i].op);
|
|
{
|
|
STOREF(0.0f, glob + op[i].c);
|
|
f1 = LocalJmp(OP_GOTO);
|
|
}
|
|
l1 = LocalLoc();
|
|
LocalJmpLoc(j1,l1);
|
|
|
|
//compare v[2]
|
|
//flds glob[A]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);
|
|
//flds glob[B]
|
|
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b+2);
|
|
//fcomip %st(1),%st
|
|
EmitByte(0xdf);EmitByte(0xe9);
|
|
//fstp %st(0) (aka: pop)
|
|
EmitByte(0xdd);EmitByte(0xd8);
|
|
|
|
/*if the condition is true, don't fail*/
|
|
j1 = LocalJmp(op[i].op);
|
|
{
|
|
STOREF(0.0f, glob + op[i].c);
|
|
f2 = LocalJmp(OP_GOTO);
|
|
}
|
|
l1 = LocalLoc();
|
|
LocalJmpLoc(j1,l1);
|
|
|
|
//success!
|
|
STOREF(1.0f, glob + op[i].c);
|
|
|
|
floc = LocalLoc();
|
|
LocalJmpLoc(f0,floc);
|
|
LocalJmpLoc(f1,floc);
|
|
LocalJmpLoc(f2,floc);
|
|
break;
|
|
}
|
|
|
|
/*fteqcc generates these from reading 'fast arrays', and are part of hexenc extras*/
|
|
case OP_FETCH_GBL_F:
|
|
case OP_FETCH_GBL_S:
|
|
case OP_FETCH_GBL_E:
|
|
case OP_FETCH_GBL_FNC:
|
|
case OP_FETCH_GBL_V:
|
|
{
|
|
unsigned int max = ((unsigned int*)glob)[op[i].a-1];
|
|
unsigned int base = op[i].a;
|
|
//flds glob[B]
|
|
EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);
|
|
//fistp ta
|
|
EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);
|
|
LOADREG(&ta, REG_EAX)
|
|
//FIXME: if eax >= $max, abort
|
|
|
|
if (op[i].op == OP_FETCH_GBL_V)
|
|
{
|
|
/*scale the index by 3*/
|
|
SETREGI(3, REG_EDX)
|
|
//mul %edx
|
|
EmitByte(0xf7); EmitByte(0xe2);
|
|
}
|
|
|
|
//lookup global
|
|
//mov &glob[base](,%eax,4),%edx
|
|
EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+0));
|
|
STOREREG(REG_EDX, glob + op[i].c+0)
|
|
if (op[i].op == OP_FETCH_GBL_V)
|
|
{
|
|
//mov &glob[base+1](,%eax,4),%edx
|
|
EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+1));
|
|
STOREREG(REG_EDX, glob + op[i].c+1)
|
|
//mov &glob[base+2](,%eax,4),%edx
|
|
EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+2));
|
|
STOREREG(REG_EDX, glob + op[i].c+2)
|
|
}
|
|
break;
|
|
}
|
|
|
|
/*fteqcc generates these from writing 'fast arrays'*/
|
|
case OP_GLOBALADDRESS:
|
|
LOADREG(glob + op[i].b, REG_EAX);
|
|
//lea &glob[A](, %eax, 4),%eax
|
|
EmitByte(0x8d);EmitByte(0x04);EmitByte(0x85);EmitAdr(glob + op[i].b+2);
|
|
STOREREG(REG_EAX, glob + op[i].c);
|
|
break;
|
|
// case OP_BOUNDCHECK:
|
|
//FIXME: assert b <= a < c
|
|
break;
|
|
case OP_CONV_FTOI:
|
|
//flds glob[A]
|
|
EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);
|
|
//fistp glob[C]
|
|
EmitByte(0xdb); EmitByte(0x1d);EmitAdr(glob + op[i].c);
|
|
break;
|
|
case OP_MUL_I:
|
|
LOADREG(glob + op[i].a, REG_EAX);
|
|
//mull glob[B] (arg*eax => edx:eax)
|
|
EmitByte(0xfc); EmitByte(0x25);EmitAdr(glob + op[i].b);
|
|
STOREREG(REG_EAX, glob + op[i].c);
|
|
break;
|
|
|
|
/*other extended opcodes*/
|
|
case OP_BITOR_I:
|
|
LOADREG(glob + op[i].a, REG_EAX)
|
|
//or %eax,tb
|
|
EmitByte(0x0b); EmitByte(0x05);EmitAdr(glob + op[i].b);
|
|
STOREREG(REG_EAX, glob + op[i].c);
|
|
break;
|
|
|
|
|
|
default:
|
|
{
|
|
enum qcop_e e = op[i].op;
|
|
printf("QCJIT: Extended instruction set %i is not supported, not using jit.\n", e);
|
|
}
|
|
|
|
|
|
free(jit->statementjumps); //[MAX_STATEMENTS]
|
|
free(jit->statementoffsets); //[MAX_STATEMENTS]
|
|
free(jit->code);
|
|
free(jit);
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
FixupJumps(jit);
|
|
|
|
/* most likely want executable memory calls somewhere else more common */
|
|
#ifdef _WIN32
|
|
{
|
|
DWORD old;
|
|
|
|
//this memory is on the heap.
|
|
//this means that we must maintain read/write protection, or libc will crash us
|
|
VirtualProtect(jit->code, jit->codesize, PAGE_EXECUTE_READWRITE, &old);
|
|
}
|
|
#else
|
|
mprotect(jit->code, jit->codesize, PROT_READ|PROT_EXEC);
|
|
#endif
|
|
|
|
// externs->WriteFile("jit.x86", jit->code, jit->codesize);
|
|
|
|
return jit;
|
|
}
|
|
|
|
/*
logical NOT of a float, returned as a float.
returns 1 if arg compares equal to zero (either sign of zero), otherwise 0.
a NaN does not compare equal to zero, so it also yields 0.
*/
float foo(float arg)
{
	if (arg == 0.0f)
		return 1.0f;
	return 0.0f;
}
|
|
|
|
/*
enters previously generated native code.

progfuncs: not named directly in this body. prinst is not declared here, so it is presumably a macro that reaches through progfuncs (TODO confirm).
jit: jit state whose statementoffsets table supplies the native entry point.
statement: index used to pick the entry point; the address called is jit->statementoffsets[statement+1].
	NOTE(review): no range check is made on statement here, and the reason for the +1 is not visible in this function - confirm against the code that fills statementoffsets.

the native code is entered with ebx holding prinst->edicttable.
*/
void PR_EnterJIT(progfuncs_t *progfuncs, struct jitstate *jit, int statement)
{
#ifdef __GNUC__
	//call, it clobbers pretty much everything.
	//the entry address goes in any free register, ebx is forced to the edict table, and eax/ecx/edx/flags/memory are declared clobbered.
	asm("call *%0" :: "r"(jit->statementoffsets[statement+1]),"b"(prinst->edicttable):"cc","memory","eax","ecx","edx");
#elif defined(_MSC_VER)
	void *entry = jit->statementoffsets[statement+1];
	void *edicttable = prinst->edicttable;
	//pushad/popad save and restore all general purpose registers around the call.
	__asm {
		pushad
		mov eax,entry
		mov ebx,edicttable
		call eax
		popad
	}
#else
	#error "Sorry, no idea how to enter assembler safely for your compiler"
#endif
}
|
|
#endif
|