1
0
Fork 0
forked from fte/fteqw
fteqw/engine/qclib/pr_x86.c
Spoike 685404250f added cmake file.
reduced input latency.
reworked how internal texture formats work,.
added support for LIGHTING_E5BGR9 bspx lump for HDR lighting.
updated support for srgb, no longer looks quite so weird. works on glx
vid_srgb 3 attempts to use half-float swapchains, where possible.
gl: use glTextureStorage where available.
d3d11: gave up on using dxgi for fullscreen, was just too buggy.
glx: updated gl context creation on linux.
server: fix svc_updatefrags not being passed though (fixes frikbot scores)
fs: spanned pk3s now work (fragmented files/directory will fail to open, so this needs a custom tool to be fully useful).
fixed restart_ents command (restarts the map, but preserving the players as they are)
tw: removed 'QWSKINS' featureset from tw config

git-svn-id: https://svn.code.sf.net/p/fteqw/code/trunk@5217 fc73d0e0-1445-4013-8a0c-d673dee63da5
2018-03-04 14:41:16 +00:00

1611 lines
43 KiB
C

/*
when I say JIT, I mean load time, not execution time.
notes:
qc jump offsets are all constants. we have no variable offset jumps (other than function calls/returns)
field remapping... fields are in place, and cannot be adjusted. if a field is not set to 0, it's assumed to be a constant.
optimisations:
none at the moment...
instructions need to be chained. stuff that writes to C should be cacheable, etc. maybe we don't even need to do the write to C
it should also be possible to fold in eq+ifnot, so none of this silly storing of floats in equality tests
this means that we need to track which vars are cached and in what form: fpreg, ireg+floatasint, ireg+float.
certain qccx hacks can use fpu operations on ints, so do what the instruction says, rather than considering an add an add regardless of types.
OP_AND_F, OP_OR_F etc will generally result in ints, and we should be able to keep them as ints if they combine with other ints.
some instructions are jump sites. any cache must be flushed before the start of the instruction.
some variables are locals, and will only ever be written by a single instruction, then read by the following instruction. such temps do not need to be written, or are overwritten later in the function anyway.
such locals need to be calculated PER FUNCTION as (fte)qcc can overlap locals making multiple distinct locals on a single offset.
store locals on a proper stack instead of the current absurd mechanism.
eax - tmp
ebx - prinst->edicttable
ecx - tmp
edx - tmp
esi - debug opcode number
edi - tmp (because it's preserved by subfunctions)
ebp -
to use gas to provide binary opcodes:
vim -N blob.s && as blob.s && objdump.exe -d a.out
notable mods to test:
prydon gate, due to fpu mangling to carry values between maps
*/
#define PROGSUSED
#include "progsint.h"
#ifdef QCJIT
#ifndef _WIN32
#include <sys/mman.h>
#endif
//scratch slots that the generated code references by absolute address:
//ta/tb receive the fistp results in OP_BITOR_F/OP_BITAND_F, so they hold integer bit patterns despite the float type.
//nullfloat is the zero that OP_AND_F/OP_OR_F compare their operands against.
static float ta, tb, nullfloat=0;
//all state for one jitted progs: the output buffer plus the bookkeeping needed to resolve jumps between statements.
struct jitstate
{
//pending long jumps, three ints per jump: {offset in code of the 4-byte hole, target statement index, bias}.
//written by Jit_Emit4ByteJump, resolved by FixupJumps.
unsigned int *statementjumps; //[MAX_STATEMENTS*3]
//for each statement, the address inside 'code' where its generated instructions begin.
//the generated code also indexes this table at runtime to jump to a statement number.
unsigned char **statementoffsets; //[MAX_STATEMENTS]
//number of ints used in statementjumps (grows by 3 per recorded jump).
unsigned int numjumps;
//output buffer for generated machine code, and the number of bytes written to it so far.
unsigned char *code;
unsigned int codesize;
//statement count of the progs; the code buffer is sized as jitstatements*500 bytes.
unsigned int jitstatements;
//base address used by GCache_Load/GCache_Store when forming global addresses.
float *glob;
//register-cache bookkeeping; only touched by the USECACHE paths in GCache_Load/GCache_Store, which are currently compiled out.
unsigned int cachedglobal;
unsigned int cachereg;
};
//appends a single byte to the code buffer. no bounds checking is done here.
static void Jit_EmitByte(struct jitstate *jit, unsigned char byte)
{
	unsigned char *out = jit->code + jit->codesize;
	*out = byte;
	jit->codesize += 1;
}
//appends a 32bit value to the code buffer, least significant byte first.
static void Jit_Emit4Byte(struct jitstate *jit, unsigned int value)
{
	unsigned int shift;
	for (shift = 0; shift < 32; shift += 8)
		jit->code[jit->codesize++] = (value >> shift) & 0xff;
}
//appends an absolute address as a 4-byte immediate.
//the pointer is squeezed through unsigned int, matching the 32bit encodings emitted throughout this file.
static void Jit_EmitAdr(struct jitstate *jit, void *value)
{
	unsigned int raw = (unsigned int)value;
	Jit_Emit4Byte(jit, raw);
}
//appends the raw bit pattern of a float as a 4-byte immediate.
static void Jit_EmitFloat(struct jitstate *jit, float value)
{
	union
	{
		float number;
		unsigned int bits;
	} pun;

	pun.number = value;	//reinterpret via the union, no numeric conversion
	Jit_Emit4Byte(jit, pun.bits);
}
//appends a 16bit value to the code buffer, least significant byte first.
static void Jit_Emit2Byte(struct jitstate *jit, unsigned short value)
{
	unsigned char *out = &jit->code[jit->codesize];
	out[0] = (value >> 0) & 0xff;
	out[1] = (value >> 8) & 0xff;
	jit->codesize += 2;
}
//appends the 4-byte relative displacement for a call/jump to a C function.
//func: the absolute target.
//bias: added to the current write position to find the address the displacement is measured from
//      (callers pass 4, ie: the end of the displacement being written).
static void Jit_EmitFOffset(struct jitstate *jit, const void *func, int bias)
{
	union
	{
		const void *ptr;
		unsigned int bits;
	} target;
	unsigned int base = (unsigned int)&jit->code[jit->codesize+bias];

	target.ptr = func;
	Jit_Emit4Byte(jit, target.bits - base);
}
//reserves a 4-byte displacement for a jump to another qc statement and records it for FixupJumps.
//the target's address is not known until all statements have been generated, so only a hole is left here.
static void Jit_Emit4ByteJump(struct jitstate *jit, int statementnum, int offset)
{
	unsigned int *record = &jit->statementjumps[jit->numjumps];

	record[0] = jit->codesize;	//where the hole is
	record[1] = statementnum;	//which statement it should reach
	record[2] = offset;		//bias applied by FixupJumps
	jit->numjumps += 3;

	//skip over the hole, it is filled in later
	jit->codesize += 4;
}
//REG_NONE is redefined below; drop any earlier definition on windows builds (presumably one from the platform headers).
#ifdef _WIN32
#undef REG_NONE
#endif
//register identifiers. the first eight are in declaration order 0-7 because the LOADREG/STOREREG/XOR macros
//shift them straight into the instruction bytes, so do not reorder them.
enum
{
REG_EAX,
REG_ECX,
REG_EDX,
REG_EBX, //note: edicttable
REG_ESP,
REG_EBP,
REG_ESI,
REG_EDI,
/*I'm not going to list S1 here, as that makes things too awkward*/
REG_S0, //top of the fpu stack; GCache_Load/GCache_Store use flds/fstps for this one instead of mov
REG_NONE //no register: GCache_Load/GCache_Store emit nothing for it
};
/*
emission helper macros. all of them expect a 'struct jitstate *jit' to be in scope.
the instruction macros expand to several statements and carry their own trailing ';',
so they must not be used as the unbraced body of an if/else/loop.
(some call sites omit the semicolon, which is why these are not wrapped in do{}while(0).)
*/
//xor %sr,%dr
#define XOR(sr,dr) EmitByte(0x31);EmitByte(0xc0 | ((sr)<<3) | (dr));
//xor %reg,%reg
#define CLEARREG(reg) XOR(reg,reg)
//mov addr,%reg (eax has a shorter dedicated encoding)
#define LOADREG(addr, reg) if ((reg) == REG_EAX) {EmitByte(0xa1);} else {EmitByte(0x8b); EmitByte(((reg)<<3) | 0x05);} EmitAdr(addr);
//mov %reg,addr (eax has a shorter dedicated encoding)
#define STOREREG(reg, addr) if ((reg) == REG_EAX) {EmitByte(0xa3);} else {EmitByte(0x89); EmitByte(((reg)<<3) | 0x05);} EmitAdr(addr);
//movl $f,addr (f is stored as its raw float bits)
#define STOREF(f, addr) EmitByte(0xc7);EmitByte(0x05); EmitAdr(addr);EmitFloat(f);
//movl $i,addr
#define STOREI(i, addr) EmitByte(0xc7);EmitByte(0x05); EmitAdr(addr);Emit4Byte(i);
//mov $val,%reg. the opcode is 0xb8+reg; this used to be hardcoded to 0xbe (esi) regardless of the reg argument.
#define SETREGI(val,reg) EmitByte(0xb8 | (reg));Emit4Byte(val);
//load/store the operands of the current statement (op[i]) via the global cache.
//the parameters are named ra/rb/rc so that they are not substituted into the .a/.b/.c member names.
#define ARGREGS(ra,rb,rc) GCache_Load(jit, op[i].a, ra, op[i].b, rb, op[i].c, rc)
#define RESULTREG(r) GCache_Store(jit, op[i].c, r)
//shorthands that supply the implicit jit argument
#define EmitByte(v) Jit_EmitByte(jit, v)
#define EmitAdr(v) Jit_EmitAdr(jit, v)
#define EmitFOffset(a,b) Jit_EmitFOffset(jit, a, b)
#define Emit4ByteJump(a,b) Jit_Emit4ByteJump(jit, a, b)
#define Emit4Byte(v) Jit_Emit4Byte(jit, v)
#define EmitFloat(v) Jit_EmitFloat(jit, v)
#define LocalJmp(v) Jit_LocalJmp(jit, v)
#define LocalLoc() Jit_LocalLoc(jit)
//for the purposes of the cache, 'temp' offsets are only read when they have been written only within the preceding control block.
//if they were read at any other time, then we must write them out in full.
//this logic applies only to locals of a function.
//#define USECACHE
//emits the load of a single global into a register. REG_NONE emits nothing, REG_S0 pushes onto the fpu stack.
static void GCache_LoadOperand(struct jitstate *jit, int ofs, int reg)
{
	switch(reg)
	{
	case REG_NONE:
		break;
	case REG_S0:
		//flds glob[ofs]
		EmitByte(0xd9);EmitByte(0x05);EmitAdr(jit->glob + ofs);
		break;
	default:
		LOADREG(jit->glob + ofs, reg);
		break;
	}
}
/*
emits the loads for up to three operands of an instruction.
ao/bo/co are global offsets (indexes into jit->glob), ar/br/cr are the registers they should end up in.
pass REG_NONE as the register for any operand that does not need loading.

the USECACHE path is compiled out by default. it skips loads whose value is already sitting in the wanted register,
and writes the cached value back to memory when needed.
NOTE(review): that path also needs an istemp() helper, which is not defined in the visible part of this file.
*/
static void GCache_Load(struct jitstate *jit, int ao, int ar, int bo, int br, int co, int cr)
{
#ifdef USECACHE
	if (jit->cachereg != REG_NONE)
	{
		/*something is cached, if its one of the input offsets then can chain the instruction*/
		if (jit->cachedglobal == ao && ar != REG_NONE)
		{
			if (jit->cachereg == ar)
				ar = REG_NONE;
		}
		if (jit->cachedglobal == bo && br != REG_NONE)
		{
			if (jit->cachereg == br)
				br = REG_NONE;
		}
		if (jit->cachedglobal == co && cr != REG_NONE)
		{
			if (jit->cachereg == cr)
				cr = REG_NONE;
		}
		if (!istemp(ao))
		{
			/*purge the old cache*/
			switch(jit->cachereg)
			{
			case REG_NONE:
				break;
			case REG_S0:
				//fstps glob[cached]
				EmitByte(0xd9);EmitByte(0x1d);EmitAdr(jit->glob + jit->cachedglobal);
				break;
			default:
				STOREREG(jit->cachereg, jit->glob + jit->cachedglobal);
				break;
			}
			jit->cachedglobal = -1;
			jit->cachereg = REG_NONE;
		}
	}
#endif
	GCache_LoadOperand(jit, ao, ar);
	GCache_LoadOperand(jit, bo, br);
	GCache_LoadOperand(jit, co, cr);
}
/*
stores an instruction's result register to the global at offset ofs (an index into jit->glob).
reg: REG_NONE emits nothing, REG_S0 pops the fpu stack into the global, anything else is a plain mov.
with USECACHE (compiled out by default) the write is deferred: only the offset+register pair is remembered.
*/
static void GCache_Store(struct jitstate *jit, int ofs, int reg)
{
#ifdef USECACHE
	jit->cachedglobal = ofs;
	jit->cachereg = reg;
#else
	switch(reg)
	{
	case REG_NONE:
		break;
	case REG_S0:
		//fstps glob[ofs]
		EmitByte(0xd9);EmitByte(0x1d);EmitAdr(jit->glob + ofs);
		break;
	default:
		STOREREG(reg, jit->glob + ofs);
		break;
	}
#endif
}
//returns the address that the next emitted byte will be written to. used as a target for LocalJmpLoc.
static void *Jit_LocalLoc(struct jitstate *jit)
{
	unsigned char *next = jit->code + jit->codesize;
	return next;
}
/*
emits a short (8bit displacement) jump whose condition matches the given qc opcode, with the displacement left as 0.
returns the address just past the jump; hand that to LocalJmpLoc once the destination is known.
if the opcode has no mapping then no opcode byte is emitted at all.
(debug windows builds complain and return NULL in that case, other builds still emit the displacement byte.)
*/
static void *Jit_LocalJmp(struct jitstate *jit, int cond)
{
	int opcode = -1;	//-1 = no jump instruction known for this condition

	/*the float compares leave their result in the carry/zero flags rather than the sign flag, so they map to the 'above/below' jumps instead of 'greater/less'*/
	switch(cond)
	{
	case OP_GOTO:
		opcode = 0xeb;	//jmp
		break;
	case OP_LE_F:
		opcode = 0x76;	//jbe
		break;
	case OP_GE_F:
		opcode = 0x73;	//jae
		break;
	case OP_LT_F:
		opcode = 0x72;	//jb
		break;
	case OP_GT_F:
		opcode = 0x77;	//ja
		break;
	case OP_LE_I:
		opcode = 0x7e;	//jle
		break;
	case OP_LT_I:
		opcode = 0x7c;	//jl
		break;
	default:
		if ((cond >= OP_NE_F && cond <= OP_NE_FNC) || cond == OP_NE_I)
			opcode = 0x75;	//jne
		else if ((cond >= OP_EQ_F && cond <= OP_EQ_FNC) || cond == OP_EQ_I)
			opcode = 0x74;	//je
		break;
	}

	if (opcode >= 0)
		Jit_EmitByte(jit, opcode);
#if defined(DEBUG) && defined(_WIN32)
	else
	{
		OutputDebugString("oh noes!\n");
		return NULL;
	}
#endif
	//placeholder displacement, patched by LocalJmpLoc
	Jit_EmitByte(jit, 0);
	return Jit_LocalLoc(jit);
}
/*
patches the 8bit displacement of a short jump emitted by Jit_LocalJmp.
jmp: the value Jit_LocalJmp returned (the address just past the 2-byte jump).
loc: the address the jump should land on (from Jit_LocalLoc).
the displacement byte is at jmp[-1] and is measured from jmp itself.
outside of debug windows builds there is no range check, so the distance is silently truncated to 8 bits.
*/
static void LocalJmpLoc(void *jmp, void *loc)
{
	unsigned char *a = jmp;
	int offs = (int)((char *)loc - (char *)jmp);
#if defined(DEBUG) && defined(_WIN32)
	//a signed 8bit displacement covers -128..127 inclusive (-128 used to be rejected here)
	if (offs > 127 || offs < -128)
	{
		OutputDebugStringA("bad jump\n");
		//replace the jump with 'int $0xcc' so that reaching it faults instead of jumping somewhere random
		a[-2] = 0xcd;
		a[-1] = 0xcc;
		return;
	}
#endif
	a[-1] = (unsigned char)offs;
}
/*
fills in every displacement hole that Jit_Emit4ByteJump left behind.
must only run once code has been generated for all statements, as it reads statementoffsets for the targets.
each record is {offset of the hole, target statement, bias}.
the displacement is measured from (hole - bias); callers pass a bias of -4, ie: the end of the 4-byte hole.
*/
static void FixupJumps(struct jitstate *jit)
{
	unsigned int rec = 0;

	while (rec < jit->numjumps)
	{
		unsigned char *hole = &jit->code[jit->statementjumps[rec+0]];
		unsigned char *target = jit->statementoffsets[jit->statementjumps[rec+1]];
		unsigned int bias = jit->statementjumps[rec+2];
		unsigned int rel = (int)(target - (hole-bias));
		unsigned int b;

		rec += 3;

		//written out bytewise, least significant byte first
		for (b = 0; b < 4; b++)
			hole[b] = (rel >> (b*8)) & 0xff;
	}
}
//C-side helpers that the generated code calls directly (see the OP_RETURN and OP_CALL emission in PR_GenerateJit).
//the generated code treats the value returned in eax as a statement number to continue from.
int ASMCALL PR_LeaveFunction (progfuncs_t *progfuncs);
int ASMCALL PR_EnterFunction (progfuncs_t *progfuncs, dfunction_t *f, int progsnum);
//releases everything owned by a jit state created by PR_GenerateJit. passing NULL is a no-op.
void PR_CloseJit(struct jitstate *jit)
{
	if (!jit)
		return;

	//the code buffer is released the same way it was obtained: mmap on non-windows, malloc on windows.
#ifdef _WIN32
	free(jit->code);
#else
	munmap(jit->code, jit->jitstatements * 500);
#endif
	free(jit->statementoffsets);
	free(jit->statementjumps);
	free(jit);
}
#if 0
//NOTE: disabled, unfinished draft. it appears to be a copy of an interpreter-style OP_CALL handler, presumably
//intended as the C helper that the OP_CALL FIXME in PR_GenerateJit talks about.
//it cannot compile as written: there is no visible return type, fnum is declared both as a parameter and as a local,
//'break' is used outside of any loop/switch, and newf, st, s, i and glob are not declared in this function.
//called from jit code
static PDECL PR_CallFuncion(progfuncs_t *progfuncs, int fnum)
{
int callerprogs;
int newpr;
unsigned int fnum;
fnum = OPA->function;
glob = NULL; //try to derestrict it.
callerprogs=prinst.pr_typecurrent; //so we can revert to the right caller.
newpr = (fnum & 0xff000000)>>24; //this is the progs index of the callee
fnum &= ~0xff000000; //the callee's function index.
//if it's an external call, switch now (before any function pointers are used)
if (callerprogs != newpr || !fnum || fnum > pr_progs->numfunctions)
{
char *msg = fnum?"OP_CALL references invalid function in %s\n":"NULL function from qc (inside %s).\n";
PR_SwitchProgsParms(progfuncs, callerprogs);
glob = pr_globals;
if (!progfuncs->funcs.debug_trace)
QCFAULT(&progfuncs->funcs, msg, PR_StringToNative(&progfuncs->funcs, pr_xfunction->s_name));
//skip the instruction if they just try stepping over it anyway.
PR_StackTrace(&progfuncs->funcs, 0);
printf(msg, PR_StringToNative(&progfuncs->funcs, pr_xfunction->s_name));
pr_globals[OFS_RETURN] = 0;
pr_globals[OFS_RETURN+1] = 0;
pr_globals[OFS_RETURN+2] = 0;
break;
}
newf = &pr_cp_functions[fnum & ~0xff000000];
if (newf->first_statement <= 0)
{ // negative statements are built in functions
/*calling a builtin in another progs may affect that other progs' globals instead, is the theory anyway, so args and stuff need to move over*/
if (prinst.pr_typecurrent != 0)
{
//builtins quite hackily refer to only a single global.
//for builtins to affect the globals of other progs, we need to first switch to the progs that it will affect, so they'll be correct when we switch back
PR_SwitchProgsParms(progfuncs, 0);
}
i = -newf->first_statement;
// p = pr_typecurrent;
if (i < externs->numglobalbuiltins)
{
#ifndef QCGC
prinst.numtempstringsstack = prinst.numtempstrings;
#endif
(*externs->globalbuiltins[i]) (&progfuncs->funcs, (struct globalvars_s *)current_progstate->globals);
//in case ed_alloc was called
num_edicts = sv_num_edicts;
if (prinst.continuestatement!=-1)
{
st=&pr_statements[prinst.continuestatement];
prinst.continuestatement=-1;
glob = pr_globals;
break;
}
}
else
{
// if (newf->first_statement == -0x7fffffff)
// ((builtin_t)newf->profile) (progfuncs, (struct globalvars_s *)current_progstate->globals);
// else
PR_RunError (&progfuncs->funcs, "Bad builtin call number - %i", -newf->first_statement);
}
// memcpy(&pr_progstate[p].globals[OFS_RETURN], &current_progstate->globals[OFS_RETURN], sizeof(vec3_t));
PR_SwitchProgsParms(progfuncs, (progsnum_t)callerprogs);
//decide weather non debugger wants to start debugging.
s = st-pr_statements;
return s;
}
// PR_SwitchProgsParms((OPA->function & 0xff000000)>>24);
s = PR_EnterFunction (progfuncs, newf, callerprogs);
st = &pr_statements[s];
}
#endif
struct jitstate *PR_GenerateJit(progfuncs_t *progfuncs)
{
struct jitstate *jit;
void *j0, *l0;
void *j1, *l1;
void *j2, *l2;
unsigned int i;
dstatement16_t *op = (dstatement16_t*)current_progstate->statements;
unsigned int numstatements = current_progstate->progs->numstatements;
unsigned int numglobals = current_progstate->progs->numglobals+3; //vectors are annoying.
int *glob = (int*)current_progstate->globals;
unsigned int numfunctions = current_progstate->progs->numfunctions;
mfunction_t *func;
// pbyte *isconst;
pbool failed = false;
jit = malloc(sizeof(*jit));
jit->jitstatements = numstatements;
// isconst = malloc(numglobals*sizeof(*isconst));
jit->statementjumps = malloc(numstatements*3*sizeof(int));
jit->statementoffsets = malloc(numstatements*sizeof(*jit->statementoffsets));
#ifndef _WIN32
jit->code = mmap(NULL, numstatements*500, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
#else
jit->code = malloc(numstatements*500);
#endif
if (!jit->code)
return NULL;
jit->numjumps = 0;
jit->codesize = 0;
for (i = 0; i < numstatements; i++)
jit->statementoffsets[i] = NULL;
// for (i = 0; i < numglobals; i++)
// isconst[i] = true;
for (i = 0; i < numfunctions; i++)
{
}
for (i = 0; i < numstatements; i++)
{
//figure out which statements are jumped to. these are statements that must flush registers prior to execution.
switch(op[i].op)
{
case OP_GOTO:
jit->statementoffsets[i + (short)op[i].a] = (void*)~0;
break;
case OP_IF_I:
case OP_IFNOT_I:
case OP_IF_F:
case OP_IFNOT_F:
case OP_IF_S:
case OP_IFNOT_S:
case OP_CASE:
jit->statementoffsets[i + (short)op[i].b] = (void*)~0;
break;
case OP_CASERANGE:
jit->statementoffsets[i + (short)op[i].c] = (void*)~0;
break;
}
//we probably can't do anything about consts.
//we might be able to do something about locals, but we would need to fix this to generate per-function.
//we CAN do something about consts, most of them anyway.
//visible types
/*
if (OpAssignsToA(op[i].op))
{
if (op[i].a >= numglobals)
failed = true;
else
isconst[op[i].a] = false;
}
if (OpAssignsToB(op[i].op))
{
if (op[i].b >= numglobals)
failed = true;
else
isconst[op[i].b] = false;
}
if (OpAssignsToC(op[i].op))
{
if (op[i].c >= numglobals)
failed = true;
else
isconst[op[i].c] = false;
}
*/
}
for (i = 0; i < numstatements && !failed; i++)
{
if (jit->statementoffsets[i])
{
//FIXME: flush any registers.
}
jit->statementoffsets[i] = &jit->code[jit->codesize];
#ifdef _DEBUG
/*DEBUG*/
SETREGI(op[i].op, REG_ESI);
#endif
switch(op[i].op)
{
//jumps
case OP_IF_I:
//integer compare
//if a, goto b
//cmpl $0,glob[A]
EmitByte(0x83);EmitByte(0x3d);EmitAdr(glob + op[i].a);EmitByte(0x0);
//jne B
EmitByte(0x0f);EmitByte(0x85);Emit4ByteJump(i + (signed short)op[i].b, -4);
break;
case OP_IFNOT_I:
//integer compare
//if !a, goto b
//cmpl $0,glob[A]
EmitByte(0x83);EmitByte(0x3d);EmitAdr(glob + op[i].a);EmitByte(0x0);
//je B
EmitByte(0x0f);EmitByte(0x84);Emit4ByteJump(i + (signed short)op[i].b, -4);
break;
case OP_GOTO:
EmitByte(0xE9);Emit4ByteJump(i + (signed short)op[i].a, -4);
break;
//function returns
case OP_DONE:
case OP_RETURN:
//done and return are the same
//part 1: store A into OFS_RETURN
if (!op[i].a)
{
//assumption: anything that returns address 0 is a void or zero return.
//thus clear eax and copy that to the return vector.
CLEARREG(REG_EAX);
STOREREG(REG_EAX, glob + OFS_RETURN+0);
STOREREG(REG_EAX, glob + OFS_RETURN+1);
STOREREG(REG_EAX, glob + OFS_RETURN+2);
}
else
{
LOADREG(glob + op[i].a+0, REG_EAX);
LOADREG(glob + op[i].a+1, REG_EDX);
LOADREG(glob + op[i].a+2, REG_ECX);
STOREREG(REG_EAX, glob + OFS_RETURN+0);
STOREREG(REG_EDX, glob + OFS_RETURN+1);
STOREREG(REG_ECX, glob + OFS_RETURN+2);
}
//call leavefunction to get the return address
// pushl progfuncs
EmitByte(0x68);EmitAdr(progfuncs);
// call PR_LeaveFunction
EmitByte(0xe8);EmitFOffset(PR_LeaveFunction, 4);
// add $4,%esp
EmitByte(0x83);EmitByte(0xc4);EmitByte(0x04);
// movl pr_depth,%edx
EmitByte(0x8b);EmitByte(0x15);EmitAdr(&pr_depth);
// cmp prinst->exitdepth,%edx
EmitByte(0x3b);EmitByte(0x15);EmitAdr(&prinst.exitdepth);
// je returntoc
j1 = LocalJmp(OP_EQ_E);
// mov statementoffsets[%eax*4],%eax
EmitByte(0x8b);EmitByte(0x04);EmitByte(0x85);EmitAdr(jit->statementoffsets+1);
// jmp *eax
EmitByte(0xff);EmitByte(0xe0);
// returntoc:
l1 = LocalLoc();
// ret
EmitByte(0xc3);
LocalJmpLoc(j1,l1);
break;
//function calls
case OP_CALL0:
case OP_CALL1:
case OP_CALL2:
case OP_CALL3:
case OP_CALL4:
case OP_CALL5:
case OP_CALL6:
case OP_CALL7:
case OP_CALL8:
//FIXME: the size of this instruction is going to hurt cache performance if every single function call is expanded into this HUGE CHUNK of gibberish!
//FIXME: consider the feasability of just calling a C function and just jumping to the address it returns.
//save the state in place the rest of the engine can cope with
//movl $i, pr_xstatement
EmitByte( 0xc7);EmitByte(0x05);EmitAdr(&pr_xstatement);Emit4Byte(i);
//movl $(op[i].op-OP_CALL0), pr_argc
EmitByte( 0xc7);EmitByte(0x05);EmitAdr(&progfuncs->funcs.callargc);Emit4Byte(op[i].op-OP_CALL0);
//figure out who we're calling, and what that involves
//%eax = glob[A]
LOADREG(glob + op[i].a, REG_EAX);
//eax is now the func num
//mov %eax,%ecx
EmitByte(0x89); EmitByte(0xc1);
//shr $24,%ecx
EmitByte(0xc1); EmitByte(0xe9); EmitByte(0x18);
//ecx is now the progs num for the new func
/*
//cmp %ecx,pr_typecurrent
EmitByte(0x39); EmitByte(0x0d); EmitAdr(&pr_typecurrent);
//je sameprogs
j1 = LocalJmp(OP_EQ_I);
{
//can't handle switching progs
//FIXME: recurse though PR_ExecuteProgram
//push eax
//push progfuncs
//call PR_ExecuteProgram
//add $8,%esp
//remember to change the je above
//err... exit depth? no idea
EmitByte(0xcd);EmitByte(op[i].op); //int $X
//ret
EmitByte(0xc3);
}
//sameprogs:
l1 = LocalLoc();
LocalJmpLoc(j1,l1);
*/
//andl $0x00ffffff, %eax
EmitByte(0x25);Emit4Byte(0x00ffffff);
//mov $sizeof(dfunction_t),%edx
EmitByte(0xba);Emit4Byte(sizeof(dfunction_t));
//mul %edx
EmitByte(0xf7); EmitByte(0xe2);
//add pr_functions,%eax
EmitByte(0x05); EmitAdr(current_progstate->functions);
//eax is now the dfunction_t to be called
//edx is clobbered.
//mov (%eax),%edx
EmitByte(0x8b);EmitByte(0x10);
//edx is now the first statement number
//cmp $0,%edx
EmitByte(0x83);EmitByte(0xfa);EmitByte(0x00);
//jl isabuiltin
j1 = LocalJmp(OP_LT_I);
{
/* call the function*/
//push %ecx
EmitByte(0x51);
//push %eax
EmitByte(0x50);
//pushl progfuncs
EmitByte(0x68);EmitAdr(progfuncs);
//call PR_EnterFunction
EmitByte(0xe8);EmitFOffset(PR_EnterFunction, 4);
//sub $12,%esp
EmitByte(0x83);EmitByte(0xc4);EmitByte(0xc);
//eax is now the next statement number (first of the new function, usually equal to ecx, but not always)
//jmp statementoffsets[%eax*4]
EmitByte(0xff);EmitByte(0x24);EmitByte(0x85);EmitAdr(jit->statementoffsets+1);
}
/*its a builtin, figure out which, and call it*/
//isabuiltin:
l1 = LocalLoc();
LocalJmpLoc(j1,l1);
//push current_progstate->globals
EmitByte(0x68);EmitAdr(current_progstate->globals);
//push progfuncs
EmitByte(0x68);EmitAdr(progfuncs);
//neg %edx
EmitByte(0xf7);EmitByte(0xda);
//call externs->globalbuiltins[%edx,4]
//FIXME: make sure this dereferences
EmitByte(0xff);EmitByte(0x14);EmitByte(0x95);EmitAdr(externs->globalbuiltins);
//add $8,%esp
EmitByte(0x83);EmitByte(0xc4);EmitByte(0x8);
//but that builtin might have been Abort()
LOADREG(&prinst.continuestatement, REG_EAX);
//cmp $-1,%eax
EmitByte(0x83);EmitByte(0xf8);EmitByte(0xff);
//je donebuiltincall
j1 = LocalJmp(OP_EQ_I);
{
//mov $-1,prinst->continuestatement
EmitByte(0xc7);EmitByte(0x05);EmitAdr(&prinst.continuestatement);Emit4Byte((unsigned int)-1);
//jmp statementoffsets[%eax*4]
EmitByte(0xff);EmitByte(0x24);EmitByte(0x85);EmitAdr(jit->statementoffsets);
}
//donebuiltincall:
l1 = LocalLoc();
LocalJmpLoc(j1,l1);
break;
case OP_MUL_F:
//flds glob[A]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);
//fmuls glob[B]
EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b);
//fstps glob[C]
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);
break;
case OP_DIV_F:
//flds glob[A]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);
//fdivs glob[B]
EmitByte(0xd8);EmitByte(0x35);EmitAdr(glob + op[i].b);
//fstps glob[C]
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);
break;
case OP_ADD_F:
//flds glob[A]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);
//fadds glob[B]
EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b);
//fstps glob[C]
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);
break;
case OP_SUB_F:
//flds glob[A]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);
//fsubs glob[B]
EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b);
//fstps glob[C]
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);
break;
case OP_NOT_F:
//fldz
EmitByte(0xd9);EmitByte(0xee);
//fcomps glob[A]
EmitByte(0xd8); EmitByte(0x1d); EmitAdr(glob + op[i].a);
//fnstsw %ax
EmitByte(0xdf);EmitByte(0xe0);
//testb 0x40,%ah
EmitByte(0xf6);EmitByte(0xc4);EmitByte(0x40);
j1 = LocalJmp(OP_NE_F);
{
STOREF(0.0f, glob + op[i].c);
j2 = LocalJmp(OP_GOTO);
}
{
//noteq:
l1 = LocalLoc();
STOREF(1.0f, glob + op[i].c);
}
//end:
l2 = LocalLoc();
LocalJmpLoc(j1,l1);
LocalJmpLoc(j2,l2);
break;
case OP_STORE_F:
case OP_STORE_S:
case OP_STORE_ENT:
case OP_STORE_FLD:
case OP_STORE_FNC:
LOADREG(glob + op[i].a, REG_EAX);
STOREREG(REG_EAX, glob + op[i].b);
break;
case OP_STORE_V:
LOADREG(glob + op[i].a+0, REG_EAX);
LOADREG(glob + op[i].a+1, REG_EDX);
LOADREG(glob + op[i].a+2, REG_ECX);
STOREREG(REG_EAX, glob + op[i].b+0);
STOREREG(REG_EDX, glob + op[i].b+1);
STOREREG(REG_ECX, glob + op[i].b+2);
break;
case OP_LOAD_F:
case OP_LOAD_S:
case OP_LOAD_ENT:
case OP_LOAD_FLD:
case OP_LOAD_FNC:
case OP_LOAD_V:
//a is the ent number, b is the field
//c is the dest
LOADREG(glob + op[i].a, REG_EAX);
LOADREG(glob + op[i].b, REG_ECX);
//FIXME: bound eax (ent number)
//FIXME: bound ecx (field index)
//mov (ebx,eax,4).%eax
EmitByte(0x8b); EmitByte(0x04); EmitByte(0x83);
//eax is now an edictrun_t
//mov fields(,%eax,4),%edx
EmitByte(0x8b);EmitByte(0x50);EmitByte((int)&((edictrun_t*)NULL)->fields);
//edx is now the field array for that ent
//mov fieldajust(%edx,%ecx,4),%eax
EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(progfuncs->funcs.fieldadjust*4);
STOREREG(REG_EAX, glob + op[i].c)
if (op[i].op == OP_LOAD_V)
{
//mov fieldajust+4(%edx,%ecx,4),%eax
EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(4+progfuncs->funcs.fieldadjust*4);
STOREREG(REG_EAX, glob + op[i].c+1)
//mov fieldajust+8(%edx,%ecx,4),%eax
EmitByte(0x8b); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(8+progfuncs->funcs.fieldadjust*4);
STOREREG(REG_EAX, glob + op[i].c+2)
}
break;
case OP_ADDRESS:
//a is the ent number, b is the field
//c is the dest
LOADREG(glob + op[i].a, REG_EAX);
LOADREG(glob + op[i].b, REG_ECX);
//FIXME: bound eax (ent number)
//FIXME: bound ecx (field index)
//mov (ebx,eax,4).%eax
EmitByte(0x8b); EmitByte(0x04); EmitByte(0x83);
//eax is now an edictrun_t
//mov fields(,%eax,4),%edx
EmitByte(0x8b);EmitByte(0x50);EmitByte((int)&((edictrun_t*)NULL)->fields);
//edx is now the field array for that ent
//mov fieldajust(%edx,%ecx,4),%eax //offset = progfuncs->fieldadjust
//EmitByte(0x8d); EmitByte(0x84); EmitByte(0x8a); EmitByte(progfuncs->funcs.fieldadjust*4);
EmitByte(0x8d); EmitByte(0x84); EmitByte(0x8a); Emit4Byte(progfuncs->funcs.fieldadjust*4);
STOREREG(REG_EAX, glob + op[i].c);
break;
case OP_STOREP_F:
case OP_STOREP_S:
case OP_STOREP_ENT:
case OP_STOREP_FLD:
case OP_STOREP_FNC:
LOADREG(glob + op[i].a, REG_EAX);
LOADREG(glob + op[i].b, REG_ECX);
//mov %eax,(%ecx)
EmitByte(0x89);EmitByte(0x01);
break;
case OP_STOREP_V:
LOADREG(glob + op[i].b, REG_ECX);
LOADREG(glob + op[i].a+0, REG_EAX);
//mov %eax,0(%ecx)
EmitByte(0x89);EmitByte(0x01);
LOADREG(glob + op[i].a+1, REG_EAX);
//mov %eax,4(%ecx)
EmitByte(0x89);EmitByte(0x41);EmitByte(0x04);
LOADREG(glob + op[i].a+2, REG_EAX);
//mov %eax,8(%ecx)
EmitByte(0x89);EmitByte(0x41);EmitByte(0x08);
break;
case OP_NE_I:
case OP_NE_E:
case OP_NE_FNC:
case OP_EQ_I:
case OP_EQ_E:
case OP_EQ_FNC:
//integer equality
LOADREG(glob + op[i].a, REG_EAX);
//cmp glob[B],%eax
EmitByte(0x3b); EmitByte(0x04); EmitByte(0x25); EmitAdr(glob + op[i].b);
j1 = LocalJmp(op[i].op);
{
STOREF(0.0f, glob + op[i].c);
j2 = LocalJmp(OP_GOTO);
}
{
l1 = LocalLoc();
STOREF(1.0f, glob + op[i].c);
}
l2 = LocalLoc();
LocalJmpLoc(j1,l1);
LocalJmpLoc(j2,l2);
break;
case OP_NOT_I:
case OP_NOT_ENT:
case OP_NOT_FNC:
//cmp glob[B],$0
EmitByte(0x83); EmitByte(0x3d); EmitAdr(glob + op[i].a); EmitByte(0x00);
j1 = LocalJmp(OP_NE_I);
{
STOREF(1.0f, glob + op[i].c);
j2 = LocalJmp(OP_GOTO);
}
{
l1 = LocalLoc();
STOREF(0.0f, glob + op[i].c);
}
l2 = LocalLoc();
LocalJmpLoc(j1,l1);
LocalJmpLoc(j2,l2);
break;
case OP_BITOR_F: //floats...
//flds glob[A]
EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);
//flds glob[B]
EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);
//fistp tb
EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&tb);
//fistp ta
EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);
LOADREG(&ta, REG_EAX)
//or %eax,tb
EmitByte(0x09); EmitByte(0x05);EmitAdr(&tb);
//fild tb
EmitByte(0xdb); EmitByte(0x05);EmitAdr(&tb);
//fstps glob[C]
EmitByte(0xd9); EmitByte(0x1d);EmitAdr(glob + op[i].c);
break;
case OP_BITAND_F:
//flds glob[A]
EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);
//flds glob[B]
EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);
//fistp tb
EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&tb);
//fistp ta
EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);
/*two args are now at ta and tb*/
LOADREG(&ta, REG_EAX)
//and tb,%eax
EmitByte(0x21); EmitByte(0x05);EmitAdr(&tb);
/*we just wrote the int value to tb, convert that to a float and store it at c*/
//fild tb
EmitByte(0xdb); EmitByte(0x05);EmitAdr(&tb);
//fstps glob[C]
EmitByte(0xd9); EmitByte(0x1d);EmitAdr(glob + op[i].c);
break;
case OP_AND_F:
//test floats properly, so we don't get confused with -0.0
//FIXME: is it feasable to grab the value as an int and test it against 0x7fffffff?
//flds glob[A]
EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].a);
//fcomps nullfloat
EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);
//fnstsw %ax
EmitByte(0xdf); EmitByte(0xe0);
//test $0x40,%ah
EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);
//jz onefalse
EmitByte(0x75); EmitByte(0x1f);
//flds glob[B]
EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].b);
//fcomps nullfloat
EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);
//fnstsw %ax
EmitByte(0xdf); EmitByte(0xe0);
//test $0x40,%ah
EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);
//jnz onefalse
EmitByte(0x75); EmitByte(0x0c);
//mov float0,glob[C]
EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(1.0f);
//jmp done
EmitByte(0xeb); EmitByte(0x0a);
//onefalse:
//mov float1,glob[C]
EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(0.0f);
//done:
break;
case OP_OR_F:
//test floats properly, so we don't get confused with -0.0
//flds glob[A]
EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].a);
//fcomps nullfloat
EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);
//fnstsw %ax
EmitByte(0xdf); EmitByte(0xe0);
//test $0x40,%ah
EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);
//je onetrue
EmitByte(0x74); EmitByte(0x1f);
//flds glob[B]
EmitByte(0xd9); EmitByte(0x05); EmitAdr(glob + op[i].b);
//fcomps nullfloat
EmitByte(0xd8); EmitByte(0x1d); EmitAdr(&nullfloat);
//fnstsw %ax
EmitByte(0xdf); EmitByte(0xe0);
//test $0x40,%ah
EmitByte(0xf6); EmitByte(0xc4);EmitByte(0x40);
//je onetrue
EmitByte(0x74); EmitByte(0x0c);
//mov float0,glob[C]
EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(0.0f);
//jmp done
EmitByte(0xeb); EmitByte(0x0a);
//onetrue:
//mov float1,glob[C]
EmitByte(0xc7); EmitByte(0x05); EmitAdr(glob + op[i].c); EmitFloat(1.0f);
//done:
break;
case OP_EQ_S:
case OP_NE_S:
{
//put a in ecx
LOADREG(glob + op[i].a, REG_ECX);
//put b in edi
LOADREG(glob + op[i].b, REG_EDI);
/*
//early out if they're equal
//cmp %ecx,%edi
EmitByte(0x39); EmitByte(0xc0 | (REG_EDI<<3) | REG_ECX);
j1c = LocalJmp(OP_EQ_S);
//if a is 0, check if b is ""
//jecxz ais0
EmitByte(0xe3); EmitByte(0x1a);
//if b is 0, check if a is ""
//cmp $0,%edi
EmitByte(0x83); EmitByte(0xff); EmitByte(0x00);
//jne bnot0
EmitByte(0x75); EmitByte(0x2a);
{
//push a
EmitByte(0x51);
//push progfuncs
EmitByte(0x68); EmitAdr(progfuncs);
//call PR_StringToNative
EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);
//add $8,%esp
EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);
//cmpb $0,(%eax)
EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);
j1b = LocalJmp(OP_EQ_S);
j0b = LocalJmp(OP_GOTO);
}
//ais0:
{
//push edi
EmitByte(0x57);
//push progfuncs
EmitByte(0x68); EmitAdr(progfuncs);
//call PR_StringToNative
EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);
//add $8,%esp
EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);
//cmpb $0,(%eax)
EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);
//je _true
EmitByte(0x74); EmitByte(0x36);
//jmp _false
EmitByte(0xeb); EmitByte(0x28);
}
//bnot0:
*/
LOADREG(glob + op[i].a, REG_ECX);
//push ecx
EmitByte(0x51);
//push progfuncs
EmitByte(0x68); EmitAdr(progfuncs);
//call PR_StringToNative
EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);
//push %eax
EmitByte(0x50);
LOADREG(glob + op[i].b, REG_EDI);
//push %edi
EmitByte(0x57);
//push progfuncs
EmitByte(0x68); EmitAdr(progfuncs);
//call PR_StringToNative
EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);
//add $8,%esp
EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);
//push %eax
EmitByte(0x50);
//call strcmp
EmitByte(0xe8); EmitFOffset(strcmp,4);
//add $16,%esp
EmitByte(0x83); EmitByte(0xc4); EmitByte(0x10);
//cmp $0,%eax
EmitByte(0x83); EmitByte(0xf8); EmitByte(0x00);
j1 = LocalJmp(OP_EQ_S);
{
l0 = LocalLoc();
STOREF((op[i].op == OP_NE_S)?1.0f:0.0f, glob + op[i].c);
j2 = LocalJmp(OP_GOTO);
}
{
l1 = LocalLoc();
STOREF((op[i].op == OP_NE_S)?0.0f:1.0f, glob + op[i].c);
}
l2 = LocalLoc();
// LocalJmpLoc(j0b, l0);
LocalJmpLoc(j1, l1);
// LocalJmpLoc(j1b, l1);
LocalJmpLoc(j2, l2);
}
break;
case OP_NOT_S:
LOADREG(glob + op[i].a, REG_EAX)
//cmp $0,%eax
EmitByte(0x83); EmitByte(0xf8); EmitByte(0x00);
j2 = LocalJmp(OP_EQ_S);
//push %eax
EmitByte(0x50);
//push progfuncs
EmitByte(0x68); EmitAdr(progfuncs);
//call PR_StringToNative
EmitByte(0xe8); EmitFOffset(PR_StringToNative,4);
//add $8,%esp
EmitByte(0x83); EmitByte(0xc4); EmitByte(0x08);
//cmpb $0,(%eax)
EmitByte(0x80); EmitByte(0x38); EmitByte(0x00);
j1 = LocalJmp(OP_EQ_S);
{
STOREF(0.0f, glob + op[i].c);
j0 = LocalJmp(OP_GOTO);
}
{
l1 = LocalLoc();
STOREF(1.0f, glob + op[i].c);
}
l2 = LocalLoc();
LocalJmpLoc(j2, l1);
LocalJmpLoc(j1, l1);
LocalJmpLoc(j0, l2);
break;
case OP_ADD_V:
//flds glob[A]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);
//fadds glob[B]
EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b+0);
//fstps glob[C]
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+0);
//flds glob[A]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);
//fadds glob[B]
EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b+1);
//fstps glob[C]
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+1);
//flds glob[A]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);
//fadds glob[B]
EmitByte(0xd8);EmitByte(0x05);EmitAdr(glob + op[i].b+2);
//fstps glob[C]
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+2);
break;
case OP_SUB_V:
//flds glob[A]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);
//fsubs glob[B]
EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b+0);
//fstps glob[C]
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+0);
//flds glob[A]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);
//fsubs glob[B]
EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b+1);
//fstps glob[C]
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+1);
//flds glob[A]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);
//fsubs glob[B]
EmitByte(0xd8);EmitByte(0x25);EmitAdr(glob + op[i].b+2);
//fstps glob[C]
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+2);
break;
case OP_MUL_V:
//this is actually a dotproduct
//flds glob[A]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);
//fmuls glob[B]
EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b+0);
//flds glob[A]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);
//fmuls glob[B]
EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b+1);
//flds glob[A]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);
//fmuls glob[B]
EmitByte(0xd8);EmitByte(0x0d);EmitAdr(glob + op[i].b+2);
//faddp
EmitByte(0xde);EmitByte(0xc1);
//faddp
EmitByte(0xde);EmitByte(0xc1);
//fstps glob[C]
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c);
break;
case OP_EQ_F:
case OP_NE_F:
case OP_LE_F:
case OP_GE_F:
case OP_LT_F:
case OP_GT_F:
//flds glob[A]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b);
//flds glob[B]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a);
//fcomip %st(1),%st
EmitByte(0xdf);EmitByte(0xe9);
//fstp %st(0) (aka: pop)
EmitByte(0xdd);EmitByte(0xd8);
j1 = LocalJmp(op[i].op);
{
STOREF(0.0f, glob + op[i].c);
j2 = LocalJmp(OP_GOTO);
}
{
l1 = LocalLoc();
STOREF(1.0f, glob + op[i].c);
}
l2 = LocalLoc();
LocalJmpLoc(j1,l1);
LocalJmpLoc(j2,l2);
break;
case OP_MUL_FV:
case OP_MUL_VF:
//
{
int v;
int f;
if (op[i].op == OP_MUL_FV)
{
f = op[i].a;
v = op[i].b;
}
else
{
v = op[i].a;
f = op[i].b;
}
//flds glob[F]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + f);
//flds glob[V0]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+0);
//fmul st(1)
EmitByte(0xd8);EmitByte(0xc9);
//fstps glob[C]
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+0);
//flds glob[V0]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+1);
//fmul st(1)
EmitByte(0xd8);EmitByte(0xc9);
//fstps glob[C]
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+1);
//flds glob[V0]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + v+2);
//fmul st(1)
EmitByte(0xd8);EmitByte(0xc9);
//fstps glob[C]
EmitByte(0xd9);EmitByte(0x1d);EmitAdr(glob + op[i].c+2);
//fstp %st(0) (aka: pop)
EmitByte(0xdd);EmitByte(0xd8);
}
break;
case OP_STATE:
//externs->stateop(progfuncs, OPA->_float, OPB->function);
//push b
EmitByte(0xff);EmitByte(0x35);EmitAdr(glob + op[i].b);
//push a
EmitByte(0xff);EmitByte(0x35);EmitAdr(glob + op[i].a);
//push $progfuncs
EmitByte(0x68); EmitAdr(progfuncs);
//call externs->stateop
EmitByte(0xe8); EmitFOffset(externs->stateop, 4);
//add $12,%esp
EmitByte(0x83); EmitByte(0xc4); EmitByte(0x0c);
break;
#if 1
/* case OP_NOT_V:
//flds 0
//flds glob[A+0]
//fcomip %st(1),%st
//jne _true
//flds glob[A+1]
//fcomip %st(1),%st
//jne _true
//flds glob[A+1]
//fcomip %st(1),%st
//jne _true
//mov 1,C
//jmp done
//_true:
//mov 0,C
//done:
break;
*/
case OP_NOT_V:
EmitByte(0xcd);EmitByte(op[i].op);
printf("QCJIT: instruction %i is not implemented\n", op[i].op);
break;
#endif
case OP_NE_V:
case OP_EQ_V:
{
void *f0, *f1, *f2, *floc;
//compare v[0]
//flds glob[A]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+0);
//flds glob[B]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b+0);
//fcomip %st(1),%st
EmitByte(0xdf);EmitByte(0xe9);
//fstp %st(0) (aka: pop)
EmitByte(0xdd);EmitByte(0xd8);
/*if the condition is true, don't fail*/
j1 = LocalJmp(op[i].op);
{
STOREF(0.0f, glob + op[i].c);
f0 = LocalJmp(OP_GOTO);
}
l1 = LocalLoc();
LocalJmpLoc(j1,l1);
//compare v[1]
//flds glob[A]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+1);
//flds glob[B]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b+1);
//fcomip %st(1),%st
EmitByte(0xdf);EmitByte(0xe9);
//fstp %st(0) (aka: pop)
EmitByte(0xdd);EmitByte(0xd8);
/*if the condition is true, don't fail*/
j1 = LocalJmp(op[i].op);
{
STOREF(0.0f, glob + op[i].c);
f1 = LocalJmp(OP_GOTO);
}
l1 = LocalLoc();
LocalJmpLoc(j1,l1);
//compare v[2]
//flds glob[A]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].a+2);
//flds glob[B]
EmitByte(0xd9);EmitByte(0x05);EmitAdr(glob + op[i].b+2);
//fcomip %st(1),%st
EmitByte(0xdf);EmitByte(0xe9);
//fstp %st(0) (aka: pop)
EmitByte(0xdd);EmitByte(0xd8);
/*if the condition is true, don't fail*/
j1 = LocalJmp(op[i].op);
{
STOREF(0.0f, glob + op[i].c);
f2 = LocalJmp(OP_GOTO);
}
l1 = LocalLoc();
LocalJmpLoc(j1,l1);
//success!
STOREF(1.0f, glob + op[i].c);
floc = LocalLoc();
LocalJmpLoc(f0,floc);
LocalJmpLoc(f1,floc);
LocalJmpLoc(f2,floc);
break;
}
/*fteqcc generates these from reading 'fast arrays', and are part of hexenc extras*/
case OP_FETCH_GBL_F:
case OP_FETCH_GBL_S:
case OP_FETCH_GBL_E:
case OP_FETCH_GBL_FNC:
case OP_FETCH_GBL_V:
{
unsigned int max = ((unsigned int*)glob)[op[i].a-1];
unsigned int base = op[i].a;
//flds glob[B]
EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].b);
//fistp ta
EmitByte(0xdb); EmitByte(0x1d);EmitAdr(&ta);
LOADREG(&ta, REG_EAX)
//FIXME: if eax >= $max, abort
if (op[i].op == OP_FETCH_GBL_V)
{
/*scale the index by 3*/
SETREGI(3, REG_EDX)
//mul %edx
EmitByte(0xf7); EmitByte(0xe2);
}
//lookup global
//mov &glob[base](,%eax,4),%edx
EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+0));
STOREREG(REG_EDX, glob + op[i].c+0)
if (op[i].op == OP_FETCH_GBL_V)
{
//mov &glob[base+1](,%eax,4),%edx
EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+1));
STOREREG(REG_EDX, glob + op[i].c+1)
//mov &glob[base+2](,%eax,4),%edx
EmitByte(0x8b);EmitByte(0x14);EmitByte(0x85);Emit4Byte((unsigned int)(glob + base+2));
STOREREG(REG_EDX, glob + op[i].c+2)
}
break;
}
/*fteqcc generates these from writing 'fast arrays'*/
case OP_GLOBALADDRESS:
LOADREG(glob + op[i].b, REG_EAX);
//lea &glob[A](, %eax, 4),%eax
EmitByte(0x8d);EmitByte(0x04);EmitByte(0x85);EmitAdr(glob + op[i].b+2);
STOREREG(REG_EAX, glob + op[i].c);
break;
// case OP_BOUNDCHECK:
//FIXME: assert b <= a < c
break;
case OP_CONV_FTOI:
//flds glob[A]
EmitByte(0xd9); EmitByte(0x05);EmitAdr(glob + op[i].a);
//fistp glob[C]
EmitByte(0xdb); EmitByte(0x1d);EmitAdr(glob + op[i].c);
break;
case OP_MUL_I:
LOADREG(glob + op[i].a, REG_EAX);
//mull glob[C] (arg*eax => edx:eax)
EmitByte(0xfc); EmitByte(0x25);EmitAdr(glob + op[i].b);
STOREREG(REG_EAX, glob + op[i].c);
break;
/*other extended opcodes*/
case OP_BITOR_I:
LOADREG(glob + op[i].a, REG_EAX)
//or %eax,tb
EmitByte(0x0b); EmitByte(0x05);EmitAdr(glob + op[i].b);
STOREREG(REG_EAX, glob + op[i].c);
break;
default:
{
enum qcop_e e = op[i].op;
printf("QCJIT: Extended instruction set %i is not supported, not using jit.\n", e);
}
failed = true;
break;
}
}
if(failed)
{
free(jit->statementjumps); //[MAX_STATEMENTS]
free(jit->statementoffsets); //[MAX_STATEMENTS]
free(jit->code);
free(jit);
return NULL;
}
FixupJumps(jit);
/* most likely want executable memory calls somewhere else more common */
#ifdef _WIN32
{
DWORD old;
//this memory is on the heap.
//this means that we must maintain read/write protection, or libc will crash us
VirtualProtect(jit->code, jit->codesize, PAGE_EXECUTE_READWRITE, &old);
}
#else
mprotect(jit->code, jit->codesize, PROT_READ|PROT_EXEC);
#endif
// externs->WriteFile("jit.x86", jit->code, jit->codesize);
return jit;
}
/*
logical-not of a float, expressed as a float:
yields 1 when arg compares equal to zero (either sign of zero), 0 for anything else.
*/
static float foo(float arg)
{
	return (arg == 0) ? 1.0f : 0.0f;
}
/*
calls into previously generated native code.

progfuncs: not referenced by name in this body; presumably reached through the prinst accessor - confirm.
jit: jit state whose statementoffsets table supplies the address that gets called.
statement: qc statement index. note that the address used is statementoffsets[statement+1], not [statement].

on both compiler paths the edict table pointer is placed in ebx before the call.

NOTE(review): the gcc path spells it prinst->edicttable while the msvc path spells it prinst.edicttable.
for any single definition of prinst only one of those can compile - confirm which is current and fix the other.
*/
void PR_EnterJIT(progfuncs_t *progfuncs, struct jitstate *jit, int statement)
{
#ifdef __GNUC__
//call, it clobbers pretty much everything.
//operand %0 (the entry address) may live in any register the compiler picks; the "b" constraint pins the edict table to ebx.
//ebx is an input only and is not in the clobber list - presumably the generated code leaves it intact (TODO confirm).
asm("call *%0" :: "r"(jit->statementoffsets[statement+1]),"b"(prinst->edicttable):"cc","memory","eax","ecx","edx","esi","edi");
#elif defined(_MSC_VER)
//copy both values into locals so the asm block can name them directly.
void *entry = jit->statementoffsets[statement+1];
void *edicttable = prinst.edicttable;
//pushad/popad bracket the call so every general purpose register is restored afterwards, whatever the callee did to it.
__asm {
pushad
mov eax,entry
mov ebx,edicttable
call eax
popad
}
#else
//no known-safe way to set up ebx and make the call on other compilers, so refuse to build.
#error "Sorry, no idea how to enter assembler safely for your compiler"
#endif
}
#endif