- delay emitting PARAM and VTBL instructions until CALL/CALL_K

This commit is contained in:
Magnus Norddahl 2018-11-18 12:38:55 +01:00
parent 4624c6e6a3
commit 486b7e0f3c
3 changed files with 135 additions and 153 deletions

View file

@ -3,114 +3,12 @@
void JitCompiler::EmitPARAM() void JitCompiler::EmitPARAM()
{ {
using namespace asmjit;
int index = NumParam++;
ParamOpcodes.Push(pc); ParamOpcodes.Push(pc);
X86Gp stackPtr, tmp;
X86Xmm tmp2;
switch (A)
{
case REGT_NIL:
cc.mov(x86::ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, a)), (int64_t)0);
cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_NIL);
break;
case REGT_INT:
cc.mov(x86::dword_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, i)), regD[BC]);
cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_INT);
break;
case REGT_INT | REGT_ADDROF:
stackPtr = newTempIntPtr();
cc.lea(stackPtr, x86::ptr(vmframe, offsetD + (int)(BC * sizeof(int32_t))));
cc.mov(x86::ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, a)), stackPtr);
cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_POINTER);
break;
case REGT_INT | REGT_KONST:
cc.mov(x86::dword_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, i)), konstd[BC]);
cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_INT);
break;
case REGT_STRING:
cc.mov(x86::ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, sp)), regS[BC]);
cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_STRING);
break;
case REGT_STRING | REGT_ADDROF:
cc.mov(x86::ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, a)), regS[BC]);
cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_POINTER);
break;
case REGT_STRING | REGT_KONST:
tmp = newTempIntPtr();
cc.mov(tmp, asmjit::imm_ptr(&konsts[BC]));
cc.mov(x86::ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, sp)), tmp);
cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_STRING);
break;
case REGT_POINTER:
cc.mov(x86::ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, a)), regA[BC]);
cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_POINTER);
break;
case REGT_POINTER | REGT_ADDROF:
stackPtr = newTempIntPtr();
cc.lea(stackPtr, x86::ptr(vmframe, offsetA + (int)(BC * sizeof(void*))));
cc.mov(x86::ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, a)), stackPtr);
cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_POINTER);
break;
case REGT_POINTER | REGT_KONST:
tmp = newTempIntPtr();
cc.mov(tmp, asmjit::imm_ptr(konsta[BC].v));
cc.mov(x86::ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, a)), tmp);
cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_POINTER);
break;
case REGT_FLOAT:
cc.movsd(x86::qword_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, f)), regF[BC]);
cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_FLOAT);
break;
case REGT_FLOAT | REGT_MULTIREG2:
cc.movsd(x86::qword_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, f)), regF[BC]);
cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_FLOAT);
index = NumParam++;
ParamOpcodes.Push(pc);
cc.movsd(x86::qword_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, f)), regF[BC + 1]);
cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_FLOAT);
break;
case REGT_FLOAT | REGT_MULTIREG3:
cc.movsd(x86::qword_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, f)), regF[BC]);
cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_FLOAT);
index = NumParam++;
ParamOpcodes.Push(pc);
cc.movsd(x86::qword_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, f)), regF[BC + 1]);
cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_FLOAT);
index = NumParam++;
ParamOpcodes.Push(pc);
cc.movsd(x86::qword_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, f)), regF[BC + 2]);
cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_FLOAT);
break;
case REGT_FLOAT | REGT_ADDROF:
stackPtr = newTempIntPtr();
cc.lea(stackPtr, x86::ptr(vmframe, offsetF + (int)(BC * sizeof(double))));
cc.mov(x86::ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, a)), stackPtr);
cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_POINTER);
break;
case REGT_FLOAT | REGT_KONST:
tmp = newTempIntPtr();
tmp2 = newTempXmmSd();
cc.mov(tmp, asmjit::imm_ptr(konstf + BC));
cc.movsd(tmp2, asmjit::x86::qword_ptr(tmp));
cc.movsd(x86::qword_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, f)), tmp2);
cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_FLOAT);
break;
default:
I_FatalError("Unknown REGT value passed to EmitPARAM\n");
break;
}
} }
void JitCompiler::EmitPARAMI() void JitCompiler::EmitPARAMI()
{ {
int index = NumParam++;
ParamOpcodes.Push(pc); ParamOpcodes.Push(pc);
cc.mov(asmjit::x86::dword_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, i)), (int)ABCs);
cc.mov(asmjit::x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_INT);
} }
void JitCompiler::EmitRESULT() void JitCompiler::EmitRESULT()
@ -120,6 +18,36 @@ void JitCompiler::EmitRESULT()
// be executed. // be executed.
} }
// Opcode handler for VTBL. Intentionally emits nothing at this point.
void JitCompiler::EmitVTBL()
{
	// This instruction is handled in the CALL/CALL_K instruction following it
	// (EmitDoCall checks whether the preceding opcode is OP_VTBL and, if so,
	// passes it to EmitVtbl).
}
// Helper handed to CreateCall by the VTBL emitter: returns entry `c` of the
// Virtuals table belonging to the class of object `o`.
// Debug builds assert that `c` is below the table size; `o` is dereferenced
// without a null check here.
static VMFunction *GetVirtual(DObject *o, int c)
{
	const auto cls = o->GetClass();
	assert(c < static_cast<int>(cls->Virtuals.Size()));
	return cls->Virtuals[c];
}
void JitCompiler::EmitVtbl(const VMOP *op)
{
int a = op->a;
int b = op->b;
int c = op->c;
auto label = EmitThrowExceptionLabel(X_READ_NIL);
cc.test(regA[b], regA[b]);
cc.jz(label);
auto result = newResultIntPtr();
auto call = CreateCall<VMFunction*, DObject*, int>(GetVirtual);
call->setRet(0, result);
call->setArg(0, regA[b]);
call->setArg(1, asmjit::Imm(c));
cc.mov(regA[a], result);
}
void JitCompiler::EmitCALL() void JitCompiler::EmitCALL()
{ {
EmitDoCall(regA[A]); EmitDoCall(regA[A]);
@ -136,22 +64,24 @@ void JitCompiler::EmitDoCall(asmjit::X86Gp vmfunc)
{ {
using namespace asmjit; using namespace asmjit;
if (NumParam < B) int numparams = StoreCallParams();
if (numparams != B)
I_FatalError("OP_CALL parameter count does not match the number of preceding OP_PARAM instructions"); I_FatalError("OP_CALL parameter count does not match the number of preceding OP_PARAM instructions");
StoreInOuts(B); if ((pc - 1)->op == OP_VTBL)
EmitVtbl(pc - 1);
FillReturns(pc + 1, C); FillReturns(pc + 1, C);
X86Gp paramsptr = newTempIntPtr(); X86Gp paramsptr = newTempIntPtr();
cc.lea(paramsptr, x86::ptr(vmframe, offsetParams + (int)((NumParam - B) * sizeof(VMValue)))); cc.lea(paramsptr, x86::ptr(vmframe, offsetParams));
EmitScriptCall(vmfunc, paramsptr); EmitScriptCall(vmfunc, paramsptr);
LoadInOuts(B); LoadInOuts();
LoadReturns(pc + 1, C); LoadReturns(pc + 1, C);
NumParam -= B; ParamOpcodes.Clear();
ParamOpcodes.Resize(ParamOpcodes.Size() - B);
pc += C; // Skip RESULTs pc += C; // Skip RESULTs
} }
@ -173,55 +103,127 @@ void JitCompiler::EmitScriptCall(asmjit::X86Gp vmfunc, asmjit::X86Gp paramsptr)
call->setArg(4, Imm(C)); call->setArg(4, Imm(C));
} }
void JitCompiler::StoreInOuts(int b) int JitCompiler::StoreCallParams()
{ {
using namespace asmjit; using namespace asmjit;
for (unsigned int i = ParamOpcodes.Size() - b; i < ParamOpcodes.Size(); i++) X86Gp stackPtr = newTempIntPtr();
X86Gp tmp = newTempIntPtr();
X86Xmm tmp2 = newTempXmmSd();
int numparams = 0;
for (unsigned int i = 0; i < ParamOpcodes.Size(); i++)
{ {
asmjit::X86Gp stackPtr; int slot = numparams++;
auto bc = ParamOpcodes[i]->i16u;
if (ParamOpcodes[i]->op == OP_PARAMI)
{
int abcs = ParamOpcodes[i]->i24;
cc.mov(asmjit::x86::dword_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, i)), abcs);
cc.mov(asmjit::x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_INT);
continue;
}
int bc = ParamOpcodes[i]->i16u;
switch (ParamOpcodes[i]->a) switch (ParamOpcodes[i]->a)
{ {
case REGT_NIL:
cc.mov(x86::ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, a)), (int64_t)0);
cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_NIL);
break;
case REGT_INT:
cc.mov(x86::dword_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, i)), regD[bc]);
cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_INT);
break;
case REGT_INT | REGT_ADDROF: case REGT_INT | REGT_ADDROF:
stackPtr = newTempIntPtr();
cc.lea(stackPtr, x86::ptr(vmframe, offsetD + (int)(bc * sizeof(int32_t)))); cc.lea(stackPtr, x86::ptr(vmframe, offsetD + (int)(bc * sizeof(int32_t))));
cc.mov(x86::dword_ptr(stackPtr), regD[bc]); cc.mov(x86::dword_ptr(stackPtr), regD[bc]);
cc.mov(x86::ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, a)), stackPtr);
cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_POINTER);
break;
case REGT_INT | REGT_KONST:
cc.mov(x86::dword_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, i)), konstd[bc]);
cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_INT);
break;
case REGT_STRING:
cc.mov(x86::ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, sp)), regS[bc]);
cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_STRING);
break; break;
case REGT_STRING | REGT_ADDROF: case REGT_STRING | REGT_ADDROF:
// We don't have to do anything in this case. String values are never moved to virtual registers. cc.mov(x86::ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, a)), regS[bc]);
cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_POINTER);
break;
case REGT_STRING | REGT_KONST:
cc.mov(tmp, asmjit::imm_ptr(&konsts[bc]));
cc.mov(x86::ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, sp)), tmp);
cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_STRING);
break;
case REGT_POINTER:
cc.mov(x86::ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, a)), regA[bc]);
cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_POINTER);
break; break;
case REGT_POINTER | REGT_ADDROF: case REGT_POINTER | REGT_ADDROF:
stackPtr = newTempIntPtr();
cc.lea(stackPtr, x86::ptr(vmframe, offsetA + (int)(bc * sizeof(void*)))); cc.lea(stackPtr, x86::ptr(vmframe, offsetA + (int)(bc * sizeof(void*))));
cc.mov(x86::ptr(stackPtr), regA[bc]); cc.mov(x86::ptr(stackPtr), regA[bc]);
cc.mov(x86::ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, a)), stackPtr);
cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_POINTER);
break;
case REGT_POINTER | REGT_KONST:
cc.mov(tmp, asmjit::imm_ptr(konsta[bc].v));
cc.mov(x86::ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, a)), tmp);
cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_POINTER);
break;
case REGT_FLOAT:
cc.movsd(x86::qword_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, f)), regF[bc]);
cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_FLOAT);
break;
case REGT_FLOAT | REGT_MULTIREG2:
for (int j = 0; j < 2; j++)
{
cc.movsd(x86::qword_ptr(vmframe, offsetParams + (slot + j) * sizeof(VMValue) + myoffsetof(VMValue, f)), regF[bc + j]);
cc.mov(x86::byte_ptr(vmframe, offsetParams + (slot + j) * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_FLOAT);
}
numparams++;
break;
case REGT_FLOAT | REGT_MULTIREG3:
for (int j = 0; j < 3; j++)
{
cc.movsd(x86::qword_ptr(vmframe, offsetParams + (slot + j) * sizeof(VMValue) + myoffsetof(VMValue, f)), regF[bc + j]);
cc.mov(x86::byte_ptr(vmframe, offsetParams + (slot + j) * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_FLOAT);
}
numparams += 2;
break; break;
case REGT_FLOAT | REGT_ADDROF: case REGT_FLOAT | REGT_ADDROF:
stackPtr = newTempIntPtr();
cc.lea(stackPtr, x86::ptr(vmframe, offsetF + (int)(bc * sizeof(double)))); cc.lea(stackPtr, x86::ptr(vmframe, offsetF + (int)(bc * sizeof(double))));
cc.movsd(x86::qword_ptr(stackPtr), regF[bc]);
// When passing the address to a float we don't know if the receiving function will treat it as float, vec2 or vec3. // When passing the address to a float we don't know if the receiving function will treat it as float, vec2 or vec3.
if ((unsigned int)bc + 1 < regF.Size()) for (int j = 0; j < 3; j++)
{ {
cc.add(stackPtr, (int)sizeof(double)); if ((unsigned int)(bc + j) < regF.Size())
cc.movsd(x86::qword_ptr(stackPtr), regF[bc + 1]); cc.movsd(x86::qword_ptr(stackPtr, j * sizeof(double)), regF[bc + j]);
}
if ((unsigned int)bc + 2 < regF.Size())
{
cc.add(stackPtr, (int)sizeof(double));
cc.movsd(x86::qword_ptr(stackPtr), regF[bc + 2]);
} }
cc.mov(x86::ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, a)), stackPtr);
cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_POINTER);
break; break;
case REGT_FLOAT | REGT_KONST:
cc.mov(tmp, asmjit::imm_ptr(konstf + bc));
cc.movsd(tmp2, asmjit::x86::qword_ptr(tmp));
cc.movsd(x86::qword_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, f)), tmp2);
cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_FLOAT);
break;
default: default:
I_FatalError("Unknown REGT value passed to EmitPARAM\n");
break; break;
} }
} }
return numparams;
} }
void JitCompiler::LoadInOuts(int b) void JitCompiler::LoadInOuts()
{ {
for (unsigned int i = ParamOpcodes.Size() - b; i < ParamOpcodes.Size(); i++) for (unsigned int i = 0; i < ParamOpcodes.Size(); i++)
{ {
const VMOP &param = *ParamOpcodes[i]; const VMOP &param = *ParamOpcodes[i];
if (param.op == OP_PARAM && (param.a & REGT_ADDROF)) if (param.op == OP_PARAM && (param.a & REGT_ADDROF))
@ -321,7 +323,7 @@ void JitCompiler::FillReturns(const VMOP *retval, int numret)
break; break;
} }
cc.mov(x86::ptr(callReturns, i * sizeof(VMReturn) + offsetof(VMReturn, Location)), regPtr); cc.mov(x86::ptr(callReturns, i * sizeof(VMReturn) + myoffsetof(VMReturn, Location)), regPtr);
cc.mov(x86::byte_ptr(callReturns, i * sizeof(VMReturn) + offsetof(VMReturn, RegType)), type); cc.mov(x86::byte_ptr(callReturns, i * sizeof(VMReturn) + myoffsetof(VMReturn, RegType)), type);
} }
} }

View file

@ -45,27 +45,6 @@ void JitCompiler::EmitIJMP()
EmitThrowException(X_OTHER); EmitThrowException(X_OTHER);
} }
// Helper handed to CreateCall by the VTBL emitter: returns entry `c` of the
// Virtuals table belonging to the class of object `o`.
// Debug builds assert that `c` is below the table size; `o` is dereferenced
// without a null check here.
static VMFunction *GetVirtual(DObject *o, int c)
{
	const auto cls = o->GetClass();
	assert(c < static_cast<int>(cls->Virtuals.Size()));
	return cls->Virtuals[c];
}
// Opcode handler for VTBL, using the A/B/C operands of the instruction being
// compiled: tests regA[B] and jumps to the X_READ_NIL exception label when it
// is zero, otherwise calls GetVirtual(regA[B], C) and moves the returned
// pointer into regA[A].
void JitCompiler::EmitVTBL()
{
	// Null-object guard.
	auto nilLabel = EmitThrowExceptionLabel(X_READ_NIL);
	cc.test(regA[B], regA[B]);
	cc.jz(nilLabel);

	// Resolve the virtual function through the GetVirtual helper.
	auto vmfunc = newResultIntPtr();
	auto lookup = CreateCall<VMFunction*, DObject*, int>(GetVirtual);
	lookup->setRet(0, vmfunc);
	lookup->setArg(0, regA[B]);
	lookup->setArg(1, asmjit::Imm(C));
	cc.mov(regA[A], vmfunc);
}
static void ValidateCall(DObject *o, VMFunction *f, int b) static void ValidateCall(DObject *o, VMFunction *f, int b)
{ {
try try

View file

@ -54,9 +54,10 @@ private:
void EmitDoCall(asmjit::X86Gp ptr); void EmitDoCall(asmjit::X86Gp ptr);
void EmitScriptCall(asmjit::X86Gp vmfunc, asmjit::X86Gp paramsptr); void EmitScriptCall(asmjit::X86Gp vmfunc, asmjit::X86Gp paramsptr);
void EmitVtbl(const VMOP *op);
void StoreInOuts(int b); int StoreCallParams();
void LoadInOuts(int b); void LoadInOuts();
void LoadReturns(const VMOP *retval, int numret); void LoadReturns(const VMOP *retval, int numret);
void FillReturns(const VMOP *retval, int numret); void FillReturns(const VMOP *retval, int numret);
void LoadCallResult(int type, int regnum, bool addrof); void LoadCallResult(int type, int regnum, bool addrof);