From 486b7e0f3c869b58b61628dbf6d657150002a818 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 18 Nov 2018 12:38:55 +0100 Subject: [PATCH] - delay emitting PARAM and VTBL instructions until CALL/CALL_K --- src/scripting/vm/jit_call.cpp | 262 +++++++++++++++++----------------- src/scripting/vm/jit_flow.cpp | 21 --- src/scripting/vm/jitintern.h | 5 +- 3 files changed, 135 insertions(+), 153 deletions(-) diff --git a/src/scripting/vm/jit_call.cpp b/src/scripting/vm/jit_call.cpp index 0c2d8115b..fbacb1f91 100644 --- a/src/scripting/vm/jit_call.cpp +++ b/src/scripting/vm/jit_call.cpp @@ -3,114 +3,12 @@ void JitCompiler::EmitPARAM() { - using namespace asmjit; - - int index = NumParam++; ParamOpcodes.Push(pc); - - X86Gp stackPtr, tmp; - X86Xmm tmp2; - - switch (A) - { - case REGT_NIL: - cc.mov(x86::ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, a)), (int64_t)0); - cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_NIL); - break; - case REGT_INT: - cc.mov(x86::dword_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, i)), regD[BC]); - cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_INT); - break; - case REGT_INT | REGT_ADDROF: - stackPtr = newTempIntPtr(); - cc.lea(stackPtr, x86::ptr(vmframe, offsetD + (int)(BC * sizeof(int32_t)))); - cc.mov(x86::ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, a)), stackPtr); - cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_POINTER); - break; - case REGT_INT | REGT_KONST: - cc.mov(x86::dword_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, i)), konstd[BC]); - cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_INT); - break; - case REGT_STRING: - cc.mov(x86::ptr(vmframe, offsetParams + index * sizeof(VMValue) + 
offsetof(VMValue, sp)), regS[BC]); - cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_STRING); - break; - case REGT_STRING | REGT_ADDROF: - cc.mov(x86::ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, a)), regS[BC]); - cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_POINTER); - break; - case REGT_STRING | REGT_KONST: - tmp = newTempIntPtr(); - cc.mov(tmp, asmjit::imm_ptr(&konsts[BC])); - cc.mov(x86::ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, sp)), tmp); - cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_STRING); - break; - case REGT_POINTER: - cc.mov(x86::ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, a)), regA[BC]); - cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_POINTER); - break; - case REGT_POINTER | REGT_ADDROF: - stackPtr = newTempIntPtr(); - cc.lea(stackPtr, x86::ptr(vmframe, offsetA + (int)(BC * sizeof(void*)))); - cc.mov(x86::ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, a)), stackPtr); - cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_POINTER); - break; - case REGT_POINTER | REGT_KONST: - tmp = newTempIntPtr(); - cc.mov(tmp, asmjit::imm_ptr(konsta[BC].v)); - cc.mov(x86::ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, a)), tmp); - cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_POINTER); - break; - case REGT_FLOAT: - cc.movsd(x86::qword_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, f)), regF[BC]); - cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_FLOAT); - break; - case REGT_FLOAT | REGT_MULTIREG2: - 
cc.movsd(x86::qword_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, f)), regF[BC]); - cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_FLOAT); - index = NumParam++; - ParamOpcodes.Push(pc); - cc.movsd(x86::qword_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, f)), regF[BC + 1]); - cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_FLOAT); - break; - case REGT_FLOAT | REGT_MULTIREG3: - cc.movsd(x86::qword_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, f)), regF[BC]); - cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_FLOAT); - index = NumParam++; - ParamOpcodes.Push(pc); - cc.movsd(x86::qword_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, f)), regF[BC + 1]); - cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_FLOAT); - index = NumParam++; - ParamOpcodes.Push(pc); - cc.movsd(x86::qword_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, f)), regF[BC + 2]); - cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_FLOAT); - break; - case REGT_FLOAT | REGT_ADDROF: - stackPtr = newTempIntPtr(); - cc.lea(stackPtr, x86::ptr(vmframe, offsetF + (int)(BC * sizeof(double)))); - cc.mov(x86::ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, a)), stackPtr); - cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_POINTER); - break; - case REGT_FLOAT | REGT_KONST: - tmp = newTempIntPtr(); - tmp2 = newTempXmmSd(); - cc.mov(tmp, asmjit::imm_ptr(konstf + BC)); - cc.movsd(tmp2, asmjit::x86::qword_ptr(tmp)); - cc.movsd(x86::qword_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, f)), tmp2); - 
cc.mov(x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_FLOAT); - break; - default: - I_FatalError("Unknown REGT value passed to EmitPARAM\n"); - break; - } } void JitCompiler::EmitPARAMI() { - int index = NumParam++; ParamOpcodes.Push(pc); - cc.mov(asmjit::x86::dword_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, i)), (int)ABCs); - cc.mov(asmjit::x86::byte_ptr(vmframe, offsetParams + index * sizeof(VMValue) + offsetof(VMValue, Type)), (int)REGT_INT); } void JitCompiler::EmitRESULT() @@ -120,6 +18,36 @@ void JitCompiler::EmitRESULT() // be executed. } +void JitCompiler::EmitVTBL() +{ + // This instruction is handled in the CALL/CALL_K instruction following it +} + +static VMFunction *GetVirtual(DObject *o, int c) +{ + auto p = o->GetClass(); + assert(c < (int)p->Virtuals.Size()); + return p->Virtuals[c]; +} + +void JitCompiler::EmitVtbl(const VMOP *op) +{ + int a = op->a; + int b = op->b; + int c = op->c; + + auto label = EmitThrowExceptionLabel(X_READ_NIL); + cc.test(regA[b], regA[b]); + cc.jz(label); + + auto result = newResultIntPtr(); + auto call = CreateCall(GetVirtual); + call->setRet(0, result); + call->setArg(0, regA[b]); + call->setArg(1, asmjit::Imm(c)); + cc.mov(regA[a], result); +} + void JitCompiler::EmitCALL() { EmitDoCall(regA[A]); @@ -136,22 +64,24 @@ void JitCompiler::EmitDoCall(asmjit::X86Gp vmfunc) { using namespace asmjit; - if (NumParam < B) + int numparams = StoreCallParams(); + if (numparams != B) I_FatalError("OP_CALL parameter count does not match the number of preceding OP_PARAM instructions"); - StoreInOuts(B); + if ((pc - 1)->op == OP_VTBL) + EmitVtbl(pc - 1); + FillReturns(pc + 1, C); X86Gp paramsptr = newTempIntPtr(); - cc.lea(paramsptr, x86::ptr(vmframe, offsetParams + (int)((NumParam - B) * sizeof(VMValue)))); + cc.lea(paramsptr, x86::ptr(vmframe, offsetParams)); EmitScriptCall(vmfunc, paramsptr); - LoadInOuts(B); + LoadInOuts(); LoadReturns(pc + 1, C); - 
NumParam -= B; - ParamOpcodes.Resize(ParamOpcodes.Size() - B); + ParamOpcodes.Clear(); pc += C; // Skip RESULTs } @@ -173,55 +103,127 @@ void JitCompiler::EmitScriptCall(asmjit::X86Gp vmfunc, asmjit::X86Gp paramsptr) call->setArg(4, Imm(C)); } -void JitCompiler::StoreInOuts(int b) +int JitCompiler::StoreCallParams() { using namespace asmjit; - for (unsigned int i = ParamOpcodes.Size() - b; i < ParamOpcodes.Size(); i++) + X86Gp stackPtr = newTempIntPtr(); + X86Gp tmp = newTempIntPtr(); + X86Xmm tmp2 = newTempXmmSd(); + + int numparams = 0; + for (unsigned int i = 0; i < ParamOpcodes.Size(); i++) { - asmjit::X86Gp stackPtr; - auto bc = ParamOpcodes[i]->i16u; + int slot = numparams++; + + if (ParamOpcodes[i]->op == OP_PARAMI) + { + int abcs = ParamOpcodes[i]->i24; + cc.mov(asmjit::x86::dword_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, i)), abcs); + cc.mov(asmjit::x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_INT); + continue; + } + + int bc = ParamOpcodes[i]->i16u; + switch (ParamOpcodes[i]->a) { + case REGT_NIL: + cc.mov(x86::ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, a)), (int64_t)0); + cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_NIL); + break; + case REGT_INT: + cc.mov(x86::dword_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, i)), regD[bc]); + cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_INT); + break; case REGT_INT | REGT_ADDROF: - stackPtr = newTempIntPtr(); cc.lea(stackPtr, x86::ptr(vmframe, offsetD + (int)(bc * sizeof(int32_t)))); cc.mov(x86::dword_ptr(stackPtr), regD[bc]); + cc.mov(x86::ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, a)), stackPtr); + cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_POINTER); + 
break; + case REGT_INT | REGT_KONST: + cc.mov(x86::dword_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, i)), konstd[bc]); + cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_INT); + break; + case REGT_STRING: + cc.mov(x86::ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, sp)), regS[bc]); + cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_STRING); break; case REGT_STRING | REGT_ADDROF: - // We don't have to do anything in this case. String values are never moved to virtual registers. + cc.mov(x86::ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, a)), regS[bc]); + cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_POINTER); + break; + case REGT_STRING | REGT_KONST: + cc.mov(tmp, asmjit::imm_ptr(&konsts[bc])); + cc.mov(x86::ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, sp)), tmp); + cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_STRING); + break; + case REGT_POINTER: + cc.mov(x86::ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, a)), regA[bc]); + cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_POINTER); break; case REGT_POINTER | REGT_ADDROF: - stackPtr = newTempIntPtr(); cc.lea(stackPtr, x86::ptr(vmframe, offsetA + (int)(bc * sizeof(void*)))); cc.mov(x86::ptr(stackPtr), regA[bc]); + cc.mov(x86::ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, a)), stackPtr); + cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_POINTER); + break; + case REGT_POINTER | REGT_KONST: + cc.mov(tmp, asmjit::imm_ptr(konsta[bc].v)); + cc.mov(x86::ptr(vmframe, offsetParams + slot * sizeof(VMValue) + 
myoffsetof(VMValue, a)), tmp); + cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_POINTER); + break; + case REGT_FLOAT: + cc.movsd(x86::qword_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, f)), regF[bc]); + cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_FLOAT); + break; + case REGT_FLOAT | REGT_MULTIREG2: + for (int j = 0; j < 2; j++) + { + cc.movsd(x86::qword_ptr(vmframe, offsetParams + (slot + j) * sizeof(VMValue) + myoffsetof(VMValue, f)), regF[bc + j]); + cc.mov(x86::byte_ptr(vmframe, offsetParams + (slot + j) * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_FLOAT); + } + numparams++; + break; + case REGT_FLOAT | REGT_MULTIREG3: + for (int j = 0; j < 3; j++) + { + cc.movsd(x86::qword_ptr(vmframe, offsetParams + (slot + j) * sizeof(VMValue) + myoffsetof(VMValue, f)), regF[bc + j]); + cc.mov(x86::byte_ptr(vmframe, offsetParams + (slot + j) * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_FLOAT); + } + numparams += 2; break; case REGT_FLOAT | REGT_ADDROF: - stackPtr = newTempIntPtr(); cc.lea(stackPtr, x86::ptr(vmframe, offsetF + (int)(bc * sizeof(double)))); - cc.movsd(x86::qword_ptr(stackPtr), regF[bc]); - // When passing the address to a float we don't know if the receiving function will treat it as float, vec2 or vec3. 
- if ((unsigned int)bc + 1 < regF.Size()) + for (int j = 0; j < 3; j++) { - cc.add(stackPtr, (int)sizeof(double)); - cc.movsd(x86::qword_ptr(stackPtr), regF[bc + 1]); - } - if ((unsigned int)bc + 2 < regF.Size()) - { - cc.add(stackPtr, (int)sizeof(double)); - cc.movsd(x86::qword_ptr(stackPtr), regF[bc + 2]); + if ((unsigned int)(bc + j) < regF.Size()) + cc.movsd(x86::qword_ptr(stackPtr, j * sizeof(double)), regF[bc + j]); } + cc.mov(x86::ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, a)), stackPtr); + cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_POINTER); break; + case REGT_FLOAT | REGT_KONST: + cc.mov(tmp, asmjit::imm_ptr(konstf + bc)); + cc.movsd(tmp2, asmjit::x86::qword_ptr(tmp)); + cc.movsd(x86::qword_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, f)), tmp2); + cc.mov(x86::byte_ptr(vmframe, offsetParams + slot * sizeof(VMValue) + myoffsetof(VMValue, Type)), (int)REGT_FLOAT); + break; + default: + I_FatalError("Unknown REGT value passed to EmitPARAM\n"); break; } } + + return numparams; } -void JitCompiler::LoadInOuts(int b) +void JitCompiler::LoadInOuts() { - for (unsigned int i = ParamOpcodes.Size() - b; i < ParamOpcodes.Size(); i++) + for (unsigned int i = 0; i < ParamOpcodes.Size(); i++) { const VMOP &param = *ParamOpcodes[i]; if (param.op == OP_PARAM && (param.a & REGT_ADDROF)) @@ -321,7 +323,7 @@ void JitCompiler::FillReturns(const VMOP *retval, int numret) break; } - cc.mov(x86::ptr(callReturns, i * sizeof(VMReturn) + offsetof(VMReturn, Location)), regPtr); - cc.mov(x86::byte_ptr(callReturns, i * sizeof(VMReturn) + offsetof(VMReturn, RegType)), type); + cc.mov(x86::ptr(callReturns, i * sizeof(VMReturn) + myoffsetof(VMReturn, Location)), regPtr); + cc.mov(x86::byte_ptr(callReturns, i * sizeof(VMReturn) + myoffsetof(VMReturn, RegType)), type); } } diff --git a/src/scripting/vm/jit_flow.cpp b/src/scripting/vm/jit_flow.cpp index 8c89e36da..b0b66ce2a 
100644 --- a/src/scripting/vm/jit_flow.cpp +++ b/src/scripting/vm/jit_flow.cpp @@ -45,27 +45,6 @@ void JitCompiler::EmitIJMP() EmitThrowException(X_OTHER); } -static VMFunction *GetVirtual(DObject *o, int c) -{ - auto p = o->GetClass(); - assert(c < (int)p->Virtuals.Size()); - return p->Virtuals[c]; -} - -void JitCompiler::EmitVTBL() -{ - auto label = EmitThrowExceptionLabel(X_READ_NIL); - cc.test(regA[B], regA[B]); - cc.jz(label); - - auto result = newResultIntPtr(); - auto call = CreateCall(GetVirtual); - call->setRet(0, result); - call->setArg(0, regA[B]); - call->setArg(1, asmjit::Imm(C)); - cc.mov(regA[A], result); -} - static void ValidateCall(DObject *o, VMFunction *f, int b) { try diff --git a/src/scripting/vm/jitintern.h b/src/scripting/vm/jitintern.h index 34099158a..3b6e6b3e8 100644 --- a/src/scripting/vm/jitintern.h +++ b/src/scripting/vm/jitintern.h @@ -54,9 +54,10 @@ private: void EmitDoCall(asmjit::X86Gp ptr); void EmitScriptCall(asmjit::X86Gp vmfunc, asmjit::X86Gp paramsptr); + void EmitVtbl(const VMOP *op); - void StoreInOuts(int b); - void LoadInOuts(int b); + int StoreCallParams(); + void LoadInOuts(); void LoadReturns(const VMOP *retval, int numret); void FillReturns(const VMOP *retval, int numret); void LoadCallResult(int type, int regnum, bool addrof);