From 34b037c9db8e3aa0f0afb4b82d913c9963aec0ee Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 16 Nov 2018 01:13:25 +0100 Subject: [PATCH] - avoid calling VMFillParams for simple stack frames - split Setup into more functions --- src/scripting/backend/vmbuilder.cpp | 2 + src/scripting/vm/jit.cpp | 438 +++++++++++++++------------- src/scripting/vm/jitintern.h | 6 + src/scripting/vm/vm.h | 1 + 4 files changed, 248 insertions(+), 199 deletions(-) diff --git a/src/scripting/backend/vmbuilder.cpp b/src/scripting/backend/vmbuilder.cpp index 11898d09d5..c9df0c5e6c 100644 --- a/src/scripting/backend/vmbuilder.cpp +++ b/src/scripting/backend/vmbuilder.cpp @@ -814,6 +814,7 @@ VMFunction *FFunctionBuildList::AddFunction(PNamespace *gnspc, const VersionInfo if (it.Func->SymbolName != NAME_None) { it.Function->Proto = it.Func->Variants[0].Proto; + it.Function->ArgFlags = it.Func->Variants[0].ArgFlags; } mItems.Push(it); @@ -885,6 +886,7 @@ void FFunctionBuildList::Build() if (sfunc->Proto == nullptr) { sfunc->Proto = NewPrototype(item.Proto->ReturnTypes, item.Func->Variants[0].Proto->ArgumentTypes); + sfunc->ArgFlags = item.Func->Variants[0].ArgFlags; } // Emit code diff --git a/src/scripting/vm/jit.cpp b/src/scripting/vm/jit.cpp index a3964d4b2a..53272bb053 100644 --- a/src/scripting/vm/jit.cpp +++ b/src/scripting/vm/jit.cpp @@ -471,6 +471,245 @@ void JitCompiler::Setup() konsts = sfunc->KonstS; konsta = sfunc->KonstA; + labels.Resize(sfunc->CodeSize); + + CreateRegisters(); + IncrementVMCalls(); + SetupFrame(); +} + +void JitCompiler::SetupFrame() +{ + // the VM version reads this from the stack, but it is constant data + offsetParams = ((int)sizeof(VMFrame) + 15) & ~15; + offsetF = offsetParams + (int)(sfunc->MaxParam * sizeof(VMValue)); + offsetS = offsetF + (int)(sfunc->NumRegF * sizeof(double)); + offsetA = offsetS + (int)(sfunc->NumRegS * sizeof(FString)); + offsetD = offsetA + (int)(sfunc->NumRegA * sizeof(void*)); + offsetExtra = (offsetD + (int)(sfunc->NumRegD * sizeof(int32_t)) + 15) & ~15; + + vmframe = cc.newIntPtr("vmframe"); + + if (sfunc->SpecialInits.Size() == 0 && sfunc->NumRegS == 0) + { + // This is a simple frame with no constructors or destructors. Allocate it on the stack ourselves. + + auto vmstack = cc.newStack(sfunc->StackSize, 16, "vmstack"); + cc.lea(vmframe, vmstack); + + auto slowinit = cc.newLabel(); + auto endinit = cc.newLabel(); + + cc.cmp(numargs, sfunc->NumArgs); + cc.jne(slowinit); + SetupSimpleFrame(vmstack); + cc.jmp(endinit); + cc.bind(slowinit); + SetupSimpleFrameMissingArgs(vmstack); // Does this ever happen? + cc.bind(endinit); + } + else + { + SetupFullVMFrame(); + } +} + +void JitCompiler::SetupSimpleFrame(asmjit::X86Mem vmstack) +{ + using namespace asmjit; + + int argsPos = 0; + int regd = 0, regf = 0, rega = 0; + for (unsigned int i = 0; i < sfunc->Proto->ArgumentTypes.Size(); i++) + { + const PType *type = sfunc->Proto->ArgumentTypes[i]; + if (sfunc->ArgFlags[i] & (VARF_Out | VARF_Ref)) + { + cc.mov(regA[rega++], x86::ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, a))); + } + else if (type == TypeVector2) + { + cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); + cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); + } + else if (type == TypeVector3) + { + cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); + cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); + cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); + } + else if (type == TypeFloat64) + { + cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); + } + else if (type == TypeString) + { + I_FatalError("JIT: Strings are not supported yet for simple frames"); + } + else if (type->isIntCompatible()) + { + cc.mov(regD[regd++], x86::dword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, i))); + } + else + { + cc.mov(regA[rega++], x86::ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, a))); + } + } + + if (sfunc->NumArgs != argsPos || regd > sfunc->NumRegD || regf > sfunc->NumRegF || rega > sfunc->NumRegA) + I_FatalError("JIT: sfunc->NumArgs != argsPos || regd > sfunc->NumRegD || regf > sfunc->NumRegF || rega > sfunc->NumRegA"); + + for (int i = regd; i < sfunc->NumRegD; i++) + cc.xor_(regD[i], regD[i]); + + for (int i = regf; i < sfunc->NumRegF; i++) + cc.xorpd(regF[i], regF[i]); + + for (int i = rega; i < sfunc->NumRegA; i++) + cc.xor_(regA[i], regA[i]); +} + +void JitCompiler::SetupSimpleFrameMissingArgs(asmjit::X86Mem vmstack) +{ + using namespace asmjit; + + auto sfuncptr = newTempIntPtr(); + cc.mov(sfuncptr, imm_ptr(sfunc)); + if (cc.is64Bit()) + cc.mov(x86::qword_ptr(vmframe, offsetof(VMFrame, Func)), sfuncptr); + else + cc.mov(x86::dword_ptr(vmframe, offsetof(VMFrame, Func)), sfuncptr); + cc.mov(x86::byte_ptr(vmframe, offsetof(VMFrame, NumRegD)), sfunc->NumRegD); + cc.mov(x86::byte_ptr(vmframe, offsetof(VMFrame, NumRegF)), sfunc->NumRegF); + cc.mov(x86::byte_ptr(vmframe, offsetof(VMFrame, NumRegS)), sfunc->NumRegS); + cc.mov(x86::byte_ptr(vmframe, offsetof(VMFrame, NumRegA)), sfunc->NumRegA); + cc.mov(x86::word_ptr(vmframe, offsetof(VMFrame, MaxParam)), sfunc->MaxParam); + cc.mov(x86::word_ptr(vmframe, offsetof(VMFrame, NumParam)), 0); + + // Zero initialize the variables (retardedly stupid to do here - should be done by the compiler backend!!) + unsigned int clearbegin = (unsigned int)offsetof(VMFrame, NumParam) + 2; + unsigned int clearend = sfunc->StackSize; + unsigned int sseend = clearbegin + (clearend - clearbegin) / 16 * 16; + if (clearbegin < sseend) + { + auto zerosse = newTempXmmPd(); + cc.xorpd(zerosse, zerosse); + for (unsigned int i = clearbegin; i < sseend; i += 16) + cc.movupd(x86::ptr(vmframe, i), zerosse); + } + if (sseend < clearend) + { + auto zero32 = newTempInt32(); + cc.xor_(zero32, zero32); + + unsigned int dwordend = sseend + (clearend - sseend) / 4 * 4; + for (unsigned int i = sseend; i < dwordend; i += 4) + cc.mov(asmjit::x86::dword_ptr(vmframe, i), zero32); + + for (unsigned int i = dwordend; i < clearend; i++) + cc.mov(asmjit::x86::byte_ptr(vmframe, i), zero32.r8Lo()); + } + + auto fillParams = CreateCall([](VMFrame *newf, VMValue *args, int numargs) { + try + { + VMFillParams(args, newf, numargs); + } + catch (...) + { + VMThrowException(std::current_exception()); + } + }); + fillParams->setArg(0, vmframe); + fillParams->setArg(1, args); + fillParams->setArg(2, numargs); + + for (int i = 0; i < sfunc->NumRegD; i++) + cc.mov(regD[i], x86::dword_ptr(vmframe, offsetD + i * sizeof(int32_t))); + + for (int i = 0; i < sfunc->NumRegF; i++) + cc.movsd(regF[i], x86::qword_ptr(vmframe, offsetF + i * sizeof(double))); + + for (int i = 0; i < sfunc->NumRegS; i++) + cc.lea(regS[i], x86::ptr(vmframe, offsetS + i * sizeof(FString))); + + for (int i = 0; i < sfunc->NumRegA; i++) + cc.mov(regA[i], x86::ptr(vmframe, offsetA + i * sizeof(void*))); +} + +void JitCompiler::SetupFullVMFrame() +{ + using namespace asmjit; + + stack = cc.newIntPtr("stack"); + auto allocFrame = CreateCall([](VMScriptFunction *func, VMValue *args, int numargs) -> VMFrameStack* { + try + { + VMFrameStack *stack = &GlobalVMStack; + VMFrame *newf = stack->AllocFrame(func); + CurrentJitExceptInfo->vmframes++; + VMFillParams(args, newf, numargs); + return stack; + } + catch (...) + { + VMThrowException(std::current_exception()); + return nullptr; + } + }); + allocFrame->setRet(0, stack); + allocFrame->setArg(0, imm_ptr(sfunc)); + allocFrame->setArg(1, args); + allocFrame->setArg(2, numargs); + + cc.mov(vmframe, x86::ptr(stack)); // stack->Blocks + cc.mov(vmframe, x86::ptr(vmframe, VMFrameStack::OffsetLastFrame())); // Blocks->LastFrame + + for (int i = 0; i < sfunc->NumRegD; i++) + cc.mov(regD[i], x86::dword_ptr(vmframe, offsetD + i * sizeof(int32_t))); + + for (int i = 0; i < sfunc->NumRegF; i++) + cc.movsd(regF[i], x86::qword_ptr(vmframe, offsetF + i * sizeof(double))); + + for (int i = 0; i < sfunc->NumRegS; i++) + cc.lea(regS[i], x86::ptr(vmframe, offsetS + i * sizeof(FString))); + + for (int i = 0; i < sfunc->NumRegA; i++) + cc.mov(regA[i], x86::ptr(vmframe, offsetA + i * sizeof(void*))); +} + +void JitCompiler::EmitPopFrame() +{ + if (sfunc->SpecialInits.Size() != 0 || sfunc->NumRegS != 0) + { + auto popFrame = CreateCall([](VMFrameStack *stack) { + try + { + stack->PopFrame(); + CurrentJitExceptInfo->vmframes--; + } + catch (...) + { + VMThrowException(std::current_exception()); + } + }); + popFrame->setArg(0, stack); + } +} + +void JitCompiler::IncrementVMCalls() +{ + // VMCalls[0]++ + auto vmcallsptr = newTempIntPtr(); + auto vmcalls = newTempInt32(); + cc.mov(vmcallsptr, asmjit::imm_ptr(VMCalls)); + cc.mov(vmcalls, asmjit::x86::dword_ptr(vmcallsptr)); + cc.add(vmcalls, (int)1); + cc.mov(asmjit::x86::dword_ptr(vmcallsptr), vmcalls); +} + +void JitCompiler::CreateRegisters() +{ regD.Resize(sfunc->NumRegD); regF.Resize(sfunc->NumRegF); regA.Resize(sfunc->NumRegA); @@ -499,205 +738,6 @@ void JitCompiler::Setup() regname.Format("regA%d", i); regA[i] = cc.newIntPtr(regname.GetChars()); } - - labels.Resize(sfunc->CodeSize); - - // VMCalls[0]++ - auto vmcallsptr = newTempIntPtr(); - auto vmcalls = newTempInt32(); - cc.mov(vmcallsptr, imm_ptr(VMCalls)); - cc.mov(vmcalls, x86::dword_ptr(vmcallsptr)); - cc.add(vmcalls, (int)1); - cc.mov(x86::dword_ptr(vmcallsptr), vmcalls); - - // the VM version reads this from the stack, but it is constant data - offsetParams = ((int)sizeof(VMFrame) + 15) & ~15; - offsetF = offsetParams + (int)(sfunc->MaxParam * sizeof(VMValue)); - offsetS = offsetF + (int)(sfunc->NumRegF * sizeof(double)); - offsetA = offsetS + (int)(sfunc->NumRegS * sizeof(FString)); - offsetD = offsetA + (int)(sfunc->NumRegA * sizeof(void*)); - offsetExtra = (offsetD + (int)(sfunc->NumRegD * sizeof(int32_t)) + 15) & ~15; - - vmframe = cc.newIntPtr("vmframe"); - - if (sfunc->SpecialInits.Size() == 0 && sfunc->NumRegS == 0) - { - // This is a simple frame with no constructors or destructors. Allocate it on the stack ourselves. - - auto vmstack = cc.newStack(sfunc->StackSize, 16, "vmstack"); - cc.lea(vmframe, vmstack); - - auto slowinit = cc.newLabel(); - auto endinit = cc.newLabel(); - -#if 0 // this crashes sometimes - cc.cmp(numargs, sfunc->NumArgs); - cc.jne(slowinit); - - // Is there a better way to know the type than this? - int argsPos = 0; - int regd = 0, regf = 0, rega = 0; - for (unsigned int i = 0; i < sfunc->Proto->ArgumentTypes.Size(); i++) - { - const PType *type = sfunc->Proto->ArgumentTypes[i]; - if (type->isPointer()) - { - cc.mov(regA[rega++], x86::ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, a))); - } - else if (type->isIntCompatible()) - { - cc.mov(regD[regd++], x86::dword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, i))); - } - else if (type == TypeVector2) - { - cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); - cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); - } - else if (type == TypeVector3) - { - cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); - cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); - cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); - } - else if (type->isFloat()) - { - cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); - } - else if (type == TypeString) - { - I_FatalError("JIT: Strings are not supported yet for simple frames"); - } - } - - if (sfunc->NumArgs != argsPos || regd > sfunc->NumRegD || regf > sfunc->NumRegF || rega > sfunc->NumRegA) - I_FatalError("JIT: sfunc->NumArgs != argsPos || regd > sfunc->NumRegD || regf > sfunc->NumRegF || rega > sfunc->NumRegA"); - - cc.jmp(endinit); -#endif - cc.bind(slowinit); - - auto sfuncptr = newTempIntPtr(); - cc.mov(sfuncptr, imm_ptr(sfunc)); - if (cc.is64Bit()) - cc.mov(x86::qword_ptr(vmframe, offsetof(VMFrame, Func)), sfuncptr); - else - cc.mov(x86::dword_ptr(vmframe, offsetof(VMFrame, Func)), sfuncptr); - cc.mov(x86::byte_ptr(vmframe, offsetof(VMFrame, NumRegD)), sfunc->NumRegD); - cc.mov(x86::byte_ptr(vmframe, offsetof(VMFrame, NumRegF)), sfunc->NumRegF); - cc.mov(x86::byte_ptr(vmframe, offsetof(VMFrame, NumRegS)), sfunc->NumRegS); - cc.mov(x86::byte_ptr(vmframe, offsetof(VMFrame, NumRegA)), sfunc->NumRegA); - cc.mov(x86::word_ptr(vmframe, offsetof(VMFrame, MaxParam)), sfunc->MaxParam); - cc.mov(x86::word_ptr(vmframe, offsetof(VMFrame, NumParam)), 0); - - // Zero initialize the variables (retardedly stupid to do here - should be done by the compiler backend!!) - unsigned int clearbegin = (unsigned int)offsetof(VMFrame, NumParam) + 2; - unsigned int clearend = sfunc->StackSize; - unsigned int sseend = clearbegin + (clearend - clearbegin) / 16 * 16; - if (clearbegin < sseend) - { - auto zerosse = newTempXmmPd(); - cc.xorpd(zerosse, zerosse); - for (unsigned int i = clearbegin; i < sseend; i += 16) - cc.movupd(x86::ptr(vmframe, i), zerosse); - } - if (sseend < clearend) - { - auto zero32 = newTempInt32(); - cc.xor_(zero32, zero32); - - unsigned int dwordend = sseend + (clearend - sseend) / 4 * 4; - for (unsigned int i = sseend; i < dwordend; i += 4) - cc.mov(asmjit::x86::dword_ptr(vmframe, i), zero32); - - for (unsigned int i = dwordend; i < clearend; i++) - cc.mov(asmjit::x86::byte_ptr(vmframe, i), zero32.r8Lo()); - } - - auto fillParams = CreateCall([](VMFrame *newf, VMValue *args, int numargs) { - try - { - VMFillParams(args, newf, numargs); - } - catch (...) - { - VMThrowException(std::current_exception()); - } - }); - fillParams->setArg(0, vmframe); - fillParams->setArg(1, args); - fillParams->setArg(2, numargs); - - for (int i = 0; i < sfunc->NumRegD; i++) - cc.mov(regD[i], x86::dword_ptr(vmframe, offsetD + i * sizeof(int32_t))); - - for (int i = 0; i < sfunc->NumRegF; i++) - cc.movsd(regF[i], x86::qword_ptr(vmframe, offsetF + i * sizeof(double))); - - for (int i = 0; i < sfunc->NumRegS; i++) - cc.lea(regS[i], x86::ptr(vmframe, offsetS + i * sizeof(FString))); - - for (int i = 0; i < sfunc->NumRegA; i++) - cc.mov(regA[i], x86::ptr(vmframe, offsetA + i * sizeof(void*))); - - cc.bind(endinit); - } - else - { - stack = cc.newIntPtr("stack"); - auto allocFrame = CreateCall([](VMScriptFunction *func, VMValue *args, int numargs) -> VMFrameStack* { - try - { - VMFrameStack *stack = &GlobalVMStack; - VMFrame *newf = stack->AllocFrame(func); - CurrentJitExceptInfo->vmframes++; - VMFillParams(args, newf, numargs); - return stack; - } - catch (...) - { - VMThrowException(std::current_exception()); - return nullptr; - } - }); - allocFrame->setRet(0, stack); - allocFrame->setArg(0, imm_ptr(sfunc)); - allocFrame->setArg(1, args); - allocFrame->setArg(2, numargs); - - cc.mov(vmframe, x86::ptr(stack)); // stack->Blocks - cc.mov(vmframe, x86::ptr(vmframe, VMFrameStack::OffsetLastFrame())); // Blocks->LastFrame - - for (int i = 0; i < sfunc->NumRegD; i++) - cc.mov(regD[i], x86::dword_ptr(vmframe, offsetD + i * sizeof(int32_t))); - - for (int i = 0; i < sfunc->NumRegF; i++) - cc.movsd(regF[i], x86::qword_ptr(vmframe, offsetF + i * sizeof(double))); - - for (int i = 0; i < sfunc->NumRegS; i++) - cc.lea(regS[i], x86::ptr(vmframe, offsetS + i * sizeof(FString))); - - for (int i = 0; i < sfunc->NumRegA; i++) - cc.mov(regA[i], x86::ptr(vmframe, offsetA + i * sizeof(void*))); - } -} - -void JitCompiler::EmitPopFrame() -{ - if (sfunc->SpecialInits.Size() != 0 || sfunc->NumRegS != 0) - { - auto popFrame = CreateCall([](VMFrameStack *stack) { - try - { - stack->PopFrame(); - CurrentJitExceptInfo->vmframes--; - } - catch (...) - { - VMThrowException(std::current_exception()); - } - }); - popFrame->setArg(0, stack); - } } void JitCompiler::EmitNullPointerThrow(int index, EVMAbortException reason) diff --git a/src/scripting/vm/jitintern.h b/src/scripting/vm/jitintern.h index b7348b773e..0f7cc74424 100644 --- a/src/scripting/vm/jitintern.h +++ b/src/scripting/vm/jitintern.h @@ -39,6 +39,12 @@ private: #undef xx void Setup(); + void CreateRegisters(); + void IncrementVMCalls(); + void SetupFrame(); + void SetupSimpleFrame(asmjit::X86Mem vmstack); + void SetupSimpleFrameMissingArgs(asmjit::X86Mem vmstack); + void SetupFullVMFrame(); void BindLabels(); void EmitOpcode(); void EmitPopFrame(); diff --git a/src/scripting/vm/vm.h b/src/scripting/vm/vm.h index 2ceeebae97..8289097790 100644 --- a/src/scripting/vm/vm.h +++ b/src/scripting/vm/vm.h @@ -329,6 +329,7 @@ public: FString PrintableName; // so that the VM can print meaningful info if something in this function goes wrong. class PPrototype *Proto; + TArray ArgFlags; // Should be the same length as Proto->ArgumentTypes int(*ScriptCall)(VMFunction *func, VMValue *params, int numparams, VMReturn *ret, int numret) = nullptr;