From f3e0db913c89fff03d24f6638f076915093a85f7 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 15 Nov 2018 22:07:27 +0100 Subject: [PATCH 1/8] - fix operand size mismatch error --- src/scripting/vm/jit_load.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/scripting/vm/jit_load.cpp b/src/scripting/vm/jit_load.cpp index 3037a78b5..5b35a7d44 100644 --- a/src/scripting/vm/jit_load.cpp +++ b/src/scripting/vm/jit_load.cpp @@ -255,8 +255,7 @@ void JitCompiler::EmitLV2() { EmitNullPointerThrow(B, X_READ_NIL); auto tmp = newTempIntPtr(); - cc.mov(tmp, regA[B]); - cc.add(tmp, konstd[C]); + cc.lea(tmp, asmjit::x86::qword_ptr(regA[B], konstd[C])); cc.movsd(regF[A], asmjit::x86::qword_ptr(tmp)); cc.movsd(regF[A + 1], asmjit::x86::qword_ptr(tmp, 8)); } @@ -265,8 +264,7 @@ void JitCompiler::EmitLV2_R() { EmitNullPointerThrow(B, X_READ_NIL); auto tmp = newTempIntPtr(); - cc.mov(tmp, regA[B]); - cc.add(tmp, regD[C]); + cc.lea(tmp, asmjit::x86::qword_ptr(regA[B], regD[C])); cc.movsd(regF[A], asmjit::x86::qword_ptr(tmp)); cc.movsd(regF[A + 1], asmjit::x86::qword_ptr(tmp, 8)); } @@ -275,8 +273,7 @@ void JitCompiler::EmitLV3() { EmitNullPointerThrow(B, X_READ_NIL); auto tmp = newTempIntPtr(); - cc.mov(tmp, regA[B]); - cc.add(tmp, konstd[C]); + cc.lea(tmp, asmjit::x86::qword_ptr(regA[B], konstd[C])); cc.movsd(regF[A], asmjit::x86::qword_ptr(tmp)); cc.movsd(regF[A + 1], asmjit::x86::qword_ptr(tmp, 8)); cc.movsd(regF[A + 2], asmjit::x86::qword_ptr(tmp, 16)); @@ -286,8 +283,7 @@ void JitCompiler::EmitLV3_R() { EmitNullPointerThrow(B, X_READ_NIL); auto tmp = newTempIntPtr(); - cc.mov(tmp, regA[B]); - cc.add(tmp, regD[C]); + cc.lea(tmp, asmjit::x86::qword_ptr(regA[B], regD[C])); cc.movsd(regF[A], asmjit::x86::qword_ptr(tmp)); cc.movsd(regF[A + 1], asmjit::x86::qword_ptr(tmp, 8)); cc.movsd(regF[A + 2], asmjit::x86::qword_ptr(tmp, 16)); From 0394dc56b754f3f1156318a1d0c6a57ca20addc1 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 15 Nov 
2018 22:33:13 +0100 Subject: [PATCH 2/8] - generate a memset for the allocated stack memory because that's what the VM does (this really shouldn't be done by the VM either - the compiler backend should clear its registers if it wants them to be zero!) --- src/scripting/vm/jit.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/scripting/vm/jit.cpp b/src/scripting/vm/jit.cpp index 6063ee366..cd9bf6e0b 100644 --- a/src/scripting/vm/jit.cpp +++ b/src/scripting/vm/jit.cpp @@ -589,6 +589,21 @@ void JitCompiler::Setup() cc.mov(x86::word_ptr(vmframe, offsetof(VMFrame, MaxParam)), sfunc->MaxParam); cc.mov(x86::word_ptr(vmframe, offsetof(VMFrame, NumParam)), 0); + // Zero initialize the variables (retardedly stupid to do here - should be done by the compiler backend!!) + unsigned int clearoffset = (unsigned int)offsetof(VMFrame, NumParam) + 2; + unsigned int sselength = (sfunc->StackSize - clearoffset) / 4; + auto zerosse = newTempXmmPd(); + cc.xorpd(zerosse, zerosse); + for (unsigned int i = 0; i < sselength * 4; i += 4) + cc.movupd(x86::ptr(vmframe, clearoffset + i), zerosse); + if (clearoffset + sselength * 4 < sfunc->StackSize) + { + auto zero32 = newTempInt32(); + cc.xor_(zero32, zero32); + for (unsigned int i = clearoffset + sselength * 4; i < sfunc->StackSize; i++) + cc.mov(asmjit::x86::byte_ptr(vmframe, i), zero32.r8Lo()); + } + auto fillParams = CreateCall([](VMFrame *newf, VMValue *args, int numargs) { try { From 5ef2175c38245c3b87b5c6b95eb782ccf896863f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 15 Nov 2018 22:40:12 +0100 Subject: [PATCH 3/8] - fix wrong offsets --- src/scripting/vm/jit.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/scripting/vm/jit.cpp b/src/scripting/vm/jit.cpp index cd9bf6e0b..0fa6971e8 100644 --- a/src/scripting/vm/jit.cpp +++ b/src/scripting/vm/jit.cpp @@ -590,17 +590,21 @@ void JitCompiler::Setup() cc.mov(x86::word_ptr(vmframe, offsetof(VMFrame, 
NumParam)), 0); // Zero initialize the variables (retardedly stupid to do here - should be done by the compiler backend!!) - unsigned int clearoffset = (unsigned int)offsetof(VMFrame, NumParam) + 2; - unsigned int sselength = (sfunc->StackSize - clearoffset) / 4; - auto zerosse = newTempXmmPd(); - cc.xorpd(zerosse, zerosse); - for (unsigned int i = 0; i < sselength * 4; i += 4) - cc.movupd(x86::ptr(vmframe, clearoffset + i), zerosse); - if (clearoffset + sselength * 4 < sfunc->StackSize) + unsigned int clearbegin = (unsigned int)offsetof(VMFrame, NumParam) + 2; + unsigned int clearend = sfunc->StackSize; + unsigned int sseend = clearbegin + (clearend - clearbegin) / 16 * 16; + if (clearbegin < sseend) + { + auto zerosse = newTempXmmPd(); + cc.xorpd(zerosse, zerosse); + for (unsigned int i = clearbegin; i < sseend; i += 16) + cc.movupd(x86::ptr(vmframe, i), zerosse); + } + if (sseend < clearend) { auto zero32 = newTempInt32(); cc.xor_(zero32, zero32); - for (unsigned int i = clearoffset + sselength * 4; i < sfunc->StackSize; i++) + for (unsigned int i = sseend; i < clearend; i++) cc.mov(asmjit::x86::byte_ptr(vmframe, i), zero32.r8Lo()); } From 00d41432d8792de3c78258645fcd8cc5fcc8a5dc Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 15 Nov 2018 22:47:44 +0100 Subject: [PATCH 4/8] - disable jit in 32-bit builds --- src/scripting/vm/vmframe.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/scripting/vm/vmframe.cpp b/src/scripting/vm/vmframe.cpp index 5ee4150a9..374911ec6 100644 --- a/src/scripting/vm/vmframe.cpp +++ b/src/scripting/vm/vmframe.cpp @@ -44,11 +44,17 @@ #include "c_cvars.h" #include "version.h" +#if (defined(_M_X64 ) || defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(__amd64 ) || defined(__amd64__ )) +#define ARCH_X64 +#endif + +#ifdef ARCH_X64 CUSTOM_CVAR(Bool, vm_jit, true, CVAR_NOINITCALL) { Printf("You must restart " GAMENAME " for this change to take effect.\n"); Printf("This cvar is currently not 
saved. You must specify it on the command line."); } +#endif cycle_t VMCycles[10]; int VMCalls[10]; @@ -217,6 +223,7 @@ int VMScriptFunction::PCToLine(const VMOP *pc) int VMScriptFunction::FirstScriptCall(VMFunction *func, VMValue *params, int numparams, VMReturn *ret, int numret) { +#ifdef ARCH_X64 if (vm_jit) { func->ScriptCall = JitCompile(static_cast<VMScriptFunction *>(func)); @@ -227,6 +234,9 @@ int VMScriptFunction::FirstScriptCall(VMFunction *func, VMValue *params, int num { func->ScriptCall = VMExec; } +#else + func->ScriptCall = VMExec; +#endif return func->ScriptCall(func, params, numparams, ret, numret); } From 6c31d2e965263fb2e73ed70603a65e772355dae2 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 15 Nov 2018 23:04:11 +0100 Subject: [PATCH 5/8] - add dword store to memset code --- src/scripting/vm/jit.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/scripting/vm/jit.cpp b/src/scripting/vm/jit.cpp index 0fa6971e8..a3964d4b2 100644 --- a/src/scripting/vm/jit.cpp +++ b/src/scripting/vm/jit.cpp @@ -604,7 +604,12 @@ void JitCompiler::Setup() { auto zero32 = newTempInt32(); cc.xor_(zero32, zero32); - for (unsigned int i = sseend; i < clearend; i++) + + unsigned int dwordend = sseend + (clearend - sseend) / 4 * 4; + for (unsigned int i = sseend; i < dwordend; i += 4) + cc.mov(asmjit::x86::dword_ptr(vmframe, i), zero32); + + for (unsigned int i = dwordend; i < clearend; i++) cc.mov(asmjit::x86::byte_ptr(vmframe, i), zero32.r8Lo()); } From 34b037c9db8e3aa0f0afb4b82d913c9963aec0ee Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 16 Nov 2018 01:13:25 +0100 Subject: [PATCH 6/8] - avoid calling VMFillParams for simple stack frames - split Setup into more functions --- src/scripting/backend/vmbuilder.cpp | 2 + src/scripting/vm/jit.cpp | 438 +++++++++++++++------------- src/scripting/vm/jitintern.h | 6 + src/scripting/vm/vm.h | 1 + 4 files changed, 248 insertions(+), 199 deletions(-) diff --git a/src/scripting/backend/vmbuilder.cpp
b/src/scripting/backend/vmbuilder.cpp index 11898d09d..c9df0c5e6 100644 --- a/src/scripting/backend/vmbuilder.cpp +++ b/src/scripting/backend/vmbuilder.cpp @@ -814,6 +814,7 @@ VMFunction *FFunctionBuildList::AddFunction(PNamespace *gnspc, const VersionInfo if (it.Func->SymbolName != NAME_None) { it.Function->Proto = it.Func->Variants[0].Proto; + it.Function->ArgFlags = it.Func->Variants[0].ArgFlags; } mItems.Push(it); @@ -885,6 +886,7 @@ void FFunctionBuildList::Build() if (sfunc->Proto == nullptr) { sfunc->Proto = NewPrototype(item.Proto->ReturnTypes, item.Func->Variants[0].Proto->ArgumentTypes); + sfunc->ArgFlags = item.Func->Variants[0].ArgFlags; } // Emit code diff --git a/src/scripting/vm/jit.cpp b/src/scripting/vm/jit.cpp index a3964d4b2..53272bb05 100644 --- a/src/scripting/vm/jit.cpp +++ b/src/scripting/vm/jit.cpp @@ -471,6 +471,245 @@ void JitCompiler::Setup() konsts = sfunc->KonstS; konsta = sfunc->KonstA; + labels.Resize(sfunc->CodeSize); + + CreateRegisters(); + IncrementVMCalls(); + SetupFrame(); +} + +void JitCompiler::SetupFrame() +{ + // the VM version reads this from the stack, but it is constant data + offsetParams = ((int)sizeof(VMFrame) + 15) & ~15; + offsetF = offsetParams + (int)(sfunc->MaxParam * sizeof(VMValue)); + offsetS = offsetF + (int)(sfunc->NumRegF * sizeof(double)); + offsetA = offsetS + (int)(sfunc->NumRegS * sizeof(FString)); + offsetD = offsetA + (int)(sfunc->NumRegA * sizeof(void*)); + offsetExtra = (offsetD + (int)(sfunc->NumRegD * sizeof(int32_t)) + 15) & ~15; + + vmframe = cc.newIntPtr("vmframe"); + + if (sfunc->SpecialInits.Size() == 0 && sfunc->NumRegS == 0) + { + // This is a simple frame with no constructors or destructors. Allocate it on the stack ourselves. 
+ + auto vmstack = cc.newStack(sfunc->StackSize, 16, "vmstack"); + cc.lea(vmframe, vmstack); + + auto slowinit = cc.newLabel(); + auto endinit = cc.newLabel(); + + cc.cmp(numargs, sfunc->NumArgs); + cc.jne(slowinit); + SetupSimpleFrame(vmstack); + cc.jmp(endinit); + cc.bind(slowinit); + SetupSimpleFrameMissingArgs(vmstack); // Does this ever happen? + cc.bind(endinit); + } + else + { + SetupFullVMFrame(); + } +} + +void JitCompiler::SetupSimpleFrame(asmjit::X86Mem vmstack) +{ + using namespace asmjit; + + int argsPos = 0; + int regd = 0, regf = 0, rega = 0; + for (unsigned int i = 0; i < sfunc->Proto->ArgumentTypes.Size(); i++) + { + const PType *type = sfunc->Proto->ArgumentTypes[i]; + if (sfunc->ArgFlags[i] & (VARF_Out | VARF_Ref)) + { + cc.mov(regA[rega++], x86::ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, a))); + } + else if (type == TypeVector2) + { + cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); + cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); + } + else if (type == TypeVector3) + { + cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); + cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); + cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); + } + else if (type == TypeFloat64) + { + cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); + } + else if (type == TypeString) + { + I_FatalError("JIT: Strings are not supported yet for simple frames"); + } + else if (type->isIntCompatible()) + { + cc.mov(regD[regd++], x86::dword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, i))); + } + else + { + cc.mov(regA[rega++], x86::ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, a))); + } + } + + if (sfunc->NumArgs != argsPos || regd > sfunc->NumRegD || regf > 
sfunc->NumRegF || rega > sfunc->NumRegA) + I_FatalError("JIT: sfunc->NumArgs != argsPos || regd > sfunc->NumRegD || regf > sfunc->NumRegF || rega > sfunc->NumRegA"); + + for (int i = regd; i < sfunc->NumRegD; i++) + cc.xor_(regD[i], regD[i]); + + for (int i = regf; i < sfunc->NumRegF; i++) + cc.xorpd(regF[i], regF[i]); + + for (int i = rega; i < sfunc->NumRegA; i++) + cc.xor_(regA[i], regA[i]); +} + +void JitCompiler::SetupSimpleFrameMissingArgs(asmjit::X86Mem vmstack) +{ + using namespace asmjit; + + auto sfuncptr = newTempIntPtr(); + cc.mov(sfuncptr, imm_ptr(sfunc)); + if (cc.is64Bit()) + cc.mov(x86::qword_ptr(vmframe, offsetof(VMFrame, Func)), sfuncptr); + else + cc.mov(x86::dword_ptr(vmframe, offsetof(VMFrame, Func)), sfuncptr); + cc.mov(x86::byte_ptr(vmframe, offsetof(VMFrame, NumRegD)), sfunc->NumRegD); + cc.mov(x86::byte_ptr(vmframe, offsetof(VMFrame, NumRegF)), sfunc->NumRegF); + cc.mov(x86::byte_ptr(vmframe, offsetof(VMFrame, NumRegS)), sfunc->NumRegS); + cc.mov(x86::byte_ptr(vmframe, offsetof(VMFrame, NumRegA)), sfunc->NumRegA); + cc.mov(x86::word_ptr(vmframe, offsetof(VMFrame, MaxParam)), sfunc->MaxParam); + cc.mov(x86::word_ptr(vmframe, offsetof(VMFrame, NumParam)), 0); + + // Zero initialize the variables (retardedly stupid to do here - should be done by the compiler backend!!) 
+ unsigned int clearbegin = (unsigned int)offsetof(VMFrame, NumParam) + 2; + unsigned int clearend = sfunc->StackSize; + unsigned int sseend = clearbegin + (clearend - clearbegin) / 16 * 16; + if (clearbegin < sseend) + { + auto zerosse = newTempXmmPd(); + cc.xorpd(zerosse, zerosse); + for (unsigned int i = clearbegin; i < sseend; i += 16) + cc.movupd(x86::ptr(vmframe, i), zerosse); + } + if (sseend < clearend) + { + auto zero32 = newTempInt32(); + cc.xor_(zero32, zero32); + + unsigned int dwordend = sseend + (clearend - sseend) / 4 * 4; + for (unsigned int i = sseend; i < dwordend; i += 4) + cc.mov(asmjit::x86::dword_ptr(vmframe, i), zero32); + + for (unsigned int i = dwordend; i < clearend; i++) + cc.mov(asmjit::x86::byte_ptr(vmframe, i), zero32.r8Lo()); + } + + auto fillParams = CreateCall([](VMFrame *newf, VMValue *args, int numargs) { + try + { + VMFillParams(args, newf, numargs); + } + catch (...) + { + VMThrowException(std::current_exception()); + } + }); + fillParams->setArg(0, vmframe); + fillParams->setArg(1, args); + fillParams->setArg(2, numargs); + + for (int i = 0; i < sfunc->NumRegD; i++) + cc.mov(regD[i], x86::dword_ptr(vmframe, offsetD + i * sizeof(int32_t))); + + for (int i = 0; i < sfunc->NumRegF; i++) + cc.movsd(regF[i], x86::qword_ptr(vmframe, offsetF + i * sizeof(double))); + + for (int i = 0; i < sfunc->NumRegS; i++) + cc.lea(regS[i], x86::ptr(vmframe, offsetS + i * sizeof(FString))); + + for (int i = 0; i < sfunc->NumRegA; i++) + cc.mov(regA[i], x86::ptr(vmframe, offsetA + i * sizeof(void*))); +} + +void JitCompiler::SetupFullVMFrame() +{ + using namespace asmjit; + + stack = cc.newIntPtr("stack"); + auto allocFrame = CreateCall([](VMScriptFunction *func, VMValue *args, int numargs) -> VMFrameStack* { + try + { + VMFrameStack *stack = &GlobalVMStack; + VMFrame *newf = stack->AllocFrame(func); + CurrentJitExceptInfo->vmframes++; + VMFillParams(args, newf, numargs); + return stack; + } + catch (...) 
+ { + VMThrowException(std::current_exception()); + return nullptr; + } + }); + allocFrame->setRet(0, stack); + allocFrame->setArg(0, imm_ptr(sfunc)); + allocFrame->setArg(1, args); + allocFrame->setArg(2, numargs); + + cc.mov(vmframe, x86::ptr(stack)); // stack->Blocks + cc.mov(vmframe, x86::ptr(vmframe, VMFrameStack::OffsetLastFrame())); // Blocks->LastFrame + + for (int i = 0; i < sfunc->NumRegD; i++) + cc.mov(regD[i], x86::dword_ptr(vmframe, offsetD + i * sizeof(int32_t))); + + for (int i = 0; i < sfunc->NumRegF; i++) + cc.movsd(regF[i], x86::qword_ptr(vmframe, offsetF + i * sizeof(double))); + + for (int i = 0; i < sfunc->NumRegS; i++) + cc.lea(regS[i], x86::ptr(vmframe, offsetS + i * sizeof(FString))); + + for (int i = 0; i < sfunc->NumRegA; i++) + cc.mov(regA[i], x86::ptr(vmframe, offsetA + i * sizeof(void*))); +} + +void JitCompiler::EmitPopFrame() +{ + if (sfunc->SpecialInits.Size() != 0 || sfunc->NumRegS != 0) + { + auto popFrame = CreateCall([](VMFrameStack *stack) { + try + { + stack->PopFrame(); + CurrentJitExceptInfo->vmframes--; + } + catch (...) 
+ { + VMThrowException(std::current_exception()); + } + }); + popFrame->setArg(0, stack); + } +} + +void JitCompiler::IncrementVMCalls() +{ + // VMCalls[0]++ + auto vmcallsptr = newTempIntPtr(); + auto vmcalls = newTempInt32(); + cc.mov(vmcallsptr, asmjit::imm_ptr(VMCalls)); + cc.mov(vmcalls, asmjit::x86::dword_ptr(vmcallsptr)); + cc.add(vmcalls, (int)1); + cc.mov(asmjit::x86::dword_ptr(vmcallsptr), vmcalls); +} + +void JitCompiler::CreateRegisters() +{ regD.Resize(sfunc->NumRegD); regF.Resize(sfunc->NumRegF); regA.Resize(sfunc->NumRegA); @@ -499,205 +738,6 @@ void JitCompiler::Setup() regname.Format("regA%d", i); regA[i] = cc.newIntPtr(regname.GetChars()); } - - labels.Resize(sfunc->CodeSize); - - // VMCalls[0]++ - auto vmcallsptr = newTempIntPtr(); - auto vmcalls = newTempInt32(); - cc.mov(vmcallsptr, imm_ptr(VMCalls)); - cc.mov(vmcalls, x86::dword_ptr(vmcallsptr)); - cc.add(vmcalls, (int)1); - cc.mov(x86::dword_ptr(vmcallsptr), vmcalls); - - // the VM version reads this from the stack, but it is constant data - offsetParams = ((int)sizeof(VMFrame) + 15) & ~15; - offsetF = offsetParams + (int)(sfunc->MaxParam * sizeof(VMValue)); - offsetS = offsetF + (int)(sfunc->NumRegF * sizeof(double)); - offsetA = offsetS + (int)(sfunc->NumRegS * sizeof(FString)); - offsetD = offsetA + (int)(sfunc->NumRegA * sizeof(void*)); - offsetExtra = (offsetD + (int)(sfunc->NumRegD * sizeof(int32_t)) + 15) & ~15; - - vmframe = cc.newIntPtr("vmframe"); - - if (sfunc->SpecialInits.Size() == 0 && sfunc->NumRegS == 0) - { - // This is a simple frame with no constructors or destructors. Allocate it on the stack ourselves. - - auto vmstack = cc.newStack(sfunc->StackSize, 16, "vmstack"); - cc.lea(vmframe, vmstack); - - auto slowinit = cc.newLabel(); - auto endinit = cc.newLabel(); - -#if 0 // this crashes sometimes - cc.cmp(numargs, sfunc->NumArgs); - cc.jne(slowinit); - - // Is there a better way to know the type than this? 
- int argsPos = 0; - int regd = 0, regf = 0, rega = 0; - for (unsigned int i = 0; i < sfunc->Proto->ArgumentTypes.Size(); i++) - { - const PType *type = sfunc->Proto->ArgumentTypes[i]; - if (type->isPointer()) - { - cc.mov(regA[rega++], x86::ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, a))); - } - else if (type->isIntCompatible()) - { - cc.mov(regD[regd++], x86::dword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, i))); - } - else if (type == TypeVector2) - { - cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); - cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); - } - else if (type == TypeVector3) - { - cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); - cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); - cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); - } - else if (type->isFloat()) - { - cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); - } - else if (type == TypeString) - { - I_FatalError("JIT: Strings are not supported yet for simple frames"); - } - } - - if (sfunc->NumArgs != argsPos || regd > sfunc->NumRegD || regf > sfunc->NumRegF || rega > sfunc->NumRegA) - I_FatalError("JIT: sfunc->NumArgs != argsPos || regd > sfunc->NumRegD || regf > sfunc->NumRegF || rega > sfunc->NumRegA"); - - cc.jmp(endinit); -#endif - cc.bind(slowinit); - - auto sfuncptr = newTempIntPtr(); - cc.mov(sfuncptr, imm_ptr(sfunc)); - if (cc.is64Bit()) - cc.mov(x86::qword_ptr(vmframe, offsetof(VMFrame, Func)), sfuncptr); - else - cc.mov(x86::dword_ptr(vmframe, offsetof(VMFrame, Func)), sfuncptr); - cc.mov(x86::byte_ptr(vmframe, offsetof(VMFrame, NumRegD)), sfunc->NumRegD); - cc.mov(x86::byte_ptr(vmframe, offsetof(VMFrame, NumRegF)), sfunc->NumRegF); - cc.mov(x86::byte_ptr(vmframe, 
offsetof(VMFrame, NumRegS)), sfunc->NumRegS); - cc.mov(x86::byte_ptr(vmframe, offsetof(VMFrame, NumRegA)), sfunc->NumRegA); - cc.mov(x86::word_ptr(vmframe, offsetof(VMFrame, MaxParam)), sfunc->MaxParam); - cc.mov(x86::word_ptr(vmframe, offsetof(VMFrame, NumParam)), 0); - - // Zero initialize the variables (retardedly stupid to do here - should be done by the compiler backend!!) - unsigned int clearbegin = (unsigned int)offsetof(VMFrame, NumParam) + 2; - unsigned int clearend = sfunc->StackSize; - unsigned int sseend = clearbegin + (clearend - clearbegin) / 16 * 16; - if (clearbegin < sseend) - { - auto zerosse = newTempXmmPd(); - cc.xorpd(zerosse, zerosse); - for (unsigned int i = clearbegin; i < sseend; i += 16) - cc.movupd(x86::ptr(vmframe, i), zerosse); - } - if (sseend < clearend) - { - auto zero32 = newTempInt32(); - cc.xor_(zero32, zero32); - - unsigned int dwordend = sseend + (clearend - sseend) / 4 * 4; - for (unsigned int i = sseend; i < dwordend; i += 4) - cc.mov(asmjit::x86::dword_ptr(vmframe, i), zero32); - - for (unsigned int i = dwordend; i < clearend; i++) - cc.mov(asmjit::x86::byte_ptr(vmframe, i), zero32.r8Lo()); - } - - auto fillParams = CreateCall([](VMFrame *newf, VMValue *args, int numargs) { - try - { - VMFillParams(args, newf, numargs); - } - catch (...) 
- { - VMThrowException(std::current_exception()); - } - }); - fillParams->setArg(0, vmframe); - fillParams->setArg(1, args); - fillParams->setArg(2, numargs); - - for (int i = 0; i < sfunc->NumRegD; i++) - cc.mov(regD[i], x86::dword_ptr(vmframe, offsetD + i * sizeof(int32_t))); - - for (int i = 0; i < sfunc->NumRegF; i++) - cc.movsd(regF[i], x86::qword_ptr(vmframe, offsetF + i * sizeof(double))); - - for (int i = 0; i < sfunc->NumRegS; i++) - cc.lea(regS[i], x86::ptr(vmframe, offsetS + i * sizeof(FString))); - - for (int i = 0; i < sfunc->NumRegA; i++) - cc.mov(regA[i], x86::ptr(vmframe, offsetA + i * sizeof(void*))); - - cc.bind(endinit); - } - else - { - stack = cc.newIntPtr("stack"); - auto allocFrame = CreateCall([](VMScriptFunction *func, VMValue *args, int numargs) -> VMFrameStack* { - try - { - VMFrameStack *stack = &GlobalVMStack; - VMFrame *newf = stack->AllocFrame(func); - CurrentJitExceptInfo->vmframes++; - VMFillParams(args, newf, numargs); - return stack; - } - catch (...) - { - VMThrowException(std::current_exception()); - return nullptr; - } - }); - allocFrame->setRet(0, stack); - allocFrame->setArg(0, imm_ptr(sfunc)); - allocFrame->setArg(1, args); - allocFrame->setArg(2, numargs); - - cc.mov(vmframe, x86::ptr(stack)); // stack->Blocks - cc.mov(vmframe, x86::ptr(vmframe, VMFrameStack::OffsetLastFrame())); // Blocks->LastFrame - - for (int i = 0; i < sfunc->NumRegD; i++) - cc.mov(regD[i], x86::dword_ptr(vmframe, offsetD + i * sizeof(int32_t))); - - for (int i = 0; i < sfunc->NumRegF; i++) - cc.movsd(regF[i], x86::qword_ptr(vmframe, offsetF + i * sizeof(double))); - - for (int i = 0; i < sfunc->NumRegS; i++) - cc.lea(regS[i], x86::ptr(vmframe, offsetS + i * sizeof(FString))); - - for (int i = 0; i < sfunc->NumRegA; i++) - cc.mov(regA[i], x86::ptr(vmframe, offsetA + i * sizeof(void*))); - } -} - -void JitCompiler::EmitPopFrame() -{ - if (sfunc->SpecialInits.Size() != 0 || sfunc->NumRegS != 0) - { - auto popFrame = CreateCall([](VMFrameStack *stack) { - 
try - { - stack->PopFrame(); - CurrentJitExceptInfo->vmframes--; - } - catch (...) - { - VMThrowException(std::current_exception()); - } - }); - popFrame->setArg(0, stack); - } } void JitCompiler::EmitNullPointerThrow(int index, EVMAbortException reason) diff --git a/src/scripting/vm/jitintern.h b/src/scripting/vm/jitintern.h index b7348b773..0f7cc7442 100644 --- a/src/scripting/vm/jitintern.h +++ b/src/scripting/vm/jitintern.h @@ -39,6 +39,12 @@ private: #undef xx void Setup(); + void CreateRegisters(); + void IncrementVMCalls(); + void SetupFrame(); + void SetupSimpleFrame(asmjit::X86Mem vmstack); + void SetupSimpleFrameMissingArgs(asmjit::X86Mem vmstack); + void SetupFullVMFrame(); void BindLabels(); void EmitOpcode(); void EmitPopFrame(); diff --git a/src/scripting/vm/vm.h b/src/scripting/vm/vm.h index 2ceeebae9..828909779 100644 --- a/src/scripting/vm/vm.h +++ b/src/scripting/vm/vm.h @@ -329,6 +329,7 @@ public: FString PrintableName; // so that the VM can print meaningful info if something in this function goes wrong. class PPrototype *Proto; + TArray<uint32_t> ArgFlags; // Should be the same length as Proto->ArgumentTypes int(*ScriptCall)(VMFunction *func, VMValue *params, int numparams, VMReturn *ret, int numret) = nullptr; From ac28f0d34f8285aa608217299eaa60ce6ec43301 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 16 Nov 2018 01:33:41 +0100 Subject: [PATCH 7/8] - remove unused argument --- src/scripting/vm/jit.cpp | 8 ++++---- src/scripting/vm/jitintern.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/scripting/vm/jit.cpp b/src/scripting/vm/jit.cpp index 53272bb05..d292a9d26 100644 --- a/src/scripting/vm/jit.cpp +++ b/src/scripting/vm/jit.cpp @@ -502,10 +502,10 @@ void JitCompiler::SetupFrame() cc.cmp(numargs, sfunc->NumArgs); cc.jne(slowinit); - SetupSimpleFrame(vmstack); + SetupSimpleFrame(); cc.jmp(endinit); cc.bind(slowinit); - SetupSimpleFrameMissingArgs(vmstack); // Does this ever happen?
+ SetupSimpleFrameMissingArgs(); // Does this ever happen? cc.bind(endinit); } else @@ -514,7 +514,7 @@ void JitCompiler::SetupFrame() } } -void JitCompiler::SetupSimpleFrame(asmjit::X86Mem vmstack) +void JitCompiler::SetupSimpleFrame() { using namespace asmjit; @@ -569,7 +569,7 @@ void JitCompiler::SetupSimpleFrame(asmjit::X86Mem vmstack) cc.xor_(regA[i], regA[i]); } -void JitCompiler::SetupSimpleFrameMissingArgs(asmjit::X86Mem vmstack) +void JitCompiler::SetupSimpleFrameMissingArgs() { using namespace asmjit; diff --git a/src/scripting/vm/jitintern.h b/src/scripting/vm/jitintern.h index 0f7cc7442..1e70ec6ed 100644 --- a/src/scripting/vm/jitintern.h +++ b/src/scripting/vm/jitintern.h @@ -42,8 +42,8 @@ private: void CreateRegisters(); void IncrementVMCalls(); void SetupFrame(); - void SetupSimpleFrame(asmjit::X86Mem vmstack); - void SetupSimpleFrameMissingArgs(asmjit::X86Mem vmstack); + void SetupSimpleFrame(); + void SetupSimpleFrameMissingArgs(); void SetupFullVMFrame(); void BindLabels(); void EmitOpcode(); From 33fb76698bd8d05000c2b28c370dfaea0b783f5f Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Fri, 16 Nov 2018 15:25:37 +0100 Subject: [PATCH 8/8] Avoid using argument count for any kind of decision making in native VM functions. This allows retaining the functionality, even if for the JIT compiler's benefit all default arguments are pushed onto the stack instead of reading them from the defaults array. 
--- src/p_enemy.cpp | 6 +++--- src/p_mobj.cpp | 21 ++++++++++++--------- src/p_sectors.cpp | 3 ++- src/r_data/r_sections.h | 4 ++-- wadsrc/static/zscript/actor.txt | 8 ++++---- wadsrc/static/zscript/mapdata.txt | 2 +- 6 files changed, 24 insertions(+), 20 deletions(-) diff --git a/src/p_enemy.cpp b/src/p_enemy.cpp index f49da9983..14715608b 100644 --- a/src/p_enemy.cpp +++ b/src/p_enemy.cpp @@ -2976,17 +2976,17 @@ DEFINE_ACTION_FUNCTION(AActor, A_Chase) PARAM_STATE_DEF (missile) PARAM_INT_DEF (flags) - if (numparam > 1) + if (melee != nullptr || missile != nullptr || flags != 0x40000000) { if ((flags & CHF_RESURRECT) && P_CheckForResurrection(self, false)) return 0; A_DoChase(self, !!(flags&CHF_FASTCHASE), melee, missile, !(flags&CHF_NOPLAYACTIVE), - !!(flags&CHF_NIGHTMAREFAST), !!(flags&CHF_DONTMOVE), flags); + !!(flags&CHF_NIGHTMAREFAST), !!(flags&CHF_DONTMOVE), flags & 0x3fffffff); } else // this is the old default A_Chase { - A_DoChase(self, false, self->MeleeState, self->MissileState, true, gameinfo.nightmarefast, false, flags); + A_DoChase(self, false, self->MeleeState, self->MissileState, true, gameinfo.nightmarefast, false, 0); } return 0; } diff --git a/src/p_mobj.cpp b/src/p_mobj.cpp index 8f2cf64ff..ef77b77d3 100644 --- a/src/p_mobj.cpp +++ b/src/p_mobj.cpp @@ -7432,7 +7432,7 @@ DEFINE_ACTION_FUNCTION(AActor, SpawnPlayerMissile) PARAM_BOOL_DEF(noautoaim); PARAM_INT_DEF(aimflags); AActor *missileactor; - if (numparam == 2) angle = self->Angles.Yaw; + if (angle == 1e37) angle = self->Angles.Yaw; AActor *misl = P_SpawnPlayerMissile(self, x, y, z, type, angle, lt, &missileactor, nofreeaim, noautoaim, aimflags); if (numret > 0) ret[0].SetObject(misl); if (numret > 1) ret[1].SetObject(missileactor), numret = 2; @@ -8292,20 +8292,22 @@ DEFINE_ACTION_FUNCTION(AActor, GetDefaultByType) DEFINE_ACTION_FUNCTION(AActor, VelFromAngle) { PARAM_SELF_PROLOGUE(AActor); - if (numparam == 1) + PARAM_FLOAT(speed); + PARAM_ANGLE(angle); + + if (speed == 1e37) { 
self->VelFromAngle(); } else { - PARAM_FLOAT(speed); - if (numparam == 2) + if (angle == 1e37) + { self->VelFromAngle(speed); } else { - PARAM_ANGLE(angle); self->VelFromAngle(speed, angle); } } @@ -8327,20 +8329,21 @@ DEFINE_ACTION_FUNCTION(AActor, Vel3DFromAngle) DEFINE_ACTION_FUNCTION(AActor, Thrust) { PARAM_SELF_PROLOGUE(AActor); - if (numparam == 1) + PARAM_FLOAT(speed); + PARAM_ANGLE(angle); + + if (speed == 1e37) { self->Thrust(); } else { - PARAM_FLOAT(speed); - if (numparam == 2) + if (angle == 1e37) { self->Thrust(speed); } else { - PARAM_ANGLE(angle); self->Thrust(angle, speed); } } diff --git a/src/p_sectors.cpp b/src/p_sectors.cpp index 1abb971b6..14cd4a4d1 100644 --- a/src/p_sectors.cpp +++ b/src/p_sectors.cpp @@ -2603,7 +2603,8 @@ DEFINE_ACTION_FUNCTION(_Secplane, HeightDiff) { PARAM_SELF_STRUCT_PROLOGUE(secplane_t); PARAM_FLOAT(oldd); - if (numparam == 2) + PARAM_FLOAT(newd); + if (newd == 1e37) /* 1e37 is the "newd omitted" sentinel default; only then use the one-argument overload */ { ACTION_RETURN_FLOAT(self->HeightDiff(oldd)); } diff --git a/src/r_data/r_sections.h b/src/r_data/r_sections.h index 35f61cb7c..13172f0cf 100644 --- a/src/r_data/r_sections.h +++ b/src/r_data/r_sections.h @@ -33,8 +33,8 @@ struct BoundingRect void setEmpty() { - left = top = 1e38; - bottom = right = -1e38; + left = top = FLT_MAX; + bottom = right = -FLT_MAX; /* most negative float; FLT_MIN is the smallest positive value */ } bool contains(const BoundingRect & other) const diff --git a/wadsrc/static/zscript/actor.txt b/wadsrc/static/zscript/actor.txt index 7f9cd5afd..c5426936e 100644 --- a/wadsrc/static/zscript/actor.txt +++ b/wadsrc/static/zscript/actor.txt @@ -616,7 +616,7 @@ class Actor : Thinker native native Actor SpawnMissileAngleZSpeed (double z, class type, double angle, double vz, double speed, Actor owner = null, bool checkspawn = true); native Actor SpawnMissileZAimed (double z, Actor dest, Class type); native Actor SpawnSubMissile(Class type, Actor target); - native Actor, Actor SpawnPlayerMissile(class type, double angle = 0, double x = 0, double y = 0, double z = 0, out FTranslatedLineTarget pLineTarget =
null, bool nofreeaim = false, bool noautoaim = false, int aimflags = 0); + native Actor, Actor SpawnPlayerMissile(class type, double angle = 1e37, double x = 0, double y = 0, double z = 0, out FTranslatedLineTarget pLineTarget = null, bool nofreeaim = false, bool noautoaim = false, int aimflags = 0); native void SpawnTeleportFog(Vector3 pos, bool beforeTele, bool setTarget); native Actor RoughMonsterSearch(int distance, bool onlyseekable = false, bool frontonly = false); native int ApplyDamageFactor(Name damagetype, int damage); @@ -696,9 +696,9 @@ class Actor : Thinker native native clearscope vector2 Vec2Offset(double x, double y, bool absolute = false) const; native clearscope vector3 Vec2OffsetZ(double x, double y, double atz, bool absolute = false) const; native void VelIntercept(Actor targ, double speed = -1, bool aimpitch = true, bool oldvel = false); - native void VelFromAngle(double speed = 0, double angle = 0); + native void VelFromAngle(double speed = 1e37, double angle = 1e37); native void Vel3DFromAngle(double speed, double angle, double pitch); - native void Thrust(double speed = 0, double angle = 0); + native void Thrust(double speed = 1e37, double angle = 1e37); native clearscope bool isFriend(Actor other) const; native clearscope bool isHostile(Actor other) const; native void AdjustFloorClip(); @@ -1030,7 +1030,7 @@ class Actor : Thinker native void A_Fall() { A_NoBlocking(); } native void A_XScream(); native void A_Look(); - native void A_Chase(statelabel melee = null, statelabel missile = null, int flags = 0); + native void A_Chase(statelabel melee = null, statelabel missile = null, int flags = 0x40000000); native void A_Scream(); native void A_VileChase(); native bool A_CheckForResurrection(); diff --git a/wadsrc/static/zscript/mapdata.txt b/wadsrc/static/zscript/mapdata.txt index 11827e3ad..3440d4b24 100644 --- a/wadsrc/static/zscript/mapdata.txt +++ b/wadsrc/static/zscript/mapdata.txt @@ -206,7 +206,7 @@ struct SecPlane native play native bool 
isEqual(Secplane other) const; native void ChangeHeight(double hdiff); native double GetChangedHeight(double hdiff) const; - native double HeightDiff(double oldd, double newd = 0.0) const; + native double HeightDiff(double oldd, double newd = 1e37) const; native double PointToDist(Vector2 xy, double z) const; }