- Backend update from GZDoom

* Vector 4 in zscript
* garbage collector fixes
This commit is contained in:
Christoph Oelckers 2022-11-12 10:45:39 +01:00
parent f7a2fd29ba
commit 8806fb930b
32 changed files with 1294 additions and 328 deletions

View file

@ -6,6 +6,7 @@
extern PString *TypeString;
extern PStruct *TypeVector2;
extern PStruct *TypeVector3;
extern PStruct* TypeVector4;
static void OutputJitLog(const asmjit::StringLogger &logger);
@ -315,6 +316,13 @@ void JitCompiler::SetupSimpleFrame()
cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f)));
cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f)));
}
else if (type == TypeVector4 || type == TypeFVector4)
{
cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f)));
cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f)));
cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f)));
cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f)));
}
else if (type == TypeFloat64)
{
cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f)));
@ -551,6 +559,20 @@ asmjit::X86Xmm JitCompiler::CheckRegF(int r0, int r1, int r2, int r3)
}
}
// Selects the xmm register to read float register r0 from.
// When r0 is distinct from every one of r1..r4, regF[r0] is returned directly.
// Otherwise a fresh temporary is allocated, a movsd copying regF[r0] into it is
// emitted, and the temporary is returned instead.
asmjit::X86Xmm JitCompiler::CheckRegF(int r0, int r1, int r2, int r3, int r4)
{
	const bool aliased = (r0 == r1) || (r0 == r2) || (r0 == r3) || (r0 == r4);
	if (!aliased)
		return regF[r0];

	auto snapshot = newTempXmmSd();
	cc.movsd(snapshot, regF[r0]);
	return snapshot;
}
asmjit::X86Gp JitCompiler::CheckRegS(int r0, int r1)
{
if (r0 != r1)

View file

@ -182,6 +182,13 @@ int JitCompiler::StoreCallParams()
}
numparams += 2;
break;
case REGT_FLOAT | REGT_MULTIREG4:
for (int j = 0; j < 4; j++)
{
cc.movsd(x86::qword_ptr(vmframe, offsetParams + (slot + j) * sizeof(VMValue) + myoffsetof(VMValue, f)), regF[bc + j]);
}
numparams += 3;
break;
case REGT_FLOAT | REGT_ADDROF:
cc.lea(stackPtr, x86::ptr(vmframe, offsetF + (int)(bc * sizeof(double))));
// When passing the address to a float we don't know if the receiving function will treat it as float, vec2 or vec3.
@ -256,6 +263,12 @@ void JitCompiler::LoadCallResult(int type, int regnum, bool addrof)
cc.movsd(regF[regnum + 1], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 1) * sizeof(double)));
cc.movsd(regF[regnum + 2], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 2) * sizeof(double)));
}
else if (type & REGT_MULTIREG4)
{
cc.movsd(regF[regnum + 1], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 1) * sizeof(double)));
cc.movsd(regF[regnum + 2], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 2) * sizeof(double)));
cc.movsd(regF[regnum + 3], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 3) * sizeof(double)));
}
break;
case REGT_STRING:
// We don't have to do anything in this case. String values are never moved to virtual registers.
@ -408,6 +421,11 @@ void JitCompiler::EmitNativeCall(VMNativeFunction *target)
call->setArg(slot + j, regF[bc + j]);
numparams += 2;
break;
case REGT_FLOAT | REGT_MULTIREG4:
for (int j = 0; j < 4; j++)
call->setArg(slot + j, regF[bc + j]);
numparams += 3;
break;
case REGT_FLOAT | REGT_KONST:
tmp = newTempIntPtr();
tmp2 = newTempXmmSd();
@ -550,6 +568,12 @@ void JitCompiler::EmitNativeCall(VMNativeFunction *target)
cc.movsd(regF[regnum + 1], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 1) * sizeof(double)));
cc.movsd(regF[regnum + 2], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 2) * sizeof(double)));
break;
case REGT_FLOAT | REGT_MULTIREG4:
cc.movsd(regF[regnum], asmjit::x86::qword_ptr(vmframe, offsetF + regnum * sizeof(double)));
cc.movsd(regF[regnum + 1], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 1) * sizeof(double)));
cc.movsd(regF[regnum + 2], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 2) * sizeof(double)));
cc.movsd(regF[regnum + 3], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 3) * sizeof(double)));
break;
case REGT_STRING:
// We don't have to do anything in this case. String values are never moved to virtual registers.
break;
@ -624,6 +648,13 @@ asmjit::FuncSignature JitCompiler::CreateFuncSignature()
args.Push(TypeIdOf<double>::kTypeId);
key += "fff";
break;
case REGT_FLOAT | REGT_MULTIREG4:
args.Push(TypeIdOf<double>::kTypeId);
args.Push(TypeIdOf<double>::kTypeId);
args.Push(TypeIdOf<double>::kTypeId);
args.Push(TypeIdOf<double>::kTypeId);
key += "ffff";
break;
default:
I_Error("Unknown REGT value passed to EmitPARAM\n");

View file

@ -110,7 +110,21 @@ void JitCompiler::EmitRET()
if (regtype & REGT_KONST)
{
auto tmp = newTempInt64();
if (regtype & REGT_MULTIREG3)
if (regtype & REGT_MULTIREG4)
{
cc.mov(tmp, (((int64_t*)konstf)[regnum]));
cc.mov(x86::qword_ptr(location), tmp);
cc.mov(tmp, (((int64_t*)konstf)[regnum + 1]));
cc.mov(x86::qword_ptr(location, 8), tmp);
cc.mov(tmp, (((int64_t*)konstf)[regnum + 2]));
cc.mov(x86::qword_ptr(location, 16), tmp);
cc.mov(tmp, (((int64_t*)konstf)[regnum + 3]));
cc.mov(x86::qword_ptr(location, 24), tmp);
}
else if (regtype & REGT_MULTIREG3)
{
cc.mov(tmp, (((int64_t *)konstf)[regnum]));
cc.mov(x86::qword_ptr(location), tmp);
@ -137,7 +151,14 @@ void JitCompiler::EmitRET()
}
else
{
if (regtype & REGT_MULTIREG3)
if (regtype & REGT_MULTIREG4)
{
cc.movsd(x86::qword_ptr(location), regF[regnum]);
cc.movsd(x86::qword_ptr(location, 8), regF[regnum + 1]);
cc.movsd(x86::qword_ptr(location, 16), regF[regnum + 2]);
cc.movsd(x86::qword_ptr(location, 24), regF[regnum + 3]);
}
else if (regtype & REGT_MULTIREG3)
{
cc.movsd(x86::qword_ptr(location), regF[regnum]);
cc.movsd(x86::qword_ptr(location, 8), regF[regnum + 1]);

View file

@ -325,6 +325,28 @@ void JitCompiler::EmitLV3_R()
cc.movsd(regF[A + 2], asmjit::x86::qword_ptr(tmp, 16));
}
// Emits a load of four consecutive doubles into regF[A] .. regF[A + 3].
// The source address is regA[B] plus the displacement konstd[C].
void JitCompiler::EmitLV4()
{
	// Emitted first, before the pointer in regA[B] is used for the load.
	EmitNullPointerThrow(B, X_READ_NIL);

	auto base = newTempIntPtr();
	cc.lea(base, asmjit::x86::qword_ptr(regA[B], konstd[C]));

	// One 8-byte element per lane, at byte offsets 0, 8, 16, 24.
	for (int lane = 0; lane < 4; lane++)
	{
		cc.movsd(regF[A + lane], asmjit::x86::qword_ptr(base, lane * 8));
	}
}
// Emits a load of four consecutive doubles into regF[A] .. regF[A + 3].
// The source address is regA[B] indexed by the register regD[C].
void JitCompiler::EmitLV4_R()
{
	// Emitted first, before the pointer in regA[B] is used for the load.
	EmitNullPointerThrow(B, X_READ_NIL);

	auto base = newTempIntPtr();
	cc.lea(base, asmjit::x86::qword_ptr(regA[B], regD[C]));

	// One 8-byte element per lane, at byte offsets 0, 8, 16, 24.
	for (int lane = 0; lane < 4; lane++)
	{
		cc.movsd(regF[A + lane], asmjit::x86::qword_ptr(base, lane * 8));
	}
}
void JitCompiler::EmitLFV2()
{
EmitNullPointerThrow(B, X_READ_NIL);
@ -373,6 +395,36 @@ void JitCompiler::EmitLFV3_R()
cc.cvtss2sd(regF[A + 2], regF[A + 2]);
}
// Emits a load of four consecutive single-precision values from
// regA[B] + konstd[C] and widens each to double in regF[A] .. regF[A + 3].
void JitCompiler::EmitLFV4()
{
	// Emitted first, before the pointer in regA[B] is used for the load.
	EmitNullPointerThrow(B, X_READ_NIL);

	auto base = newTempIntPtr();
	cc.lea(base, asmjit::x86::qword_ptr(regA[B], konstd[C]));

	// All four scalar loads are emitted first (4-byte stride) ...
	for (int lane = 0; lane < 4; lane++)
	{
		cc.movss(regF[A + lane], asmjit::x86::qword_ptr(base, lane * 4));
	}
	// ... followed by the four in-place single -> double conversions.
	for (int lane = 0; lane < 4; lane++)
	{
		cc.cvtss2sd(regF[A + lane], regF[A + lane]);
	}
}
// Emits a load of four consecutive single-precision values from
// regA[B] indexed by regD[C] and widens each to double in regF[A] .. regF[A + 3].
void JitCompiler::EmitLFV4_R()
{
	// Emitted first, before the pointer in regA[B] is used for the load.
	EmitNullPointerThrow(B, X_READ_NIL);

	auto base = newTempIntPtr();
	cc.lea(base, asmjit::x86::qword_ptr(regA[B], regD[C]));

	// All four scalar loads are emitted first (4-byte stride) ...
	for (int lane = 0; lane < 4; lane++)
	{
		cc.movss(regF[A + lane], asmjit::x86::qword_ptr(base, lane * 4));
	}
	// ... followed by the four in-place single -> double conversions.
	for (int lane = 0; lane < 4; lane++)
	{
		cc.cvtss2sd(regF[A + lane], regF[A + lane]);
	}
}
static void SetString(FString *to, char **from)
{
*to = *from;

View file

@ -1447,6 +1447,165 @@ void JitCompiler::EmitEQV3_K()
I_Error("EQV3_K is not used.");
}
/////////////////////////////////////////////////////////////////////////////
// Vector math. (4D/Quaternion)
// Emits regF[A + i] = -regF[B + i] for the four lanes i = 0..3.
void JitCompiler::EmitNEGV4()
{
	// The negation is done by XOR-ing each lane with the constant -0.0,
	// which is loaded once into a temporary register.
	auto signConst = cc.newDoubleConst(asmjit::kConstScopeLocal, -0.0);
	auto signReg = newTempXmmSd();
	cc.movsd(signReg, signConst);

	for (int lane = 0; lane < 4; lane++)
	{
		cc.movsd(regF[A + lane], regF[B + lane]);
		cc.xorpd(regF[A + lane], signReg);
	}
}
void JitCompiler::EmitADDV4_RR()
{
auto rc0 = CheckRegF(C, A);
auto rc1 = CheckRegF(C + 1, A + 1);
auto rc2 = CheckRegF(C + 2, A + 2);
auto rc3 = CheckRegF(C + 3, A + 3);
cc.movsd(regF[A], regF[B]);
cc.addsd(regF[A], rc0);
cc.movsd(regF[A + 1], regF[B + 1]);
cc.addsd(regF[A + 1], rc1);
cc.movsd(regF[A + 2], regF[B + 2]);
cc.addsd(regF[A + 2], rc2);
cc.movsd(regF[A + 3], regF[B + 3]);
cc.addsd(regF[A + 3], rc3);
}
void JitCompiler::EmitSUBV4_RR()
{
auto rc0 = CheckRegF(C, A);
auto rc1 = CheckRegF(C + 1, A + 1);
auto rc2 = CheckRegF(C + 2, A + 2);
auto rc3 = CheckRegF(C + 3, A + 3);
cc.movsd(regF[A], regF[B]);
cc.subsd(regF[A], rc0);
cc.movsd(regF[A + 1], regF[B + 1]);
cc.subsd(regF[A + 1], rc1);
cc.movsd(regF[A + 2], regF[B + 2]);
cc.subsd(regF[A + 2], rc2);
cc.movsd(regF[A + 3], regF[B + 3]);
cc.subsd(regF[A + 3], rc3);
}
void JitCompiler::EmitDOTV4_RR()
{
auto rb1 = CheckRegF(B + 1, A);
auto rb2 = CheckRegF(B + 2, A);
auto rb3 = CheckRegF(B + 3, A);
auto rc0 = CheckRegF(C, A);
auto rc1 = CheckRegF(C + 1, A);
auto rc2 = CheckRegF(C + 2, A);
auto rc3 = CheckRegF(C + 3, A);
auto tmp = newTempXmmSd();
cc.movsd(regF[A], regF[B]);
cc.mulsd(regF[A], rc0);
cc.movsd(tmp, rb1);
cc.mulsd(tmp, rc1);
cc.addsd(regF[A], tmp);
cc.movsd(tmp, rb2);
cc.mulsd(tmp, rc2);
cc.addsd(regF[A], tmp);
cc.movsd(tmp, rb3);
cc.mulsd(tmp, rc3);
cc.addsd(regF[A], tmp);
}
// Emits regF[A + i] = regF[B + i] * regF[C] for the four lanes i = 0..3.
void JitCompiler::EmitMULVF4_RR()
{
	// The scalar is copied to a temporary if C is one of the destination lanes.
	auto scalar = CheckRegF(C, A, A + 1, A + 2, A + 3);

	// Copy the whole vector first, then scale each lane.
	for (int lane = 0; lane < 4; lane++)
	{
		cc.movsd(regF[A + lane], regF[B + lane]);
	}
	for (int lane = 0; lane < 4; lane++)
	{
		cc.mulsd(regF[A + lane], scalar);
	}
}
// Emits regF[A + i] = regF[B + i] * konstf[C] for the four lanes i = 0..3.
// The constant is read from memory through a pointer to konstf[C].
void JitCompiler::EmitMULVF4_RK()
{
	auto constAddr = newTempIntPtr();

	// Copy the whole vector first.
	for (int lane = 0; lane < 4; lane++)
	{
		cc.movsd(regF[A + lane], regF[B + lane]);
	}

	// Load the constant's address, then scale each lane by the value it points to.
	cc.mov(constAddr, asmjit::imm_ptr(&konstf[C]));
	for (int lane = 0; lane < 4; lane++)
	{
		cc.mulsd(regF[A + lane], asmjit::x86::qword_ptr(constAddr));
	}
}
// Emits regF[A + i] = regF[B + i] / regF[C] for the four lanes i = 0..3.
void JitCompiler::EmitDIVVF4_RR()
{
	// The divisor is copied to a temporary if C is one of the destination lanes.
	auto divisor = CheckRegF(C, A, A + 1, A + 2, A + 3);

	// Copy the whole vector first, then divide each lane.
	for (int lane = 0; lane < 4; lane++)
	{
		cc.movsd(regF[A + lane], regF[B + lane]);
	}
	for (int lane = 0; lane < 4; lane++)
	{
		cc.divsd(regF[A + lane], divisor);
	}
}
// Emits regF[A + i] = regF[B + i] / konstf[C] for the four lanes i = 0..3.
// The constant is read from memory through a pointer to konstf[C].
void JitCompiler::EmitDIVVF4_RK()
{
	auto constAddr = newTempIntPtr();

	// Copy the whole vector first.
	for (int lane = 0; lane < 4; lane++)
	{
		cc.movsd(regF[A + lane], regF[B + lane]);
	}

	// Load the constant's address, then divide each lane by the value it points to.
	cc.mov(constAddr, asmjit::imm_ptr(&konstf[C]));
	for (int lane = 0; lane < 4; lane++)
	{
		cc.divsd(regF[A + lane], asmjit::x86::qword_ptr(constAddr));
	}
}
void JitCompiler::EmitLENV4()
{
auto rb1 = CheckRegF(B + 1, A);
auto rb2 = CheckRegF(B + 2, A);
auto rb3 = CheckRegF(B + 3, A);
auto tmp = newTempXmmSd();
cc.movsd(regF[A], regF[B]);
cc.mulsd(regF[A], regF[B]);
cc.movsd(tmp, rb1);
cc.mulsd(tmp, rb1);
cc.addsd(regF[A], tmp);
cc.movsd(tmp, rb2);
cc.mulsd(tmp, rb2);
cc.addsd(regF[A], tmp);
cc.movsd(tmp, rb3);
cc.mulsd(tmp, rb3);
cc.addsd(regF[A], tmp);
CallSqrt(regF[A], regF[A]);
}
// Emits the four-component vector equality comparison by handing
// EmitVectorComparison<4> to the shared comparison-opcode driver.
void JitCompiler::EmitEQV4_R()
{
	auto compareLanes = [&](bool check, asmjit::Label& fail, asmjit::Label& success) {
		EmitVectorComparison<4>(check, fail, success);
	};
	EmitComparisonOpcode(compareLanes);
}
// Emitter for the constant-operand variant of the four-component vector comparison.
// It generates no code; it only calls I_Error with a message stating that the
// opcode is not used.
void JitCompiler::EmitEQV4_K()
{
I_Error("EQV4_K is not used.");
}
/////////////////////////////////////////////////////////////////////////////
// Pointer math.

View file

@ -39,11 +39,20 @@ void JitCompiler::EmitMOVEV3()
cc.movsd(regF[A + 2], regF[B + 2]);
}
// Emits a copy of the four float registers regF[B..B+3] into regF[A..A+3],
// lowest lane first.
void JitCompiler::EmitMOVEV4()
{
	for (int lane = 0; lane < 4; lane++)
	{
		cc.movsd(regF[A + lane], regF[B + lane]);
	}
}
// Value <-> string conversion helpers.
// The *2S functions format their input into the FString pointed to by `a`.
// The S2* functions parse the FString pointed to by `b`.

// Signed integer -> decimal text.
static void CastI2S(FString *a, int b)
{
	a->Format("%d", b);
}

// Integer formatted as unsigned decimal text.
static void CastU2S(FString *a, int b)
{
	a->Format("%u", b);
}

// Double -> text with five digits after the decimal point.
static void CastF2S(FString *a, double b)
{
	a->Format("%.5f", b);
}

// Two doubles -> "(x, y)".
static void CastV22S(FString *a, double b, double b1)
{
	a->Format("(%.5f, %.5f)", b, b1);
}

// Three doubles -> "(x, y, z)".
static void CastV32S(FString *a, double b, double b1, double b2)
{
	a->Format("(%.5f, %.5f, %.5f)", b, b1, b2);
}

// Four doubles -> "(x, y, z, w)".
static void CastV42S(FString *a, double b, double b1, double b2, double b3)
{
	a->Format("(%.5f, %.5f, %.5f, %.5f)", b, b1, b2, b3);
}

// Pointer -> "%p" text, or the literal "null" for a null pointer.
static void CastP2S(FString *a, void *b)
{
	if (b != nullptr)
		a->Format("%p", b);
	else
		*a = "null";
}

// String -> int, truncating the result of ToLong().
static int CastS2I(FString *b)
{
	return static_cast<int>(b->ToLong());
}

// String -> double via ToDouble().
static double CastS2F(FString *b)
{
	return b->ToDouble();
}
@ -109,6 +118,14 @@ void JitCompiler::EmitCAST()
call->setArg(2, regF[B + 1]);
call->setArg(3, regF[B + 2]);
break;
case CAST_V42S:
// CastV42S takes the destination string plus four doubles, one per setArg(1..4) below.
// List all four in the call signature so it matches the callee and the argument count.
call = CreateCall<void, FString*, double, double, double, double>(CastV42S);
call->setArg(0, regS[A]);
call->setArg(1, regF[B]);
call->setArg(2, regF[B + 1]);
call->setArg(3, regF[B + 2]);
call->setArg(4, regF[B + 3]);
break;
case CAST_P2S:
call = CreateCall<void, FString*, void*>(CastP2S);
call->setArg(0, regS[A]);

View file

@ -161,6 +161,30 @@ void JitCompiler::EmitSV3_R()
cc.movsd(asmjit::x86::qword_ptr(tmp, 16), regF[B + 2]);
}
// Emits a store of the four doubles regF[B..B+3] to consecutive 8-byte slots
// starting at regA[A] + konstd[C].
void JitCompiler::EmitSV4()
{
	// Emitted first, before the pointer in regA[A] is used for the store.
	EmitNullPointerThrow(A, X_WRITE_NIL);

	// Destination address = pointer + constant displacement.
	auto dest = newTempIntPtr();
	cc.mov(dest, regA[A]);
	cc.add(dest, konstd[C]);

	for (int lane = 0; lane < 4; lane++)
	{
		cc.movsd(asmjit::x86::qword_ptr(dest, lane * 8), regF[B + lane]);
	}
}
// Emits a store of the four doubles regF[B..B+3] to consecutive 8-byte slots
// starting at regA[A] + regD[C].
void JitCompiler::EmitSV4_R()
{
	// Emitted first, before the pointer in regA[A] is used for the store.
	EmitNullPointerThrow(A, X_WRITE_NIL);

	// Destination address = pointer + register offset.
	auto dest = newTempIntPtr();
	cc.mov(dest, regA[A]);
	cc.add(dest, regD[C]);

	for (int lane = 0; lane < 4; lane++)
	{
		cc.movsd(asmjit::x86::qword_ptr(dest, lane * 8), regF[B + lane]);
	}
}
void JitCompiler::EmitSFV2()
{
EmitNullPointerThrow(A, X_WRITE_NIL);
@ -219,6 +243,40 @@ void JitCompiler::EmitSFV3_R()
cc.movss(asmjit::x86::qword_ptr(tmp, 8), tmpF);
}
// Emits a store of regF[B..B+3], each narrowed from double to single precision,
// to consecutive 4-byte slots starting at regA[A] + konstd[C].
void JitCompiler::EmitSFV4()
{
	// Emitted first, before the pointer in regA[A] is used for the store.
	EmitNullPointerThrow(A, X_WRITE_NIL);

	// Destination address = pointer + constant displacement.
	auto dest = newTempIntPtr();
	cc.mov(dest, regA[A]);
	cc.add(dest, konstd[C]);

	// A single scratch register is reused for every converted lane.
	auto narrowed = newTempXmmSs();
	for (int lane = 0; lane < 4; lane++)
	{
		cc.cvtsd2ss(narrowed, regF[B + lane]);
		cc.movss(asmjit::x86::qword_ptr(dest, lane * 4), narrowed);
	}
}
// Emits a store of regF[B..B+3], each narrowed from double to single precision,
// to consecutive 4-byte slots starting at regA[A] + regD[C].
void JitCompiler::EmitSFV4_R()
{
	// Emitted first, before the pointer in regA[A] is used for the store.
	EmitNullPointerThrow(A, X_WRITE_NIL);

	// Destination address = pointer + register offset.
	auto dest = newTempIntPtr();
	cc.mov(dest, regA[A]);
	cc.add(dest, regD[C]);

	// A single scratch register is reused for every converted lane.
	auto narrowed = newTempXmmSs();
	for (int lane = 0; lane < 4; lane++)
	{
		cc.cvtsd2ss(narrowed, regF[B + lane]);
		cc.movss(asmjit::x86::qword_ptr(dest, lane * 4), narrowed);
	}
}
void JitCompiler::EmitSBIT()
{
EmitNullPointerThrow(A, X_WRITE_NIL);

View file

@ -241,6 +241,7 @@ private:
asmjit::X86Xmm CheckRegF(int r0, int r1);
asmjit::X86Xmm CheckRegF(int r0, int r1, int r2);
asmjit::X86Xmm CheckRegF(int r0, int r1, int r2, int r3);
asmjit::X86Xmm CheckRegF(int r0, int r1, int r2, int r3, int r4);
asmjit::X86Gp CheckRegS(int r0, int r1);
asmjit::X86Gp CheckRegA(int r0, int r1);