From a5252339144e1922957d87d701d995e78f10f91e Mon Sep 17 00:00:00 2001 From: Shiny Metagross <30511800+ShinyMetagross@users.noreply.github.com> Date: Fri, 11 Nov 2022 16:38:22 +0100 Subject: [PATCH] Attempted to add Vector4 to ZScript --- src/common/engine/namedef.h | 4 + src/common/engine/serializer.h | 5 + src/common/scripting/backend/codegen.cpp | 159 +++++++++++++----- src/common/scripting/backend/codegen.h | 7 +- src/common/scripting/core/types.cpp | 34 ++++ src/common/scripting/core/types.h | 2 + src/common/scripting/frontend/ast.cpp | 2 + src/common/scripting/frontend/zcc_compile.cpp | 39 ++++- src/common/scripting/frontend/zcc_parser.h | 3 +- src/common/scripting/jit/jit.cpp | 22 +++ src/common/scripting/jit/jit_load.cpp | 52 ++++++ src/common/scripting/jit/jit_math.cpp | 159 ++++++++++++++++++ src/common/scripting/jit/jit_move.cpp | 17 ++ src/common/scripting/jit/jit_store.cpp | 58 +++++++ src/common/scripting/jit/jitintern.h | 1 + src/common/scripting/vm/vmexec.h | 149 +++++++++++++++- src/common/scripting/vm/vmintern.h | 1 + src/common/scripting/vm/vmops.h | 22 +++ 18 files changed, 675 insertions(+), 61 deletions(-) diff --git a/src/common/engine/namedef.h b/src/common/engine/namedef.h index af8e2bca05..5e30591acb 100644 --- a/src/common/engine/namedef.h +++ b/src/common/engine/namedef.h @@ -124,8 +124,10 @@ xx(State) xx(Fixed) xx(Vector2) xx(Vector3) +xx(Vector4) xx(FVector2) xx(FVector3) +xx(FVector4) xx(let) xx(Min) @@ -175,7 +177,9 @@ xx(b) xx(X) xx(Y) xx(Z) +xx(W) xx(XY) +xx(XYZ) xx(Prototype) xx(Void) diff --git a/src/common/engine/serializer.h b/src/common/engine/serializer.h index d98798f959..63bfb35299 100644 --- a/src/common/engine/serializer.h +++ b/src/common/engine/serializer.h @@ -326,6 +326,11 @@ inline FSerializer &Serialize(FSerializer &arc, const char *key, DVector2 &p, DV return arc.Array(key, &p[0], def? 
&(*def)[0] : nullptr, 2, true); } +inline FSerializer& Serialize(FSerializer& arc, const char* key, FVector4& p, FVector4* def) +{ + return arc.Array(key, &p[0], def ? &(*def)[0] : nullptr, 4, true); +} + inline FSerializer& Serialize(FSerializer& arc, const char* key, FVector3& p, FVector3* def) { return arc.Array(key, &p[0], def ? &(*def)[0] : nullptr, 3, true); diff --git a/src/common/scripting/backend/codegen.cpp b/src/common/scripting/backend/codegen.cpp index 24ce94447f..394e4ffc81 100644 --- a/src/common/scripting/backend/codegen.cpp +++ b/src/common/scripting/backend/codegen.cpp @@ -573,19 +573,20 @@ ExpEmit FxConstant::Emit(VMFunctionBuilder *build) // //========================================================================== -FxVectorValue::FxVectorValue(FxExpression *x, FxExpression *y, FxExpression *z, const FScriptPosition &sc) +FxVectorValue::FxVectorValue(FxExpression *x, FxExpression *y, FxExpression *z, FxExpression* w, const FScriptPosition &sc) :FxExpression(EFX_VectorValue, sc) { - xyz[0] = x; - xyz[1] = y; - xyz[2] = z; + xyzw[0] = x; + xyzw[1] = y; + xyzw[2] = z; + xyzw[3] = w; isConst = false; ValueType = TypeVoid; // we do not know yet } FxVectorValue::~FxVectorValue() { - for (auto &a : xyz) + for (auto &a : xyzw) { SAFE_DELETE(a); } @@ -595,7 +596,7 @@ FxExpression *FxVectorValue::Resolve(FCompileContext&ctx) { bool fails = false; - for (auto &a : xyz) + for (auto &a : xyzw) { if (a != nullptr) { @@ -617,33 +618,50 @@ FxExpression *FxVectorValue::Resolve(FCompileContext&ctx) delete this; return nullptr; } - // at this point there are three legal cases: + // at this point there are five legal cases: // * two floats = vector2 // * three floats = vector3 + // * four floats = vector4 // * vector2 + float = vector3 - if (xyz[0]->ValueType == TypeVector2) + // * vector3 + float = vector4 + if (xyzw[0]->ValueType == TypeVector2) { - if (xyz[1]->ValueType != TypeFloat64 || xyz[2] != nullptr) + if (xyzw[1]->ValueType != TypeFloat64 || xyzw[2] != 
nullptr) { ScriptPosition.Message(MSG_ERROR, "Not a valid vector"); delete this; return nullptr; } ValueType = TypeVector3; - if (xyz[0]->ExprType == EFX_VectorValue) + if (xyzw[0]->ExprType == EFX_VectorValue) { // If two vector initializers are nested, unnest them now. - auto vi = static_cast(xyz[0]); - xyz[2] = xyz[1]; - xyz[1] = vi->xyz[1]; - xyz[0] = vi->xyz[0]; - vi->xyz[0] = vi->xyz[1] = nullptr; // Don't delete our own expressions. + auto vi = static_cast(xyzw[0]); + xyzw[2] = xyzw[1]; + xyzw[1] = vi->xyzw[1]; + xyzw[0] = vi->xyzw[0]; + vi->xyzw[0] = vi->xyzw[1] = nullptr; // Don't delete our own expressions. + delete vi; + } + ValueType = TypeVector4; + if (xyzw[0]->ExprType == EFX_VectorValue) + { + // If two vector initializers are nested, unnest them now. + auto vi = static_cast(xyzw[0]); + xyzw[2] = xyzw[1]; + xyzw[1] = vi->xyzw[1]; + xyzw[0] = vi->xyzw[0]; + vi->xyzw[0] = vi->xyzw[1] = nullptr; // Don't delete our own expressions. delete vi; } } - else if (xyz[0]->ValueType == TypeFloat64 && xyz[1]->ValueType == TypeFloat64) + else if (xyzw[0]->ValueType == TypeFloat64 && xyzw[1]->ValueType == TypeFloat64) { - ValueType = xyz[2] == nullptr ? TypeVector2 : TypeVector3; + ValueType = xyzw[2] == nullptr ? TypeVector2 : TypeVector3; + } + else if (xyzw[0]->ValueType == TypeFloat64 && xyzw[1]->ValueType == TypeFloat64 && xyzw[2]->ValueType == TypeFloat64) + { + ValueType = xyzw[3] == nullptr ? TypeVector3 : TypeVector4; } else { @@ -654,7 +672,7 @@ FxExpression *FxVectorValue::Resolve(FCompileContext&ctx) // check if all elements are constant. If so this can be emitted as a constant vector. isConst = true; - for (auto &a : xyz) + for (auto &a : xyzw) { if (a != nullptr && !a->isConstant()) isConst = false; } @@ -676,12 +694,12 @@ ExpEmit FxVectorValue::Emit(VMFunctionBuilder *build) { // no const handling here. Ultimately it's too rarely used (i.e. 
the only fully constant vector ever allocated in ZDoom is the 0-vector in a very few places) // and the negatives (excessive allocation of float constants) outweigh the positives (saved a few instructions) - assert(xyz[0] != nullptr); - assert(xyz[1] != nullptr); + assert(xyzw[0] != nullptr); + assert(xyzw[1] != nullptr); if (ValueType == TypeVector2) { - ExpEmit tempxval = xyz[0]->Emit(build); - ExpEmit tempyval = xyz[1]->Emit(build); + ExpEmit tempxval = xyzw[0]->Emit(build); + ExpEmit tempyval = xyzw[1]->Emit(build); ExpEmit xval = EmitKonst(build, tempxval); ExpEmit yval = EmitKonst(build, tempyval); assert(xval.RegType == REGT_FLOAT && yval.RegType == REGT_FLOAT); @@ -702,10 +720,10 @@ ExpEmit FxVectorValue::Emit(VMFunctionBuilder *build) return out; } } - else if (xyz[0]->ValueType == TypeVector2) // vec2+float + else if (xyzw[0]->ValueType == TypeVector2) // vec2+float { - ExpEmit xyval = xyz[0]->Emit(build); - ExpEmit tempzval = xyz[1]->Emit(build); + ExpEmit xyval = xyzw[0]->Emit(build); + ExpEmit tempzval = xyzw[1]->Emit(build); ExpEmit zval = EmitKonst(build, tempzval); assert(xyval.RegType == REGT_FLOAT && xyval.RegCount == 2 && zval.RegType == REGT_FLOAT); if (zval.RegNum == xyval.RegNum + 2) @@ -717,7 +735,7 @@ ExpEmit FxVectorValue::Emit(VMFunctionBuilder *build) else { // The values are not in continuous registers so they need to be copied together now. 
- ExpEmit out(build, REGT_FLOAT, 3); + ExpEmit out(build, REGT_FLOAT, 4); build->Emit(OP_MOVEV2, out.RegNum, xyval.RegNum); build->Emit(OP_MOVEF, out.RegNum + 2, zval.RegNum); xyval.Free(build); @@ -725,12 +743,12 @@ ExpEmit FxVectorValue::Emit(VMFunctionBuilder *build) return out; } } - else // 3*float + else if (xyzw[0]->ValueType == TypeVector3) // vec3+float { - assert(xyz[2] != nullptr); - ExpEmit tempxval = xyz[0]->Emit(build); - ExpEmit tempyval = xyz[1]->Emit(build); - ExpEmit tempzval = xyz[2]->Emit(build); + assert(xyzw[2] != nullptr); + ExpEmit tempxval = xyzw[0]->Emit(build); + ExpEmit tempyval = xyzw[1]->Emit(build); + ExpEmit tempzval = xyzw[2]->Emit(build); ExpEmit xval = EmitKonst(build, tempxval); ExpEmit yval = EmitKonst(build, tempyval); ExpEmit zval = EmitKonst(build, tempzval); @@ -744,7 +762,52 @@ ExpEmit FxVectorValue::Emit(VMFunctionBuilder *build) else { // The values are not in continuous registers so they need to be copied together now. - ExpEmit out(build, REGT_FLOAT, 3); + ExpEmit out(build, REGT_FLOAT, 4); + //Try to optimize a bit... 
+ if (yval.RegNum == xval.RegNum + 1) + { + build->Emit(OP_MOVEV2, out.RegNum, xval.RegNum); + build->Emit(OP_MOVEF, out.RegNum + 2, zval.RegNum); + } + else if (zval.RegNum == yval.RegNum + 1) + { + build->Emit(OP_MOVEF, out.RegNum, xval.RegNum); + build->Emit(OP_MOVEV2, out.RegNum+1, yval.RegNum); + } + else + { + build->Emit(OP_MOVEF, out.RegNum, xval.RegNum); + build->Emit(OP_MOVEF, out.RegNum + 1, yval.RegNum); + build->Emit(OP_MOVEF, out.RegNum + 2, zval.RegNum); + } + xval.Free(build); + yval.Free(build); + zval.Free(build); + return out; + } + } + else + { + assert(xyzw[3] != nullptr); + ExpEmit tempxval = xyzw[0]->Emit(build); + ExpEmit tempyval = xyzw[1]->Emit(build); + ExpEmit tempzval = xyzw[2]->Emit(build); + ExpEmit tempwval = xyzw[3]->Emit(build); + ExpEmit xval = EmitKonst(build, tempxval); + ExpEmit yval = EmitKonst(build, tempyval); + ExpEmit zval = EmitKonst(build, tempzval); + ExpEmit wval = EmitKonst(build, tempwval); + assert(xval.RegType == REGT_FLOAT && yval.RegType == REGT_FLOAT && zval.RegType == REGT_FLOAT && wval.RegType == REGT_FLOAT); + if (yval.RegNum == xval.RegNum + 1 && zval.RegNum == xval.RegNum + 2) + { + // The results are already in three continuous registers so just return them as-is. + xval.RegCount += 3; + return xval; + } + else + { + // The values are not in continuous registers so they need to be copied together now. + ExpEmit out(build, REGT_FLOAT, 4); //Try to optimize a bit... 
if (yval.RegNum == xval.RegNum + 1) { @@ -1688,7 +1751,7 @@ FxExpression *FxTypeCast::Resolve(FCompileContext &ctx) delete this; return x; } - else if ((basex->IsVector2() && IsVector2()) || (basex->IsVector3() && IsVector3())) + else if ((basex->IsVector2() && IsVector2()) || (basex->IsVector3() && IsVector3()) || (basex->IsVector4() && IsVector4())) { auto x = basex; basex = nullptr; @@ -1887,6 +1950,10 @@ ExpEmit FxMinusSign::Emit(VMFunctionBuilder *build) build->Emit(OP_NEGV3, to.RegNum, from.RegNum); break; + case 4: + build->Emit(OP_NEGV4, to.RegNum, from.RegNum); + break; + } } return to; @@ -2799,7 +2866,7 @@ FxExpression *FxAddSub::Resolve(FCompileContext& ctx) else if (left->IsVector() && right->IsVector()) { // a vector2 can be added to or subtracted from a vector 3 but it needs to be the right operand. - if (((left->IsVector3() || left->IsVector2()) && right->IsVector2()) || (left->IsVector3() && right->IsVector3())) + if (((left->IsVector3() || left->IsVector2()) && right->IsVector2()) || (left->IsVector3() && right->IsVector3()) || (left->IsVector4() && right->IsVector4())) { ValueType = left->ValueType; } @@ -2893,7 +2960,7 @@ ExpEmit FxAddSub::Emit(VMFunctionBuilder *build) { assert(op1.RegType == REGT_FLOAT && op2.RegType == REGT_FLOAT); - build->Emit(right->IsVector2() ? OP_ADDV2_RR : OP_ADDV3_RR, to.RegNum, op1.RegNum, op2.RegNum); + build->Emit(right->IsVector4() ? OP_ADDV4_RR : right->IsVector3() ? OP_ADDV3_RR : OP_ADDV2_RR, to.RegNum, op1.RegNum, op2.RegNum); if (left->IsVector3() && right->IsVector2() && to.RegNum != op1.RegNum) { // must move the z-coordinate @@ -2926,7 +2993,7 @@ ExpEmit FxAddSub::Emit(VMFunctionBuilder *build) if (IsVector()) { assert(op1.RegType == REGT_FLOAT && op2.RegType == REGT_FLOAT); - build->Emit(right->IsVector2() ? OP_SUBV2_RR : OP_SUBV3_RR, to.RegNum, op1.RegNum, op2.RegNum); + build->Emit(right->IsVector4() ? OP_SUBV4_RR : right->IsVector3() ? 
OP_SUBV3_RR : OP_SUBV2_RR, to.RegNum, op1.RegNum, op2.RegNum); return to; } else if (ValueType->GetRegType() == REGT_FLOAT) @@ -3598,7 +3665,7 @@ FxExpression *FxCompareEq::Resolve(FCompileContext& ctx) } // identical types are always comparable, if they can be placed in a register, so we can save most checks if this is the case. - if (left->ValueType != right->ValueType && !(left->IsVector2() && right->IsVector2()) && !(left->IsVector3() && right->IsVector3())) + if (left->ValueType != right->ValueType && !(left->IsVector2() && right->IsVector2()) && !(left->IsVector3() && right->IsVector3()) && !(left->IsVector4() && right->IsVector4())) { FxExpression *x; if (left->IsNumeric() && right->ValueType == TypeString && (x = StringConstToChar(right))) @@ -3840,7 +3907,7 @@ ExpEmit FxCompareEq::EmitCommon(VMFunctionBuilder *build, bool forcompare, bool ExpEmit to(build, REGT_INT); - static int flops[] = { OP_EQF_R, OP_EQV2_R, OP_EQV3_R }; + static int flops[] = { OP_EQF_R, OP_EQV2_R, OP_EQV3_R, OP_EQV4_R }; instr = op1.RegType == REGT_INT ? OP_EQ_R : op1.RegType == REGT_FLOAT ? flops[op1.RegCount - 1] : OP_EQA_R; @@ -4256,7 +4323,7 @@ ExpEmit FxConcat::Emit(VMFunctionBuilder *build) build->Emit(op1.RegType == REGT_INT ? OP_LK : op1.RegType == REGT_FLOAT ? OP_LKF : OP_LKP, nonconst.RegNum, op1.RegNum); op1 = nonconst; } - if (op1.RegType == REGT_FLOAT) cast = op1.RegCount == 1 ? CAST_F2S : op1.RegCount == 2 ? CAST_V22S : CAST_V32S; + if (op1.RegType == REGT_FLOAT) cast = op1.RegCount == 1 ? CAST_F2S : op1.RegCount == 2 ? CAST_V22S : op1.RegCount == 3 ? CAST_V32S : CAST_V42S; else if (left->ValueType == TypeUInt32) cast = CAST_U2S; else if (left->ValueType == TypeName) cast = CAST_N2S; else if (left->ValueType == TypeSound) cast = CAST_So2S; @@ -4289,7 +4356,7 @@ ExpEmit FxConcat::Emit(VMFunctionBuilder *build) build->Emit(op2.RegType == REGT_INT ? OP_LK : op2.RegType == REGT_FLOAT ? 
OP_LKF : OP_LKP, nonconst.RegNum, op2.RegNum); op2 = nonconst; } - if (op2.RegType == REGT_FLOAT) cast = op2.RegCount == 1 ? CAST_F2S : op2.RegCount == 2 ? CAST_V22S : CAST_V32S; + if (op2.RegType == REGT_FLOAT) cast = op2.RegCount == 1 ? CAST_F2S : op2.RegCount == 2 ? CAST_V22S : op2.RegCount == 3 ? CAST_V32S : CAST_V42S; else if (right->ValueType == TypeUInt32) cast = CAST_U2S; else if (right->ValueType == TypeName) cast = CAST_N2S; else if (right->ValueType == TypeSound) cast = CAST_So2S; @@ -4552,7 +4619,7 @@ ExpEmit FxDotCross::Emit(VMFunctionBuilder *build) ExpEmit to(build, ValueType->GetRegType(), ValueType->GetRegCount()); ExpEmit op1 = left->Emit(build); ExpEmit op2 = right->Emit(build); - int op = Operator == TK_Cross ? OP_CROSSV_RR : left->ValueType == TypeVector3 ? OP_DOTV3_RR : OP_DOTV2_RR; + int op = Operator == TK_Cross ? OP_CROSSV_RR : left->ValueType == TypeVector4 ? OP_DOTV4_RR : left->ValueType == TypeVector3 ? OP_DOTV3_RR : OP_DOTV2_RR; build->Emit(op, to.RegNum, op1.RegNum, op2.RegNum); op1.Free(build); op2.Free(build); @@ -8740,12 +8807,12 @@ FxExpression *FxVMFunctionCall::Resolve(FCompileContext& ctx) else { // Vectors need special treatment because they are not normal constants - FxConstant *cs[3] = { nullptr }; + FxConstant *cs[4] = { nullptr }; for (int l = 0; l < ntype->GetRegCount(); l++) { cs[l] = new FxConstant(TypeFloat64, defaults[l + i + k + skipdefs + implicit], ScriptPosition); } - FxExpression *x = new FxVectorValue(cs[0], cs[1], cs[2], ScriptPosition); + FxExpression *x = new FxVectorValue(cs[0], cs[1], cs[2], cs[3], ScriptPosition); ArgList.Insert(i + k, x); skipdefs += ntype->GetRegCount() - 1; } @@ -9157,13 +9224,13 @@ ExpEmit FxVectorBuiltin::Emit(VMFunctionBuilder *build) ExpEmit op = Self->Emit(build); if (Function == NAME_Length) { - build->Emit(Self->ValueType == TypeVector2 || Self->ValueType == TypeFVector2 ? 
OP_LENV2 : OP_LENV3, to.RegNum, op.RegNum); + build->Emit(Self->ValueType == TypeVector2 || Self->ValueType == TypeFVector2 ? OP_LENV2 : Self->ValueType == TypeVector3 || Self->ValueType == TypeFVector3 ? OP_LENV3 : OP_LENV4, to.RegNum, op.RegNum); } else { ExpEmit len(build, REGT_FLOAT); - build->Emit(Self->ValueType == TypeVector2 || Self->ValueType == TypeFVector2 ? OP_LENV2 : OP_LENV3, len.RegNum, op.RegNum); - build->Emit(Self->ValueType == TypeVector2 || Self->ValueType == TypeFVector2 ? OP_DIVVF2_RR : OP_DIVVF3_RR, to.RegNum, op.RegNum, len.RegNum); + build->Emit(Self->ValueType == TypeVector2 || Self->ValueType == TypeFVector2 ? OP_LENV2 : Self->ValueType == TypeVector3 || Self->ValueType == TypeFVector3 ? OP_LENV3 : OP_LENV4, len.RegNum, op.RegNum); + build->Emit(Self->ValueType == TypeVector2 || Self->ValueType == TypeFVector2 ? OP_DIVVF2_RR : Self->ValueType == TypeVector3 || Self->ValueType == TypeFVector3 ? OP_DIVVF3_RR : OP_DIVVF4_RR, to.RegNum, op.RegNum, len.RegNum); len.Free(build); } op.Free(build); diff --git a/src/common/scripting/backend/codegen.h b/src/common/scripting/backend/codegen.h index 2e36177002..a5a23fec6e 100644 --- a/src/common/scripting/backend/codegen.h +++ b/src/common/scripting/backend/codegen.h @@ -339,6 +339,7 @@ public: bool IsVector() const { return ValueType == TypeVector2 || ValueType == TypeVector3 || ValueType == TypeFVector2 || ValueType == TypeFVector3; }; bool IsVector2() const { return ValueType == TypeVector2 || ValueType == TypeFVector2; }; bool IsVector3() const { return ValueType == TypeVector3 || ValueType == TypeFVector3; }; + bool IsVector4() const { return ValueType == TypeVector4 || ValueType == TypeFVector4; }; bool IsBoolCompat() const { return ValueType->isScalar(); } bool IsObject() const { return ValueType->isObjectPointer(); } bool IsArray() const { return ValueType->isArray() || (ValueType->isPointer() && ValueType->toPointer()->PointedType->isArray()); } @@ -550,20 +551,20 @@ public: class FxVectorValue : public FxExpression { - FxExpression *xyz[3]; + FxExpression *xyzw[4]; bool isConst; // gets set 
to true if all element are const (used by function defaults parser) public: friend class ZCCCompiler; - FxVectorValue(FxExpression *x, FxExpression *y, FxExpression *z, const FScriptPosition &sc); + FxVectorValue(FxExpression *x, FxExpression *y, FxExpression *z, FxExpression* w, const FScriptPosition &sc); ~FxVectorValue(); FxExpression *Resolve(FCompileContext&); bool isConstVector(int dim) { if (!isConst) return false; - return dim == 2 ? xyz[2] == nullptr : xyz[2] != nullptr; + return dim == 2 ? xyzw[2] == nullptr : dim == 3 ? (xyzw[2] != nullptr && xyzw[3] == nullptr) : (xyzw[2] != nullptr && xyzw[3] != nullptr); } ExpEmit Emit(VMFunctionBuilder *build); diff --git a/src/common/scripting/core/types.cpp b/src/common/scripting/core/types.cpp index 5dd7cd6b3b..3f57bfb134 100644 --- a/src/common/scripting/core/types.cpp +++ b/src/common/scripting/core/types.cpp @@ -61,8 +61,10 @@ PPointer *TypeFont; PStateLabel *TypeStateLabel; PStruct *TypeVector2; PStruct *TypeVector3; +PStruct* TypeVector4; PStruct* TypeFVector2; PStruct* TypeFVector3; +PStruct* TypeFVector4; PStruct *TypeColorStruct; PStruct *TypeStringStruct; PPointer *TypeNullPtr; @@ -350,6 +352,21 @@ void PType::StaticInit() TypeVector3->RegCount = 3; TypeVector3->isOrdered = true; + TypeVector4 = new PStruct(NAME_Vector4, nullptr); + TypeVector4->AddField(NAME_X, TypeFloat64); + TypeVector4->AddField(NAME_Y, TypeFloat64); + TypeVector4->AddField(NAME_Z, TypeFloat64); + TypeVector4->AddField(NAME_W, TypeFloat64); + // allow accessing xyz as a vector3. 
This is not supposed to be serialized so it's marked transient + TypeVector4->Symbols.AddSymbol(Create(NAME_XYZ, TypeVector3, VARF_Transient, 0)); + TypeTable.AddType(TypeVector4, NAME_Struct); + TypeVector4->loadOp = OP_LV4; + TypeVector4->storeOp = OP_SV4; + TypeVector4->moveOp = OP_MOVEV4; + TypeVector4->RegType = REGT_FLOAT; + TypeVector4->RegCount = 4; + TypeVector4->isOrdered = true; + TypeFVector2 = new PStruct(NAME_FVector2, nullptr); TypeFVector2->AddField(NAME_X, TypeFloat32); @@ -376,6 +393,21 @@ void PType::StaticInit() TypeFVector3->RegCount = 3; TypeFVector3->isOrdered = true; + TypeFVector4 = new PStruct(NAME_FVector4, nullptr); + TypeFVector4->AddField(NAME_X, TypeFloat32); + TypeFVector4->AddField(NAME_Y, TypeFloat32); + TypeFVector4->AddField(NAME_Z, TypeFloat32); + TypeFVector4->AddField(NAME_W, TypeFloat32); + // allow accessing xyz as a vector3 + TypeFVector4->Symbols.AddSymbol(Create(NAME_XYZ, TypeFVector3, VARF_Transient, 0)); + TypeTable.AddType(TypeFVector4, NAME_Struct); + TypeFVector4->loadOp = OP_LFV4; + TypeFVector4->storeOp = OP_SFV4; + TypeFVector4->moveOp = OP_MOVEV4; + TypeFVector4->RegType = REGT_FLOAT; + TypeFVector4->RegCount = 4; + TypeFVector4->isOrdered = true; + Namespaces.GlobalNamespace->Symbols.AddSymbol(Create(NAME_sByte, TypeSInt8)); Namespaces.GlobalNamespace->Symbols.AddSymbol(Create(NAME_Byte, TypeUInt8)); Namespaces.GlobalNamespace->Symbols.AddSymbol(Create(NAME_Short, TypeSInt16)); @@ -394,8 +426,10 @@ void PType::StaticInit() Namespaces.GlobalNamespace->Symbols.AddSymbol(Create(NAME_State, TypeState)); Namespaces.GlobalNamespace->Symbols.AddSymbol(Create(NAME_Vector2, TypeVector2)); Namespaces.GlobalNamespace->Symbols.AddSymbol(Create(NAME_Vector3, TypeVector3)); + Namespaces.GlobalNamespace->Symbols.AddSymbol(Create(NAME_Vector4, TypeVector4)); Namespaces.GlobalNamespace->Symbols.AddSymbol(Create(NAME_FVector2, TypeFVector2)); Namespaces.GlobalNamespace->Symbols.AddSymbol(Create(NAME_FVector3, TypeFVector3)); + 
Namespaces.GlobalNamespace->Symbols.AddSymbol(Create(NAME_FVector4, TypeFVector4)); } diff --git a/src/common/scripting/core/types.h b/src/common/scripting/core/types.h index 959d66bdac..25fe895fd9 100644 --- a/src/common/scripting/core/types.h +++ b/src/common/scripting/core/types.h @@ -615,8 +615,10 @@ extern PTextureID *TypeTextureID; extern PSpriteID *TypeSpriteID; extern PStruct* TypeVector2; extern PStruct* TypeVector3; +extern PStruct* TypeVector4; extern PStruct* TypeFVector2; extern PStruct* TypeFVector3; +extern PStruct* TypeFVector4; extern PStruct *TypeColorStruct; extern PStruct *TypeStringStruct; extern PStatePointer *TypeState; diff --git a/src/common/scripting/frontend/ast.cpp b/src/common/scripting/frontend/ast.cpp index cb29457e4b..929352db8f 100644 --- a/src/common/scripting/frontend/ast.cpp +++ b/src/common/scripting/frontend/ast.cpp @@ -53,6 +53,7 @@ static const char *BuiltInTypeNames[] = "string", "vector2", "vector3", + "vector4", "name", "color", @@ -684,6 +685,7 @@ static void PrintVectorInitializer(FLispString &out, ZCC_TreeNode *node) PrintNodes(out, enode->X); PrintNodes(out, enode->Y); PrintNodes(out, enode->Z); + PrintNodes(out, enode->W); out.Close(); } diff --git a/src/common/scripting/frontend/zcc_compile.cpp b/src/common/scripting/frontend/zcc_compile.cpp index 00cc4ace41..620f945e0e 100644 --- a/src/common/scripting/frontend/zcc_compile.cpp +++ b/src/common/scripting/frontend/zcc_compile.cpp @@ -1790,6 +1790,10 @@ PType *ZCCCompiler::DetermineType(PType *outertype, ZCC_TreeNode *field, FName n retval = TypeVector3; break; + case ZCC_Vector4: + retval = TypeVector4; + break; + case ZCC_State: retval = TypeState; break; @@ -2150,7 +2154,7 @@ void ZCCCompiler::CompileFunction(ZCC_StructWork *c, ZCC_FuncDeclarator *f, bool do { auto type = DetermineType(c->Type(), f, f->Name, t, false, false); - if (type->isContainer() && type != TypeVector2 && type != TypeVector3 && type != TypeFVector2 && type != TypeFVector3) + if 
(type->isContainer() && type != TypeVector2 && type != TypeVector3 && type != TypeVector4 && type != TypeFVector2 && type != TypeFVector3 && type != TypeFVector4) { // structs and classes only get passed by pointer. type = NewPointer(type); @@ -2168,6 +2172,10 @@ void ZCCCompiler::CompileFunction(ZCC_StructWork *c, ZCC_FuncDeclarator *f, bool { type = TypeVector3; } + else if (type == TypeFVector4) + { + type = TypeVector4; + } // TBD: disallow certain types? For now, let everything pass that isn't an array. rets.Push(type); t = static_cast(t->SiblingNext); @@ -2340,7 +2348,7 @@ void ZCCCompiler::CompileFunction(ZCC_StructWork *c, ZCC_FuncDeclarator *f, bool do { int elementcount = 1; - TypedVMValue vmval[3]; // default is REGT_NIL which means 'no default value' here. + TypedVMValue vmval[4]; // default is REGT_NIL which means 'no default value' here. if (p->Type != nullptr) { auto type = DetermineType(c->Type(), p, f->Name, p->Type, false, false); @@ -2362,8 +2370,12 @@ void ZCCCompiler::CompileFunction(ZCC_StructWork *c, ZCC_FuncDeclarator *f, bool { elementcount = 3; } + else if (type == TypeVector4 || type == TypeFVector4) + { + elementcount = 4; + } } - if (type->GetRegType() == REGT_NIL && type != TypeVector2 && type != TypeVector3 && type != TypeFVector2 && type != TypeFVector3) + if (type->GetRegType() == REGT_NIL && type != TypeVector2 && type != TypeVector3 && type != TypeVector4 && type != TypeFVector2 && type != TypeFVector3 && type != TypeFVector4) { // If it's TypeError, then an error was already given if (type != TypeError) @@ -2407,15 +2419,23 @@ void ZCCCompiler::CompileFunction(ZCC_StructWork *c, ZCC_FuncDeclarator *f, bool if ((type == TypeVector2 || type == TypeFVector2) && x->ExprType == EFX_VectorValue && static_cast(x)->isConstVector(2)) { auto vx = static_cast(x); - vmval[0] = static_cast(vx->xyz[0])->GetValue().GetFloat(); - vmval[1] = static_cast(vx->xyz[1])->GetValue().GetFloat(); + vmval[0] = 
static_cast(vx->xyzw[0])->GetValue().GetFloat(); + vmval[1] = static_cast(vx->xyzw[1])->GetValue().GetFloat(); } else if ((type == TypeVector3 || type == TypeFVector3) && x->ExprType == EFX_VectorValue && static_cast(x)->isConstVector(3)) { auto vx = static_cast(x); - vmval[0] = static_cast(vx->xyz[0])->GetValue().GetFloat(); - vmval[1] = static_cast(vx->xyz[1])->GetValue().GetFloat(); - vmval[2] = static_cast(vx->xyz[2])->GetValue().GetFloat(); + vmval[0] = static_cast(vx->xyzw[0])->GetValue().GetFloat(); + vmval[1] = static_cast(vx->xyzw[1])->GetValue().GetFloat(); + vmval[2] = static_cast(vx->xyzw[2])->GetValue().GetFloat(); + } + else if ((type == TypeVector4 || type == TypeFVector4) && x->ExprType == EFX_VectorValue && static_cast(x)->isConstVector(4)) + { + auto vx = static_cast(x); + vmval[0] = static_cast(vx->xyzw[0])->GetValue().GetFloat(); + vmval[1] = static_cast(vx->xyzw[1])->GetValue().GetFloat(); + vmval[2] = static_cast(vx->xyzw[2])->GetValue().GetFloat(); + vmval[3] = static_cast(vx->xyzw[3])->GetValue().GetFloat(); } else if (!x->isConstant()) { @@ -3038,7 +3058,8 @@ FxExpression *ZCCCompiler::ConvertNode(ZCC_TreeNode *ast, bool substitute) auto xx = ConvertNode(vecini->X); auto yy = ConvertNode(vecini->Y); auto zz = ConvertNode(vecini->Z); - return new FxVectorValue(xx, yy, zz, *ast); + auto ww = ConvertNode(vecini->W); + return new FxVectorValue(xx, yy, zz, ww, *ast); } case AST_LocalVarStmt: diff --git a/src/common/scripting/frontend/zcc_parser.h b/src/common/scripting/frontend/zcc_parser.h index 2d89d0d947..b48ecbe3c8 100644 --- a/src/common/scripting/frontend/zcc_parser.h +++ b/src/common/scripting/frontend/zcc_parser.h @@ -158,6 +158,7 @@ enum EZCCBuiltinType ZCC_String, ZCC_Vector2, ZCC_Vector3, + ZCC_Vector4, ZCC_Name, ZCC_Color, // special types for ZDoom. 
@@ -442,7 +443,7 @@ struct ZCC_ExprTrinary : ZCC_Expression struct ZCC_VectorValue : ZCC_Expression { - ZCC_Expression *X, *Y, *Z; + ZCC_Expression *X, *Y, *Z, *W; }; struct ZCC_Statement : ZCC_TreeNode diff --git a/src/common/scripting/jit/jit.cpp b/src/common/scripting/jit/jit.cpp index 8937fa1e03..6cd78bd057 100644 --- a/src/common/scripting/jit/jit.cpp +++ b/src/common/scripting/jit/jit.cpp @@ -6,6 +6,7 @@ extern PString *TypeString; extern PStruct *TypeVector2; extern PStruct *TypeVector3; +extern PStruct* TypeVector4; static void OutputJitLog(const asmjit::StringLogger &logger); @@ -315,6 +316,13 @@ void JitCompiler::SetupSimpleFrame() cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); } + else if (type == TypeVector4 || type == TypeFVector4) + { + cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); + cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); + cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); + cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); + } else if (type == TypeFloat64) { cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); @@ -551,6 +559,20 @@ asmjit::X86Xmm JitCompiler::CheckRegF(int r0, int r1, int r2, int r3) } } +asmjit::X86Xmm JitCompiler::CheckRegF(int r0, int r1, int r2, int r3, int r4) +{ + if (r0 != r1 && r0 != r2 && r0 != r3 && r0 != r4) + { + return regF[r0]; + } + else + { + auto copy = newTempXmmSd(); + cc.movsd(copy, regF[r0]); + return copy; + } +} + asmjit::X86Gp JitCompiler::CheckRegS(int r0, int r1) { if (r0 != r1) diff --git a/src/common/scripting/jit/jit_load.cpp b/src/common/scripting/jit/jit_load.cpp index ec5f795df8..f601eb1b0b 100644 --- 
a/src/common/scripting/jit/jit_load.cpp +++ b/src/common/scripting/jit/jit_load.cpp @@ -325,6 +325,28 @@ void JitCompiler::EmitLV3_R() cc.movsd(regF[A + 2], asmjit::x86::qword_ptr(tmp, 16)); } +void JitCompiler::EmitLV4() +{ + EmitNullPointerThrow(B, X_READ_NIL); + auto tmp = newTempIntPtr(); + cc.lea(tmp, asmjit::x86::qword_ptr(regA[B], konstd[C])); + cc.movsd(regF[A], asmjit::x86::qword_ptr(tmp)); + cc.movsd(regF[A + 1], asmjit::x86::qword_ptr(tmp, 8)); + cc.movsd(regF[A + 2], asmjit::x86::qword_ptr(tmp, 16)); + cc.movsd(regF[A + 3], asmjit::x86::qword_ptr(tmp, 24)); /* 4th double: byte offset 3 * 8, matching v[3] in the interpreter */ +} + +void JitCompiler::EmitLV4_R() +{ + EmitNullPointerThrow(B, X_READ_NIL); + auto tmp = newTempIntPtr(); + cc.lea(tmp, asmjit::x86::qword_ptr(regA[B], regD[C])); + cc.movsd(regF[A], asmjit::x86::qword_ptr(tmp)); + cc.movsd(regF[A + 1], asmjit::x86::qword_ptr(tmp, 8)); + cc.movsd(regF[A + 2], asmjit::x86::qword_ptr(tmp, 16)); + cc.movsd(regF[A + 3], asmjit::x86::qword_ptr(tmp, 24)); /* 4th double: byte offset 3 * 8, matching v[3] in the interpreter */ +} + void JitCompiler::EmitLFV2() { EmitNullPointerThrow(B, X_READ_NIL); @@ -373,6 +395,36 @@ void JitCompiler::EmitLFV3_R() cc.cvtss2sd(regF[A + 2], regF[A + 2]); } +void JitCompiler::EmitLFV4() +{ + EmitNullPointerThrow(B, X_READ_NIL); + auto tmp = newTempIntPtr(); + cc.lea(tmp, asmjit::x86::qword_ptr(regA[B], konstd[C])); + cc.movss(regF[A], asmjit::x86::qword_ptr(tmp)); + cc.movss(regF[A + 1], asmjit::x86::qword_ptr(tmp, 4)); + cc.movss(regF[A + 2], asmjit::x86::qword_ptr(tmp, 8)); + cc.movss(regF[A + 3], asmjit::x86::qword_ptr(tmp, 12)); /* 4th float: byte offset 3 * 4, matching v[3] in the interpreter */ + cc.cvtss2sd(regF[A], regF[A]); + cc.cvtss2sd(regF[A + 1], regF[A + 1]); + cc.cvtss2sd(regF[A + 2], regF[A + 2]); + cc.cvtss2sd(regF[A + 3], regF[A + 3]); +} + +void JitCompiler::EmitLFV4_R() +{ + EmitNullPointerThrow(B, X_READ_NIL); + auto tmp = newTempIntPtr(); + cc.lea(tmp, asmjit::x86::qword_ptr(regA[B], regD[C])); + cc.movss(regF[A], asmjit::x86::qword_ptr(tmp)); + cc.movss(regF[A + 1], asmjit::x86::qword_ptr(tmp, 4)); + cc.movss(regF[A + 2], asmjit::x86::qword_ptr(tmp, 8)); 
+ cc.movss(regF[A + 3], asmjit::x86::qword_ptr(tmp, 12)); /* 4th float: byte offset 3 * 4, matching v[3] in the interpreter */ + cc.cvtss2sd(regF[A], regF[A]); + cc.cvtss2sd(regF[A + 1], regF[A + 1]); + cc.cvtss2sd(regF[A + 2], regF[A + 2]); + cc.cvtss2sd(regF[A + 3], regF[A + 3]); +} + static void SetString(FString *to, char **from) { *to = *from; diff --git a/src/common/scripting/jit/jit_math.cpp b/src/common/scripting/jit/jit_math.cpp index ea94025945..c75fa20416 100644 --- a/src/common/scripting/jit/jit_math.cpp +++ b/src/common/scripting/jit/jit_math.cpp @@ -1447,6 +1447,165 @@ void JitCompiler::EmitEQV3_K() I_Error("EQV3_K is not used."); } +///////////////////////////////////////////////////////////////////////////// +// Vector math. (4D/Quaternion) + +void JitCompiler::EmitNEGV4() +{ + auto mask = cc.newDoubleConst(asmjit::kConstScopeLocal, -0.0); + auto maskXmm = newTempXmmSd(); + cc.movsd(maskXmm, mask); + cc.movsd(regF[A], regF[B]); + cc.xorpd(regF[A], maskXmm); + cc.movsd(regF[A + 1], regF[B + 1]); + cc.xorpd(regF[A + 1], maskXmm); + cc.movsd(regF[A + 2], regF[B + 2]); + cc.xorpd(regF[A + 2], maskXmm); + cc.movsd(regF[A + 3], regF[B + 3]); + cc.xorpd(regF[A + 3], maskXmm); +} + +void JitCompiler::EmitADDV4_RR() +{ + auto rc0 = CheckRegF(C, A); + auto rc1 = CheckRegF(C + 1, A + 1); + auto rc2 = CheckRegF(C + 2, A + 2); + auto rc3 = CheckRegF(C + 3, A + 3); + cc.movsd(regF[A], regF[B]); + cc.addsd(regF[A], rc0); + cc.movsd(regF[A + 1], regF[B + 1]); + cc.addsd(regF[A + 1], rc1); + cc.movsd(regF[A + 2], regF[B + 2]); + cc.addsd(regF[A + 2], rc2); + cc.movsd(regF[A + 3], regF[B + 3]); + cc.addsd(regF[A + 3], rc3); +} + +void JitCompiler::EmitSUBV4_RR() +{ + auto rc0 = CheckRegF(C, A); + auto rc1 = CheckRegF(C + 1, A + 1); + auto rc2 = CheckRegF(C + 2, A + 2); + auto rc3 = CheckRegF(C + 3, A + 3); + cc.movsd(regF[A], regF[B]); + cc.subsd(regF[A], rc0); + cc.movsd(regF[A + 1], regF[B + 1]); + cc.subsd(regF[A + 1], rc1); + cc.movsd(regF[A + 2], regF[B + 2]); + cc.subsd(regF[A + 2], rc2); + cc.movsd(regF[A + 3], regF[B + 3]); + 
cc.subsd(regF[A + 3], rc3); +} + +void JitCompiler::EmitDOTV4_RR() +{ + auto rb1 = CheckRegF(B + 1, A); + auto rb2 = CheckRegF(B + 2, A); + auto rb3 = CheckRegF(B + 3, A); + auto rc0 = CheckRegF(C, A); + auto rc1 = CheckRegF(C + 1, A); + auto rc2 = CheckRegF(C + 2, A); + auto rc3 = CheckRegF(C + 3, A); + auto tmp = newTempXmmSd(); + cc.movsd(regF[A], regF[B]); + cc.mulsd(regF[A], rc0); + cc.movsd(tmp, rb1); + cc.mulsd(tmp, rc1); + cc.addsd(regF[A], tmp); + cc.movsd(tmp, rb2); + cc.mulsd(tmp, rc2); + cc.addsd(regF[A], tmp); + cc.movsd(tmp, rb3); + cc.mulsd(tmp, rc3); + cc.addsd(regF[A], tmp); +} + +void JitCompiler::EmitMULVF4_RR() +{ + auto rc = CheckRegF(C, A, A + 1, A + 2, A + 3); + cc.movsd(regF[A], regF[B]); + cc.movsd(regF[A + 1], regF[B + 1]); + cc.movsd(regF[A + 2], regF[B + 2]); + cc.movsd(regF[A + 3], regF[B + 3]); + cc.mulsd(regF[A], rc); + cc.mulsd(regF[A + 1], rc); + cc.mulsd(regF[A + 2], rc); + cc.mulsd(regF[A + 3], rc); +} + +void JitCompiler::EmitMULVF4_RK() +{ + auto tmp = newTempIntPtr(); + cc.movsd(regF[A], regF[B]); + cc.movsd(regF[A + 1], regF[B + 1]); + cc.movsd(regF[A + 2], regF[B + 2]); + cc.movsd(regF[A + 3], regF[B + 3]); + cc.mov(tmp, asmjit::imm_ptr(&konstf[C])); + cc.mulsd(regF[A], asmjit::x86::qword_ptr(tmp)); + cc.mulsd(regF[A + 1], asmjit::x86::qword_ptr(tmp)); + cc.mulsd(regF[A + 2], asmjit::x86::qword_ptr(tmp)); + cc.mulsd(regF[A + 3], asmjit::x86::qword_ptr(tmp)); +} + +void JitCompiler::EmitDIVVF4_RR() +{ + auto rc = CheckRegF(C, A, A + 1, A + 2, A + 3); + cc.movsd(regF[A], regF[B]); + cc.movsd(regF[A + 1], regF[B + 1]); + cc.movsd(regF[A + 2], regF[B + 2]); + cc.movsd(regF[A + 3], regF[B + 3]); + cc.divsd(regF[A], rc); + cc.divsd(regF[A + 1], rc); + cc.divsd(regF[A + 2], rc); + cc.divsd(regF[A + 3], rc); +} + +void JitCompiler::EmitDIVVF4_RK() +{ + auto tmp = newTempIntPtr(); + cc.movsd(regF[A], regF[B]); + cc.movsd(regF[A + 1], regF[B + 1]); + cc.movsd(regF[A + 2], regF[B + 2]); + cc.movsd(regF[A + 3], regF[B + 3]); + 
cc.mov(tmp, asmjit::imm_ptr(&konstf[C])); + cc.divsd(regF[A], asmjit::x86::qword_ptr(tmp)); + cc.divsd(regF[A + 1], asmjit::x86::qword_ptr(tmp)); + cc.divsd(regF[A + 2], asmjit::x86::qword_ptr(tmp)); + cc.divsd(regF[A + 3], asmjit::x86::qword_ptr(tmp)); +} + +void JitCompiler::EmitLENV4() +{ + auto rb1 = CheckRegF(B + 1, A); + auto rb2 = CheckRegF(B + 2, A); + auto rb3 = CheckRegF(B + 3, A); + auto tmp = newTempXmmSd(); + cc.movsd(regF[A], regF[B]); + cc.mulsd(regF[A], regF[B]); + cc.movsd(tmp, rb1); + cc.mulsd(tmp, rb1); + cc.addsd(regF[A], tmp); + cc.movsd(tmp, rb2); + cc.mulsd(tmp, rb2); + cc.addsd(regF[A], tmp); + cc.movsd(tmp, rb3); + cc.mulsd(tmp, rb3); + cc.addsd(regF[A], tmp); + CallSqrt(regF[A], regF[A]); +} + +void JitCompiler::EmitEQV4_R() +{ + EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) { + EmitVectorComparison<4> (check, fail, success); + }); +} + +void JitCompiler::EmitEQV4_K() +{ + I_Error("EQV4_K is not used."); +} + ///////////////////////////////////////////////////////////////////////////// // Pointer math. 
diff --git a/src/common/scripting/jit/jit_move.cpp b/src/common/scripting/jit/jit_move.cpp index 63f6158e39..1f0d4edc92 100644 --- a/src/common/scripting/jit/jit_move.cpp +++ b/src/common/scripting/jit/jit_move.cpp @@ -39,11 +39,20 @@ void JitCompiler::EmitMOVEV3() cc.movsd(regF[A + 2], regF[B + 2]); } +void JitCompiler::EmitMOVEV4() +{ + cc.movsd(regF[A], regF[B]); + cc.movsd(regF[A + 1], regF[B + 1]); + cc.movsd(regF[A + 2], regF[B + 2]); + cc.movsd(regF[A + 3], regF[B + 3]); +} + static void CastI2S(FString *a, int b) { a->Format("%d", b); } static void CastU2S(FString *a, int b) { a->Format("%u", b); } static void CastF2S(FString *a, double b) { a->Format("%.5f", b); } static void CastV22S(FString *a, double b, double b1) { a->Format("(%.5f, %.5f)", b, b1); } static void CastV32S(FString *a, double b, double b1, double b2) { a->Format("(%.5f, %.5f, %.5f)", b, b1, b2); } +static void CastV42S(FString *a, double b, double b1, double b2, double b3) { a->Format("(%.5f, %.5f, %.5f, %.5f)", b, b1, b2, b3); } static void CastP2S(FString *a, void *b) { if (b == nullptr) *a = "null"; else a->Format("%p", b); } static int CastS2I(FString *b) { return (int)b->ToLong(); } static double CastS2F(FString *b) { return b->ToDouble(); } @@ -109,6 +118,14 @@ void JitCompiler::EmitCAST() call->setArg(2, regF[B + 1]); call->setArg(3, regF[B + 2]); break; + case CAST_V42S: + call = CreateCall(CastV42S); + call->setArg(0, regS[A]); + call->setArg(1, regF[B]); + call->setArg(2, regF[B + 1]); + call->setArg(3, regF[B + 2]); + call->setArg(4, regF[B + 3]); + break; case CAST_P2S: call = CreateCall(CastP2S); call->setArg(0, regS[A]); diff --git a/src/common/scripting/jit/jit_store.cpp b/src/common/scripting/jit/jit_store.cpp index 2bce225664..84fa8d9d42 100644 --- a/src/common/scripting/jit/jit_store.cpp +++ b/src/common/scripting/jit/jit_store.cpp @@ -161,6 +161,30 @@ void JitCompiler::EmitSV3_R() cc.movsd(asmjit::x86::qword_ptr(tmp, 16), regF[B + 2]); } +void JitCompiler::EmitSV4() +{ 
+ EmitNullPointerThrow(A, X_WRITE_NIL); + auto tmp = newTempIntPtr(); + cc.mov(tmp, regA[A]); + cc.add(tmp, konstd[C]); + cc.movsd(asmjit::x86::qword_ptr(tmp), regF[B]); + cc.movsd(asmjit::x86::qword_ptr(tmp, 8), regF[B + 1]); + cc.movsd(asmjit::x86::qword_ptr(tmp, 16), regF[B + 2]); + cc.movsd(asmjit::x86::qword_ptr(tmp, 24), regF[B + 3]); /* 4th double: byte offset 3 * 8, continuing the 0/8/16 stride */ +} + +void JitCompiler::EmitSV4_R() +{ + EmitNullPointerThrow(A, X_WRITE_NIL); + auto tmp = newTempIntPtr(); + cc.mov(tmp, regA[A]); + cc.add(tmp, regD[C]); + cc.movsd(asmjit::x86::qword_ptr(tmp), regF[B]); + cc.movsd(asmjit::x86::qword_ptr(tmp, 8), regF[B + 1]); + cc.movsd(asmjit::x86::qword_ptr(tmp, 16), regF[B + 2]); + cc.movsd(asmjit::x86::qword_ptr(tmp, 24), regF[B + 3]); /* 4th double: byte offset 3 * 8, continuing the 0/8/16 stride */ +} + void JitCompiler::EmitSFV2() { EmitNullPointerThrow(A, X_WRITE_NIL); @@ -219,6 +243,40 @@ void JitCompiler::EmitSFV3_R() cc.movss(asmjit::x86::qword_ptr(tmp, 8), tmpF); } +void JitCompiler::EmitSFV4() +{ + EmitNullPointerThrow(A, X_WRITE_NIL); + auto tmp = newTempIntPtr(); + cc.mov(tmp, regA[A]); + cc.add(tmp, konstd[C]); + auto tmpF = newTempXmmSs(); + cc.cvtsd2ss(tmpF, regF[B]); + cc.movss(asmjit::x86::qword_ptr(tmp), tmpF); + cc.cvtsd2ss(tmpF, regF[B + 1]); + cc.movss(asmjit::x86::qword_ptr(tmp, 4), tmpF); + cc.cvtsd2ss(tmpF, regF[B + 2]); + cc.movss(asmjit::x86::qword_ptr(tmp, 8), tmpF); + cc.cvtsd2ss(tmpF, regF[B + 3]); + cc.movss(asmjit::x86::qword_ptr(tmp, 12), tmpF); /* 4th float: byte offset 3 * 4, continuing the 0/4/8 stride */ +} + +void JitCompiler::EmitSFV4_R() +{ + EmitNullPointerThrow(A, X_WRITE_NIL); + auto tmp = newTempIntPtr(); + cc.mov(tmp, regA[A]); + cc.add(tmp, regD[C]); + auto tmpF = newTempXmmSs(); + cc.cvtsd2ss(tmpF, regF[B]); + cc.movss(asmjit::x86::qword_ptr(tmp), tmpF); + cc.cvtsd2ss(tmpF, regF[B + 1]); + cc.movss(asmjit::x86::qword_ptr(tmp, 4), tmpF); + cc.cvtsd2ss(tmpF, regF[B + 2]); + cc.movss(asmjit::x86::qword_ptr(tmp, 8), tmpF); + cc.cvtsd2ss(tmpF, regF[B + 3]); + cc.movss(asmjit::x86::qword_ptr(tmp, 12), tmpF); /* 4th float: byte offset 3 * 4, continuing the 0/4/8 stride */ +} + void JitCompiler::EmitSBIT() { EmitNullPointerThrow(A, 
X_WRITE_NIL); diff --git a/src/common/scripting/jit/jitintern.h b/src/common/scripting/jit/jitintern.h index ac3d8acf56..2a3dda4265 100644 --- a/src/common/scripting/jit/jitintern.h +++ b/src/common/scripting/jit/jitintern.h @@ -241,6 +241,7 @@ private: asmjit::X86Xmm CheckRegF(int r0, int r1); asmjit::X86Xmm CheckRegF(int r0, int r1, int r2); asmjit::X86Xmm CheckRegF(int r0, int r1, int r2, int r3); + asmjit::X86Xmm CheckRegF(int r0, int r1, int r2, int r3, int r4); asmjit::X86Gp CheckRegS(int r0, int r1); asmjit::X86Gp CheckRegA(int r0, int r1); diff --git a/src/common/scripting/vm/vmexec.h b/src/common/scripting/vm/vmexec.h index 0e4b50b638..eae4cafffa 100644 --- a/src/common/scripting/vm/vmexec.h +++ b/src/common/scripting/vm/vmexec.h @@ -287,18 +287,40 @@ static int ExecScriptFunc(VMFrameStack *stack, VMReturn *ret, int numret) { auto v = (double*)ptr; reg.f[a] = v[0]; - reg.f[a + 1] = v[1]; - reg.f[a + 2] = v[2]; + reg.f[a+1] = v[1]; + reg.f[a+2] = v[2]; } NEXTOP; OP(LV3_R) : ASSERTF(a + 2); ASSERTA(B); ASSERTD(C); GETADDR(PB, RC, X_READ_NIL); + { + auto v = (double*)ptr; + reg.f[a] = v[0]; + reg.f[a+1] = v[1]; + reg.f[a+2] = v[2]; + } + NEXTOP; + OP(LV4) : + ASSERTF(a + 3); ASSERTA(B); ASSERTKD(C); + GETADDR(PB, KC, X_READ_NIL); { auto v = (double*)ptr; reg.f[a] = v[0]; reg.f[a + 1] = v[1]; reg.f[a + 2] = v[2]; + reg.f[a + 3] = v[3]; + } + NEXTOP; + OP(LV4_R) : + ASSERTF(a + 3); ASSERTA(B); ASSERTD(C); + GETADDR(PB, RC, X_READ_NIL); + { + auto v = (double*)ptr; + reg.f[a] = v[0]; + reg.f[a + 1] = v[1]; + reg.f[a + 2] = v[2]; + reg.f[a + 3] = v[3]; } NEXTOP; OP(LFV2): @@ -339,6 +361,28 @@ static int ExecScriptFunc(VMFrameStack *stack, VMReturn *ret, int numret) reg.f[a+2] = v[2]; } NEXTOP; + OP(LFV4) : + ASSERTF(a + 3); ASSERTA(B); ASSERTKD(C); + GETADDR(PB, KC, X_READ_NIL); + { + auto v = (float*)ptr; + reg.f[a] = v[0]; + reg.f[a+1] = v[1]; + reg.f[a+2] = v[2]; + reg.f[a+3] = v[3]; + } + NEXTOP; + OP(LFV4_R) : + ASSERTF(a + 3); ASSERTA(B); ASSERTD(C); + 
GETADDR(PB, RC, X_READ_NIL); + { + auto v = (float*)ptr; + reg.f[a] = v[0]; + reg.f[a+1] = v[1]; + reg.f[a+2] = v[2]; + reg.f[a+3] = v[3]; + } + NEXTOP; OP(LBIT): ASSERTD(a); ASSERTA(B); GETADDR(PB,0,X_READ_NIL); @@ -555,6 +599,16 @@ static int ExecScriptFunc(VMFrameStack *stack, VMReturn *ret, int numret) reg.f[a + 2] = reg.f[b + 2]; NEXTOP; } + OP(MOVEV4) : + { + ASSERTF(a + 3); ASSERTF(B + 3); /* the op reads and writes four consecutive float registers */ + b = B; + reg.f[a] = reg.f[b]; + reg.f[a + 1] = reg.f[b + 1]; + reg.f[a + 2] = reg.f[b + 2]; + reg.f[a + 3] = reg.f[b + 3]; + NEXTOP; + } OP(DYNCAST_R) : ASSERTA(a); ASSERTA(B); ASSERTA(C); b = B; @@ -1690,6 +1744,97 @@ static int ExecScriptFunc(VMFrameStack *stack, VMReturn *ret, int numret) fcp = &konstf[C]; goto Do_EQV3; + OP(NEGV4): + ASSERTF(a+3); ASSERTF(B+3); + reg.f[a] = -reg.f[B]; + reg.f[a+1] = -reg.f[B+1]; + reg.f[a+2] = -reg.f[B+2]; + reg.f[a+3] = -reg.f[B+3]; + NEXTOP; + + OP(ADDV4_RR): + ASSERTF(a+3); ASSERTF(B+3); ASSERTF(C+3); + fcp = &reg.f[C]; + fbp = &reg.f[B]; + reg.f[a] = fbp[0] + fcp[0]; + reg.f[a+1] = fbp[1] + fcp[1]; + reg.f[a+2] = fbp[2] + fcp[2]; + reg.f[a+3] = fbp[3] + fcp[3]; + NEXTOP; + + OP(SUBV4_RR): + ASSERTF(a+3); ASSERTF(B+3); ASSERTF(C+3); + fbp = &reg.f[B]; + fcp = &reg.f[C]; + reg.f[a] = fbp[0] - fcp[0]; + reg.f[a+1] = fbp[1] - fcp[1]; + reg.f[a+2] = fbp[2] - fcp[2]; + reg.f[a+3] = fbp[3] - fcp[3]; + NEXTOP; + + OP(DOTV4_RR): + ASSERTF(a); ASSERTF(B+3); ASSERTF(C+3); + reg.f[a] = reg.f[B] * reg.f[C] + reg.f[B+1] * reg.f[C+1] + reg.f[B+2] * reg.f[C+2] + reg.f[B+3] * reg.f[C+3]; + NEXTOP; + + OP(MULVF4_RR): + ASSERTF(a+3); ASSERTF(B+3); ASSERTF(C); + fc = reg.f[C]; + fbp = &reg.f[B]; + Do_MULV4: + reg.f[a] = fbp[0] * fc; + reg.f[a+1] = fbp[1] * fc; + reg.f[a+2] = fbp[2] * fc; + reg.f[a+3] = fbp[3] * fc; + NEXTOP; + OP(MULVF4_RK): + ASSERTF(a+3); ASSERTF(B+3); ASSERTKF(C); + fc = konstf[C]; + fbp = &reg.f[B]; + goto Do_MULV4; + + OP(DIVVF4_RR): + ASSERTF(a+3); ASSERTF(B+3); ASSERTF(C); + fc = reg.f[C]; + fbp = &reg.f[B]; + Do_DIVV4: + reg.f[a] = fbp[0] / fc; + 
reg.f[a+1] = fbp[1] / fc; + reg.f[a+2] = fbp[2] / fc; + reg.f[a+3] = fbp[3] / fc; + NEXTOP; + OP(DIVVF4_RK): + ASSERTF(a+3); ASSERTF(B+3); ASSERTKF(C); + fc = konstf[C]; + fbp = &reg.f[B]; + goto Do_DIVV4; + + OP(LENV4): + ASSERTF(a); ASSERTF(B+3); + reg.f[a] = g_sqrt(reg.f[B] * reg.f[B] + reg.f[B+1] * reg.f[B+1] + reg.f[B+2] * reg.f[B+2]+ reg.f[B+3] * reg.f[B+3]); + NEXTOP; + + OP(EQV4_R): + ASSERTF(B+3); ASSERTF(C+3); + fcp = &reg.f[C]; + Do_EQV4: + if (a & CMP_APPROX) + { + CMPJMP(fabs(reg.f[B ] - fcp[0]) < VM_EPSILON && + fabs(reg.f[B+1] - fcp[1]) < VM_EPSILON && + fabs(reg.f[B+2] - fcp[2]) < VM_EPSILON && + fabs(reg.f[B+3] - fcp[3]) < VM_EPSILON); + } + else + { + CMPJMP(reg.f[B] == fcp[0] && reg.f[B+1] == fcp[1] && reg.f[B+2] == fcp[2] && reg.f[B+3] == fcp[3]); + } + NEXTOP; + OP(EQV4_K): + ASSERTF(B+3); ASSERTKF(C+3); + fcp = &konstf[C]; + goto Do_EQV4; + OP(ADDA_RR): ASSERTA(a); ASSERTA(B); ASSERTD(C); c = reg.d[C]; diff --git a/src/common/scripting/vm/vmintern.h b/src/common/scripting/vm/vmintern.h index b93f3e6ef5..9a3e2d4010 100644 --- a/src/common/scripting/vm/vmintern.h +++ b/src/common/scripting/vm/vmintern.h @@ -126,6 +126,7 @@ enum CAST_So2S, CAST_V22S, CAST_V32S, + CAST_V42S, CAST_SID2S, CAST_TID2S, diff --git a/src/common/scripting/vm/vmops.h b/src/common/scripting/vm/vmops.h index af7fb233b9..1c367e75fb 100644 --- a/src/common/scripting/vm/vmops.h +++ b/src/common/scripting/vm/vmops.h @@ -51,12 +51,16 @@ xx(LV2, lv2, RVRPKI, LV2_R, 4, REGT_INT) // load vector2 xx(LV2_R, lv2, RVRPRI, NOP, 0, 0) xx(LV3, lv3, RVRPKI, LV3_R, 4, REGT_INT) // load vector3 xx(LV3_R, lv3, RVRPRI, NOP, 0, 0) +xx(LV4, lv4, RVRPKI, LV4_R, 4, REGT_INT) // load vector4 +xx(LV4_R, lv4, RVRPRI, NOP, 0, 0) xx(LCS, lcs, RSRPKI, LCS_R, 4, REGT_INT) // load string from char ptr. 
xx(LCS_R, lcs, RSRPRI, NOP, 0, 0) xx(LFV2, lfv2, RVRPKI, LFV2_R, 4, REGT_INT) // load fvector2 xx(LFV2_R, lfv2, RVRPRI, NOP, 0, 0) xx(LFV3, lfv3, RVRPKI, LFV3_R, 4, REGT_INT) // load fvector3 xx(LFV3_R, lfv3, RVRPRI, NOP, 0, 0) +xx(LFV4, lfv4, RVRPKI, LFV4_R, 4, REGT_INT) // load fvector4 +xx(LFV4_R, lfv4, RVRPRI, NOP, 0, 0) xx(LBIT, lbit, RIRPI8, NOP, 0, 0) // rA = !!(*rB & C) -- *rB is a byte @@ -81,10 +85,14 @@ xx(SV2, sv2, RPRVKI, SV2_R, 4, REGT_INT) // store vector2 xx(SV2_R, sv2, RPRVRI, NOP, 0, 0) xx(SV3, sv3, RPRVKI, SV3_R, 4, REGT_INT) // store vector3 xx(SV3_R, sv3, RPRVRI, NOP, 0, 0) +xx(SV4, sv4, RPRVKI, SV4_R, 4, REGT_INT) // store vector4 +xx(SV4_R, sv4, RPRVRI, NOP, 0, 0) xx(SFV2, sfv2, RPRVKI, SFV2_R, 4, REGT_INT) // store fvector2 xx(SFV2_R, sfv2, RPRVRI, NOP, 0, 0) xx(SFV3, sfv3, RPRVKI, SFV3_R, 4, REGT_INT) // store fvector3 xx(SFV3_R, sfv3, RPRVRI, NOP, 0, 0) +xx(SFV4, sfv4, RPRVKI, SFV4_R, 4, REGT_INT) // store fvector4 +xx(SFV4_R, sfv4, RPRVRI, NOP, 0, 0) xx(SBIT, sbit, RPRII8, NOP, 0, 0) // *rA |= C if rB is true, *rA &= ~C otherwise @@ -95,6 +103,7 @@ xx(MOVES, mov, RSRS, NOP, 0, 0) // sA = sB xx(MOVEA, mov, RPRP, NOP, 0, 0) // aA = aB xx(MOVEV2, mov2, RFRF, NOP, 0, 0) // fA = fB (2 elements) xx(MOVEV3, mov3, RFRF, NOP, 0, 0) // fA = fB (3 elements) +xx(MOVEV4, mov4, RFRF, NOP, 0, 0) // fA = fB (4 elements) xx(CAST, cast, CAST, NOP, 0, 0) // xA = xB, conversion specified by C xx(CASTB, castb, CAST, NOP, 0, 0) // xA = !!xB, type specified by C xx(DYNCAST_R, dyncast, RPRPRP, NOP, 0, 0) // aA = dyn_cast(aB); @@ -256,6 +265,19 @@ xx(LENV3, lenv3, RFRV, NOP, 0, 0) // fA = vB.Length xx(EQV3_R, beqv3, CVRR, NOP, 0, 0) // if ((vB == vkC) != A) then pc++ (inexact if A & 33) xx(EQV3_K, beqv3, CVRK, NOP, 0, 0) // this will never be used. 
+// Vector math (4D/Quaternion) +xx(NEGV4, negv4, RVRV, NOP, 0, 0) // vA = -vB +xx(ADDV4_RR, addv4, RVRVRV, NOP, 0, 0) // vA = vB + vkC +xx(SUBV4_RR, subv4, RVRVRV, NOP, 0, 0) // vA = vkB - vkC +xx(DOTV4_RR, dotv4, RVRVRV, NOP, 0, 0) // va = vB dot vkC +xx(MULVF4_RR, mulv4, RVRVRF, NOP, 0, 0) // vA = vkB * fkC +xx(MULVF4_RK, mulv4, RVRVKF, MULVF4_RR, 4, REGT_FLOAT) +xx(DIVVF4_RR, divv4, RVRVRF, NOP, 0, 0) // vA = vkB / fkC +xx(DIVVF4_RK, divv4, RVRVKF, DIVVF4_RR, 4, REGT_FLOAT) +xx(LENV4, lenv4, RFRV, NOP, 0, 0) // fA = vB.Length +xx(EQV4_R, beqv4, CVRR, NOP, 0, 0) // if ((vB == vkC) != A) then pc++ (inexact if A & 33) +xx(EQV4_K, beqv4, CVRK, NOP, 0, 0) // this will never be used. + // Pointer math. xx(ADDA_RR, add, RPRPRI, NOP, 0, 0) // pA = pB + dkC xx(ADDA_RK, add, RPRPKI, ADDA_RR,4, REGT_INT)