diff --git a/source/common/engine/namedef.h b/source/common/engine/namedef.h index af8e2bca0..5e30591ac 100644 --- a/source/common/engine/namedef.h +++ b/source/common/engine/namedef.h @@ -124,8 +124,10 @@ xx(State) xx(Fixed) xx(Vector2) xx(Vector3) +xx(Vector4) xx(FVector2) xx(FVector3) +xx(FVector4) xx(let) xx(Min) @@ -175,7 +177,9 @@ xx(b) xx(X) xx(Y) xx(Z) +xx(W) xx(XY) +xx(XYZ) xx(Prototype) xx(Void) diff --git a/source/common/engine/serializer.h b/source/common/engine/serializer.h index d98798f95..63bfb3529 100644 --- a/source/common/engine/serializer.h +++ b/source/common/engine/serializer.h @@ -326,6 +326,11 @@ inline FSerializer &Serialize(FSerializer &arc, const char *key, DVector2 &p, DV return arc.Array(key, &p[0], def? &(*def)[0] : nullptr, 2, true); } +inline FSerializer& Serialize(FSerializer& arc, const char* key, FVector4& p, FVector4* def) +{ + return arc.Array(key, &p[0], def ? &(*def)[0] : nullptr, 4, true); +} + inline FSerializer& Serialize(FSerializer& arc, const char* key, FVector3& p, FVector3* def) { return arc.Array(key, &p[0], def ? 
&(*def)[0] : nullptr, 3, true); diff --git a/source/common/engine/stats.h b/source/common/engine/stats.h index 0794455d2..c7473531e 100644 --- a/source/common/engine/stats.h +++ b/source/common/engine/stats.h @@ -48,6 +48,7 @@ public: cycle_t &operator= (const cycle_t &o) { return *this; } void Reset() {} void Clock() {} + void ResetAndClock() {} void Unclock() {} double Time() { return 0; } double TimeMS() { return 0; } @@ -120,6 +121,12 @@ public: Sec -= ts.tv_sec + ts.tv_nsec * 1e-9; } + void ResetAndClock() + { + Reset(); + Clock(); + } + void Unclock() { #ifdef __linux__ @@ -221,6 +228,11 @@ public: Counter = 0; } + void ResetAndClock() + { + Counter = -static_cast(rdtsc()); + } + void Clock() { int64_t time = rdtsc(); diff --git a/source/common/menu/menudef.cpp b/source/common/menu/menudef.cpp index c93c76bd7..0ef4e4fe1 100644 --- a/source/common/menu/menudef.cpp +++ b/source/common/menu/menudef.cpp @@ -250,6 +250,12 @@ static bool CheckSkipOptionBlock(FScanner &sc) filter = true; #endif } + else if (sc.Compare("SWRender")) + { +#ifndef NO_SWRENDERER + filter = true; +#endif + } } while (sc.CheckString(",")); sc.MustGetStringName(")"); diff --git a/source/common/objects/dobjgc.cpp b/source/common/objects/dobjgc.cpp index 8664ac541..34199efff 100644 --- a/source/common/objects/dobjgc.cpp +++ b/source/common/objects/dobjgc.cpp @@ -82,23 +82,58 @@ ** infinity, where each step performs a full collection.) You can also ** change this value dynamically. 
*/ -#define DEFAULT_GCMUL 200 // GC runs 'double the speed' of memory allocation +#ifndef _DEBUG +#define DEFAULT_GCMUL 600 // GC runs gcmul% the speed of memory allocation +#else +// Higher in debug builds to account for the extra time spent freeing objects +#define DEFAULT_GCMUL 800 +#endif // Minimum step size -#define GCSTEPSIZE (sizeof(DObject) * 16) +#define GCMINSTEPSIZE (sizeof(DObject) * 16) -// Maximum number of elements to sweep in a single step -#define GCSWEEPMAX 40 +// Sweeps traverse objects in chunks of this size +#define GCSWEEPGRANULARITY 40 -// Cost of sweeping one element (the size of a small object divided by -// some adjust for the sweep speed) -#define GCSWEEPCOST (sizeof(DObject) / 4) +// Cost of deleting an object +#ifndef _DEBUG +#define GCDELETECOST 75 +#else +// Freeing memory is much more costly in debug builds +#define GCDELETECOST 230 +#endif -// Cost of calling of one destructor -#define GCFINALIZECOST 100 +// Cost of destroying an object +#define GCDESTROYCOST 15 // TYPES ------------------------------------------------------------------- +class FAveragizer +{ + // Number of allocations to track + static inline constexpr unsigned HistorySize = 512; + + size_t History[HistorySize]; + size_t TotalAmount; + int TotalCount; + unsigned NewestPos; + +public: + FAveragizer(); + void AddAlloc(size_t alloc); + size_t GetAverage(); +}; + +struct FStepStats +{ + cycle_t Clock[GC::GCS_COUNT]; + size_t BytesCovered[GC::GCS_COUNT]; + int Count[GC::GCS_COUNT]; + + void Format(FString &out); + void Reset(); +}; + // EXTERNAL FUNCTION PROTOTYPES -------------------------------------------- // PUBLIC FUNCTION PROTOTYPES ---------------------------------------------- @@ -114,28 +149,50 @@ static size_t CalcStepSize(); namespace GC { size_t AllocBytes; +size_t RunningAllocBytes; +size_t RunningDeallocBytes; size_t Threshold; size_t Estimate; DObject *Gray; DObject *Root; DObject *SoftRoots; DObject **SweepPos; +DObject *ToDestroy; uint32_t CurrentWhite 
= OF_White0 | OF_Fixed; EGCState State = GCS_Pause; int Pause = DEFAULT_GCPAUSE; int StepMul = DEFAULT_GCMUL; -int StepCount; -uint64_t CheckTime; +FStepStats StepStats; +FStepStats PrevStepStats; bool FinalGC; +bool HadToDestroy; // PRIVATE DATA DEFINITIONS ------------------------------------------------ -static int LastCollectTime; // Time last time collector finished -static size_t LastCollectAlloc; // Memory allocation when collector finished -static size_t MinStepSize; // Cover at least this much memory per step +static FAveragizer AllocHistory;// Tracks allocation rate over time +static cycle_t GCTime; // Track time spent in GC // CODE -------------------------------------------------------------------- +//========================================================================== +// +// CheckGC +// +// Check if it's time to collect, and do a collection step if it is. +// Also does some bookkeeping. Should be called fairly consistently. +// +//========================================================================== + +void CheckGC() +{ + AllocHistory.AddAlloc(RunningAllocBytes); + RunningAllocBytes = 0; + if (State > GCS_Pause || AllocBytes >= Threshold) + { + Step(); + } +} + //========================================================================== // // SetThreshold @@ -146,7 +203,7 @@ static size_t MinStepSize; // Cover at least this much memory per step void SetThreshold() { - Threshold = (Estimate / 100) * Pause; + Threshold = (std::min(Estimate, AllocBytes) / 100) * Pause; } //========================================================================== @@ -170,55 +227,72 @@ size_t PropagateMark() //========================================================================== // -// SweepList +// SweepObjects // -// Runs a limited sweep on a list, returning the position in the list just -// after the last object swept. +// Runs a limited sweep on the object list, returning the number of bytes +// swept.
// //========================================================================== -static DObject **SweepList(DObject **p, size_t count, size_t *finalize_count) +static size_t SweepObjects(size_t count) { DObject *curr; int deadmask = OtherWhite(); - size_t finalized = 0; + size_t swept = 0; - while ((curr = *p) != NULL && count-- > 0) + while ((curr = *SweepPos) != nullptr && count-- > 0) { + swept += curr->GetClass()->Size; if ((curr->ObjectFlags ^ OF_WhiteBits) & deadmask) // not dead? { assert(!curr->IsDead() || (curr->ObjectFlags & OF_Fixed)); curr->MakeWhite(); // make it white (for next cycle) - p = &curr->ObjNext; + SweepPos = &curr->ObjNext; } - else // must erase 'curr' + else { assert(curr->IsDead()); - *p = curr->ObjNext; if (!(curr->ObjectFlags & OF_EuthanizeMe)) - { // The object must be destroyed before it can be finalized. - // Note that thinkers must already have been destroyed. If they get here without - // having been destroyed first, it means they somehow became unattached from the - // thinker lists. If I don't maintain the invariant that all live thinkers must - // be in a thinker list, then I need to add write barriers for every time a - // thinker pointer is changed. This seems easier and perfectly reasonable, since - // a live thinker that isn't on a thinker list isn't much of a thinker. - - // However, this can happen during deletion of the thinker list while cleaning up - // from a savegame error so we can't assume that any thinker that gets here is an error. - - curr->Destroy(); + { // The object must be destroyed before it can be deleted. 
+ curr->GCNext = ToDestroy; + ToDestroy = curr; + SweepPos = &curr->ObjNext; + } + else + { // must erase 'curr' + *SweepPos = curr->ObjNext; + curr->ObjectFlags |= OF_Cleanup; + delete curr; + swept += GCDELETECOST; } - curr->ObjectFlags |= OF_Cleanup; - delete curr; - finalized++; } } - if (finalize_count != NULL) + return swept; +} + +//========================================================================== +// +// DestroyObjects +// +// Destroys up to count objects on a list linked on GCNext, returning the +// size of objects destroyed, for updating the estimate. +// +//========================================================================== + +static size_t DestroyObjects(size_t count) +{ + DObject *curr; + size_t bytes_destroyed = 0; + + while ((curr = ToDestroy) != nullptr && count-- > 0) { - *finalize_count = finalized; + assert(!(curr->ObjectFlags & OF_EuthanizeMe)); + bytes_destroyed += curr->GetClass()->Size + GCDESTROYCOST; + ToDestroy = curr->GCNext; + curr->GCNext = nullptr; + curr->Destroy(); } - return p; + return bytes_destroyed; } //========================================================================== @@ -269,20 +343,14 @@ void MarkArray(DObject **obj, size_t count) // // CalcStepSize // -// Decide how big a step should be based, depending on how long it took to -// allocate up to the threshold from the amount left after the previous -// collection. +// Decide how big a step should be, based on the current allocation rate. // //========================================================================== static size_t CalcStepSize() { - int time_passed = int(CheckTime - LastCollectTime); - auto alloc = min(LastCollectAlloc, Estimate); - size_t bytes_gained = AllocBytes > alloc ? AllocBytes - alloc : 0; - return (StepMul > 0 && time_passed > 0) - ? 
std::max(GCSTEPSIZE, bytes_gained / time_passed * StepMul / 100) - : std::numeric_limits::max() / 2; // no limit + size_t avg = AllocHistory.GetAverage(); + return std::max(GCMINSTEPSIZE, avg * StepMul / 100); } //========================================================================== @@ -302,15 +370,18 @@ void AddMarkerFunc(GCMarkerFunc func) static void MarkRoot() { - Gray = NULL; + PrevStepStats = StepStats; + StepStats.Reset(); + + Gray = nullptr; for (auto func : markers) func(); // Mark soft roots. - if (SoftRoots != NULL) + if (SoftRoots != nullptr) { DObject **probe = &SoftRoots->ObjNext; - while (*probe != NULL) + while (*probe != nullptr) { DObject *soft = *probe; probe = &soft->ObjNext; @@ -322,7 +393,6 @@ static void MarkRoot() } // Time to propagate the marks. State = GCS_Propagate; - StepCount = 0; } //========================================================================== @@ -341,10 +411,21 @@ static void Atomic() SweepPos = &Root; State = GCS_Sweep; Estimate = AllocBytes; +} - // Now that we are about to start a sweep, establish a baseline minimum - // step size for how much memory we want to sweep each CheckGC(). - MinStepSize = CalcStepSize(); +//========================================================================== +// +// SweepDone +// +// Sets up the Destroy phase, if there are any dead objects that haven't +// been destroyed yet, or skips to the Done state. +// +//========================================================================== + +static void SweepDone() +{ + HadToDestroy = ToDestroy != nullptr; + State = HadToDestroy ? 
GCS_Destroy : GCS_Done; } //========================================================================== @@ -364,7 +445,7 @@ static size_t SingleStep() return 0; case GCS_Propagate: - if (Gray != NULL) + if (Gray != nullptr) { return PropagateMark(); } @@ -375,22 +456,30 @@ static size_t SingleStep() } case GCS_Sweep: { - size_t old = AllocBytes; - size_t finalize_count; - SweepPos = SweepList(SweepPos, GCSWEEPMAX, &finalize_count); - if (*SweepPos == NULL) + RunningDeallocBytes = 0; + size_t swept = SweepObjects(GCSWEEPGRANULARITY); + Estimate -= RunningDeallocBytes; + if (*SweepPos == nullptr) { // Nothing more to sweep? - State = GCS_Finalize; + SweepDone(); } - //assert(old >= AllocBytes); - Estimate -= max(0, old - AllocBytes); - return (GCSWEEPMAX - finalize_count) * GCSWEEPCOST + finalize_count * GCFINALIZECOST; + return swept; } - case GCS_Finalize: + case GCS_Destroy: { + size_t destroy_size; + destroy_size = DestroyObjects(GCSWEEPGRANULARITY); + Estimate -= destroy_size; + if (ToDestroy == nullptr) + { // Nothing more to destroy? + State = GCS_Done; + } + return destroy_size; + } + + case GCS_Done: State = GCS_Pause; // end collection - LastCollectAlloc = AllocBytes; - LastCollectTime = (int)CheckTime; + SetThreshold(); return 0; default: @@ -403,21 +492,27 @@ static size_t SingleStep() // // Step // -// Performs enough single steps to cover GCSTEPSIZE * StepMul% bytes of -// memory. +// Performs enough single steps to cover CalcStepSize() bytes of memory. +// Some of those bytes might be "fake" to account for the cost of freeing +// or destroying objects. // //========================================================================== void Step() { - // We recalculate a step size in case the rate of allocation went up - // since we started sweeping because we don't want to fall behind. - // However, we also don't want to go slower than what was decided upon - // when the sweep began if the rate of allocation has slowed.
- size_t lim = max(CalcStepSize(), MinStepSize); + GCTime.ResetAndClock(); + + auto enter_state = State; + StepStats.Count[enter_state]++; + StepStats.Clock[enter_state].Clock(); + + size_t did = 0; + size_t lim = CalcStepSize(); + do { size_t done = SingleStep(); + did += done; if (done < lim) { lim -= done; @@ -426,17 +521,23 @@ void Step() { lim = 0; } + if (State != enter_state) + { + // Finish stats on old state + StepStats.Clock[enter_state].Unclock(); + StepStats.BytesCovered[enter_state] += did; + + // Start stats on new state + did = 0; + enter_state = State; + StepStats.Clock[enter_state].Clock(); + StepStats.Count[enter_state]++; + } } while (lim && State != GCS_Pause); - if (State != GCS_Pause) - { - Threshold = AllocBytes; - } - else - { - assert(AllocBytes >= Estimate); - SetThreshold(); - } - StepCount++; + + StepStats.Clock[enter_state].Unclock(); + StepStats.BytesCovered[enter_state] += did; + GCTime.Unclock(); } //========================================================================== @@ -454,20 +555,23 @@ void FullGC() // Reset sweep mark to sweep all elements (returning them to white) SweepPos = &Root; // Reset other collector lists - Gray = NULL; + Gray = nullptr; State = GCS_Sweep; } - // Finish any pending sweep phase - while (State != GCS_Finalize) - { - SingleStep(); - } - MarkRoot(); + // Finish any pending GC stages while (State != GCS_Pause) { SingleStep(); } - SetThreshold(); + // Loop until everything that can be destroyed and freed is + do + { + MarkRoot(); + while (State != GCS_Pause) + { + SingleStep(); + } + } while (HadToDestroy); } //========================================================================== @@ -481,9 +585,9 @@ void FullGC() void Barrier(DObject *pointing, DObject *pointed) { - assert(pointing == NULL || (pointing->IsBlack() && !pointing->IsDead())); + assert(pointing == nullptr || (pointing->IsBlack() && !pointing->IsDead())); assert(pointed->IsWhite() && !pointed->IsDead()); - assert(State != GCS_Finalize && 
State != GCS_Pause); + assert(State != GCS_Destroy && State != GCS_Pause); assert(!(pointed->ObjectFlags & OF_Released)); // if a released object gets here, something must be wrong. if (pointed->ObjectFlags & OF_Released) return; // don't do anything with non-GC'd objects. // The invariant only needs to be maintained in the propagate state. @@ -495,7 +599,7 @@ void Barrier(DObject *pointing, DObject *pointed) } // In other states, we can mark the pointing object white so this // barrier won't be triggered again, saving a few cycles in the future. - else if (pointing != NULL) + else if (pointing != nullptr) { pointing->MakeWhite(); } @@ -503,13 +607,13 @@ void Barrier(DObject *pointing, DObject *pointed) void DelSoftRootHead() { - if (SoftRoots != NULL) + if (SoftRoots != nullptr) { // Don't let the destructor print a warning message SoftRoots->ObjectFlags |= OF_YesReallyDelete; delete SoftRoots; } - SoftRoots = NULL; + SoftRoots = nullptr; } //========================================================================== @@ -526,7 +630,7 @@ void AddSoftRoot(DObject *obj) DObject **probe; // Are there any soft roots yet? - if (SoftRoots == NULL) + if (SoftRoots == nullptr) { // Create a new object to root the soft roots off of, and stick // it at the end of the object list, so we know that anything @@ -534,17 +638,17 @@ void AddSoftRoot(DObject *obj) SoftRoots = Create(); SoftRoots->ObjectFlags |= OF_Fixed; probe = &Root; - while (*probe != NULL) + while (*probe != nullptr) { probe = &(*probe)->ObjNext; } Root = SoftRoots->ObjNext; - SoftRoots->ObjNext = NULL; + SoftRoots->ObjNext = nullptr; *probe = SoftRoots; } // Mark this object as rooted and move it after the SoftRoots marker. 
probe = &Root; - while (*probe != NULL && *probe != obj) + while (*probe != nullptr && *probe != obj) { probe = &(*probe)->ObjNext; } @@ -567,14 +671,14 @@ void DelSoftRoot(DObject *obj) { DObject **probe; - if (!(obj->ObjectFlags & OF_Rooted)) + if (obj == nullptr || !(obj->ObjectFlags & OF_Rooted)) { // Not rooted, so nothing to do. return; } obj->ObjectFlags &= ~OF_Rooted; // Move object out of the soft roots part of the list. probe = &SoftRoots; - while (*probe != NULL && *probe != obj) + while (*probe != nullptr && *probe != obj) { probe = &(*probe)->ObjNext; } @@ -588,6 +692,52 @@ void DelSoftRoot(DObject *obj) } +//========================================================================== +// +// FAveragizer - Constructor +// +//========================================================================== + +FAveragizer::FAveragizer() +{ + NewestPos = 0; + TotalAmount = 0; + TotalCount = 0; + memset(History, 0, sizeof(History)); +} + +//========================================================================== +// +// FAveragizer :: AddAlloc +// +//========================================================================== + +void FAveragizer::AddAlloc(size_t alloc) +{ + NewestPos = (NewestPos + 1) & (HistorySize - 1); + if (TotalCount < HistorySize) + { + TotalCount++; + } + else + { + TotalAmount -= History[NewestPos]; + } + History[NewestPos] = alloc; + TotalAmount += alloc; +} + +//========================================================================== +// +// FAveragizer :: GetAverage +// +//========================================================================== + +size_t FAveragizer::GetAverage() +{ + return TotalCount != 0 ? 
TotalAmount / TotalCount : 0; +} + //========================================================================== // // STAT gc @@ -602,18 +752,66 @@ ADD_STAT(gc) " Pause ", "Propagate", " Sweep ", - "Finalize " }; + " Destroy ", + " Done " + }; FString out; - out.Format("[%s] Alloc:%6zuK Thresh:%6zuK Est:%6zuK Steps: %d %zuK", + double time = GC::State != GC::GCS_Pause ? GC::GCTime.TimeMS() : 0; + + GC::PrevStepStats.Format(out); + out << "\n"; + GC::StepStats.Format(out); + out.AppendFormat("\n%.2fms [%s] Rate:%3zuK (%3zuK) Alloc:%6zuK Est:%6zuK Thresh:%6zuK", + time, StateStrings[GC::State], + (GC::AllocHistory.GetAverage() + 1023) >> 10, + (GC::CalcStepSize() + 1023) >> 10, (GC::AllocBytes + 1023) >> 10, - (GC::Threshold + 1023) >> 10, (GC::Estimate + 1023) >> 10, - GC::StepCount, - (GC::MinStepSize + 1023) >> 10); + (GC::Threshold + 1023) >> 10); return out; } +//========================================================================== +// +// FStepStats :: Reset +// +//========================================================================== + +void FStepStats::Reset() +{ + for (unsigned i = 0; i < countof(Count); ++i) + { + Count[i] = 0; + BytesCovered[i] = 0; + Clock[i].Reset(); + } +} + +//========================================================================== +// +// FStepStats :: Format +// +// Appends its stats to the given FString. +// +//========================================================================== + +void FStepStats::Format(FString &out) +{ + // Because everything in the default green is hard to distinguish, + // each stage has its own color. + for (int i = GC::GCS_Propagate; i < GC::GCS_Done; ++i) + { + int count = Count[i]; + double time = Clock[i].TimeMS(); + out.AppendFormat(TEXTCOLOR_ESCAPESTR "%c[%c%6zuK %4d*%.2fms]", + "-NKB"[i], /* Color codes */ + "-PSD"[i], /* Stage prefixes: (P)ropagate, (S)weep, (D)estroy */ + (BytesCovered[i] + 1023) >> 10, count, count != 0 ? 
time / count : time); + } + out << TEXTCOLOR_GREEN; +} + //========================================================================== // // CCMD gc diff --git a/source/common/objects/dobjgc.h b/source/common/objects/dobjgc.h index 7997145b7..a844a16db 100644 --- a/source/common/objects/dobjgc.h +++ b/source/common/objects/dobjgc.h @@ -37,12 +37,21 @@ namespace GC GCS_Pause, GCS_Propagate, GCS_Sweep, - GCS_Finalize + GCS_Destroy, + GCS_Done, + + GCS_COUNT }; // Number of bytes currently allocated through M_Malloc/M_Realloc. extern size_t AllocBytes; + // Number of bytes allocated since last collection step. + extern size_t RunningAllocBytes; + + // Number of bytes freed since last collection step. + extern size_t RunningDeallocBytes; + // Amount of memory to allocate before triggering a collection. extern size_t Threshold; @@ -70,18 +79,12 @@ namespace GC // Is this the final collection just before exit? extern bool FinalGC; - // Counts the number of times CheckGC has been called. - extern uint64_t CheckTime; - // Current white value for known-dead objects. static inline uint32_t OtherWhite() { return CurrentWhite ^ OF_WhiteBits; } - // Frees all objects, whether they're dead or not. - void FreeAll(); - // Does one collection step. void Step(); @@ -118,12 +121,7 @@ namespace GC } // Check if it's time to collect, and do a collection step if it is. - static inline void CheckGC() - { - CheckTime++; - if (AllocBytes >= Threshold) - Step(); - } + void CheckGC(); // Forces a collection to start now. 
static inline void StartCollection() @@ -176,6 +174,32 @@ namespace GC using GCMarkerFunc = void(*)(); void AddMarkerFunc(GCMarkerFunc func); + // Report an allocation to the GC + static inline void ReportAlloc(size_t alloc) + { + AllocBytes += alloc; + RunningAllocBytes += alloc; + } + + // Report a deallocation to the GC + static inline void ReportDealloc(size_t dealloc) + { + AllocBytes -= dealloc; + RunningDeallocBytes += dealloc; + } + + // Report a reallocation to the GC + static inline void ReportRealloc(size_t oldsize, size_t newsize) + { + if (oldsize < newsize) + { + ReportAlloc(newsize - oldsize); + } + else + { + ReportDealloc(oldsize - newsize); + } + } } // A template class to help with handling read barriers. It does not diff --git a/source/common/rendering/gl/gl_framebuffer.h b/source/common/rendering/gl/gl_framebuffer.h index 06492fd9d..7b659db0a 100644 --- a/source/common/rendering/gl/gl_framebuffer.h +++ b/source/common/rendering/gl/gl_framebuffer.h @@ -22,6 +22,7 @@ public: OpenGLFrameBuffer(void *hMonitor, bool fullscreen) ; ~OpenGLFrameBuffer(); + int Backend() override { return 2; } bool CompileNextShader() override; void InitializeState() override; void Update() override; diff --git a/source/common/scripting/backend/codegen.cpp b/source/common/scripting/backend/codegen.cpp index 24ce94447..7aedc48ae 100644 --- a/source/common/scripting/backend/codegen.cpp +++ b/source/common/scripting/backend/codegen.cpp @@ -484,12 +484,15 @@ int EncodeRegType(ExpEmit reg) else if (reg.RegCount == 2) { regtype |= REGT_MULTIREG2; - } else if (reg.RegCount == 3) { regtype |= REGT_MULTIREG3; } + else if (reg.RegCount == 4) + { + regtype |= REGT_MULTIREG4; + } return regtype; } @@ -573,19 +576,20 @@ ExpEmit FxConstant::Emit(VMFunctionBuilder *build) // //========================================================================== -FxVectorValue::FxVectorValue(FxExpression *x, FxExpression *y, FxExpression *z, const FScriptPosition &sc) 
+FxVectorValue::FxVectorValue(FxExpression *x, FxExpression *y, FxExpression *z, FxExpression* w, const FScriptPosition &sc) :FxExpression(EFX_VectorValue, sc) { - xyz[0] = x; - xyz[1] = y; - xyz[2] = z; + xyzw[0] = x; + xyzw[1] = y; + xyzw[2] = z; + xyzw[3] = w; isConst = false; ValueType = TypeVoid; // we do not know yet } FxVectorValue::~FxVectorValue() { - for (auto &a : xyz) + for (auto &a : xyzw) { SAFE_DELETE(a); } @@ -595,7 +599,8 @@ FxExpression *FxVectorValue::Resolve(FCompileContext&ctx) { bool fails = false; - for (auto &a : xyz) + // Cast every scalar to float64 + for (auto &a : xyzw) { if (a != nullptr) { @@ -603,7 +608,7 @@ FxExpression *FxVectorValue::Resolve(FCompileContext&ctx) if (a == nullptr) fails = true; else { - if (a->ValueType != TypeVector2) // a vec3 may be initialized with (vec2, z) + if (a->ValueType != TypeVector2 && a->ValueType != TypeVector3) // smaller vector can be used to initialize another vector { a = new FxFloatCast(a); a = a->Resolve(ctx); @@ -612,51 +617,89 @@ FxExpression *FxVectorValue::Resolve(FCompileContext&ctx) } } } + if (fails) { delete this; return nullptr; } - // at this point there are three legal cases: - // * two floats = vector2 - // * three floats = vector3 - // * vector2 + float = vector3 - if (xyz[0]->ValueType == TypeVector2) + + // The actual dimension of the Vector does not correspond to the amount of non-null elements in xyzw + // For example: '(asdf.xy, 1)' would be Vector3 where xyzw[0]->ValueType == TypeVector2 and xyzw[1]->ValueType == TypeFloat64 + + // Handle nesting and figure out the dimension of the vector + int vectorDimensions = 0; + + for (int i = 0; i < maxVectorDimensions && xyzw[i]; ++i) { - if (xyz[1]->ValueType != TypeFloat64 || xyz[2] != nullptr) + assert(dynamic_cast(xyzw[i])); + + if (xyzw[i]->ValueType == TypeFloat64) + { + vectorDimensions++; + } + else if (xyzw[i]->ValueType == TypeVector2 || xyzw[i]->ValueType == TypeVector3 || xyzw[i]->ValueType == TypeVector4) + { + // Solve 
nested vector + int regCount = xyzw[i]->ValueType->RegCount; + + if (regCount + vectorDimensions > maxVectorDimensions) + { + vectorDimensions += regCount; // Show proper number + goto too_big; + } + + // Nested initializer gets simplified + if (xyzw[i]->ExprType == EFX_VectorValue) + { + // Shifts current elements to leave space for unwrapping nested initialization + for (int l = maxVectorDimensions - 1; l > i; --l) + { + xyzw[l] = xyzw[l - regCount + 1]; + } + + auto vi = static_cast(xyzw[i]); + for (int j = 0; j < regCount; ++j) + { + xyzw[i + j] = vi->xyzw[j]; + vi->xyzw[j] = nullptr; // Preserve object after 'delete vi;' + } + delete vi; + + // We extracted something, let's iterate on that again: + --i; + continue; + } + else + { + vectorDimensions += regCount; + } + } + else { ScriptPosition.Message(MSG_ERROR, "Not a valid vector"); delete this; return nullptr; } - ValueType = TypeVector3; - if (xyz[0]->ExprType == EFX_VectorValue) - { - // If two vector initializers are nested, unnest them now. - auto vi = static_cast(xyz[0]); - xyz[2] = xyz[1]; - xyz[1] = vi->xyz[1]; - xyz[0] = vi->xyz[0]; - vi->xyz[0] = vi->xyz[1] = nullptr; // Don't delete our own expressions. - delete vi; - } } - else if (xyz[0]->ValueType == TypeFloat64 && xyz[1]->ValueType == TypeFloat64) + + switch (vectorDimensions) { - ValueType = xyz[2] == nullptr ? TypeVector2 : TypeVector3; - } - else - { - ScriptPosition.Message(MSG_ERROR, "Not a valid vector"); + case 2: ValueType = TypeVector2; break; + case 3: ValueType = TypeVector3; break; + case 4: ValueType = TypeVector4; break; + default: + too_big:; + ScriptPosition.Message(MSG_ERROR, "Vector of %d dimensions is not supported", vectorDimensions); delete this; return nullptr; } // check if all elements are constant. If so this can be emitted as a constant vector. 
isConst = true; - for (auto &a : xyz) + for (auto &a : xyzw) { - if (a != nullptr && !a->isConstant()) isConst = false; + if (a && !a->isConstant()) isConst = false; } return this; } @@ -674,100 +717,96 @@ static ExpEmit EmitKonst(VMFunctionBuilder *build, ExpEmit &emit) ExpEmit FxVectorValue::Emit(VMFunctionBuilder *build) { - // no const handling here. Ultimately it's too rarely used (i.e. the only fully constant vector ever allocated in ZDoom is the 0-vector in a very few places) - // and the negatives (excessive allocation of float constants) outweigh the positives (saved a few instructions) - assert(xyz[0] != nullptr); - assert(xyz[1] != nullptr); - if (ValueType == TypeVector2) + int vectorDimensions = ValueType->RegCount; + int vectorElements = 0; + for (auto& e : xyzw) { - ExpEmit tempxval = xyz[0]->Emit(build); - ExpEmit tempyval = xyz[1]->Emit(build); - ExpEmit xval = EmitKonst(build, tempxval); - ExpEmit yval = EmitKonst(build, tempyval); - assert(xval.RegType == REGT_FLOAT && yval.RegType == REGT_FLOAT); - if (yval.RegNum == xval.RegNum + 1) - { - // The results are already in two continuous registers so just return them as-is. - xval.RegCount++; - return xval; - } - else - { - // The values are not in continuous registers so they need to be copied together now. 
- ExpEmit out(build, REGT_FLOAT, 2); - build->Emit(OP_MOVEF, out.RegNum, xval.RegNum); - build->Emit(OP_MOVEF, out.RegNum + 1, yval.RegNum); - xval.Free(build); - yval.Free(build); - return out; - } + if (e) vectorElements++; } - else if (xyz[0]->ValueType == TypeVector2) // vec2+float + assert(vectorElements > 0); + + ExpEmit* tempVal = (ExpEmit*)calloc(vectorElements, sizeof(ExpEmit)); + ExpEmit* val = (ExpEmit*)calloc(vectorElements, sizeof(ExpEmit)); + + // Init ExpEmit + for (int i = 0; i < vectorElements; ++i) { - ExpEmit xyval = xyz[0]->Emit(build); - ExpEmit tempzval = xyz[1]->Emit(build); - ExpEmit zval = EmitKonst(build, tempzval); - assert(xyval.RegType == REGT_FLOAT && xyval.RegCount == 2 && zval.RegType == REGT_FLOAT); - if (zval.RegNum == xyval.RegNum + 2) - { - // The results are already in three continuous registers so just return them as-is. - xyval.RegCount++; - return xyval; - } - else - { - // The values are not in continuous registers so they need to be copied together now. - ExpEmit out(build, REGT_FLOAT, 3); - build->Emit(OP_MOVEV2, out.RegNum, xyval.RegNum); - build->Emit(OP_MOVEF, out.RegNum + 2, zval.RegNum); - xyval.Free(build); - zval.Free(build); - return out; - } + new(tempVal + i) ExpEmit(xyzw[i]->Emit(build)); + new(val + i) ExpEmit(EmitKonst(build, tempVal[i])); } - else // 3*float + { - assert(xyz[2] != nullptr); - ExpEmit tempxval = xyz[0]->Emit(build); - ExpEmit tempyval = xyz[1]->Emit(build); - ExpEmit tempzval = xyz[2]->Emit(build); - ExpEmit xval = EmitKonst(build, tempxval); - ExpEmit yval = EmitKonst(build, tempyval); - ExpEmit zval = EmitKonst(build, tempzval); - assert(xval.RegType == REGT_FLOAT && yval.RegType == REGT_FLOAT && zval.RegType == REGT_FLOAT); - if (yval.RegNum == xval.RegNum + 1 && zval.RegNum == xval.RegNum + 2) + bool isContinuous = true; + + for (int i = 1; i < vectorElements; ++i) { - // The results are already in three continuous registers so just return them as-is. 
- xval.RegCount += 2; - return xval; - } - else - { - // The values are not in continuous registers so they need to be copied together now. - ExpEmit out(build, REGT_FLOAT, 3); - //Try to optimize a bit... - if (yval.RegNum == xval.RegNum + 1) + if (val[i - 1].RegNum + val[i - 1].RegCount != val[i].RegNum) { - build->Emit(OP_MOVEV2, out.RegNum, xval.RegNum); - build->Emit(OP_MOVEF, out.RegNum + 2, zval.RegNum); + isContinuous = false; + break; } - else if (zval.RegNum == yval.RegNum + 1) + } + + // all values are in continuous registers: + if (isContinuous) + { + val[0].RegCount = vectorDimensions; + return val[0]; + } + } + + ExpEmit out(build, REGT_FLOAT, vectorDimensions); + + { + auto emitRegMove = [&](int regsToMove, int dstRegIndex, int srcRegIndex) { + assert(dstRegIndex < vectorDimensions); + assert(srcRegIndex < vectorDimensions); + assert(regsToMove > 0 && regsToMove <= 4); + build->Emit(regsToMove == 1 ? OP_MOVEF : OP_MOVEV2 + regsToMove - 2, out.RegNum + dstRegIndex, val[srcRegIndex].RegNum); + static_assert(OP_MOVEV2 + 1 == OP_MOVEV3); + static_assert(OP_MOVEV3 + 1 == OP_MOVEV4); + }; + + int regsToPush = 0; + int nextRegNum = val[0].RegNum; + int lastElementIndex = 0; + int reg = 0; + + // Use larger MOVE OPs for any groups of registers that are continuous including those across individual xyzw[] elements + for (int elementIndex = 0; elementIndex < vectorElements; ++elementIndex) + { + int regCount = xyzw[elementIndex]->ValueType->RegCount; + + if (nextRegNum != val[elementIndex].RegNum) { - build->Emit(OP_MOVEF, out.RegNum, xval.RegNum); - build->Emit(OP_MOVEV2, out.RegNum+1, yval.RegNum); + emitRegMove(regsToPush, reg, lastElementIndex); + + reg += regsToPush; + regsToPush = regCount; + nextRegNum = val[elementIndex].RegNum + val[elementIndex].RegCount; + lastElementIndex = elementIndex; } else { - build->Emit(OP_MOVEF, out.RegNum, xval.RegNum); - build->Emit(OP_MOVEF, out.RegNum + 1, yval.RegNum); - build->Emit(OP_MOVEF, out.RegNum + 2, 
zval.RegNum); + regsToPush += regCount; + nextRegNum = val[elementIndex].RegNum + val[elementIndex].RegCount; } - xval.Free(build); - yval.Free(build); - zval.Free(build); - return out; + } + + // Emit move instructions on the last register + if (regsToPush > 0) + { + emitRegMove(regsToPush, reg, lastElementIndex); } } + + for (int i = 0; i < vectorElements; ++i) + { + val[i].Free(build); + val[i].~ExpEmit(); + } + + return out; } //========================================================================== @@ -1688,7 +1727,7 @@ FxExpression *FxTypeCast::Resolve(FCompileContext &ctx) delete this; return x; } - else if ((basex->IsVector2() && IsVector2()) || (basex->IsVector3() && IsVector3())) + else if ((basex->IsVector2() && IsVector2()) || (basex->IsVector3() && IsVector3()) || (basex->IsVector4() && IsVector4())) { auto x = basex; basex = nullptr; @@ -1887,6 +1926,10 @@ ExpEmit FxMinusSign::Emit(VMFunctionBuilder *build) build->Emit(OP_NEGV3, to.RegNum, from.RegNum); break; + case 4: + build->Emit(OP_NEGV4, to.RegNum, from.RegNum); + break; + } } return to; @@ -2799,7 +2842,7 @@ FxExpression *FxAddSub::Resolve(FCompileContext& ctx) else if (left->IsVector() && right->IsVector()) { // a vector2 can be added to or subtracted from a vector 3 but it needs to be the right operand. - if (((left->IsVector3() || left->IsVector2()) && right->IsVector2()) || (left->IsVector3() && right->IsVector3())) + if (((left->IsVector3() || left->IsVector2()) && right->IsVector2()) || (left->IsVector3() && right->IsVector3()) || (left->IsVector4() && right->IsVector4())) { ValueType = left->ValueType; } @@ -2893,7 +2936,7 @@ ExpEmit FxAddSub::Emit(VMFunctionBuilder *build) { assert(op1.RegType == REGT_FLOAT && op2.RegType == REGT_FLOAT); - build->Emit(right->IsVector2() ? OP_ADDV2_RR : OP_ADDV3_RR, to.RegNum, op1.RegNum, op2.RegNum); + build->Emit(right->IsVector4() ? OP_ADDV4_RR : right->IsVector3() ? 
OP_ADDV3_RR : OP_ADDV2_RR, to.RegNum, op1.RegNum, op2.RegNum); if (left->IsVector3() && right->IsVector2() && to.RegNum != op1.RegNum) { // must move the z-coordinate @@ -2926,7 +2969,7 @@ ExpEmit FxAddSub::Emit(VMFunctionBuilder *build) if (IsVector()) { assert(op1.RegType == REGT_FLOAT && op2.RegType == REGT_FLOAT); - build->Emit(right->IsVector2() ? OP_SUBV2_RR : OP_SUBV3_RR, to.RegNum, op1.RegNum, op2.RegNum); + build->Emit(right->IsVector4() ? OP_SUBV4_RR : right->IsVector3() ? OP_SUBV3_RR : OP_SUBV2_RR, to.RegNum, op1.RegNum, op2.RegNum); return to; } else if (ValueType->GetRegType() == REGT_FLOAT) @@ -3129,11 +3172,11 @@ ExpEmit FxMulDiv::Emit(VMFunctionBuilder *build) int op; if (op2.Konst) { - op = Operator == '*' ? (IsVector2() ? OP_MULVF2_RK : OP_MULVF3_RK) : (IsVector2() ? OP_DIVVF2_RK : OP_DIVVF3_RK); + op = Operator == '*' ? (IsVector2() ? OP_MULVF2_RK : IsVector3() ? OP_MULVF3_RK : OP_MULVF4_RK) : (IsVector2() ? OP_DIVVF2_RK : IsVector3() ? OP_DIVVF3_RK : OP_DIVVF4_RK); } else { - op = Operator == '*' ? (IsVector2() ? OP_MULVF2_RR : OP_MULVF3_RR) : (IsVector2() ? OP_DIVVF2_RR : OP_DIVVF3_RR); + op = Operator == '*' ? (IsVector2() ? OP_MULVF2_RR : IsVector3() ? OP_MULVF3_RR : OP_MULVF4_RR) : (IsVector2() ? OP_DIVVF2_RR : IsVector3() ? OP_DIVVF3_RR : OP_DIVVF4_RR); } op1.Free(build); op2.Free(build); @@ -3598,7 +3641,7 @@ FxExpression *FxCompareEq::Resolve(FCompileContext& ctx) } // identical types are always comparable, if they can be placed in a register, so we can save most checks if this is the case. 
- if (left->ValueType != right->ValueType && !(left->IsVector2() && right->IsVector2()) && !(left->IsVector3() && right->IsVector3())) + if (left->ValueType != right->ValueType && !(left->IsVector2() && right->IsVector2()) && !(left->IsVector3() && right->IsVector3()) && !(left->IsVector4() && right->IsVector4())) { FxExpression *x; if (left->IsNumeric() && right->ValueType == TypeString && (x = StringConstToChar(right))) @@ -3836,11 +3879,11 @@ ExpEmit FxCompareEq::EmitCommon(VMFunctionBuilder *build, bool forcompare, bool std::swap(op1, op2); } assert(!op1.Konst); - assert(op1.RegCount >= 1 && op1.RegCount <= 3); + assert(op1.RegCount >= 1 && op1.RegCount <= 4); ExpEmit to(build, REGT_INT); - static int flops[] = { OP_EQF_R, OP_EQV2_R, OP_EQV3_R }; + static int flops[] = { OP_EQF_R, OP_EQV2_R, OP_EQV3_R, OP_EQV4_R }; instr = op1.RegType == REGT_INT ? OP_EQ_R : op1.RegType == REGT_FLOAT ? flops[op1.RegCount - 1] : OP_EQA_R; @@ -4256,7 +4299,7 @@ ExpEmit FxConcat::Emit(VMFunctionBuilder *build) build->Emit(op1.RegType == REGT_INT ? OP_LK : op1.RegType == REGT_FLOAT ? OP_LKF : OP_LKP, nonconst.RegNum, op1.RegNum); op1 = nonconst; } - if (op1.RegType == REGT_FLOAT) cast = op1.RegCount == 1 ? CAST_F2S : op1.RegCount == 2 ? CAST_V22S : CAST_V32S; + if (op1.RegType == REGT_FLOAT) cast = op1.RegCount == 1 ? CAST_F2S : op1.RegCount == 2 ? CAST_V22S : op1.RegCount == 3 ? CAST_V32S : CAST_V42S; else if (left->ValueType == TypeUInt32) cast = CAST_U2S; else if (left->ValueType == TypeName) cast = CAST_N2S; else if (left->ValueType == TypeSound) cast = CAST_So2S; @@ -4289,7 +4332,7 @@ ExpEmit FxConcat::Emit(VMFunctionBuilder *build) build->Emit(op2.RegType == REGT_INT ? OP_LK : op2.RegType == REGT_FLOAT ? OP_LKF : OP_LKP, nonconst.RegNum, op2.RegNum); op2 = nonconst; } - if (op2.RegType == REGT_FLOAT) cast = op2.RegCount == 1 ? CAST_F2S : op2.RegCount == 2 ? CAST_V22S : CAST_V32S; + if (op2.RegType == REGT_FLOAT) cast = op2.RegCount == 1 ? CAST_F2S : op2.RegCount == 2 ? 
CAST_V22S : op2.RegCount == 3 ? CAST_V32S : CAST_V42S; else if (right->ValueType == TypeUInt32) cast = CAST_U2S; else if (right->ValueType == TypeName) cast = CAST_N2S; else if (right->ValueType == TypeSound) cast = CAST_So2S; @@ -4552,7 +4595,7 @@ ExpEmit FxDotCross::Emit(VMFunctionBuilder *build) ExpEmit to(build, ValueType->GetRegType(), ValueType->GetRegCount()); ExpEmit op1 = left->Emit(build); ExpEmit op2 = right->Emit(build); - int op = Operator == TK_Cross ? OP_CROSSV_RR : left->ValueType == TypeVector3 ? OP_DOTV3_RR : OP_DOTV2_RR; + int op = Operator == TK_Cross ? OP_CROSSV_RR : left->ValueType == TypeVector4 ? OP_DOTV4_RR : left->ValueType == TypeVector3 ? OP_DOTV3_RR : OP_DOTV2_RR; build->Emit(op, to.RegNum, op1.RegNum, op2.RegNum); op1.Free(build); op2.Free(build); @@ -8740,12 +8783,12 @@ FxExpression *FxVMFunctionCall::Resolve(FCompileContext& ctx) else { // Vectors need special treatment because they are not normal constants - FxConstant *cs[3] = { nullptr }; + FxConstant *cs[4] = { nullptr }; for (int l = 0; l < ntype->GetRegCount(); l++) { cs[l] = new FxConstant(TypeFloat64, defaults[l + i + k + skipdefs + implicit], ScriptPosition); } - FxExpression *x = new FxVectorValue(cs[0], cs[1], cs[2], ScriptPosition); + FxExpression *x = new FxVectorValue(cs[0], cs[1], cs[2], cs[3], ScriptPosition); ArgList.Insert(i + k, x); skipdefs += ntype->GetRegCount() - 1; } @@ -9155,15 +9198,19 @@ ExpEmit FxVectorBuiltin::Emit(VMFunctionBuilder *build) { ExpEmit to(build, ValueType->GetRegType(), ValueType->GetRegCount()); ExpEmit op = Self->Emit(build); + + const int vecSize = (Self->ValueType == TypeVector2 || Self->ValueType == TypeFVector2) ? 2 + : (Self->ValueType == TypeVector3 || Self->ValueType == TypeFVector3) ? 3 : 4; + if (Function == NAME_Length) { - build->Emit(Self->ValueType == TypeVector2 || Self->ValueType == TypeFVector2 ? OP_LENV2 : OP_LENV3, to.RegNum, op.RegNum); + build->Emit(vecSize == 2 ? OP_LENV2 : vecSize == 3 ? 
OP_LENV3 : OP_LENV4, to.RegNum, op.RegNum); } else { ExpEmit len(build, REGT_FLOAT); - build->Emit(Self->ValueType == TypeVector2 || Self->ValueType == TypeFVector2 ? OP_LENV2 : OP_LENV3, len.RegNum, op.RegNum); - build->Emit(Self->ValueType == TypeVector2 || Self->ValueType == TypeFVector2 ? OP_DIVVF2_RR : OP_DIVVF3_RR, to.RegNum, op.RegNum, len.RegNum); + build->Emit(vecSize == 2 ? OP_LENV2 : vecSize == 3 ? OP_LENV3 : OP_LENV4, len.RegNum, op.RegNum); + build->Emit(vecSize == 2 ? OP_DIVVF2_RR : vecSize == 3 ? OP_DIVVF3_RR : OP_DIVVF4_RR, to.RegNum, op.RegNum, len.RegNum); len.Free(build); } op.Free(build); @@ -10827,11 +10874,12 @@ FxLocalVariableDeclaration::FxLocalVariableDeclaration(PType *type, FName name, // Local FVector isn't different from Vector if (type == TypeFVector2) type = TypeVector2; else if (type == TypeFVector3) type = TypeVector3; + else if (type == TypeFVector4) type = TypeVector4; ValueType = type; VarFlags = varflags; Name = name; - RegCount = type == TypeVector2 ? 2 : type == TypeVector3 ? 
3 : 1; + RegCount = type->RegCount; Init = initval; clearExpr = nullptr; } diff --git a/source/common/scripting/backend/codegen.h b/source/common/scripting/backend/codegen.h index 2e3617700..aa1b9ee2d 100644 --- a/source/common/scripting/backend/codegen.h +++ b/source/common/scripting/backend/codegen.h @@ -336,9 +336,10 @@ public: bool IsFloat() const { return ValueType->isFloat(); } bool IsInteger() const { return ValueType->isNumeric() && ValueType->isIntCompatible(); } bool IsPointer() const { return ValueType->isPointer(); } - bool IsVector() const { return ValueType == TypeVector2 || ValueType == TypeVector3 || ValueType == TypeFVector2 || ValueType == TypeFVector3; }; + bool IsVector() const { return IsVector2() || IsVector3() || IsVector4(); }; bool IsVector2() const { return ValueType == TypeVector2 || ValueType == TypeFVector2; }; bool IsVector3() const { return ValueType == TypeVector3 || ValueType == TypeFVector3; }; + bool IsVector4() const { return ValueType == TypeVector4 || ValueType == TypeFVector4; }; bool IsBoolCompat() const { return ValueType->isScalar(); } bool IsObject() const { return ValueType->isObjectPointer(); } bool IsArray() const { return ValueType->isArray() || (ValueType->isPointer() && ValueType->toPointer()->PointedType->isArray()); } @@ -550,20 +551,23 @@ public: class FxVectorValue : public FxExpression { - FxExpression *xyz[3]; + constexpr static int maxVectorDimensions = 4; + + FxExpression *xyzw[maxVectorDimensions]; bool isConst; // gets set to true if all element are const (used by function defaults parser) public: friend class ZCCCompiler; - FxVectorValue(FxExpression *x, FxExpression *y, FxExpression *z, const FScriptPosition &sc); + FxVectorValue(FxExpression *x, FxExpression *y, FxExpression *z, FxExpression* w, const FScriptPosition &sc); ~FxVectorValue(); FxExpression *Resolve(FCompileContext&); bool isConstVector(int dim) { - if (!isConst) return false; - return dim == 2 ? 
xyz[2] == nullptr : xyz[2] != nullptr; + if (!isConst) + return false; + return dim >= 1 && dim <= maxVectorDimensions && xyzw[dim - 1] && (dim == maxVectorDimensions || !xyzw[dim]); } ExpEmit Emit(VMFunctionBuilder *build); diff --git a/source/common/scripting/backend/vmbuilder.cpp b/source/common/scripting/backend/vmbuilder.cpp index 1624aa42f..7d270af61 100644 --- a/source/common/scripting/backend/vmbuilder.cpp +++ b/source/common/scripting/backend/vmbuilder.cpp @@ -637,6 +637,7 @@ size_t VMFunctionBuilder::Emit(int opcode, int opa, VM_SHALF opbc) int chg; if (opa & REGT_MULTIREG2) chg = 2; else if (opa & REGT_MULTIREG3) chg = 3; + else if (opa & REGT_MULTIREG4) chg = 4; else chg = 1; ParamChange(chg); } diff --git a/source/common/scripting/core/types.cpp b/source/common/scripting/core/types.cpp index 5dd7cd6b3..f9b367da8 100644 --- a/source/common/scripting/core/types.cpp +++ b/source/common/scripting/core/types.cpp @@ -61,8 +61,10 @@ PPointer *TypeFont; PStateLabel *TypeStateLabel; PStruct *TypeVector2; PStruct *TypeVector3; +PStruct* TypeVector4; PStruct* TypeFVector2; PStruct* TypeFVector3; +PStruct* TypeFVector4; PStruct *TypeColorStruct; PStruct *TypeStringStruct; PPointer *TypeNullPtr; @@ -350,6 +352,22 @@ void PType::StaticInit() TypeVector3->RegCount = 3; TypeVector3->isOrdered = true; + TypeVector4 = new PStruct(NAME_Vector4, nullptr); + TypeVector4->AddField(NAME_X, TypeFloat64); + TypeVector4->AddField(NAME_Y, TypeFloat64); + TypeVector4->AddField(NAME_Z, TypeFloat64); + TypeVector4->AddField(NAME_W, TypeFloat64); + // allow accessing xyz as a vector3. 
This is not supposed to be serialized so it's marked transient + TypeVector4->Symbols.AddSymbol(Create(NAME_XYZ, TypeVector3, VARF_Transient, 0)); + TypeVector4->Symbols.AddSymbol(Create(NAME_XY, TypeVector2, VARF_Transient, 0)); + TypeTable.AddType(TypeVector4, NAME_Struct); + TypeVector4->loadOp = OP_LV4; + TypeVector4->storeOp = OP_SV4; + TypeVector4->moveOp = OP_MOVEV4; + TypeVector4->RegType = REGT_FLOAT; + TypeVector4->RegCount = 4; + TypeVector4->isOrdered = true; + TypeFVector2 = new PStruct(NAME_FVector2, nullptr); TypeFVector2->AddField(NAME_X, TypeFloat32); @@ -376,6 +394,22 @@ void PType::StaticInit() TypeFVector3->RegCount = 3; TypeFVector3->isOrdered = true; + TypeFVector4 = new PStruct(NAME_FVector4, nullptr); + TypeFVector4->AddField(NAME_X, TypeFloat32); + TypeFVector4->AddField(NAME_Y, TypeFloat32); + TypeFVector4->AddField(NAME_Z, TypeFloat32); + TypeFVector4->AddField(NAME_W, TypeFloat32); + // allow accessing xyz as a vector3 + TypeFVector4->Symbols.AddSymbol(Create(NAME_XYZ, TypeFVector3, VARF_Transient, 0)); + TypeFVector4->Symbols.AddSymbol(Create(NAME_XY, TypeFVector2, VARF_Transient, 0)); + TypeTable.AddType(TypeFVector4, NAME_Struct); + TypeFVector4->loadOp = OP_LFV4; + TypeFVector4->storeOp = OP_SFV4; + TypeFVector4->moveOp = OP_MOVEV4; + TypeFVector4->RegType = REGT_FLOAT; + TypeFVector4->RegCount = 4; + TypeFVector4->isOrdered = true; + Namespaces.GlobalNamespace->Symbols.AddSymbol(Create(NAME_sByte, TypeSInt8)); Namespaces.GlobalNamespace->Symbols.AddSymbol(Create(NAME_Byte, TypeUInt8)); Namespaces.GlobalNamespace->Symbols.AddSymbol(Create(NAME_Short, TypeSInt16)); @@ -394,8 +428,10 @@ void PType::StaticInit() Namespaces.GlobalNamespace->Symbols.AddSymbol(Create(NAME_State, TypeState)); Namespaces.GlobalNamespace->Symbols.AddSymbol(Create(NAME_Vector2, TypeVector2)); Namespaces.GlobalNamespace->Symbols.AddSymbol(Create(NAME_Vector3, TypeVector3)); + Namespaces.GlobalNamespace->Symbols.AddSymbol(Create(NAME_Vector4, TypeVector4)); 
Namespaces.GlobalNamespace->Symbols.AddSymbol(Create(NAME_FVector2, TypeFVector2)); Namespaces.GlobalNamespace->Symbols.AddSymbol(Create(NAME_FVector3, TypeFVector3)); + Namespaces.GlobalNamespace->Symbols.AddSymbol(Create(NAME_FVector4, TypeFVector4)); } diff --git a/source/common/scripting/core/types.h b/source/common/scripting/core/types.h index 959d66bda..25fe895fd 100644 --- a/source/common/scripting/core/types.h +++ b/source/common/scripting/core/types.h @@ -615,8 +615,10 @@ extern PTextureID *TypeTextureID; extern PSpriteID *TypeSpriteID; extern PStruct* TypeVector2; extern PStruct* TypeVector3; +extern PStruct* TypeVector4; extern PStruct* TypeFVector2; extern PStruct* TypeFVector3; +extern PStruct* TypeFVector4; extern PStruct *TypeColorStruct; extern PStruct *TypeStringStruct; extern PStatePointer *TypeState; diff --git a/source/common/scripting/core/vmdisasm.cpp b/source/common/scripting/core/vmdisasm.cpp index ae9078e62..433a2fd1a 100644 --- a/source/common/scripting/core/vmdisasm.cpp +++ b/source/common/scripting/core/vmdisasm.cpp @@ -639,6 +639,8 @@ static int print_reg(FILE *out, int col, int arg, int mode, int immshift, const return col+printf_wrapper(out, "v%d.2", regnum); case REGT_FLOAT | REGT_MULTIREG3: return col+printf_wrapper(out, "v%d.3", regnum); + case REGT_FLOAT | REGT_MULTIREG4: + return col+printf_wrapper(out, "v%d.4", regnum); case REGT_INT | REGT_KONST: return col+print_reg(out, 0, regnum, MODE_KI, 0, func); case REGT_FLOAT | REGT_KONST: diff --git a/source/common/scripting/frontend/ast.cpp b/source/common/scripting/frontend/ast.cpp index cb29457e4..929352db8 100644 --- a/source/common/scripting/frontend/ast.cpp +++ b/source/common/scripting/frontend/ast.cpp @@ -53,6 +53,7 @@ static const char *BuiltInTypeNames[] = "string", "vector2", "vector3", + "vector4", "name", "color", @@ -684,6 +685,7 @@ static void PrintVectorInitializer(FLispString &out, ZCC_TreeNode *node) PrintNodes(out, enode->X); PrintNodes(out, enode->Y); 
PrintNodes(out, enode->Z); + PrintNodes(out, enode->W); out.Close(); } diff --git a/source/common/scripting/frontend/zcc-parse.lemon b/source/common/scripting/frontend/zcc-parse.lemon index 6765defa1..aab1f5377 100644 --- a/source/common/scripting/frontend/zcc-parse.lemon +++ b/source/common/scripting/frontend/zcc-parse.lemon @@ -861,6 +861,7 @@ type_name1(X) ::= DOUBLE(T). { X.Int = ZCC_Float64; X.SourceLoc = T.SourceLoc //type_name1(X) ::= STRING(T). { X.Int = ZCC_String; X.SourceLoc = T.SourceLoc; } // [ZZ] it's handled elsewhere. this particular line only causes troubles in the form of String.Format being invalid. type_name1(X) ::= VECTOR2(T). { X.Int = ZCC_Vector2; X.SourceLoc = T.SourceLoc; } type_name1(X) ::= VECTOR3(T). { X.Int = ZCC_Vector3; X.SourceLoc = T.SourceLoc; } +type_name1(X) ::= VECTOR4(T). { X.Int = ZCC_Vector4; X.SourceLoc = T.SourceLoc; } type_name1(X) ::= NAME(T). { X.Int = ZCC_Name; X.SourceLoc = T.SourceLoc; } type_name1(X) ::= SOUND(T). { X.Int = ZCC_Sound; X.SourceLoc = T.SourceLoc; } type_name1(X) ::= STATE(T). { X.Int = ZCC_State; X.SourceLoc = T.SourceLoc; } @@ -931,7 +932,7 @@ type_name(X) ::= DOT dottable_id(A). /* Type names can also be used as identifiers in contexts where type names * are not normally allowed. */ %fallback IDENTIFIER - SBYTE BYTE SHORT USHORT INT UINT BOOL FLOAT DOUBLE STRING VECTOR2 VECTOR3 NAME MAP ARRAY VOID STATE COLOR SOUND UINT8 INT8 UINT16 INT16 PROPERTY. + SBYTE BYTE SHORT USHORT INT UINT BOOL FLOAT DOUBLE STRING VECTOR2 VECTOR3 VECTOR4 NAME MAP ARRAY VOID STATE COLOR SOUND UINT8 INT8 UINT16 INT16 PROPERTY. /* Aggregate types */ %type aggregate_type {ZCC_Type *} @@ -1303,6 +1304,17 @@ primary(X) ::= SUPER(T). X = expr; } primary(X) ::= constant(A). { X = A; /*X-overwrites-A*/ } +primary(XX) ::= LPAREN expr(A) COMMA expr(B) COMMA expr(C) COMMA expr(D) RPAREN. 
[DOT] +{ + NEW_AST_NODE(VectorValue, expr, A); + expr->Operation = PEX_Vector; + expr->Type = TypeVector4; + expr->X = A; + expr->Y = B; + expr->Z = C; + expr->W = D; + XX = expr; +} primary(XX) ::= LPAREN expr(A) COMMA expr(B) COMMA expr(C) RPAREN. [DOT] { NEW_AST_NODE(VectorValue, expr, A); @@ -1311,6 +1323,7 @@ primary(XX) ::= LPAREN expr(A) COMMA expr(B) COMMA expr(C) RPAREN. [DOT] expr->X = A; expr->Y = B; expr->Z = C; + expr->W = nullptr; XX = expr; } primary(XX) ::= LPAREN expr(A) COMMA expr(B) RPAREN. [DOT] @@ -1321,6 +1334,7 @@ primary(XX) ::= LPAREN expr(A) COMMA expr(B) RPAREN. [DOT] expr->X = A; expr->Y = B; expr->Z = nullptr; + expr->W = nullptr; XX = expr; } primary(X) ::= LPAREN expr(A) RPAREN. diff --git a/source/common/scripting/frontend/zcc_compile.cpp b/source/common/scripting/frontend/zcc_compile.cpp index 00cc4ace4..ed4848897 100644 --- a/source/common/scripting/frontend/zcc_compile.cpp +++ b/source/common/scripting/frontend/zcc_compile.cpp @@ -1790,6 +1790,10 @@ PType *ZCCCompiler::DetermineType(PType *outertype, ZCC_TreeNode *field, FName n retval = TypeVector3; break; + case ZCC_Vector4: + retval = TypeVector4; + break; + case ZCC_State: retval = TypeState; break; @@ -2150,7 +2154,7 @@ void ZCCCompiler::CompileFunction(ZCC_StructWork *c, ZCC_FuncDeclarator *f, bool do { auto type = DetermineType(c->Type(), f, f->Name, t, false, false); - if (type->isContainer() && type != TypeVector2 && type != TypeVector3 && type != TypeFVector2 && type != TypeFVector3) + if (type->isContainer() && type != TypeVector2 && type != TypeVector3 && type != TypeVector4 && type != TypeFVector2 && type != TypeFVector3 && type != TypeFVector4) { // structs and classes only get passed by pointer. type = NewPointer(type); @@ -2168,6 +2172,10 @@ void ZCCCompiler::CompileFunction(ZCC_StructWork *c, ZCC_FuncDeclarator *f, bool { type = TypeVector3; } + else if (type == TypeFVector4) + { + type = TypeVector4; + } // TBD: disallow certain types? 
For now, let everything pass that isn't an array. rets.Push(type); t = static_cast(t->SiblingNext); @@ -2340,12 +2348,12 @@ void ZCCCompiler::CompileFunction(ZCC_StructWork *c, ZCC_FuncDeclarator *f, bool do { int elementcount = 1; - TypedVMValue vmval[3]; // default is REGT_NIL which means 'no default value' here. + TypedVMValue vmval[4]; // default is REGT_NIL which means 'no default value' here. if (p->Type != nullptr) { auto type = DetermineType(c->Type(), p, f->Name, p->Type, false, false); int flags = 0; - if ((type->isStruct() && type != TypeVector2 && type != TypeVector3) || type->isDynArray()) + if ((type->isStruct() && type != TypeVector2 && type != TypeVector3 && type != TypeVector4) || type->isDynArray()) { // Structs are being passed by pointer, but unless marked 'out' that pointer must be readonly. type = NewPointer(type /*, !(p->Flags & ZCC_Out)*/); @@ -2362,8 +2370,12 @@ void ZCCCompiler::CompileFunction(ZCC_StructWork *c, ZCC_FuncDeclarator *f, bool { elementcount = 3; } + else if (type == TypeVector4 || type == TypeFVector4) + { + elementcount = 4; + } } - if (type->GetRegType() == REGT_NIL && type != TypeVector2 && type != TypeVector3 && type != TypeFVector2 && type != TypeFVector3) + if (type->GetRegType() == REGT_NIL && type != TypeVector2 && type != TypeVector3 && type != TypeVector4 && type != TypeFVector2 && type != TypeFVector3 && type != TypeFVector4) { // If it's TypeError, then an error was already given if (type != TypeError) @@ -2407,15 +2419,23 @@ void ZCCCompiler::CompileFunction(ZCC_StructWork *c, ZCC_FuncDeclarator *f, bool if ((type == TypeVector2 || type == TypeFVector2) && x->ExprType == EFX_VectorValue && static_cast(x)->isConstVector(2)) { auto vx = static_cast(x); - vmval[0] = static_cast(vx->xyz[0])->GetValue().GetFloat(); - vmval[1] = static_cast(vx->xyz[1])->GetValue().GetFloat(); + vmval[0] = static_cast(vx->xyzw[0])->GetValue().GetFloat(); + vmval[1] = static_cast(vx->xyzw[1])->GetValue().GetFloat(); } else if ((type == 
TypeVector3 || type == TypeFVector3) && x->ExprType == EFX_VectorValue && static_cast(x)->isConstVector(3)) { auto vx = static_cast(x); - vmval[0] = static_cast(vx->xyz[0])->GetValue().GetFloat(); - vmval[1] = static_cast(vx->xyz[1])->GetValue().GetFloat(); - vmval[2] = static_cast(vx->xyz[2])->GetValue().GetFloat(); + vmval[0] = static_cast(vx->xyzw[0])->GetValue().GetFloat(); + vmval[1] = static_cast(vx->xyzw[1])->GetValue().GetFloat(); + vmval[2] = static_cast(vx->xyzw[2])->GetValue().GetFloat(); + } + else if ((type == TypeVector4 || type == TypeFVector4) && x->ExprType == EFX_VectorValue && static_cast(x)->isConstVector(4)) + { + auto vx = static_cast(x); + vmval[0] = static_cast(vx->xyzw[0])->GetValue().GetFloat(); + vmval[1] = static_cast(vx->xyzw[1])->GetValue().GetFloat(); + vmval[2] = static_cast(vx->xyzw[2])->GetValue().GetFloat(); + vmval[3] = static_cast(vx->xyzw[3])->GetValue().GetFloat(); } else if (!x->isConstant()) { @@ -3038,7 +3058,8 @@ FxExpression *ZCCCompiler::ConvertNode(ZCC_TreeNode *ast, bool substitute) auto xx = ConvertNode(vecini->X); auto yy = ConvertNode(vecini->Y); auto zz = ConvertNode(vecini->Z); - return new FxVectorValue(xx, yy, zz, *ast); + auto ww = ConvertNode(vecini->W); + return new FxVectorValue(xx, yy, zz, ww, *ast); } case AST_LocalVarStmt: diff --git a/source/common/scripting/frontend/zcc_parser.cpp b/source/common/scripting/frontend/zcc_parser.cpp index 27b624953..22b531a76 100644 --- a/source/common/scripting/frontend/zcc_parser.cpp +++ b/source/common/scripting/frontend/zcc_parser.cpp @@ -1297,7 +1297,8 @@ ZCC_TreeNode *TreeNodeDeepCopy_Internal(ZCC_AST *ast, ZCC_TreeNode *orig, bool c // ZCC_VectorValue copy->X = static_cast(TreeNodeDeepCopy_Internal(ast, origCasted->X, true, copiedNodesList)); copy->Y = static_cast(TreeNodeDeepCopy_Internal(ast, origCasted->Y, true, copiedNodesList)); - copy->Z = static_cast(TreeNodeDeepCopy_Internal(ast, origCasted->Z, true, copiedNodesList)); + copy->Z = 
static_cast(TreeNodeDeepCopy_Internal(ast, origCasted->Z, true, copiedNodesList)); + copy->W = static_cast(TreeNodeDeepCopy_Internal(ast, origCasted->W, true, copiedNodesList)); break; } diff --git a/source/common/scripting/frontend/zcc_parser.h b/source/common/scripting/frontend/zcc_parser.h index 2d89d0d94..b48ecbe3c 100644 --- a/source/common/scripting/frontend/zcc_parser.h +++ b/source/common/scripting/frontend/zcc_parser.h @@ -158,6 +158,7 @@ enum EZCCBuiltinType ZCC_String, ZCC_Vector2, ZCC_Vector3, + ZCC_Vector4, ZCC_Name, ZCC_Color, // special types for ZDoom. @@ -442,7 +443,7 @@ struct ZCC_ExprTrinary : ZCC_Expression struct ZCC_VectorValue : ZCC_Expression { - ZCC_Expression *X, *Y, *Z; + ZCC_Expression *X, *Y, *Z, *W; }; struct ZCC_Statement : ZCC_TreeNode diff --git a/source/common/scripting/jit/jit.cpp b/source/common/scripting/jit/jit.cpp index 8937fa1e0..6cd78bd05 100644 --- a/source/common/scripting/jit/jit.cpp +++ b/source/common/scripting/jit/jit.cpp @@ -6,6 +6,7 @@ extern PString *TypeString; extern PStruct *TypeVector2; extern PStruct *TypeVector3; +extern PStruct* TypeVector4; static void OutputJitLog(const asmjit::StringLogger &logger); @@ -315,6 +316,13 @@ void JitCompiler::SetupSimpleFrame() cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); } + else if (type == TypeVector4 || type == TypeFVector4) + { + cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); + cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); + cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); + cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f))); + } else if (type == TypeFloat64) { cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * 
sizeof(VMValue) + offsetof(VMValue, f))); @@ -551,6 +559,20 @@ asmjit::X86Xmm JitCompiler::CheckRegF(int r0, int r1, int r2, int r3) } } +asmjit::X86Xmm JitCompiler::CheckRegF(int r0, int r1, int r2, int r3, int r4) +{ + if (r0 != r1 && r0 != r2 && r0 != r3 && r0 != r4) + { + return regF[r0]; + } + else + { + auto copy = newTempXmmSd(); + cc.movsd(copy, regF[r0]); + return copy; + } +} + asmjit::X86Gp JitCompiler::CheckRegS(int r0, int r1) { if (r0 != r1) diff --git a/source/common/scripting/jit/jit_call.cpp b/source/common/scripting/jit/jit_call.cpp index 83808c0ed..e6c1feb0b 100644 --- a/source/common/scripting/jit/jit_call.cpp +++ b/source/common/scripting/jit/jit_call.cpp @@ -182,6 +182,13 @@ int JitCompiler::StoreCallParams() } numparams += 2; break; + case REGT_FLOAT | REGT_MULTIREG4: + for (int j = 0; j < 4; j++) + { + cc.movsd(x86::qword_ptr(vmframe, offsetParams + (slot + j) * sizeof(VMValue) + myoffsetof(VMValue, f)), regF[bc + j]); + } + numparams += 3; + break; case REGT_FLOAT | REGT_ADDROF: cc.lea(stackPtr, x86::ptr(vmframe, offsetF + (int)(bc * sizeof(double)))); // When passing the address to a float we don't know if the receiving function will treat it as float, vec2 or vec3. @@ -256,6 +263,12 @@ void JitCompiler::LoadCallResult(int type, int regnum, bool addrof) cc.movsd(regF[regnum + 1], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 1) * sizeof(double))); cc.movsd(regF[regnum + 2], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 2) * sizeof(double))); } + else if (type & REGT_MULTIREG4) + { + cc.movsd(regF[regnum + 1], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 1) * sizeof(double))); + cc.movsd(regF[regnum + 2], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 2) * sizeof(double))); + cc.movsd(regF[regnum + 3], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 3) * sizeof(double))); + } break; case REGT_STRING: // We don't have to do anything in this case. String values are never moved to virtual registers. 
@@ -408,6 +421,11 @@ void JitCompiler::EmitNativeCall(VMNativeFunction *target) call->setArg(slot + j, regF[bc + j]); numparams += 2; break; + case REGT_FLOAT | REGT_MULTIREG4: + for (int j = 0; j < 4; j++) + call->setArg(slot + j, regF[bc + j]); + numparams += 3; + break; case REGT_FLOAT | REGT_KONST: tmp = newTempIntPtr(); tmp2 = newTempXmmSd(); @@ -550,6 +568,12 @@ void JitCompiler::EmitNativeCall(VMNativeFunction *target) cc.movsd(regF[regnum + 1], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 1) * sizeof(double))); cc.movsd(regF[regnum + 2], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 2) * sizeof(double))); break; + case REGT_FLOAT | REGT_MULTIREG4: + cc.movsd(regF[regnum], asmjit::x86::qword_ptr(vmframe, offsetF + regnum * sizeof(double))); + cc.movsd(regF[regnum + 1], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 1) * sizeof(double))); + cc.movsd(regF[regnum + 2], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 2) * sizeof(double))); + cc.movsd(regF[regnum + 3], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 3) * sizeof(double))); + break; case REGT_STRING: // We don't have to do anything in this case. String values are never moved to virtual registers. 
break; @@ -624,6 +648,13 @@ asmjit::FuncSignature JitCompiler::CreateFuncSignature() args.Push(TypeIdOf<double>::kTypeId); key += "fff"; break; + case REGT_FLOAT | REGT_MULTIREG4: + args.Push(TypeIdOf<double>::kTypeId); + args.Push(TypeIdOf<double>::kTypeId); + args.Push(TypeIdOf<double>::kTypeId); + args.Push(TypeIdOf<double>::kTypeId); + key += "ffff"; + break; default: I_Error("Unknown REGT value passed to EmitPARAM\n"); diff --git a/source/common/scripting/jit/jit_flow.cpp b/source/common/scripting/jit/jit_flow.cpp index 6157bf340..3b5acd1ab 100644 --- a/source/common/scripting/jit/jit_flow.cpp +++ b/source/common/scripting/jit/jit_flow.cpp @@ -110,7 +110,21 @@ void JitCompiler::EmitRET() if (regtype & REGT_KONST) { auto tmp = newTempInt64(); - if (regtype & REGT_MULTIREG3) + if (regtype & REGT_MULTIREG4) + { + cc.mov(tmp, (((int64_t*)konstf)[regnum])); + cc.mov(x86::qword_ptr(location), tmp); + + cc.mov(tmp, (((int64_t*)konstf)[regnum + 1])); + cc.mov(x86::qword_ptr(location, 8), tmp); + + cc.mov(tmp, (((int64_t*)konstf)[regnum + 2])); + cc.mov(x86::qword_ptr(location, 16), tmp); + + cc.mov(tmp, (((int64_t*)konstf)[regnum + 3])); + cc.mov(x86::qword_ptr(location, 24), tmp); + } + else if (regtype & REGT_MULTIREG3) { cc.mov(tmp, (((int64_t *)konstf)[regnum])); cc.mov(x86::qword_ptr(location), tmp); @@ -137,7 +151,14 @@ void JitCompiler::EmitRET() } else { - if (regtype & REGT_MULTIREG3) + if (regtype & REGT_MULTIREG4) + { + cc.movsd(x86::qword_ptr(location), regF[regnum]); + cc.movsd(x86::qword_ptr(location, 8), regF[regnum + 1]); + cc.movsd(x86::qword_ptr(location, 16), regF[regnum + 2]); + cc.movsd(x86::qword_ptr(location, 24), regF[regnum + 3]); + } + else if (regtype & REGT_MULTIREG3) { cc.movsd(x86::qword_ptr(location), regF[regnum]); cc.movsd(x86::qword_ptr(location, 8), regF[regnum + 1]); diff --git a/source/common/scripting/jit/jit_load.cpp b/source/common/scripting/jit/jit_load.cpp index ec5f795df..60947084d 100644 --- a/source/common/scripting/jit/jit_load.cpp +++ 
b/source/common/scripting/jit/jit_load.cpp @@ -325,6 +325,28 @@ void JitCompiler::EmitLV3_R() cc.movsd(regF[A + 2], asmjit::x86::qword_ptr(tmp, 16)); } +void JitCompiler::EmitLV4() +{ + EmitNullPointerThrow(B, X_READ_NIL); + auto tmp = newTempIntPtr(); + cc.lea(tmp, asmjit::x86::qword_ptr(regA[B], konstd[C])); + cc.movsd(regF[A], asmjit::x86::qword_ptr(tmp)); + cc.movsd(regF[A + 1], asmjit::x86::qword_ptr(tmp, 8)); + cc.movsd(regF[A + 2], asmjit::x86::qword_ptr(tmp, 16)); + cc.movsd(regF[A + 3], asmjit::x86::qword_ptr(tmp, 24)); +} + +void JitCompiler::EmitLV4_R() +{ + EmitNullPointerThrow(B, X_READ_NIL); + auto tmp = newTempIntPtr(); + cc.lea(tmp, asmjit::x86::qword_ptr(regA[B], regD[C])); + cc.movsd(regF[A], asmjit::x86::qword_ptr(tmp)); + cc.movsd(regF[A + 1], asmjit::x86::qword_ptr(tmp, 8)); + cc.movsd(regF[A + 2], asmjit::x86::qword_ptr(tmp, 16)); + cc.movsd(regF[A + 3], asmjit::x86::qword_ptr(tmp, 24)); +} + void JitCompiler::EmitLFV2() { EmitNullPointerThrow(B, X_READ_NIL); @@ -373,6 +395,36 @@ void JitCompiler::EmitLFV3_R() cc.cvtss2sd(regF[A + 2], regF[A + 2]); } +void JitCompiler::EmitLFV4() +{ + EmitNullPointerThrow(B, X_READ_NIL); + auto tmp = newTempIntPtr(); + cc.lea(tmp, asmjit::x86::qword_ptr(regA[B], konstd[C])); + cc.movss(regF[A], asmjit::x86::qword_ptr(tmp)); + cc.movss(regF[A + 1], asmjit::x86::qword_ptr(tmp, 4)); + cc.movss(regF[A + 2], asmjit::x86::qword_ptr(tmp, 8)); + cc.movss(regF[A + 3], asmjit::x86::qword_ptr(tmp, 12)); + cc.cvtss2sd(regF[A], regF[A]); + cc.cvtss2sd(regF[A + 1], regF[A + 1]); + cc.cvtss2sd(regF[A + 2], regF[A + 2]); + cc.cvtss2sd(regF[A + 3], regF[A + 3]); +} + +void JitCompiler::EmitLFV4_R() +{ + EmitNullPointerThrow(B, X_READ_NIL); + auto tmp = newTempIntPtr(); + cc.lea(tmp, asmjit::x86::qword_ptr(regA[B], regD[C])); + cc.movss(regF[A], asmjit::x86::qword_ptr(tmp)); + cc.movss(regF[A + 1], asmjit::x86::qword_ptr(tmp, 4)); + cc.movss(regF[A + 2], asmjit::x86::qword_ptr(tmp, 8)); + cc.movss(regF[A + 3], 
asmjit::x86::qword_ptr(tmp, 12)); + cc.cvtss2sd(regF[A], regF[A]); + cc.cvtss2sd(regF[A + 1], regF[A + 1]); + cc.cvtss2sd(regF[A + 2], regF[A + 2]); + cc.cvtss2sd(regF[A + 3], regF[A + 3]); +} + static void SetString(FString *to, char **from) { *to = *from; diff --git a/source/common/scripting/jit/jit_math.cpp b/source/common/scripting/jit/jit_math.cpp index ea9402594..c75fa2041 100644 --- a/source/common/scripting/jit/jit_math.cpp +++ b/source/common/scripting/jit/jit_math.cpp @@ -1447,6 +1447,165 @@ void JitCompiler::EmitEQV3_K() I_Error("EQV3_K is not used."); } +///////////////////////////////////////////////////////////////////////////// +// Vector math. (4D/Quaternion) + +void JitCompiler::EmitNEGV4() +{ + auto mask = cc.newDoubleConst(asmjit::kConstScopeLocal, -0.0); + auto maskXmm = newTempXmmSd(); + cc.movsd(maskXmm, mask); + cc.movsd(regF[A], regF[B]); + cc.xorpd(regF[A], maskXmm); + cc.movsd(regF[A + 1], regF[B + 1]); + cc.xorpd(regF[A + 1], maskXmm); + cc.movsd(regF[A + 2], regF[B + 2]); + cc.xorpd(regF[A + 2], maskXmm); + cc.movsd(regF[A + 3], regF[B + 3]); + cc.xorpd(regF[A + 3], maskXmm); +} + +void JitCompiler::EmitADDV4_RR() +{ + auto rc0 = CheckRegF(C, A); + auto rc1 = CheckRegF(C + 1, A + 1); + auto rc2 = CheckRegF(C + 2, A + 2); + auto rc3 = CheckRegF(C + 3, A + 3); + cc.movsd(regF[A], regF[B]); + cc.addsd(regF[A], rc0); + cc.movsd(regF[A + 1], regF[B + 1]); + cc.addsd(regF[A + 1], rc1); + cc.movsd(regF[A + 2], regF[B + 2]); + cc.addsd(regF[A + 2], rc2); + cc.movsd(regF[A + 3], regF[B + 3]); + cc.addsd(regF[A + 3], rc3); +} + +void JitCompiler::EmitSUBV4_RR() +{ + auto rc0 = CheckRegF(C, A); + auto rc1 = CheckRegF(C + 1, A + 1); + auto rc2 = CheckRegF(C + 2, A + 2); + auto rc3 = CheckRegF(C + 3, A + 3); + cc.movsd(regF[A], regF[B]); + cc.subsd(regF[A], rc0); + cc.movsd(regF[A + 1], regF[B + 1]); + cc.subsd(regF[A + 1], rc1); + cc.movsd(regF[A + 2], regF[B + 2]); + cc.subsd(regF[A + 2], rc2); + cc.movsd(regF[A + 3], regF[B + 3]); + 
cc.subsd(regF[A + 3], rc3); +} + +void JitCompiler::EmitDOTV4_RR() +{ + auto rb1 = CheckRegF(B + 1, A); + auto rb2 = CheckRegF(B + 2, A); + auto rb3 = CheckRegF(B + 3, A); + auto rc0 = CheckRegF(C, A); + auto rc1 = CheckRegF(C + 1, A); + auto rc2 = CheckRegF(C + 2, A); + auto rc3 = CheckRegF(C + 3, A); + auto tmp = newTempXmmSd(); + cc.movsd(regF[A], regF[B]); + cc.mulsd(regF[A], rc0); + cc.movsd(tmp, rb1); + cc.mulsd(tmp, rc1); + cc.addsd(regF[A], tmp); + cc.movsd(tmp, rb2); + cc.mulsd(tmp, rc2); + cc.addsd(regF[A], tmp); + cc.movsd(tmp, rb3); + cc.mulsd(tmp, rc3); + cc.addsd(regF[A], tmp); +} + +void JitCompiler::EmitMULVF4_RR() +{ + auto rc = CheckRegF(C, A, A + 1, A + 2, A + 3); + cc.movsd(regF[A], regF[B]); + cc.movsd(regF[A + 1], regF[B + 1]); + cc.movsd(regF[A + 2], regF[B + 2]); + cc.movsd(regF[A + 3], regF[B + 3]); + cc.mulsd(regF[A], rc); + cc.mulsd(regF[A + 1], rc); + cc.mulsd(regF[A + 2], rc); + cc.mulsd(regF[A + 3], rc); +} + +void JitCompiler::EmitMULVF4_RK() +{ + auto tmp = newTempIntPtr(); + cc.movsd(regF[A], regF[B]); + cc.movsd(regF[A + 1], regF[B + 1]); + cc.movsd(regF[A + 2], regF[B + 2]); + cc.movsd(regF[A + 3], regF[B + 3]); + cc.mov(tmp, asmjit::imm_ptr(&konstf[C])); + cc.mulsd(regF[A], asmjit::x86::qword_ptr(tmp)); + cc.mulsd(regF[A + 1], asmjit::x86::qword_ptr(tmp)); + cc.mulsd(regF[A + 2], asmjit::x86::qword_ptr(tmp)); + cc.mulsd(regF[A + 3], asmjit::x86::qword_ptr(tmp)); +} + +void JitCompiler::EmitDIVVF4_RR() +{ + auto rc = CheckRegF(C, A, A + 1, A + 2, A + 3); + cc.movsd(regF[A], regF[B]); + cc.movsd(regF[A + 1], regF[B + 1]); + cc.movsd(regF[A + 2], regF[B + 2]); + cc.movsd(regF[A + 3], regF[B + 3]); + cc.divsd(regF[A], rc); + cc.divsd(regF[A + 1], rc); + cc.divsd(regF[A + 2], rc); + cc.divsd(regF[A + 3], rc); +} + +void JitCompiler::EmitDIVVF4_RK() +{ + auto tmp = newTempIntPtr(); + cc.movsd(regF[A], regF[B]); + cc.movsd(regF[A + 1], regF[B + 1]); + cc.movsd(regF[A + 2], regF[B + 2]); + cc.movsd(regF[A + 3], regF[B + 3]); + 
cc.mov(tmp, asmjit::imm_ptr(&konstf[C])); + cc.divsd(regF[A], asmjit::x86::qword_ptr(tmp)); + cc.divsd(regF[A + 1], asmjit::x86::qword_ptr(tmp)); + cc.divsd(regF[A + 2], asmjit::x86::qword_ptr(tmp)); + cc.divsd(regF[A + 3], asmjit::x86::qword_ptr(tmp)); +} + +void JitCompiler::EmitLENV4() +{ + auto rb1 = CheckRegF(B + 1, A); + auto rb2 = CheckRegF(B + 2, A); + auto rb3 = CheckRegF(B + 3, A); + auto tmp = newTempXmmSd(); + cc.movsd(regF[A], regF[B]); + cc.mulsd(regF[A], regF[B]); + cc.movsd(tmp, rb1); + cc.mulsd(tmp, rb1); + cc.addsd(regF[A], tmp); + cc.movsd(tmp, rb2); + cc.mulsd(tmp, rb2); + cc.addsd(regF[A], tmp); + cc.movsd(tmp, rb3); + cc.mulsd(tmp, rb3); + cc.addsd(regF[A], tmp); + CallSqrt(regF[A], regF[A]); +} + +void JitCompiler::EmitEQV4_R() +{ + EmitComparisonOpcode([&](bool check, asmjit::Label& fail, asmjit::Label& success) { + EmitVectorComparison<4> (check, fail, success); + }); +} + +void JitCompiler::EmitEQV4_K() +{ + I_Error("EQV4_K is not used."); +} + ///////////////////////////////////////////////////////////////////////////// // Pointer math. 
diff --git a/source/common/scripting/jit/jit_move.cpp b/source/common/scripting/jit/jit_move.cpp index 63f6158e3..1f0d4edc9 100644 --- a/source/common/scripting/jit/jit_move.cpp +++ b/source/common/scripting/jit/jit_move.cpp @@ -39,11 +39,20 @@ void JitCompiler::EmitMOVEV3() cc.movsd(regF[A + 2], regF[B + 2]); } +void JitCompiler::EmitMOVEV4() +{ + cc.movsd(regF[A], regF[B]); + cc.movsd(regF[A + 1], regF[B + 1]); + cc.movsd(regF[A + 2], regF[B + 2]); + cc.movsd(regF[A + 3], regF[B + 3]); +} + static void CastI2S(FString *a, int b) { a->Format("%d", b); } static void CastU2S(FString *a, int b) { a->Format("%u", b); } static void CastF2S(FString *a, double b) { a->Format("%.5f", b); } static void CastV22S(FString *a, double b, double b1) { a->Format("(%.5f, %.5f)", b, b1); } static void CastV32S(FString *a, double b, double b1, double b2) { a->Format("(%.5f, %.5f, %.5f)", b, b1, b2); } +static void CastV42S(FString *a, double b, double b1, double b2, double b3) { a->Format("(%.5f, %.5f, %.5f, %.5f)", b, b1, b2, b3); } static void CastP2S(FString *a, void *b) { if (b == nullptr) *a = "null"; else a->Format("%p", b); } static int CastS2I(FString *b) { return (int)b->ToLong(); } static double CastS2F(FString *b) { return b->ToDouble(); } @@ -109,6 +118,14 @@ void JitCompiler::EmitCAST() call->setArg(2, regF[B + 1]); call->setArg(3, regF[B + 2]); break; + case CAST_V42S: + call = CreateCall(CastV42S); + call->setArg(0, regS[A]); + call->setArg(1, regF[B]); + call->setArg(2, regF[B + 1]); + call->setArg(3, regF[B + 2]); + call->setArg(4, regF[B + 3]); + break; case CAST_P2S: call = CreateCall(CastP2S); call->setArg(0, regS[A]); diff --git a/source/common/scripting/jit/jit_store.cpp b/source/common/scripting/jit/jit_store.cpp index 2bce22566..49464691e 100644 --- a/source/common/scripting/jit/jit_store.cpp +++ b/source/common/scripting/jit/jit_store.cpp @@ -161,6 +161,30 @@ void JitCompiler::EmitSV3_R() cc.movsd(asmjit::x86::qword_ptr(tmp, 16), regF[B + 2]); } +void 
JitCompiler::EmitSV4() +{ + EmitNullPointerThrow(A, X_WRITE_NIL); + auto tmp = newTempIntPtr(); + cc.mov(tmp, regA[A]); + cc.add(tmp, konstd[C]); + cc.movsd(asmjit::x86::qword_ptr(tmp), regF[B]); + cc.movsd(asmjit::x86::qword_ptr(tmp, 8), regF[B + 1]); + cc.movsd(asmjit::x86::qword_ptr(tmp, 16), regF[B + 2]); + cc.movsd(asmjit::x86::qword_ptr(tmp, 24), regF[B + 3]); +} + +void JitCompiler::EmitSV4_R() +{ + EmitNullPointerThrow(A, X_WRITE_NIL); + auto tmp = newTempIntPtr(); + cc.mov(tmp, regA[A]); + cc.add(tmp, regD[C]); + cc.movsd(asmjit::x86::qword_ptr(tmp), regF[B]); + cc.movsd(asmjit::x86::qword_ptr(tmp, 8), regF[B + 1]); + cc.movsd(asmjit::x86::qword_ptr(tmp, 16), regF[B + 2]); + cc.movsd(asmjit::x86::qword_ptr(tmp, 24), regF[B + 3]); +} + void JitCompiler::EmitSFV2() { EmitNullPointerThrow(A, X_WRITE_NIL); @@ -219,6 +243,40 @@ void JitCompiler::EmitSFV3_R() cc.movss(asmjit::x86::qword_ptr(tmp, 8), tmpF); } +void JitCompiler::EmitSFV4() +{ + EmitNullPointerThrow(A, X_WRITE_NIL); + auto tmp = newTempIntPtr(); + cc.mov(tmp, regA[A]); + cc.add(tmp, konstd[C]); + auto tmpF = newTempXmmSs(); + cc.cvtsd2ss(tmpF, regF[B]); + cc.movss(asmjit::x86::qword_ptr(tmp), tmpF); + cc.cvtsd2ss(tmpF, regF[B + 1]); + cc.movss(asmjit::x86::qword_ptr(tmp, 4), tmpF); + cc.cvtsd2ss(tmpF, regF[B + 2]); + cc.movss(asmjit::x86::qword_ptr(tmp, 8), tmpF); + cc.cvtsd2ss(tmpF, regF[B + 3]); + cc.movss(asmjit::x86::qword_ptr(tmp, 12), tmpF); +} + +void JitCompiler::EmitSFV4_R() +{ + EmitNullPointerThrow(A, X_WRITE_NIL); + auto tmp = newTempIntPtr(); + cc.mov(tmp, regA[A]); + cc.add(tmp, regD[C]); + auto tmpF = newTempXmmSs(); + cc.cvtsd2ss(tmpF, regF[B]); + cc.movss(asmjit::x86::qword_ptr(tmp), tmpF); + cc.cvtsd2ss(tmpF, regF[B + 1]); + cc.movss(asmjit::x86::qword_ptr(tmp, 4), tmpF); + cc.cvtsd2ss(tmpF, regF[B + 2]); + cc.movss(asmjit::x86::qword_ptr(tmp, 8), tmpF); + cc.cvtsd2ss(tmpF, regF[B + 3]); + cc.movss(asmjit::x86::qword_ptr(tmp, 12), tmpF); +} + void JitCompiler::EmitSBIT() { 
EmitNullPointerThrow(A, X_WRITE_NIL); diff --git a/source/common/scripting/jit/jitintern.h b/source/common/scripting/jit/jitintern.h index ac3d8acf5..2a3dda426 100644 --- a/source/common/scripting/jit/jitintern.h +++ b/source/common/scripting/jit/jitintern.h @@ -241,6 +241,7 @@ private: asmjit::X86Xmm CheckRegF(int r0, int r1); asmjit::X86Xmm CheckRegF(int r0, int r1, int r2); asmjit::X86Xmm CheckRegF(int r0, int r1, int r2, int r3); + asmjit::X86Xmm CheckRegF(int r0, int r1, int r2, int r3, int r4); asmjit::X86Gp CheckRegS(int r0, int r1); asmjit::X86Gp CheckRegA(int r0, int r1); diff --git a/source/common/scripting/vm/vm.h b/source/common/scripting/vm/vm.h index 1ba1a3c34..35db9a732 100644 --- a/source/common/scripting/vm/vm.h +++ b/source/common/scripting/vm/vm.h @@ -80,8 +80,9 @@ enum REGT_KONST = 4, REGT_MULTIREG2 = 8, REGT_MULTIREG3 = 16, // (e.g. a vector) - REGT_MULTIREG = 24, + REGT_MULTIREG = 8 | 16 | 64, REGT_ADDROF = 32, // used with PARAM: pass address of this register + REGT_MULTIREG4 = 64, REGT_NIL = 128 // parameter was omitted }; @@ -130,6 +131,22 @@ struct VMReturn assert(RegType == REGT_FLOAT); *(double *)Location = val; } + void SetVector4(const double val[4]) + { + assert(RegType == (REGT_FLOAT|REGT_MULTIREG4)); + ((double *)Location)[0] = val[0]; + ((double *)Location)[1] = val[1]; + ((double *)Location)[2] = val[2]; + ((double *)Location)[3] = val[3]; + } + void SetVector4(const DVector4 &val) + { + assert(RegType == (REGT_FLOAT | REGT_MULTIREG4)); + ((double *)Location)[0] = val[0]; + ((double *)Location)[1] = val[1]; + ((double *)Location)[2] = val[2]; + ((double *)Location)[3] = val[3]; + } void SetVector(const double val[3]) { assert(RegType == (REGT_FLOAT|REGT_MULTIREG3)); diff --git a/source/common/scripting/vm/vmexec.h b/source/common/scripting/vm/vmexec.h index 0e4b50b63..b7428eff2 100644 --- a/source/common/scripting/vm/vmexec.h +++ b/source/common/scripting/vm/vmexec.h @@ -287,18 +287,40 @@ static int ExecScriptFunc(VMFrameStack 
*stack, VMReturn *ret, int numret) { auto v = (double*)ptr; reg.f[a] = v[0]; - reg.f[a + 1] = v[1]; - reg.f[a + 2] = v[2]; + reg.f[a+1] = v[1]; + reg.f[a+2] = v[2]; } NEXTOP; OP(LV3_R) : ASSERTF(a + 2); ASSERTA(B); ASSERTD(C); GETADDR(PB, RC, X_READ_NIL); + { + auto v = (double*)ptr; + reg.f[a] = v[0]; + reg.f[a+1] = v[1]; + reg.f[a+2] = v[2]; + } + NEXTOP; + OP(LV4) : + ASSERTF(a + 3); ASSERTA(B); ASSERTKD(C); + GETADDR(PB, KC, X_READ_NIL); { auto v = (double*)ptr; reg.f[a] = v[0]; reg.f[a + 1] = v[1]; reg.f[a + 2] = v[2]; + reg.f[a + 3] = v[3]; + } + NEXTOP; + OP(LV4_R) : + ASSERTF(a + 3); ASSERTA(B); ASSERTD(C); + GETADDR(PB, RC, X_READ_NIL); + { + auto v = (double*)ptr; + reg.f[a] = v[0]; + reg.f[a + 1] = v[1]; + reg.f[a + 2] = v[2]; + reg.f[a + 3] = v[3]; } NEXTOP; OP(LFV2): @@ -339,6 +361,28 @@ static int ExecScriptFunc(VMFrameStack *stack, VMReturn *ret, int numret) reg.f[a+2] = v[2]; } NEXTOP; + OP(LFV4) : + ASSERTF(a + 3); ASSERTA(B); ASSERTKD(C); + GETADDR(PB, KC, X_READ_NIL); + { + auto v = (float*)ptr; + reg.f[a] = v[0]; + reg.f[a+1] = v[1]; + reg.f[a+2] = v[2]; + reg.f[a+3] = v[3]; + } + NEXTOP; + OP(LFV4_R) : + ASSERTF(a + 3); ASSERTA(B); ASSERTD(C); + GETADDR(PB, RC, X_READ_NIL); + { + auto v = (float*)ptr; + reg.f[a] = v[0]; + reg.f[a+1] = v[1]; + reg.f[a+2] = v[2]; + reg.f[a+3] = v[3]; + } + NEXTOP; OP(LBIT): ASSERTD(a); ASSERTA(B); GETADDR(PB,0,X_READ_NIL); @@ -468,6 +512,28 @@ static int ExecScriptFunc(VMFrameStack *stack, VMReturn *ret, int numret) v[2] = reg.f[B+2]; } NEXTOP; + OP(SV4): + ASSERTA(a); ASSERTF(B+3); ASSERTKD(C); + GETADDR(PA,KC,X_WRITE_NIL); + { + auto v = (double *)ptr; + v[0] = reg.f[B]; + v[1] = reg.f[B+1]; + v[2] = reg.f[B+2]; + v[3] = reg.f[B+3]; + } + NEXTOP; + OP(SV4_R): + ASSERTA(a); ASSERTF(B+3); ASSERTD(C); + GETADDR(PA,RC,X_WRITE_NIL); + { + auto v = (double *)ptr; + v[0] = reg.f[B]; + v[1] = reg.f[B+1]; + v[2] = reg.f[B+2]; + v[3] = reg.f[B+3]; + } + NEXTOP; OP(SFV2): ASSERTA(a); ASSERTF(B+1); ASSERTKD(C); 
GETADDR(PA,KC,X_WRITE_NIL); @@ -506,6 +572,28 @@ static int ExecScriptFunc(VMFrameStack *stack, VMReturn *ret, int numret) v[2] = (float)reg.f[B+2]; } NEXTOP; + OP(SFV4): + ASSERTA(a); ASSERTF(B+3); ASSERTKD(C); + GETADDR(PA,KC,X_WRITE_NIL); + { + auto v = (float *)ptr; + v[0] = (float)reg.f[B]; + v[1] = (float)reg.f[B+1]; + v[2] = (float)reg.f[B+2]; + v[3] = (float)reg.f[B+3]; + } + NEXTOP; + OP(SFV4_R): + ASSERTA(a); ASSERTF(B+3); ASSERTD(C); + GETADDR(PA,RC,X_WRITE_NIL); + { + auto v = (float *)ptr; + v[0] = (float)reg.f[B]; + v[1] = (float)reg.f[B+1]; + v[2] = (float)reg.f[B+2]; + v[3] = (float)reg.f[B+3]; + } + NEXTOP; OP(SBIT): ASSERTA(a); ASSERTD(B); GETADDR(PA,0,X_WRITE_NIL); @@ -555,6 +643,16 @@ static int ExecScriptFunc(VMFrameStack *stack, VMReturn *ret, int numret) reg.f[a + 2] = reg.f[b + 2]; NEXTOP; } + OP(MOVEV4) : + { + ASSERTF(a); ASSERTF(B); + b = B; + reg.f[a] = reg.f[b]; + reg.f[a + 1] = reg.f[b + 1]; + reg.f[a + 2] = reg.f[b + 2]; + reg.f[a + 3] = reg.f[b + 3]; + NEXTOP; + } OP(DYNCAST_R) : ASSERTA(a); ASSERTA(B); ASSERTA(C); b = B; @@ -713,6 +811,15 @@ static int ExecScriptFunc(VMFrameStack *stack, VMReturn *ret, int numret) ::new(param + 2) VMValue(reg.f[b + 2]); f->NumParam += 2; break; + case REGT_FLOAT | REGT_MULTIREG4: + assert(b < f->NumRegF - 3); + assert(f->NumParam < sfunc->MaxParam - 2); + ::new(param) VMValue(reg.f[b]); + ::new(param + 1) VMValue(reg.f[b + 1]); + ::new(param + 2) VMValue(reg.f[b + 2]); + ::new(param + 3) VMValue(reg.f[b + 3]); + f->NumParam += 3; + break; case REGT_FLOAT | REGT_ADDROF: assert(b < f->NumRegF); ::new(param) VMValue(&reg.f[b]); @@ -1690,6 +1797,97 @@ static int ExecScriptFunc(VMFrameStack *stack, VMReturn *ret, int numret) fcp = &konstf[C]; goto Do_EQV3; + OP(NEGV4): + ASSERTF(a+3); ASSERTF(B+3); + reg.f[a] = -reg.f[B]; + reg.f[a+1] = -reg.f[B+1]; + reg.f[a+2] = -reg.f[B+2]; + reg.f[a+3] = -reg.f[B+3]; + NEXTOP; + + OP(ADDV4_RR): + ASSERTF(a+3); ASSERTF(B+3); ASSERTF(C+3); + fcp = &reg.f[C]; + fbp = &reg.f[B]; 
+ reg.f[a] = fbp[0] + fcp[0]; + reg.f[a+1] = fbp[1] + fcp[1]; + reg.f[a+2] = fbp[2] + fcp[2]; + reg.f[a+3] = fbp[3] + fcp[3]; + NEXTOP; + + OP(SUBV4_RR): + ASSERTF(a+3); ASSERTF(B+3); ASSERTF(C+3); + fbp = &reg.f[B]; + fcp = &reg.f[C]; + reg.f[a] = fbp[0] - fcp[0]; + reg.f[a+1] = fbp[1] - fcp[1]; + reg.f[a+2] = fbp[2] - fcp[2]; + reg.f[a+3] = fbp[3] - fcp[3]; + NEXTOP; + + OP(DOTV4_RR): + ASSERTF(a); ASSERTF(B+3); ASSERTF(C+3); + reg.f[a] = reg.f[B] * reg.f[C] + reg.f[B+1] * reg.f[C+1] + reg.f[B+2] * reg.f[C+2] + reg.f[B+3] * reg.f[C+3]; + NEXTOP; + + OP(MULVF4_RR): + ASSERTF(a+3); ASSERTF(B+3); ASSERTF(C); + fc = reg.f[C]; + fbp = &reg.f[B]; + Do_MULV4: + reg.f[a] = fbp[0] * fc; + reg.f[a+1] = fbp[1] * fc; + reg.f[a+2] = fbp[2] * fc; + reg.f[a+3] = fbp[3] * fc; + NEXTOP; + OP(MULVF4_RK): + ASSERTF(a+3); ASSERTF(B+3); ASSERTKF(C); + fc = konstf[C]; + fbp = &reg.f[B]; + goto Do_MULV4; + + OP(DIVVF4_RR): + ASSERTF(a+3); ASSERTF(B+3); ASSERTF(C); + fc = reg.f[C]; + fbp = &reg.f[B]; + Do_DIVV4: + reg.f[a] = fbp[0] / fc; + reg.f[a+1] = fbp[1] / fc; + reg.f[a+2] = fbp[2] / fc; + reg.f[a+3] = fbp[3] / fc; + NEXTOP; + OP(DIVVF4_RK): + ASSERTF(a+3); ASSERTF(B+3); ASSERTKF(C); + fc = konstf[C]; + fbp = &reg.f[B]; + goto Do_DIVV4; + + OP(LENV4): + ASSERTF(a); ASSERTF(B+3); + reg.f[a] = g_sqrt(reg.f[B] * reg.f[B] + reg.f[B+1] * reg.f[B+1] + reg.f[B+2] * reg.f[B+2]+ reg.f[B+3] * reg.f[B+3]); + NEXTOP; + + OP(EQV4_R): + ASSERTF(B+3); ASSERTF(C+3); + fcp = &reg.f[C]; + Do_EQV4: + if (a & CMP_APPROX) + { + CMPJMP(fabs(reg.f[B ] - fcp[0]) < VM_EPSILON && + fabs(reg.f[B+1] - fcp[1]) < VM_EPSILON && + fabs(reg.f[B+2] - fcp[2]) < VM_EPSILON && + fabs(reg.f[B+3] - fcp[3]) < VM_EPSILON); + } + else + { + CMPJMP(reg.f[B] == fcp[0] && reg.f[B+1] == fcp[1] && reg.f[B+2] == fcp[2] && reg.f[B+3] == fcp[3]); + } + NEXTOP; + OP(EQV4_K): + ASSERTF(B+3); ASSERTKF(C+3); + fcp = &konstf[C]; + goto Do_EQV4; + OP(ADDA_RR): ASSERTA(a); ASSERTA(B); ASSERTD(C); c = reg.d[C]; @@ -2028,7 +2226,11 @@ static void 
SetReturn(const VMRegisters &reg, VMFrame *frame, VMReturn *ret, VM_ assert(regnum < frame->NumRegF); src = &reg.f[regnum]; } - if (regtype & REGT_MULTIREG3) + if (regtype & REGT_MULTIREG4) + { + ret->SetVector4((double*)src); + } + else if (regtype & REGT_MULTIREG3) { ret->SetVector((double *)src); } diff --git a/source/common/scripting/vm/vmintern.h b/source/common/scripting/vm/vmintern.h index b93f3e6ef..9a3e2d401 100644 --- a/source/common/scripting/vm/vmintern.h +++ b/source/common/scripting/vm/vmintern.h @@ -126,6 +126,7 @@ enum CAST_So2S, CAST_V22S, CAST_V32S, + CAST_V42S, CAST_SID2S, CAST_TID2S, diff --git a/source/common/scripting/vm/vmops.h b/source/common/scripting/vm/vmops.h index af7fb233b..9b9274d14 100644 --- a/source/common/scripting/vm/vmops.h +++ b/source/common/scripting/vm/vmops.h @@ -51,12 +51,16 @@ xx(LV2, lv2, RVRPKI, LV2_R, 4, REGT_INT) // load vector2 xx(LV2_R, lv2, RVRPRI, NOP, 0, 0) xx(LV3, lv3, RVRPKI, LV3_R, 4, REGT_INT) // load vector3 xx(LV3_R, lv3, RVRPRI, NOP, 0, 0) +xx(LV4, lv4, RVRPKI, LV4_R, 4, REGT_INT) // load vector4 +xx(LV4_R, lv4, RVRPRI, NOP, 0, 0) xx(LCS, lcs, RSRPKI, LCS_R, 4, REGT_INT) // load string from char ptr. 
xx(LCS_R, lcs, RSRPRI, NOP, 0, 0) xx(LFV2, lfv2, RVRPKI, LFV2_R, 4, REGT_INT) // load fvector2 xx(LFV2_R, lfv2, RVRPRI, NOP, 0, 0) xx(LFV3, lfv3, RVRPKI, LFV3_R, 4, REGT_INT) // load fvector3 xx(LFV3_R, lfv3, RVRPRI, NOP, 0, 0) +xx(LFV4, lfv4, RVRPKI, LFV4_R, 4, REGT_INT) // load fvector4 +xx(LFV4_R, lfv4, RVRPRI, NOP, 0, 0) xx(LBIT, lbit, RIRPI8, NOP, 0, 0) // rA = !!(*rB & C) -- *rB is a byte @@ -81,10 +85,14 @@ xx(SV2, sv2, RPRVKI, SV2_R, 4, REGT_INT) // store vector2 xx(SV2_R, sv2, RPRVRI, NOP, 0, 0) xx(SV3, sv3, RPRVKI, SV3_R, 4, REGT_INT) // store vector3 xx(SV3_R, sv3, RPRVRI, NOP, 0, 0) +xx(SV4, sv4, RPRVKI, SV4_R, 4, REGT_INT) // store vector4 +xx(SV4_R, sv4, RPRVRI, NOP, 0, 0) xx(SFV2, sfv2, RPRVKI, SFV2_R, 4, REGT_INT) // store fvector2 xx(SFV2_R, sfv2, RPRVRI, NOP, 0, 0) xx(SFV3, sfv3, RPRVKI, SFV3_R, 4, REGT_INT) // store fvector3 xx(SFV3_R, sfv3, RPRVRI, NOP, 0, 0) +xx(SFV4, sfv4, RPRVKI, SFV4_R, 4, REGT_INT) // store fvector4 +xx(SFV4_R, sfv4, RPRVRI, NOP, 0, 0) xx(SBIT, sbit, RPRII8, NOP, 0, 0) // *rA |= C if rB is true, *rA &= ~C otherwise @@ -95,6 +103,7 @@ xx(MOVES, mov, RSRS, NOP, 0, 0) // sA = sB xx(MOVEA, mov, RPRP, NOP, 0, 0) // aA = aB xx(MOVEV2, mov2, RFRF, NOP, 0, 0) // fA = fB (2 elements) xx(MOVEV3, mov3, RFRF, NOP, 0, 0) // fA = fB (3 elements) +xx(MOVEV4, mov4, RFRF, NOP, 0, 0) // fA = fB (4 elements) xx(CAST, cast, CAST, NOP, 0, 0) // xA = xB, conversion specified by C xx(CASTB, castb, CAST, NOP, 0, 0) // xA = !!xB, type specified by C xx(DYNCAST_R, dyncast, RPRPRP, NOP, 0, 0) // aA = dyn_cast(aB); @@ -256,6 +265,19 @@ xx(LENV3, lenv3, RFRV, NOP, 0, 0) // fA = vB.Length xx(EQV3_R, beqv3, CVRR, NOP, 0, 0) // if ((vB == vkC) != A) then pc++ (inexact if A & 33) xx(EQV3_K, beqv3, CVRK, NOP, 0, 0) // this will never be used. 
+// Vector math (4D) +xx(NEGV4, negv4, RVRV, NOP, 0, 0) // vA = -vB +xx(ADDV4_RR, addv4, RVRVRV, NOP, 0, 0) // vA = vB + vkC +xx(SUBV4_RR, subv4, RVRVRV, NOP, 0, 0) // vA = vkB - vkC +xx(DOTV4_RR, dotv4, RVRVRV, NOP, 0, 0) // va = vB dot vkC +xx(MULVF4_RR, mulv4, RVRVRF, NOP, 0, 0) // vA = vkB * fkC +xx(MULVF4_RK, mulv4, RVRVKF, MULVF4_RR, 4, REGT_FLOAT) +xx(DIVVF4_RR, divv4, RVRVRF, NOP, 0, 0) // vA = vkB / fkC +xx(DIVVF4_RK, divv4, RVRVKF, DIVVF4_RR, 4, REGT_FLOAT) +xx(LENV4, lenv4, RFRV, NOP, 0, 0) // fA = vB.Length +xx(EQV4_R, beqv4, CVRR, NOP, 0, 0) // if ((vB == vkC) != A) then pc++ (inexact if A & 33) +xx(EQV4_K, beqv4, CVRK, NOP, 0, 0) // this will never be used. + // Pointer math. xx(ADDA_RR, add, RPRPRI, NOP, 0, 0) // pA = pB + dkC xx(ADDA_RK, add, RPRPKI, ADDA_RR,4, REGT_INT) diff --git a/source/common/utility/m_alloc.cpp b/source/common/utility/m_alloc.cpp index 5c2ea25b9..3b7e12f43 100644 --- a/source/common/utility/m_alloc.cpp +++ b/source/common/utility/m_alloc.cpp @@ -3,7 +3,7 @@ ** Wrappers for the malloc family of functions that count used bytes. ** **--------------------------------------------------------------------------- -** Copyright 1998-2008 Randy Heit +** Copyright 1998-2008 Marisa Heit ** All rights reserved. ** ** Redistribution and use in source and binary forms, with or without @@ -45,7 +45,7 @@ #endif #include "engineerrors.h" -#include "dobject.h" +#include "dobjgc.h" #ifndef _MSC_VER #define _NORMAL_BLOCK 0 @@ -59,25 +59,22 @@ void *M_Malloc(size_t size) { void *block = malloc(size); - if (block == NULL) + if (block == nullptr) I_FatalError("Could not malloc %zu bytes", size); - GC::AllocBytes += _msize(block); + GC::ReportAlloc(_msize(block)); return block; } void *M_Realloc(void *memblock, size_t size) { - if (memblock != NULL) - { - GC::AllocBytes -= _msize(memblock); - } + size_t oldsize = memblock ? 
_msize(memblock) : 0; void *block = realloc(memblock, size); - if (block == NULL) + if (block == nullptr) { I_FatalError("Could not realloc %zu bytes", size); } - GC::AllocBytes += _msize(block); + GC::ReportRealloc(oldsize, _msize(block)); return block; } #else @@ -85,28 +82,25 @@ void *M_Malloc(size_t size) { void *block = malloc(size+sizeof(size_t)); - if (block == NULL) + if (block == nullptr) I_FatalError("Could not malloc %zu bytes", size); size_t *sizeStore = (size_t *) block; *sizeStore = size; block = sizeStore+1; - GC::AllocBytes += _msize(block); + GC::ReportAlloc(_msize(block)); return block; } void *M_Realloc(void *memblock, size_t size) { - if(memblock == NULL) + if (memblock == nullptr) return M_Malloc(size); - if (memblock != NULL) - { - GC::AllocBytes -= _msize(memblock); - } + size_t oldsize = _msize(memblock); void *block = realloc(((size_t*) memblock)-1, size+sizeof(size_t)); - if (block == NULL) + if (block == nullptr) { I_FatalError("Could not realloc %zu bytes", size); } @@ -115,7 +109,7 @@ void *M_Realloc(void *memblock, size_t size) *sizeStore = size; block = sizeStore+1; - GC::AllocBytes += _msize(block); + GC::ReportRealloc(oldsize, _msize(block)); return block; } #endif @@ -129,25 +123,22 @@ void *M_Malloc_Dbg(size_t size, const char *file, int lineno) { void *block = _malloc_dbg(size, _NORMAL_BLOCK, file, lineno); - if (block == NULL) + if (block == nullptr) I_FatalError("Could not malloc %zu bytes in %s, line %d", size, file, lineno); - GC::AllocBytes += _msize(block); + GC::ReportAlloc(_msize(block)); return block; } void *M_Realloc_Dbg(void *memblock, size_t size, const char *file, int lineno) { - if (memblock != NULL) - { - GC::AllocBytes -= _msize(memblock); - } + size_t oldsize = memblock ? 
_msize(memblock) : 0; void *block = _realloc_dbg(memblock, size, _NORMAL_BLOCK, file, lineno); - if (block == NULL) + if (block == nullptr) { I_FatalError("Could not realloc %zu bytes in %s, line %d", size, file, lineno); } - GC::AllocBytes += _msize(block); + GC::ReportRealloc(oldsize, _msize(block)); return block; } #else @@ -155,29 +146,26 @@ void *M_Malloc_Dbg(size_t size, const char *file, int lineno) { void *block = _malloc_dbg(size+sizeof(size_t), _NORMAL_BLOCK, file, lineno); - if (block == NULL) + if (block == nullptr) I_FatalError("Could not malloc %zu bytes in %s, line %d", size, file, lineno); size_t *sizeStore = (size_t *) block; *sizeStore = size; block = sizeStore+1; - GC::AllocBytes += _msize(block); + GC::ReportAlloc(_msize(block)); return block; } void *M_Realloc_Dbg(void *memblock, size_t size, const char *file, int lineno) { - if(memblock == NULL) + if (memblock == nullptr) return M_Malloc_Dbg(size, file, lineno); - if (memblock != NULL) - { - GC::AllocBytes -= _msize(memblock); - } + size_t oldsize = _msize(memblock); void *block = _realloc_dbg(((size_t*) memblock)-1, size+sizeof(size_t), _NORMAL_BLOCK, file, lineno); - if (block == NULL) + if (block == nullptr) { I_FatalError("Could not realloc %zu bytes in %s, line %d", size, file, lineno); } @@ -186,29 +174,22 @@ void *M_Realloc_Dbg(void *memblock, size_t size, const char *file, int lineno) *sizeStore = size; block = sizeStore+1; - GC::AllocBytes += _msize(block); + GC::ReportRealloc(oldsize, _msize(block)); return block; } #endif #endif +void M_Free (void *block) +{ + if (block != nullptr) + { + GC::ReportDealloc(_msize(block)); #if !defined(__solaris__) && !defined(__OpenBSD__) && !defined(__DragonFly__) -void M_Free (void *block) -{ - if (block != NULL) - { - GC::AllocBytes -= _msize(block); free(block); - } -} #else -void M_Free (void *block) -{ - if(block != NULL) - { - GC::AllocBytes -= _msize(block); free(((size_t*) block)-1); +#endif } } -#endif diff --git 
a/wadsrc/static/zscript/engine/screenjob.zs b/wadsrc/static/zscript/engine/screenjob.zs index 71345f709..a65516a48 100644 --- a/wadsrc/static/zscript/engine/screenjob.zs +++ b/wadsrc/static/zscript/engine/screenjob.zs @@ -371,7 +371,7 @@ class ScreenJobRunner : Object UI bool CanWipe() { - if (index < jobs.Size()) return !jobs[index].nowipe; + if (index < jobs.Size()) return !jobs[max(0, index)].nowipe; return true; }