- Backend update from GZDoom

* Vector 4 in zscript
* garbage collector fixes
This commit is contained in:
Christoph Oelckers 2022-11-12 10:45:39 +01:00
parent f7a2fd29ba
commit 8806fb930b
32 changed files with 1294 additions and 328 deletions

View file

@ -124,8 +124,10 @@ xx(State)
xx(Fixed)
xx(Vector2)
xx(Vector3)
xx(Vector4)
xx(FVector2)
xx(FVector3)
xx(FVector4)
xx(let)
xx(Min)
@ -175,7 +177,9 @@ xx(b)
xx(X)
xx(Y)
xx(Z)
xx(W)
xx(XY)
xx(XYZ)
xx(Prototype)
xx(Void)

View file

@ -326,6 +326,11 @@ inline FSerializer &Serialize(FSerializer &arc, const char *key, DVector2 &p, DV
return arc.Array<double>(key, &p[0], def? &(*def)[0] : nullptr, 2, true);
}
// Serializes a 4-component float vector as an array of four floats stored under 'key'.
// 'def' is optional; when it is non-null its components are handed to the serializer as defaults.
inline FSerializer& Serialize(FSerializer& arc, const char* key, FVector4& p, FVector4* def)
{
	auto defaults = def ? &(*def)[0] : nullptr;
	auto values = &p[0];
	return arc.Array<float>(key, values, defaults, 4, true);
}
inline FSerializer& Serialize(FSerializer& arc, const char* key, FVector3& p, FVector3* def)
{
return arc.Array<float>(key, &p[0], def ? &(*def)[0] : nullptr, 3, true);

View file

@ -48,6 +48,7 @@ public:
cycle_t &operator= (const cycle_t &o) { return *this; }
void Reset() {}
void Clock() {}
void ResetAndClock() {}
void Unclock() {}
double Time() { return 0; }
double TimeMS() { return 0; }
@ -120,6 +121,12 @@ public:
Sec -= ts.tv_sec + ts.tv_nsec * 1e-9;
}
void ResetAndClock()
{
Reset();
Clock();
}
void Unclock()
{
#ifdef __linux__
@ -221,6 +228,11 @@ public:
Counter = 0;
}
// Restarts the timer in one step by overwriting Counter with the negated current
// timestamp instead of zeroing it first.
// NOTE(review): presumably equivalent to Reset() followed by Clock() - confirm against Clock().
void ResetAndClock()
{
	const int64_t now = static_cast<int64_t>(rdtsc());
	Counter = -now;
}
void Clock()
{
int64_t time = rdtsc();

View file

@ -250,6 +250,12 @@ static bool CheckSkipOptionBlock(FScanner &sc)
filter = true;
#endif
}
else if (sc.Compare("SWRender"))
{
#ifndef NO_SWRENDERER
filter = true;
#endif
}
}
while (sc.CheckString(","));
sc.MustGetStringName(")");

View file

@ -82,23 +82,58 @@
** infinity, where each step performs a full collection.) You can also
** change this value dynamically.
*/
#define DEFAULT_GCMUL 200 // GC runs 'double the speed' of memory allocation
#ifndef _DEBUG
#define DEFAULT_GCMUL 600 // GC runs gcmul% the speed of memory allocation
#else
// Higher in debug builds to account for the extra time spent freeing objects
#define DEFAULT_GCMUL 800
#endif
// Minimum step size
#define GCSTEPSIZE (sizeof(DObject) * 16)
#define GCMINSTEPSIZE (sizeof(DObject) * 16)
// Maximum number of elements to sweep in a single step
#define GCSWEEPMAX 40
// Sweeps traverse objects in chunks of this size
#define GCSWEEPGRANULARITY 40
// Cost of sweeping one element (the size of a small object divided by
// some adjust for the sweep speed)
#define GCSWEEPCOST (sizeof(DObject) / 4)
// Cost of deleting an object
#ifndef _DEBUG
#define GCDELETECOST 75
#else
// Freeing memory is much more costly in debug builds
#define GCDELETECOST 230
#endif
// Cost of calling of one destructor
#define GCFINALIZECOST 100
// Cost of destroying an object
#define GCDESTROYCOST 15
// TYPES -------------------------------------------------------------------
// Keeps a fixed-size history of reported allocation amounts together with a
// running total, so the average amount per recorded sample can be queried cheaply.
class FAveragizer
{
// Number of allocations to track
static inline constexpr unsigned HistorySize = 512;
// Circular buffer holding the most recently recorded amounts.
size_t History[HistorySize];
// Sum of the amounts currently held in History.
size_t TotalAmount;
// Number of samples recorded so far; stops growing once it reaches HistorySize.
int TotalCount;
// Index into History of the most recently written sample.
unsigned NewestPos;
public:
// Starts with an empty, zeroed history.
FAveragizer();
// Records one more sample, replacing the oldest one once the history is full.
void AddAlloc(size_t alloc);
// Returns TotalAmount / TotalCount, or 0 when nothing has been recorded yet.
size_t GetAverage();
};
// Per-collector-state statistics, indexed by GC state (one slot per state up to GC::GCS_COUNT).
struct FStepStats
{
// Timer for the time spent in each state.
cycle_t Clock[GC::GCS_COUNT];
// Bytes reported as covered by the steps run in each state.
size_t BytesCovered[GC::GCS_COUNT];
// Number of times Step() started in, or switched into, each state.
int Count[GC::GCS_COUNT];
// Appends a formatted summary of these stats to 'out'.
void Format(FString &out);
// Zeroes all counters and resets all timers.
void Reset();
};
// EXTERNAL FUNCTION PROTOTYPES --------------------------------------------
// PUBLIC FUNCTION PROTOTYPES ----------------------------------------------
@ -114,28 +149,50 @@ static size_t CalcStepSize();
namespace GC
{
size_t AllocBytes;
size_t RunningAllocBytes;
size_t RunningDeallocBytes;
size_t Threshold;
size_t Estimate;
DObject *Gray;
DObject *Root;
DObject *SoftRoots;
DObject **SweepPos;
DObject *ToDestroy;
uint32_t CurrentWhite = OF_White0 | OF_Fixed;
EGCState State = GCS_Pause;
int Pause = DEFAULT_GCPAUSE;
int StepMul = DEFAULT_GCMUL;
int StepCount;
uint64_t CheckTime;
FStepStats StepStats;
FStepStats PrevStepStats;
bool FinalGC;
bool HadToDestroy;
// PRIVATE DATA DEFINITIONS ------------------------------------------------
static int LastCollectTime; // Time last time collector finished
static size_t LastCollectAlloc; // Memory allocation when collector finished
static size_t MinStepSize; // Cover at least this much memory per step
static FAveragizer AllocHistory;// Tracks allocation rate over time
static cycle_t GCTime; // Track time spent in GC
// CODE --------------------------------------------------------------------
//==========================================================================
//
// CheckGC
//
// Check if it's time to collect, and do a collection step if it is.
// Also does some bookkeeping. Should be called fairly consistantly.
//
//==========================================================================
void CheckGC()
{
AllocHistory.AddAlloc(RunningAllocBytes);
RunningAllocBytes = 0;
if (State > GCS_Pause || AllocBytes >= Threshold)
{
Step();
}
}
//==========================================================================
//
// SetThreshold
@ -146,7 +203,7 @@ static size_t MinStepSize; // Cover at least this much memory per step
void SetThreshold()
{
Threshold = (Estimate / 100) * Pause;
Threshold = (std::min(Estimate, AllocBytes) / 100) * Pause;
}
//==========================================================================
@ -170,55 +227,72 @@ size_t PropagateMark()
//==========================================================================
//
// SweepList
// SweepObjects
//
// Runs a limited sweep on a list, returning the position in the list just
// after the last object swept.
// Runs a limited sweep on the object list, returning the number of bytes
// swept.
//
//==========================================================================
static DObject **SweepList(DObject **p, size_t count, size_t *finalize_count)
static size_t SweepObjects(size_t count)
{
DObject *curr;
int deadmask = OtherWhite();
size_t finalized = 0;
size_t swept = 0;
while ((curr = *p) != NULL && count-- > 0)
while ((curr = *SweepPos) != nullptr && count-- > 0)
{
swept += curr->GetClass()->Size;
if ((curr->ObjectFlags ^ OF_WhiteBits) & deadmask) // not dead?
{
assert(!curr->IsDead() || (curr->ObjectFlags & OF_Fixed));
curr->MakeWhite(); // make it white (for next cycle)
p = &curr->ObjNext;
SweepPos = &curr->ObjNext;
}
else // must erase 'curr'
else
{
assert(curr->IsDead());
*p = curr->ObjNext;
if (!(curr->ObjectFlags & OF_EuthanizeMe))
{ // The object must be destroyed before it can be finalized.
// Note that thinkers must already have been destroyed. If they get here without
// having been destroyed first, it means they somehow became unattached from the
// thinker lists. If I don't maintain the invariant that all live thinkers must
// be in a thinker list, then I need to add write barriers for every time a
// thinker pointer is changed. This seems easier and perfectly reasonable, since
// a live thinker that isn't on a thinker list isn't much of a thinker.
// However, this can happen during deletion of the thinker list while cleaning up
// from a savegame error so we can't assume that any thinker that gets here is an error.
curr->Destroy();
{ // The object must be destroyed before it can be deleted.
curr->GCNext = ToDestroy;
ToDestroy = curr;
SweepPos = &curr->ObjNext;
}
else
{ // must erase 'curr'
*SweepPos = curr->ObjNext;
curr->ObjectFlags |= OF_Cleanup;
delete curr;
swept += GCDELETECOST;
}
curr->ObjectFlags |= OF_Cleanup;
delete curr;
finalized++;
}
}
if (finalize_count != NULL)
return swept;
}
//==========================================================================
//
// DestroyObjects
//
// Destroys up to count objects on a list linked on GCNext, returning the
// size of objects destroyed, for updating the estimate.
//
//==========================================================================
static size_t DestroyObjects(size_t count)
{
DObject *curr;
size_t bytes_destroyed = 0;
while ((curr = ToDestroy) != nullptr && count-- > 0)
{
*finalize_count = finalized;
assert(!(curr->ObjectFlags & OF_EuthanizeMe));
bytes_destroyed += curr->GetClass()->Size + GCDESTROYCOST;
ToDestroy = curr->GCNext;
curr->GCNext = nullptr;
curr->Destroy();
}
return p;
return bytes_destroyed;
}
//==========================================================================
@ -269,20 +343,14 @@ void MarkArray(DObject **obj, size_t count)
//
// CalcStepSize
//
// Decide how big a step should be based, depending on how long it took to
// allocate up to the threshold from the amount left after the previous
// collection.
// Decide how big a step should be, based on the current allocation rate.
//
//==========================================================================
static size_t CalcStepSize()
{
int time_passed = int(CheckTime - LastCollectTime);
auto alloc = min(LastCollectAlloc, Estimate);
size_t bytes_gained = AllocBytes > alloc ? AllocBytes - alloc : 0;
return (StepMul > 0 && time_passed > 0)
? std::max<size_t>(GCSTEPSIZE, bytes_gained / time_passed * StepMul / 100)
: std::numeric_limits<size_t>::max() / 2; // no limit
size_t avg = AllocHistory.GetAverage();
return std::max<size_t>(GCMINSTEPSIZE, avg * StepMul / 100);
}
//==========================================================================
@ -302,15 +370,18 @@ void AddMarkerFunc(GCMarkerFunc func)
static void MarkRoot()
{
Gray = NULL;
PrevStepStats = StepStats;
StepStats.Reset();
Gray = nullptr;
for (auto func : markers) func();
// Mark soft roots.
if (SoftRoots != NULL)
if (SoftRoots != nullptr)
{
DObject **probe = &SoftRoots->ObjNext;
while (*probe != NULL)
while (*probe != nullptr)
{
DObject *soft = *probe;
probe = &soft->ObjNext;
@ -322,7 +393,6 @@ static void MarkRoot()
}
// Time to propagate the marks.
State = GCS_Propagate;
StepCount = 0;
}
//==========================================================================
@ -341,10 +411,21 @@ static void Atomic()
SweepPos = &Root;
State = GCS_Sweep;
Estimate = AllocBytes;
}
// Now that we are about to start a sweep, establish a baseline minimum
// step size for how much memory we want to sweep each CheckGC().
MinStepSize = CalcStepSize();
//==========================================================================
//
// SweepDone
//
// Called when the sweep has run out of objects. Moves on to the Destroy
// phase when dead objects are still queued on ToDestroy, otherwise goes
// straight to the Done state. HadToDestroy remembers which one happened.
//
//==========================================================================
static void SweepDone()
{
	if (ToDestroy != nullptr)
	{
		HadToDestroy = true;
		State = GCS_Destroy;
	}
	else
	{
		HadToDestroy = false;
		State = GCS_Done;
	}
}
//==========================================================================
@ -364,7 +445,7 @@ static size_t SingleStep()
return 0;
case GCS_Propagate:
if (Gray != NULL)
if (Gray != nullptr)
{
return PropagateMark();
}
@ -375,22 +456,30 @@ static size_t SingleStep()
}
case GCS_Sweep: {
size_t old = AllocBytes;
size_t finalize_count;
SweepPos = SweepList(SweepPos, GCSWEEPMAX, &finalize_count);
if (*SweepPos == NULL)
RunningDeallocBytes = 0;
size_t swept = SweepObjects(GCSWEEPGRANULARITY);
Estimate -= RunningDeallocBytes;
if (*SweepPos == nullptr)
{ // Nothing more to sweep?
State = GCS_Finalize;
SweepDone();
}
//assert(old >= AllocBytes);
Estimate -= max<size_t>(0, old - AllocBytes);
return (GCSWEEPMAX - finalize_count) * GCSWEEPCOST + finalize_count * GCFINALIZECOST;
return swept;
}
case GCS_Finalize:
case GCS_Destroy: {
size_t destroy_size;
destroy_size = DestroyObjects(GCSWEEPGRANULARITY);
Estimate -= destroy_size;
if (ToDestroy == nullptr)
{ // Nothing more to destroy?
State = GCS_Done;
}
return destroy_size;
}
case GCS_Done:
State = GCS_Pause; // end collection
LastCollectAlloc = AllocBytes;
LastCollectTime = (int)CheckTime;
SetThreshold();
return 0;
default:
@ -403,21 +492,27 @@ static size_t SingleStep()
//
// Step
//
// Performs enough single steps to cover GCSTEPSIZE * StepMul% bytes of
// memory.
// Performs enough single steps to cover <StepSize> bytes of memory.
// Some of those bytes might be "fake" to account for the cost of freeing
// or destroying objects.
//
//==========================================================================
void Step()
{
// We recalculate a step size in case the rate of allocation went up
// since we started sweeping because we don't want to fall behind.
// However, we also don't want to go slower than what was decided upon
// when the sweep began if the rate of allocation has slowed.
size_t lim = max(CalcStepSize(), MinStepSize);
GCTime.ResetAndClock();
auto enter_state = State;
StepStats.Count[enter_state]++;
StepStats.Clock[enter_state].Clock();
size_t did = 0;
size_t lim = CalcStepSize();
do
{
size_t done = SingleStep();
did += done;
if (done < lim)
{
lim -= done;
@ -426,17 +521,23 @@ void Step()
{
lim = 0;
}
if (State != enter_state)
{
// Finish stats on old state
StepStats.Clock[enter_state].Unclock();
StepStats.BytesCovered[enter_state] += did;
// Start stats on new state
did = 0;
enter_state = State;
StepStats.Clock[enter_state].Clock();
StepStats.Count[enter_state]++;
}
} while (lim && State != GCS_Pause);
if (State != GCS_Pause)
{
Threshold = AllocBytes;
}
else
{
assert(AllocBytes >= Estimate);
SetThreshold();
}
StepCount++;
StepStats.Clock[enter_state].Unclock();
StepStats.BytesCovered[enter_state] += did;
GCTime.Unclock();
}
//==========================================================================
@ -454,20 +555,23 @@ void FullGC()
// Reset sweep mark to sweep all elements (returning them to white)
SweepPos = &Root;
// Reset other collector lists
Gray = NULL;
Gray = nullptr;
State = GCS_Sweep;
}
// Finish any pending sweep phase
while (State != GCS_Finalize)
{
SingleStep();
}
MarkRoot();
// Finish any pending GC stages
while (State != GCS_Pause)
{
SingleStep();
}
SetThreshold();
// Loop until everything that can be destroyed and freed has been
do
{
MarkRoot();
while (State != GCS_Pause)
{
SingleStep();
}
} while (HadToDestroy);
}
//==========================================================================
@ -481,9 +585,9 @@ void FullGC()
void Barrier(DObject *pointing, DObject *pointed)
{
assert(pointing == NULL || (pointing->IsBlack() && !pointing->IsDead()));
assert(pointing == nullptr || (pointing->IsBlack() && !pointing->IsDead()));
assert(pointed->IsWhite() && !pointed->IsDead());
assert(State != GCS_Finalize && State != GCS_Pause);
assert(State != GCS_Destroy && State != GCS_Pause);
assert(!(pointed->ObjectFlags & OF_Released)); // if a released object gets here, something must be wrong.
if (pointed->ObjectFlags & OF_Released) return; // don't do anything with non-GC'd objects.
// The invariant only needs to be maintained in the propagate state.
@ -495,7 +599,7 @@ void Barrier(DObject *pointing, DObject *pointed)
}
// In other states, we can mark the pointing object white so this
// barrier won't be triggered again, saving a few cycles in the future.
else if (pointing != NULL)
else if (pointing != nullptr)
{
pointing->MakeWhite();
}
@ -503,13 +607,13 @@ void Barrier(DObject *pointing, DObject *pointed)
void DelSoftRootHead()
{
if (SoftRoots != NULL)
if (SoftRoots != nullptr)
{
// Don't let the destructor print a warning message
SoftRoots->ObjectFlags |= OF_YesReallyDelete;
delete SoftRoots;
}
SoftRoots = NULL;
SoftRoots = nullptr;
}
//==========================================================================
@ -526,7 +630,7 @@ void AddSoftRoot(DObject *obj)
DObject **probe;
// Are there any soft roots yet?
if (SoftRoots == NULL)
if (SoftRoots == nullptr)
{
// Create a new object to root the soft roots off of, and stick
// it at the end of the object list, so we know that anything
@ -534,17 +638,17 @@ void AddSoftRoot(DObject *obj)
SoftRoots = Create<DObject>();
SoftRoots->ObjectFlags |= OF_Fixed;
probe = &Root;
while (*probe != NULL)
while (*probe != nullptr)
{
probe = &(*probe)->ObjNext;
}
Root = SoftRoots->ObjNext;
SoftRoots->ObjNext = NULL;
SoftRoots->ObjNext = nullptr;
*probe = SoftRoots;
}
// Mark this object as rooted and move it after the SoftRoots marker.
probe = &Root;
while (*probe != NULL && *probe != obj)
while (*probe != nullptr && *probe != obj)
{
probe = &(*probe)->ObjNext;
}
@ -567,14 +671,14 @@ void DelSoftRoot(DObject *obj)
{
DObject **probe;
if (!(obj->ObjectFlags & OF_Rooted))
if (obj == nullptr || !(obj->ObjectFlags & OF_Rooted))
{ // Not rooted, so nothing to do.
return;
}
obj->ObjectFlags &= ~OF_Rooted;
// Move object out of the soft roots part of the list.
probe = &SoftRoots;
while (*probe != NULL && *probe != obj)
while (*probe != nullptr && *probe != obj)
{
probe = &(*probe)->ObjNext;
}
@ -588,6 +692,52 @@ void DelSoftRoot(DObject *obj)
}
//==========================================================================
//
// FAveragizer - Constructor
//
// Starts out with no recorded samples and a zeroed history buffer.
//
//==========================================================================
FAveragizer::FAveragizer()
	: TotalAmount(0), TotalCount(0), NewestPos(0)
{
	memset(History, 0, sizeof(History));
}
//==========================================================================
//
// FAveragizer :: AddAlloc
//
// Records one sample in the circular history and keeps the running total
// in sync. Once the history is full, the oldest sample is dropped.
//
//==========================================================================
void FAveragizer::AddAlloc(size_t alloc)
{
	// Advance to the next slot; the mask wraps the index around the buffer.
	const unsigned slot = (NewestPos + 1) & (HistorySize - 1);
	NewestPos = slot;

	if (static_cast<unsigned>(TotalCount) >= HistorySize)
	{
		// History is full, so this slot still holds the oldest sample.
		// Remove it from the total before it is overwritten.
		TotalAmount -= History[slot];
	}
	else
	{
		++TotalCount;
	}

	TotalAmount += alloc;
	History[slot] = alloc;
}
//==========================================================================
//
// FAveragizer :: GetAverage
//
// Returns the mean of the recorded samples, or 0 if there are none yet.
//
//==========================================================================
size_t FAveragizer::GetAverage()
{
	if (TotalCount == 0)
	{
		return 0;
	}
	return TotalAmount / TotalCount;
}
//==========================================================================
//
// STAT gc
@ -602,18 +752,66 @@ ADD_STAT(gc)
" Pause ",
"Propagate",
" Sweep ",
"Finalize " };
" Destroy ",
" Done "
};
FString out;
out.Format("[%s] Alloc:%6zuK Thresh:%6zuK Est:%6zuK Steps: %d %zuK",
double time = GC::State != GC::GCS_Pause ? GC::GCTime.TimeMS() : 0;
GC::PrevStepStats.Format(out);
out << "\n";
GC::StepStats.Format(out);
out.AppendFormat("\n%.2fms [%s] Rate:%3zuK (%3zuK) Alloc:%6zuK Est:%6zuK Thresh:%6zuK",
time,
StateStrings[GC::State],
(GC::AllocHistory.GetAverage() + 1023) >> 10,
(GC::CalcStepSize() + 1023) >> 10,
(GC::AllocBytes + 1023) >> 10,
(GC::Threshold + 1023) >> 10,
(GC::Estimate + 1023) >> 10,
GC::StepCount,
(GC::MinStepSize + 1023) >> 10);
(GC::Threshold + 1023) >> 10);
return out;
}
//==========================================================================
//
// FStepStats :: Reset
//
// Clears the step counts, covered-byte totals and timers of every state.
//
//==========================================================================
void FStepStats::Reset()
{
	for (auto &steps : Count)
	{
		steps = 0;
	}
	for (auto &bytes : BytesCovered)
	{
		bytes = 0;
	}
	for (auto &timer : Clock)
	{
		timer.Reset();
	}
}
//==========================================================================
//
// FStepStats :: Format
//
// Appends one bracketed entry per collector stage (Propagate, Sweep,
// Destroy) to the given FString, then switches the text color to green.
//
//==========================================================================
void FStepStats::Format(FString &out)
{
	// Everything in the default green is hard to tell apart, so each
	// stage is printed in its own color.
	const char *const colorCodes = "-NKB";		/* Color codes */
	const char *const stagePrefixes = "-PSD";	/* Stage prefixes: (P)ropagate, (S)weep, (D)estroy */

	for (int stage = GC::GCS_Propagate; stage < GC::GCS_Done; ++stage)
	{
		const int steps = Count[stage];
		const double elapsed = Clock[stage].TimeMS();
		// Show the time per step, or the raw time if the stage never ran.
		const double shownTime = steps != 0 ? elapsed / steps : elapsed;

		out.AppendFormat(TEXTCOLOR_ESCAPESTR "%c[%c%6zuK %4d*%.2fms]",
			colorCodes[stage],
			stagePrefixes[stage],
			(BytesCovered[stage] + 1023) >> 10, steps, shownTime);
	}
	out << TEXTCOLOR_GREEN;
}
//==========================================================================
//
// CCMD gc

View file

@ -37,12 +37,21 @@ namespace GC
GCS_Pause,
GCS_Propagate,
GCS_Sweep,
GCS_Finalize
GCS_Destroy,
GCS_Done,
GCS_COUNT
};
// Number of bytes currently allocated through M_Malloc/M_Realloc.
extern size_t AllocBytes;
// Number of bytes allocated since last collection step.
extern size_t RunningAllocBytes;
// Number of bytes freed since last collection step.
extern size_t RunningDeallocBytes;
// Amount of memory to allocate before triggering a collection.
extern size_t Threshold;
@ -70,18 +79,12 @@ namespace GC
// Is this the final collection just before exit?
extern bool FinalGC;
// Counts the number of times CheckGC has been called.
extern uint64_t CheckTime;
// Current white value for known-dead objects.
static inline uint32_t OtherWhite()
{
return CurrentWhite ^ OF_WhiteBits;
}
// Frees all objects, whether they're dead or not.
void FreeAll();
// Does one collection step.
void Step();
@ -118,12 +121,7 @@ namespace GC
}
// Check if it's time to collect, and do a collection step if it is.
static inline void CheckGC()
{
CheckTime++;
if (AllocBytes >= Threshold)
Step();
}
void CheckGC();
// Forces a collection to start now.
static inline void StartCollection()
@ -176,6 +174,32 @@ namespace GC
using GCMarkerFunc = void(*)();
void AddMarkerFunc(GCMarkerFunc func);
// Report an allocation to the GC: grows both the total allocated byte count
// and the running count of bytes allocated since it was last cleared.
static inline void ReportAlloc(size_t alloc)
{
	RunningAllocBytes += alloc;
	AllocBytes += alloc;
}
// Report a deallocation to the GC: shrinks the total allocated byte count
// and adds to the running count of freed bytes.
static inline void ReportDealloc(size_t dealloc)
{
	RunningDeallocBytes += dealloc;
	AllocBytes -= dealloc;
}
// Report a reallocation to the GC. Growth is reported as an allocation of
// the size difference; anything else (including an unchanged size) is
// reported as a deallocation of the difference.
static inline void ReportRealloc(size_t oldsize, size_t newsize)
{
	if (newsize > oldsize)
	{
		const size_t grown = newsize - oldsize;
		ReportAlloc(grown);
		return;
	}

	const size_t shrunk = oldsize - newsize;
	ReportDealloc(shrunk);
}
}
// A template class to help with handling read barriers. It does not

View file

@ -22,6 +22,7 @@ public:
OpenGLFrameBuffer(void *hMonitor, bool fullscreen) ;
~OpenGLFrameBuffer();
int Backend() override { return 2; }
bool CompileNextShader() override;
void InitializeState() override;
void Update() override;

View file

@ -484,12 +484,15 @@ int EncodeRegType(ExpEmit reg)
else if (reg.RegCount == 2)
{
regtype |= REGT_MULTIREG2;
}
else if (reg.RegCount == 3)
{
regtype |= REGT_MULTIREG3;
}
else if (reg.RegCount == 4)
{
regtype |= REGT_MULTIREG4;
}
return regtype;
}
@ -573,19 +576,20 @@ ExpEmit FxConstant::Emit(VMFunctionBuilder *build)
//
//==========================================================================
FxVectorValue::FxVectorValue(FxExpression *x, FxExpression *y, FxExpression *z, const FScriptPosition &sc)
FxVectorValue::FxVectorValue(FxExpression *x, FxExpression *y, FxExpression *z, FxExpression* w, const FScriptPosition &sc)
:FxExpression(EFX_VectorValue, sc)
{
xyz[0] = x;
xyz[1] = y;
xyz[2] = z;
xyzw[0] = x;
xyzw[1] = y;
xyzw[2] = z;
xyzw[3] = w;
isConst = false;
ValueType = TypeVoid; // we do not know yet
}
FxVectorValue::~FxVectorValue()
{
for (auto &a : xyz)
for (auto &a : xyzw)
{
SAFE_DELETE(a);
}
@ -595,7 +599,8 @@ FxExpression *FxVectorValue::Resolve(FCompileContext&ctx)
{
bool fails = false;
for (auto &a : xyz)
// Cast every scalar to float64
for (auto &a : xyzw)
{
if (a != nullptr)
{
@ -603,7 +608,7 @@ FxExpression *FxVectorValue::Resolve(FCompileContext&ctx)
if (a == nullptr) fails = true;
else
{
if (a->ValueType != TypeVector2) // a vec3 may be initialized with (vec2, z)
if (a->ValueType != TypeVector2 && a->ValueType != TypeVector3) // smaller vector can be used to initialize another vector
{
a = new FxFloatCast(a);
a = a->Resolve(ctx);
@ -612,51 +617,89 @@ FxExpression *FxVectorValue::Resolve(FCompileContext&ctx)
}
}
}
if (fails)
{
delete this;
return nullptr;
}
// at this point there are three legal cases:
// * two floats = vector2
// * three floats = vector3
// * vector2 + float = vector3
if (xyz[0]->ValueType == TypeVector2)
// The actual dimension of the Vector does not correspond to the number of non-null elements in xyzw
// For example: '(asdf.xy, 1)' would be Vector3 where xyzw[0]->ValueType == TypeVector2 and xyzw[1]->ValueType == TypeFloat64
// Handle nesting and figure out the dimension of the vector
int vectorDimensions = 0;
for (int i = 0; i < maxVectorDimensions && xyzw[i]; ++i)
{
if (xyz[1]->ValueType != TypeFloat64 || xyz[2] != nullptr)
assert(dynamic_cast<FxExpression*>(xyzw[i]));
if (xyzw[i]->ValueType == TypeFloat64)
{
vectorDimensions++;
}
else if (xyzw[i]->ValueType == TypeVector2 || xyzw[i]->ValueType == TypeVector3 || xyzw[i]->ValueType == TypeVector4)
{
// Solve nested vector
int regCount = xyzw[i]->ValueType->RegCount;
if (regCount + vectorDimensions > maxVectorDimensions)
{
vectorDimensions += regCount; // Show proper number
goto too_big;
}
// Nested initializer gets simplified
if (xyzw[i]->ExprType == EFX_VectorValue)
{
// Shifts current elements to leave space for unwrapping nested initialization
for (int l = maxVectorDimensions - 1; l > i; --l)
{
xyzw[l] = xyzw[l - regCount + 1];
}
auto vi = static_cast<FxVectorValue*>(xyzw[i]);
for (int j = 0; j < regCount; ++j)
{
xyzw[i + j] = vi->xyzw[j];
vi->xyzw[j] = nullptr; // Preserve object after 'delete vi;'
}
delete vi;
// We extracted something, let's iterate on that again:
--i;
continue;
}
else
{
vectorDimensions += regCount;
}
}
else
{
ScriptPosition.Message(MSG_ERROR, "Not a valid vector");
delete this;
return nullptr;
}
ValueType = TypeVector3;
if (xyz[0]->ExprType == EFX_VectorValue)
{
// If two vector initializers are nested, unnest them now.
auto vi = static_cast<FxVectorValue*>(xyz[0]);
xyz[2] = xyz[1];
xyz[1] = vi->xyz[1];
xyz[0] = vi->xyz[0];
vi->xyz[0] = vi->xyz[1] = nullptr; // Don't delete our own expressions.
delete vi;
}
}
else if (xyz[0]->ValueType == TypeFloat64 && xyz[1]->ValueType == TypeFloat64)
switch (vectorDimensions)
{
ValueType = xyz[2] == nullptr ? TypeVector2 : TypeVector3;
}
else
{
ScriptPosition.Message(MSG_ERROR, "Not a valid vector");
case 2: ValueType = TypeVector2; break;
case 3: ValueType = TypeVector3; break;
case 4: ValueType = TypeVector4; break;
default:
too_big:;
ScriptPosition.Message(MSG_ERROR, "Vector of %d dimensions is not supported", vectorDimensions);
delete this;
return nullptr;
}
// check if all elements are constant. If so this can be emitted as a constant vector.
isConst = true;
for (auto &a : xyz)
for (auto &a : xyzw)
{
if (a != nullptr && !a->isConstant()) isConst = false;
if (a && !a->isConstant()) isConst = false;
}
return this;
}
@ -674,100 +717,96 @@ static ExpEmit EmitKonst(VMFunctionBuilder *build, ExpEmit &emit)
ExpEmit FxVectorValue::Emit(VMFunctionBuilder *build)
{
	// Emits every element expression of the initializer and returns the float
	// registers holding the assembled vector. If the emitted elements already
	// sit in consecutive registers they are returned as-is; otherwise a new
	// register group is allocated and the elements are moved into it.
	//
	// no const handling here. Ultimately it's too rarely used (i.e. the only fully constant vector ever allocated in ZDoom is the 0-vector in a very few places)
	// and the negatives (excessive allocation of float constants) outweigh the positives (saved a few instructions)

	int vectorDimensions = ValueType->RegCount;
	int vectorElements = 0;
	for (auto& e : xyzw)
	{
		if (e) vectorElements++;
	}
	assert(vectorElements > 0);

	// Raw storage for the emitted elements. The objects are constructed with
	// placement new below, so they must be destroyed explicitly and the
	// storage must be freed on every path out of this function.
	// NOTE(review): indexing xyzw[0..vectorElements) assumes the non-null
	// elements are packed at the front of xyzw - confirm against Resolve().
	ExpEmit* tempVal = (ExpEmit*)calloc(vectorElements, sizeof(ExpEmit));
	ExpEmit* val = (ExpEmit*)calloc(vectorElements, sizeof(ExpEmit));

	// Init ExpEmit
	for (int i = 0; i < vectorElements; ++i)
	{
		new(tempVal + i) ExpEmit(xyzw[i]->Emit(build));
		new(val + i) ExpEmit(EmitKonst(build, tempVal[i]));
	}

	// Destroys the placement-constructed objects and releases both buffers.
	// This does not free any VM registers.
	auto releaseStorage = [&]() {
		for (int i = 0; i < vectorElements; ++i)
		{
			tempVal[i].~ExpEmit();
			val[i].~ExpEmit();
		}
		free(tempVal);
		free(val);
	};

	{
		bool isContinuous = true;

		for (int i = 1; i < vectorElements; ++i)
		{
			if (val[i - 1].RegNum + val[i - 1].RegCount != val[i].RegNum)
			{
				isContinuous = false;
				break;
			}
		}

		// all values are in continuous registers:
		if (isContinuous)
		{
			// Copy the result out before the storage goes away. The registers
			// themselves are handed to the caller, so they are not freed here.
			ExpEmit result = val[0];
			result.RegCount = vectorDimensions;
			releaseStorage();
			return result;
		}
	}

	ExpEmit out(build, REGT_FLOAT, vectorDimensions);

	{
		auto emitRegMove = [&](int regsToMove, int dstRegIndex, int srcRegIndex) {
			assert(dstRegIndex < vectorDimensions);
			assert(srcRegIndex < vectorDimensions);
			assert(regsToMove > 0 && regsToMove <= 4);
			build->Emit(regsToMove == 1 ? OP_MOVEF : OP_MOVEV2 + regsToMove - 2, out.RegNum + dstRegIndex, val[srcRegIndex].RegNum);
			static_assert(OP_MOVEV2 + 1 == OP_MOVEV3);
			static_assert(OP_MOVEV3 + 1 == OP_MOVEV4);
		};

		int regsToPush = 0;
		int nextRegNum = val[0].RegNum;
		int lastElementIndex = 0;
		int reg = 0;

		// Use larger MOVE OPs for any groups of registers that are continuous including those across individual xyzw[] elements
		for (int elementIndex = 0; elementIndex < vectorElements; ++elementIndex)
		{
			int regCount = xyzw[elementIndex]->ValueType->RegCount;

			if (nextRegNum != val[elementIndex].RegNum)
			{
				// The run of consecutive source registers ends here: flush it
				// and start a new run at the current element.
				emitRegMove(regsToPush, reg, lastElementIndex);

				reg += regsToPush;
				regsToPush = regCount;
				nextRegNum = val[elementIndex].RegNum + val[elementIndex].RegCount;
				lastElementIndex = elementIndex;
			}
			else
			{
				regsToPush += regCount;
				nextRegNum = val[elementIndex].RegNum + val[elementIndex].RegCount;
			}
		}

		// Emit move instructions on the last register
		if (regsToPush > 0)
		{
			emitRegMove(regsToPush, reg, lastElementIndex);
		}
	}

	// The element registers have been copied into 'out' and are no longer needed.
	for (int i = 0; i < vectorElements; ++i)
	{
		val[i].Free(build);
	}
	releaseStorage();

	return out;
}
//==========================================================================
@ -1688,7 +1727,7 @@ FxExpression *FxTypeCast::Resolve(FCompileContext &ctx)
delete this;
return x;
}
else if ((basex->IsVector2() && IsVector2()) || (basex->IsVector3() && IsVector3()))
else if ((basex->IsVector2() && IsVector2()) || (basex->IsVector3() && IsVector3()) || (basex->IsVector4() && IsVector4()))
{
auto x = basex;
basex = nullptr;
@ -1887,6 +1926,10 @@ ExpEmit FxMinusSign::Emit(VMFunctionBuilder *build)
build->Emit(OP_NEGV3, to.RegNum, from.RegNum);
break;
case 4:
build->Emit(OP_NEGV4, to.RegNum, from.RegNum);
break;
}
}
return to;
@ -2799,7 +2842,7 @@ FxExpression *FxAddSub::Resolve(FCompileContext& ctx)
else if (left->IsVector() && right->IsVector())
{
// a vector2 can be added to or subtracted from a vector 3 but it needs to be the right operand.
if (((left->IsVector3() || left->IsVector2()) && right->IsVector2()) || (left->IsVector3() && right->IsVector3()))
if (((left->IsVector3() || left->IsVector2()) && right->IsVector2()) || (left->IsVector3() && right->IsVector3()) || (left->IsVector4() && right->IsVector4()))
{
ValueType = left->ValueType;
}
@ -2893,7 +2936,7 @@ ExpEmit FxAddSub::Emit(VMFunctionBuilder *build)
{
assert(op1.RegType == REGT_FLOAT && op2.RegType == REGT_FLOAT);
build->Emit(right->IsVector2() ? OP_ADDV2_RR : OP_ADDV3_RR, to.RegNum, op1.RegNum, op2.RegNum);
build->Emit(right->IsVector4() ? OP_ADDV4_RR : right->IsVector3() ? OP_ADDV3_RR : OP_ADDV2_RR, to.RegNum, op1.RegNum, op2.RegNum);
if (left->IsVector3() && right->IsVector2() && to.RegNum != op1.RegNum)
{
// must move the z-coordinate
@ -2926,7 +2969,7 @@ ExpEmit FxAddSub::Emit(VMFunctionBuilder *build)
if (IsVector())
{
assert(op1.RegType == REGT_FLOAT && op2.RegType == REGT_FLOAT);
build->Emit(right->IsVector2() ? OP_SUBV2_RR : OP_SUBV3_RR, to.RegNum, op1.RegNum, op2.RegNum);
build->Emit(right->IsVector4() ? OP_SUBV4_RR : right->IsVector3() ? OP_SUBV3_RR : OP_SUBV2_RR, to.RegNum, op1.RegNum, op2.RegNum);
return to;
}
else if (ValueType->GetRegType() == REGT_FLOAT)
@ -3129,11 +3172,11 @@ ExpEmit FxMulDiv::Emit(VMFunctionBuilder *build)
int op;
if (op2.Konst)
{
op = Operator == '*' ? (IsVector2() ? OP_MULVF2_RK : OP_MULVF3_RK) : (IsVector2() ? OP_DIVVF2_RK : OP_DIVVF3_RK);
op = Operator == '*' ? (IsVector2() ? OP_MULVF2_RK : IsVector3() ? OP_MULVF3_RK : OP_MULVF4_RK) : (IsVector2() ? OP_DIVVF2_RK : IsVector3() ? OP_DIVVF3_RK : OP_DIVVF4_RK);
}
else
{
op = Operator == '*' ? (IsVector2() ? OP_MULVF2_RR : OP_MULVF3_RR) : (IsVector2() ? OP_DIVVF2_RR : OP_DIVVF3_RR);
op = Operator == '*' ? (IsVector2() ? OP_MULVF2_RR : IsVector3() ? OP_MULVF3_RR : OP_MULVF4_RR) : (IsVector2() ? OP_DIVVF2_RR : IsVector3() ? OP_DIVVF3_RR : OP_DIVVF4_RR);
}
op1.Free(build);
op2.Free(build);
@ -3598,7 +3641,7 @@ FxExpression *FxCompareEq::Resolve(FCompileContext& ctx)
}
// identical types are always comparable, if they can be placed in a register, so we can save most checks if this is the case.
if (left->ValueType != right->ValueType && !(left->IsVector2() && right->IsVector2()) && !(left->IsVector3() && right->IsVector3()))
if (left->ValueType != right->ValueType && !(left->IsVector2() && right->IsVector2()) && !(left->IsVector3() && right->IsVector3()) && !(left->IsVector4() && right->IsVector4()))
{
FxExpression *x;
if (left->IsNumeric() && right->ValueType == TypeString && (x = StringConstToChar(right)))
@ -3836,11 +3879,11 @@ ExpEmit FxCompareEq::EmitCommon(VMFunctionBuilder *build, bool forcompare, bool
std::swap(op1, op2);
}
assert(!op1.Konst);
assert(op1.RegCount >= 1 && op1.RegCount <= 3);
assert(op1.RegCount >= 1 && op1.RegCount <= 4);
ExpEmit to(build, REGT_INT);
static int flops[] = { OP_EQF_R, OP_EQV2_R, OP_EQV3_R };
static int flops[] = { OP_EQF_R, OP_EQV2_R, OP_EQV3_R, OP_EQV4_R };
instr = op1.RegType == REGT_INT ? OP_EQ_R :
op1.RegType == REGT_FLOAT ? flops[op1.RegCount - 1] :
OP_EQA_R;
@ -4256,7 +4299,7 @@ ExpEmit FxConcat::Emit(VMFunctionBuilder *build)
build->Emit(op1.RegType == REGT_INT ? OP_LK : op1.RegType == REGT_FLOAT ? OP_LKF : OP_LKP, nonconst.RegNum, op1.RegNum);
op1 = nonconst;
}
if (op1.RegType == REGT_FLOAT) cast = op1.RegCount == 1 ? CAST_F2S : op1.RegCount == 2 ? CAST_V22S : CAST_V32S;
if (op1.RegType == REGT_FLOAT) cast = op1.RegCount == 1 ? CAST_F2S : op1.RegCount == 2 ? CAST_V22S : op1.RegCount == 3 ? CAST_V32S : CAST_V42S;
else if (left->ValueType == TypeUInt32) cast = CAST_U2S;
else if (left->ValueType == TypeName) cast = CAST_N2S;
else if (left->ValueType == TypeSound) cast = CAST_So2S;
@ -4289,7 +4332,7 @@ ExpEmit FxConcat::Emit(VMFunctionBuilder *build)
build->Emit(op2.RegType == REGT_INT ? OP_LK : op2.RegType == REGT_FLOAT ? OP_LKF : OP_LKP, nonconst.RegNum, op2.RegNum);
op2 = nonconst;
}
if (op2.RegType == REGT_FLOAT) cast = op2.RegCount == 1 ? CAST_F2S : op2.RegCount == 2 ? CAST_V22S : CAST_V32S;
// Right-hand operand: choose the to-string cast from op2's own register type
// and register count (1 = scalar, 2/3/4 = vector of that size), not op1's.
if (op2.RegType == REGT_FLOAT) cast = op2.RegCount == 1 ? CAST_F2S : op2.RegCount == 2 ? CAST_V22S : op2.RegCount == 3 ? CAST_V32S : CAST_V42S;
else if (right->ValueType == TypeUInt32) cast = CAST_U2S;
else if (right->ValueType == TypeName) cast = CAST_N2S;
else if (right->ValueType == TypeSound) cast = CAST_So2S;
@ -4552,7 +4595,7 @@ ExpEmit FxDotCross::Emit(VMFunctionBuilder *build)
ExpEmit to(build, ValueType->GetRegType(), ValueType->GetRegCount());
ExpEmit op1 = left->Emit(build);
ExpEmit op2 = right->Emit(build);
int op = Operator == TK_Cross ? OP_CROSSV_RR : left->ValueType == TypeVector3 ? OP_DOTV3_RR : OP_DOTV2_RR;
int op = Operator == TK_Cross ? OP_CROSSV_RR : left->ValueType == TypeVector4 ? OP_DOTV4_RR : left->ValueType == TypeVector3 ? OP_DOTV3_RR : OP_DOTV2_RR;
build->Emit(op, to.RegNum, op1.RegNum, op2.RegNum);
op1.Free(build);
op2.Free(build);
@ -8740,12 +8783,12 @@ FxExpression *FxVMFunctionCall::Resolve(FCompileContext& ctx)
else
{
// Vectors need special treatment because they are not normal constants
FxConstant *cs[3] = { nullptr };
FxConstant *cs[4] = { nullptr };
for (int l = 0; l < ntype->GetRegCount(); l++)
{
cs[l] = new FxConstant(TypeFloat64, defaults[l + i + k + skipdefs + implicit], ScriptPosition);
}
FxExpression *x = new FxVectorValue(cs[0], cs[1], cs[2], ScriptPosition);
FxExpression *x = new FxVectorValue(cs[0], cs[1], cs[2], cs[3], ScriptPosition);
ArgList.Insert(i + k, x);
skipdefs += ntype->GetRegCount() - 1;
}
@ -9155,15 +9198,19 @@ ExpEmit FxVectorBuiltin::Emit(VMFunctionBuilder *build)
{
ExpEmit to(build, ValueType->GetRegType(), ValueType->GetRegCount());
ExpEmit op = Self->Emit(build);
const int vecSize = (Self->ValueType == TypeVector2 || Self->ValueType == TypeFVector2) ? 2
: (Self->ValueType == TypeVector3 || Self->ValueType == TypeFVector3) ? 3 : 4;
if (Function == NAME_Length)
{
build->Emit(Self->ValueType == TypeVector2 || Self->ValueType == TypeFVector2 ? OP_LENV2 : OP_LENV3, to.RegNum, op.RegNum);
build->Emit(vecSize == 2 ? OP_LENV2 : vecSize == 3 ? OP_LENV3 : OP_LENV4, to.RegNum, op.RegNum);
}
else
{
ExpEmit len(build, REGT_FLOAT);
build->Emit(Self->ValueType == TypeVector2 || Self->ValueType == TypeFVector2 ? OP_LENV2 : OP_LENV3, len.RegNum, op.RegNum);
build->Emit(Self->ValueType == TypeVector2 || Self->ValueType == TypeFVector2 ? OP_DIVVF2_RR : OP_DIVVF3_RR, to.RegNum, op.RegNum, len.RegNum);
build->Emit(vecSize == 2 ? OP_LENV2 : vecSize == 3 ? OP_LENV3 : OP_LENV4, len.RegNum, op.RegNum);
build->Emit(vecSize == 2 ? OP_DIVVF2_RR : vecSize == 3 ? OP_DIVVF3_RR : OP_DIVVF4_RR, to.RegNum, op.RegNum, len.RegNum);
len.Free(build);
}
op.Free(build);
@ -10827,11 +10874,12 @@ FxLocalVariableDeclaration::FxLocalVariableDeclaration(PType *type, FName name,
// Local FVector isn't different from Vector
if (type == TypeFVector2) type = TypeVector2;
else if (type == TypeFVector3) type = TypeVector3;
else if (type == TypeFVector4) type = TypeVector4;
ValueType = type;
VarFlags = varflags;
Name = name;
RegCount = type == TypeVector2 ? 2 : type == TypeVector3 ? 3 : 1;
RegCount = type->RegCount;
Init = initval;
clearExpr = nullptr;
}

View file

@ -336,9 +336,10 @@ public:
bool IsFloat() const { return ValueType->isFloat(); }
bool IsInteger() const { return ValueType->isNumeric() && ValueType->isIntCompatible(); }
bool IsPointer() const { return ValueType->isPointer(); }
bool IsVector() const { return ValueType == TypeVector2 || ValueType == TypeVector3 || ValueType == TypeFVector2 || ValueType == TypeFVector3; };
// Vector type predicates. Each size check accepts both the double-based
// (TypeVectorN) and float-based (TypeFVectorN) struct of that dimension.
// IsVector() is true for any of the 2-, 3- or 4-component vector types.
bool IsVector() const { return IsVector2() || IsVector3() || IsVector4(); };
// True when the expression's type is Vector2 or FVector2.
bool IsVector2() const { return ValueType == TypeVector2 || ValueType == TypeFVector2; };
// True when the expression's type is Vector3 or FVector3.
bool IsVector3() const { return ValueType == TypeVector3 || ValueType == TypeFVector3; };
// True when the expression's type is Vector4 or FVector4.
bool IsVector4() const { return ValueType == TypeVector4 || ValueType == TypeFVector4; };
bool IsBoolCompat() const { return ValueType->isScalar(); }
bool IsObject() const { return ValueType->isObjectPointer(); }
bool IsArray() const { return ValueType->isArray() || (ValueType->isPointer() && ValueType->toPointer()->PointedType->isArray()); }
@ -550,20 +551,23 @@ public:
class FxVectorValue : public FxExpression
{
FxExpression *xyz[3];
constexpr static int maxVectorDimensions = 4;
FxExpression *xyzw[maxVectorDimensions];
bool isConst; // gets set to true if all element are const (used by function defaults parser)
public:
friend class ZCCCompiler;
FxVectorValue(FxExpression *x, FxExpression *y, FxExpression *z, const FScriptPosition &sc);
FxVectorValue(FxExpression *x, FxExpression *y, FxExpression *z, FxExpression* w, const FScriptPosition &sc);
~FxVectorValue();
FxExpression *Resolve(FCompileContext&);
bool isConstVector(int dim)
{
if (!isConst) return false;
return dim == 2 ? xyz[2] == nullptr : xyz[2] != nullptr;
// Only a vector whose elements are all constants can qualify.
if (!isConst)
	return false;
// A constant vector of dimension 'dim' has element dim-1 set and, unless it
// already has the maximum dimension, element dim unset. The lower bound is 1
// (not 0) so that xyzw[dim - 1] can never index before the array.
return dim >= 1 && dim <= maxVectorDimensions && xyzw[dim - 1] && (dim == maxVectorDimensions || !xyzw[dim]);
}
ExpEmit Emit(VMFunctionBuilder *build);

View file

@ -637,6 +637,7 @@ size_t VMFunctionBuilder::Emit(int opcode, int opa, VM_SHALF opbc)
int chg;
if (opa & REGT_MULTIREG2) chg = 2;
else if (opa & REGT_MULTIREG3) chg = 3;
else if (opa & REGT_MULTIREG4) chg = 4;
else chg = 1;
ParamChange(chg);
}

View file

@ -61,8 +61,10 @@ PPointer *TypeFont;
PStateLabel *TypeStateLabel;
PStruct *TypeVector2;
PStruct *TypeVector3;
PStruct* TypeVector4;
PStruct* TypeFVector2;
PStruct* TypeFVector3;
PStruct* TypeFVector4;
PStruct *TypeColorStruct;
PStruct *TypeStringStruct;
PPointer *TypeNullPtr;
@ -350,6 +352,22 @@ void PType::StaticInit()
TypeVector3->RegCount = 3;
TypeVector3->isOrdered = true;
TypeVector4 = new PStruct(NAME_Vector4, nullptr);
TypeVector4->AddField(NAME_X, TypeFloat64);
TypeVector4->AddField(NAME_Y, TypeFloat64);
TypeVector4->AddField(NAME_Z, TypeFloat64);
TypeVector4->AddField(NAME_W, TypeFloat64);
// allow accessing xyz as a vector3. This is not supposed to be serialized so it's marked transient
TypeVector4->Symbols.AddSymbol(Create<PField>(NAME_XYZ, TypeVector3, VARF_Transient, 0));
TypeVector4->Symbols.AddSymbol(Create<PField>(NAME_XY, TypeVector2, VARF_Transient, 0));
TypeTable.AddType(TypeVector4, NAME_Struct);
TypeVector4->loadOp = OP_LV4;
TypeVector4->storeOp = OP_SV4;
TypeVector4->moveOp = OP_MOVEV4;
TypeVector4->RegType = REGT_FLOAT;
TypeVector4->RegCount = 4;
TypeVector4->isOrdered = true;
TypeFVector2 = new PStruct(NAME_FVector2, nullptr);
TypeFVector2->AddField(NAME_X, TypeFloat32);
@ -376,6 +394,22 @@ void PType::StaticInit()
TypeFVector3->RegCount = 3;
TypeFVector3->isOrdered = true;
TypeFVector4 = new PStruct(NAME_FVector4, nullptr);
TypeFVector4->AddField(NAME_X, TypeFloat32);
TypeFVector4->AddField(NAME_Y, TypeFloat32);
TypeFVector4->AddField(NAME_Z, TypeFloat32);
TypeFVector4->AddField(NAME_W, TypeFloat32);
// allow accessing xyz as a vector3
TypeFVector4->Symbols.AddSymbol(Create<PField>(NAME_XYZ, TypeFVector3, VARF_Transient, 0));
TypeFVector4->Symbols.AddSymbol(Create<PField>(NAME_XY, TypeFVector2, VARF_Transient, 0));
TypeTable.AddType(TypeFVector4, NAME_Struct);
TypeFVector4->loadOp = OP_LFV4;
TypeFVector4->storeOp = OP_SFV4;
TypeFVector4->moveOp = OP_MOVEV4;
TypeFVector4->RegType = REGT_FLOAT;
TypeFVector4->RegCount = 4;
TypeFVector4->isOrdered = true;
Namespaces.GlobalNamespace->Symbols.AddSymbol(Create<PSymbolType>(NAME_sByte, TypeSInt8));
Namespaces.GlobalNamespace->Symbols.AddSymbol(Create<PSymbolType>(NAME_Byte, TypeUInt8));
Namespaces.GlobalNamespace->Symbols.AddSymbol(Create<PSymbolType>(NAME_Short, TypeSInt16));
@ -394,8 +428,10 @@ void PType::StaticInit()
Namespaces.GlobalNamespace->Symbols.AddSymbol(Create<PSymbolType>(NAME_State, TypeState));
Namespaces.GlobalNamespace->Symbols.AddSymbol(Create<PSymbolType>(NAME_Vector2, TypeVector2));
Namespaces.GlobalNamespace->Symbols.AddSymbol(Create<PSymbolType>(NAME_Vector3, TypeVector3));
Namespaces.GlobalNamespace->Symbols.AddSymbol(Create<PSymbolType>(NAME_Vector4, TypeVector4));
Namespaces.GlobalNamespace->Symbols.AddSymbol(Create<PSymbolType>(NAME_FVector2, TypeFVector2));
Namespaces.GlobalNamespace->Symbols.AddSymbol(Create<PSymbolType>(NAME_FVector3, TypeFVector3));
Namespaces.GlobalNamespace->Symbols.AddSymbol(Create<PSymbolType>(NAME_FVector4, TypeFVector4));
}

View file

@ -615,8 +615,10 @@ extern PTextureID *TypeTextureID;
extern PSpriteID *TypeSpriteID;
extern PStruct* TypeVector2;
extern PStruct* TypeVector3;
extern PStruct* TypeVector4;
extern PStruct* TypeFVector2;
extern PStruct* TypeFVector3;
extern PStruct* TypeFVector4;
extern PStruct *TypeColorStruct;
extern PStruct *TypeStringStruct;
extern PStatePointer *TypeState;

View file

@ -639,6 +639,8 @@ static int print_reg(FILE *out, int col, int arg, int mode, int immshift, const
return col+printf_wrapper(out, "v%d.2", regnum);
case REGT_FLOAT | REGT_MULTIREG3:
return col+printf_wrapper(out, "v%d.3", regnum);
case REGT_FLOAT | REGT_MULTIREG4:
return col+printf_wrapper(out, "v%d.4", regnum);
case REGT_INT | REGT_KONST:
return col+print_reg(out, 0, regnum, MODE_KI, 0, func);
case REGT_FLOAT | REGT_KONST:

View file

@ -53,6 +53,7 @@ static const char *BuiltInTypeNames[] =
"string",
"vector2",
"vector3",
"vector4",
"name",
"color",
@ -684,6 +685,7 @@ static void PrintVectorInitializer(FLispString &out, ZCC_TreeNode *node)
PrintNodes(out, enode->X);
PrintNodes(out, enode->Y);
PrintNodes(out, enode->Z);
PrintNodes(out, enode->W);
out.Close();
}

View file

@ -861,6 +861,7 @@ type_name1(X) ::= DOUBLE(T). { X.Int = ZCC_Float64; X.SourceLoc = T.SourceLoc
//type_name1(X) ::= STRING(T). { X.Int = ZCC_String; X.SourceLoc = T.SourceLoc; } // [ZZ] it's handled elsewhere. this particular line only causes troubles in the form of String.Format being invalid.
type_name1(X) ::= VECTOR2(T). { X.Int = ZCC_Vector2; X.SourceLoc = T.SourceLoc; }
type_name1(X) ::= VECTOR3(T). { X.Int = ZCC_Vector3; X.SourceLoc = T.SourceLoc; }
type_name1(X) ::= VECTOR4(T). { X.Int = ZCC_Vector4; X.SourceLoc = T.SourceLoc; }
type_name1(X) ::= NAME(T). { X.Int = ZCC_Name; X.SourceLoc = T.SourceLoc; }
type_name1(X) ::= SOUND(T). { X.Int = ZCC_Sound; X.SourceLoc = T.SourceLoc; }
type_name1(X) ::= STATE(T). { X.Int = ZCC_State; X.SourceLoc = T.SourceLoc; }
@ -931,7 +932,7 @@ type_name(X) ::= DOT dottable_id(A).
/* Type names can also be used as identifiers in contexts where type names
* are not normally allowed. */
%fallback IDENTIFIER
SBYTE BYTE SHORT USHORT INT UINT BOOL FLOAT DOUBLE STRING VECTOR2 VECTOR3 NAME MAP ARRAY VOID STATE COLOR SOUND UINT8 INT8 UINT16 INT16 PROPERTY.
SBYTE BYTE SHORT USHORT INT UINT BOOL FLOAT DOUBLE STRING VECTOR2 VECTOR3 VECTOR4 NAME MAP ARRAY VOID STATE COLOR SOUND UINT8 INT8 UINT16 INT16 PROPERTY.
/* Aggregate types */
%type aggregate_type {ZCC_Type *}
@ -1303,6 +1304,17 @@ primary(X) ::= SUPER(T).
X = expr;
}
primary(X) ::= constant(A). { X = A; /*X-overwrites-A*/ }
/* Four-component vector literal: "(" expr "," expr "," expr "," expr ")".
 * Builds a VectorValue AST node tagged PEX_Vector with type TypeVector4 and
 * stores the four sub-expressions in X, Y, Z and W in source order. */
primary(XX) ::= LPAREN expr(A) COMMA expr(B) COMMA expr(C) COMMA expr(D) RPAREN. [DOT]
{
NEW_AST_NODE(VectorValue, expr, A);
expr->Operation = PEX_Vector;
expr->Type = TypeVector4;
expr->X = A;
expr->Y = B;
expr->Z = C;
expr->W = D;
XX = expr;
}
primary(XX) ::= LPAREN expr(A) COMMA expr(B) COMMA expr(C) RPAREN. [DOT]
{
NEW_AST_NODE(VectorValue, expr, A);
@ -1311,6 +1323,7 @@ primary(XX) ::= LPAREN expr(A) COMMA expr(B) COMMA expr(C) RPAREN. [DOT]
expr->X = A;
expr->Y = B;
expr->Z = C;
expr->W = nullptr;
XX = expr;
}
primary(XX) ::= LPAREN expr(A) COMMA expr(B) RPAREN. [DOT]
@ -1321,6 +1334,7 @@ primary(XX) ::= LPAREN expr(A) COMMA expr(B) RPAREN. [DOT]
expr->X = A;
expr->Y = B;
expr->Z = nullptr;
expr->W = nullptr;
XX = expr;
}
primary(X) ::= LPAREN expr(A) RPAREN.

View file

@ -1790,6 +1790,10 @@ PType *ZCCCompiler::DetermineType(PType *outertype, ZCC_TreeNode *field, FName n
retval = TypeVector3;
break;
case ZCC_Vector4:
retval = TypeVector4;
break;
case ZCC_State:
retval = TypeState;
break;
@ -2150,7 +2154,7 @@ void ZCCCompiler::CompileFunction(ZCC_StructWork *c, ZCC_FuncDeclarator *f, bool
do
{
auto type = DetermineType(c->Type(), f, f->Name, t, false, false);
if (type->isContainer() && type != TypeVector2 && type != TypeVector3 && type != TypeFVector2 && type != TypeFVector3)
if (type->isContainer() && type != TypeVector2 && type != TypeVector3 && type != TypeVector4 && type != TypeFVector2 && type != TypeFVector3 && type != TypeFVector4)
{
// structs and classes only get passed by pointer.
type = NewPointer(type);
@ -2168,6 +2172,10 @@ void ZCCCompiler::CompileFunction(ZCC_StructWork *c, ZCC_FuncDeclarator *f, bool
{
type = TypeVector3;
}
else if (type == TypeFVector4)
{
type = TypeVector4;
}
// TBD: disallow certain types? For now, let everything pass that isn't an array.
rets.Push(type);
t = static_cast<decltype(t)>(t->SiblingNext);
@ -2340,12 +2348,12 @@ void ZCCCompiler::CompileFunction(ZCC_StructWork *c, ZCC_FuncDeclarator *f, bool
do
{
int elementcount = 1;
TypedVMValue vmval[3]; // default is REGT_NIL which means 'no default value' here.
TypedVMValue vmval[4]; // default is REGT_NIL which means 'no default value' here.
if (p->Type != nullptr)
{
auto type = DetermineType(c->Type(), p, f->Name, p->Type, false, false);
int flags = 0;
if ((type->isStruct() && type != TypeVector2 && type != TypeVector3) || type->isDynArray())
if ((type->isStruct() && type != TypeVector2 && type != TypeVector3 && type != TypeVector4) || type->isDynArray())
{
// Structs are being passed by pointer, but unless marked 'out' that pointer must be readonly.
type = NewPointer(type /*, !(p->Flags & ZCC_Out)*/);
@ -2362,8 +2370,12 @@ void ZCCCompiler::CompileFunction(ZCC_StructWork *c, ZCC_FuncDeclarator *f, bool
{
elementcount = 3;
}
else if (type == TypeVector4 || type == TypeFVector4)
{
elementcount = 4;
}
}
if (type->GetRegType() == REGT_NIL && type != TypeVector2 && type != TypeVector3 && type != TypeFVector2 && type != TypeFVector3)
if (type->GetRegType() == REGT_NIL && type != TypeVector2 && type != TypeVector3 && type != TypeVector4 && type != TypeFVector2 && type != TypeFVector3 && type != TypeFVector4)
{
// If it's TypeError, then an error was already given
if (type != TypeError)
@ -2407,15 +2419,23 @@ void ZCCCompiler::CompileFunction(ZCC_StructWork *c, ZCC_FuncDeclarator *f, bool
if ((type == TypeVector2 || type == TypeFVector2) && x->ExprType == EFX_VectorValue && static_cast<FxVectorValue *>(x)->isConstVector(2))
{
auto vx = static_cast<FxVectorValue *>(x);
vmval[0] = static_cast<FxConstant *>(vx->xyz[0])->GetValue().GetFloat();
vmval[1] = static_cast<FxConstant *>(vx->xyz[1])->GetValue().GetFloat();
vmval[0] = static_cast<FxConstant *>(vx->xyzw[0])->GetValue().GetFloat();
vmval[1] = static_cast<FxConstant *>(vx->xyzw[1])->GetValue().GetFloat();
}
else if ((type == TypeVector3 || type == TypeFVector3) && x->ExprType == EFX_VectorValue && static_cast<FxVectorValue *>(x)->isConstVector(3))
{
auto vx = static_cast<FxVectorValue *>(x);
vmval[0] = static_cast<FxConstant *>(vx->xyz[0])->GetValue().GetFloat();
vmval[1] = static_cast<FxConstant *>(vx->xyz[1])->GetValue().GetFloat();
vmval[2] = static_cast<FxConstant *>(vx->xyz[2])->GetValue().GetFloat();
vmval[0] = static_cast<FxConstant *>(vx->xyzw[0])->GetValue().GetFloat();
vmval[1] = static_cast<FxConstant *>(vx->xyzw[1])->GetValue().GetFloat();
vmval[2] = static_cast<FxConstant *>(vx->xyzw[2])->GetValue().GetFloat();
}
else if ((type == TypeVector4 || type == TypeFVector4) && x->ExprType == EFX_VectorValue && static_cast<FxVectorValue*>(x)->isConstVector(4))
{
auto vx = static_cast<FxVectorValue*>(x);
vmval[0] = static_cast<FxConstant*>(vx->xyzw[0])->GetValue().GetFloat();
vmval[1] = static_cast<FxConstant*>(vx->xyzw[1])->GetValue().GetFloat();
vmval[2] = static_cast<FxConstant*>(vx->xyzw[2])->GetValue().GetFloat();
vmval[3] = static_cast<FxConstant*>(vx->xyzw[3])->GetValue().GetFloat();
}
else if (!x->isConstant())
{
@ -3038,7 +3058,8 @@ FxExpression *ZCCCompiler::ConvertNode(ZCC_TreeNode *ast, bool substitute)
auto xx = ConvertNode(vecini->X);
auto yy = ConvertNode(vecini->Y);
auto zz = ConvertNode(vecini->Z);
return new FxVectorValue(xx, yy, zz, *ast);
auto ww = ConvertNode(vecini->W);
return new FxVectorValue(xx, yy, zz, ww, *ast);
}
case AST_LocalVarStmt:

View file

@ -1297,7 +1297,8 @@ ZCC_TreeNode *TreeNodeDeepCopy_Internal(ZCC_AST *ast, ZCC_TreeNode *orig, bool c
// ZCC_VectorValue
copy->X = static_cast<ZCC_Expression *>(TreeNodeDeepCopy_Internal(ast, origCasted->X, true, copiedNodesList));
copy->Y = static_cast<ZCC_Expression *>(TreeNodeDeepCopy_Internal(ast, origCasted->Y, true, copiedNodesList));
copy->Z = static_cast<ZCC_Expression *>(TreeNodeDeepCopy_Internal(ast, origCasted->Z, true, copiedNodesList));
copy->Z = static_cast<ZCC_Expression*>(TreeNodeDeepCopy_Internal(ast, origCasted->Z, true, copiedNodesList));
copy->W = static_cast<ZCC_Expression*>(TreeNodeDeepCopy_Internal(ast, origCasted->W, true, copiedNodesList));
break;
}

View file

@ -158,6 +158,7 @@ enum EZCCBuiltinType
ZCC_String,
ZCC_Vector2,
ZCC_Vector3,
ZCC_Vector4,
ZCC_Name,
ZCC_Color, // special types for ZDoom.
@ -442,7 +443,7 @@ struct ZCC_ExprTrinary : ZCC_Expression
struct ZCC_VectorValue : ZCC_Expression
{
ZCC_Expression *X, *Y, *Z;
ZCC_Expression *X, *Y, *Z, *W;
};
struct ZCC_Statement : ZCC_TreeNode

View file

@ -6,6 +6,7 @@
extern PString *TypeString;
extern PStruct *TypeVector2;
extern PStruct *TypeVector3;
extern PStruct* TypeVector4;
static void OutputJitLog(const asmjit::StringLogger &logger);
@ -315,6 +316,13 @@ void JitCompiler::SetupSimpleFrame()
cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f)));
cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f)));
}
else if (type == TypeVector4 || type == TypeFVector4)
{
cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f)));
cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f)));
cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f)));
cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f)));
}
else if (type == TypeFloat64)
{
cc.movsd(regF[regf++], x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f)));
@ -551,6 +559,20 @@ asmjit::X86Xmm JitCompiler::CheckRegF(int r0, int r1, int r2, int r3)
}
}
// Returns regF[r0] directly when r0 differs from every one of r1..r4.
// Otherwise emits a movsd of regF[r0] into a fresh temporary XMM register
// and returns that temporary instead.
asmjit::X86Xmm JitCompiler::CheckRegF(int r0, int r1, int r2, int r3, int r4)
{
	const bool aliased = (r0 == r1) || (r0 == r2) || (r0 == r3) || (r0 == r4);
	if (!aliased)
		return regF[r0];

	auto scratch = newTempXmmSd();
	cc.movsd(scratch, regF[r0]);
	return scratch;
}
asmjit::X86Gp JitCompiler::CheckRegS(int r0, int r1)
{
if (r0 != r1)

View file

@ -182,6 +182,13 @@ int JitCompiler::StoreCallParams()
}
numparams += 2;
break;
case REGT_FLOAT | REGT_MULTIREG4:
for (int j = 0; j < 4; j++)
{
cc.movsd(x86::qword_ptr(vmframe, offsetParams + (slot + j) * sizeof(VMValue) + myoffsetof(VMValue, f)), regF[bc + j]);
}
numparams += 3;
break;
case REGT_FLOAT | REGT_ADDROF:
cc.lea(stackPtr, x86::ptr(vmframe, offsetF + (int)(bc * sizeof(double))));
// When passing the address to a float we don't know if the receiving function will treat it as float, vec2 or vec3.
@ -256,6 +263,12 @@ void JitCompiler::LoadCallResult(int type, int regnum, bool addrof)
cc.movsd(regF[regnum + 1], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 1) * sizeof(double)));
cc.movsd(regF[regnum + 2], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 2) * sizeof(double)));
}
else if (type & REGT_MULTIREG4)
{
cc.movsd(regF[regnum + 1], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 1) * sizeof(double)));
cc.movsd(regF[regnum + 2], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 2) * sizeof(double)));
cc.movsd(regF[regnum + 3], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 3) * sizeof(double)));
}
break;
case REGT_STRING:
// We don't have to do anything in this case. String values are never moved to virtual registers.
@ -408,6 +421,11 @@ void JitCompiler::EmitNativeCall(VMNativeFunction *target)
call->setArg(slot + j, regF[bc + j]);
numparams += 2;
break;
case REGT_FLOAT | REGT_MULTIREG4:
for (int j = 0; j < 4; j++)
call->setArg(slot + j, regF[bc + j]);
numparams += 3;
break;
case REGT_FLOAT | REGT_KONST:
tmp = newTempIntPtr();
tmp2 = newTempXmmSd();
@ -550,6 +568,12 @@ void JitCompiler::EmitNativeCall(VMNativeFunction *target)
cc.movsd(regF[regnum + 1], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 1) * sizeof(double)));
cc.movsd(regF[regnum + 2], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 2) * sizeof(double)));
break;
case REGT_FLOAT | REGT_MULTIREG4:
cc.movsd(regF[regnum], asmjit::x86::qword_ptr(vmframe, offsetF + regnum * sizeof(double)));
cc.movsd(regF[regnum + 1], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 1) * sizeof(double)));
cc.movsd(regF[regnum + 2], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 2) * sizeof(double)));
cc.movsd(regF[regnum + 3], asmjit::x86::qword_ptr(vmframe, offsetF + (regnum + 3) * sizeof(double)));
break;
case REGT_STRING:
// We don't have to do anything in this case. String values are never moved to virtual registers.
break;
@ -624,6 +648,13 @@ asmjit::FuncSignature JitCompiler::CreateFuncSignature()
args.Push(TypeIdOf<double>::kTypeId);
key += "fff";
break;
case REGT_FLOAT | REGT_MULTIREG4:
args.Push(TypeIdOf<double>::kTypeId);
args.Push(TypeIdOf<double>::kTypeId);
args.Push(TypeIdOf<double>::kTypeId);
args.Push(TypeIdOf<double>::kTypeId);
key += "ffff";
break;
default:
I_Error("Unknown REGT value passed to EmitPARAM\n");

View file

@ -110,7 +110,21 @@ void JitCompiler::EmitRET()
if (regtype & REGT_KONST)
{
auto tmp = newTempInt64();
if (regtype & REGT_MULTIREG3)
if (regtype & REGT_MULTIREG4)
{
cc.mov(tmp, (((int64_t*)konstf)[regnum]));
cc.mov(x86::qword_ptr(location), tmp);
cc.mov(tmp, (((int64_t*)konstf)[regnum + 1]));
cc.mov(x86::qword_ptr(location, 8), tmp);
cc.mov(tmp, (((int64_t*)konstf)[regnum + 2]));
cc.mov(x86::qword_ptr(location, 16), tmp);
cc.mov(tmp, (((int64_t*)konstf)[regnum + 3]));
cc.mov(x86::qword_ptr(location, 24), tmp);
}
else if (regtype & REGT_MULTIREG3)
{
cc.mov(tmp, (((int64_t *)konstf)[regnum]));
cc.mov(x86::qword_ptr(location), tmp);
@ -137,7 +151,14 @@ void JitCompiler::EmitRET()
}
else
{
if (regtype & REGT_MULTIREG3)
if (regtype & REGT_MULTIREG4)
{
cc.movsd(x86::qword_ptr(location), regF[regnum]);
cc.movsd(x86::qword_ptr(location, 8), regF[regnum + 1]);
cc.movsd(x86::qword_ptr(location, 16), regF[regnum + 2]);
cc.movsd(x86::qword_ptr(location, 24), regF[regnum + 3]);
}
else if (regtype & REGT_MULTIREG3)
{
cc.movsd(x86::qword_ptr(location), regF[regnum]);
cc.movsd(x86::qword_ptr(location, 8), regF[regnum + 1]);

View file

@ -325,6 +325,28 @@ void JitCompiler::EmitLV3_R()
cc.movsd(regF[A + 2], asmjit::x86::qword_ptr(tmp, 16));
}
// LV4: after the EmitNullPointerThrow check on B, computes the address
// regA[B] + konstd[C] and loads four doubles at byte offsets 0, 8, 16 and 24
// into float registers A .. A+3.
void JitCompiler::EmitLV4()
{
	EmitNullPointerThrow(B, X_READ_NIL);
	auto base = newTempIntPtr();
	cc.lea(base, asmjit::x86::qword_ptr(regA[B], konstd[C]));
	for (int lane = 0; lane < 4; ++lane)
	{
		cc.movsd(regF[A + lane], asmjit::x86::qword_ptr(base, lane * 8));
	}
}
// LV4_R: same as LV4 but the offset comes from integer register regD[C].
// Loads four doubles at byte offsets 0, 8, 16 and 24 from regA[B] + regD[C]
// into float registers A .. A+3.
void JitCompiler::EmitLV4_R()
{
	EmitNullPointerThrow(B, X_READ_NIL);
	auto base = newTempIntPtr();
	cc.lea(base, asmjit::x86::qword_ptr(regA[B], regD[C]));
	for (int lane = 0; lane < 4; ++lane)
	{
		cc.movsd(regF[A + lane], asmjit::x86::qword_ptr(base, lane * 8));
	}
}
void JitCompiler::EmitLFV2()
{
EmitNullPointerThrow(B, X_READ_NIL);
@ -373,6 +395,36 @@ void JitCompiler::EmitLFV3_R()
cc.cvtss2sd(regF[A + 2], regF[A + 2]);
}
// LFV4: after the EmitNullPointerThrow check on B, loads four single-precision
// values at byte offsets 0, 4, 8 and 12 from regA[B] + konstd[C] into float
// registers A .. A+3 (movss), then widens each one to double (cvtss2sd).
void JitCompiler::EmitLFV4()
{
	EmitNullPointerThrow(B, X_READ_NIL);
	auto base = newTempIntPtr();
	cc.lea(base, asmjit::x86::qword_ptr(regA[B], konstd[C]));
	// All four loads are emitted first, followed by the four conversions.
	for (int lane = 0; lane < 4; ++lane)
	{
		cc.movss(regF[A + lane], asmjit::x86::qword_ptr(base, lane * 4));
	}
	for (int lane = 0; lane < 4; ++lane)
	{
		cc.cvtss2sd(regF[A + lane], regF[A + lane]);
	}
}
// LFV4_R: same as LFV4 but the offset comes from integer register regD[C].
// Loads four single-precision values at byte offsets 0, 4, 8 and 12 from
// regA[B] + regD[C] into float registers A .. A+3, then widens each to double.
void JitCompiler::EmitLFV4_R()
{
	EmitNullPointerThrow(B, X_READ_NIL);
	auto base = newTempIntPtr();
	cc.lea(base, asmjit::x86::qword_ptr(regA[B], regD[C]));
	// All four loads are emitted first, followed by the four conversions.
	for (int lane = 0; lane < 4; ++lane)
	{
		cc.movss(regF[A + lane], asmjit::x86::qword_ptr(base, lane * 4));
	}
	for (int lane = 0; lane < 4; ++lane)
	{
		cc.cvtss2sd(regF[A + lane], regF[A + lane]);
	}
}
static void SetString(FString *to, char **from)
{
*to = *from;

View file

@ -1447,6 +1447,165 @@ void JitCompiler::EmitEQV3_K()
I_Error("EQV3_K is not used.");
}
/////////////////////////////////////////////////////////////////////////////
// Vector math. (4D/Quaternion)
// NEGV4: copies float registers B .. B+3 into A .. A+3 and XORs each with a
// register holding the double constant -0.0, which flips the sign bit.
void JitCompiler::EmitNEGV4()
{
	auto signConst = cc.newDoubleConst(asmjit::kConstScopeLocal, -0.0);
	auto signMask = newTempXmmSd();
	cc.movsd(signMask, signConst);
	for (int lane = 0; lane < 4; ++lane)
	{
		cc.movsd(regF[A + lane], regF[B + lane]);
		cc.xorpd(regF[A + lane], signMask);
	}
}
void JitCompiler::EmitADDV4_RR()
{
auto rc0 = CheckRegF(C, A);
auto rc1 = CheckRegF(C + 1, A + 1);
auto rc2 = CheckRegF(C + 2, A + 2);
auto rc3 = CheckRegF(C + 3, A + 3);
cc.movsd(regF[A], regF[B]);
cc.addsd(regF[A], rc0);
cc.movsd(regF[A + 1], regF[B + 1]);
cc.addsd(regF[A + 1], rc1);
cc.movsd(regF[A + 2], regF[B + 2]);
cc.addsd(regF[A + 2], rc2);
cc.movsd(regF[A + 3], regF[B + 3]);
cc.addsd(regF[A + 3], rc3);
}
void JitCompiler::EmitSUBV4_RR()
{
auto rc0 = CheckRegF(C, A);
auto rc1 = CheckRegF(C + 1, A + 1);
auto rc2 = CheckRegF(C + 2, A + 2);
auto rc3 = CheckRegF(C + 3, A + 3);
cc.movsd(regF[A], regF[B]);
cc.subsd(regF[A], rc0);
cc.movsd(regF[A + 1], regF[B + 1]);
cc.subsd(regF[A + 1], rc1);
cc.movsd(regF[A + 2], regF[B + 2]);
cc.subsd(regF[A + 2], rc2);
cc.movsd(regF[A + 3], regF[B + 3]);
cc.subsd(regF[A + 3], rc3);
}
void JitCompiler::EmitDOTV4_RR()
{
auto rb1 = CheckRegF(B + 1, A);
auto rb2 = CheckRegF(B + 2, A);
auto rb3 = CheckRegF(B + 3, A);
auto rc0 = CheckRegF(C, A);
auto rc1 = CheckRegF(C + 1, A);
auto rc2 = CheckRegF(C + 2, A);
auto rc3 = CheckRegF(C + 3, A);
auto tmp = newTempXmmSd();
cc.movsd(regF[A], regF[B]);
cc.mulsd(regF[A], rc0);
cc.movsd(tmp, rb1);
cc.mulsd(tmp, rc1);
cc.addsd(regF[A], tmp);
cc.movsd(tmp, rb2);
cc.mulsd(tmp, rc2);
cc.addsd(regF[A], tmp);
cc.movsd(tmp, rb3);
cc.mulsd(tmp, rc3);
cc.addsd(regF[A], tmp);
}
// MULVF4_RR: A[i] = B[i] * (float register C) for i = 0..3.
void JitCompiler::EmitMULVF4_RR()
{
	// The scalar is resolved through CheckRegF against all four destinations.
	auto scalar = CheckRegF(C, A, A + 1, A + 2, A + 3);
	// Copy the whole source vector first, then scale each lane.
	for (int lane = 0; lane < 4; ++lane)
	{
		cc.movsd(regF[A + lane], regF[B + lane]);
	}
	for (int lane = 0; lane < 4; ++lane)
	{
		cc.mulsd(regF[A + lane], scalar);
	}
}
// MULVF4_RK: A[i] = B[i] * konstf[C] for i = 0..3; the constant is read
// from memory through a pointer to &konstf[C].
void JitCompiler::EmitMULVF4_RK()
{
	auto konstAddr = newTempIntPtr();
	// Copy the whole source vector first, then scale each lane.
	for (int lane = 0; lane < 4; ++lane)
	{
		cc.movsd(regF[A + lane], regF[B + lane]);
	}
	cc.mov(konstAddr, asmjit::imm_ptr(&konstf[C]));
	for (int lane = 0; lane < 4; ++lane)
	{
		cc.mulsd(regF[A + lane], asmjit::x86::qword_ptr(konstAddr));
	}
}
// DIVVF4_RR: A[i] = B[i] / (float register C) for i = 0..3.
void JitCompiler::EmitDIVVF4_RR()
{
	// The divisor is resolved through CheckRegF against all four destinations.
	auto divisor = CheckRegF(C, A, A + 1, A + 2, A + 3);
	// Copy the whole source vector first, then divide each lane.
	for (int lane = 0; lane < 4; ++lane)
	{
		cc.movsd(regF[A + lane], regF[B + lane]);
	}
	for (int lane = 0; lane < 4; ++lane)
	{
		cc.divsd(regF[A + lane], divisor);
	}
}
// DIVVF4_RK: A[i] = B[i] / konstf[C] for i = 0..3; the constant is read
// from memory through a pointer to &konstf[C].
void JitCompiler::EmitDIVVF4_RK()
{
	auto konstAddr = newTempIntPtr();
	// Copy the whole source vector first, then divide each lane.
	for (int lane = 0; lane < 4; ++lane)
	{
		cc.movsd(regF[A + lane], regF[B + lane]);
	}
	cc.mov(konstAddr, asmjit::imm_ptr(&konstf[C]));
	for (int lane = 0; lane < 4; ++lane)
	{
		cc.divsd(regF[A + lane], asmjit::x86::qword_ptr(konstAddr));
	}
}
void JitCompiler::EmitLENV4()
{
auto rb1 = CheckRegF(B + 1, A);
auto rb2 = CheckRegF(B + 2, A);
auto rb3 = CheckRegF(B + 3, A);
auto tmp = newTempXmmSd();
cc.movsd(regF[A], regF[B]);
cc.mulsd(regF[A], regF[B]);
cc.movsd(tmp, rb1);
cc.mulsd(tmp, rb1);
cc.addsd(regF[A], tmp);
cc.movsd(tmp, rb2);
cc.mulsd(tmp, rb2);
cc.addsd(regF[A], tmp);
cc.movsd(tmp, rb3);
cc.mulsd(tmp, rb3);
cc.addsd(regF[A], tmp);
CallSqrt(regF[A], regF[A]);
}
// EQV4_R: conditional jump on a comparison of two 4-component vectors held
// in registers. The actual compare is generated by EmitVectorComparison<4>.
void JitCompiler::EmitEQV4_R()
{
	auto compareVec4 = [&](bool check, asmjit::Label& fail, asmjit::Label& success)
	{
		EmitVectorComparison<4>(check, fail, success);
	};
	EmitComparisonOpcode(compareVec4);
}
// EQV4_K: the constant-operand form of the vector4 comparison is never
// generated, so reaching this emitter is reported as an error instead of
// producing code.
void JitCompiler::EmitEQV4_K()
{
I_Error("EQV4_K is not used.");
}
/////////////////////////////////////////////////////////////////////////////
// Pointer math.

View file

@ -39,11 +39,20 @@ void JitCompiler::EmitMOVEV3()
cc.movsd(regF[A + 2], regF[B + 2]);
}
// MOVEV4: emits a copy of four consecutive float registers, fA..fA+3 = fB..fB+3,
// lowest component first.
void JitCompiler::EmitMOVEV4()
{
	for (int i = 0; i < 4; i++)
	{
		cc.movsd(regF[A + i], regF[B + i]);
	}
}
// Value <-> string conversion helpers. The vector and pointer variants are
// handed to CreateCall by the CAST emitter, so they use plain C-style
// signatures with the destination string as first argument.

// Signed integer to decimal string.
static void CastI2S(FString *a, int b)
{
	a->Format("%d", b);
}

// Integer formatted as unsigned decimal.
static void CastU2S(FString *a, int b)
{
	a->Format("%u", b);
}

// Float to string with five decimals.
static void CastF2S(FString *a, double b)
{
	a->Format("%.5f", b);
}

// 2-component vector to "(x, y)".
static void CastV22S(FString *a, double b, double b1)
{
	a->Format("(%.5f, %.5f)", b, b1);
}

// 3-component vector to "(x, y, z)".
static void CastV32S(FString *a, double b, double b1, double b2)
{
	a->Format("(%.5f, %.5f, %.5f)", b, b1, b2);
}

// 4-component vector to "(x, y, z, w)".
static void CastV42S(FString *a, double b, double b1, double b2, double b3)
{
	a->Format("(%.5f, %.5f, %.5f, %.5f)", b, b1, b2, b3);
}

// Pointer to string; a null pointer is rendered as the literal "null".
static void CastP2S(FString *a, void *b)
{
	if (b != nullptr)
	{
		a->Format("%p", b);
	}
	else
	{
		*a = "null";
	}
}

// String to integer (truncating the ToLong() result to int).
static int CastS2I(FString *b)
{
	const auto parsed = b->ToLong();
	return (int)parsed;
}

// String to double.
static double CastS2F(FString *b)
{
	return b->ToDouble();
}
@ -109,6 +118,14 @@ void JitCompiler::EmitCAST()
call->setArg(2, regF[B + 1]);
call->setArg(3, regF[B + 2]);
break;
case CAST_V42S:
	// CastV42S takes the destination string plus FOUR doubles. The call
	// signature must therefore list four double parameters; with only three
	// the argument index 4 set below would lie outside the declared
	// signature and the W component would never reach the callee.
	call = CreateCall<void, FString*, double, double, double, double>(CastV42S);
	call->setArg(0, regS[A]);
	call->setArg(1, regF[B]);
	call->setArg(2, regF[B + 1]);
	call->setArg(3, regF[B + 2]);
	call->setArg(4, regF[B + 3]);
	break;
case CAST_P2S:
call = CreateCall<void, FString*, void*>(CastP2S);
call->setArg(0, regS[A]);

View file

@ -161,6 +161,30 @@ void JitCompiler::EmitSV3_R()
cc.movsd(asmjit::x86::qword_ptr(tmp, 16), regF[B + 2]);
}
// SV4: emits a store of the four doubles fB..fB+3 to the address held in
// pointer register A plus the integer constant konstd[C]. A null pointer in
// A raises X_WRITE_NIL.
void JitCompiler::EmitSV4()
{
	EmitNullPointerThrow(A, X_WRITE_NIL);

	auto dest = newTempIntPtr();
	cc.mov(dest, regA[A]);
	cc.add(dest, konstd[C]);

	// Components are stored back to back, 8 bytes apart.
	for (int i = 0; i < 4; i++)
	{
		cc.movsd(asmjit::x86::qword_ptr(dest, i * 8), regF[B + i]);
	}
}
// SV4_R: emits a store of the four doubles fB..fB+3 to the address held in
// pointer register A plus the offset held in integer register C. A null
// pointer in A raises X_WRITE_NIL.
void JitCompiler::EmitSV4_R()
{
	EmitNullPointerThrow(A, X_WRITE_NIL);

	auto dest = newTempIntPtr();
	cc.mov(dest, regA[A]);
	cc.add(dest, regD[C]);

	// Components are stored back to back, 8 bytes apart.
	for (int i = 0; i < 4; i++)
	{
		cc.movsd(asmjit::x86::qword_ptr(dest, i * 8), regF[B + i]);
	}
}
void JitCompiler::EmitSFV2()
{
EmitNullPointerThrow(A, X_WRITE_NIL);
@ -219,6 +243,40 @@ void JitCompiler::EmitSFV3_R()
cc.movss(asmjit::x86::qword_ptr(tmp, 8), tmpF);
}
// SFV4: emits a store of fB..fB+3 as four single-precision floats to the
// address held in pointer register A plus the integer constant konstd[C].
// A null pointer in A raises X_WRITE_NIL.
void JitCompiler::EmitSFV4()
{
	EmitNullPointerThrow(A, X_WRITE_NIL);

	auto dest = newTempIntPtr();
	cc.mov(dest, regA[A]);
	cc.add(dest, konstd[C]);

	// Each double is narrowed to float in a scratch register and then
	// written out; the floats are 4 bytes apart.
	auto narrowed = newTempXmmSs();
	for (int i = 0; i < 4; i++)
	{
		cc.cvtsd2ss(narrowed, regF[B + i]);
		cc.movss(asmjit::x86::qword_ptr(dest, i * 4), narrowed);
	}
}
// SFV4_R: emits a store of fB..fB+3 as four single-precision floats to the
// address held in pointer register A plus the offset held in integer
// register C. A null pointer in A raises X_WRITE_NIL.
void JitCompiler::EmitSFV4_R()
{
	EmitNullPointerThrow(A, X_WRITE_NIL);

	auto dest = newTempIntPtr();
	cc.mov(dest, regA[A]);
	cc.add(dest, regD[C]);

	// Each double is narrowed to float in a scratch register and then
	// written out; the floats are 4 bytes apart.
	auto narrowed = newTempXmmSs();
	for (int i = 0; i < 4; i++)
	{
		cc.cvtsd2ss(narrowed, regF[B + i]);
		cc.movss(asmjit::x86::qword_ptr(dest, i * 4), narrowed);
	}
}
void JitCompiler::EmitSBIT()
{
EmitNullPointerThrow(A, X_WRITE_NIL);

View file

@ -241,6 +241,7 @@ private:
asmjit::X86Xmm CheckRegF(int r0, int r1);
asmjit::X86Xmm CheckRegF(int r0, int r1, int r2);
asmjit::X86Xmm CheckRegF(int r0, int r1, int r2, int r3);
asmjit::X86Xmm CheckRegF(int r0, int r1, int r2, int r3, int r4);
asmjit::X86Gp CheckRegS(int r0, int r1);
asmjit::X86Gp CheckRegA(int r0, int r1);

View file

@ -80,8 +80,9 @@ enum
REGT_KONST = 4,
REGT_MULTIREG2 = 8,
REGT_MULTIREG3 = 16, // (e.g. a vector)
REGT_MULTIREG = 24,
REGT_MULTIREG = 8 | 16 | 64,
REGT_ADDROF = 32, // used with PARAM: pass address of this register
REGT_MULTIREG4 = 64,
REGT_NIL = 128 // parameter was omitted
};
@ -130,6 +131,22 @@ struct VMReturn
assert(RegType == REGT_FLOAT);
*(double *)Location = val;
}
// Writes a 4-component return value from a raw array of doubles.
// The return slot must have been set up as a float register with the
// MULTIREG4 flag; Location is treated as storage for four doubles.
void SetVector4(const double val[4])
{
	assert(RegType == (REGT_FLOAT|REGT_MULTIREG4));
	double *dest = (double *)Location;
	for (int i = 0; i < 4; ++i)
	{
		dest[i] = val[i];
	}
}
// Writes a 4-component return value from a DVector4.
// The return slot must have been set up as a float register with the
// MULTIREG4 flag; Location is treated as storage for four doubles.
void SetVector4(const DVector4 &val)
{
	assert(RegType == (REGT_FLOAT | REGT_MULTIREG4));
	double *dest = (double *)Location;
	for (int i = 0; i < 4; ++i)
	{
		dest[i] = val[i];
	}
}
void SetVector(const double val[3])
{
assert(RegType == (REGT_FLOAT|REGT_MULTIREG3));

View file

@ -287,18 +287,40 @@ static int ExecScriptFunc(VMFrameStack *stack, VMReturn *ret, int numret)
{
auto v = (double*)ptr;
reg.f[a] = v[0];
reg.f[a + 1] = v[1];
reg.f[a + 2] = v[2];
reg.f[a+1] = v[1];
reg.f[a+2] = v[2];
}
NEXTOP;
OP(LV3_R) :
	// Load vector3 (register-offset form): fills fA..fA+2 with three doubles
	// read through 'ptr' once GETADDR has run (macro not visible here).
	ASSERTF(a + 2); ASSERTA(B); ASSERTD(C);
	GETADDR(PB, RC, X_READ_NIL);
	{
		const double *src = (double*)ptr;
		for (int i = 0; i < 3; ++i)
		{
			reg.f[a + i] = src[i];
		}
	}
	NEXTOP;
OP(LV4) :
	// Load vector4 (constant-offset form): fills fA..fA+3 with four doubles
	// read through 'ptr' once GETADDR has run (macro not visible here).
	ASSERTF(a + 3); ASSERTA(B); ASSERTKD(C);
	GETADDR(PB, KC, X_READ_NIL);
	{
		const double *src = (double*)ptr;
		for (int i = 0; i < 4; ++i)
		{
			reg.f[a + i] = src[i];
		}
	}
	NEXTOP;
OP(LV4_R) :
	// Load vector4 (register-offset form): fills fA..fA+3 with four doubles
	// read through 'ptr' once GETADDR has run (macro not visible here).
	ASSERTF(a + 3); ASSERTA(B); ASSERTD(C);
	GETADDR(PB, RC, X_READ_NIL);
	{
		const double *src = (double*)ptr;
		for (int i = 0; i < 4; ++i)
		{
			reg.f[a + i] = src[i];
		}
	}
	NEXTOP;
OP(LFV2):
@ -339,6 +361,28 @@ static int ExecScriptFunc(VMFrameStack *stack, VMReturn *ret, int numret)
reg.f[a+2] = v[2];
}
NEXTOP;
OP(LFV4) :
	// Load fvector4 (constant-offset form): reads four single-precision
	// floats through 'ptr' and widens them into the double registers fA..fA+3.
	ASSERTF(a + 3); ASSERTA(B); ASSERTKD(C);
	GETADDR(PB, KC, X_READ_NIL);
	{
		const float *src = (float*)ptr;
		for (int i = 0; i < 4; ++i)
		{
			reg.f[a + i] = src[i];
		}
	}
	NEXTOP;
OP(LFV4_R) :
	// Load fvector4 (register-offset form): reads four single-precision
	// floats through 'ptr' and widens them into the double registers fA..fA+3.
	ASSERTF(a + 3); ASSERTA(B); ASSERTD(C);
	GETADDR(PB, RC, X_READ_NIL);
	{
		const float *src = (float*)ptr;
		for (int i = 0; i < 4; ++i)
		{
			reg.f[a + i] = src[i];
		}
	}
	NEXTOP;
OP(LBIT):
ASSERTD(a); ASSERTA(B);
GETADDR(PB,0,X_READ_NIL);
@ -468,6 +512,28 @@ static int ExecScriptFunc(VMFrameStack *stack, VMReturn *ret, int numret)
v[2] = reg.f[B+2];
}
NEXTOP;
OP(SV4):
	// Store vector4 (constant-offset form): writes fB..fB+3 as four doubles
	// through 'ptr' once GETADDR has run (macro not visible here).
	ASSERTA(a); ASSERTF(B+3); ASSERTKD(C);
	GETADDR(PA,KC,X_WRITE_NIL);
	{
		double *dst = (double *)ptr;
		for (int i = 0; i < 4; ++i)
		{
			dst[i] = reg.f[B + i];
		}
	}
	NEXTOP;
OP(SV4_R):
	// Store vector4 (register-offset form): writes fB..fB+3 as four doubles
	// through 'ptr' once GETADDR has run (macro not visible here).
	ASSERTA(a); ASSERTF(B+3); ASSERTD(C);
	GETADDR(PA,RC,X_WRITE_NIL);
	{
		double *dst = (double *)ptr;
		for (int i = 0; i < 4; ++i)
		{
			dst[i] = reg.f[B + i];
		}
	}
	NEXTOP;
OP(SFV2):
ASSERTA(a); ASSERTF(B+1); ASSERTKD(C);
GETADDR(PA,KC,X_WRITE_NIL);
@ -506,6 +572,28 @@ static int ExecScriptFunc(VMFrameStack *stack, VMReturn *ret, int numret)
v[2] = (float)reg.f[B+2];
}
NEXTOP;
OP(SFV4):
	// Store fvector4 (constant-offset form): narrows fB..fB+3 to float and
	// writes the four values through 'ptr'.
	ASSERTA(a); ASSERTF(B+3); ASSERTKD(C);
	GETADDR(PA,KC,X_WRITE_NIL);
	{
		float *dst = (float *)ptr;
		for (int i = 0; i < 4; ++i)
		{
			dst[i] = (float)reg.f[B + i];
		}
	}
	NEXTOP;
OP(SFV4_R):
	// Store fvector4 (register-offset form): narrows fB..fB+3 to float and
	// writes the four values through 'ptr'.
	ASSERTA(a); ASSERTF(B+3); ASSERTD(C);
	GETADDR(PA,RC,X_WRITE_NIL);
	{
		float *dst = (float *)ptr;
		for (int i = 0; i < 4; ++i)
		{
			dst[i] = (float)reg.f[B + i];
		}
	}
	NEXTOP;
OP(SBIT):
ASSERTA(a); ASSERTD(B);
GETADDR(PA,0,X_WRITE_NIL);
@ -555,6 +643,16 @@ static int ExecScriptFunc(VMFrameStack *stack, VMReturn *ret, int numret)
reg.f[a + 2] = reg.f[b + 2];
NEXTOP;
}
OP(MOVEV4) :
{
	// fA..fA+3 = fB..fB+3. The copy touches four registers on each side, so
	// the range check covers the LAST register of each range, matching the
	// other vector4 handlers (checking only the first register would let an
	// out-of-range a+3 / B+3 slip through in debug builds).
	ASSERTF(a + 3); ASSERTF(B + 3);
	b = B;
	reg.f[a] = reg.f[b];
	reg.f[a + 1] = reg.f[b + 1];
	reg.f[a + 2] = reg.f[b + 2];
	reg.f[a + 3] = reg.f[b + 3];
	NEXTOP;
}
OP(DYNCAST_R) :
ASSERTA(a); ASSERTA(B); ASSERTA(C);
b = B;
@ -713,6 +811,15 @@ static int ExecScriptFunc(VMFrameStack *stack, VMReturn *ret, int numret)
::new(param + 2) VMValue(reg.f[b + 2]);
f->NumParam += 2;
break;
case REGT_FLOAT | REGT_MULTIREG4:
	// A vector4 argument occupies four consecutive float registers and four
	// consecutive parameter slots.
	assert(b < f->NumRegF - 3);
	// Slots param..param+3 are written below, so three slots beyond the
	// current one must still be available (the previous bound of
	// MaxParam - 2 only covered a vector3).
	assert(f->NumParam < sfunc->MaxParam - 3);
	::new(param) VMValue(reg.f[b]);
	::new(param + 1) VMValue(reg.f[b + 1]);
	::new(param + 2) VMValue(reg.f[b + 2]);
	::new(param + 3) VMValue(reg.f[b + 3]);
	// Accounts for the three extra slots. NOTE(review): the first slot is
	// presumably counted by code shared by all cases after the switch, as
	// with the "+= 2" of the vector3 case - confirm.
	f->NumParam += 3;
	break;
case REGT_FLOAT | REGT_ADDROF:
assert(b < f->NumRegF);
::new(param) VMValue(&reg.f[b]);
@ -1690,6 +1797,97 @@ static int ExecScriptFunc(VMFrameStack *stack, VMReturn *ret, int numret)
fcp = &konstf[C];
goto Do_EQV3;
OP(NEGV4):
ASSERTF(a+3); ASSERTF(B+3);
reg.f[a] = -reg.f[B];
reg.f[a+1] = -reg.f[B+1];
reg.f[a+2] = -reg.f[B+2];
reg.f[a+3] = -reg.f[B+3];
NEXTOP;
OP(ADDV4_RR):
ASSERTF(a+3); ASSERTF(B+3); ASSERTF(C+3);
fcp = &reg.f[C];
fbp = &reg.f[B];
reg.f[a] = fbp[0] + fcp[0];
reg.f[a+1] = fbp[1] + fcp[1];
reg.f[a+2] = fbp[2] + fcp[2];
reg.f[a+3] = fbp[3] + fcp[3];
NEXTOP;
OP(SUBV4_RR):
ASSERTF(a+3); ASSERTF(B+3); ASSERTF(C+3);
fbp = &reg.f[B];
fcp = &reg.f[C];
reg.f[a] = fbp[0] - fcp[0];
reg.f[a+1] = fbp[1] - fcp[1];
reg.f[a+2] = fbp[2] - fcp[2];
reg.f[a+3] = fbp[3] - fcp[3];
NEXTOP;
OP(DOTV4_RR):
ASSERTF(a); ASSERTF(B+3); ASSERTF(C+3);
reg.f[a] = reg.f[B] * reg.f[C] + reg.f[B+1] * reg.f[C+1] + reg.f[B+2] * reg.f[C+2] + reg.f[B+3] * reg.f[C+3];
NEXTOP;
OP(MULVF4_RR):
ASSERTF(a+3); ASSERTF(B+3); ASSERTF(C);
fc = reg.f[C];
fbp = &reg.f[B];
Do_MULV4:
reg.f[a] = fbp[0] * fc;
reg.f[a+1] = fbp[1] * fc;
reg.f[a+2] = fbp[2] * fc;
reg.f[a+3] = fbp[3] * fc;
NEXTOP;
OP(MULVF4_RK):
	// vA = vB * kfC: takes the scalar from the float constant table, then
	// reuses the multiply loop at Do_MULV4.
	ASSERTF(a+3); ASSERTF(B+3); ASSERTKF(C);
	fbp = &reg.f[B];
	fc = konstf[C];
	goto Do_MULV4;
OP(DIVVF4_RR):
ASSERTF(a+3); ASSERTF(B+3); ASSERTF(C);
fc = reg.f[C];
fbp = &reg.f[B];
Do_DIVV4:
reg.f[a] = fbp[0] / fc;
reg.f[a+1] = fbp[1] / fc;
reg.f[a+2] = fbp[2] / fc;
reg.f[a+3] = fbp[3] / fc;
NEXTOP;
OP(DIVVF4_RK):
	// vA = vB / kfC: takes the divisor from the float constant table, then
	// reuses the divide loop at Do_DIVV4.
	ASSERTF(a+3); ASSERTF(B+3); ASSERTKF(C);
	fbp = &reg.f[B];
	fc = konstf[C];
	goto Do_DIVV4;
OP(LENV4):
ASSERTF(a); ASSERTF(B+3);
reg.f[a] = g_sqrt(reg.f[B] * reg.f[B] + reg.f[B+1] * reg.f[B+1] + reg.f[B+2] * reg.f[B+2]+ reg.f[B+3] * reg.f[B+3]);
NEXTOP;
// EQV4_R: compares the vector4 in fB..fB+3 with the one in fC..fC+3 and hands
// the boolean result to CMPJMP (macro not visible here; presumably it performs
// the conditional jump - confirm).
OP(EQV4_R):
ASSERTF(B+3); ASSERTF(C+3);
fcp = &reg.f[C];
// Shared comparison body; EQV4_K jumps here with fcp pointing at constants.
Do_EQV4:
if (a & CMP_APPROX)
{
// Approximate mode: every component pair must differ by less than VM_EPSILON.
CMPJMP(fabs(reg.f[B ] - fcp[0]) < VM_EPSILON &&
fabs(reg.f[B+1] - fcp[1]) < VM_EPSILON &&
fabs(reg.f[B+2] - fcp[2]) < VM_EPSILON &&
fabs(reg.f[B+3] - fcp[3]) < VM_EPSILON);
}
else
{
// Exact mode: all four components must compare equal.
CMPJMP(reg.f[B] == fcp[0] && reg.f[B+1] == fcp[1] && reg.f[B+2] == fcp[2] && reg.f[B+3] == fcp[3]);
}
NEXTOP;
// EQV4_K: same comparison as EQV4_R, but the right-hand vector is taken from
// the float constant table (konstf[C]..konstf[C+3]); continues at Do_EQV4.
OP(EQV4_K):
ASSERTF(B+3); ASSERTKF(C+3);
fcp = &konstf[C];
goto Do_EQV4;
OP(ADDA_RR):
ASSERTA(a); ASSERTA(B); ASSERTD(C);
c = reg.d[C];
@ -2028,7 +2226,11 @@ static void SetReturn(const VMRegisters &reg, VMFrame *frame, VMReturn *ret, VM_
assert(regnum < frame->NumRegF);
src = &reg.f[regnum];
}
if (regtype & REGT_MULTIREG3)
if (regtype & REGT_MULTIREG4)
{
ret->SetVector4((double*)src);
}
else if (regtype & REGT_MULTIREG3)
{
ret->SetVector((double *)src);
}

View file

@ -126,6 +126,7 @@ enum
CAST_So2S,
CAST_V22S,
CAST_V32S,
CAST_V42S,
CAST_SID2S,
CAST_TID2S,

View file

@ -51,12 +51,16 @@ xx(LV2, lv2, RVRPKI, LV2_R, 4, REGT_INT) // load vector2
xx(LV2_R, lv2, RVRPRI, NOP, 0, 0)
xx(LV3, lv3, RVRPKI, LV3_R, 4, REGT_INT) // load vector3
xx(LV3_R, lv3, RVRPRI, NOP, 0, 0)
xx(LV4, lv4, RVRPKI, LV4_R, 4, REGT_INT) // load vector4
xx(LV4_R, lv4, RVRPRI, NOP, 0, 0)
xx(LCS, lcs, RSRPKI, LCS_R, 4, REGT_INT) // load string from char ptr.
xx(LCS_R, lcs, RSRPRI, NOP, 0, 0)
xx(LFV2, lfv2, RVRPKI, LFV2_R, 4, REGT_INT) // load fvector2
xx(LFV2_R, lfv2, RVRPRI, NOP, 0, 0)
xx(LFV3, lfv3, RVRPKI, LFV3_R, 4, REGT_INT) // load fvector3
xx(LFV3_R, lfv3, RVRPRI, NOP, 0, 0)
xx(LFV4, lfv4, RVRPKI, LFV4_R, 4, REGT_INT) // load fvector4
xx(LFV4_R, lfv4, RVRPRI, NOP, 0, 0)
xx(LBIT, lbit, RIRPI8, NOP, 0, 0) // rA = !!(*rB & C) -- *rB is a byte
@ -81,10 +85,14 @@ xx(SV2, sv2, RPRVKI, SV2_R, 4, REGT_INT) // store vector2
xx(SV2_R, sv2, RPRVRI, NOP, 0, 0)
xx(SV3, sv3, RPRVKI, SV3_R, 4, REGT_INT) // store vector3
xx(SV3_R, sv3, RPRVRI, NOP, 0, 0)
xx(SV4, sv4, RPRVKI, SV4_R, 4, REGT_INT) // store vector4
xx(SV4_R, sv4, RPRVRI, NOP, 0, 0)
xx(SFV2, sfv2, RPRVKI, SFV2_R, 4, REGT_INT) // store fvector2
xx(SFV2_R, sfv2, RPRVRI, NOP, 0, 0)
xx(SFV3, sfv3, RPRVKI, SFV3_R, 4, REGT_INT) // store fvector3
xx(SFV3_R, sfv3, RPRVRI, NOP, 0, 0)
xx(SFV4, sfv4, RPRVKI, SFV4_R, 4, REGT_INT) // store fvector4
xx(SFV4_R, sfv4, RPRVRI, NOP, 0, 0)
xx(SBIT, sbit, RPRII8, NOP, 0, 0) // *rA |= C if rB is true, *rA &= ~C otherwise
@ -95,6 +103,7 @@ xx(MOVES, mov, RSRS, NOP, 0, 0) // sA = sB
xx(MOVEA, mov, RPRP, NOP, 0, 0) // aA = aB
xx(MOVEV2, mov2, RFRF, NOP, 0, 0) // fA = fB (2 elements)
xx(MOVEV3, mov3, RFRF, NOP, 0, 0) // fA = fB (3 elements)
xx(MOVEV4, mov4, RFRF, NOP, 0, 0) // fA = fB (4 elements)
xx(CAST, cast, CAST, NOP, 0, 0) // xA = xB, conversion specified by C
xx(CASTB, castb, CAST, NOP, 0, 0) // xA = !!xB, type specified by C
xx(DYNCAST_R, dyncast, RPRPRP, NOP, 0, 0) // aA = dyn_cast<aC>(aB);
@ -256,6 +265,19 @@ xx(LENV3, lenv3, RFRV, NOP, 0, 0) // fA = vB.Length
xx(EQV3_R, beqv3, CVRR, NOP, 0, 0) // if ((vB == vkC) != A) then pc++ (inexact if A & 33)
xx(EQV3_K, beqv3, CVRK, NOP, 0, 0) // this will never be used.
// Vector math (4D)
xx(NEGV4, negv4, RVRV, NOP, 0, 0) // vA = -vB
xx(ADDV4_RR, addv4, RVRVRV, NOP, 0, 0) // vA = vB + vC
xx(SUBV4_RR, subv4, RVRVRV, NOP, 0, 0) // vA = vB - vC
xx(DOTV4_RR, dotv4, RVRVRV, NOP, 0, 0) // fA = vB dot vC (scalar result)
xx(MULVF4_RR, mulv4, RVRVRF, NOP, 0, 0) // vA = vB * fkC
xx(MULVF4_RK, mulv4, RVRVKF, MULVF4_RR, 4, REGT_FLOAT)
xx(DIVVF4_RR, divv4, RVRVRF, NOP, 0, 0) // vA = vB / fkC
xx(DIVVF4_RK, divv4, RVRVKF, DIVVF4_RR, 4, REGT_FLOAT)
xx(LENV4, lenv4, RFRV, NOP, 0, 0) // fA = vB.Length
xx(EQV4_R, beqv4, CVRR, NOP, 0, 0) // conditional jump on (vB == vC); inexact (epsilon) compare if A & CMP_APPROX
xx(EQV4_K, beqv4, CVRK, NOP, 0, 0) // this will never be used.
// Pointer math.
xx(ADDA_RR, add, RPRPRI, NOP, 0, 0) // pA = pB + dkC
xx(ADDA_RK, add, RPRPKI, ADDA_RR,4, REGT_INT)

View file

@ -3,7 +3,7 @@
** Wrappers for the malloc family of functions that count used bytes.
**
**---------------------------------------------------------------------------
** Copyright 1998-2008 Randy Heit
** Copyright 1998-2008 Marisa Heit
** All rights reserved.
**
** Redistribution and use in source and binary forms, with or without
@ -45,7 +45,7 @@
#endif
#include "engineerrors.h"
#include "dobject.h"
#include "dobjgc.h"
#ifndef _MSC_VER
#define _NORMAL_BLOCK 0
@ -59,25 +59,22 @@ void *M_Malloc(size_t size)
{
void *block = malloc(size);
if (block == NULL)
if (block == nullptr)
I_FatalError("Could not malloc %zu bytes", size);
GC::AllocBytes += _msize(block);
GC::ReportAlloc(_msize(block));
return block;
}
// Resizes a block obtained from M_Malloc/M_Realloc and reports the size change
// to the garbage collector through GC::ReportRealloc.
//
// memblock  block to resize; may be null, in which case the old size is
//           reported as 0.
// size      requested new size in bytes.
// Returns the (possibly moved) block. A failed realloc() is reported through
// I_FatalError.
//
// The size change is reported exactly once: the former manual
// GC::AllocBytes adjustments and the duplicated NULL test are gone, so the
// accounting is not applied twice.
void *M_Realloc(void *memblock, size_t size)
{
	// The old size has to be read before realloc() may move or release the block.
	size_t oldsize = memblock ? _msize(memblock) : 0;
	void *block = realloc(memblock, size);
	if (block == nullptr)
	{
		I_FatalError("Could not realloc %zu bytes", size);
	}
	GC::ReportRealloc(oldsize, _msize(block));
	return block;
}
#else
@ -85,28 +82,25 @@ void *M_Malloc(size_t size)
{
void *block = malloc(size+sizeof(size_t));
if (block == NULL)
if (block == nullptr)
I_FatalError("Could not malloc %zu bytes", size);
size_t *sizeStore = (size_t *) block;
*sizeStore = size;
block = sizeStore+1;
GC::AllocBytes += _msize(block);
GC::ReportAlloc(_msize(block));
return block;
}
void *M_Realloc(void *memblock, size_t size)
{
if(memblock == NULL)
if (memblock == nullptr)
return M_Malloc(size);
if (memblock != NULL)
{
GC::AllocBytes -= _msize(memblock);
}
size_t oldsize = _msize(memblock);
void *block = realloc(((size_t*) memblock)-1, size+sizeof(size_t));
if (block == NULL)
if (block == nullptr)
{
I_FatalError("Could not realloc %zu bytes", size);
}
@ -115,7 +109,7 @@ void *M_Realloc(void *memblock, size_t size)
*sizeStore = size;
block = sizeStore+1;
GC::AllocBytes += _msize(block);
GC::ReportRealloc(oldsize, _msize(block));
return block;
}
#endif
@ -129,25 +123,22 @@ void *M_Malloc_Dbg(size_t size, const char *file, int lineno)
{
void *block = _malloc_dbg(size, _NORMAL_BLOCK, file, lineno);
if (block == NULL)
if (block == nullptr)
I_FatalError("Could not malloc %zu bytes in %s, line %d", size, file, lineno);
GC::AllocBytes += _msize(block);
GC::ReportAlloc(_msize(block));
return block;
}
// Debug-heap counterpart of M_Realloc: resizes a block through _realloc_dbg,
// tagging it with the caller's file and line, and reports the size change to
// the garbage collector through GC::ReportRealloc.
//
// memblock  block to resize; may be null, in which case the old size is
//           reported as 0.
// size      requested new size in bytes.
// file      source file of the caller, passed on to the debug heap.
// lineno    source line of the caller, passed on to the debug heap.
// Returns the (possibly moved) block. A failed reallocation is reported
// through I_FatalError.
//
// The size change is reported exactly once: the former manual
// GC::AllocBytes adjustments and the duplicated NULL test are gone, so the
// accounting is not applied twice.
void *M_Realloc_Dbg(void *memblock, size_t size, const char *file, int lineno)
{
	// The old size has to be read before the block may be moved or released.
	size_t oldsize = memblock ? _msize(memblock) : 0;
	void *block = _realloc_dbg(memblock, size, _NORMAL_BLOCK, file, lineno);
	if (block == nullptr)
	{
		I_FatalError("Could not realloc %zu bytes in %s, line %d", size, file, lineno);
	}
	GC::ReportRealloc(oldsize, _msize(block));
	return block;
}
#else
@ -155,29 +146,26 @@ void *M_Malloc_Dbg(size_t size, const char *file, int lineno)
{
void *block = _malloc_dbg(size+sizeof(size_t), _NORMAL_BLOCK, file, lineno);
if (block == NULL)
if (block == nullptr)
I_FatalError("Could not malloc %zu bytes in %s, line %d", size, file, lineno);
size_t *sizeStore = (size_t *) block;
*sizeStore = size;
block = sizeStore+1;
GC::AllocBytes += _msize(block);
GC::ReportAlloc(_msize(block));
return block;
}
void *M_Realloc_Dbg(void *memblock, size_t size, const char *file, int lineno)
{
if(memblock == NULL)
if (memblock == nullptr)
return M_Malloc_Dbg(size, file, lineno);
if (memblock != NULL)
{
GC::AllocBytes -= _msize(memblock);
}
size_t oldsize = _msize(memblock);
void *block = _realloc_dbg(((size_t*) memblock)-1, size+sizeof(size_t), _NORMAL_BLOCK, file, lineno);
if (block == NULL)
if (block == nullptr)
{
I_FatalError("Could not realloc %zu bytes in %s, line %d", size, file, lineno);
}
@ -186,29 +174,22 @@ void *M_Realloc_Dbg(void *memblock, size_t size, const char *file, int lineno)
*sizeStore = size;
block = sizeStore+1;
GC::AllocBytes += _msize(block);
GC::ReportRealloc(oldsize, _msize(block));
return block;
}
#endif
#endif
void M_Free (void *block)
{
if (block != nullptr)
{
GC::ReportDealloc(_msize(block));
#if !defined(__solaris__) && !defined(__OpenBSD__) && !defined(__DragonFly__)
void M_Free (void *block)
{
if (block != NULL)
{
GC::AllocBytes -= _msize(block);
free(block);
}
}
#else
void M_Free (void *block)
{
if(block != NULL)
{
GC::AllocBytes -= _msize(block);
free(((size_t*) block)-1);
#endif
}
}
#endif

View file

@ -371,7 +371,7 @@ class ScreenJobRunner : Object UI
// Tells whether a screen wipe may be performed for the current job.
// Returns the inverse of the job's 'nowipe' flag while 'index' is below the
// job count, and true otherwise.
bool CanWipe()
{
	// 'index < jobs.Size()' also holds for a negative index (presumably the
	// state before the first job has started - confirm), so the index is
	// clamped to 0 before it is used. The former unclamped lookup ran first
	// and made this clamped one unreachable.
	if (index < jobs.Size()) return !jobs[max(0, index)].nowipe;
	return true;
}