mirror of
https://github.com/ZDoom/qzdoom.git
synced 2025-03-17 08:21:28 +00:00
Merge branch 'asmjit' of https://github.com/coelckers/gzdoom
This commit is contained in:
commit
411430a03d
27 changed files with 504 additions and 378 deletions
|
@ -749,7 +749,7 @@ static void (*MBFCodePointerFactories[])(FunctionCallEmitter&, int, int) =
|
|||
|
||||
void SetDehParams(FState *state, int codepointer)
|
||||
{
|
||||
static uint8_t regts[] = { REGT_POINTER, REGT_POINTER, REGT_POINTER };
|
||||
static const uint8_t regts[] = { REGT_POINTER, REGT_POINTER, REGT_POINTER };
|
||||
int value1 = state->GetMisc1();
|
||||
int value2 = state->GetMisc2();
|
||||
if (!(value1|value2)) return;
|
||||
|
|
|
@ -37,10 +37,12 @@
|
|||
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <exception>
|
||||
#include <stdexcept>
|
||||
|
||||
#define MAX_ERRORTEXT 1024
|
||||
|
||||
class CDoomError
|
||||
class CDoomError : public std::exception
|
||||
{
|
||||
public:
|
||||
CDoomError ()
|
||||
|
@ -69,13 +71,22 @@ public:
|
|||
else
|
||||
return NULL;
|
||||
}
|
||||
char const *what() const noexcept override
|
||||
{
|
||||
return m_Message;
|
||||
}
|
||||
|
||||
|
||||
protected:
|
||||
char m_Message[MAX_ERRORTEXT];
|
||||
};
|
||||
|
||||
class CNoRunExit : public CDoomError
|
||||
// Exception type whose what() text is always the fixed string "NoRunExit".
// It derives from std::runtime_error, so handlers that catch std::exception
// receive it and can recognise it by comparing what() against that string.
class CNoRunExit : public std::runtime_error
{
public:
	CNoRunExit()
		: std::runtime_error("NoRunExit")
	{
	}
};
|
||||
|
||||
class CRecoverableError : public CDoomError
|
||||
|
|
|
@ -61,7 +61,7 @@ DEFINE_FIELD_X(GameInfoStruct, gameinfo_t, statusscreen_single)
|
|||
DEFINE_FIELD_X(GameInfoStruct, gameinfo_t, statusscreen_coop)
|
||||
DEFINE_FIELD_X(GameInfoStruct, gameinfo_t, statusscreen_dm)
|
||||
DEFINE_FIELD_X(GameInfoStruct, gameinfo_t, mSliderColor)
|
||||
|
||||
DEFINE_FIELD_X(GameInfoStruct, gameinfo_t, defaultbloodcolor)
|
||||
|
||||
const char *GameNames[17] =
|
||||
{
|
||||
|
|
|
@ -56,10 +56,24 @@ struct ProgramBinary
|
|||
TArray<uint8_t> data;
|
||||
};
|
||||
|
||||
const char *ShaderMagic = "ZDSC";
|
||||
static const char *ShaderMagic = "ZDSC";
|
||||
|
||||
static std::map<FString, std::unique_ptr<ProgramBinary>> ShaderCache; // Not a TMap because it doesn't support unique_ptr move semantics
|
||||
|
||||
bool IsShaderCacheActive()
|
||||
{
|
||||
static bool active = true;
|
||||
static bool firstcall = true;
|
||||
|
||||
if (firstcall)
|
||||
{
|
||||
const char *vendor = (const char *)glGetString(GL_VENDOR);
|
||||
active = !(strstr(vendor, "Intel") == nullptr);
|
||||
firstcall = false;
|
||||
}
|
||||
return active;
|
||||
}
|
||||
|
||||
static FString CalcProgramBinaryChecksum(const FString &vertex, const FString &fragment)
|
||||
{
|
||||
const GLubyte *vendor = glGetString(GL_VENDOR);
|
||||
|
@ -421,7 +435,9 @@ bool FShader::Load(const char * name, const char * vert_prog_lump, const char *
|
|||
FGLDebug::LabelObject(GL_PROGRAM, hShader, name);
|
||||
|
||||
uint32_t binaryFormat = 0;
|
||||
TArray<uint8_t> binary = LoadCachedProgramBinary(vp_comb, fp_comb, binaryFormat);
|
||||
TArray<uint8_t> binary;
|
||||
if (IsShaderCacheActive())
|
||||
binary = LoadCachedProgramBinary(vp_comb, fp_comb, binaryFormat);
|
||||
|
||||
bool linked = false;
|
||||
if (binary.Size() > 0 && glProgramBinary)
|
||||
|
@ -481,7 +497,7 @@ bool FShader::Load(const char * name, const char * vert_prog_lump, const char *
|
|||
// only print message if there's an error.
|
||||
I_Error("Init Shader '%s':\n%s\n", name, error.GetChars());
|
||||
}
|
||||
else if (glProgramBinary)
|
||||
else if (glProgramBinary && IsShaderCacheActive())
|
||||
{
|
||||
int binaryLength = 0;
|
||||
glGetProgramiv(hShader, GL_PROGRAM_BINARY_LENGTH, &binaryLength);
|
||||
|
|
|
@ -37,6 +37,7 @@
|
|||
namespace OpenGLRenderer
|
||||
{
|
||||
|
||||
bool IsShaderCacheActive();
|
||||
TArray<uint8_t> LoadCachedProgramBinary(const FString &vertex, const FString &fragment, uint32_t &binaryFormat);
|
||||
void SaveCachedProgramBinary(const FString &vertex, const FString &fragment, const TArray<uint8_t> &binary, uint32_t binaryFormat);
|
||||
|
||||
|
@ -142,7 +143,9 @@ void FShaderProgram::Link(const char *name)
|
|||
FGLDebug::LabelObject(GL_PROGRAM, mProgram, name);
|
||||
|
||||
uint32_t binaryFormat = 0;
|
||||
TArray<uint8_t> binary = LoadCachedProgramBinary(mShaderSources[Vertex], mShaderSources[Fragment], binaryFormat);
|
||||
TArray<uint8_t> binary;
|
||||
if (IsShaderCacheActive())
|
||||
binary = LoadCachedProgramBinary(mShaderSources[Vertex], mShaderSources[Fragment], binaryFormat);
|
||||
|
||||
bool loadedFromBinary = false;
|
||||
if (binary.Size() > 0 && glProgramBinary)
|
||||
|
@ -168,7 +171,7 @@ void FShaderProgram::Link(const char *name)
|
|||
{
|
||||
I_FatalError("Link Shader '%s':\n%s\n", name, GetProgramInfoLog(mProgram).GetChars());
|
||||
}
|
||||
else if (glProgramBinary)
|
||||
else if (glProgramBinary && IsShaderCacheActive())
|
||||
{
|
||||
int binaryLength = 0;
|
||||
glGetProgramiv(mProgram, GL_PROGRAM_BINARY_LENGTH, &binaryLength);
|
||||
|
|
|
@ -147,7 +147,7 @@ FModelVertexBuffer::FModelVertexBuffer(bool needindex, bool singleframe)
|
|||
{ 1, VATTR_VERTEX2, VFmt_Float3, (int)myoffsetof(FModelVertex, x) },
|
||||
{ 1, VATTR_NORMAL2, VFmt_Packed_A2R10G10B10, (int)myoffsetof(FModelVertex, packedNormal) }
|
||||
};
|
||||
mVertexBuffer->SetFormat(2, 4, sizeof(FModelVertex), format);
|
||||
mVertexBuffer->SetFormat(2, 5, sizeof(FModelVertex), format);
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
|
|
|
@ -4487,7 +4487,7 @@ DEFINE_ACTION_FUNCTION(AActor, AimLineAttack)
|
|||
PARAM_SELF_PROLOGUE(AActor);
|
||||
PARAM_ANGLE(angle);
|
||||
PARAM_FLOAT(distance);
|
||||
PARAM_POINTER(pLineTarget, FTranslatedLineTarget);
|
||||
PARAM_OUTPOINTER(pLineTarget, FTranslatedLineTarget);
|
||||
PARAM_ANGLE(vrange);
|
||||
PARAM_INT(flags);
|
||||
PARAM_OBJECT(target, AActor);
|
||||
|
@ -4924,7 +4924,7 @@ DEFINE_ACTION_FUNCTION(AActor, LineAttack)
|
|||
PARAM_NAME(damageType);
|
||||
PARAM_CLASS(puffType, AActor);
|
||||
PARAM_INT(flags);
|
||||
PARAM_POINTER(victim, FTranslatedLineTarget);
|
||||
PARAM_OUTPOINTER(victim, FTranslatedLineTarget);
|
||||
PARAM_FLOAT(offsetz);
|
||||
PARAM_FLOAT(offsetforward);
|
||||
PARAM_FLOAT(offsetside);
|
||||
|
@ -5090,7 +5090,7 @@ DEFINE_ACTION_FUNCTION(AActor, LineTrace)
|
|||
PARAM_FLOAT(offsetz);
|
||||
PARAM_FLOAT(offsetforward);
|
||||
PARAM_FLOAT(offsetside);
|
||||
PARAM_POINTER(data, FLineTraceData);
|
||||
PARAM_OUTPOINTER(data, FLineTraceData);
|
||||
ACTION_RETURN_BOOL(P_LineTrace(self,angle,distance,pitch,flags,offsetz,offsetforward,offsetside,data));
|
||||
}
|
||||
|
||||
|
|
|
@ -7420,7 +7420,7 @@ DEFINE_ACTION_FUNCTION(AActor, SpawnPlayerMissile)
|
|||
PARAM_FLOAT(x);
|
||||
PARAM_FLOAT(y);
|
||||
PARAM_FLOAT(z);
|
||||
PARAM_POINTER(lt, FTranslatedLineTarget);
|
||||
PARAM_OUTPOINTER(lt, FTranslatedLineTarget);
|
||||
PARAM_BOOL(nofreeaim);
|
||||
PARAM_BOOL(noautoaim);
|
||||
PARAM_INT(aimflags);
|
||||
|
|
|
@ -127,8 +127,13 @@ void PolyTriangleThreadData::ClearStencil(uint8_t value)
|
|||
int height = buffer->Height();
|
||||
uint8_t *data = buffer->Values();
|
||||
|
||||
data += core * width;
|
||||
for (int y = core; y < height; y += num_cores)
|
||||
int start_y = numa_node * height / num_numa_nodes;
|
||||
int end_y = (numa_node + 1) * height / num_numa_nodes;
|
||||
int core_skip = (num_cores - (start_y - core) % num_cores) % num_cores;
|
||||
start_y += core_skip;
|
||||
|
||||
data += start_y * width;
|
||||
for (int y = start_y; y < end_y; y += num_cores)
|
||||
{
|
||||
memset(data, value, width);
|
||||
data += num_cores * width;
|
||||
|
@ -146,6 +151,8 @@ void PolyTriangleThreadData::SetViewport(int x, int y, int width, int height, ui
|
|||
dest_height = new_dest_height;
|
||||
dest_pitch = new_dest_pitch;
|
||||
dest_bgra = new_dest_bgra;
|
||||
numa_start_y = numa_node * dest_height / num_numa_nodes;
|
||||
numa_end_y = (numa_node + 1) * dest_height / num_numa_nodes;
|
||||
ccw = true;
|
||||
weaponScene = false;
|
||||
}
|
||||
|
@ -642,7 +649,7 @@ int PolyTriangleThreadData::ClipEdge(const ShadedTriVertex *verts, ShadedTriVert
|
|||
PolyTriangleThreadData *PolyTriangleThreadData::Get(DrawerThread *thread)
|
||||
{
|
||||
if (!thread->poly)
|
||||
thread->poly = std::make_shared<PolyTriangleThreadData>(thread->core, thread->num_cores);
|
||||
thread->poly = std::make_shared<PolyTriangleThreadData>(thread->core, thread->num_cores, thread->numa_node, thread->num_numa_nodes);
|
||||
return thread->poly.get();
|
||||
}
|
||||
|
||||
|
|
|
@ -48,7 +48,7 @@ public:
|
|||
class PolyTriangleThreadData
|
||||
{
|
||||
public:
|
||||
PolyTriangleThreadData(int32_t core, int32_t num_cores) : core(core), num_cores(num_cores) { }
|
||||
PolyTriangleThreadData(int32_t core, int32_t num_cores, int32_t numa_node, int32_t num_numa_nodes) : core(core), num_cores(num_cores), numa_node(numa_node), num_numa_nodes(num_numa_nodes) { }
|
||||
|
||||
void ClearStencil(uint8_t value);
|
||||
void SetViewport(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra);
|
||||
|
@ -63,12 +63,18 @@ public:
|
|||
|
||||
int32_t core;
|
||||
int32_t num_cores;
|
||||
int32_t numa_node;
|
||||
int32_t num_numa_nodes;
|
||||
|
||||
int numa_start_y;
|
||||
int numa_end_y;
|
||||
|
||||
// The number of lines to skip to reach the first line to be rendered by this thread
|
||||
int skipped_by_thread(int first_line)
|
||||
{
|
||||
int core_skip = (num_cores - (first_line - core) % num_cores) % num_cores;
|
||||
return core_skip;
|
||||
int clip_first_line = MAX(first_line, numa_start_y);
|
||||
int core_skip = (num_cores - (clip_first_line - core) % num_cores) % num_cores;
|
||||
return clip_first_line + core_skip - first_line;
|
||||
}
|
||||
|
||||
static PolyTriangleThreadData *Get(DrawerThread *thread);
|
||||
|
|
|
@ -59,9 +59,9 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, PolyTriangleThreadDat
|
|||
SortVertices(args, sortedVertices);
|
||||
|
||||
int clipright = args->clipright;
|
||||
int clipbottom = args->clipbottom;
|
||||
int cliptop = thread->numa_start_y;
|
||||
int clipbottom = MIN(args->clipbottom, thread->numa_end_y);
|
||||
|
||||
// Ranges that different triangles edges are active
|
||||
int topY = (int)(sortedVertices[0]->y + 0.5f);
|
||||
int midY = (int)(sortedVertices[1]->y + 0.5f);
|
||||
int bottomY = (int)(sortedVertices[2]->y + 0.5f);
|
||||
|
@ -1567,6 +1567,7 @@ void DrawRect8(const void *destOrg, int destWidth, int destHeight, int destPitch
|
|||
uint32_t stepV = (int32_t)(fstepV * 0x1000000);
|
||||
|
||||
uint32_t posV = startV;
|
||||
y1 = MIN(y1, thread->numa_end_y);
|
||||
int num_cores = thread->num_cores;
|
||||
int skip = thread->skipped_by_thread(y0);
|
||||
posV += skip * stepV;
|
||||
|
@ -1817,6 +1818,7 @@ void DrawRectOpt32(const void *destOrg, int destWidth, int destHeight, int destP
|
|||
uint32_t stepV = (int32_t)(fstepV * 0x1000000);
|
||||
|
||||
uint32_t posV = startV;
|
||||
y1 = MIN(y1, thread->numa_end_y);
|
||||
int num_cores = thread->num_cores;
|
||||
int skip = thread->skipped_by_thread(y0);
|
||||
posV += skip * stepV;
|
||||
|
|
|
@ -46,13 +46,13 @@ void OriginalMainExcept(int argc, char** argv)
|
|||
{
|
||||
OriginalMainTry(argc, argv);
|
||||
}
|
||||
catch(const CDoomError& error)
|
||||
catch(const std::exception& error)
|
||||
{
|
||||
const char* const message = error.GetMessage();
|
||||
const char* const message = error.what();
|
||||
|
||||
if (NULL != message)
|
||||
{
|
||||
fprintf(stderr, "%s\n", message);
|
||||
if (strcmp(message, "NoRunExit")) fprintf(stderr, "%s\n", message);
|
||||
Mac_I_FatalError(message);
|
||||
}
|
||||
|
||||
|
|
|
@ -178,7 +178,7 @@ TArray<FString> I_GetSteamPath()
|
|||
{
|
||||
SteamInstallFolders = ParseSteamRegistry(regPath);
|
||||
}
|
||||
catch(class CDoomError &error)
|
||||
catch(class CRecoverableError &error)
|
||||
{
|
||||
// If we can't parse for some reason just pretend we can't find anything.
|
||||
return result;
|
||||
|
@ -201,7 +201,7 @@ TArray<FString> I_GetSteamPath()
|
|||
{
|
||||
SteamInstallFolders = ParseSteamRegistry(regPath);
|
||||
}
|
||||
catch(class CDoomError &error)
|
||||
catch(class CRecoverableError &error)
|
||||
{
|
||||
// If we can't parse for some reason just pretend we can't find anything.
|
||||
return result;
|
||||
|
|
|
@ -35,6 +35,8 @@
|
|||
#endif
|
||||
|
||||
#include "doomtype.h"
|
||||
#include <thread>
|
||||
#include <algorithm>
|
||||
|
||||
struct ticcmd_t;
|
||||
struct WadStuff;
|
||||
|
@ -170,4 +172,8 @@ static inline char *strlwr(char *str)
|
|||
return str;
|
||||
}
|
||||
|
||||
// Always reports a single NUMA node.
inline int I_GetNumaNodeCount()
{
	return 1;
}

// Returns the hardware thread count reported by the standard library,
// clamped so that the result is never less than 1. The node index is ignored.
inline int I_GetNumaNodeThreadCount(int numaNode)
{
	const int hardwareThreads = static_cast<int>(std::thread::hardware_concurrency());
	return hardwareThreads < 1 ? 1 : hardwareThreads;
}

// Intentionally does nothing in this implementation.
inline void I_SetThreadNumaNode(std::thread &thread, int numaNode)
{
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -258,18 +258,18 @@ int main (int argc, char **argv)
|
|||
C_InitConsole (80*8, 25*8, false);
|
||||
D_DoomMain ();
|
||||
}
|
||||
catch (class CDoomError &error)
|
||||
catch (std::exception &error)
|
||||
{
|
||||
I_ShutdownJoysticks();
|
||||
if (error.GetMessage ())
|
||||
fprintf (stderr, "%s\n", error.GetMessage ());
|
||||
if (error.what () && strcmp(error.what(), "NoRunExit"))
|
||||
fprintf (stderr, "%s\n", error.what ());
|
||||
|
||||
#ifdef __APPLE__
|
||||
Mac_I_FatalError(error.GetMessage());
|
||||
Mac_I_FatalError(error.what());
|
||||
#endif // __APPLE__
|
||||
|
||||
#ifdef __linux__
|
||||
Linux_I_FatalError(error.GetMessage());
|
||||
Linux_I_FatalError(error.what());
|
||||
#endif // __linux__
|
||||
|
||||
exit (-1);
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
|
||||
#include "jit.h"
|
||||
#include "jitintern.h"
|
||||
#include <map>
|
||||
|
||||
extern PString *TypeString;
|
||||
extern PStruct *TypeVector2;
|
||||
|
@ -826,242 +825,3 @@ void JitCompiler::EmitNOP()
|
|||
{
|
||||
cc.nop();
|
||||
}
|
||||
|
||||
#if 0
|
||||
|
||||
void JitCompiler::SetupNative()
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
ResetTemp();
|
||||
|
||||
static const char *marks = "=======================================================";
|
||||
cc.comment("", 0);
|
||||
cc.comment(marks, 56);
|
||||
|
||||
FString funcname;
|
||||
funcname.Format("Function: %s", sfunc->PrintableName.GetChars());
|
||||
cc.comment(funcname.GetChars(), funcname.Len());
|
||||
|
||||
cc.comment(marks, 56);
|
||||
cc.comment("", 0);
|
||||
|
||||
konstd = sfunc->KonstD;
|
||||
konstf = sfunc->KonstF;
|
||||
konsts = sfunc->KonstS;
|
||||
konsta = sfunc->KonstA;
|
||||
|
||||
CreateRegisters();
|
||||
|
||||
func = cc.addFunc(CreateFuncSignature(sfunc));
|
||||
|
||||
int argsPos = 0;
|
||||
int regd = 0, regf = 0, regs = 0, rega = 0;
|
||||
for (unsigned int i = 0; i < sfunc->Proto->ArgumentTypes.Size(); i++)
|
||||
{
|
||||
const PType *type = sfunc->Proto->ArgumentTypes[i];
|
||||
if (sfunc->ArgFlags[i] & (VARF_Out | VARF_Ref))
|
||||
{
|
||||
cc.setArg(argsPos++, regA[rega++]);
|
||||
}
|
||||
else if (type == TypeVector2)
|
||||
{
|
||||
cc.setArg(argsPos++, regF[regf++]);
|
||||
cc.setArg(argsPos++, regF[regf++]);
|
||||
}
|
||||
else if (type == TypeVector3)
|
||||
{
|
||||
cc.setArg(argsPos++, regF[regf++]);
|
||||
cc.setArg(argsPos++, regF[regf++]);
|
||||
cc.setArg(argsPos++, regF[regf++]);
|
||||
}
|
||||
else if (type == TypeFloat64)
|
||||
{
|
||||
cc.setArg(argsPos++, regF[regf++]);
|
||||
}
|
||||
else if (type == TypeString)
|
||||
{
|
||||
cc.setArg(argsPos++, regS[regs++]);
|
||||
}
|
||||
else if (type->isIntCompatible())
|
||||
{
|
||||
cc.setArg(argsPos++, regA[regd++]);
|
||||
}
|
||||
else
|
||||
{
|
||||
cc.setArg(argsPos++, regA[rega++]);
|
||||
}
|
||||
}
|
||||
|
||||
if (sfunc->NumArgs != argsPos || regd > sfunc->NumRegD || regf > sfunc->NumRegF || regs > sfunc->NumRegS || rega > sfunc->NumRegA)
|
||||
I_FatalError("JIT: sfunc->NumArgs != argsPos || regd > sfunc->NumRegD || regf > sfunc->NumRegF || regs > sfunc->NumRegS || rega > sfunc->NumRegA");
|
||||
|
||||
for (int i = regd; i < sfunc->NumRegD; i++)
|
||||
cc.xor_(regD[i], regD[i]);
|
||||
|
||||
for (int i = regf; i < sfunc->NumRegF; i++)
|
||||
cc.xorpd(regF[i], regF[i]);
|
||||
|
||||
for (int i = regs; i < sfunc->NumRegS; i++)
|
||||
cc.xor_(regS[i], regS[i]);
|
||||
|
||||
for (int i = rega; i < sfunc->NumRegA; i++)
|
||||
cc.xor_(regA[i], regA[i]);
|
||||
|
||||
labels.Resize(sfunc->CodeSize);
|
||||
|
||||
IncrementVMCalls();
|
||||
}
|
||||
|
||||
asmjit::CCFunc *JitCompiler::CodegenThunk(asmjit::X86Compiler &cc, VMScriptFunction *sfunc, void *nativefunc)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
static const char *marks = "=======================================================";
|
||||
cc.comment("", 0);
|
||||
cc.comment(marks, 56);
|
||||
|
||||
FString funcname;
|
||||
funcname.Format("Thunk: %s", sfunc->PrintableName.GetChars());
|
||||
cc.comment(funcname.GetChars(), funcname.Len());
|
||||
|
||||
cc.comment(marks, 56);
|
||||
cc.comment("", 0);
|
||||
|
||||
auto unusedFunc = cc.newIntPtr("func"); // VMFunction*
|
||||
auto args = cc.newIntPtr("args"); // VMValue *params
|
||||
auto numargs = cc.newInt32("numargs"); // int numargs
|
||||
auto ret = cc.newIntPtr("ret"); // VMReturn *ret
|
||||
auto numret = cc.newInt32("numret"); // int numret
|
||||
|
||||
CCFunc *func = cc.addFunc(FuncSignature5<int, VMFunction *, void *, int, void *, int>());
|
||||
cc.setArg(0, unusedFunc);
|
||||
cc.setArg(1, args);
|
||||
cc.setArg(2, numargs);
|
||||
cc.setArg(3, ret);
|
||||
cc.setArg(4, numret);
|
||||
|
||||
TArray<Reg> callArgs;
|
||||
int argsPos = 0;
|
||||
for (unsigned int i = 0; i < sfunc->Proto->ArgumentTypes.Size(); i++)
|
||||
{
|
||||
const PType *type = sfunc->Proto->ArgumentTypes[i];
|
||||
if (sfunc->ArgFlags[i] & (VARF_Out | VARF_Ref))
|
||||
{
|
||||
auto reg = cc.newIntPtr();
|
||||
cc.mov(reg, x86::ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, a)));
|
||||
callArgs.Push(reg);
|
||||
}
|
||||
else if (type == TypeVector2)
|
||||
{
|
||||
for (int j = 0; j < 2; j++)
|
||||
{
|
||||
auto reg = cc.newXmmSd();
|
||||
cc.movsd(reg, x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f)));
|
||||
callArgs.Push(reg);
|
||||
}
|
||||
}
|
||||
else if (type == TypeVector3)
|
||||
{
|
||||
for (int j = 0; j < 3; j++)
|
||||
{
|
||||
auto reg = cc.newXmmSd();
|
||||
cc.movsd(reg, x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f)));
|
||||
callArgs.Push(reg);
|
||||
}
|
||||
}
|
||||
else if (type == TypeFloat64)
|
||||
{
|
||||
auto reg = cc.newXmmSd();
|
||||
cc.movsd(reg, x86::qword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, f)));
|
||||
callArgs.Push(reg);
|
||||
}
|
||||
else if (type == TypeString)
|
||||
{
|
||||
auto reg = cc.newIntPtr();
|
||||
cc.mov(reg, x86::ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, a)));
|
||||
callArgs.Push(reg);
|
||||
}
|
||||
else if (type->isIntCompatible())
|
||||
{
|
||||
auto reg = cc.newInt32();
|
||||
cc.mov(reg, x86::dword_ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, i)));
|
||||
callArgs.Push(reg);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto reg = cc.newIntPtr();
|
||||
cc.mov(reg, x86::ptr(args, argsPos++ * sizeof(VMValue) + offsetof(VMValue, a)));
|
||||
callArgs.Push(reg);
|
||||
}
|
||||
}
|
||||
|
||||
auto call = cc.call(imm_ptr(nativefunc), CreateFuncSignature(sfunc));
|
||||
for (unsigned int i = 0; i < callArgs.Size(); i++)
|
||||
call->setArg(i, callArgs[i]);
|
||||
|
||||
cc.ret(numret);
|
||||
|
||||
return func;
|
||||
}
|
||||
|
||||
asmjit::FuncSignature JitCompiler::CreateFuncSignature(VMScriptFunction *sfunc)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
TArray<uint8_t> args;
|
||||
FString key;
|
||||
for (unsigned int i = 0; i < sfunc->Proto->ArgumentTypes.Size(); i++)
|
||||
{
|
||||
const PType *type = sfunc->Proto->ArgumentTypes[i];
|
||||
if (sfunc->ArgFlags[i] & (VARF_Out | VARF_Ref))
|
||||
{
|
||||
args.Push(TypeIdOf<void*>::kTypeId);
|
||||
key += "v";
|
||||
}
|
||||
else if (type == TypeVector2)
|
||||
{
|
||||
args.Push(TypeIdOf<double>::kTypeId);
|
||||
args.Push(TypeIdOf<double>::kTypeId);
|
||||
key += "ff";
|
||||
}
|
||||
else if (type == TypeVector3)
|
||||
{
|
||||
args.Push(TypeIdOf<double>::kTypeId);
|
||||
args.Push(TypeIdOf<double>::kTypeId);
|
||||
args.Push(TypeIdOf<double>::kTypeId);
|
||||
key += "fff";
|
||||
}
|
||||
else if (type == TypeFloat64)
|
||||
{
|
||||
args.Push(TypeIdOf<double>::kTypeId);
|
||||
key += "f";
|
||||
}
|
||||
else if (type == TypeString)
|
||||
{
|
||||
args.Push(TypeIdOf<void*>::kTypeId);
|
||||
key += "s";
|
||||
}
|
||||
else if (type->isIntCompatible())
|
||||
{
|
||||
args.Push(TypeIdOf<int>::kTypeId);
|
||||
key += "i";
|
||||
}
|
||||
else
|
||||
{
|
||||
args.Push(TypeIdOf<void*>::kTypeId);
|
||||
key += "v";
|
||||
}
|
||||
}
|
||||
|
||||
// FuncSignature only keeps a pointer to its args array. Keep a copy of each args array variant.
|
||||
static std::map<FString, std::unique_ptr<TArray<uint8_t>>> argsCache;
|
||||
std::unique_ptr<TArray<uint8_t>> &cachedArgs = argsCache[key];
|
||||
if (!cachedArgs) cachedArgs.reset(new TArray<uint8_t>(args));
|
||||
|
||||
FuncSignature signature;
|
||||
signature.init(CallConv::kIdHost, TypeIdOf<void>::kTypeId, cachedArgs->Data(), cachedArgs->Size());
|
||||
return signature;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
|
||||
#include "jitintern.h"
|
||||
#include <map>
|
||||
#include <memory>
|
||||
|
||||
void JitCompiler::EmitPARAM()
|
||||
{
|
||||
|
@ -23,13 +25,6 @@ void JitCompiler::EmitVTBL()
|
|||
// This instruction is handled in the CALL/CALL_K instruction following it
|
||||
}
|
||||
|
||||
static VMFunction *GetVirtual(DObject *o, int c)
|
||||
{
|
||||
auto p = o->GetClass();
|
||||
assert(c < (int)p->Virtuals.Size());
|
||||
return p->Virtuals[c];
|
||||
}
|
||||
|
||||
void JitCompiler::EmitVtbl(const VMOP *op)
|
||||
{
|
||||
int a = op->a;
|
||||
|
@ -40,40 +35,46 @@ void JitCompiler::EmitVtbl(const VMOP *op)
|
|||
cc.test(regA[b], regA[b]);
|
||||
cc.jz(label);
|
||||
|
||||
auto result = newResultIntPtr();
|
||||
auto call = CreateCall<VMFunction*, DObject*, int>(GetVirtual);
|
||||
call->setRet(0, result);
|
||||
call->setArg(0, regA[b]);
|
||||
call->setArg(1, asmjit::Imm(c));
|
||||
cc.mov(regA[a], result);
|
||||
cc.mov(regA[a], asmjit::x86::qword_ptr(regA[b], myoffsetof(DObject, Class)));
|
||||
cc.mov(regA[a], asmjit::x86::qword_ptr(regA[a], myoffsetof(PClass, Virtuals) + myoffsetof(FArray, Array)));
|
||||
cc.mov(regA[a], asmjit::x86::qword_ptr(regA[a], c * (int)sizeof(void*)));
|
||||
}
|
||||
|
||||
void JitCompiler::EmitCALL()
|
||||
{
|
||||
EmitDoCall(regA[A], nullptr);
|
||||
EmitVMCall(regA[A]);
|
||||
pc += C; // Skip RESULTs
|
||||
}
|
||||
|
||||
void JitCompiler::EmitCALL_K()
|
||||
{
|
||||
auto ptr = newTempIntPtr();
|
||||
cc.mov(ptr, asmjit::imm_ptr(konsta[A].v));
|
||||
EmitDoCall(ptr, static_cast<VMFunction*>(konsta[A].v));
|
||||
VMFunction *target = static_cast<VMFunction*>(konsta[A].v);
|
||||
|
||||
VMNativeFunction *ntarget = nullptr;
|
||||
if (target && (target->VarFlags & VARF_Native))
|
||||
ntarget = static_cast<VMNativeFunction *>(target);
|
||||
|
||||
if (ntarget && ntarget->DirectNativeCall)
|
||||
{
|
||||
EmitNativeCall(ntarget);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto ptr = newTempIntPtr();
|
||||
cc.mov(ptr, asmjit::imm_ptr(target));
|
||||
EmitVMCall(ptr);
|
||||
}
|
||||
|
||||
pc += C; // Skip RESULTs
|
||||
}
|
||||
|
||||
void JitCompiler::EmitDoCall(asmjit::X86Gp vmfunc, VMFunction *target)
|
||||
void JitCompiler::EmitVMCall(asmjit::X86Gp vmfunc)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
bool simpleFrameTarget = false;
|
||||
if (target && (target->VarFlags & VARF_Native))
|
||||
{
|
||||
VMScriptFunction *starget = static_cast<VMScriptFunction*>(target);
|
||||
simpleFrameTarget = starget->SpecialInits.Size() == 0 && starget->NumRegS == 0;
|
||||
}
|
||||
|
||||
CheckVMFrame();
|
||||
|
||||
int numparams = StoreCallParams(simpleFrameTarget);
|
||||
int numparams = StoreCallParams();
|
||||
if (numparams != B)
|
||||
I_FatalError("OP_CALL parameter count does not match the number of preceding OP_PARAM instructions");
|
||||
|
||||
|
@ -85,20 +86,6 @@ void JitCompiler::EmitDoCall(asmjit::X86Gp vmfunc, VMFunction *target)
|
|||
X86Gp paramsptr = newTempIntPtr();
|
||||
cc.lea(paramsptr, x86::ptr(vmframe, offsetParams));
|
||||
|
||||
EmitScriptCall(vmfunc, paramsptr);
|
||||
|
||||
LoadInOuts();
|
||||
LoadReturns(pc + 1, C);
|
||||
|
||||
ParamOpcodes.Clear();
|
||||
|
||||
pc += C; // Skip RESULTs
|
||||
}
|
||||
|
||||
void JitCompiler::EmitScriptCall(asmjit::X86Gp vmfunc, asmjit::X86Gp paramsptr)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
auto scriptcall = newTempIntPtr();
|
||||
cc.mov(scriptcall, x86::ptr(vmfunc, myoffsetof(VMScriptFunction, ScriptCall)));
|
||||
|
||||
|
@ -110,9 +97,14 @@ void JitCompiler::EmitScriptCall(asmjit::X86Gp vmfunc, asmjit::X86Gp paramsptr)
|
|||
call->setArg(2, Imm(B));
|
||||
call->setArg(3, GetCallReturns());
|
||||
call->setArg(4, Imm(C));
|
||||
|
||||
LoadInOuts();
|
||||
LoadReturns(pc + 1, C);
|
||||
|
||||
ParamOpcodes.Clear();
|
||||
}
|
||||
|
||||
int JitCompiler::StoreCallParams(bool simpleFrameTarget)
|
||||
int JitCompiler::StoreCallParams()
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
|
@ -320,3 +312,227 @@ void JitCompiler::FillReturns(const VMOP *retval, int numret)
|
|||
cc.mov(x86::byte_ptr(GetCallReturns(), i * sizeof(VMReturn) + myoffsetof(VMReturn, RegType)), type);
|
||||
}
|
||||
}
|
||||
|
||||
// Emits a direct call to target->DirectNativeCall instead of going through
// the VM call path.
//
// Arguments come from the OP_PARAM / OP_PARAMI instructions collected in
// ParamOpcodes; each is bound to the next argument slot of the call
// (two- and three-component float parameters take two or three slots).
// The number of bound slots must equal B. At most one return value (C <= 1)
// is supported; it is read from the call and copied into the register named
// by the OP_RESULT instruction that follows the call.
//
// Every unsupported or inconsistent situation aborts through I_FatalError.
// ParamOpcodes is cleared once the call has been emitted.
void JitCompiler::EmitNativeCall(VMNativeFunction *target)
{
	using namespace asmjit;

	auto call = cc.call(imm_ptr(target->DirectNativeCall), CreateFuncSignature(target));

	if ((pc - 1)->op == OP_VTBL)
	{
		I_FatalError("Native direct member function calls not implemented\n");
	}

	X86Gp tmp;
	X86Xmm tmp2;

	int numparams = 0;
	for (unsigned int i = 0; i < ParamOpcodes.Size(); i++)
	{
		int slot = numparams++;

		if (ParamOpcodes[i]->op == OP_PARAMI)
		{
			// Immediate integer parameter encoded in the instruction itself.
			int abcs = ParamOpcodes[i]->i24;
			call->setArg(slot, imm(abcs));
		}
		else // OP_PARAM
		{
			int bc = ParamOpcodes[i]->i16u;
			switch (ParamOpcodes[i]->a)
			{
			case REGT_NIL:
				call->setArg(slot, imm(0));
				break;
			case REGT_INT:
				call->setArg(slot, regD[bc]);
				break;
			case REGT_INT | REGT_KONST:
				call->setArg(slot, imm(konstd[bc]));
				break;
			case REGT_STRING:
				call->setArg(slot, regS[bc]);
				break;
			case REGT_STRING | REGT_KONST:
				call->setArg(slot, imm_ptr(&konsts[bc]));
				break;
			case REGT_POINTER:
				call->setArg(slot, regA[bc]);
				break;
			case REGT_POINTER | REGT_KONST:
				call->setArg(slot, asmjit::imm_ptr(konsta[bc].v));
				break;
			case REGT_FLOAT:
				call->setArg(slot, regF[bc]);
				break;
			case REGT_FLOAT | REGT_MULTIREG2:
				// Two consecutive float registers occupy two argument slots.
				for (int j = 0; j < 2; j++)
					call->setArg(slot + j, regF[bc + j]);
				numparams++;
				break;
			case REGT_FLOAT | REGT_MULTIREG3:
				// Three consecutive float registers occupy three argument slots.
				for (int j = 0; j < 3; j++)
					call->setArg(slot + j, regF[bc + j]);
				numparams += 2;
				break;
			case REGT_FLOAT | REGT_KONST:
				// NOTE(review): these loads are emitted after the call node was
				// created above - confirm they end up ahead of the call in the
				// generated instruction stream.
				tmp = newTempIntPtr();
				tmp2 = newTempXmmSd();
				cc.mov(tmp, asmjit::imm_ptr(konstf + bc));
				cc.movsd(tmp2, asmjit::x86::qword_ptr(tmp));
				call->setArg(slot, tmp2);
				break;

			case REGT_STRING | REGT_ADDROF:
			case REGT_INT | REGT_ADDROF:
			case REGT_POINTER | REGT_ADDROF:
			case REGT_FLOAT | REGT_ADDROF:
				I_FatalError("REGT_ADDROF not implemented for native direct calls\n");
				break;

			default:
				I_FatalError("Unknown REGT value passed to EmitPARAM\n");
				break;
			}
		}
	}

	if (numparams != B)
		I_FatalError("OP_CALL parameter count does not match the number of preceding OP_PARAM instructions\n");

	int numret = C;
	if (numret > 1)
		I_FatalError("Only one return parameter is supported for direct native calls\n");

	if (numret == 1)
	{
		const auto &retval = pc[1];
		if (retval.op != OP_RESULT)
		{
			I_FatalError("Expected OP_RESULT to follow OP_CALL\n");
		}

		int type = retval.b;
		int regnum = retval.c;

		if (type & REGT_KONST)
		{
			I_FatalError("OP_RESULT with REGT_KONST is not allowed\n");
		}

		// Note: the usage of newResultXX is intentional. Asmjit has a register allocation bug
		// if the return virtual register is already allocated in an argument slot.

		switch (type & REGT_TYPE)
		{
		case REGT_INT:
			tmp = newResultInt32();
			call->setRet(0, tmp);
			cc.mov(regD[regnum], tmp);
			break;
		case REGT_FLOAT:
			tmp2 = newResultXmmSd();
			call->setRet(0, tmp2);
			cc.movsd(regF[regnum], tmp2);
			break;
		case REGT_POINTER:
			tmp = newResultIntPtr();
			// Bind the result register to the call, as the int and float cases
			// do; without this the copy below reads a register that was never
			// assigned the return value.
			call->setRet(0, tmp);
			cc.mov(regA[regnum], tmp);
			break;
		case REGT_STRING:
		case REGT_FLOAT | REGT_MULTIREG2:
		case REGT_FLOAT | REGT_MULTIREG3:
		default:
			I_FatalError("Unsupported OP_RESULT type encountered in EmitNativeCall\n");
			break;
		}
	}

	ParamOpcodes.Clear();
}
|
||||
|
||||
// Builds the asmjit function signature used to call 'func' directly.
//
// Argument mapping, in prototype order:
//   - parameters flagged VARF_Out or VARF_Ref -> one pointer
//   - TypeVector2 / TypeVector3               -> two / three doubles
//   - TypeFloat64                             -> one double
//   - TypeString                              -> one pointer
//   - int-compatible types                    -> one int
//   - anything else                           -> one pointer
//
// The return type is void when the prototype has no return types; otherwise
// it is derived from the first return type (double, int, or pointer).
//
// The argument type array is stored in a function-local static cache keyed
// by a string describing the signature, because FuncSignature only keeps a
// pointer to the array it is initialised with.
asmjit::FuncSignature JitCompiler::CreateFuncSignature(VMFunction *func)
{
	using namespace asmjit;

	TArray<uint8_t> args;
	FString key;

	// Appends 'count' arguments of type 'typeId' and extends the cache key with 'tag'.
	auto addArgs = [&](uint32_t typeId, int count, const char *tag)
	{
		for (int n = 0; n < count; n++)
			args.Push(static_cast<uint8_t>(typeId));
		key += tag;
	};

	const unsigned int numArgs = func->Proto->ArgumentTypes.Size();
	for (unsigned int i = 0; i < numArgs; i++)
	{
		const PType *type = func->Proto->ArgumentTypes[i];
		const bool passedByPointer = (func->ArgFlags[i] & (VARF_Out | VARF_Ref)) != 0;

		if (passedByPointer)
			addArgs(TypeIdOf<void*>::kTypeId, 1, "v");
		else if (type == TypeVector2)
			addArgs(TypeIdOf<double>::kTypeId, 2, "ff");
		else if (type == TypeVector3)
			addArgs(TypeIdOf<double>::kTypeId, 3, "fff");
		else if (type == TypeFloat64)
			addArgs(TypeIdOf<double>::kTypeId, 1, "f");
		else if (type == TypeString)
			addArgs(TypeIdOf<void*>::kTypeId, 1, "s");
		else if (type->isIntCompatible())
			addArgs(TypeIdOf<int>::kTypeId, 1, "i");
		else
			addArgs(TypeIdOf<void*>::kTypeId, 1, "v");
	}

	uint32_t rettype = TypeIdOf<void>::kTypeId;
	if (func->Proto->ReturnTypes.Size() > 0)
	{
		const PType *type = func->Proto->ReturnTypes[0];
		const char *tag;

		if (type == TypeFloat64)
		{
			rettype = TypeIdOf<double>::kTypeId;
			tag = "rf";
		}
		else if (type == TypeString)
		{
			rettype = TypeIdOf<void*>::kTypeId;
			tag = "rs";
		}
		else if (type->isIntCompatible())
		{
			rettype = TypeIdOf<int>::kTypeId;
			tag = "ri";
		}
		else
		{
			rettype = TypeIdOf<void*>::kTypeId;
			tag = "rv";
		}
		key += tag;
	}

	// FuncSignature only keeps a pointer to its args array, so one copy per
	// distinct key is kept alive here for the signature to point at.
	static std::map<FString, std::unique_ptr<TArray<uint8_t>>> argsCache;
	std::unique_ptr<TArray<uint8_t>> &stored = argsCache[key];
	if (stored == nullptr)
		stored.reset(new TArray<uint8_t>(args));

	FuncSignature signature;
	signature.init(CallConv::kIdHost, rettype, stored->Data(), stored->Size());
	return signature;
}
|
||||
|
|
|
@ -77,8 +77,6 @@ void JitCompiler::EmitLFP()
|
|||
cc.lea(regA[A], asmjit::x86::ptr(vmframe, offsetExtra));
|
||||
}
|
||||
|
||||
#if 1 // Inline implementation
|
||||
|
||||
void JitCompiler::EmitMETA()
|
||||
{
|
||||
auto label = EmitThrowExceptionLabel(X_READ_NIL);
|
||||
|
@ -97,46 +95,6 @@ void JitCompiler::EmitCLSS()
|
|||
cc.mov(regA[A], asmjit::x86::qword_ptr(regA[B], myoffsetof(DObject, Class)));
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// Helper invoked through CreateCall by the call-based EmitMETA:
// returns the Meta pointer of the class of `o`.
static uint8_t *GetClassMeta(DObject *o)
{
	auto cls = o->GetClass();
	return cls->Meta;
}
|
||||
|
||||
void JitCompiler::EmitMETA()
|
||||
{
|
||||
auto label = EmitThrowExceptionLabel(X_READ_NIL);
|
||||
cc.test(regA[B], regA[B]);
|
||||
cc.je(label);
|
||||
|
||||
auto result = newResultIntPtr();
|
||||
auto call = CreateCall<uint8_t*, DObject*>(GetClassMeta);
|
||||
call->setRet(0, result);
|
||||
call->setArg(0, regA[B]);
|
||||
cc.mov(regA[A], result);
|
||||
}
|
||||
|
||||
// Helper invoked through CreateCall by the call-based EmitCLSS:
// returns the class of `o`.
static PClass *GetClass(DObject *o)
{
	PClass *cls = o->GetClass();
	return cls;
}
|
||||
|
||||
void JitCompiler::EmitCLSS()
|
||||
{
|
||||
auto label = EmitThrowExceptionLabel(X_READ_NIL);
|
||||
cc.test(regA[B], regA[B]);
|
||||
cc.je(label);
|
||||
|
||||
auto result = newResultIntPtr();
|
||||
auto call = CreateCall<PClass*, DObject*>(GetClass);
|
||||
call->setRet(0, result);
|
||||
call->setArg(0, regA[B]);
|
||||
cc.mov(regA[A], result);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Load from memory. rA = *(rB + rkC)
|
||||
|
||||
|
|
|
@ -38,9 +38,7 @@ private:
|
|||
#include "vmops.h"
|
||||
#undef xx
|
||||
|
||||
//static asmjit::FuncSignature CreateFuncSignature(VMScriptFunction *sfunc);
|
||||
//static asmjit::CCFunc *CodegenThunk(asmjit::X86Compiler &cc, VMScriptFunction *sfunc, void *nativefunc);
|
||||
//void SetupNative();
|
||||
static asmjit::FuncSignature CreateFuncSignature(VMFunction *sfunc);
|
||||
|
||||
void Setup();
|
||||
void CreateRegisters();
|
||||
|
@ -52,11 +50,11 @@ private:
|
|||
void EmitOpcode();
|
||||
void EmitPopFrame();
|
||||
|
||||
void EmitDoCall(asmjit::X86Gp ptr, VMFunction *target);
|
||||
void EmitScriptCall(asmjit::X86Gp vmfunc, asmjit::X86Gp paramsptr);
|
||||
void EmitNativeCall(VMNativeFunction *target);
|
||||
void EmitVMCall(asmjit::X86Gp ptr);
|
||||
void EmitVtbl(const VMOP *op);
|
||||
|
||||
int StoreCallParams(bool simpleFrameTarget);
|
||||
int StoreCallParams();
|
||||
void LoadInOuts();
|
||||
void LoadReturns(const VMOP *retval, int numret);
|
||||
void FillReturns(const VMOP *retval, int numret);
|
||||
|
|
|
@ -464,6 +464,9 @@ public:
|
|||
// Return value is the number of results.
|
||||
NativeCallType NativeCall;
|
||||
|
||||
// Function pointer to a native function to be called directly by the JIT using the platform calling convention
|
||||
void *DirectNativeCall = nullptr;
|
||||
|
||||
private:
|
||||
static int NativeScriptCall(VMFunction *func, VMValue *params, int numparams, VMReturn *ret, int numret);
|
||||
};
|
||||
|
@ -502,6 +505,7 @@ bool AssertObject(void * ob);
|
|||
#define PARAM_STATE_AT(p,x) assert((p) < numparam); assert(reginfo[p] == REGT_INT); FState *x = (FState *)StateLabels.GetState(param[p].i, self->GetClass());
|
||||
#define PARAM_STATE_ACTION_AT(p,x) assert((p) < numparam); assert(reginfo[p] == REGT_INT); FState *x = (FState *)StateLabels.GetState(param[p].i, stateowner->GetClass());
|
||||
#define PARAM_POINTER_AT(p,x,type) assert((p) < numparam); assert(reginfo[p] == REGT_POINTER); type *x = (type *)param[p].a;
|
||||
// Declares `type *x` from the pointer value of parameter p. Only the parameter index is asserted;
// unlike PARAM_POINTER_AT there is no reginfo[p] == REGT_POINTER check.
#define PARAM_OUTPOINTER_AT(p,x,type) assert((p) < numparam); type *x = (type *)param[p].a;
|
||||
#define PARAM_POINTERTYPE_AT(p,x,type) assert((p) < numparam); assert(reginfo[p] == REGT_POINTER); type x = (type )param[p].a;
|
||||
#define PARAM_OBJECT_AT(p,x,type) assert((p) < numparam); assert(reginfo[p] == REGT_POINTER && AssertObject(param[p].a)); type *x = (type *)param[p].a; assert(x == NULL || x->IsKindOf(RUNTIME_CLASS(type)));
|
||||
#define PARAM_CLASS_AT(p,x,base) assert((p) < numparam); assert(reginfo[p] == REGT_POINTER); base::MetaClass *x = (base::MetaClass *)param[p].a; assert(x == NULL || x->IsDescendantOf(RUNTIME_CLASS(base)));
|
||||
|
@ -525,6 +529,7 @@ bool AssertObject(void * ob);
|
|||
#define PARAM_STATE(x) ++paramnum; PARAM_STATE_AT(paramnum,x)
|
||||
#define PARAM_STATE_ACTION(x) ++paramnum; PARAM_STATE_ACTION_AT(paramnum,x)
|
||||
#define PARAM_POINTER(x,type) ++paramnum; PARAM_POINTER_AT(paramnum,x,type)
|
||||
// Advances paramnum to the next parameter, then reads it with PARAM_OUTPOINTER_AT.
#define PARAM_OUTPOINTER(x,type) ++paramnum; PARAM_OUTPOINTER_AT(paramnum,x,type)
|
||||
#define PARAM_POINTERTYPE(x,type) ++paramnum; PARAM_POINTERTYPE_AT(paramnum,x,type)
|
||||
#define PARAM_OBJECT(x,type) ++paramnum; PARAM_OBJECT_AT(paramnum,x,type)
|
||||
#define PARAM_CLASS(x,base) ++paramnum; PARAM_CLASS_AT(paramnum,x,base)
|
||||
|
|
|
@ -80,6 +80,7 @@ void VMFunction::CreateRegUse()
|
|||
return;
|
||||
}
|
||||
assert(Proto->isPrototype());
|
||||
|
||||
for (auto arg : Proto->ArgumentTypes)
|
||||
{
|
||||
count += arg? arg->GetRegCount() : 1;
|
||||
|
@ -87,14 +88,20 @@ void VMFunction::CreateRegUse()
|
|||
uint8_t *regp;
|
||||
RegTypes = regp = (uint8_t*)ClassDataAllocator.Alloc(count);
|
||||
count = 0;
|
||||
for (auto arg : Proto->ArgumentTypes)
|
||||
for (unsigned i = 0; i < Proto->ArgumentTypes.Size(); i++)
|
||||
{
|
||||
auto arg = Proto->ArgumentTypes[i];
|
||||
auto flg = ArgFlags.Size() > i ? ArgFlags[i] : 0;
|
||||
if (arg == nullptr)
|
||||
{
|
||||
// Marker for start of varargs.
|
||||
*regp++ = REGT_NIL;
|
||||
}
|
||||
else for (int i = 0; i < arg->GetRegCount(); i++)
|
||||
else if ((flg & VARF_Out) && !arg->isPointer())
|
||||
{
|
||||
*regp++ = REGT_POINTER;
|
||||
}
|
||||
else for (int j = 0; j < arg->GetRegCount(); j++)
|
||||
{
|
||||
*regp++ = arg->GetRegType();
|
||||
}
|
||||
|
@ -633,7 +640,10 @@ int VMCallWithDefaults(VMFunction *func, TArray<VMValue> ¶ms, VMReturn *resu
|
|||
{
|
||||
auto oldp = params.Size();
|
||||
params.Resize(func->DefaultArgs.Size());
|
||||
memcpy(¶ms[oldp], &func->DefaultArgs[oldp], (params.Size() - oldp) * sizeof(VMValue));
|
||||
for (unsigned i = oldp; i < params.Size(); i++)
|
||||
{
|
||||
params[i] = func->DefaultArgs[i];
|
||||
}
|
||||
}
|
||||
return VMCall(func, params.Data(), params.Size(), results, numresults);
|
||||
}
|
||||
|
|
|
@ -174,7 +174,11 @@ void DrawerThreads::StartThreads()
|
|||
{
|
||||
std::unique_lock<std::mutex> lock(threads_mutex);
|
||||
|
||||
int num_threads = std::thread::hardware_concurrency();
|
||||
int num_numathreads = 0;
|
||||
for (int i = 0; i < I_GetNumaNodeCount(); i++)
|
||||
num_numathreads += I_GetNumaNodeThreadCount(i);
|
||||
|
||||
int num_threads = num_numathreads;
|
||||
if (num_threads == 0)
|
||||
num_threads = 4;
|
||||
|
||||
|
@ -189,13 +193,41 @@ void DrawerThreads::StartThreads()
|
|||
|
||||
threads.resize(num_threads);
|
||||
|
||||
for (int i = 0; i < num_threads; i++)
|
||||
if (num_threads == num_numathreads)
|
||||
{
|
||||
DrawerThreads *queue = this;
|
||||
DrawerThread *thread = &threads[i];
|
||||
thread->core = i;
|
||||
thread->num_cores = num_threads;
|
||||
thread->thread = std::thread([=]() { queue->WorkerMain(thread); });
|
||||
int curThread = 0;
|
||||
for (int numaNode = 0; numaNode < I_GetNumaNodeCount(); numaNode++)
|
||||
{
|
||||
for (int i = 0; i < I_GetNumaNodeThreadCount(numaNode); i++)
|
||||
{
|
||||
DrawerThreads *queue = this;
|
||||
DrawerThread *thread = &threads[curThread++];
|
||||
thread->core = i;
|
||||
thread->num_cores = I_GetNumaNodeThreadCount(numaNode);
|
||||
thread->numa_node = numaNode;
|
||||
thread->num_numa_nodes = I_GetNumaNodeCount();
|
||||
thread->numa_start_y = numaNode * viewheight / I_GetNumaNodeCount();
|
||||
thread->numa_end_y = (numaNode + 1) * viewheight / I_GetNumaNodeCount();
|
||||
thread->thread = std::thread([=]() { queue->WorkerMain(thread); });
|
||||
I_SetThreadNumaNode(thread->thread, numaNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < num_threads; i++)
|
||||
{
|
||||
DrawerThreads *queue = this;
|
||||
DrawerThread *thread = &threads[i];
|
||||
thread->core = i;
|
||||
thread->num_cores = num_threads;
|
||||
thread->numa_node = 0;
|
||||
thread->num_numa_nodes = 1;
|
||||
thread->numa_start_y = 0;
|
||||
thread->numa_end_y = viewheight;
|
||||
thread->thread = std::thread([=]() { queue->WorkerMain(thread); });
|
||||
I_SetThreadNumaNode(thread->thread, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -256,4 +288,7 @@ void MemcpyCommand::Execute(DrawerThread *thread)
|
|||
d += dstep;
|
||||
s += sstep;
|
||||
}
|
||||
|
||||
thread->numa_start_y = thread->numa_node * viewheight / thread->num_numa_nodes;
|
||||
thread->numa_end_y = (thread->numa_node + 1) * viewheight / thread->num_numa_nodes;
|
||||
}
|
||||
|
|
|
@ -47,6 +47,16 @@ public:
|
|||
// Number of active threads
|
||||
int num_cores = 1;
|
||||
|
||||
// NUMA node this thread belongs to
|
||||
int numa_node = 0;
|
||||
|
||||
// Number of active NUMA nodes
|
||||
int num_numa_nodes = 1;
|
||||
|
||||
// Active range for the numa block the cores are part of
|
||||
int numa_start_y = 0;
|
||||
int numa_end_y = 0;
|
||||
|
||||
// Working buffer used by the tilted (sloped) span drawer
|
||||
const uint8_t *tiltlighting[MAXWIDTH];
|
||||
|
||||
|
@ -57,19 +67,21 @@ public:
|
|||
// Checks if a line is rendered by this thread
|
||||
bool line_skipped_by_thread(int line)
|
||||
{
|
||||
return line % num_cores != core;
|
||||
return line < numa_start_y || line >= numa_end_y || line % num_cores != core;
|
||||
}
|
||||
|
||||
// The number of lines to skip to reach the first line to be rendered by this thread
|
||||
int skipped_by_thread(int first_line)
|
||||
{
|
||||
int core_skip = (num_cores - (first_line - core) % num_cores) % num_cores;
|
||||
return core_skip;
|
||||
int clip_first_line = MAX(first_line, numa_start_y);
|
||||
int core_skip = (num_cores - (clip_first_line - core) % num_cores) % num_cores;
|
||||
return clip_first_line + core_skip - first_line;
|
||||
}
|
||||
|
||||
// The number of lines to be rendered by this thread
|
||||
int count_for_thread(int first_line, int count)
|
||||
{
|
||||
count = MIN(count, numa_end_y - first_line);
|
||||
int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores;
|
||||
return MAX(c, 0);
|
||||
}
|
||||
|
|
|
@ -1048,21 +1048,22 @@ void DoMain (HINSTANCE hInstance)
|
|||
}
|
||||
exit(0);
|
||||
}
|
||||
catch (class CDoomError &error)
|
||||
catch (std::exception &error)
|
||||
{
|
||||
I_ShutdownGraphics ();
|
||||
RestoreConView ();
|
||||
S_StopMusic(true);
|
||||
I_FlushBufferedConsoleStuff();
|
||||
if (error.GetMessage ())
|
||||
auto msg = error.what();
|
||||
if (strcmp(msg, "NoRunExit"))
|
||||
{
|
||||
if (!batchrun)
|
||||
{
|
||||
ShowErrorPane(error.GetMessage());
|
||||
ShowErrorPane(msg);
|
||||
}
|
||||
else
|
||||
{
|
||||
Printf("%s\n", error.GetMessage());
|
||||
Printf("%s\n", msg);
|
||||
}
|
||||
}
|
||||
exit (-1);
|
||||
|
|
|
@ -50,6 +50,7 @@
|
|||
#include <string.h>
|
||||
#include <process.h>
|
||||
#include <time.h>
|
||||
#include <map>
|
||||
|
||||
#include <stdarg.h>
|
||||
|
||||
|
@ -1470,3 +1471,76 @@ int _stat64i32(const char *path, struct _stat64i32 *buffer)
|
|||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
// One entry per NUMA node; filled in by SetupNumaNodes().
struct NumaNode
{
	// Bit mask of the processors recorded for this node (bit i = processor i).
	uint64_t affinityMask{0};
	// Number of processors recorded for this node.
	int threadCount{0};
};
|
||||
static TArray<NumaNode> numaNodes;
|
||||
|
||||
static void SetupNumaNodes()
|
||||
{
|
||||
if (numaNodes.Size() == 0)
|
||||
{
|
||||
// Query processors in the system
|
||||
DWORD_PTR processMask = 0, systemMask = 0;
|
||||
BOOL result = GetProcessAffinityMask(GetCurrentProcess(), &processMask, &systemMask);
|
||||
if (result)
|
||||
{
|
||||
// Find the numa node each processor belongs to
|
||||
std::map<int, NumaNode> nodes;
|
||||
for (int i = 0; i < sizeof(DWORD_PTR) * 8; i++)
|
||||
{
|
||||
DWORD_PTR processorMask = (((DWORD_PTR)1) << i);
|
||||
if (processMask & processorMask)
|
||||
{
|
||||
UCHAR nodeNumber = 0;
|
||||
result = GetNumaProcessorNode(i, &nodeNumber);
|
||||
if (nodeNumber != 0xff)
|
||||
{
|
||||
nodes[nodeNumber].affinityMask |= (uint64_t)processorMask;
|
||||
nodes[nodeNumber].threadCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Convert map to a list
|
||||
for (const auto &it : nodes)
|
||||
{
|
||||
numaNodes.Push(it.second);
|
||||
}
|
||||
}
|
||||
|
||||
// Fall back to a single node if something went wrong
|
||||
if (numaNodes.Size() == 0)
|
||||
{
|
||||
NumaNode node;
|
||||
node.threadCount = std::thread::hardware_concurrency();
|
||||
if (node.threadCount == 0)
|
||||
node.threadCount = 1;
|
||||
numaNodes.Push(node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int I_GetNumaNodeCount()
|
||||
{
|
||||
SetupNumaNodes();
|
||||
return numaNodes.Size();
|
||||
}
|
||||
|
||||
int I_GetNumaNodeThreadCount(int numaNode)
|
||||
{
|
||||
SetupNumaNodes();
|
||||
return numaNodes[numaNode].threadCount;
|
||||
}
|
||||
|
||||
void I_SetThreadNumaNode(std::thread &thread, int numaNode)
|
||||
{
|
||||
if (numaNodes.Size() > 1)
|
||||
{
|
||||
HANDLE handle = (HANDLE)thread.native_handle();
|
||||
SetThreadAffinityMask(handle, (DWORD_PTR)numaNodes[numaNode].affinityMask);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#define __I_SYSTEM__
|
||||
|
||||
#include "doomtype.h"
|
||||
#include <thread>
|
||||
|
||||
struct ticcmd_t;
|
||||
struct WadStuff;
|
||||
|
@ -186,4 +187,8 @@ inline int I_FindAttr(findstate_t *fileinfo)
|
|||
#define FA_DIREC 0x00000010
|
||||
#define FA_ARCH 0x00000020
|
||||
|
||||
int I_GetNumaNodeCount();
|
||||
int I_GetNumaNodeThreadCount(int numaNode);
|
||||
void I_SetThreadNumaNode(std::thread &thread, int numaNode);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -366,6 +366,7 @@ struct GameInfoStruct native
|
|||
native double gibfactor;
|
||||
native bool intermissioncounter;
|
||||
native Name mSliderColor;
|
||||
native Color defaultbloodcolor;
|
||||
}
|
||||
|
||||
class Object native
|
||||
|
|
Loading…
Reference in a new issue