From 216191c86d2a042e5db8141425df9486457c2520 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 20 Nov 2018 13:41:27 +0100 Subject: [PATCH 1/9] - interpolate the normal for models --- src/hwrenderer/models/hw_models.cpp | 3 ++- wadsrc/static/shaders/glsl/main.vp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/hwrenderer/models/hw_models.cpp b/src/hwrenderer/models/hw_models.cpp index ce22625d8..d4241c026 100644 --- a/src/hwrenderer/models/hw_models.cpp +++ b/src/hwrenderer/models/hw_models.cpp @@ -144,7 +144,8 @@ FModelVertexBuffer::FModelVertexBuffer(bool needindex, bool singleframe) { 0, VATTR_VERTEX, VFmt_Float3, (int)myoffsetof(FModelVertex, x) }, { 0, VATTR_TEXCOORD, VFmt_Float2, (int)myoffsetof(FModelVertex, u) }, { 0, VATTR_NORMAL, VFmt_Packed_A2R10G10B10, (int)myoffsetof(FModelVertex, packedNormal) }, - { 1, VATTR_VERTEX2, VFmt_Float3, (int)myoffsetof(FModelVertex, x) } + { 1, VATTR_VERTEX2, VFmt_Float3, (int)myoffsetof(FModelVertex, x) }, + { 1, VATTR_NORMAL2, VFmt_Packed_A2R10G10B10, (int)myoffsetof(FModelVertex, packedNormal) } }; mVertexBuffer->SetFormat(2, 4, sizeof(FModelVertex), format); } diff --git a/wadsrc/static/shaders/glsl/main.vp b/wadsrc/static/shaders/glsl/main.vp index dccbb77c0..a5875318f 100644 --- a/wadsrc/static/shaders/glsl/main.vp +++ b/wadsrc/static/shaders/glsl/main.vp @@ -5,6 +5,7 @@ layout(location = 2) in vec4 aColor; #ifndef SIMPLE // we do not need these for simple shaders layout(location = 3) in vec4 aVertex2; layout(location = 4) in vec4 aNormal; +layout(location = 5) in vec4 aNormal2; out vec4 pixelpos; out vec3 glowdist; out vec3 gradientdist; @@ -62,7 +63,7 @@ void main() gl_ClipDistance[4] = worldcoord.y - ((uSplitBottomPlane.w + uSplitBottomPlane.x * worldcoord.x + uSplitBottomPlane.y * worldcoord.z) * uSplitBottomPlane.z); } - vWorldNormal = NormalModelMatrix * vec4(normalize(aNormal.xyz), 1.0); + vWorldNormal = NormalModelMatrix * vec4(normalize(mix(aNormal.xyz. 
aNormal2.xyz, uInterpolationFactor)), 1.0); vEyeNormal = NormalViewMatrix * vWorldNormal; #endif From 96df21e3dcd867d7ba6ed2a8b6318dfc646016b8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 20 Nov 2018 13:54:18 +0100 Subject: [PATCH 2/9] - fix typo --- wadsrc/static/shaders/glsl/main.vp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wadsrc/static/shaders/glsl/main.vp b/wadsrc/static/shaders/glsl/main.vp index a5875318f..81d4022f0 100644 --- a/wadsrc/static/shaders/glsl/main.vp +++ b/wadsrc/static/shaders/glsl/main.vp @@ -63,7 +63,7 @@ void main() gl_ClipDistance[4] = worldcoord.y - ((uSplitBottomPlane.w + uSplitBottomPlane.x * worldcoord.x + uSplitBottomPlane.y * worldcoord.z) * uSplitBottomPlane.z); } - vWorldNormal = NormalModelMatrix * vec4(normalize(mix(aNormal.xyz. aNormal2.xyz, uInterpolationFactor)), 1.0); + vWorldNormal = NormalModelMatrix * vec4(normalize(mix(aNormal.xyz, aNormal2.xyz, uInterpolationFactor)), 1.0); vEyeNormal = NormalViewMatrix * vWorldNormal; #endif From dd42557e69c8347bd4b96a4db162e14271370a1d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 20 Nov 2018 23:12:20 +0100 Subject: [PATCH 3/9] - implement a shader cache --- src/gl/shaders/gl_shader.cpp | 263 +++++++++++++++++++++++----- src/gl/shaders/gl_shaderprogram.cpp | 55 +++++- src/gl/shaders/gl_shaderprogram.h | 3 + 3 files changed, 269 insertions(+), 52 deletions(-) diff --git a/src/gl/shaders/gl_shader.cpp b/src/gl/shaders/gl_shader.cpp index b31e9532f..c3c19df8c 100644 --- a/src/gl/shaders/gl_shader.cpp +++ b/src/gl/shaders/gl_shader.cpp @@ -32,6 +32,8 @@ #include "w_wad.h" #include "doomerrors.h" #include "cmdlib.h" +#include "md5.h" +#include "m_misc.h" #include "hwrenderer/utility/hw_shaderpatcher.h" #include "hwrenderer/data/shaderuniforms.h" #include "hwrenderer/scene/hw_viewpointuniforms.h" @@ -42,10 +44,152 @@ #include "r_data/matrix.h" #include "gl/renderer/gl_renderer.h" #include "gl/shaders/gl_shader.h" +#include +#include namespace 
OpenGLRenderer { +struct ProgramBinary +{ + uint32_t format; + TArray data; +}; + +const char *ShaderMagic = "ZDSC"; + +static std::map> ShaderCache; // Not a TMap because it doesn't support unique_ptr move semantics + +static FString CalcProgramBinaryChecksum(const FString &vertex, const FString &fragment) +{ + const GLubyte *vendor = glGetString(GL_VENDOR); + const GLubyte *renderer = glGetString(GL_RENDERER); + const GLubyte *version = glGetString(GL_VERSION); + + uint8_t digest[16]; + MD5Context md5; + md5.Update(vendor, (unsigned int)strlen((const char*)vendor)); + md5.Update(renderer, (unsigned int)strlen((const char*)renderer)); + md5.Update(version, (unsigned int)strlen((const char*)version)); + md5.Update((const uint8_t *)vertex.GetChars(), (unsigned int)vertex.Len()); + md5.Update((const uint8_t *)fragment.GetChars(), (unsigned int)fragment.Len()); + md5.Final(digest); + + char hexdigest[33]; + for (int i = 0; i < 16; i++) + { + int v = digest[i] >> 4; + hexdigest[i * 2] = v < 10 ? ('0' + v) : ('a' + v - 10); + v = digest[i] & 15; + hexdigest[i * 2 + 1] = v < 10 ? 
('0' + v) : ('a' + v - 10); + } + hexdigest[32] = 0; + return hexdigest; +} + +static FString CreateProgramCacheName(bool create) +{ + FString path = M_GetCachePath(create); + if (create) CreatePath(path); + path << "/shadercache.zdsc"; + return path; +} + +static void LoadShaders() +{ + static bool loaded = false; + if (loaded) + return; + loaded = true; + + try + { + FString path = CreateProgramCacheName(false); + FileReader fr; + if (!fr.OpenFile(path)) + throw std::runtime_error("Could not open shader file"); + + char magic[4]; + fr.Read(magic, 4); + if (memcmp(magic, ShaderMagic, 4) != 0) + throw std::runtime_error("Not a shader cache file"); + + uint32_t count = fr.ReadUInt32(); + if (count > 512) + throw std::runtime_error("Too many shaders cached"); + + for (uint32_t i = 0; i < count; i++) + { + char hexdigest[33]; + if (fr.Read(hexdigest, 32) != 32) + throw std::runtime_error("Read error"); + hexdigest[32] = 0; + + std::unique_ptr binary(new ProgramBinary()); + binary->format = fr.ReadUInt32(); + uint32_t size = fr.ReadUInt32(); + if (size > 1024 * 1024) + throw std::runtime_error("Shader too big, probably file corruption"); + + binary->data.Resize(size); + if (fr.Read(binary->data.Data(), binary->data.Size()) != binary->data.Size()) + throw std::runtime_error("Read error"); + + ShaderCache[hexdigest] = std::move(binary); + } + } + catch (...) 
+ { + ShaderCache.clear(); + } +} + +static void SaveShaders() +{ + FString path = CreateProgramCacheName(true); + std::unique_ptr fw(FileWriter::Open(path)); + if (fw) + { + uint32_t count = (uint32_t)ShaderCache.size(); + fw->Write(ShaderMagic, 4); + fw->Write(&count, sizeof(uint32_t)); + for (const auto &it : ShaderCache) + { + uint32_t size = it.second->data.Size(); + fw->Write(it.first.GetChars(), 32); + fw->Write(&it.second->format, sizeof(uint32_t)); + fw->Write(&size, sizeof(uint32_t)); + fw->Write(it.second->data.Data(), it.second->data.Size()); + } + } +} + +TArray LoadCachedProgramBinary(const FString &vertex, const FString &fragment, uint32_t &binaryFormat) +{ + LoadShaders(); + + auto it = ShaderCache.find(CalcProgramBinaryChecksum(vertex, fragment)); + if (it != ShaderCache.end()) + { + binaryFormat = it->second->format; + return it->second->data; + } + else + { + binaryFormat = 0; + return {}; + } +} + +void SaveCachedProgramBinary(const FString &vertex, const FString &fragment, const TArray &binary, uint32_t binaryFormat) +{ + auto &entry = ShaderCache[CalcProgramBinaryChecksum(vertex, fragment)]; + entry.reset(new ProgramBinary()); + entry->format = binaryFormat; + entry->data = binary; + + SaveShaders(); +} bool FShader::Load(const char * name, const char * vert_prog_lump, const char * frag_prog_lump, const char * proc_prog_lump, const char * light_fragprog, const char * defines) { @@ -273,56 +417,85 @@ bool FShader::Load(const char * name, const char * vert_prog_lump, const char * vp_comb.Substitute("gl_ClipDistance", "//"); } - hVertProg = glCreateShader(GL_VERTEX_SHADER); - hFragProg = glCreateShader(GL_FRAGMENT_SHADER); - - FGLDebug::LabelObject(GL_SHADER, hVertProg, vert_prog_lump); - FGLDebug::LabelObject(GL_SHADER, hFragProg, frag_prog_lump); - - int vp_size = (int)vp_comb.Len(); - int fp_size = (int)fp_comb.Len(); - - const char *vp_ptr = vp_comb.GetChars(); - const char *fp_ptr = fp_comb.GetChars(); - - glShaderSource(hVertProg, 1, 
&vp_ptr, &vp_size); - glShaderSource(hFragProg, 1, &fp_ptr, &fp_size); - - glCompileShader(hVertProg); - glCompileShader(hFragProg); - hShader = glCreateProgram(); FGLDebug::LabelObject(GL_PROGRAM, hShader, name); - glAttachShader(hShader, hVertProg); - glAttachShader(hShader, hFragProg); + uint32_t binaryFormat = 0; + TArray binary = LoadCachedProgramBinary(vp_comb, fp_comb, binaryFormat); - glLinkProgram(hShader); - - glGetShaderInfoLog(hVertProg, 10000, NULL, buffer); - if (*buffer) + bool linked = false; + if (binary.Size() > 0 && glProgramBinary) { - error << "Vertex shader:\n" << buffer << "\n"; - } - glGetShaderInfoLog(hFragProg, 10000, NULL, buffer); - if (*buffer) - { - error << "Fragment shader:\n" << buffer << "\n"; + glProgramBinary(hShader, binaryFormat, binary.Data(), binary.Size()); + GLint status = 0; + glGetProgramiv(hShader, GL_LINK_STATUS, &status); + linked = (status == GL_TRUE); } - glGetProgramInfoLog(hShader, 10000, NULL, buffer); - if (*buffer) + if (!linked) { - error << "Linking:\n" << buffer << "\n"; - } - int linked; - glGetProgramiv(hShader, GL_LINK_STATUS, &linked); - if (linked == 0) - { - // only print message if there's an error. 
- I_Error("Init Shader '%s':\n%s\n", name, error.GetChars()); - } + hVertProg = glCreateShader(GL_VERTEX_SHADER); + hFragProg = glCreateShader(GL_FRAGMENT_SHADER); + FGLDebug::LabelObject(GL_SHADER, hVertProg, vert_prog_lump); + FGLDebug::LabelObject(GL_SHADER, hFragProg, frag_prog_lump); + + int vp_size = (int)vp_comb.Len(); + int fp_size = (int)fp_comb.Len(); + + const char *vp_ptr = vp_comb.GetChars(); + const char *fp_ptr = fp_comb.GetChars(); + + glShaderSource(hVertProg, 1, &vp_ptr, &vp_size); + glShaderSource(hFragProg, 1, &fp_ptr, &fp_size); + + glCompileShader(hVertProg); + glCompileShader(hFragProg); + + glAttachShader(hShader, hVertProg); + glAttachShader(hShader, hFragProg); + + glLinkProgram(hShader); + + glGetShaderInfoLog(hVertProg, 10000, NULL, buffer); + if (*buffer) + { + error << "Vertex shader:\n" << buffer << "\n"; + } + glGetShaderInfoLog(hFragProg, 10000, NULL, buffer); + if (*buffer) + { + error << "Fragment shader:\n" << buffer << "\n"; + } + + glGetProgramInfoLog(hShader, 10000, NULL, buffer); + if (*buffer) + { + error << "Linking:\n" << buffer << "\n"; + } + GLint status = 0; + glGetProgramiv(hShader, GL_LINK_STATUS, &status); + linked = (status == GL_TRUE); + if (!linked) + { + // only print message if there's an error. 
+ I_Error("Init Shader '%s':\n%s\n", name, error.GetChars()); + } + else if (glProgramBinary) + { + int binaryLength = 0; + glGetProgramiv(hShader, GL_PROGRAM_BINARY_LENGTH, &binaryLength); + binary.Resize(binaryLength); + glGetProgramBinary(hShader, binary.Size(), &binaryLength, &binaryFormat, binary.Data()); + binary.Resize(binaryLength); + SaveCachedProgramBinary(vp_comb, fp_comb, binary, binaryFormat); + } + } + else + { + hVertProg = 0; + hFragProg = 0; + } muDesaturation.Init(hShader, "uDesaturationFactor"); muFogEnabled.Init(hShader, "uFogEnabled"); @@ -376,7 +549,7 @@ bool FShader::Load(const char * name, const char * vert_prog_lump, const char * if (shadowmapindex > 0) glUniform1i(shadowmapindex, 16); glUseProgram(0); - return !!linked; + return linked; } //========================================================================== @@ -388,8 +561,10 @@ bool FShader::Load(const char * name, const char * vert_prog_lump, const char * FShader::~FShader() { glDeleteProgram(hShader); - glDeleteShader(hVertProg); - glDeleteShader(hFragProg); + if (hVertProg != 0) + glDeleteShader(hVertProg); + if (hFragProg != 0) + glDeleteShader(hFragProg); } diff --git a/src/gl/shaders/gl_shaderprogram.cpp b/src/gl/shaders/gl_shaderprogram.cpp index c402bf77b..684fcd852 100644 --- a/src/gl/shaders/gl_shaderprogram.cpp +++ b/src/gl/shaders/gl_shaderprogram.cpp @@ -37,6 +37,9 @@ namespace OpenGLRenderer { +TArray LoadCachedProgramBinary(const FString &vertex, const FString &fragment, uint32_t &binaryFormat); +void SaveCachedProgramBinary(const FString &vertex, const FString &fragment, const TArray &binary, uint32_t binaryFormat); + FShaderProgram::FShaderProgram() { for (int i = 0; i < NumShaderTypes; i++) @@ -94,14 +97,20 @@ void FShaderProgram::Compile(ShaderType type, const char *lumpName, const char * } void FShaderProgram::Compile(ShaderType type, const char *name, const FString &code, const char *defines, int maxGlslVersion) +{ + mShaderNames[type] = name; + 
mShaderSources[type] = PatchShader(type, code, defines, maxGlslVersion); +} + +void FShaderProgram::CompileShader(ShaderType type) { CreateShader(type); const auto &handle = mShaders[type]; - FGLDebug::LabelObject(GL_SHADER, handle, name); + FGLDebug::LabelObject(GL_SHADER, handle, mShaderNames[type]); - FString patchedCode = PatchShader(type, code, defines, maxGlslVersion); + const FString &patchedCode = mShaderSources[type]; int lengths[1] = { (int)patchedCode.Len() }; const char *sources[1] = { patchedCode.GetChars() }; glShaderSource(handle, 1, sources, lengths); @@ -112,7 +121,7 @@ void FShaderProgram::Compile(ShaderType type, const char *name, const FString &c glGetShaderiv(handle, GL_COMPILE_STATUS, &status); if (status == GL_FALSE) { - I_FatalError("Compile Shader '%s':\n%s\n", name, GetShaderInfoLog(handle).GetChars()); + I_FatalError("Compile Shader '%s':\n%s\n", mShaderNames[type], GetShaderInfoLog(handle).GetChars()); } else { @@ -131,13 +140,43 @@ void FShaderProgram::Compile(ShaderType type, const char *name, const FString &c void FShaderProgram::Link(const char *name) { FGLDebug::LabelObject(GL_PROGRAM, mProgram, name); - glLinkProgram(mProgram); - GLint status = 0; - glGetProgramiv(mProgram, GL_LINK_STATUS, &status); - if (status == GL_FALSE) + uint32_t binaryFormat = 0; + TArray binary = LoadCachedProgramBinary(mShaderSources[Vertex], mShaderSources[Fragment], binaryFormat); + + bool loadedFromBinary = false; + if (binary.Size() > 0 && glProgramBinary) { - I_FatalError("Link Shader '%s':\n%s\n", name, GetProgramInfoLog(mProgram).GetChars()); + if (mProgram == 0) + mProgram = glCreateProgram(); + glProgramBinary(mProgram, binaryFormat, binary.Data(), binary.Size()); + GLint status = 0; + glGetProgramiv(mProgram, GL_LINK_STATUS, &status); + loadedFromBinary = (status == GL_TRUE); + } + + if (!loadedFromBinary) + { + CompileShader(Vertex); + CompileShader(Fragment); + + glLinkProgram(mProgram); + + GLint status = 0; + glGetProgramiv(mProgram, 
GL_LINK_STATUS, &status); + if (status == GL_FALSE) + { + I_FatalError("Link Shader '%s':\n%s\n", name, GetProgramInfoLog(mProgram).GetChars()); + } + else if (glProgramBinary) + { + int binaryLength = 0; + glGetProgramiv(mProgram, GL_PROGRAM_BINARY_LENGTH, &binaryLength); + binary.Resize(binaryLength); + glGetProgramBinary(mProgram, binary.Size(), &binaryLength, &binaryFormat, binary.Data()); + binary.Resize(binaryLength); + SaveCachedProgramBinary(mShaderSources[Vertex], mShaderSources[Fragment], binary, binaryFormat); + } } // This is only for old OpenGL which didn't allow to set the binding from within the shader. diff --git a/src/gl/shaders/gl_shaderprogram.h b/src/gl/shaders/gl_shaderprogram.h index f9853d5d3..0167618e1 100644 --- a/src/gl/shaders/gl_shaderprogram.h +++ b/src/gl/shaders/gl_shaderprogram.h @@ -30,6 +30,7 @@ private: FShaderProgram(const FShaderProgram &) = delete; FShaderProgram &operator=(const FShaderProgram &) = delete; + void CompileShader(ShaderType type); FString PatchShader(ShaderType type, const FString &code, const char *defines, int maxGlslVersion); void CreateShader(ShaderType type); @@ -38,6 +39,8 @@ private: GLuint mProgram = 0; GLuint mShaders[NumShaderTypes]; + FString mShaderSources[NumShaderTypes]; + FString mShaderNames[NumShaderTypes]; TArray> samplerstobind; }; From 48fd91227c75049cf393fe6568ddde9d58d733f8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 21 Nov 2018 10:46:13 +0100 Subject: [PATCH 4/9] - fix compile error --- src/gl/shaders/gl_shaderprogram.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gl/shaders/gl_shaderprogram.cpp b/src/gl/shaders/gl_shaderprogram.cpp index 684fcd852..126817b59 100644 --- a/src/gl/shaders/gl_shaderprogram.cpp +++ b/src/gl/shaders/gl_shaderprogram.cpp @@ -121,7 +121,7 @@ void FShaderProgram::CompileShader(ShaderType type) glGetShaderiv(handle, GL_COMPILE_STATUS, &status); if (status == GL_FALSE) { - I_FatalError("Compile Shader '%s':\n%s\n", 
mShaderNames[type], GetShaderInfoLog(handle).GetChars()); + I_FatalError("Compile Shader '%s':\n%s\n", mShaderNames[type].GetChars(), GetShaderInfoLog(handle).GetChars()); } else { From b4aa4bf0ac161fe1aff070324b3747e356ff112e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 22 Nov 2018 05:31:10 +0100 Subject: [PATCH 5/9] - only use shader cache on Intel --- src/gl/shaders/gl_shader.cpp | 22 +++++++++++++++++++--- src/gl/shaders/gl_shaderprogram.cpp | 7 +++++-- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/gl/shaders/gl_shader.cpp b/src/gl/shaders/gl_shader.cpp index c3c19df8c..0af60e412 100644 --- a/src/gl/shaders/gl_shader.cpp +++ b/src/gl/shaders/gl_shader.cpp @@ -56,10 +56,24 @@ struct ProgramBinary TArray data; }; -const char *ShaderMagic = "ZDSC"; +static const char *ShaderMagic = "ZDSC"; static std::map> ShaderCache; // Not a TMap because it doesn't support unique_ptr move semantics +bool IsShaderCacheActive() +{ + static bool active = true; + static bool firstcall = true; + + if (firstcall) + { + const char *vendor = (const char *)glGetString(GL_VENDOR); + active = strstr(vendor, "Intel") == nullptr; + firstcall = false; + } + return active; +} + static FString CalcProgramBinaryChecksum(const FString &vertex, const FString &fragment) { const GLubyte *vendor = glGetString(GL_VENDOR); @@ -421,7 +435,9 @@ bool FShader::Load(const char * name, const char * vert_prog_lump, const char * FGLDebug::LabelObject(GL_PROGRAM, hShader, name); uint32_t binaryFormat = 0; - TArray binary = LoadCachedProgramBinary(vp_comb, fp_comb, binaryFormat); + TArray binary; + if (IsShaderCacheActive()) + binary = LoadCachedProgramBinary(vp_comb, fp_comb, binaryFormat); bool linked = false; if (binary.Size() > 0 && glProgramBinary) @@ -481,7 +497,7 @@ bool FShader::Load(const char * name, const char * vert_prog_lump, const char * // only print message if there's an error. 
I_Error("Init Shader '%s':\n%s\n", name, error.GetChars()); } - else if (glProgramBinary) + else if (glProgramBinary && IsShaderCacheActive()) { int binaryLength = 0; glGetProgramiv(hShader, GL_PROGRAM_BINARY_LENGTH, &binaryLength); diff --git a/src/gl/shaders/gl_shaderprogram.cpp b/src/gl/shaders/gl_shaderprogram.cpp index 126817b59..8068b3c2c 100644 --- a/src/gl/shaders/gl_shaderprogram.cpp +++ b/src/gl/shaders/gl_shaderprogram.cpp @@ -37,6 +37,7 @@ namespace OpenGLRenderer { +bool IsShaderCacheActive(); TArray LoadCachedProgramBinary(const FString &vertex, const FString &fragment, uint32_t &binaryFormat); void SaveCachedProgramBinary(const FString &vertex, const FString &fragment, const TArray &binary, uint32_t binaryFormat); @@ -142,7 +143,9 @@ void FShaderProgram::Link(const char *name) FGLDebug::LabelObject(GL_PROGRAM, mProgram, name); uint32_t binaryFormat = 0; - TArray binary = LoadCachedProgramBinary(mShaderSources[Vertex], mShaderSources[Fragment], binaryFormat); + TArray binary; + if (IsShaderCacheActive()) + binary = LoadCachedProgramBinary(mShaderSources[Vertex], mShaderSources[Fragment], binaryFormat); bool loadedFromBinary = false; if (binary.Size() > 0 && glProgramBinary) @@ -168,7 +171,7 @@ void FShaderProgram::Link(const char *name) { I_FatalError("Link Shader '%s':\n%s\n", name, GetProgramInfoLog(mProgram).GetChars()); } - else if (glProgramBinary) + else if (glProgramBinary && IsShaderCacheActive()) { int binaryLength = 0; glGetProgramiv(mProgram, GL_PROGRAM_BINARY_LENGTH, &binaryLength); From 4859c3d30170d8c5814b67081406347cefee82e1 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Thu, 22 Nov 2018 02:02:37 -0500 Subject: [PATCH 6/9] - fix inverted logic of Intel check --- src/gl/shaders/gl_shader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gl/shaders/gl_shader.cpp b/src/gl/shaders/gl_shader.cpp index 0af60e412..cc9811622 100644 --- a/src/gl/shaders/gl_shader.cpp +++ b/src/gl/shaders/gl_shader.cpp @@ -68,7 
+68,7 @@ bool IsShaderCacheActive() if (firstcall) { const char *vendor = (const char *)glGetString(GL_VENDOR); - active = strstr(vendor, "Intel") == nullptr; + active = !(strstr(vendor, "Intel") == nullptr); firstcall = false; } return active; From 3e9f531b5ffd866a4346d41b692f3340855471f3 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 22 Nov 2018 14:48:09 +0100 Subject: [PATCH 7/9] - add NUMA awareness to drawer threads --- src/polyrenderer/drawers/poly_triangle.cpp | 13 +++- src/polyrenderer/drawers/poly_triangle.h | 12 +++- src/polyrenderer/drawers/screen_triangle.cpp | 6 +- src/posix/i_system.h | 6 ++ src/swrenderer/drawers/r_thread.cpp | 46 ++++++++++-- src/swrenderer/drawers/r_thread.h | 18 ++++- src/win32/i_system.cpp | 74 ++++++++++++++++++++ src/win32/i_system.h | 5 ++ 8 files changed, 162 insertions(+), 18 deletions(-) diff --git a/src/polyrenderer/drawers/poly_triangle.cpp b/src/polyrenderer/drawers/poly_triangle.cpp index 53face08c..f75b5e43d 100644 --- a/src/polyrenderer/drawers/poly_triangle.cpp +++ b/src/polyrenderer/drawers/poly_triangle.cpp @@ -127,8 +127,13 @@ void PolyTriangleThreadData::ClearStencil(uint8_t value) int height = buffer->Height(); uint8_t *data = buffer->Values(); - data += core * width; - for (int y = core; y < height; y += num_cores) + int start_y = numa_node * height / num_numa_nodes; + int end_y = (numa_node + 1) * height / num_numa_nodes; + int core_skip = (num_cores - (start_y - core) % num_cores) % num_cores; + start_y += core_skip; + + data += start_y * width; + for (int y = start_y; y < end_y; y += num_cores) { memset(data, value, width); data += num_cores * width; @@ -146,6 +151,8 @@ void PolyTriangleThreadData::SetViewport(int x, int y, int width, int height, ui dest_height = new_dest_height; dest_pitch = new_dest_pitch; dest_bgra = new_dest_bgra; + numa_start_y = numa_node * dest_height / num_numa_nodes; + numa_end_y = (numa_node + 1) * dest_height / num_numa_nodes; ccw = true; weaponScene = false; } @@ 
-642,7 +649,7 @@ int PolyTriangleThreadData::ClipEdge(const ShadedTriVertex *verts, ShadedTriVert PolyTriangleThreadData *PolyTriangleThreadData::Get(DrawerThread *thread) { if (!thread->poly) - thread->poly = std::make_shared(thread->core, thread->num_cores); + thread->poly = std::make_shared(thread->core, thread->num_cores, thread->numa_node, thread->num_numa_nodes); return thread->poly.get(); } diff --git a/src/polyrenderer/drawers/poly_triangle.h b/src/polyrenderer/drawers/poly_triangle.h index d2a49ec97..6ed9ee8a4 100644 --- a/src/polyrenderer/drawers/poly_triangle.h +++ b/src/polyrenderer/drawers/poly_triangle.h @@ -48,7 +48,7 @@ public: class PolyTriangleThreadData { public: - PolyTriangleThreadData(int32_t core, int32_t num_cores) : core(core), num_cores(num_cores) { } + PolyTriangleThreadData(int32_t core, int32_t num_cores, int32_t numa_node, int32_t num_numa_nodes) : core(core), num_cores(num_cores), numa_node(numa_node), num_numa_nodes(num_numa_nodes) { } void ClearStencil(uint8_t value); void SetViewport(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra); @@ -63,12 +63,18 @@ public: int32_t core; int32_t num_cores; + int32_t numa_node; + int32_t num_numa_nodes; + + int numa_start_y; + int numa_end_y; // The number of lines to skip to reach the first line to be rendered by this thread int skipped_by_thread(int first_line) { - int core_skip = (num_cores - (first_line - core) % num_cores) % num_cores; - return core_skip; + int clip_first_line = MAX(first_line, numa_start_y); + int core_skip = (num_cores - (clip_first_line - core) % num_cores) % num_cores; + return clip_first_line + core_skip - first_line; } static PolyTriangleThreadData *Get(DrawerThread *thread); diff --git a/src/polyrenderer/drawers/screen_triangle.cpp b/src/polyrenderer/drawers/screen_triangle.cpp index f8b1a51f4..30f023166 100644 --- a/src/polyrenderer/drawers/screen_triangle.cpp +++ 
b/src/polyrenderer/drawers/screen_triangle.cpp @@ -59,9 +59,9 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, PolyTriangleThreadDat SortVertices(args, sortedVertices); int clipright = args->clipright; - int clipbottom = args->clipbottom; + int cliptop = thread->numa_start_y; + int clipbottom = MIN(args->clipbottom, thread->numa_end_y); - // Ranges that different triangles edges are active int topY = (int)(sortedVertices[0]->y + 0.5f); int midY = (int)(sortedVertices[1]->y + 0.5f); int bottomY = (int)(sortedVertices[2]->y + 0.5f); @@ -1567,6 +1567,7 @@ void DrawRect8(const void *destOrg, int destWidth, int destHeight, int destPitch uint32_t stepV = (int32_t)(fstepV * 0x1000000); uint32_t posV = startV; + y1 = MIN(y1, thread->numa_end_y); int num_cores = thread->num_cores; int skip = thread->skipped_by_thread(y0); posV += skip * stepV; @@ -1817,6 +1818,7 @@ void DrawRectOpt32(const void *destOrg, int destWidth, int destHeight, int destP uint32_t stepV = (int32_t)(fstepV * 0x1000000); uint32_t posV = startV; + y1 = MIN(y1, thread->numa_end_y); int num_cores = thread->num_cores; int skip = thread->skipped_by_thread(y0); posV += skip * stepV; diff --git a/src/posix/i_system.h b/src/posix/i_system.h index 7f468f143..1ff06a79e 100644 --- a/src/posix/i_system.h +++ b/src/posix/i_system.h @@ -35,6 +35,8 @@ #endif #include "doomtype.h" +#include +#include struct ticcmd_t; struct WadStuff; @@ -170,4 +172,8 @@ static inline char *strlwr(char *str) return str; } +inline int I_GetNumaNodeCount() { return 1; } +inline int I_GetNumaNodeThreadCount(int numaNode) { return std::max(std::thread::hardware_concurrency(), 1); } +inline void I_SetThreadNumaNode(std::thread &thread, int numaNode) { } + #endif diff --git a/src/swrenderer/drawers/r_thread.cpp b/src/swrenderer/drawers/r_thread.cpp index 0edf0d7f7..74956f14e 100644 --- a/src/swrenderer/drawers/r_thread.cpp +++ b/src/swrenderer/drawers/r_thread.cpp @@ -174,7 +174,11 @@ void DrawerThreads::StartThreads() { 
std::unique_lock lock(threads_mutex); - int num_threads = std::thread::hardware_concurrency(); + int num_numathreads = 0; + for (int i = 0; i < I_GetNumaNodeCount(); i++) + num_numathreads += I_GetNumaNodeThreadCount(i); + + int num_threads = num_numathreads; if (num_threads == 0) num_threads = 4; @@ -189,13 +193,41 @@ void DrawerThreads::StartThreads() threads.resize(num_threads); - for (int i = 0; i < num_threads; i++) + if (num_threads == num_numathreads) { - DrawerThreads *queue = this; - DrawerThread *thread = &threads[i]; - thread->core = i; - thread->num_cores = num_threads; - thread->thread = std::thread([=]() { queue->WorkerMain(thread); }); + int curThread = 0; + for (int numaNode = 0; numaNode < I_GetNumaNodeCount(); numaNode++) + { + for (int i = 0; i < I_GetNumaNodeThreadCount(numaNode); i++) + { + DrawerThreads *queue = this; + DrawerThread *thread = &threads[curThread++]; + thread->core = i; + thread->num_cores = I_GetNumaNodeThreadCount(numaNode); + thread->numa_node = numaNode; + thread->num_numa_nodes = I_GetNumaNodeCount(); + thread->numa_start_y = numaNode * viewheight / I_GetNumaNodeCount(); + thread->numa_end_y = (numaNode + 1) * viewheight / I_GetNumaNodeCount(); + thread->thread = std::thread([=]() { queue->WorkerMain(thread); }); + I_SetThreadNumaNode(thread->thread, numaNode); + } + } + } + else + { + for (int i = 0; i < num_threads; i++) + { + DrawerThreads *queue = this; + DrawerThread *thread = &threads[i]; + thread->core = i; + thread->num_cores = num_threads; + thread->numa_node = 0; + thread->num_numa_nodes = 1; + thread->numa_start_y = 0; + thread->numa_end_y = viewheight; + thread->thread = std::thread([=]() { queue->WorkerMain(thread); }); + I_SetThreadNumaNode(thread->thread, 0); + } } } } diff --git a/src/swrenderer/drawers/r_thread.h b/src/swrenderer/drawers/r_thread.h index c2e8b9c80..f2d1d4d0e 100644 --- a/src/swrenderer/drawers/r_thread.h +++ b/src/swrenderer/drawers/r_thread.h @@ -47,6 +47,16 @@ public: // Number of active 
threads int num_cores = 1; + // NUMA node this thread belongs to + int numa_node = 0; + + // Number of active NUMA nodes + int num_numa_nodes = 1; + + // Active range for the numa block the cores are part of + int numa_start_y = 0; + int numa_end_y = 0; + // Working buffer used by the tilted (sloped) span drawer const uint8_t *tiltlighting[MAXWIDTH]; @@ -57,19 +67,21 @@ public: // Checks if a line is rendered by this thread bool line_skipped_by_thread(int line) { - return line % num_cores != core; + return line < numa_start_y || line >= numa_end_y || line % num_cores != core; } // The number of lines to skip to reach the first line to be rendered by this thread int skipped_by_thread(int first_line) { - int core_skip = (num_cores - (first_line - core) % num_cores) % num_cores; - return core_skip; + int clip_first_line = MAX(first_line, numa_start_y); + int core_skip = (num_cores - (clip_first_line - core) % num_cores) % num_cores; + return clip_first_line + core_skip - first_line; } // The number of lines to be rendered by this thread int count_for_thread(int first_line, int count) { + count = MIN(count, numa_end_y - first_line); int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; return MAX(c, 0); } diff --git a/src/win32/i_system.cpp b/src/win32/i_system.cpp index 44d2d4395..1e5c78640 100644 --- a/src/win32/i_system.cpp +++ b/src/win32/i_system.cpp @@ -50,6 +50,7 @@ #include #include #include +#include #include @@ -1470,3 +1471,76 @@ int _stat64i32(const char *path, struct _stat64i32 *buffer) return 0; } #endif + +struct NumaNode +{ + uint64_t affinityMask = 0; + int threadCount = 0; +}; +static TArray numaNodes; + +static void SetupNumaNodes() +{ + if (numaNodes.Size() == 0) + { + // Query processors in the system + DWORD_PTR processMask = 0, systemMask = 0; + BOOL result = GetProcessAffinityMask(GetCurrentProcess(), &processMask, &systemMask); + if (result) + { + // Find the numa node each processor belongs to + std::map nodes; + for 
(int i = 0; i < sizeof(DWORD_PTR) * 8; i++) + { + DWORD_PTR processorMask = (((DWORD_PTR)1) << i); + if (processMask & processorMask) + { + UCHAR nodeNumber = 0; + result = GetNumaProcessorNode(i, &nodeNumber); + if (nodeNumber != 0xff) + { + nodes[nodeNumber].affinityMask |= (uint64_t)processorMask; + nodes[nodeNumber].threadCount++; + } + } + } + + // Convert map to a list + for (const auto &it : nodes) + { + numaNodes.Push(it.second); + } + } + + // Fall back to a single node if something went wrong + if (numaNodes.Size() == 0) + { + NumaNode node; + node.threadCount = std::thread::hardware_concurrency(); + if (node.threadCount == 0) + node.threadCount = 1; + numaNodes.Push(node); + } + } +} + +int I_GetNumaNodeCount() +{ + SetupNumaNodes(); + return numaNodes.Size(); +} + +int I_GetNumaNodeThreadCount(int numaNode) +{ + SetupNumaNodes(); + return numaNodes[numaNode].threadCount; +} + +void I_SetThreadNumaNode(std::thread &thread, int numaNode) +{ + if (numaNodes.Size() > 1) + { + HANDLE handle = (HANDLE)thread.native_handle(); + SetThreadAffinityMask(handle, (DWORD_PTR)numaNodes[numaNode].affinityMask); + } +} diff --git a/src/win32/i_system.h b/src/win32/i_system.h index a5f7b5d0b..903203daf 100644 --- a/src/win32/i_system.h +++ b/src/win32/i_system.h @@ -29,6 +29,7 @@ #define __I_SYSTEM__ #include "doomtype.h" +#include struct ticcmd_t; struct WadStuff; @@ -186,4 +187,8 @@ inline int I_FindAttr(findstate_t *fileinfo) #define FA_DIREC 0x00000010 #define FA_ARCH 0x00000020 +int I_GetNumaNodeCount(); +int I_GetNumaNodeThreadCount(int numaNode); +void I_SetThreadNumaNode(std::thread &thread, int numaNode); + #endif From d4e630c127646a4c762b39c35a8b8f4af73e7d4e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 23 Nov 2018 03:00:11 +0100 Subject: [PATCH 8/9] - fix a rendering glitch when changing resolution --- src/swrenderer/drawers/r_thread.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/swrenderer/drawers/r_thread.cpp 
b/src/swrenderer/drawers/r_thread.cpp index 74956f14e..31a6f1915 100644 --- a/src/swrenderer/drawers/r_thread.cpp +++ b/src/swrenderer/drawers/r_thread.cpp @@ -288,4 +288,7 @@ void MemcpyCommand::Execute(DrawerThread *thread) d += dstep; s += sstep; } + + thread->numa_start_y = thread->numa_node * viewheight / thread->num_numa_nodes; + thread->numa_end_y = (thread->numa_node + 1) * viewheight / thread->num_numa_nodes; } From bced30d1e3fbd79bb7595406983e8bc32775b624 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Fri, 23 Nov 2018 10:18:14 +0100 Subject: [PATCH 9/9] - made CDoomError inherit from std::exception so that the main catch block can also deal with exceptions thrown by the STL. - Also do not ignore empty exception messages as irrelevant. The only irrelevant exception type is CNoRunExit. --- src/doomerrors.h | 14 ++++++++++++-- src/posix/cocoa/i_main_except.cpp | 6 +++--- src/posix/i_steam.cpp | 4 ++-- src/posix/sdl/i_main.cpp | 6 +++--- src/win32/i_main.cpp | 9 +++++---- 5 files changed, 25 insertions(+), 14 deletions(-) diff --git a/src/doomerrors.h b/src/doomerrors.h index c389c6b05..a7ec02654 100644 --- a/src/doomerrors.h +++ b/src/doomerrors.h @@ -37,10 +37,11 @@ #include #include +#include <exception> #define MAX_ERRORTEXT 1024 -class CDoomError +class CDoomError : public std::exception { public: CDoomError () @@ -69,13 +70,22 @@ public: else return NULL; } + char const *what() const override + { + return m_Message; + } + protected: char m_Message[MAX_ERRORTEXT]; }; -class CNoRunExit : public CDoomError +class CNoRunExit : public std::exception { +public: + CNoRunExit() : std::exception("NoRunExit") + { + } }; class CRecoverableError : public CDoomError diff --git a/src/posix/cocoa/i_main_except.cpp b/src/posix/cocoa/i_main_except.cpp index 5c3785588..a61eca2cc 100644 --- a/src/posix/cocoa/i_main_except.cpp +++ b/src/posix/cocoa/i_main_except.cpp @@ -46,13 +46,13 @@ void OriginalMainExcept(int argc, char** argv) { OriginalMainTry(argc, argv); } -
catch(const CDoomError& error) + catch(const std::exception& error) { - const char* const message = error.GetMessage(); + const char* const message = error.what(); if (NULL != message) { - fprintf(stderr, "%s\n", message); + if (strcmp(message, "NoRunExit")) fprintf(stderr, "%s\n", message); Mac_I_FatalError(message); } diff --git a/src/posix/i_steam.cpp b/src/posix/i_steam.cpp index 29dc9b845..dccadb021 100644 --- a/src/posix/i_steam.cpp +++ b/src/posix/i_steam.cpp @@ -178,7 +178,7 @@ TArray I_GetSteamPath() { SteamInstallFolders = ParseSteamRegistry(regPath); } - catch(class CDoomError &error) + catch(class CRecoverableError &error) { // If we can't parse for some reason just pretend we can't find anything. return result; @@ -201,7 +201,7 @@ TArray I_GetSteamPath() { SteamInstallFolders = ParseSteamRegistry(regPath); } - catch(class CDoomError &error) + catch(class CRecoverableError &error) { // If we can't parse for some reason just pretend we can't find anything. return result; diff --git a/src/posix/sdl/i_main.cpp b/src/posix/sdl/i_main.cpp index 38f035515..8e7230777 100644 --- a/src/posix/sdl/i_main.cpp +++ b/src/posix/sdl/i_main.cpp @@ -258,11 +258,11 @@ int main (int argc, char **argv) C_InitConsole (80*8, 25*8, false); D_DoomMain (); } - catch (class CDoomError &error) + catch (std::exception &error) { I_ShutdownJoysticks(); - if (error.GetMessage ()) - fprintf (stderr, "%s\n", error.GetMessage ()); + if (error.what () && strcmp(error.what(), "NoRunExit")) + fprintf (stderr, "%s\n", error.what ()); #ifdef __APPLE__ Mac_I_FatalError(error.GetMessage()); diff --git a/src/win32/i_main.cpp b/src/win32/i_main.cpp index b1d7a7bb7..6a75a597a 100644 --- a/src/win32/i_main.cpp +++ b/src/win32/i_main.cpp @@ -1048,21 +1048,22 @@ void DoMain (HINSTANCE hInstance) } exit(0); } - catch (class CDoomError &error) + catch (std::exception &error) { I_ShutdownGraphics (); RestoreConView (); S_StopMusic(true); I_FlushBufferedConsoleStuff(); - if (error.GetMessage ()) + auto 
msg = error.what(); + if (strcmp(msg, "NoRunExit")) { if (!batchrun) { - ShowErrorPane(error.GetMessage()); + ShowErrorPane(msg); } else { - Printf("%s\n", error.GetMessage()); + Printf("%s\n", msg); } } exit (-1);