From 216191c86d2a042e5db8141425df9486457c2520 Mon Sep 17 00:00:00 2001
From: Magnus Norddahl <dpjudas@users.noreply.github.com>
Date: Tue, 20 Nov 2018 13:41:27 +0100
Subject: [PATCH 1/9] - interpolate the normal for models

---
 src/hwrenderer/models/hw_models.cpp | 3 ++-
 wadsrc/static/shaders/glsl/main.vp  | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/hwrenderer/models/hw_models.cpp b/src/hwrenderer/models/hw_models.cpp
index ce22625d8..d4241c026 100644
--- a/src/hwrenderer/models/hw_models.cpp
+++ b/src/hwrenderer/models/hw_models.cpp
@@ -144,7 +144,8 @@ FModelVertexBuffer::FModelVertexBuffer(bool needindex, bool singleframe)
 		{ 0, VATTR_VERTEX, VFmt_Float3, (int)myoffsetof(FModelVertex, x) },
 		{ 0, VATTR_TEXCOORD, VFmt_Float2, (int)myoffsetof(FModelVertex, u) },
 		{ 0, VATTR_NORMAL, VFmt_Packed_A2R10G10B10, (int)myoffsetof(FModelVertex, packedNormal) },
-		{ 1, VATTR_VERTEX2, VFmt_Float3, (int)myoffsetof(FModelVertex, x) }
+		{ 1, VATTR_VERTEX2, VFmt_Float3, (int)myoffsetof(FModelVertex, x) },
+		{ 1, VATTR_NORMAL2, VFmt_Packed_A2R10G10B10, (int)myoffsetof(FModelVertex, packedNormal) }
 	};
 	mVertexBuffer->SetFormat(2, 4, sizeof(FModelVertex), format);
 }
diff --git a/wadsrc/static/shaders/glsl/main.vp b/wadsrc/static/shaders/glsl/main.vp
index dccbb77c0..a5875318f 100644
--- a/wadsrc/static/shaders/glsl/main.vp
+++ b/wadsrc/static/shaders/glsl/main.vp
@@ -5,6 +5,7 @@ layout(location = 2) in vec4 aColor;
 #ifndef SIMPLE	// we do not need these for simple shaders
 layout(location = 3) in vec4 aVertex2;
 layout(location = 4) in vec4 aNormal;
+layout(location = 5) in vec4 aNormal2;
 out vec4 pixelpos;
 out vec3 glowdist;
 out vec3 gradientdist;
@@ -62,7 +63,7 @@ void main()
 			gl_ClipDistance[4] = worldcoord.y - ((uSplitBottomPlane.w + uSplitBottomPlane.x * worldcoord.x + uSplitBottomPlane.y * worldcoord.z) * uSplitBottomPlane.z);
 		}
 
-		vWorldNormal = NormalModelMatrix * vec4(normalize(aNormal.xyz), 1.0);
+		vWorldNormal = NormalModelMatrix * vec4(normalize(mix(aNormal.xyz. aNormal2.xyz, uInterpolationFactor)), 1.0);
 		vEyeNormal = NormalViewMatrix * vWorldNormal;
 	#endif
 	

From 96df21e3dcd867d7ba6ed2a8b6318dfc646016b8 Mon Sep 17 00:00:00 2001
From: Magnus Norddahl <dpjudas@users.noreply.github.com>
Date: Tue, 20 Nov 2018 13:54:18 +0100
Subject: [PATCH 2/9] - fix typo

---
 wadsrc/static/shaders/glsl/main.vp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wadsrc/static/shaders/glsl/main.vp b/wadsrc/static/shaders/glsl/main.vp
index a5875318f..81d4022f0 100644
--- a/wadsrc/static/shaders/glsl/main.vp
+++ b/wadsrc/static/shaders/glsl/main.vp
@@ -63,7 +63,7 @@ void main()
 			gl_ClipDistance[4] = worldcoord.y - ((uSplitBottomPlane.w + uSplitBottomPlane.x * worldcoord.x + uSplitBottomPlane.y * worldcoord.z) * uSplitBottomPlane.z);
 		}
 
-		vWorldNormal = NormalModelMatrix * vec4(normalize(mix(aNormal.xyz. aNormal2.xyz, uInterpolationFactor)), 1.0);
+		vWorldNormal = NormalModelMatrix * vec4(normalize(mix(aNormal.xyz, aNormal2.xyz, uInterpolationFactor)), 1.0);
 		vEyeNormal = NormalViewMatrix * vWorldNormal;
 	#endif
 	

From dd42557e69c8347bd4b96a4db162e14271370a1d Mon Sep 17 00:00:00 2001
From: Magnus Norddahl <dpjudas@users.noreply.github.com>
Date: Tue, 20 Nov 2018 23:12:20 +0100
Subject: [PATCH 3/9] - implement a shader cache

---
 src/gl/shaders/gl_shader.cpp        | 263 +++++++++++++++++++++++-----
 src/gl/shaders/gl_shaderprogram.cpp |  55 +++++-
 src/gl/shaders/gl_shaderprogram.h   |   3 +
 3 files changed, 269 insertions(+), 52 deletions(-)

diff --git a/src/gl/shaders/gl_shader.cpp b/src/gl/shaders/gl_shader.cpp
index b31e9532f..c3c19df8c 100644
--- a/src/gl/shaders/gl_shader.cpp
+++ b/src/gl/shaders/gl_shader.cpp
@@ -32,6 +32,8 @@
 #include "w_wad.h"
 #include "doomerrors.h"
 #include "cmdlib.h"
+#include "md5.h"
+#include "m_misc.h"
 #include "hwrenderer/utility/hw_shaderpatcher.h"
 #include "hwrenderer/data/shaderuniforms.h"
 #include "hwrenderer/scene/hw_viewpointuniforms.h"
@@ -42,10 +44,152 @@
 #include "r_data/matrix.h"
 #include "gl/renderer/gl_renderer.h"
 #include "gl/shaders/gl_shader.h"
+#include <map>
+#include <memory>
 
 namespace OpenGLRenderer
 {
 
+struct ProgramBinary
+{
+	uint32_t format;
+	TArray<uint8_t> data;
+};
+
+const char *ShaderMagic = "ZDSC";
+
+static std::map<FString, std::unique_ptr<ProgramBinary>> ShaderCache; // Not a TMap because it doesn't support unique_ptr move semantics
+
+static FString CalcProgramBinaryChecksum(const FString &vertex, const FString &fragment)
+{
+	const GLubyte *vendor = glGetString(GL_VENDOR);
+	const GLubyte *renderer = glGetString(GL_RENDERER);
+	const GLubyte *version = glGetString(GL_VERSION);
+
+	uint8_t digest[16];
+	MD5Context md5;
+	md5.Update(vendor, (unsigned int)strlen((const char*)vendor));
+	md5.Update(renderer, (unsigned int)strlen((const char*)renderer));
+	md5.Update(version, (unsigned int)strlen((const char*)version));
+	md5.Update((const uint8_t *)vertex.GetChars(), (unsigned int)vertex.Len());
+	md5.Update((const uint8_t *)fragment.GetChars(), (unsigned int)fragment.Len());
+	md5.Final(digest);
+
+	char hexdigest[33];
+	for (int i = 0; i < 16; i++)
+	{
+		int v = digest[i] >> 4;
+		hexdigest[i * 2] = v < 10 ? ('0' + v) : ('a' + v - 10);
+		v = digest[i] & 15;
+		hexdigest[i * 2 + 1] = v < 10 ? ('0' + v) : ('a' + v - 10);
+	}
+	hexdigest[32] = 0;
+	return hexdigest;
+}
+
+static FString CreateProgramCacheName(bool create)
+{
+	FString path = M_GetCachePath(create);
+	if (create) CreatePath(path);
+	path << "/shadercache.zdsc";
+	return path;
+}
+
+static void LoadShaders()
+{
+	static bool loaded = false;
+	if (loaded)
+		return;
+	loaded = true;
+
+	try
+	{
+		FString path = CreateProgramCacheName(false);
+		FileReader fr;
+		if (!fr.OpenFile(path))
+			throw std::runtime_error("Could not open shader file");
+
+		char magic[4];
+		fr.Read(magic, 4);
+		if (memcmp(magic, ShaderMagic, 4) != 0)
+			throw std::runtime_error("Not a shader cache file");
+
+		uint32_t count = fr.ReadUInt32();
+		if (count > 512)
+			throw std::runtime_error("Too many shaders cached");
+
+		for (uint32_t i = 0; i < count; i++)
+		{
+			char hexdigest[33];
+			if (fr.Read(hexdigest, 32) != 32)
+				throw std::runtime_error("Read error");
+			hexdigest[32] = 0;
+
+			std::unique_ptr<ProgramBinary> binary(new ProgramBinary());
+			binary->format = fr.ReadUInt32();
+			uint32_t size = fr.ReadUInt32();
+			if (size > 1024 * 1024)
+				throw std::runtime_error("Shader too big, probably file corruption");
+
+			binary->data.Resize(size);
+			if (fr.Read(binary->data.Data(), binary->data.Size()) != binary->data.Size())
+				throw std::runtime_error("Read error");
+
+			ShaderCache[hexdigest] = std::move(binary);
+		}
+	}
+	catch (...)
+	{
+		ShaderCache.clear();
+	}
+}
+
+static void SaveShaders()
+{
+	FString path = CreateProgramCacheName(true);
+	std::unique_ptr<FileWriter> fw(FileWriter::Open(path));
+	if (fw)
+	{
+		uint32_t count = (uint32_t)ShaderCache.size();
+		fw->Write(ShaderMagic, 4);
+		fw->Write(&count, sizeof(uint32_t));
+		for (const auto &it : ShaderCache)
+		{
+			uint32_t size = it.second->data.Size();
+			fw->Write(it.first.GetChars(), 32);
+			fw->Write(&it.second->format, sizeof(uint32_t));
+			fw->Write(&size, sizeof(uint32_t));
+			fw->Write(it.second->data.Data(), it.second->data.Size());
+		}
+	}
+}
+
+TArray<uint8_t> LoadCachedProgramBinary(const FString &vertex, const FString &fragment, uint32_t &binaryFormat)
+{
+	LoadShaders();
+
+	auto it = ShaderCache.find(CalcProgramBinaryChecksum(vertex, fragment));
+	if (it != ShaderCache.end())
+	{
+		binaryFormat = it->second->format;
+		return it->second->data;
+	}
+	else
+	{
+		binaryFormat = 0;
+		return {};
+	}
+}
+
+void SaveCachedProgramBinary(const FString &vertex, const FString &fragment, const TArray<uint8_t> &binary, uint32_t binaryFormat)
+{
+	auto &entry = ShaderCache[CalcProgramBinaryChecksum(vertex, fragment)];
+	entry.reset(new ProgramBinary());
+	entry->format = binaryFormat;
+	entry->data = binary;
+
+	SaveShaders();
+}
 
 bool FShader::Load(const char * name, const char * vert_prog_lump, const char * frag_prog_lump, const char * proc_prog_lump, const char * light_fragprog, const char * defines)
 {
@@ -273,56 +417,85 @@ bool FShader::Load(const char * name, const char * vert_prog_lump, const char *
 		vp_comb.Substitute("gl_ClipDistance", "//");
 	}
 
-	hVertProg = glCreateShader(GL_VERTEX_SHADER);
-	hFragProg = glCreateShader(GL_FRAGMENT_SHADER);	
-
-	FGLDebug::LabelObject(GL_SHADER, hVertProg, vert_prog_lump);
-	FGLDebug::LabelObject(GL_SHADER, hFragProg, frag_prog_lump);
-
-	int vp_size = (int)vp_comb.Len();
-	int fp_size = (int)fp_comb.Len();
-
-	const char *vp_ptr = vp_comb.GetChars();
-	const char *fp_ptr = fp_comb.GetChars();
-
-	glShaderSource(hVertProg, 1, &vp_ptr, &vp_size);
-	glShaderSource(hFragProg, 1, &fp_ptr, &fp_size);
-
-	glCompileShader(hVertProg);
-	glCompileShader(hFragProg);
-
 	hShader = glCreateProgram();
 	FGLDebug::LabelObject(GL_PROGRAM, hShader, name);
 
-	glAttachShader(hShader, hVertProg);
-	glAttachShader(hShader, hFragProg);
+	uint32_t binaryFormat = 0;
+	TArray<uint8_t> binary = LoadCachedProgramBinary(vp_comb, fp_comb, binaryFormat);
 
-	glLinkProgram(hShader);
-
-	glGetShaderInfoLog(hVertProg, 10000, NULL, buffer);
-	if (*buffer) 
+	bool linked = false;
+	if (binary.Size() > 0 && glProgramBinary)
 	{
-		error << "Vertex shader:\n" << buffer << "\n";
-	}
-	glGetShaderInfoLog(hFragProg, 10000, NULL, buffer);
-	if (*buffer) 
-	{
-		error << "Fragment shader:\n" << buffer << "\n";
+		glProgramBinary(hShader, binaryFormat, binary.Data(), binary.Size());
+		GLint status = 0;
+		glGetProgramiv(hShader, GL_LINK_STATUS, &status);
+		linked = (status == GL_TRUE);
 	}
 
-	glGetProgramInfoLog(hShader, 10000, NULL, buffer);
-	if (*buffer) 
+	if (!linked)
 	{
-		error << "Linking:\n" << buffer << "\n";
-	}
-	int linked;
-	glGetProgramiv(hShader, GL_LINK_STATUS, &linked);
-	if (linked == 0)
-	{
-		// only print message if there's an error.
-		I_Error("Init Shader '%s':\n%s\n", name, error.GetChars());
-	}
+		hVertProg = glCreateShader(GL_VERTEX_SHADER);
+		hFragProg = glCreateShader(GL_FRAGMENT_SHADER);
 
+		FGLDebug::LabelObject(GL_SHADER, hVertProg, vert_prog_lump);
+		FGLDebug::LabelObject(GL_SHADER, hFragProg, frag_prog_lump);
+
+		int vp_size = (int)vp_comb.Len();
+		int fp_size = (int)fp_comb.Len();
+
+		const char *vp_ptr = vp_comb.GetChars();
+		const char *fp_ptr = fp_comb.GetChars();
+
+		glShaderSource(hVertProg, 1, &vp_ptr, &vp_size);
+		glShaderSource(hFragProg, 1, &fp_ptr, &fp_size);
+
+		glCompileShader(hVertProg);
+		glCompileShader(hFragProg);
+
+		glAttachShader(hShader, hVertProg);
+		glAttachShader(hShader, hFragProg);
+
+		glLinkProgram(hShader);
+
+		glGetShaderInfoLog(hVertProg, 10000, NULL, buffer);
+		if (*buffer)
+		{
+			error << "Vertex shader:\n" << buffer << "\n";
+		}
+		glGetShaderInfoLog(hFragProg, 10000, NULL, buffer);
+		if (*buffer)
+		{
+			error << "Fragment shader:\n" << buffer << "\n";
+		}
+
+		glGetProgramInfoLog(hShader, 10000, NULL, buffer);
+		if (*buffer)
+		{
+			error << "Linking:\n" << buffer << "\n";
+		}
+		GLint status = 0;
+		glGetProgramiv(hShader, GL_LINK_STATUS, &status);
+		linked = (status == GL_TRUE);
+		if (!linked)
+		{
+			// only print message if there's an error.
+			I_Error("Init Shader '%s':\n%s\n", name, error.GetChars());
+		}
+		else if (glProgramBinary)
+		{
+			int binaryLength = 0;
+			glGetProgramiv(hShader, GL_PROGRAM_BINARY_LENGTH, &binaryLength);
+			binary.Resize(binaryLength);
+			glGetProgramBinary(hShader, binary.Size(), &binaryLength, &binaryFormat, binary.Data());
+			binary.Resize(binaryLength);
+			SaveCachedProgramBinary(vp_comb, fp_comb, binary, binaryFormat);
+		}
+	}
+	else
+	{
+		hVertProg = 0;
+		hFragProg = 0;
+	}
 
 	muDesaturation.Init(hShader, "uDesaturationFactor");
 	muFogEnabled.Init(hShader, "uFogEnabled");
@@ -376,7 +549,7 @@ bool FShader::Load(const char * name, const char * vert_prog_lump, const char *
 	if (shadowmapindex > 0) glUniform1i(shadowmapindex, 16);
 
 	glUseProgram(0);
-	return !!linked;
+	return linked;
 }
 
 //==========================================================================
@@ -388,8 +561,10 @@ bool FShader::Load(const char * name, const char * vert_prog_lump, const char *
 FShader::~FShader()
 {
 	glDeleteProgram(hShader);
-	glDeleteShader(hVertProg);
-	glDeleteShader(hFragProg);
+	if (hVertProg != 0)
+		glDeleteShader(hVertProg);
+	if (hFragProg != 0)
+		glDeleteShader(hFragProg);
 }
 
 
diff --git a/src/gl/shaders/gl_shaderprogram.cpp b/src/gl/shaders/gl_shaderprogram.cpp
index c402bf77b..684fcd852 100644
--- a/src/gl/shaders/gl_shaderprogram.cpp
+++ b/src/gl/shaders/gl_shaderprogram.cpp
@@ -37,6 +37,9 @@
 namespace OpenGLRenderer
 {
 
+TArray<uint8_t> LoadCachedProgramBinary(const FString &vertex, const FString &fragment, uint32_t &binaryFormat);
+void SaveCachedProgramBinary(const FString &vertex, const FString &fragment, const TArray<uint8_t> &binary, uint32_t binaryFormat);
+
 FShaderProgram::FShaderProgram()
 {
 	for (int i = 0; i < NumShaderTypes; i++)
@@ -94,14 +97,20 @@ void FShaderProgram::Compile(ShaderType type, const char *lumpName, const char *
 }
 
 void FShaderProgram::Compile(ShaderType type, const char *name, const FString &code, const char *defines, int maxGlslVersion)
+{
+	mShaderNames[type] = name;
+	mShaderSources[type] = PatchShader(type, code, defines, maxGlslVersion);
+}
+
+void FShaderProgram::CompileShader(ShaderType type)
 {
 	CreateShader(type);
 
 	const auto &handle = mShaders[type];
 
-	FGLDebug::LabelObject(GL_SHADER, handle, name);
+	FGLDebug::LabelObject(GL_SHADER, handle, mShaderNames[type]);
 
-	FString patchedCode = PatchShader(type, code, defines, maxGlslVersion);
+	const FString &patchedCode = mShaderSources[type];
 	int lengths[1] = { (int)patchedCode.Len() };
 	const char *sources[1] = { patchedCode.GetChars() };
 	glShaderSource(handle, 1, sources, lengths);
@@ -112,7 +121,7 @@ void FShaderProgram::Compile(ShaderType type, const char *name, const FString &c
 	glGetShaderiv(handle, GL_COMPILE_STATUS, &status);
 	if (status == GL_FALSE)
 	{
-		I_FatalError("Compile Shader '%s':\n%s\n", name, GetShaderInfoLog(handle).GetChars());
+		I_FatalError("Compile Shader '%s':\n%s\n", mShaderNames[type], GetShaderInfoLog(handle).GetChars());
 	}
 	else
 	{
@@ -131,13 +140,43 @@ void FShaderProgram::Compile(ShaderType type, const char *name, const FString &c
 void FShaderProgram::Link(const char *name)
 {
 	FGLDebug::LabelObject(GL_PROGRAM, mProgram, name);
-	glLinkProgram(mProgram);
 
-	GLint status = 0;
-	glGetProgramiv(mProgram, GL_LINK_STATUS, &status);
-	if (status == GL_FALSE)
+	uint32_t binaryFormat = 0;
+	TArray<uint8_t> binary = LoadCachedProgramBinary(mShaderSources[Vertex], mShaderSources[Fragment], binaryFormat);
+
+	bool loadedFromBinary = false;
+	if (binary.Size() > 0 && glProgramBinary)
 	{
-		I_FatalError("Link Shader '%s':\n%s\n", name, GetProgramInfoLog(mProgram).GetChars());
+		if (mProgram == 0)
+			mProgram = glCreateProgram();
+		glProgramBinary(mProgram, binaryFormat, binary.Data(), binary.Size());
+		GLint status = 0;
+		glGetProgramiv(mProgram, GL_LINK_STATUS, &status);
+		loadedFromBinary = (status == GL_TRUE);
+	}
+
+	if (!loadedFromBinary)
+	{
+		CompileShader(Vertex);
+		CompileShader(Fragment);
+
+		glLinkProgram(mProgram);
+
+		GLint status = 0;
+		glGetProgramiv(mProgram, GL_LINK_STATUS, &status);
+		if (status == GL_FALSE)
+		{
+			I_FatalError("Link Shader '%s':\n%s\n", name, GetProgramInfoLog(mProgram).GetChars());
+		}
+		else if (glProgramBinary)
+		{
+			int binaryLength = 0;
+			glGetProgramiv(mProgram, GL_PROGRAM_BINARY_LENGTH, &binaryLength);
+			binary.Resize(binaryLength);
+			glGetProgramBinary(mProgram, binary.Size(), &binaryLength, &binaryFormat, binary.Data());
+			binary.Resize(binaryLength);
+			SaveCachedProgramBinary(mShaderSources[Vertex], mShaderSources[Fragment], binary, binaryFormat);
+		}
 	}
 
 	// This is only for old OpenGL which didn't allow to set the binding from within the shader.
diff --git a/src/gl/shaders/gl_shaderprogram.h b/src/gl/shaders/gl_shaderprogram.h
index f9853d5d3..0167618e1 100644
--- a/src/gl/shaders/gl_shaderprogram.h
+++ b/src/gl/shaders/gl_shaderprogram.h
@@ -30,6 +30,7 @@ private:
 	FShaderProgram(const FShaderProgram &) = delete;
 	FShaderProgram &operator=(const FShaderProgram &) = delete;
 
+	void CompileShader(ShaderType type);
 	FString PatchShader(ShaderType type, const FString &code, const char *defines, int maxGlslVersion);
 
 	void CreateShader(ShaderType type);
@@ -38,6 +39,8 @@ private:
 
 	GLuint mProgram = 0;
 	GLuint mShaders[NumShaderTypes];
+	FString mShaderSources[NumShaderTypes];
+	FString mShaderNames[NumShaderTypes];
 	TArray<std::pair<FString, int>> samplerstobind;
 };
 

From 48fd91227c75049cf393fe6568ddde9d58d733f8 Mon Sep 17 00:00:00 2001
From: Magnus Norddahl <dpjudas@users.noreply.github.com>
Date: Wed, 21 Nov 2018 10:46:13 +0100
Subject: [PATCH 4/9] - fix compile error

---
 src/gl/shaders/gl_shaderprogram.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gl/shaders/gl_shaderprogram.cpp b/src/gl/shaders/gl_shaderprogram.cpp
index 684fcd852..126817b59 100644
--- a/src/gl/shaders/gl_shaderprogram.cpp
+++ b/src/gl/shaders/gl_shaderprogram.cpp
@@ -121,7 +121,7 @@ void FShaderProgram::CompileShader(ShaderType type)
 	glGetShaderiv(handle, GL_COMPILE_STATUS, &status);
 	if (status == GL_FALSE)
 	{
-		I_FatalError("Compile Shader '%s':\n%s\n", mShaderNames[type], GetShaderInfoLog(handle).GetChars());
+		I_FatalError("Compile Shader '%s':\n%s\n", mShaderNames[type].GetChars(), GetShaderInfoLog(handle).GetChars());
 	}
 	else
 	{

From b4aa4bf0ac161fe1aff070324b3747e356ff112e Mon Sep 17 00:00:00 2001
From: Magnus Norddahl <dpjudas@users.noreply.github.com>
Date: Thu, 22 Nov 2018 05:31:10 +0100
Subject: [PATCH 5/9] - only use shader cache on Intel

---
 src/gl/shaders/gl_shader.cpp        | 22 +++++++++++++++++++---
 src/gl/shaders/gl_shaderprogram.cpp |  7 +++++--
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/src/gl/shaders/gl_shader.cpp b/src/gl/shaders/gl_shader.cpp
index c3c19df8c..0af60e412 100644
--- a/src/gl/shaders/gl_shader.cpp
+++ b/src/gl/shaders/gl_shader.cpp
@@ -56,10 +56,24 @@ struct ProgramBinary
 	TArray<uint8_t> data;
 };
 
-const char *ShaderMagic = "ZDSC";
+static const char *ShaderMagic = "ZDSC";
 
 static std::map<FString, std::unique_ptr<ProgramBinary>> ShaderCache; // Not a TMap because it doesn't support unique_ptr move semantics
 
+bool IsShaderCacheActive()
+{
+	static bool active = true;
+	static bool firstcall = true;
+
+	if (firstcall)
+	{
+		const char *vendor = (const char *)glGetString(GL_VENDOR);
+		active = strstr(vendor, "Intel") == nullptr;
+		firstcall = false;
+	}
+	return active;
+}
+
 static FString CalcProgramBinaryChecksum(const FString &vertex, const FString &fragment)
 {
 	const GLubyte *vendor = glGetString(GL_VENDOR);
@@ -421,7 +435,9 @@ bool FShader::Load(const char * name, const char * vert_prog_lump, const char *
 	FGLDebug::LabelObject(GL_PROGRAM, hShader, name);
 
 	uint32_t binaryFormat = 0;
-	TArray<uint8_t> binary = LoadCachedProgramBinary(vp_comb, fp_comb, binaryFormat);
+	TArray<uint8_t> binary;
+	if (IsShaderCacheActive())
+		binary = LoadCachedProgramBinary(vp_comb, fp_comb, binaryFormat);
 
 	bool linked = false;
 	if (binary.Size() > 0 && glProgramBinary)
@@ -481,7 +497,7 @@ bool FShader::Load(const char * name, const char * vert_prog_lump, const char *
 			// only print message if there's an error.
 			I_Error("Init Shader '%s':\n%s\n", name, error.GetChars());
 		}
-		else if (glProgramBinary)
+		else if (glProgramBinary && IsShaderCacheActive())
 		{
 			int binaryLength = 0;
 			glGetProgramiv(hShader, GL_PROGRAM_BINARY_LENGTH, &binaryLength);
diff --git a/src/gl/shaders/gl_shaderprogram.cpp b/src/gl/shaders/gl_shaderprogram.cpp
index 126817b59..8068b3c2c 100644
--- a/src/gl/shaders/gl_shaderprogram.cpp
+++ b/src/gl/shaders/gl_shaderprogram.cpp
@@ -37,6 +37,7 @@
 namespace OpenGLRenderer
 {
 
+bool IsShaderCacheActive();
 TArray<uint8_t> LoadCachedProgramBinary(const FString &vertex, const FString &fragment, uint32_t &binaryFormat);
 void SaveCachedProgramBinary(const FString &vertex, const FString &fragment, const TArray<uint8_t> &binary, uint32_t binaryFormat);
 
@@ -142,7 +143,9 @@ void FShaderProgram::Link(const char *name)
 	FGLDebug::LabelObject(GL_PROGRAM, mProgram, name);
 
 	uint32_t binaryFormat = 0;
-	TArray<uint8_t> binary = LoadCachedProgramBinary(mShaderSources[Vertex], mShaderSources[Fragment], binaryFormat);
+	TArray<uint8_t> binary;
+	if (IsShaderCacheActive())
+		binary = LoadCachedProgramBinary(mShaderSources[Vertex], mShaderSources[Fragment], binaryFormat);
 
 	bool loadedFromBinary = false;
 	if (binary.Size() > 0 && glProgramBinary)
@@ -168,7 +171,7 @@ void FShaderProgram::Link(const char *name)
 		{
 			I_FatalError("Link Shader '%s':\n%s\n", name, GetProgramInfoLog(mProgram).GetChars());
 		}
-		else if (glProgramBinary)
+		else if (glProgramBinary && IsShaderCacheActive())
 		{
 			int binaryLength = 0;
 			glGetProgramiv(mProgram, GL_PROGRAM_BINARY_LENGTH, &binaryLength);

From 4859c3d30170d8c5814b67081406347cefee82e1 Mon Sep 17 00:00:00 2001
From: Rachael Alexanderson <raa-eruanna@users.noreply.github.com>
Date: Thu, 22 Nov 2018 02:02:37 -0500
Subject: [PATCH 6/9] - fix inverted logic of Intel check

---
 src/gl/shaders/gl_shader.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gl/shaders/gl_shader.cpp b/src/gl/shaders/gl_shader.cpp
index 0af60e412..cc9811622 100644
--- a/src/gl/shaders/gl_shader.cpp
+++ b/src/gl/shaders/gl_shader.cpp
@@ -68,7 +68,7 @@ bool IsShaderCacheActive()
 	if (firstcall)
 	{
 		const char *vendor = (const char *)glGetString(GL_VENDOR);
-		active = strstr(vendor, "Intel") == nullptr;
+		active = !(strstr(vendor, "Intel") == nullptr);
 		firstcall = false;
 	}
 	return active;

From 3e9f531b5ffd866a4346d41b692f3340855471f3 Mon Sep 17 00:00:00 2001
From: Magnus Norddahl <dpjudas@users.noreply.github.com>
Date: Thu, 22 Nov 2018 14:48:09 +0100
Subject: [PATCH 7/9] - add NUMA awareness to drawer threads

---
 src/polyrenderer/drawers/poly_triangle.cpp   | 13 +++-
 src/polyrenderer/drawers/poly_triangle.h     | 12 +++-
 src/polyrenderer/drawers/screen_triangle.cpp |  6 +-
 src/posix/i_system.h                         |  6 ++
 src/swrenderer/drawers/r_thread.cpp          | 46 ++++++++++--
 src/swrenderer/drawers/r_thread.h            | 18 ++++-
 src/win32/i_system.cpp                       | 74 ++++++++++++++++++++
 src/win32/i_system.h                         |  5 ++
 8 files changed, 162 insertions(+), 18 deletions(-)

diff --git a/src/polyrenderer/drawers/poly_triangle.cpp b/src/polyrenderer/drawers/poly_triangle.cpp
index 53face08c..f75b5e43d 100644
--- a/src/polyrenderer/drawers/poly_triangle.cpp
+++ b/src/polyrenderer/drawers/poly_triangle.cpp
@@ -127,8 +127,13 @@ void PolyTriangleThreadData::ClearStencil(uint8_t value)
 	int height = buffer->Height();
 	uint8_t *data = buffer->Values();
 
-	data += core * width;
-	for (int y = core; y < height; y += num_cores)
+	int start_y = numa_node * height / num_numa_nodes;
+	int end_y = (numa_node + 1) * height / num_numa_nodes;
+	int core_skip = (num_cores - (start_y - core) % num_cores) % num_cores;
+	start_y += core_skip;
+
+	data += start_y * width;
+	for (int y = start_y; y < end_y; y += num_cores)
 	{
 		memset(data, value, width);
 		data += num_cores * width;
@@ -146,6 +151,8 @@ void PolyTriangleThreadData::SetViewport(int x, int y, int width, int height, ui
 	dest_height = new_dest_height;
 	dest_pitch = new_dest_pitch;
 	dest_bgra = new_dest_bgra;
+	numa_start_y = numa_node * dest_height / num_numa_nodes;
+	numa_end_y = (numa_node + 1) * dest_height / num_numa_nodes;
 	ccw = true;
 	weaponScene = false;
 }
@@ -642,7 +649,7 @@ int PolyTriangleThreadData::ClipEdge(const ShadedTriVertex *verts, ShadedTriVert
 PolyTriangleThreadData *PolyTriangleThreadData::Get(DrawerThread *thread)
 {
 	if (!thread->poly)
-		thread->poly = std::make_shared<PolyTriangleThreadData>(thread->core, thread->num_cores);
+		thread->poly = std::make_shared<PolyTriangleThreadData>(thread->core, thread->num_cores, thread->numa_node, thread->num_numa_nodes);
 	return thread->poly.get();
 }
 
diff --git a/src/polyrenderer/drawers/poly_triangle.h b/src/polyrenderer/drawers/poly_triangle.h
index d2a49ec97..6ed9ee8a4 100644
--- a/src/polyrenderer/drawers/poly_triangle.h
+++ b/src/polyrenderer/drawers/poly_triangle.h
@@ -48,7 +48,7 @@ public:
 class PolyTriangleThreadData
 {
 public:
-	PolyTriangleThreadData(int32_t core, int32_t num_cores) : core(core), num_cores(num_cores) { }
+	PolyTriangleThreadData(int32_t core, int32_t num_cores, int32_t numa_node, int32_t num_numa_nodes) : core(core), num_cores(num_cores), numa_node(numa_node), num_numa_nodes(num_numa_nodes) { }
 
 	void ClearStencil(uint8_t value);
 	void SetViewport(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra);
@@ -63,12 +63,18 @@ public:
 
 	int32_t core;
 	int32_t num_cores;
+	int32_t numa_node;
+	int32_t num_numa_nodes;
+
+	int numa_start_y;
+	int numa_end_y;
 
 	// The number of lines to skip to reach the first line to be rendered by this thread
 	int skipped_by_thread(int first_line)
 	{
-		int core_skip = (num_cores - (first_line - core) % num_cores) % num_cores;
-		return core_skip;
+		int clip_first_line = MAX(first_line, numa_start_y);
+		int core_skip = (num_cores - (clip_first_line - core) % num_cores) % num_cores;
+		return clip_first_line + core_skip - first_line;
 	}
 
 	static PolyTriangleThreadData *Get(DrawerThread *thread);
diff --git a/src/polyrenderer/drawers/screen_triangle.cpp b/src/polyrenderer/drawers/screen_triangle.cpp
index f8b1a51f4..30f023166 100644
--- a/src/polyrenderer/drawers/screen_triangle.cpp
+++ b/src/polyrenderer/drawers/screen_triangle.cpp
@@ -59,9 +59,9 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, PolyTriangleThreadDat
 	SortVertices(args, sortedVertices);
 
 	int clipright = args->clipright;
-	int clipbottom = args->clipbottom;
+	int cliptop = thread->numa_start_y;
+	int clipbottom = MIN(args->clipbottom, thread->numa_end_y);
 
-	// Ranges that different triangles edges are active
 	int topY = (int)(sortedVertices[0]->y + 0.5f);
 	int midY = (int)(sortedVertices[1]->y + 0.5f);
 	int bottomY = (int)(sortedVertices[2]->y + 0.5f);
@@ -1567,6 +1567,7 @@ void DrawRect8(const void *destOrg, int destWidth, int destHeight, int destPitch
 	uint32_t stepV = (int32_t)(fstepV * 0x1000000);
 
 	uint32_t posV = startV;
+	y1 = MIN(y1, thread->numa_end_y);
 	int num_cores = thread->num_cores;
 	int skip = thread->skipped_by_thread(y0);
 	posV += skip * stepV;
@@ -1817,6 +1818,7 @@ void DrawRectOpt32(const void *destOrg, int destWidth, int destHeight, int destP
 	uint32_t stepV = (int32_t)(fstepV * 0x1000000);
 
 	uint32_t posV = startV;
+	y1 = MIN(y1, thread->numa_end_y);
 	int num_cores = thread->num_cores;
 	int skip = thread->skipped_by_thread(y0);
 	posV += skip * stepV;
diff --git a/src/posix/i_system.h b/src/posix/i_system.h
index 7f468f143..1ff06a79e 100644
--- a/src/posix/i_system.h
+++ b/src/posix/i_system.h
@@ -35,6 +35,8 @@
 #endif
 
 #include "doomtype.h"
+#include <thread>
+#include <algorithm>
 
 struct ticcmd_t;
 struct WadStuff;
@@ -170,4 +172,8 @@ static inline char *strlwr(char *str)
 	return str;
 }
 
+inline int I_GetNumaNodeCount() { return 1; }
+inline int I_GetNumaNodeThreadCount(int numaNode) { return std::max<int>(std::thread::hardware_concurrency(), 1); }
+inline void I_SetThreadNumaNode(std::thread &thread, int numaNode) { }
+
 #endif
diff --git a/src/swrenderer/drawers/r_thread.cpp b/src/swrenderer/drawers/r_thread.cpp
index 0edf0d7f7..74956f14e 100644
--- a/src/swrenderer/drawers/r_thread.cpp
+++ b/src/swrenderer/drawers/r_thread.cpp
@@ -174,7 +174,11 @@ void DrawerThreads::StartThreads()
 {
 	std::unique_lock<std::mutex> lock(threads_mutex);
 
-	int num_threads = std::thread::hardware_concurrency();
+	int num_numathreads = 0;
+	for (int i = 0; i < I_GetNumaNodeCount(); i++)
+		num_numathreads += I_GetNumaNodeThreadCount(i);
+
+	int num_threads = num_numathreads;
 	if (num_threads == 0)
 		num_threads = 4;
 
@@ -189,13 +193,41 @@ void DrawerThreads::StartThreads()
 
 		threads.resize(num_threads);
 
-		for (int i = 0; i < num_threads; i++)
+		if (num_threads == num_numathreads)
 		{
-			DrawerThreads *queue = this;
-			DrawerThread *thread = &threads[i];
-			thread->core = i;
-			thread->num_cores = num_threads;
-			thread->thread = std::thread([=]() { queue->WorkerMain(thread); });
+			int curThread = 0;
+			for (int numaNode = 0; numaNode < I_GetNumaNodeCount(); numaNode++)
+			{
+				for (int i = 0; i < I_GetNumaNodeThreadCount(numaNode); i++)
+				{
+					DrawerThreads *queue = this;
+					DrawerThread *thread = &threads[curThread++];
+					thread->core = i;
+					thread->num_cores = I_GetNumaNodeThreadCount(numaNode);
+					thread->numa_node = numaNode;
+					thread->num_numa_nodes = I_GetNumaNodeCount();
+					thread->numa_start_y = numaNode * viewheight / I_GetNumaNodeCount();
+					thread->numa_end_y = (numaNode + 1) * viewheight / I_GetNumaNodeCount();
+					thread->thread = std::thread([=]() { queue->WorkerMain(thread); });
+					I_SetThreadNumaNode(thread->thread, numaNode);
+				}
+			}
+		}
+		else
+		{
+			for (int i = 0; i < num_threads; i++)
+			{
+				DrawerThreads *queue = this;
+				DrawerThread *thread = &threads[i];
+				thread->core = i;
+				thread->num_cores = num_threads;
+				thread->numa_node = 0;
+				thread->num_numa_nodes = 1;
+				thread->numa_start_y = 0;
+				thread->numa_end_y = viewheight;
+				thread->thread = std::thread([=]() { queue->WorkerMain(thread); });
+				I_SetThreadNumaNode(thread->thread, 0);
+			}
 		}
 	}
 }
diff --git a/src/swrenderer/drawers/r_thread.h b/src/swrenderer/drawers/r_thread.h
index c2e8b9c80..f2d1d4d0e 100644
--- a/src/swrenderer/drawers/r_thread.h
+++ b/src/swrenderer/drawers/r_thread.h
@@ -47,6 +47,16 @@ public:
 	// Number of active threads
 	int num_cores = 1;
 
+	// NUMA node this thread belongs to
+	int numa_node = 0;
+
+	// Number of active NUMA nodes
+	int num_numa_nodes = 1;
+
+	// Active range for the numa block the cores are part of
+	int numa_start_y = 0;
+	int numa_end_y = 0;
+
 	// Working buffer used by the tilted (sloped) span drawer
 	const uint8_t *tiltlighting[MAXWIDTH];
 
@@ -57,19 +67,21 @@ public:
 	// Checks if a line is rendered by this thread
 	bool line_skipped_by_thread(int line)
 	{
-		return line % num_cores != core;
+		return line < numa_start_y || line >= numa_end_y || line % num_cores != core;
 	}
 
 	// The number of lines to skip to reach the first line to be rendered by this thread
 	int skipped_by_thread(int first_line)
 	{
-		int core_skip = (num_cores - (first_line - core) % num_cores) % num_cores;
-		return core_skip;
+		int clip_first_line = MAX(first_line, numa_start_y);
+		int core_skip = (num_cores - (clip_first_line - core) % num_cores) % num_cores;
+		return clip_first_line + core_skip - first_line;
 	}
 
 	// The number of lines to be rendered by this thread
 	int count_for_thread(int first_line, int count)
 	{
+		count = MIN(count, numa_end_y - first_line);
 		int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores;
 		return MAX(c, 0);
 	}
diff --git a/src/win32/i_system.cpp b/src/win32/i_system.cpp
index 44d2d4395..1e5c78640 100644
--- a/src/win32/i_system.cpp
+++ b/src/win32/i_system.cpp
@@ -50,6 +50,7 @@
 #include <string.h>
 #include <process.h>
 #include <time.h>
+#include <map>
 
 #include <stdarg.h>
 
@@ -1470,3 +1471,76 @@ int _stat64i32(const char *path, struct _stat64i32 *buffer)
 	return 0;
 }
 #endif
+
+struct NumaNode
+{
+	uint64_t affinityMask = 0;
+	int threadCount = 0;
+};
+static TArray<NumaNode> numaNodes;
+
+static void SetupNumaNodes()
+{
+	if (numaNodes.Size() == 0)
+	{
+		// Query processors in the system
+		DWORD_PTR processMask = 0, systemMask = 0;
+		BOOL result = GetProcessAffinityMask(GetCurrentProcess(), &processMask, &systemMask);
+		if (result)
+		{
+			// Find the numa node each processor belongs to
+			std::map<int, NumaNode> nodes;
+			for (int i = 0; i < sizeof(DWORD_PTR) * 8; i++)
+			{
+				DWORD_PTR processorMask = (((DWORD_PTR)1) << i);
+				if (processMask & processorMask)
+				{
+					UCHAR nodeNumber = 0;
+					result = GetNumaProcessorNode(i, &nodeNumber);
+					if (nodeNumber != 0xff)
+					{
+						nodes[nodeNumber].affinityMask |= (uint64_t)processorMask;
+						nodes[nodeNumber].threadCount++;
+					}
+				}
+			}
+
+			// Convert map to a list
+			for (const auto &it : nodes)
+			{
+				numaNodes.Push(it.second);
+			}
+		}
+
+		// Fall back to a single node if something went wrong
+		if (numaNodes.Size() == 0)
+		{
+			NumaNode node;
+			node.threadCount = std::thread::hardware_concurrency();
+			if (node.threadCount == 0)
+				node.threadCount = 1;
+			numaNodes.Push(node);
+		}
+	}
+}
+
+int I_GetNumaNodeCount()
+{
+	SetupNumaNodes();
+	return numaNodes.Size();
+}
+
+int I_GetNumaNodeThreadCount(int numaNode)
+{
+	SetupNumaNodes();
+	return numaNodes[numaNode].threadCount;
+}
+
+void I_SetThreadNumaNode(std::thread &thread, int numaNode)
+{
+	SetupNumaNodes(); // Build the node table on demand, like the other I_*Numa* entry points
+	if (numaNodes.Size() > 1 && numaNode >= 0 && numaNode < (int)numaNodes.Size()) // Single node or bad index: leave affinity alone
+	{
+		SetThreadAffinityMask((HANDLE)thread.native_handle(), (DWORD_PTR)numaNodes[numaNode].affinityMask);
+	}
+}
diff --git a/src/win32/i_system.h b/src/win32/i_system.h
index a5f7b5d0b..903203daf 100644
--- a/src/win32/i_system.h
+++ b/src/win32/i_system.h
@@ -29,6 +29,7 @@
 #define __I_SYSTEM__
 
 #include "doomtype.h"
+#include <thread>
 
 struct ticcmd_t;
 struct WadStuff;
@@ -186,4 +187,8 @@ inline int I_FindAttr(findstate_t *fileinfo)
 #define FA_DIREC	0x00000010
 #define FA_ARCH		0x00000020
 
+int I_GetNumaNodeCount();
+int I_GetNumaNodeThreadCount(int numaNode);
+void I_SetThreadNumaNode(std::thread &thread, int numaNode);
+
 #endif

From d4e630c127646a4c762b39c35a8b8f4af73e7d4e Mon Sep 17 00:00:00 2001
From: Magnus Norddahl <dpjudas@users.noreply.github.com>
Date: Fri, 23 Nov 2018 03:00:11 +0100
Subject: [PATCH 8/9] - fix a rendering glitch when changing resolution

---
 src/swrenderer/drawers/r_thread.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/swrenderer/drawers/r_thread.cpp b/src/swrenderer/drawers/r_thread.cpp
index 74956f14e..31a6f1915 100644
--- a/src/swrenderer/drawers/r_thread.cpp
+++ b/src/swrenderer/drawers/r_thread.cpp
@@ -288,4 +288,7 @@ void MemcpyCommand::Execute(DrawerThread *thread)
 		d += dstep;
 		s += sstep;
 	}
+
+	thread->numa_start_y = thread->numa_node * viewheight / thread->num_numa_nodes;
+	thread->numa_end_y = (thread->numa_node + 1) * viewheight / thread->num_numa_nodes;
 }

From bced30d1e3fbd79bb7595406983e8bc32775b624 Mon Sep 17 00:00:00 2001
From: Christoph Oelckers <coelckers@users.noreply.github.com>
Date: Fri, 23 Nov 2018 10:18:14 +0100
Subject: [PATCH 9/9] - made CDoomError inherit from std::exception so that the
 main catch block can also deal with exceptions thrown by the STL. - Also do
 not ignore empty exception messages as irrelevant. The only irrelevant
 exception type is CNoRunExit.

---
 src/doomerrors.h                  | 14 ++++++++++++--
 src/posix/cocoa/i_main_except.cpp |  6 +++---
 src/posix/i_steam.cpp             |  4 ++--
 src/posix/sdl/i_main.cpp          |  6 +++---
 src/win32/i_main.cpp              |  9 +++++----
 5 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/src/doomerrors.h b/src/doomerrors.h
index c389c6b05..a7ec02654 100644
--- a/src/doomerrors.h
+++ b/src/doomerrors.h
@@ -37,10 +37,11 @@
 
 #include <string.h>
 #include <stdio.h>
+#include <exception>
 
 #define MAX_ERRORTEXT	1024
 
-class CDoomError
+class CDoomError : public std::exception
 {
 public:
 	CDoomError ()
@@ -69,13 +70,22 @@ public:
 		else
 			return NULL;
 	}
+	char const *what() const noexcept override	// noexcept is required to match std::exception::what()
+	{
+		return m_Message;
+	}
+
 
 protected:
 	char m_Message[MAX_ERRORTEXT];
 };
 
-class CNoRunExit : public CDoomError
+class CNoRunExit : public std::exception
 {
+public:
+	// std::exception(const char *) is an MSVC-only extension, so the marker text
+	// that the main catch blocks compare against is returned from what() instead.
+	char const *what() const noexcept override { return "NoRunExit"; }
 };
 
 class CRecoverableError : public CDoomError
diff --git a/src/posix/cocoa/i_main_except.cpp b/src/posix/cocoa/i_main_except.cpp
index 5c3785588..a61eca2cc 100644
--- a/src/posix/cocoa/i_main_except.cpp
+++ b/src/posix/cocoa/i_main_except.cpp
@@ -46,13 +46,13 @@ void OriginalMainExcept(int argc, char** argv)
 	{
 		OriginalMainTry(argc, argv);
 	}
-	catch(const CDoomError& error)
+	catch(const std::exception& error)
 	{
-		const char* const message = error.GetMessage();
+		const char* const message = error.what();
 
-		if (NULL != message)
+		if (NULL != message && strcmp(message, "NoRunExit") != 0)
 		{
 			fprintf(stderr, "%s\n", message);
 			Mac_I_FatalError(message);
 		}
 
diff --git a/src/posix/i_steam.cpp b/src/posix/i_steam.cpp
index 29dc9b845..dccadb021 100644
--- a/src/posix/i_steam.cpp
+++ b/src/posix/i_steam.cpp
@@ -178,7 +178,7 @@ TArray<FString> I_GetSteamPath()
 	{
 		SteamInstallFolders = ParseSteamRegistry(regPath);
 	}
-	catch(class CDoomError &error)
+	catch(class CRecoverableError &error)
 	{
 		// If we can't parse for some reason just pretend we can't find anything.
 		return result;
@@ -201,7 +201,7 @@ TArray<FString> I_GetSteamPath()
 		{
 			SteamInstallFolders = ParseSteamRegistry(regPath);
 		}
-		catch(class CDoomError &error)
+		catch(class CRecoverableError &error)
 		{
 			// If we can't parse for some reason just pretend we can't find anything.
 			return result;
diff --git a/src/posix/sdl/i_main.cpp b/src/posix/sdl/i_main.cpp
index 38f035515..8e7230777 100644
--- a/src/posix/sdl/i_main.cpp
+++ b/src/posix/sdl/i_main.cpp
@@ -258,11 +258,11 @@ int main (int argc, char **argv)
 		C_InitConsole (80*8, 25*8, false);
 		D_DoomMain ();
     }
-    catch (class CDoomError &error)
+    catch (std::exception &error)
     {
 		I_ShutdownJoysticks();
-		if (error.GetMessage ())
-			fprintf (stderr, "%s\n", error.GetMessage ());
+		if (error.what () && strcmp(error.what(), "NoRunExit"))
+			fprintf (stderr, "%s\n", error.what ());
 
 #ifdef __APPLE__
-		Mac_I_FatalError(error.GetMessage());
+		Mac_I_FatalError(strcmp(error.what(), "NoRunExit") ? error.what() : NULL);
diff --git a/src/win32/i_main.cpp b/src/win32/i_main.cpp
index b1d7a7bb7..6a75a597a 100644
--- a/src/win32/i_main.cpp
+++ b/src/win32/i_main.cpp
@@ -1048,21 +1048,22 @@ void DoMain (HINSTANCE hInstance)
 		}
 		exit(0);
 	}
-	catch (class CDoomError &error)
+	catch (std::exception &error)
 	{
 		I_ShutdownGraphics ();
 		RestoreConView ();
 		S_StopMusic(true);
 		I_FlushBufferedConsoleStuff();
-		if (error.GetMessage ())
+		auto msg = error.what();
+		if (strcmp(msg, "NoRunExit"))
 		{
 			if (!batchrun)
 			{
-				ShowErrorPane(error.GetMessage());
+				ShowErrorPane(msg);
 			}
 			else
 			{
-				Printf("%s\n", error.GetMessage());
+				Printf("%s\n", msg);
 			}
 		}
 		exit (-1);