cnq3/code/renderer/tr_backend_gl3.cpp

/*
===========================================================================
Copyright (C) 2019 Gian 'myT' Schellenbaum

This file is part of Challenge Quake 3 (CNQ3).

Challenge Quake 3 is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.

Challenge Quake 3 is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
===========================================================================
*/
// OpenGL 3.2+ rendering back-end

#include "tr_local.h"
#include "GL/glew.h"

#if defined(_WIN32)
#include <Windows.h>
#else
#include <sys/mman.h>
#include <unistd.h>
#endif


/*
Current info:
- OpenGL 3.2 minimum
- GLSL 1.40 minimum
- fancy mip-map generation requires:
	- OpenGL 4.3 (or equivalent extensions)
	- GLSL 4.30

Vertex and index data streaming notes:
- everyone: persistent coherent buffer mapping is the best option whenever available
- nVidia  : unsynchronized mapping is very slow, even without threaded driver optimization
- Intel   : glBufferSubData is painfully slow, even with immutable storage guarantees and full-range updates
- AMD     : if persistent coherent buffer mapping isn't available, AMD_pinned_memory is the best option
- AMD     : if neither persistent coherent buffer mapping nor AMD_pinned_memory, then pick glBufferSubData to be safe
- AMD     : glBufferSubData is slower than unsynchronized mapping with modern drivers
- AMD     : unsynchronized mapping drops off the performance cliff with old drivers

Known issues:
- nVidia GeForce GTX 1070 - Windows 7 - drivers 430.64
	once the GL2 back-end is used, performance crashes when switching to the GL3 back-end
- AMD Radeon HD 6950 - Windows 10 Pro version 10.0.16299 build 16299 - drivers 15.201.1151.1008
	with r_gpuMipGen 1, performance collapses big time (confirmed: whenever glTexStorage2D is called)
- AMD Radeon R7 360 - Windows 7 - drivers 14.502.0.0
	with r_gpuMipGen 1, the GPU-generated mips are corrupted (not confirmed: broken barrier implementation?)
*/


// @NOTE: MAX_VERTEXES and MAX_INDEXES are *per frame*
// LARGEBUFFER_MAX_FRAMES is also the number of fence slots kept in each ArrayBuffer
#define LARGEBUFFER_MAX_FRAMES      4
#define LARGEBUFFER_MAX_VERTEXES    131072
// 8 indices are budgeted per vertex
#define LARGEBUFFER_MAX_INDEXES     (LARGEBUFFER_MAX_VERTEXES * 8)

// this is the highest maximum we'll ever report
#define MAX_GPU_TEXTURE_SIZE        2048


// Identifies a graphics pipeline; used as an index into OpenGL3::pipelines.
// PID_COUNT is the number of graphics pipelines and is also what
// CreateComputeProgram passes to CreateShader for compute shaders.
enum PipelineId
{
	PID_GENERIC,
	PID_DYNAMIC_LIGHT,
	PID_SOFT_SPRITE,
	PID_POST_PROCESS,
	PID_COUNT
};

// Selects what HandleError does with a message.
enum ErrorMode
{
	EM_FATAL,	// ri.Error(ERR_FATAL, ...)
	EM_PRINT,	// ri.Printf(PRINT_ERROR, ...)
	EM_SILENT	// the message is dropped
};

// Identifies a vertex stream. The value indexes OpenGL3::arrayBuffers and
// Pipeline::arrayBuffers and is also used directly as the generic vertex
// attribute index (glBindAttribLocation / glVertexAttribPointer).
enum VertexBufferId
{
	VB_POSITION,
	VB_NORMAL,
	VB_TEXCOORD,
	VB_TEXCOORD2,
	VB_COLOR,
	VB_COUNT
};

// Alpha test modes.
// NOTE(review): the numeric values 0..3 look like they are what the fragment
// shaders compare their 'alphaTest' uniform against - confirm at the upload site.
enum AlphaTest
{
	AT_ALWAYS,
	AT_GREATER_THAN_0,
	AT_LESS_THAN_HALF,
	AT_GREATER_OR_EQUAL_TO_HALF
};

// OpenGL object names of one program and the shaders attached to it.
// CreateGraphicsProgram fills vertexShader + fragmentShader,
// CreateComputeProgram fills computeShader; both fill program.
struct Program
{
	GLuint vertexShader;
	GLuint fragmentShader;
	GLuint computeShader;
	GLuint program;
};

// One streamed vertex or index buffer and its layout description.
struct ArrayBuffer
{
	GLuint buffer;			// buffer object name (bound to GL_ARRAY_BUFFER in ApplyPipeline)
	GLint componentCount;	// 'size' argument of glVertexAttribPointer
	GLenum dataType;		// 'type' argument of glVertexAttribPointer
	GLboolean normalized;	// 'normalized' argument of glVertexAttribPointer
	int capacity;			// item count; capacity * itemSize is the byte size used for pinned memory
	int itemSize;			// bytes per item, also used as the vertex stride
	int writeIndex;
	int readIndex;
	qbool indexBuffer;
	// persistent mapping:
	byte* mappedData;
	int pinnedByteCount; // when using AMD_pinned_memory
	GLsync fences[LARGEBUFFER_MAX_FRAMES]; // NULL means uninitialized / invalid
	int writeRangeIndex;
};

// Per-pipeline description of one vertex stream.
struct PipelineArrayBuffer
{
	const char* attribName;	// GLSL attribute name given to glBindAttribLocation
	qbool enabled;			// whether the pipeline enables this vertex attribute array
};

// A frame buffer object and its attachments.
// FBO_CreateMS attaches render buffers, FBO_CreateSS attaches 2D textures.
struct FrameBuffer
{
	GLuint fbo;
	GLuint color;			// render buffer if MS, texture if SS
	GLuint depthStencil;	// render buffer if MS, texture if SS
	qbool multiSampled;
	qbool hasDepthStencil;
	qbool hasColor;
};

// Uniform slots of a pipeline (index into Pipeline::uniformNames/uniformLocations/uniformsDirty).
// NOTE(review): presumably the generic pipeline's uniforms - confirm where the names are assigned.
enum GenericUniform
{
	GU_MODELVIEW,
	GU_PROJECTION,
	GU_CLIP_PLANE,
	GU_ALPHA_TEX,
	GU_GAMMA_BRIGHT_NOISE_SEED, // @NOTE: not always defined
	GU_COUNT
};

// Uniform slots of a pipeline (index into Pipeline::uniformNames/uniformLocations/uniformsDirty).
// DU_COUNT is the largest of the uniform counts and defines MAX_UNIFORM_COUNT.
// NOTE(review): presumably the dynamic light pipeline's uniforms - confirm where the names are assigned.
enum DynamicLightUniform
{
	DU_MODELVIEW,
	DU_PROJECTION,
	DU_CLIP_PLANE,
	DU_LIGHT_POS,
	DU_EYE_POS,
	DU_LIGHT_COLOR_RADIUS,
	DU_OPAQUE,
	DU_INTENSITY,
	DU_COUNT
};

// Uniform slots of a pipeline (index into Pipeline::uniformNames/uniformLocations/uniformsDirty).
// NOTE(review): presumably the soft sprite pipeline's uniforms - confirm where the names are assigned.
enum SoftSpriteUniform
{
	SU_MODELVIEW,
	SU_PROJECTION,
	SU_CLIP_PLANE,
	SU_ALPHA_TEST,
	SU_DIST_OFFSET,
	SU_COLOR_SCALE,
	SU_COLOR_BIAS,
	SU_COUNT
};

// Uniform slots of a pipeline (index into Pipeline::uniformNames/uniformLocations/uniformsDirty).
// NOTE(review): presumably the post-process pipeline's uniforms - confirm where the names are assigned.
enum PostUniform
{
	PU_BRIGHT_GAMMA_GREY,
	PU_COUNT
};

// yes, one could use some template meta-programming horror for this...
// MAX_UNIFORM_COUNT sizes the per-pipeline uniform arrays in Pipeline.
// The arrays below are compile-time checks: if any uniform enum outgrows
// MAX_UNIFORM_COUNT, the array size becomes -1 and compilation fails.
#define MAX_UNIFORM_COUNT DU_COUNT
static const char UniformCountLargeEnoughG[(int)MAX_UNIFORM_COUNT >= (int)GU_COUNT ? 1 : -1] = { '\0' };
static const char UniformCountLargeEnoughD[(int)MAX_UNIFORM_COUNT >= (int)DU_COUNT ? 1 : -1] = { '\0' };
static const char UniformCountLargeEnoughS[(int)MAX_UNIFORM_COUNT >= (int)SU_COUNT ? 1 : -1] = { '\0' };
static const char UniformCountLargeEnoughU[(int)MAX_UNIFORM_COUNT >= (int)PU_COUNT ? 1 : -1] = { '\0' };

// A graphics pipeline: the linked program plus the uniform, sampler and
// vertex attribute bookkeeping that goes with it.
struct Pipeline
{
	Program program;
	const char* uniformNames[MAX_UNIFORM_COUNT];
	GLint uniformLocations[MAX_UNIFORM_COUNT];
	qbool uniformsDirty[MAX_UNIFORM_COUNT];	// all entries are set by ApplyPipeline when the pipeline changes
	GLint textureLocations[2];				// sampler uniform locations, assigned texture units 0 and 1 in ApplyPipeline
	PipelineArrayBuffer arrayBuffers[VB_COUNT];
};

// Identifies a compute program; indexes MipMapGenerator::programs.
enum ComputePipelineId
{
	CPID_GAMMA_TO_LINEAR,
	CPID_LINEAR_TO_GAMMA,
	CPID_DOWN_SAMPLE,
	CPID_COUNT
};

// Compute programs and scratch textures of the GPU mip-map generator.
struct MipMapGenerator
{
	Program programs[CPID_COUNT];
	GLuint textures[3]; // 0,1=float16 2=uint8
};

// Strategy used to stream vertex/index data into the GL buffers
// (see the "Vertex and index data streaming notes" at the top of the file).
enum MappingType
{
	MT_SUBDATA,		// glBufferSubData
	MT_UNSYNC,		// glMapBufferRange with GL_MAP_UNSYNCHRONIZED_BIT
	MT_PERSISTENT,	// glMapBufferRange with GL_MAP_PERSISTENT_BIT and GL_MAP_COHERENT_BIT
	MT_AMDPIN		// glBufferData with GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD
};

// All the state owned by the GL3 back-end: cached GL state, streamed buffers,
// frame buffers, pipelines, the mip-map generator and timer queries.
struct OpenGL3
{
	char log[8192]; // receives shader/program info logs

	int maxTextureSize;

	float modelViewMatrix[16];
	float projectionMatrix[16];
	float clipPlane[4];
	qbool enableClipPlane;
	qbool prevEnableClipPlane;
	AlphaTest alphaTest;
	qbool dlOpaque;
	float dlIntensity;
	float depthFadeScale[4];
	float depthFadeBias[4];
	float depthFadeDist;
	float depthFadeOffset;

	ArrayBuffer arrayBuffers[VB_COUNT];
	ArrayBuffer indexBuffer;

	GLuint boundTextures[2]; // texture binding cache used by BindTexture, one entry per slot
	cullType_t cullType;
	unsigned int srcBlendBits;
	unsigned int dstBlendBits;
	qbool enableDepthTest;
	GLenum depthFunc;
	GLboolean enableDepthWrite;
	GLenum polygonMode;
	qbool enablePolygonOffset;
	texEnv_t texEnv;
	qbool enableAlphaToCoverage;

	FrameBuffer fbMS;
	FrameBuffer fbSSDepth; // resolved depth/stencil from fbMS
	FrameBuffer fbSS[2];
	unsigned int fbReadIndex; // indexes fbSS
	qbool fbMSEnabled; // decided in FBO_Init from r_msaa and r_colorMipLevels

	Pipeline pipelines[PID_COUNT];
	PipelineId pipelineId; // pipeline last applied by ApplyPipeline

	MappingType mappingType;

	ErrorMode errorMode; // consumed by HandleError

	MipMapGenerator mipGen;

	GLuint timerQueries[8];
	qbool queryStarted[8];
	int queryWriteIndex;
	int queryReadIndex;
};

// the back-end's single state instance (file scope, zero-initialized)
static OpenGL3 gl;


// Vertex shader: transforms the position by modelView then projection,
// writes clip distance 0 from the view-space position and the clip plane,
// and forwards both texture coordinate sets and the color (centroid-interpolated).
static const char* generic_vs =
// a good way to test warning reports with r_verbose 1
//"#extension DOESNTEXISTLOL:warn\n"
//----------------------------------
"uniform mat4 modelView;\n"
"uniform mat4 projection;\n"
"uniform vec4 clipPlane;\n"
"\n"
"in vec4 position;\n"
"in vec2 texCoords1;\n"
"in vec2 texCoords2;\n"
"in vec4 color;\n"
"\n"
"centroid out vec2 texCoords1FS;\n"
"centroid out vec2 texCoords2FS;\n"
"centroid out vec4 colorFS;\n"
"\n"
"void main()\n"
"{\n"
"	vec4 positionVS = modelView * vec4(position.xyz, 1);\n"
"	gl_Position = projection * positionVS;\n"
"	gl_ClipDistance[0] = dot(positionVS, clipPlane);\n"
"	texCoords1FS = texCoords1;\n"
"	texCoords2FS = texCoords2;\n"
"	colorFS = color;\n"
"}\n";

// Fragment shader: combines two textures and the vertex color according to
// texEnv (alphaTex.y), optionally adds dither noise (CNQ3_DITHER), then either
// reshapes alpha for alpha-to-coverage (CNQ3_A2C) or discards fragments that
// fail the alpha test (alphaTex.x).
// CNQ3_A2C and CNQ3_DITHER are #defined by CreateShader ahead of this source.
static const char* generic_fs =
"uniform sampler2D texture1;\n"
"uniform sampler2D texture2;\n"
"\n"
"uniform uvec2 alphaTex;\n"
"#define alphaTest alphaTex.x\n"
"#define texEnv alphaTex.y\n"
"#if CNQ3_DITHER\n"
"uniform vec4 gammaBrightNoiseSeed;\n"
"#define invGamma gammaBrightNoiseSeed.x\n"
"#define invBrightness gammaBrightNoiseSeed.y\n"
"#define noiseScale gammaBrightNoiseSeed.z\n"
"#define seed gammaBrightNoiseSeed.w\n"
"#endif\n"
"\n"
"centroid in vec2 texCoords1FS;\n"
"centroid in vec2 texCoords2FS;\n"
"centroid in vec4 colorFS;\n"
"\n"
"out vec4 fragColor;\n"
"\n"
"#if CNQ3_DITHER\n"
"float Hash(vec2 v)\n"
"{\n"
"	// this is from Morgan McGuire's 'Hashed Alpha Testing' paper\n"
"	return fract(1.0e4 * sin(17.0 * v.x + 0.1 * v.y) + (0.1 + abs(sin(13.0 * v.y + v.x))));\n"
"}\n"
"\n"
"float Linearize(float color)\n"
"{\n"
"	return pow(abs(color * invBrightness), invGamma) * sign(color);\n"
"}\n"
"\n"
"vec4 Dither(vec4 color, vec3 position)\n"
"{\n"
"	vec2 newSeed = position.xy + vec2(0.6849, 0.6849) * seed + vec2(position.z, position.z);\n"
"	float noise = (noiseScale / 255.0) * Linearize(Hash(newSeed) - 0.5);\n"
"\n"
"	return color + vec4(noise, noise, noise, 0.0);\n"
"}\n"
"#endif\n"
"\n"
"#if CNQ3_A2C\n"
"float CorrectAlpha(float threshold, float alpha, vec2 tc)\n"
"{\n"
"	vec2 size = vec2(textureSize(texture1, 0));\n"
"	float dx = max(abs(dFdx(tc.x * size.x)), 0.001);\n"
"	float dy = max(abs(dFdy(tc.y * size.y)), 0.001);\n"
"	float dxy = max(dx, dy); // apply the smallest boost\n"
"	float scale = max(1.0 / dxy, 1.0);\n"
"	float ac = threshold + (alpha - threshold) * scale;\n"
"\n"
"	return ac;\n"
"}\n"
"#endif\n"
"\n"
"void main()\n"
"{\n"
"	vec4 p = texture(texture1, texCoords1FS);\n"
"	vec4 s = texture(texture2, texCoords2FS);\n"
"	vec4 r;\n"
"	if(texEnv == uint(1))\n"
"		r = colorFS * s * p;\n"
"	else if(texEnv == uint(2))\n"
"		r = s; // use input.color or not?\n"
"	else if(texEnv == uint(3))\n"
"		r = colorFS * vec4(p.rgb * (1 - s.a) + s.rgb * s.a, p.a);\n"
"	else if(texEnv == uint(4))\n"
"		r = colorFS * vec4(p.rgb + s.rgb, p.a * s.a);\n"
"	else // texEnv == 0\n"
"		r = colorFS * p;\n"
"\n"
"#if CNQ3_DITHER\n"
"	r = Dither(r, gl_FragCoord.xyz);\n"
"#endif\n"
"\n"
"#if CNQ3_A2C\n"
"	if(alphaTest == uint(1))\n"
"		r.a = r.a > 0.0 ? 1.0 : 0.0;\n"
"	if(alphaTest == uint(2))\n"
"		r.a = CorrectAlpha(0.5, 1.0 - r.a, texCoords1FS);\n"
"	else if(alphaTest == uint(3))\n"
"		r.a = CorrectAlpha(0.5, r.a, texCoords1FS);\n"
"#else\n"
"	if(	(alphaTest == uint(1) && r.a == 0.0) ||\n"
"		(alphaTest == uint(2) && r.a >= 0.5) ||\n"
"		(alphaTest == uint(3) && r.a <  0.5))\n"
"		discard;\n"
"#endif\n"
"\n"
"	fragColor = r;\n"
"}\n";

// Vertex shader: same position/clip-plane transform as the other vertex
// shaders, plus the un-normalized object-space vectors from the vertex to the
// light (L) and to the eye (V), and the normal and first texture coordinates.
static const char* dl_vs =
"uniform mat4 modelView;\n"
"uniform mat4 projection;\n"
"uniform vec4 clipPlane;\n"
"uniform vec3 osLightPos;\n"
"uniform vec3 osEyePos;\n"
"\n"
"in vec4 position;\n"
"in vec4 normal;\n"
"in vec2 texCoords1;\n"
"\n"
"out vec3 normalFS;\n"
"out vec2 texCoords1FS;\n"
"out vec3 L;\n"
"out vec3 V;\n"
"\n"
"void main()\n"
"{\n"
"	vec4 positionVS = modelView * vec4(position.xyz, 1);\n"
"	gl_Position = projection * positionVS;\n"
"	gl_ClipDistance[0] = dot(positionVS, clipPlane);\n"
"	normalFS = normal.xyz;\n"
"	texCoords1FS = texCoords1;\n"
"	L = osLightPos - position.xyz;\n"
"	V = osEyePos - position.xyz;\n"
"}\n";

// Fragment shader: lights the base texture with a distance-attenuated
// diffuse (N.L) and specular (N.H) term and outputs pre-multiplied color
// with alpha = mix(opaque, 1.0, base.a).
// The base texture is sampled with texture() like every other shader in this
// file; texture2D() is a deprecated GLSL built-in.
static const char* dl_fs =
"uniform sampler2D texture1;\n"
"\n"
"uniform vec4 lightColorRadius;\n"
"uniform float opaque;\n"
"uniform float intensity;\n"
"\n"
"in vec3 normalFS;\n"
"in vec2 texCoords1FS;\n"
"in vec3 L;\n"
"in vec3 V;\n"
"\n"
"out vec4 fragColor;\n"
"\n"
"float BezierEase(float t)\n"
"{\n"
"	return t * t * (3.0 - 2.0 * t);\n"
"}\n"
"\n"
"void main()\n"
"{\n"
"	vec4 base = texture(texture1, texCoords1FS);\n"
"	vec3 nL = normalize(L);\n"
"	vec3 nV = normalize(V);\n"
"\n"
"	// light intensity\n"
"	float intensFactor = min(dot(L, L) * lightColorRadius.w, 1.0);\n"
"	vec3 intens = lightColorRadius.rgb * BezierEase(1.0 - sqrt(intensFactor));\n"
"\n"
"	// specular reflection term (N.H)\n"
"	float specFactor = min(abs(dot(normalFS, normalize(nL + nV))), 1.0);\n"
"	float spec = pow(specFactor, 16.0) * 0.25;\n"
"\n"
"	// Lambertian diffuse reflection term (N.L)\n"
"	float diffuse = min(abs(dot(normalFS, nL)), 1.0);\n"
"	vec3 color = (base.rgb * vec3(diffuse) + vec3(spec)) * intens * intensity;\n"
"	float alpha = mix(opaque, 1.0, base.a);\n"
"\n"
"	fragColor = vec4(color.rgb * alpha, alpha);\n"
"}\n";

// Vertex shader: standard position/clip-plane transform, plus what the
// fragment shader needs for its depth comparison: the positive view-space
// depth of the vertex and two projection matrix terms.
static const char* sprite_vs =
"uniform mat4 modelView;\n"
"uniform mat4 projection;\n"
"uniform vec4 clipPlane;\n"
"\n"
"in vec4 position;\n"
"in vec2 texCoords1;\n"
"in vec4 color;\n"
"\n"
"out vec2 texCoords1FS;\n"
"out vec4 colorFS;\n"
"out float depthVS;\n"
"out vec2 proj22_32;\n"
"\n"
"void main()\n"
"{\n"
"	vec4 positionVS = modelView * vec4(position.xyz, 1);\n"
"	gl_Position = projection * positionVS;\n"
"	gl_ClipDistance[0] = dot(positionVS, clipPlane);\n"
"	texCoords1FS = texCoords1;\n"
"	colorFS = color;\n"
"	depthVS = -positionVS.z;\n"
"	proj22_32 = vec2(-projection[2][2], projection[3][2]);\n"
"}\n";

// Fragment shader: applies the alpha test, then fades the fragment towards
// (r * colorScale + colorBias) based on the difference between the stored
// scene depth (texture2, fetched at the fragment's pixel) and the fragment's
// own depth, both linearized to view space.
static const char* sprite_fs =
"uniform sampler2D texture1; // diffuse texture\n"
"uniform sampler2D texture2; // depth texture\n"
"\n"
"uniform uint alphaTest;\n"
"uniform vec2 distOffset;\n"
"uniform vec4 colorScale;\n"
"uniform vec4 colorBias;\n"
"#define distance distOffset.x\n"
"#define offset distOffset.y\n"
"\n"
"in vec2 texCoords1FS;\n"
"in vec4 colorFS;\n"
"in float depthVS;\n"
"in vec2 proj22_32;\n"
"#define proj22 proj22_32.x\n"
"#define proj32 proj22_32.y\n"
"\n"
"out vec4 fragColor;\n"
"\n"
"float LinearDepth(float zwDepth)\n"
"{\n"
"	return proj32 / (zwDepth - proj22);\n"
"}\n"
"\n"
"float Contrast(float d, float power)\n"
"{\n"
"	bool aboveHalf = d > 0.5;\n"
"	float base = clamp(2.0 * (aboveHalf ? (1.0 - d) : d), 0.0, 1.0);\n"
"	float r = 0.5 * pow(base, power);\n"
"\n"
"	return aboveHalf ? (1.0 - r) : r;\n"
"}\n"
"\n"
"void main()\n"
"{\n"
"	vec4 r = colorFS * texture(texture1, texCoords1FS);\n"
"	if(	(alphaTest == uint(1) && r.a == 0.0) ||\n"
"		(alphaTest == uint(2) && r.a >= 0.5) ||\n"
"		(alphaTest == uint(3) && r.a <  0.5))\n"
"		discard;\n"
"\n"
"	float depthSRaw = texelFetch(texture2, ivec2(gl_FragCoord.xy), 0).r;\n"
"	float depthS = LinearDepth(depthSRaw * 2.0 - 1.0);\n"
"	float depthP = depthVS - offset;\n"
"	float scale = Contrast((depthS - depthP) * distance, 2.0);\n"
"	vec4 r2 = mix(r * colorScale + colorBias, r, scale);\n"
"	fragColor = r2;\n"
"}\n";

// Vertex shader with no vertex inputs: position and texture coordinates are
// derived from gl_VertexID. IDs 0,1,2 yield (-1,-1), (-1,3), (3,-1) with
// texture coordinates (0,0), (0,2), (2,0), i.e. one triangle that covers the
// whole [-1,1] clip-space square.
static const char* post_vs =
"out vec2 texCoords1FS;\n"
"\n"
"void main()\n"
"{\n"
"	gl_Position = vec4(\n"
"		float(gl_VertexID / 2) * 4.0 - 1.0,\n"
"		float(gl_VertexID % 2) * 4.0 - 1.0,\n"
"		0.0,\n"
"		1.0);\n"
"	texCoords1FS = vec2(\n"
"		float(gl_VertexID / 2) * 2.0,\n"
"		float(gl_VertexID % 2) * 2.0);\n"
"}\n";

// Fragment shader: raises the sampled color to the 'gamma' power, scales it
// by 'brightness', then blends towards its luma-weighted grey by 'greyscale'.
// Output alpha is always 1.
static const char* post_fs =
"uniform sampler2D texture1;\n"
"\n"
"uniform vec3 brightGammaGrey;\n"
"#define brightness brightGammaGrey.x\n"
"#define gamma brightGammaGrey.y\n"
"#define greyscale brightGammaGrey.z\n"
"\n"
"in vec2 texCoords1FS;\n"
"\n"
"out vec4 fragColor;\n"
"\n"
"void main()\n"
"{\n"
"	vec3 base = texture(texture1, texCoords1FS).rgb;\n"
"	vec3 gc = pow(base, vec3(gamma)) * brightness;\n"
"	float grey = 0.299 * gc.r + 0.587 * gc.g + 0.114 * gc.b;\n"
"	vec3 result = mix(gc, vec3(grey, grey, grey), greyscale);\n"
"	fragColor = vec4(result.rgb, 1.0);\n"
"}\n";

// Compute shader (8x8 work groups): reads an rgba8 texel, raises r, g and b to
// the 'gamma' power (alpha is copied as is) and stores it as rgba16f.
static const char* gammaToLinear_cs =
"layout (binding = 0, rgba8)   readonly  uniform image2D srcTex;\n"
"layout (binding = 1, rgba16f) writeonly uniform image2D dstTex;\n"
"\n"
"layout (location = 0) uniform float gamma;\n"
"\n"
"layout (local_size_x = 8, local_size_y = 8) in;\n"
"\n"
"void main()\n"
"{\n"
"	ivec2 coords = ivec2(gl_GlobalInvocationID);\n"
"	vec4 inV = imageLoad(srcTex, coords);\n"
"	vec4 outV = vec4(pow(inV.x, gamma), pow(inV.y, gamma), pow(inV.z, gamma), inV.a);\n"
"	imageStore(dstTex, coords, outV);\n"
"}\n";

// Compute shader (8x8 work groups): reads an rgba16f texel, blends it half-way
// towards blendColor.rgb by blendColor.a, raises r, g and b to the 'invGamma'
// power, scales by 'intensity' and stores the result as rgba8 (alpha is copied).
static const char* linearToGamma_cs =
// yes, intensity *should* be done in light-linear space
// but we keep the old behavior for consistency...
"layout (binding = 0, rgba16f) readonly  uniform image2D srcTex;\n"
"layout (binding = 1, rgba8)   writeonly uniform image2D dstTex;\n"
"\n"
"layout (location = 0) uniform float intensity;\n"
"layout (location = 1) uniform vec4  blendColor;\n"
"layout (location = 2) uniform float invGamma;\n"
"\n"
"layout (local_size_x = 8, local_size_y = 8) in;\n"
"\n"
"void main()\n"
"{\n"
"	ivec2 coords = ivec2(gl_GlobalInvocationID);\n"
"	vec4 in0 = imageLoad(srcTex, coords);\n"
"	vec3 in1 = 0.5 * (in0.rgb + blendColor.rgb);\n"
"	vec3 inV = mix(in0.rgb, in1.rgb, blendColor.a);\n"
"	vec3 out0 = vec3(pow(inV.r, invGamma), pow(inV.g, invGamma), pow(inV.b, invGamma));\n"
"	vec3 out1 = out0 * intensity;\n"
"	vec4 outV = vec4(out1, in0.a);\n"
"	imageStore(dstTex, coords, outV);\n"
"}\n";

// Compute shader (8x8 work groups): 8-tap weighted filter along 'offset'.
// Each invocation writes one destination texel from source texels around
// (invocation id * scale). The outer taps go through FixCoords, which either
// clamps to [0, maxSize] or wraps with a bitwise AND against maxSize
// (NOTE(review): the AND only wraps correctly if maxSize is a 2^n - 1 mask - confirm at the caller).
static const char* downSample_cs =
"layout (binding = 0, rgba16f) readonly  uniform image2D srcTex;\n"
"layout (binding = 1, rgba16f) writeonly uniform image2D dstTex;\n"
"\n"
"layout (location = 0) uniform vec4  weights;\n"
"layout (location = 1) uniform ivec2 maxSize;\n"
"layout (location = 2) uniform ivec2 scale;\n"
"layout (location = 3) uniform ivec2 offset;\n"
"layout (location = 4) uniform uint  clampMode; // 0 = repeat\n"
"\n"
"layout (local_size_x = 8, local_size_y = 8) in;\n"
"\n"
"ivec2 FixCoords(ivec2 c)\n"
"{\n"
	"if(clampMode > 0)\n"
"	{\n"
"		// clamp\n"
"		return clamp(c, ivec2(0, 0), maxSize);\n"
"	}\n"
"\n"
"	// repeat\n"
"	return c & maxSize;\n"
"}\n"
"\n"
"void main()\n"
"{\n"
	"ivec2 dstTC = ivec2(gl_GlobalInvocationID);\n"
"	ivec2 base  = ivec2(gl_GlobalInvocationID) * scale;\n"
"	vec4 r = vec4(0, 0, 0, 0);\n"
"	r += imageLoad(srcTex, FixCoords(base - offset * 3)) * weights.x;\n"
"	r += imageLoad(srcTex, FixCoords(base - offset * 2)) * weights.y;\n"
"	r += imageLoad(srcTex, FixCoords(base - offset    )) * weights.z;\n"
"	r += imageLoad(srcTex,           base              ) * weights.w;\n"
"	r += imageLoad(srcTex,           base + offset     ) * weights.w;\n"
"	r += imageLoad(srcTex, FixCoords(base + offset * 2)) * weights.z;\n"
"	r += imageLoad(srcTex, FixCoords(base + offset * 3)) * weights.y;\n"
"	r += imageLoad(srcTex, FixCoords(base + offset * 4)) * weights.x;\n"
"	imageStore(dstTex, dstTC, r);\n"
"}\n";


// Translates a CNQ3 render target color format (RTCF_*) into the
// internal format / pixel format / component type triple OpenGL expects.
// Any value other than the 10-bit and 16-bit formats maps to RGBA8.
void GL_GetRenderTargetFormat(GLenum* internalFormat, GLenum* format, GLenum* type, int cnq3Format)
{
	// the pixel format is BGRA for every supported layout
	*format = GL_BGRA;

	if(cnq3Format == RTCF_R10G10B10A2)
	{
		*internalFormat = GL_RGB10_A2;
		*type = GL_UNSIGNED_INT_2_10_10_10_REV;
	}
	else if(cnq3Format == RTCF_R16G16B16A16)
	{
		*internalFormat = GL_RGBA16;
		*type = GL_UNSIGNED_SHORT;
	}
	else
	{
		// RTCF_R8G8B8A8 and anything unrecognized
		*internalFormat = GL_RGBA8;
		*type = GL_UNSIGNED_BYTE;
	}
}

// Allocates multi-sampled color storage for the currently bound render buffer,
// using the format selected by r_rtColorFormat and the video mode's dimensions.
// Starts from r_msaa samples (capped by the driver-reported maximum when it can
// be queried) and lowers the count one step at a time until the driver accepts it.
// On success, *samples receives the sample count that was actually used.
// Raises a fatal error if even 0 samples is rejected.
void GL_CreateColorRenderBufferStorageMS(int* samples)
{
	GLenum internalFormat, format, type;
	GL_GetRenderTargetFormat(&internalFormat, &format, &type, r_rtColorFormat->integer);

	int sampleCount = r_msaa->integer;
	if(sampleCount < 0)
	{
		// a negative count can never be valid and would keep the retry loop
		// below decrementing forever
		sampleCount = 0;
	}

	while(glGetError() != GL_NO_ERROR) {} // clear the error queue

	if(GLEW_VERSION_4_2 || GLEW_ARB_internalformat_query)
	{
		GLint maxSampleCount = 0;
		glGetInternalformativ(GL_RENDERBUFFER, internalFormat, GL_SAMPLES, 1, &maxSampleCount);
		if(glGetError() == GL_NO_ERROR)
		{
			sampleCount = min(sampleCount, (int)maxSampleCount);
		}
	}

	GLenum errorCode = GL_NO_ERROR;
	for(;;)
	{
		// @NOTE: when the sample count is invalid, the error code is GL_INVALID_OPERATION
		glRenderbufferStorageMultisample(GL_RENDERBUFFER, sampleCount, internalFormat, glConfig.vidWidth, glConfig.vidHeight);
		errorCode = glGetError();
		if(errorCode == GL_NO_ERROR || sampleCount <= 0)
		{
			break;
		}

		--sampleCount;
	}

	if(errorCode != GL_NO_ERROR)
	{
		ri.Error(ERR_FATAL, "Failed to create multi-sampled render buffer storage (error 0x%X)\n", (unsigned int)errorCode);
	}

	*samples = sampleCount;
}

#if defined(_WIN32)

// Windows: reserves and commits read/write memory for the buffer.
// The size is capacity * itemSize, padded with PAD(..., 4096).
// Stores the pointer in mappedData and the padded size in pinnedByteCount.
// NOTE(review): the VirtualAlloc result is not checked here; on failure
// mappedData is NULL while pinnedByteCount is still set.
static void AllocatePinnedMemory(ArrayBuffer* buffer)
{
	const int byteCount = PAD(buffer->capacity * buffer->itemSize, 4096);
	buffer->mappedData = (byte*)VirtualAlloc(NULL, byteCount, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
	buffer->pinnedByteCount = byteCount;
}

// Windows: releases the memory obtained by AllocatePinnedMemory and
// resets mappedData / pinnedByteCount.
static void FreePinnedMemory(ArrayBuffer* buffer)
{
	VirtualFree(buffer->mappedData, 0, MEM_RELEASE);
	buffer->mappedData = NULL;
	buffer->pinnedByteCount = 0;
}

#else

// POSIX: maps anonymous, private, read/write memory for the buffer.
// The size is capacity * itemSize, padded with PAD to the system page size
// (4096 when sysconf can't report one).
// On success, stores the pointer in mappedData and the padded size in pinnedByteCount.
// On failure, mappedData is NULL and pinnedByteCount is 0, which matches what
// a failed VirtualAlloc yields for mappedData on Windows.
static void AllocatePinnedMemory(ArrayBuffer* buffer)
{
	const int pageSizeSC = (int)sysconf(_SC_PAGE_SIZE);
	const int pageSize = pageSizeSC > 0 ? pageSizeSC : 4096;
	const int byteCount = PAD(buffer->capacity * buffer->itemSize, pageSize);

	void* const mapping = mmap(NULL, byteCount, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if(mapping == MAP_FAILED)
	{
		// mmap reports failure with MAP_FAILED, not NULL:
		// don't let it pass for a valid pointer
		buffer->mappedData = NULL;
		buffer->pinnedByteCount = 0;
		return;
	}

	buffer->mappedData = (byte*)mapping;
	buffer->pinnedByteCount = byteCount;
}

// POSIX: unmaps the pinnedByteCount bytes at mappedData and
// resets mappedData / pinnedByteCount.
static void FreePinnedMemory(ArrayBuffer* buffer)
{
	munmap(buffer->mappedData, buffer->pinnedByteCount);
	buffer->mappedData = NULL;
	buffer->pinnedByteCount = 0;
}

#endif

static void HandleError(const char* message)
{
	if(gl.errorMode == EM_FATAL)
	{
		ri.Error(ERR_FATAL, message);
	}
	else if(gl.errorMode == EM_PRINT)
	{
		ri.Printf(PRINT_ERROR, message);
	}
}

// identifier must be one of:
// GL_BUFFER, GL_SHADER, GL_PROGRAM, GL_VERTEX_ARRAY, GL_QUERY, GL_SAMPLER, GL_TEXTURE,
// GL_RENDERBUFFER, GL_FRAMEBUFFER, GL_PROGRAM_PIPELINE, GL_TRANSFORM_FEEDBACK
// Attaches a human-readable label to a GL object for debugging tools.
// Does nothing when neither OpenGL 4.3 nor KHR_debug is available.
static void SetDebugName(GLenum identifier, GLuint name, const char* string)
{
	const qbool canLabel = GLEW_VERSION_4_3 || GLEW_KHR_debug;
	if(!canLabel)
	{
		return;
	}

	// a length of -1 means the label is null-terminated
	glObjectLabel(identifier, name, -1, string);
}

// Returns a printable name for a shader stage, or "???" for unknown stages.
static const char* GetShaderTypeName(GLenum shaderType)
{
	if(shaderType == GL_VERTEX_SHADER)
	{
		return "vertex";
	}

	if(shaderType == GL_FRAGMENT_SHADER)
	{
		return "fragment";
	}

	if(shaderType == GL_COMPUTE_SHADER)
	{
		return "compute";
	}

	return "???";
}

// Compiles one shader stage.
// The source is prefixed with a #version line (430 for compute, 140 otherwise)
// and with the CNQ3_A2C / CNQ3_DITHER defines, which are only set to 1 for the
// generic pipeline's fragment shader.
// On success, *shaderPtr receives the shader object and qtrue is returned.
// On failure, the shader object is deleted, *shaderPtr is left untouched and
// the failure is reported: fatally when the driver provided an info log,
// through the current gl.errorMode otherwise.
// With r_verbose set, the info log of a successful compilation is printed too.
static qbool CreateShader(GLuint* shaderPtr, PipelineId pipelineId, GLenum shaderType, const char* shaderSource, const char* debugName)
{
	const char* sourceArray[] =
	{
		shaderType == GL_COMPUTE_SHADER ? "#version 430\n" : "#version 140\n",
		"\n",
		pipelineId == PID_GENERIC && glInfo.alphaToCoverageSupport && shaderType == GL_FRAGMENT_SHADER ? "#define CNQ3_A2C 1\n" : "#define CNQ3_A2C 0\n",
		pipelineId == PID_GENERIC && r_dither->integer && shaderType == GL_FRAGMENT_SHADER ? "#define CNQ3_DITHER 1\n" : "#define CNQ3_DITHER 0\n",
		shaderSource
	};

	GLuint shader = glCreateShader(shaderType);
	glShaderSource(shader, ARRAY_LEN(sourceArray), sourceArray, NULL);
	glCompileShader(shader);

	GLint result = GL_FALSE;
	glGetShaderiv(shader, GL_COMPILE_STATUS, &result);
	const qbool success = result == GL_TRUE;
	if(success)
	{
		*shaderPtr = shader;
		SetDebugName(GL_SHADER, shader, va("%s %s shader", debugName, GetShaderTypeName(shaderType)));
	}

	// fetch the info log first: the shader object may be gone by the time we report
	qbool hasLog = qfalse;
	if(!success || r_verbose->integer)
	{
		GLint logLength = 0;
		glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &logLength);
		if(logLength > 0)
		{
			glGetShaderInfoLog(shader, sizeof(gl.log), NULL, gl.log);
			hasLog = qtrue;
		}
	}

	if(!success)
	{
		// release the object before reporting since a fatal error won't return here
		glDeleteShader(shader);
	}

	if(hasLog)
	{
		const ErrorMode em = gl.errorMode;
		gl.errorMode = success ? EM_PRINT : EM_FATAL;
		if(success)
		{
			// the compilation worked: this is only the driver's warnings/notes
			HandleError(va("'%s' %s shader compilation log: %s\n", debugName, GetShaderTypeName(shaderType), gl.log));
		}
		else
		{
			HandleError(va("'%s' %s shader compilation failed: %s\n", debugName, GetShaderTypeName(shaderType), gl.log));
		}
		gl.errorMode = em;
	}
	else if(!success)
	{
		HandleError(va("'%s' %s shader compilation failed\n", debugName, GetShaderTypeName(shaderType)));
	}

	return success;
}

// Checks the link status of prog->program and reports the outcome.
// Returns qtrue when the program linked.
// A link failure is reported fatally when the driver provided an info log,
// through the current gl.errorMode otherwise.
// With r_verbose set, the info log of a successful link is printed too.
static qbool FinalizeProgram(Program* prog, const char* debugName)
{
	GLint result = GL_FALSE;
	glGetProgramiv(prog->program, GL_LINK_STATUS, &result);
	const qbool success = result == GL_TRUE;
	if(success)
	{
		SetDebugName(GL_PROGRAM, prog->program, va("%s program", debugName));
	}

	if(!success || r_verbose->integer)
	{
		GLint logLength = 0;
		glGetProgramiv(prog->program, GL_INFO_LOG_LENGTH, &logLength);
		if(logLength > 0)
		{
			glGetProgramInfoLog(prog->program, sizeof(gl.log), NULL, gl.log);
			const ErrorMode em = gl.errorMode;
			gl.errorMode = success ? EM_PRINT : EM_FATAL;
			if(success)
			{
				// the link worked: this is only the driver's warnings/notes
				HandleError(va("'%s' program link log: %s\n", debugName, gl.log));
			}
			else
			{
				HandleError(va("'%s' program link failed: %s\n", debugName, gl.log));
			}
			gl.errorMode = em;
		}
		else if(!success)
		{
			HandleError(va("'%s' program link failed\n", debugName));
		}
	}

	return success;
}

// Builds the vertex + fragment program of the given pipeline.
// The fragment shader is only compiled when the vertex shader compiled.
// Enabled vertex streams are bound to the attribute index matching their
// VertexBufferId before the program is linked.
// Returns qtrue when both shaders compiled and the program linked.
static qbool CreateGraphicsProgram(PipelineId pipelineId, const char* vs, const char* fs, const char* debugName)
{
	Pipeline& pipe = gl.pipelines[pipelineId];
	Program& program = pipe.program;

	if(!CreateShader(&program.vertexShader, pipelineId, GL_VERTEX_SHADER, vs, debugName))
	{
		return qfalse;
	}

	if(!CreateShader(&program.fragmentShader, pipelineId, GL_FRAGMENT_SHADER, fs, debugName))
	{
		return qfalse;
	}

	program.program = glCreateProgram();
	glAttachShader(program.program, program.vertexShader);
	glAttachShader(program.program, program.fragmentShader);

	// attribute locations only take effect at link time, so assign them first
	for(int vb = 0; vb < VB_COUNT; ++vb)
	{
		const PipelineArrayBuffer& stream = pipe.arrayBuffers[vb];
		if(!stream.enabled)
		{
			continue;
		}

		glBindAttribLocation(program.program, vb, stream.attribName);
	}

	glLinkProgram(program.program);

	return FinalizeProgram(&program, debugName);
}

// Builds a compute-only program from the given source.
// PID_COUNT is passed as the pipeline id since no graphics pipeline is involved.
// Returns qtrue when the shader compiled and the program linked.
static qbool CreateComputeProgram(Program* prog, const char* cs, const char* debugName)
{
	const qbool compiled = CreateShader(&prog->computeShader, PID_COUNT, GL_COMPUTE_SHADER, cs, debugName);
	if(!compiled)
	{
		return qfalse;
	}

	const GLuint program = glCreateProgram();
	prog->program = program;
	glAttachShader(program, prog->computeShader);
	glLinkProgram(program);

	return FinalizeProgram(prog, debugName);
}

extern void GL_GetRenderTargetFormat(GLenum* internalFormat, GLenum* format, GLenum* type, int cnq3Format);

// Creates a single-sampled frame buffer whose attachments are 2D textures
// sized to the video mode.
// fb           - receives the FBO and texture names and the attachment flags
// color        - create and attach a color texture (format from r_rtColorFormat, linear filtering)
// depthStencil - create and attach a 24/8 depth/stencil texture (nearest filtering)
// name         - prefix of the debug labels
// Raises a fatal error when the frame buffer is not complete.
// Leaves frame buffer 0 bound; the GL_TEXTURE_2D binding is left on the last texture created.
// NOTE(review): no draw/read buffer is set to GL_NONE when color is qfalse - confirm
// that depth-only FBOs are reported complete on the GL 3.x drivers being targeted.
static void FBO_CreateSS(FrameBuffer* fb, qbool color, qbool depthStencil, const char* name)
{
	if(depthStencil)
	{
		glGenTextures(1, &fb->depthStencil);
		glBindTexture(GL_TEXTURE_2D, fb->depthStencil);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
		glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, glConfig.vidWidth, glConfig.vidHeight, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, NULL);
		SetDebugName(GL_TEXTURE, fb->depthStencil, va("%s depth/stencil attachment", name));
	}

	if(color)
	{
		GLenum internalFormat, format, type;
		GL_GetRenderTargetFormat(&internalFormat, &format, &type, r_rtColorFormat->integer);
		glGenTextures(1, &fb->color);
		glBindTexture(GL_TEXTURE_2D, fb->color);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
		glTexImage2D(GL_TEXTURE_2D, 0, internalFormat, glConfig.vidWidth, glConfig.vidHeight, 0, format, type, NULL);
		SetDebugName(GL_TEXTURE, fb->color, va("%s color attachment 0", name));
	}

	// the textures exist now: create the FBO and attach them
	glGenFramebuffers(1, &fb->fbo);
	glBindFramebuffer(GL_FRAMEBUFFER, fb->fbo);
	if(color)
	{
		glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb->color, 0);
	}
	if(depthStencil)
	{
		glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, fb->depthStencil, 0);
	}

	const GLenum fboStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER);
	if(fboStatus != GL_FRAMEBUFFER_COMPLETE)
	{
		ri.Error(ERR_FATAL, "Failed to create FBO (status 0x%X, error 0x%X)\n", (unsigned int)fboStatus, (unsigned int)glGetError());
	}

	SetDebugName(GL_FRAMEBUFFER, fb->fbo, va("%s frame buffer", name));

	glBindFramebuffer(GL_FRAMEBUFFER, 0);
	fb->multiSampled = qfalse;
	fb->hasDepthStencil = depthStencil;
	fb->hasColor = color;
}

// Creates a multi-sampled frame buffer whose color and depth/stencil
// attachments are render buffers sized to the video mode.
// sampleCount - receives the sample count selected by GL_CreateColorRenderBufferStorageMS;
//               the same count is then used for the depth/stencil storage
// fb          - receives the FBO and render buffer names and the attachment flags
// name        - prefix of the debug labels
// Raises a fatal error when the frame buffer is not complete.
// Leaves frame buffer 0 bound.
static void FBO_CreateMS(int* sampleCount, FrameBuffer* fb, const char* name)
{
	glGenFramebuffers(1, &fb->fbo);
	glBindFramebuffer(GL_FRAMEBUFFER, fb->fbo);

	// color: the storage helper operates on the currently bound render buffer
	glGenRenderbuffers(1, &fb->color);
	glBindRenderbuffer(GL_RENDERBUFFER, fb->color);
	GL_CreateColorRenderBufferStorageMS(sampleCount);
	glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, fb->color);
	SetDebugName(GL_RENDERBUFFER, fb->color, va("%s color attachment 0", name));

	// depth/stencil
	glGenRenderbuffers(1, &fb->depthStencil);
	glBindRenderbuffer(GL_RENDERBUFFER, fb->depthStencil);
	glRenderbufferStorageMultisample(GL_RENDERBUFFER, *sampleCount, GL_DEPTH24_STENCIL8, glConfig.vidWidth, glConfig.vidHeight);
	glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_RENDERBUFFER, fb->depthStencil);
	SetDebugName(GL_RENDERBUFFER, fb->depthStencil, va("%s depth/stencil attachment", name));

	const GLenum fboStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER);
	if(fboStatus != GL_FRAMEBUFFER_COMPLETE)
	{
		ri.Error(ERR_FATAL, "Failed to create FBO (status 0x%X, error 0x%X)\n", (unsigned int)fboStatus, (unsigned int)glGetError());
	}

	SetDebugName(GL_FRAMEBUFFER, fb->fbo, va("%s frame buffer", name));

	glBindFramebuffer(GL_FRAMEBUFFER, 0);
	fb->multiSampled = qtrue;
	fb->hasDepthStencil = qtrue;
	fb->hasColor = qtrue;
}

static void FBO_Init()
{
	gl.fbMSEnabled = r_msaa->integer >= 2 && r_colorMipLevels->integer == 0;
	int finalSampleCount = 1;

	if(gl.fbMSEnabled)
	{
		FBO_CreateMS(&finalSampleCount, &gl.fbMS, "main");
		FBO_CreateSS(&gl.fbSSDepth, qfalse, qtrue, "depth resolve");
		FBO_CreateSS(&gl.fbSS[0], qtrue, qfalse, "post-process #1");
		FBO_CreateSS(&gl.fbSS[1], qtrue, qfalse, "post-process #2");
	}
	else
	{
		FBO_CreateSS(&gl.fbSS[0], qtrue, qtrue, "post-process #1");
		FBO_CreateSS(&gl.fbSS[1], qtrue, qtrue, "post-process #2");
	}

	ri.Printf(PRINT_ALL, "MSAA: %d samples requested, %d selected\n", r_msaa->integer, finalSampleCount);
}

// Binds the given frame buffer for both reading and drawing and
// selects color attachment 0 as the read and draw buffer.
static void FBO_Bind(const FrameBuffer* fb)
{
	glBindFramebuffer(GL_FRAMEBUFFER, fb->fbo);
	glReadBuffer(GL_COLOR_ATTACHMENT0);
	glDrawBuffer(GL_COLOR_ATTACHMENT0);
}

static void FBO_Bind()
{
	if(gl.fbMSEnabled)
	{
		FBO_Bind(&gl.fbMS);
	}
	else
	{
		FBO_Bind(&gl.fbSS[gl.fbReadIndex]);
	}
}

static void FBO_BlitToBackBuffer()
{
	// fixing up the blit mode here to avoid unnecessary glClear calls
	int blitMode = r_blitMode->integer;
	if(r_mode->integer != VIDEOMODE_UPSCALE)
	{
		blitMode = BLITMODE_STRETCHED;
	}

	if(blitMode != BLITMODE_STRETCHED)
	{
		glBindFramebuffer(GL_FRAMEBUFFER, 0);
		glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
		glClear(GL_COLOR_BUFFER_BIT);
	}

	const FrameBuffer& fbo = gl.fbSS[gl.fbReadIndex];
	glBindFramebuffer(GL_READ_FRAMEBUFFER, fbo.fbo);
	glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
	glReadBuffer(GL_COLOR_ATTACHMENT0);
	glDrawBuffer(GL_BACK);

	const int sw = glConfig.vidWidth;
	const int sh = glConfig.vidHeight;
	const int dw = glInfo.winWidth;
	const int dh = glInfo.winHeight;
	if(blitMode == BLITMODE_STRETCHED)
	{
		glBlitFramebuffer(0, 0, sw, sh, 0, 0, dw, dh, GL_COLOR_BUFFER_BIT, GL_LINEAR);
	}
	else if(blitMode == BLITMODE_CENTERED)
	{
		const int dx = (dw - sw) / 2;
		const int dy = (dh - sh) / 2;
		glBlitFramebuffer(0, 0, sw, sh, dx, dy, dx + sw, dy + sh, GL_COLOR_BUFFER_BIT, GL_LINEAR);
	}
	else // blitMode == BLITMODE_ASPECT
	{
		const float rx = (float)dw / (float)sw;
		const float ry = (float)dh / (float)sh;
		const float ar = min(rx, ry);
		const int w = (int)(sw * ar);
		const int h = (int)(sh * ar);
		const int x = (dw - w) / 2;
		const int y = (dh - h) / 2;
		glBlitFramebuffer(0, 0, sw, sh, x, y, x + w, y + h, GL_COLOR_BUFFER_BIT, GL_LINEAR);
	}
}

static void FBO_ResolveColor()
{
	const FrameBuffer& r = gl.fbMS;
	const FrameBuffer& d = gl.fbSS[gl.fbReadIndex];
	glBindFramebuffer(GL_READ_FRAMEBUFFER, r.fbo);
	glBindFramebuffer(GL_DRAW_FRAMEBUFFER, d.fbo);
	glReadBuffer(GL_COLOR_ATTACHMENT0);
	glDrawBuffer(GL_COLOR_ATTACHMENT0);

	const int w = glConfig.vidWidth;
	const int h = glConfig.vidHeight;
	glBlitFramebuffer(0, 0, w, h, 0, 0, w, h, GL_COLOR_BUFFER_BIT, GL_LINEAR);
}

static void FBO_ResolveDepth()
{
	const FrameBuffer& r = gl.fbMS;
	const FrameBuffer& d = gl.fbSSDepth;
	glBindFramebuffer(GL_READ_FRAMEBUFFER, r.fbo);
	glBindFramebuffer(GL_DRAW_FRAMEBUFFER, d.fbo);

	const int w = glConfig.vidWidth;
	const int h = glConfig.vidHeight;
	glBlitFramebuffer(0, 0, w, h, 0, 0, w, h, GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT, GL_NEAREST);
}

// Makes the given pipeline current, unless it already is:
// - uses its program
// - enables and describes the vertex attribute arrays it consumes, disables the others
// - resolves the MS depth buffer when switching to soft sprites with MSAA on
// - assigns texture units 0 and 1 to its samplers
// - flags all of its uniforms as needing an upload
static void ApplyPipeline(PipelineId pipelineId)
{
	if(gl.pipelineId == pipelineId)
	{
		return;
	}

	gl.pipelineId = pipelineId;

	Pipeline& next = gl.pipelines[pipelineId];
	glUseProgram(next.program.program);

	for(int attrib = 0; attrib < VB_COUNT; ++attrib)
	{
		if(!next.arrayBuffers[attrib].enabled)
		{
			glDisableVertexAttribArray(attrib);
			continue;
		}

		const ArrayBuffer& vb = gl.arrayBuffers[attrib];
		glEnableVertexAttribArray(attrib);
		glBindBuffer(GL_ARRAY_BUFFER, vb.buffer);
		glVertexAttribPointer(attrib, vb.componentCount, vb.dataType, vb.normalized, vb.itemSize, (const void*)0);
	}

	if(gl.fbMSEnabled && pipelineId == PID_SOFT_SPRITE)
	{
		// This is not how it should be done and will counter the benefits of MSAA.
		// Doing it right means binding the FBO's depth attachment to the shader, which
		// requires multi-sampled textures as FBO attachments instead of multi-sampled render buffers.
		// The shader would also have to use gl_SampleID, which changes our minimum requirements.
		// Because of all these changes and lack of testing time,
		// the necessary changes are to be done after the 1.52 release to avoid problems.
		FBO_ResolveDepth();
		FBO_Bind();
	}

	glUniform1i(next.textureLocations[0], 0);
	glActiveTexture(GL_TEXTURE1);
	glUniform1i(next.textureLocations[1], 1);
	glActiveTexture(GL_TEXTURE0);

	// every byte is set, so every entry reads as non-zero
	memset(next.uniformsDirty, 0xFF, sizeof(next.uniformsDirty));
}

// Translates a renderer wrap mode into the matching GL wrap mode.
// Anything that isn't TW_CLAMP_TO_EDGE maps to GL_REPEAT.
static GLint GetTextureWrapMode(textureWrap_t w)
{
	if(w == TW_CLAMP_TO_EDGE)
	{
		return GL_CLAMP_TO_EDGE;
	}

	// TW_REPEAT and unknown values
	return GL_REPEAT;
}

// Translates a renderer texture format into the GL internal (storage) format.
// GL_RGBA8 is both the TF_RGBA8 mapping and the fallback for unknown formats.
static GLint GetTextureInternalFormat(textureFormat_t f)
{
	GLint internalFormat = GL_RGBA8;
	switch(f)
	{
		case TF_RGBA8:
			internalFormat = GL_RGBA8;
			break;

		default:
			break;
	}

	return internalFormat;
}

// Translates a renderer texture format into the GL pixel data format.
// GL_RGBA is both the TF_RGBA8 mapping and the fallback for unknown formats.
static GLenum GetTextureFormat(textureFormat_t f)
{
	GLenum format = GL_RGBA;
	switch(f)
	{
		case TF_RGBA8:
			format = GL_RGBA;
			break;

		default:
			break;
	}

	return format;
}

// Binds a 2D texture unless the cache says the slot already holds it.
// @NOTE: the bind goes to whatever texture unit is currently active,
// so the caller must have activated the unit matching "slot"
static void BindTexture(int slot, GLuint texture)
{
	if(gl.boundTextures[slot] != texture)
	{
		glBindTexture(GL_TEXTURE_2D, texture);
		gl.boundTextures[slot] = texture;
	}
}

static void BindImage(int slot, const image_t* image)
{
	const GLuint texture = (GLuint)image->texnum;
	BindTexture(slot, texture);
}

// Uploads new pixel data for an animated image.
// This is the callback handed to R_UpdateAndGetBundleImage.
// image: image whose GL texture is updated; width/height are overwritten on a size change
// w, h : dimensions of the new pixel data
// data : pixels, handed to GL as GL_RGBA / GL_UNSIGNED_BYTE
// dirty: when the size is unchanged, the texture is only updated if this is set
// @NOTE: the texture is bound directly on the active texture unit,
// without going through the gl.boundTextures cache
static void UpdateAnimatedImage(image_t* image, int w, int h, const byte* data, qbool dirty)
{
	glBindTexture(GL_TEXTURE_2D, (GLuint)image->texnum);
	if(w != image->width || h != image->height)
	{
		// if the scratchImage isn't in the format we want, specify it as a new texture
		image->width = w;
		image->height = h;
		glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB8, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, data);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
		// GL_CLAMP is not accepted by core profile contexts (GL_INVALID_ENUM),
		// GL_CLAMP_TO_EDGE is valid everywhere and is what GetTextureWrapMode uses too
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
	}
	else if(dirty)
	{
		// otherwise, just update it
		glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, data);
	}
}

// Returns the image to use for the bundle, as selected by R_UpdateAndGetBundleImage.
// UpdateAnimatedImage is supplied as the callback for texture updates.
static const image_t* GetBundleImage(const textureBundle_t* bundle)
{
	const image_t* const image = R_UpdateAndGetBundleImage(bundle, &UpdateAnimatedImage);

	return image;
}

static void BindBundle(int slot, const textureBundle_t* bundle)
{
	BindImage(slot, GetBundleImage(bundle));
}

// Sets the viewport and the scissor rectangle to the same rectangle.
// x, y: lower-left corner as passed to glViewport/glScissor
// w, h: width and height
static void ApplyViewportAndScissor(int x, int y, int w, int h)
{
	glViewport(x, y, w, h);
	glScissor(x, y, w, h);
}

// Translates GLS_SRCBLEND_* state bits into the GL source blend factor.
// bits: the state bits already masked with GLS_SRCBLEND_BITS
// Unknown values map to GL_ONE.
static GLenum GetSourceBlend(unsigned int bits)
{
	struct BlendMapping
	{
		unsigned int bits;
		GLenum factor;
	};

	static const BlendMapping mappings[] =
	{
		{ GLS_SRCBLEND_ZERO, GL_ZERO },
		{ GLS_SRCBLEND_ONE, GL_ONE },
		{ GLS_SRCBLEND_DST_COLOR, GL_DST_COLOR },
		{ GLS_SRCBLEND_ONE_MINUS_DST_COLOR, GL_ONE_MINUS_DST_COLOR },
		{ GLS_SRCBLEND_SRC_ALPHA, GL_SRC_ALPHA },
		{ GLS_SRCBLEND_ONE_MINUS_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA },
		{ GLS_SRCBLEND_DST_ALPHA, GL_DST_ALPHA },
		{ GLS_SRCBLEND_ONE_MINUS_DST_ALPHA, GL_ONE_MINUS_DST_ALPHA },
		{ GLS_SRCBLEND_ALPHA_SATURATE, GL_SRC_ALPHA_SATURATE }
	};

	for(int i = 0; i < ARRAY_LEN(mappings); ++i)
	{
		if(mappings[i].bits == bits)
		{
			return mappings[i].factor;
		}
	}

	return GL_ONE;
}

// Translates GLS_DSTBLEND_* state bits into the GL destination blend factor.
// bits: the state bits already masked with GLS_DSTBLEND_BITS
// Unknown values map to GL_ONE.
static GLenum GetDestinationBlend(unsigned int bits)
{
	struct BlendMapping
	{
		unsigned int bits;
		GLenum factor;
	};

	static const BlendMapping mappings[] =
	{
		{ GLS_DSTBLEND_ZERO, GL_ZERO },
		{ GLS_DSTBLEND_ONE, GL_ONE },
		{ GLS_DSTBLEND_SRC_COLOR, GL_SRC_COLOR },
		{ GLS_DSTBLEND_ONE_MINUS_SRC_COLOR, GL_ONE_MINUS_SRC_COLOR },
		{ GLS_DSTBLEND_SRC_ALPHA, GL_SRC_ALPHA },
		{ GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA },
		{ GLS_DSTBLEND_DST_ALPHA, GL_DST_ALPHA },
		{ GLS_DSTBLEND_ONE_MINUS_DST_ALPHA, GL_ONE_MINUS_DST_ALPHA }
	};

	for(int i = 0; i < ARRAY_LEN(mappings); ++i)
	{
		if(mappings[i].bits == bits)
		{
			return mappings[i].factor;
		}
	}

	return GL_ONE;
}

// Translates GLS_ATEST_* state bits into the back-end's alpha test mode.
// bits: the state bits already masked with GLS_ATEST_BITS
// 0 (no alpha test) and unknown values both map to AT_ALWAYS.
static AlphaTest GetAlphaTest(unsigned int bits)
{
	if(bits == GLS_ATEST_GT_0)
	{
		return AT_GREATER_THAN_0;
	}

	if(bits == GLS_ATEST_LT_80)
	{
		return AT_LESS_THAN_HALF;
	}

	if(bits == GLS_ATEST_GE_80)
	{
		return AT_GREATER_OR_EQUAL_TO_HALF;
	}

	return AT_ALWAYS;
}

// Applies the face culling mode, skipping the GL calls when the cached mode already matches.
// CT_TWO_SIDED disables culling, CT_FRONT_SIDED culls GL_FRONT, anything else culls GL_BACK.
static void ApplyCullType(cullType_t cullType)
{
	if(gl.cullType == cullType)
	{
		return;
	}
	gl.cullType = cullType;

	if(cullType != CT_TWO_SIDED)
	{
		const GLenum culledFace = (cullType == CT_FRONT_SIDED) ? GL_FRONT : GL_BACK;
		glEnable(GL_CULL_FACE);
		glCullFace(culledFace);
		return;
	}

	glDisable(GL_CULL_FACE);
}

// Applies the blend function, skipping the GL calls when the cached bits already match.
// srcBlendBits: state bits masked with GLS_SRCBLEND_BITS
// dstBlendBits: state bits masked with GLS_DSTBLEND_BITS
// When both are 0, blending is disabled altogether.
static void ApplyBlendFunc(unsigned int srcBlendBits, unsigned int dstBlendBits)
{
	// @NOTE: this also covers "blending was off and stays off":
	// that case means all 4 values are 0 and therefore pair-wise equal
	if(srcBlendBits == gl.srcBlendBits && dstBlendBits == gl.dstBlendBits)
	{
		return;
	}

	gl.srcBlendBits = srcBlendBits;
	gl.dstBlendBits = dstBlendBits;
	if((srcBlendBits | dstBlendBits) == 0)
	{
		glDisable(GL_BLEND);
	}
	else
	{
		glEnable(GL_BLEND);
		glBlendFunc(GetSourceBlend(srcBlendBits), GetDestinationBlend(dstBlendBits));
	}
}

// Enables or disables depth testing, skipping the GL call when the cached state already matches.
static void ApplyDepthTest(qbool enableDepthTest)
{
	if(enableDepthTest != gl.enableDepthTest)
	{
		gl.enableDepthTest = enableDepthTest;
		if(!enableDepthTest)
		{
			glDisable(GL_DEPTH_TEST);
		}
		else
		{
			glEnable(GL_DEPTH_TEST);
		}
	}
}

// Sets the depth comparison function, skipping the GL call when the cached value already matches.
static void ApplyDepthFunc(GLenum depthFunc)
{
	if(depthFunc != gl.depthFunc)
	{
		gl.depthFunc = depthFunc;
		glDepthFunc(depthFunc);
	}
}

// Enables or disables depth writes, skipping the GL call when the cached state already matches.
static void ApplyDepthMask(GLboolean enableDepthWrite)
{
	if(enableDepthWrite != gl.enableDepthWrite)
	{
		gl.enableDepthWrite = enableDepthWrite;
		glDepthMask(enableDepthWrite ? GL_TRUE : GL_FALSE);
	}
}

// Sets the rasterization mode of front and back faces,
// skipping the GL call when the cached value already matches.
static void ApplyPolygonMode(GLenum polygonMode)
{
	if(polygonMode != gl.polygonMode)
	{
		gl.polygonMode = polygonMode;
		glPolygonMode(GL_FRONT_AND_BACK, polygonMode);
	}
}

// Enables or disables polygon offset for filled polygons,
// skipping the GL call when the cached state already matches.
static void ApplyPolygonOffset(qbool enablePolygonOffset)
{
	if(enablePolygonOffset != gl.enablePolygonOffset)
	{
		gl.enablePolygonOffset = enablePolygonOffset;
		if(!enablePolygonOffset)
		{
			glDisable(GL_POLYGON_OFFSET_FILL);
		}
		else
		{
			glEnable(GL_POLYGON_OFFSET_FILL);
		}
	}
}

// Enables or disables the first user clip distance,
// skipping the GL call when the cached state already matches.
static void ApplyClipPlane(qbool enableClipPlane)
{
	if(enableClipPlane != gl.enableClipPlane)
	{
		gl.enableClipPlane = enableClipPlane;
		if(!enableClipPlane)
		{
			glDisable(GL_CLIP_DISTANCE0);
		}
		else
		{
			glEnable(GL_CLIP_DISTANCE0);
		}
	}
}

// Applies the alpha test mode.
// Alpha-to-coverage is turned on only when it is supported, the generic pipeline is current
// and an actual alpha test is requested; it is evaluated on every call because it depends
// on the current pipeline and not just on the alpha test mode.
// The alpha test itself is done by the shaders, so a change only flags the relevant uniform
// of the current pipeline as dirty.
static void ApplyAlphaTest(AlphaTest alphaTest)
{
	const qbool wantA2C = glInfo.alphaToCoverageSupport && gl.pipelineId == PID_GENERIC && alphaTest != AT_ALWAYS;
	if(wantA2C != gl.enableAlphaToCoverage)
	{
		gl.enableAlphaToCoverage = wantA2C;
		if(!wantA2C)
		{
			glDisable(GL_SAMPLE_ALPHA_TO_COVERAGE);
		}
		else
		{
			glEnable(GL_SAMPLE_ALPHA_TO_COVERAGE);
		}
	}

	if(alphaTest != gl.alphaTest)
	{
		gl.alphaTest = alphaTest;

		switch(gl.pipelineId)
		{
			case PID_GENERIC:
				gl.pipelines[PID_GENERIC].uniformsDirty[GU_ALPHA_TEX] = qtrue;
				break;

			case PID_SOFT_SPRITE:
				gl.pipelines[PID_SOFT_SPRITE].uniformsDirty[SU_ALPHA_TEST] = qtrue;
				break;

			default:
				break;
		}
	}
}

static void ApplyState(unsigned int stateBits, cullType_t cullType, qbool polygonOffset)
{
	// fix up the cull mode for mirrors
	if(backEnd.viewParms.isMirror)
	{
		if(cullType == CT_BACK_SIDED)
		{
			cullType = CT_FRONT_SIDED;
		}
		else if(cullType == CT_FRONT_SIDED)
		{
			cullType = CT_BACK_SIDED;
		}
	}
	ApplyCullType(cullType);

	const unsigned int srcBlendBits = stateBits & GLS_SRCBLEND_BITS;
	const unsigned int dstBlendBits = stateBits & GLS_DSTBLEND_BITS;
	ApplyBlendFunc(srcBlendBits, dstBlendBits);

	const qbool disableDepthTest = ((stateBits & GLS_DEPTHTEST_DISABLE) != 0) || backEnd.projection2D;
	ApplyDepthTest(!disableDepthTest);

	const qbool depthFuncEqual = (stateBits & GLS_DEPTHFUNC_EQUAL) != 0;
	ApplyDepthFunc(depthFuncEqual ? GL_EQUAL : GL_LEQUAL);

	const qbool enableDepthWrite = (stateBits & GLS_DEPTHMASK_TRUE) != 0 && gl.pipelineId != PID_SOFT_SPRITE;
	ApplyDepthMask(enableDepthWrite ? GL_TRUE : GL_FALSE);

	const qbool wireFrame = (stateBits & GLS_POLYMODE_LINE) ? 1 : 0;
	ApplyPolygonMode(wireFrame ? GL_LINE : GL_FILL);

	ApplyPolygonOffset(polygonOffset);

	ApplyAlphaTest(GetAlphaTest(stateBits & GLS_ATEST_BITS));
}

// Records the texture environment mode.
// With the generic pipeline current, a change flags the GU_ALPHA_TEX uniform as dirty
// since that uniform carries both the alpha test and the texture environment.
static void ApplyTexEnv(texEnv_t texEnv)
{
	const qbool changed = texEnv != gl.texEnv;
	if(changed && gl.pipelineId == PID_GENERIC)
	{
		gl.pipelines[PID_GENERIC].uniformsDirty[GU_ALPHA_TEX] = qtrue;
	}

	gl.texEnv = texEnv;
}

// Intentionally empty: the vertex buffer id is ignored and no GL call is made.
// Call sites use it where a vertex array is left as is instead of being uploaded.
static void BindVertexArray(VertexBufferId)
{
}

// Rewinds the buffer's write index to the start of its current range and,
// if that range is guarded by a fence, blocks until the fence is signaled and deletes it.
// Raises a fatal error when the wait fails.
static void Buffer_WaitForRange(ArrayBuffer* buffer)
{
	buffer->writeIndex = buffer->writeRangeIndex * (buffer->capacity / LARGEBUFFER_MAX_FRAMES);

	GLsync* const fence = &buffer->fences[buffer->writeRangeIndex];
	if(*fence == NULL)
	{
		return;
	}

	// the first check is a plain poll: no flush and no time-out
	GLenum status = glClientWaitSync(*fence, 0, 0);
	while(status != GL_ALREADY_SIGNALED && status != GL_CONDITION_SATISFIED)
	{
		if(status == GL_WAIT_FAILED)
		{
			ri.Error(ERR_FATAL, "glClientWaitSync failed with GL_WAIT_FAILED\n");
		}

		// after the first time, we need to start flushing and wait as long as necessary
		const GLuint64 oneSecondNS = 1000000000;
		status = glClientWaitSync(*fence, GL_SYNC_FLUSH_COMMANDS_BIT, oneSecondNS);
	}

	glDeleteSync(*fence);
	*fence = NULL;
}

// Inserts a fence guarding the buffer's current range, then moves on to the next range
// (wrapping around after LARGEBUFFER_MAX_FRAMES) and points the write index at its start.
// The current range is expected to have no fence yet; if it does, the existing one is kept.
static void Buffer_LockRange(ArrayBuffer* buffer)
{
	GLsync* const fence = &buffer->fences[buffer->writeRangeIndex];
	assert(*fence == NULL);
	if(*fence == NULL)
	{
		*fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
		assert(*fence != NULL);
	}

	buffer->writeRangeIndex = (buffer->writeRangeIndex + 1) % LARGEBUFFER_MAX_FRAMES;
	buffer->writeIndex = (buffer->capacity / LARGEBUFFER_MAX_FRAMES) * buffer->writeRangeIndex;
}

static void Buffers_Wait()
{
	for(int i = 0; i < VB_COUNT; ++i)
	{
		Buffer_WaitForRange(&gl.arrayBuffers[i]);
	}

	Buffer_WaitForRange(&gl.indexBuffer);
}

static void Buffers_Lock()
{
	for(int i = 0; i < VB_COUNT; ++i)
	{
		Buffer_LockRange(&gl.arrayBuffers[i]);
	}

	Buffer_LockRange(&gl.indexBuffer);
}

// Tells whether the current mapping type streams geometry into large multi-frame buffers
// synchronized with fences (persistent, unsynchronized and AMD pinned mappings).
static qbool MappingType_UsesLargeBuffers()
{
	switch(gl.mappingType)
	{
		case MT_PERSISTENT:
		case MT_UNSYNC:
		case MT_AMDPIN:
			return qtrue;

		default:
			return qfalse;
	}
}

// Copies itemCount items from data into the given vertex or index buffer.
// buffer   : destination; must already be bound to its target (array or element array)
// data     : source items, itemCount * buffer->itemSize bytes are read
// itemCount: number of items to copy
// With large buffers, the data is appended at buffer->writeIndex and buffer->readIndex is set
// to where it starts. Otherwise, the data is written at the start of the buffer with
// glBufferSubData and buffer->readIndex is 0.
static void UploadGeometry(ArrayBuffer* buffer, const void* data, int itemCount)
{
	const GLenum target = buffer->indexBuffer ? GL_ELEMENT_ARRAY_BUFFER : GL_ARRAY_BUFFER;
	if(MappingType_UsesLargeBuffers())
	{
		// the buffer is split into LARGEBUFFER_MAX_FRAMES ranges of equal length
		const int rangeLength = buffer->capacity / LARGEBUFFER_MAX_FRAMES;
		// index of the range the last item of this upload would land in
		const int endRangeIndex = (buffer->writeIndex + itemCount - 1) / rangeLength;
#if defined(_DEBUG)
		assert(endRangeIndex == buffer->writeRangeIndex ||
			   endRangeIndex == buffer->writeRangeIndex + 1 ||
			   (endRangeIndex == 0 && buffer->writeRangeIndex == LARGEBUFFER_MAX_FRAMES - 1));
		const int startRangeIndex = buffer->writeIndex == 0 ? 0 : ((buffer->writeIndex - 1) / rangeLength);
		assert(startRangeIndex == buffer->writeRangeIndex ||
			   startRangeIndex == (buffer->writeRangeIndex + LARGEBUFFER_MAX_FRAMES - 1) % LARGEBUFFER_MAX_FRAMES);
#endif
		// the upload would spill into the next range:
		// fence the current range, move to the start of the next one and wait until it's free
		if(endRangeIndex == buffer->writeRangeIndex + 1)
		{
			Buffer_LockRange(buffer);
			Buffer_WaitForRange(buffer);
		}

		void* mappedData = NULL;
		if(gl.mappingType == MT_UNSYNC)
		{
			// map just the destination range, without any implicit synchronization
			mappedData = glMapBufferRange(target, buffer->writeIndex * buffer->itemSize, itemCount * buffer->itemSize, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
			if(mappedData == NULL)
			{
				ri.Error(ERR_FATAL, "Couldn't map buffer\n");
			}
		}
		else
		{
			// the other large buffer modes keep a CPU-visible pointer to the whole buffer around
			mappedData = buffer->mappedData + buffer->writeIndex * buffer->itemSize;
		}
		memcpy(mappedData, data, itemCount * buffer->itemSize);
		if(gl.mappingType == MT_UNSYNC)
		{
			glUnmapBuffer(target);
		}

		// the draw call reads from where we just wrote, the next upload goes right after
		buffer->readIndex = buffer->writeIndex;
		buffer->writeIndex += itemCount;
	}
	else
	{
		glBufferSubData(target, (GLintptr)0, itemCount * buffer->itemSize, data);
		buffer->readIndex = 0;
	}
}

// Uploads tess.numVertexes items from data into the given vertex buffer.
// With large buffers, the vertex attribute pointer is re-specified to start at the freshly
// written data since its offset inside the buffer changes with every upload.
static void UploadVertexArray(VertexBufferId vbid, const void* data)
{
	ArrayBuffer* const vertexBuffer = &gl.arrayBuffers[vbid];

	glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer->buffer);
	UploadGeometry(vertexBuffer, data, tess.numVertexes);
	if(!MappingType_UsesLargeBuffers())
	{
		return;
	}

	const GLintptr byteOffset = (GLintptr)(vertexBuffer->readIndex * vertexBuffer->itemSize);
	glVertexAttribPointer(vbid, vertexBuffer->componentCount, vertexBuffer->dataType, vertexBuffer->normalized, vertexBuffer->itemSize, (const GLvoid*)byteOffset);
}

// Uploads indexCount indices from data into the index buffer.
static void UploadIndices(const void* data, int indexCount)
{
	// @NOTE: we only have 1 index buffer and it's already bound,
	// so there is no glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ...) call here
	UploadGeometry(&gl.indexBuffer, data, indexCount);
}

// Creates the GL buffer object of a vertex or index buffer and allocates its storage
// according to the current mapping type. The buffer is left bound to its target.
// buffer: capacity, itemSize and indexBuffer must be set; buffer->buffer is written,
//         and so is buffer->mappedData for the persistent and AMD pinned modes
// Raises a fatal error when the storage can't be mapped or allocated.
static void CreateGeometryBufferStorage(ArrayBuffer* buffer)
{
	const GLenum target = buffer->indexBuffer ? GL_ELEMENT_ARRAY_BUFFER : GL_ARRAY_BUFFER;
	if(gl.mappingType == MT_PERSISTENT)
	{
		// immutable storage that stays mapped for writing for the buffer's entire life-time
		glGenBuffers(1, &buffer->buffer);
		glBindBuffer(target, buffer->buffer);
		const GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
		glBufferStorage(target, buffer->capacity * buffer->itemSize, NULL, flags);
		buffer->mappedData = (byte*)glMapBufferRange(target, 0, buffer->capacity * buffer->itemSize, flags);
		if(buffer->mappedData == NULL)
		{
			ri.Error(ERR_FATAL, "Couldn't map buffer storage\n");
		}
	}
	else if(gl.mappingType == MT_AMDPIN)
	{
		// start from a clean slate so that the glGetError calls below report our own calls only
		while(glGetError() != GL_NO_ERROR) {} // clear the error queue
		GLenum errorCode = GL_NO_ERROR;

		// the storage is memory we allocate ourselves and hand over to GL
		AllocatePinnedMemory(buffer);
		if(buffer->mappedData == NULL)
		{
			ri.Error(ERR_FATAL, "Couldn't allocate buffer storage\n");
		}
		glGenBuffers(1, &buffer->buffer);
		glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, buffer->buffer);
		if((errorCode = glGetError()) != GL_NO_ERROR)
		{
			ri.Error(ERR_FATAL, "glBindBuffer GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD failed with error code: 0x%08X\n", (unsigned int)errorCode);
		}
		glBufferData(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, buffer->pinnedByteCount, buffer->mappedData, GL_DYNAMIC_DRAW);
		if((errorCode = glGetError()) != GL_NO_ERROR)
		{
			ri.Error(ERR_FATAL, "glBufferData GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD failed with error code: 0x%08X\n", (unsigned int)errorCode);
		}
		// from here on, the buffer is used through its regular target
		glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0);
		glBindBuffer(target, buffer->buffer);
	}
	else
	{
		// regular mutable storage with no initial data
		glGenBuffers(1, &buffer->buffer);
		glBindBuffer(target, buffer->buffer);
		glBufferData(target, buffer->capacity * buffer->itemSize, NULL, GL_DYNAMIC_DRAW);
	}
}

static void DrawElements(int indexCount)
{
	glDrawElements(GL_TRIANGLES, indexCount, GL_UNSIGNED_INT, (const GLvoid*)(GLintptr)(gl.indexBuffer.readIndex * gl.indexBuffer.itemSize));
}

// Puts the GL context into the back-end's default state and
// resets the cached state in "gl" so that it mirrors what was just set.
static void SetDefaultState()
{
	glViewport(0, 0, glConfig.vidWidth, glConfig.vidHeight);
	glScissor(0, 0, glConfig.vidWidth, glConfig.vidHeight);
	glEnable(GL_DEPTH_TEST);
	glEnable(GL_SCISSOR_TEST);
	glEnable(GL_BLEND);
	glDisable(GL_CULL_FACE);
	glDisable(GL_POLYGON_OFFSET_FILL);
	// @NOTE: this used to be set a second time further down, once is enough
	glPixelStorei(GL_PACK_ALIGNMENT, 1);
	glDepthFunc(GL_LEQUAL);
	glDepthMask(GL_FALSE);
	glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
	glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
	glCullFace(GL_FRONT);
	glPolygonOffset(-1.0f, -1.0f);
	glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
	glClearDepth(1.0f);
	glActiveTexture(GL_TEXTURE0);
	glDisable(GL_CLIP_DISTANCE0);
	glDisable(GL_SAMPLE_ALPHA_TO_COVERAGE);

	// GLuint(-1) can't match the texture of an image, so the first bind of each slot always goes through
	gl.boundTextures[0] = GLuint(-1);
	gl.boundTextures[1] = GLuint(-1);
	// the cached values must match the GL calls above
	gl.cullType = CT_TWO_SIDED;
	gl.srcBlendBits = GLS_SRCBLEND_SRC_ALPHA;
	gl.dstBlendBits = GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA;
	gl.enableDepthTest = qtrue;
	gl.depthFunc = GL_LEQUAL;
	gl.enableDepthWrite = GL_FALSE;
	gl.polygonMode = GL_FILL;
	gl.enablePolygonOffset = qfalse;
	gl.enableClipPlane = qfalse;
	gl.enableAlphaToCoverage = qfalse;
}

// Creates the resources needed for GPU-side mip-map generation:
// 3 square textures of gl.maxTextureSize (2 RGBA16F, 1 RGBA8) and 3 compute programs.
// Returns qtrue on success and qfalse if the texture storage couldn't be allocated
// or if a compute program couldn't be created.
// NOTE(review): the textures generated here are not deleted on the failure paths.
static qbool InitCompute()
{
	while(glGetError() != GL_NO_ERROR) {} // clear the error queue

	glGenTextures(ARRAY_LEN(gl.mipGen.textures), gl.mipGen.textures);
	glBindTexture(GL_TEXTURE_2D, gl.mipGen.textures[0]);
	SetDebugName(GL_TEXTURE, gl.mipGen.textures[0], "mip-gen float16 texture #1");
	glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA16F, gl.maxTextureSize, gl.maxTextureSize);
	glBindTexture(GL_TEXTURE_2D, gl.mipGen.textures[1]);
	SetDebugName(GL_TEXTURE, gl.mipGen.textures[1], "mip-gen float16 texture #2");
	glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA16F, gl.maxTextureSize, gl.maxTextureSize);
	glBindTexture(GL_TEXTURE_2D, gl.mipGen.textures[2]);
	SetDebugName(GL_TEXTURE, gl.mipGen.textures[2], "mip-gen uint8 texture");
	glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, gl.maxTextureSize, gl.maxTextureSize);

	// the error queue was emptied on entry, so an error here comes from one of the calls above
	if(glGetError() != GL_NO_ERROR)
	{
		HandleError("Failed to allocate storage for the mip-map generation textures\n");
		return qfalse;
	}

	// creation stops at the first program that fails
	if(!CreateComputeProgram(&gl.mipGen.programs[CPID_GAMMA_TO_LINEAR], gammaToLinear_cs, "gamma to linear") ||
	   !CreateComputeProgram(&gl.mipGen.programs[CPID_LINEAR_TO_GAMMA], linearToGamma_cs, "linear to gamma") ||
	   !CreateComputeProgram(&gl.mipGen.programs[CPID_DOWN_SAMPLE], downSample_cs, "down sample"))
	{
		HandleError("Failed to compile compute shaders for GPU-side mip-map generation\n");
		return qfalse;
	}

	return qtrue;
}

// Selects the geometry streaming method from r_gl3_geoStream and the available GL features.
// Explicit requests for glBufferSubData and unsynchronized mapping are always honored.
// AMD pinned memory and persistent mapping are only honored when supported.
// Everything else (automatic mode, unsupported requests) falls back in this order:
// persistent mapping (automatic mode only), AMD pinned memory,
// unsynchronized mapping on Intel renderers, glBufferSubData.
static MappingType GetMappingTypeFromCvar()
{
	const int mode = r_gl3_geoStream->integer;
	if(mode == GL3MAP_SUBDATA)
	{
		return MT_SUBDATA;
	}

	if(mode == GL3MAP_MAPUNSYNC)
	{
		return MT_UNSYNC;
	}

	if(mode == GL3MAP_AMDPIN && GLEW_AMD_pinned_memory)
	{
		return MT_AMDPIN;
	}

	if((mode == GL3MAP_AUTO || mode == GL3MAP_MAPPERS) && (GLEW_VERSION_4_4 || GLEW_ARB_buffer_storage))
	{
		return MT_PERSISTENT;
	}

	if(GLEW_AMD_pinned_memory)
	{
		return MT_AMDPIN;
	}

	// glGetString returns NULL when it fails and strstr must never be handed a NULL pointer
	const char* const renderer = (const char*)glGetString(GL_RENDERER);
	if(renderer != NULL && strstr(renderer, "Intel") != NULL)
	{
		return MT_UNSYNC;
	}

	return MT_SUBDATA;
}

// Generates the names of all the timer query objects.
static void InitQueries()
{
	glGenQueries(ARRAY_LEN(gl.timerQueries), gl.timerQueries);
}

// Starts timing with the query at the current write index
// and remembers that this query has been started at least once.
static void BeginQueries()
{
	gl.queryStarted[gl.queryWriteIndex] = qtrue;
	glBeginQuery(GL_TIME_ELAPSED, gl.timerQueries[gl.queryWriteIndex]);
}

static void EndQueries()
{
	// finish this frame
	glEndQuery(GL_TIME_ELAPSED);
	gl.queryWriteIndex = (gl.queryWriteIndex + 1) % ARRAY_LEN(gl.timerQueries);

	// try to grab a previous frame's result
	if(gl.queryStarted[gl.queryReadIndex])
	{
		const GLuint query = gl.timerQueries[gl.queryReadIndex];
		backEnd.pc3D[RB_USEC_GPU] = 0;
		GLint done = GL_FALSE;
		glGetQueryObjectiv(query, GL_QUERY_RESULT_AVAILABLE, &done);
		if(done != GL_FALSE)
		{
			GLint durationNS = 0;
			glGetQueryObjectiv(query, GL_QUERY_RESULT, &durationNS);
			if(durationNS > 0)
			{
				backEnd.pc3D[RB_USEC_GPU] = durationNS / 1000;
			}
			gl.queryReadIndex = (gl.queryReadIndex + 1) % ARRAY_LEN(gl.timerQueries);
		}
	}
}

// One-time initialization of the back-end's state and GL resources:
// texture size limit, frame buffers, geometry buffers, pipelines (programs, attributes, uniforms),
// optional GPU mip-map generation and timer queries.
// All of "gl" is zeroed first, so anything not set here starts out as 0.
static void Init()
{
	memset(&gl, 0, sizeof(gl));

	// use the driver's limit capped to MAX_GPU_TEXTURE_SIZE, or the cap itself if the query yields nothing usable
	GLint maxTextureSize = 0;
	glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize);
	gl.maxTextureSize = maxTextureSize > 0 ? min((int)maxTextureSize, MAX_GPU_TEXTURE_SIZE) : MAX_GPU_TEXTURE_SIZE;
	glConfig.unused_maxTextureSize = gl.maxTextureSize;
	glInfo.maxTextureSize = gl.maxTextureSize;

	FBO_Init();
	// alpha to coverage needs the multi-sampled frame buffer and the user's consent
	if(gl.fbMSEnabled && r_alphaToCoverage->integer)
	{
		glInfo.alphaToCoverageSupport = qtrue;
	}

	// large buffers hold LARGEBUFFER_MAX_FRAMES ranges, the others only need room for a single batch
	int maxVertexCount = SHADER_MAX_VERTEXES;
	int maxIndexCount = SHADER_MAX_INDEXES;
	gl.mappingType = GetMappingTypeFromCvar();
	if(MappingType_UsesLargeBuffers())
	{
		maxVertexCount = LARGEBUFFER_MAX_VERTEXES * LARGEBUFFER_MAX_FRAMES;
		maxIndexCount = LARGEBUFFER_MAX_INDEXES * LARGEBUFFER_MAX_FRAMES;
	}

	// vertex and index buffer layouts: the item sizes are taken from the matching "tess" arrays
	gl.arrayBuffers[VB_POSITION].capacity = maxVertexCount;
	gl.arrayBuffers[VB_POSITION].itemSize = sizeof(tess.xyz[0]);
	gl.arrayBuffers[VB_POSITION].componentCount = 4;
	gl.arrayBuffers[VB_POSITION].dataType = GL_FLOAT;
	gl.arrayBuffers[VB_POSITION].normalized = GL_FALSE;
	gl.arrayBuffers[VB_NORMAL].capacity = maxVertexCount;
	gl.arrayBuffers[VB_NORMAL].itemSize = sizeof(tess.normal[0]);
	gl.arrayBuffers[VB_NORMAL].componentCount = 4;
	gl.arrayBuffers[VB_NORMAL].dataType = GL_FLOAT;
	gl.arrayBuffers[VB_NORMAL].normalized = GL_FALSE;
	gl.arrayBuffers[VB_TEXCOORD].capacity = maxVertexCount;
	gl.arrayBuffers[VB_TEXCOORD].itemSize = sizeof(tess.svars[0].texcoords[0]);
	gl.arrayBuffers[VB_TEXCOORD].componentCount = 2;
	gl.arrayBuffers[VB_TEXCOORD].dataType = GL_FLOAT;
	gl.arrayBuffers[VB_TEXCOORD].normalized = GL_FALSE;
	gl.arrayBuffers[VB_TEXCOORD2].capacity = maxVertexCount;
	gl.arrayBuffers[VB_TEXCOORD2].itemSize = sizeof(tess.svars[0].texcoords[0]);
	gl.arrayBuffers[VB_TEXCOORD2].componentCount = 2;
	gl.arrayBuffers[VB_TEXCOORD2].dataType = GL_FLOAT;
	gl.arrayBuffers[VB_TEXCOORD2].normalized = GL_FALSE;
	gl.arrayBuffers[VB_COLOR].capacity = maxVertexCount;
	gl.arrayBuffers[VB_COLOR].itemSize = sizeof(tess.svars[0].colors[0]);
	gl.arrayBuffers[VB_COLOR].componentCount = 4;
	gl.arrayBuffers[VB_COLOR].dataType = GL_UNSIGNED_BYTE;
	gl.arrayBuffers[VB_COLOR].normalized = GL_TRUE;
	gl.indexBuffer.capacity = maxIndexCount;
	gl.indexBuffer.itemSize = sizeof(tess.indexes[0]);
	gl.indexBuffer.indexBuffer = qtrue;

	// generic pipeline: vertex attributes and uniform names
	gl.pipelines[PID_GENERIC].arrayBuffers[VB_POSITION].enabled = qtrue;
	gl.pipelines[PID_GENERIC].arrayBuffers[VB_POSITION].attribName = "position";
	gl.pipelines[PID_GENERIC].arrayBuffers[VB_TEXCOORD].enabled = qtrue;
	gl.pipelines[PID_GENERIC].arrayBuffers[VB_TEXCOORD].attribName = "texCoords1";
	gl.pipelines[PID_GENERIC].arrayBuffers[VB_TEXCOORD2].enabled = qtrue;
	gl.pipelines[PID_GENERIC].arrayBuffers[VB_TEXCOORD2].attribName = "texCoords2";
	gl.pipelines[PID_GENERIC].arrayBuffers[VB_COLOR].enabled = qtrue;
	gl.pipelines[PID_GENERIC].arrayBuffers[VB_COLOR].attribName = "color";
	gl.pipelines[PID_GENERIC].uniformNames[GU_MODELVIEW] = "modelView";
	gl.pipelines[PID_GENERIC].uniformNames[GU_PROJECTION] = "projection";
	gl.pipelines[PID_GENERIC].uniformNames[GU_CLIP_PLANE] = "clipPlane";
	gl.pipelines[PID_GENERIC].uniformNames[GU_ALPHA_TEX] = "alphaTex";
	gl.pipelines[PID_GENERIC].uniformNames[GU_GAMMA_BRIGHT_NOISE_SEED] = "gammaBrightNoiseSeed";

	// dynamic light pipeline: vertex attributes and uniform names
	gl.pipelines[PID_DYNAMIC_LIGHT].arrayBuffers[VB_POSITION].enabled = qtrue;
	gl.pipelines[PID_DYNAMIC_LIGHT].arrayBuffers[VB_POSITION].attribName = "position";
	gl.pipelines[PID_DYNAMIC_LIGHT].arrayBuffers[VB_NORMAL].enabled = qtrue;
	gl.pipelines[PID_DYNAMIC_LIGHT].arrayBuffers[VB_NORMAL].attribName = "normal";
	gl.pipelines[PID_DYNAMIC_LIGHT].arrayBuffers[VB_TEXCOORD].enabled = qtrue;
	gl.pipelines[PID_DYNAMIC_LIGHT].arrayBuffers[VB_TEXCOORD].attribName = "texCoords1";
	gl.pipelines[PID_DYNAMIC_LIGHT].uniformNames[DU_MODELVIEW] = "modelView";
	gl.pipelines[PID_DYNAMIC_LIGHT].uniformNames[DU_PROJECTION] = "projection";
	gl.pipelines[PID_DYNAMIC_LIGHT].uniformNames[DU_CLIP_PLANE] = "clipPlane";
	gl.pipelines[PID_DYNAMIC_LIGHT].uniformNames[DU_LIGHT_POS] = "osLightPos";
	gl.pipelines[PID_DYNAMIC_LIGHT].uniformNames[DU_EYE_POS] = "osEyePos";
	gl.pipelines[PID_DYNAMIC_LIGHT].uniformNames[DU_LIGHT_COLOR_RADIUS] = "lightColorRadius";
	gl.pipelines[PID_DYNAMIC_LIGHT].uniformNames[DU_OPAQUE] = "opaque";
	gl.pipelines[PID_DYNAMIC_LIGHT].uniformNames[DU_INTENSITY] = "intensity";

	// soft sprite pipeline: vertex attributes and uniform names
	gl.pipelines[PID_SOFT_SPRITE].arrayBuffers[VB_POSITION].enabled = qtrue;
	gl.pipelines[PID_SOFT_SPRITE].arrayBuffers[VB_POSITION].attribName = "position";
	gl.pipelines[PID_SOFT_SPRITE].arrayBuffers[VB_TEXCOORD].enabled = qtrue;
	gl.pipelines[PID_SOFT_SPRITE].arrayBuffers[VB_TEXCOORD].attribName = "texCoords1";
	gl.pipelines[PID_SOFT_SPRITE].arrayBuffers[VB_COLOR].enabled = qtrue;
	gl.pipelines[PID_SOFT_SPRITE].arrayBuffers[VB_COLOR].attribName = "color";
	gl.pipelines[PID_SOFT_SPRITE].uniformNames[SU_MODELVIEW] = "modelView";
	gl.pipelines[PID_SOFT_SPRITE].uniformNames[SU_PROJECTION] = "projection";
	gl.pipelines[PID_SOFT_SPRITE].uniformNames[SU_CLIP_PLANE] = "clipPlane";
	gl.pipelines[PID_SOFT_SPRITE].uniformNames[SU_ALPHA_TEST] = "alphaTest";
	gl.pipelines[PID_SOFT_SPRITE].uniformNames[SU_DIST_OFFSET] = "distOffset";
	gl.pipelines[PID_SOFT_SPRITE].uniformNames[SU_COLOR_SCALE] = "colorScale";
	gl.pipelines[PID_SOFT_SPRITE].uniformNames[SU_COLOR_BIAS] = "colorBias";

	// post-process pipeline: no vertex attributes, a single uniform
	gl.pipelines[PID_POST_PROCESS].uniformNames[PU_BRIGHT_GAMMA_GREY] = "brightGammaGrey";

	CreateGraphicsProgram(PID_GENERIC, generic_vs, generic_fs, "generic");
	CreateGraphicsProgram(PID_DYNAMIC_LIGHT, dl_vs, dl_fs, "dynamic light");
	CreateGraphicsProgram(PID_SOFT_SPRITE, sprite_vs, sprite_fs, "soft sprite");
	CreateGraphicsProgram(PID_POST_PROCESS, post_vs, post_fs, "post-process");

	// a single vertex array object is created and stays bound
	GLuint vertexArray;
	glGenVertexArrays(1, &vertexArray);
	glBindVertexArray(vertexArray);

	// the vertex buffers are created after the index buffer, which therefore stays bound
	CreateGeometryBufferStorage(&gl.indexBuffer);
	for(int i = 0; i < VB_COUNT; ++i)
	{
		CreateGeometryBufferStorage(&gl.arrayBuffers[i]);
	}

	// look up the location of the texture uniforms and of every named uniform
	for(int p = 0; p < PID_COUNT; ++p)
	{
		Pipeline* pipeline = &gl.pipelines[p];

		pipeline->textureLocations[0] = glGetUniformLocation(pipeline->program.program, "texture1");
		pipeline->textureLocations[1] = glGetUniformLocation(pipeline->program.program, "texture2");

		for(int i = 0; i < ARRAY_LEN(pipeline->uniformLocations); ++i)
		{
			if(pipeline->uniformNames[i] != NULL)
			{
				pipeline->uniformLocations[i] = glGetUniformLocation(pipeline->program.program, pipeline->uniformNames[i]);
#if defined(_DEBUG)
				// every named uniform must exist, except for the generic pipeline's
				// gammaBrightNoiseSeed, which is allowed to be missing when r_dither is 0
				if(!(r_dither->integer == 0 && p == PID_GENERIC && i == GU_GAMMA_BRIGHT_NOISE_SEED))
				{
					assert(pipeline->uniformLocations[i] != -1);
				}
#endif
			}
		}
	}

	// GPU mip-map generation is optional: errors are printed instead of being fatal while setting it up
	if(r_gpuMipGen->integer && (GLEW_VERSION_4_3 || (GLEW_ARB_compute_shader && GLEW_ARB_texture_storage && GLEW_ARB_shader_image_load_store && GLEW_ARB_copy_image)))
	{
		gl.errorMode = EM_PRINT;
		glInfo.mipGenSupport = InitCompute();
		gl.errorMode = EM_FATAL;
	}

	glInfo.depthFadeSupport = r_depthFade->integer == 1;

	// PID_COUNT isn't a valid pipeline id, which forces ApplyPipeline to do the full set-up
	gl.pipelineId = PID_COUNT;
	ApplyPipeline(PID_GENERIC);

	InitQueries();
}

// Fills in glConfig: the vendor, renderer and version strings come from the driver,
// the extension string is left empty and the "unused_" fields get fixed values
// (except for unused_isFullscreen, which follows r_fullscreen).
static void InitGLConfig()
{
	// @NOTE: could use glGetStringi in a loop to grab the extension list, but it's useless either way
	Q_strncpyz(glConfig.vendor_string, (const char*)glGetString(GL_VENDOR), sizeof(glConfig.vendor_string));
	Q_strncpyz(glConfig.renderer_string, (const char*)glGetString(GL_RENDERER), sizeof(glConfig.renderer_string));
	Q_strncpyz(glConfig.version_string, (const char*)glGetString(GL_VERSION), sizeof(glConfig.version_string));
	Q_strncpyz(glConfig.extensions_string, "", sizeof(glConfig.extensions_string));
	glConfig.unused_maxTextureSize = MAX_GPU_TEXTURE_SIZE;
	glConfig.unused_maxActiveTextures = 0;
	glConfig.unused_driverType = 0;		// ICD
	glConfig.unused_hardwareType = 0;	// generic
	glConfig.unused_deviceSupportsGamma = qtrue;
	glConfig.unused_textureCompression = 0;	// no compression
	glConfig.unused_textureEnvAddAvailable = qtrue;
	glConfig.unused_displayFrequency = 0;
	glConfig.unused_isFullscreen = !!r_fullscreen->integer;
	glConfig.unused_stereoEnabled = qfalse;
	glConfig.unused_smpActive = qfalse;
}

// Resets glInfo to its defaults: no optional feature is flagged as supported,
// the texture size limit is MAX_GPU_TEXTURE_SIZE and the maximum anisotropy
// is queried from the driver when the extension is there, 0 otherwise.
static void InitGLInfo()
{
	glInfo.depthFadeSupport = qfalse;
	glInfo.mipGenSupport = qfalse;
	glInfo.alphaToCoverageSupport = qfalse;
	glInfo.maxTextureSize = MAX_GPU_TEXTURE_SIZE;

	if(!GLEW_EXT_texture_filter_anisotropic)
	{
		glInfo.maxAnisotropy = 0;
	}
	else
	{
		glGetIntegerv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &glInfo.maxAnisotropy);
	}
}

// Initializes the back-end.
// The video sub-system and the GL resources are only set up when glConfig.vidWidth is 0,
// the default GL state is applied on every call.
// Raises a fatal error if OpenGL 3.2 isn't available. Always returns qtrue.
static qbool GAL_Init()
{
	const qbool videoNotStarted = glConfig.vidWidth == 0;
	if(videoNotStarted)
	{
		// the order of these calls can not be changed
		Sys_V_Init(GAL_GL3);
		if(!GLEW_VERSION_3_2)
		{
			ri.Error(ERR_FATAL, "OpenGL 3.2 is required by the selected back-end!\n");
		}
		InitGLConfig();
		InitGLInfo();
		Init();

		// apply the current V-Sync option after the first rendered frame
		r_swapInterval->modified = qtrue;
	}

	SetDefaultState();

	// report, but tolerate, an error left behind by the calls above
	const int err = glGetError();
	if(err != GL_NO_ERROR)
	{
		ri.Printf(PRINT_ALL, "glGetError() = 0x%x\n", err);
	}

	return qtrue;
}

// Deletes the GL texture of every registered image, empties the image table
// and invalidates the texture binding cache.
// On a full shut down with AMD pinned memory, also waits for the GPU to go idle
// and frees the pinned memory of all the geometry buffers.
static void GAL_ShutDown(qbool fullShutDown)
{
	for(int i = 0; i < tr.numImages; ++i)
	{
		const GLuint texture = (GLuint)tr.images[i]->texnum;
		glDeleteTextures(1, &texture);
	}
	tr.numImages = 0;
	memset(tr.images, 0, sizeof(tr.images));

	for(int slot = 0; slot < 2; ++slot)
	{
		gl.boundTextures[slot] = GLuint(-1);
	}

	if(!fullShutDown || gl.mappingType != MT_AMDPIN)
	{
		return;
	}

	// We flush the command queue and wait for all commands to be done executing
	// to make sure the GPU is done accessing our own memory buffers.
	// We could also have used a fence instead.
	glFlush();
	glFinish();

	// Now that it's safe to do so, free our memory buffers.
	for(int i = 0; i < ARRAY_LEN(gl.arrayBuffers); ++i)
	{
		FreePinnedMemory(&gl.arrayBuffers[i]);
	}
	FreePinnedMemory(&gl.indexBuffer);
}

static void GAL_BeginFrame()
{
	BeginQueries();

	FBO_Bind();

	ApplyViewportAndScissor(0, 0, glConfig.vidWidth, glConfig.vidHeight);

	if(r_clear->integer)
	{
		glClearColor(1.0f, 0.0f, 0.5f, 1.0f);
	}
	else
	{
		glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
	}
	glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

	if(MappingType_UsesLargeBuffers())
	{
		Buffers_Wait();
	}
}

// Ends the frame: fences the geometry buffer ranges used this frame (large buffers only),
// resolves the multi-sampled color buffer if there is one, runs the post-process pass,
// blits the result to the back buffer and ends the GPU timer query.
static void GAL_EndFrame()
{
	if(MappingType_UsesLargeBuffers())
	{
		Buffers_Lock();
	}

	if(gl.fbMSEnabled)
	{
		FBO_ResolveColor();
	}

	// post-process pass: sample the color texture of gl.fbSS[gl.fbReadIndex]
	// while the other entry of gl.fbSS is bound, then let the 2 swap roles
	ApplyPipeline(PID_POST_PROCESS);
	ApplyState(GLS_DEPTHTEST_DISABLE, CT_TWO_SIDED, qfalse);
	ApplyViewportAndScissor(0, 0, glConfig.vidWidth, glConfig.vidHeight);
	BindTexture(0, gl.fbSS[gl.fbReadIndex].color);
	Pipeline* const pipeline = &gl.pipelines[PID_POST_PROCESS];
	// uploaded every frame: brightness, inverse gamma and greyscale
	glUniform3f(pipeline->uniformLocations[PU_BRIGHT_GAMMA_GREY], r_brightness->value, 1.0f / r_gamma->value, r_greyscale->value);
	gl.fbReadIndex ^= 1;
	FBO_Bind(&gl.fbSS[gl.fbReadIndex]);
	// 3 vertices and no vertex attributes: the pipeline enables none
	glDrawArrays(GL_TRIANGLES, 0, 3);

	// the back buffer has the size of the window, which is tracked separately from the render size
	ApplyViewportAndScissor(0, 0, glInfo.winWidth, glInfo.winHeight);
	FBO_BlitToBackBuffer();

	EndQueries();
}

// Draws the current batch ("tess") with the generic pipeline:
// one draw call per shader stage (or per pair of stages when multi-texturing), plus one for fog if requested.
// NOTE(review): the uniforms are uploaded to the currently bound program,
// so this assumes the caller made PID_GENERIC current - confirm against the call sites.
static void DrawGeneric()
{
	Pipeline* const pipeline = &gl.pipelines[PID_GENERIC];

	// only upload the uniforms that were flagged as dirty
	if(pipeline->uniformsDirty[GU_MODELVIEW])
	{
		glUniformMatrix4fv(pipeline->uniformLocations[GU_MODELVIEW], 1, GL_FALSE, gl.modelViewMatrix);
	}
	if(pipeline->uniformsDirty[GU_PROJECTION])
	{
		glUniformMatrix4fv(pipeline->uniformLocations[GU_PROJECTION], 1, GL_FALSE, gl.projectionMatrix);
	}
	if(pipeline->uniformsDirty[GU_CLIP_PLANE])
	{
		glUniform4fv(pipeline->uniformLocations[GU_CLIP_PLANE], 1, gl.clipPlane);
	}
	// this uniform may not exist in the program (location -1), in which case it is skipped
	if(pipeline->uniformsDirty[GU_GAMMA_BRIGHT_NOISE_SEED] &&
	   pipeline->uniformLocations[GU_GAMMA_BRIGHT_NOISE_SEED] != -1)
	{
		// inverse gamma, inverse brightness, dither strength (none for 2D), random seed in [0;1]
		glUniform4f(
			pipeline->uniformLocations[GU_GAMMA_BRIGHT_NOISE_SEED],
			1.0f / r_gamma->value,
			1.0f / r_brightness->value,
			backEnd.projection2D ? 0.0f : r_ditherStrength->value,
			(float)rand() / (float)RAND_MAX);
	}

	// positions and indices are shared by all the stages
	UploadVertexArray(VB_POSITION, tess.xyz);
	UploadIndices(tess.indexes, tess.numIndexes);

	for(int i = 0; i < tess.shader->numStages; ++i)
	{
		const shaderStage_t* const stage = tess.xstages[i];
		ApplyState(stage->stateBits, tess.shader->cullType, tess.shader->polygonOffset);

		UploadVertexArray(VB_TEXCOORD, tess.svars[i].texcoordsptr);
		UploadVertexArray(VB_COLOR, tess.svars[i].colors);

		BindBundle(0, &stage->bundle);
		// the second texture is bound with texture unit 1 active
		glActiveTexture(GL_TEXTURE1);

		if(stage->mtStages == 0)
		{
			// single texture: the second texture is the white image and no texture environment is applied
			BindImage(1, tr.whiteImage);
			BindVertexArray(VB_TEXCOORD2);
			ApplyTexEnv(TE_DISABLED);
		}
		else
		{
			// multi-texture: the next stage supplies the second texture and is consumed along with this one
			const shaderStage_t* const stage2 = tess.xstages[i + 1];
			BindBundle(1, &stage2->bundle);
			UploadVertexArray(VB_TEXCOORD2, tess.svars[i + 1].texcoordsptr);
			ApplyTexEnv(stage2->mtEnv);
			++i;
		}

		glActiveTexture(GL_TEXTURE0);

		// the dirty flag is only cleared at the very end, so once set, this is re-uploaded for every draw call
		if(pipeline->uniformsDirty[GU_ALPHA_TEX])
		{
			glUniform2ui(pipeline->uniformLocations[GU_ALPHA_TEX], gl.alphaTest, gl.texEnv);
		}

		DrawElements(tess.numIndexes);
	}

	if(tess.drawFog)
	{
		// fog pass: same geometry, drawn with the fog image, the fog texture coordinates and the fog colors
		ApplyState(tess.fogStateBits, tess.shader->cullType, tess.shader->polygonOffset);

		UploadVertexArray(VB_TEXCOORD, tess.svarsFog.texcoordsptr);
		BindVertexArray(VB_TEXCOORD2);
		UploadVertexArray(VB_COLOR, tess.svarsFog.colors);

		BindImage(0, tr.fogImage);
		glActiveTexture(GL_TEXTURE1);
		BindImage(1, tr.whiteImage);
		glActiveTexture(GL_TEXTURE0);

		ApplyTexEnv(TE_DISABLED);
		if(pipeline->uniformsDirty[GU_ALPHA_TEX])
		{
			glUniform2ui(pipeline->uniformLocations[GU_ALPHA_TEX], gl.alphaTest, gl.texEnv);
		}

		DrawElements(tess.numIndexes);
	}

	// everything that was dirty has been uploaded
	memset(pipeline->uniformsDirty, 0, sizeof(pipeline->uniformsDirty));
}

static void DrawDynamicLight()
{
	Pipeline* const pipeline = &gl.pipelines[PID_DYNAMIC_LIGHT];

	const int stageIndex = tess.shader->lightingStages[ST_DIFFUSE];
	const shaderStage_t* stage = tess.xstages[stageIndex];

	UploadVertexArray(VB_POSITION, tess.xyz);
	UploadVertexArray(VB_NORMAL, tess.normal);
	UploadVertexArray(VB_TEXCOORD, tess.svars[stageIndex].texcoordsptr);
	UploadIndices(tess.dlIndexes, tess.dlNumIndexes);

	ApplyState(backEnd.dlStateBits, tess.shader->cullType, tess.shader->polygonOffset);
	BindBundle(0, &stage->bundle);

	if(backEnd.dlOpaque != gl.dlOpaque)
	{
		gl.dlOpaque = backEnd.dlOpaque;
		pipeline->uniformsDirty[DU_OPAQUE] = qtrue;
	}

	if(backEnd.dlIntensity != gl.dlIntensity)
	{
		gl.dlIntensity = backEnd.dlIntensity;
		pipeline->uniformsDirty[DU_INTENSITY] = qtrue;
	}

	if(pipeline->uniformsDirty[DU_MODELVIEW])
	{
		glUniformMatrix4fv(pipeline->uniformLocations[DU_MODELVIEW], 1, GL_FALSE, gl.modelViewMatrix);
	}
	if(pipeline->uniformsDirty[DU_PROJECTION])
	{
		glUniformMatrix4fv(pipeline->uniformLocations[DU_PROJECTION], 1, GL_FALSE, gl.projectionMatrix);
	}
	if(pipeline->uniformsDirty[DU_CLIP_PLANE])
	{
		glUniform4fv(pipeline->uniformLocations[DU_CLIP_PLANE], 1, gl.clipPlane);
	}
	if(pipeline->uniformsDirty[DU_OPAQUE])
	{
		glUniform1f(pipeline->uniformLocations[DU_OPAQUE], gl.dlOpaque ? 1.0f : 0.0f);
	}
	if(pipeline->uniformsDirty[DU_INTENSITY])
	{
		glUniform1f(pipeline->uniformLocations[DU_INTENSITY], gl.dlIntensity);
	}

	memset(pipeline->uniformsDirty, 0, sizeof(pipeline->uniformsDirty));

	DrawElements(tess.dlNumIndexes);
}

static void DrawDepthFade()
{
	Pipeline* const pipeline = &gl.pipelines[PID_SOFT_SPRITE];

	if(pipeline->uniformsDirty[SU_PROJECTION])
	{
		glUniformMatrix4fv(pipeline->uniformLocations[SU_PROJECTION], 1, GL_FALSE, gl.projectionMatrix);
	}
	if(pipeline->uniformsDirty[SU_MODELVIEW])
	{
		glUniformMatrix4fv(pipeline->uniformLocations[SU_MODELVIEW], 1, GL_FALSE, gl.modelViewMatrix);
	}
	if(pipeline->uniformsDirty[SU_CLIP_PLANE])
	{
		glUniform4fv(pipeline->uniformLocations[SU_CLIP_PLANE], 1, gl.clipPlane);
	}
	if(pipeline->uniformsDirty[SU_COLOR_SCALE] ||
	   memcmp(gl.depthFadeScale, r_depthFadeScale[tess.shader->dfType], sizeof(gl.depthFadeScale)) != 0)
	{
		glUniform4fv(pipeline->uniformLocations[SU_COLOR_SCALE], 1, r_depthFadeScale[tess.shader->dfType]);
		memcpy(gl.depthFadeScale, r_depthFadeScale[tess.shader->dfType], sizeof(gl.depthFadeScale));
	}
	if(pipeline->uniformsDirty[SU_COLOR_BIAS] ||
	   memcmp(gl.depthFadeBias, r_depthFadeBias[tess.shader->dfType], sizeof(gl.depthFadeBias)) != 0)
	{
		glUniform4fv(pipeline->uniformLocations[SU_COLOR_BIAS], 1, r_depthFadeBias[tess.shader->dfType]);
		memcpy(gl.depthFadeBias, r_depthFadeBias[tess.shader->dfType], sizeof(gl.depthFadeBias));
	}
	if(pipeline->uniformsDirty[SU_DIST_OFFSET] ||
	   tess.shader->dfInvDist != gl.depthFadeDist ||
	   tess.shader->dfBias != gl.depthFadeOffset)
	{
		glUniform2f(pipeline->uniformLocations[SU_DIST_OFFSET], tess.shader->dfInvDist, tess.shader->dfBias);
		gl.depthFadeDist = tess.shader->dfInvDist;
		gl.depthFadeOffset = tess.shader->dfBias;
	}

	UploadVertexArray(VB_POSITION, tess.xyz);

	for(int i = 0; i < tess.shader->numStages; ++i)
	{
		const shaderStage_t* stage = tess.xstages[i];

		ApplyState(stage->stateBits, tess.shader->cullType, tess.shader->polygonOffset);

		UploadVertexArray(VB_TEXCOORD, tess.svars[i].texcoordsptr);
		UploadVertexArray(VB_COLOR, tess.svars[i].colors);
		UploadIndices(tess.indexes, tess.numIndexes);

		if(pipeline->uniformsDirty[SU_ALPHA_TEST])
		{
			glUniform1ui(pipeline->uniformLocations[SU_ALPHA_TEST], gl.alphaTest);
		}

		BindBundle(0, &stage->bundle);
		glActiveTexture(GL_TEXTURE1);
		BindTexture(1, gl.fbMSEnabled ? gl.fbSSDepth.depthStencil : gl.fbSS[gl.fbReadIndex].depthStencil);
		glActiveTexture(GL_TEXTURE0);

		DrawElements(tess.numIndexes);
	}

	memset(pipeline->uniformsDirty, 0, sizeof(pipeline->uniformsDirty));
}

// Activates the pipeline matching the requested draw type and runs its draw routine.
// Draw types other than the three handled here are ignored.
static void GAL_Draw(drawType_t type)
{
	switch(type)
	{
		case DT_GENERIC:
			ApplyPipeline(PID_GENERIC);
			DrawGeneric();
			break;

		case DT_DYNAMIC_LIGHT:
			ApplyPipeline(PID_DYNAMIC_LIGHT);
			DrawDynamicLight();
			break;

		case DT_SOFT_SPRITE:
			ApplyPipeline(PID_SOFT_SPRITE);
			DrawDepthFade();
			break;

		default:
			break;
	}
}

static void GAL_Begin3D()
{
	ApplyPipeline(PID_GENERIC);
	R_MakeIdentityMatrix(gl.modelViewMatrix);
	memcpy(gl.projectionMatrix, backEnd.viewParms.projectionMatrix, sizeof(gl.projectionMatrix));
	ApplyViewportAndScissor(backEnd.viewParms.viewportX, backEnd.viewParms.viewportY, backEnd.viewParms.viewportWidth, backEnd.viewParms.viewportHeight);

	if(backEnd.viewParms.isPortal)
	{
		float plane[4];
		plane[0] = backEnd.viewParms.portalPlane.normal[0];
		plane[1] = backEnd.viewParms.portalPlane.normal[1];
		plane[2] = backEnd.viewParms.portalPlane.normal[2];
		plane[3] = backEnd.viewParms.portalPlane.dist;

		float plane2[4];
		plane2[0] = DotProduct(backEnd.viewParms.orient.axis[0], plane);
		plane2[1] = DotProduct(backEnd.viewParms.orient.axis[1], plane);
		plane2[2] = DotProduct(backEnd.viewParms.orient.axis[2], plane);
		plane2[3] = DotProduct(plane, backEnd.viewParms.orient.origin) - plane[3];

		float* o = plane;
		const float* m = s_flipMatrix;
		const float* v = plane2;
		o[0] = m[0] * v[0] + m[4] * v[1] + m[8] * v[2] + m[12] * v[3];
		o[1] = m[1] * v[0] + m[5] * v[1] + m[9] * v[2] + m[13] * v[3];
		o[2] = m[2] * v[0] + m[6] * v[1] + m[10] * v[2] + m[14] * v[3];
		o[3] = m[3] * v[0] + m[7] * v[1] + m[11] * v[2] + m[15] * v[3];

		memcpy(gl.clipPlane, plane, sizeof(gl.clipPlane));
		ApplyClipPlane(qtrue);
	}
	else
	{
		memset(gl.clipPlane, 0, sizeof(gl.clipPlane));
		ApplyClipPlane(qfalse);
	}

	ApplyState(GLS_DEFAULT, CT_TWO_SIDED, qfalse);

	GLbitfield clearBits = GL_DEPTH_BUFFER_BIT;
	if(backEnd.refdef.rdflags & RDF_HYPERSPACE)
	{
		clearBits |= GL_COLOR_BUFFER_BIT;
		const float c = RB_HyperspaceColor();
		glClearColor(c, c, c, 1.0f);
	}
	else if(r_fastsky->integer && !(backEnd.refdef.rdflags & RDF_NOWORLDMODEL))
	{
		clearBits |= GL_COLOR_BUFFER_BIT;
		glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
	}
	glClear(clearBits);

	// in case the generic pipeline was already active before calling this function
	gl.pipelines[PID_GENERIC].uniformsDirty[GU_PROJECTION] = qtrue;
	gl.pipelines[PID_GENERIC].uniformsDirty[GU_MODELVIEW] = qtrue;
	gl.pipelines[PID_GENERIC].uniformsDirty[GU_CLIP_PLANE] = qtrue;
}

// Sets up the state used while drawing the sky and clouds.
// The current clip plane enable flag is saved in gl.prevEnableClipPlane,
// the clip plane is disabled and the depth range is collapsed to the single value 'depth'.
static void GAL_BeginSkyAndClouds(double depth)
{
	// saved here, re-applied by GAL_EndSkyAndClouds
	gl.prevEnableClipPlane = gl.enableClipPlane;
	ApplyClipPlane(qfalse);
	// near == far
	glDepthRange(depth, depth);
}

// Undoes GAL_BeginSkyAndClouds: resets the depth range to [0, 1]
// and re-applies the clip plane enable flag saved in gl.prevEnableClipPlane.
static void GAL_EndSkyAndClouds()
{
	glDepthRange(0.0, 1.0);
	ApplyClipPlane(gl.prevEnableClipPlane);
}

// Returns the anisotropy level to use for the given image.
// The result is 1 when the image has IMG_NOAF set or when either
// glInfo.maxAnisotropy or r_ext_max_anisotropy is below 2.
// Otherwise, it is the smaller of the two values.
static int GetMaxAnisotropy(image_t* image)
{
	if((image->flags & IMG_NOAF) != 0)
	{
		return 1;
	}

	if(glInfo.maxAnisotropy < 2 || r_ext_max_anisotropy->integer < 2)
	{
		return 1;
	}

	return min(r_ext_max_anisotropy->integer, glInfo.maxAnisotropy);
}

// Generates the OpenGL texture object of an image and sets its sampling parameters.
// Images flagged IMG_LMATLAS additionally get their level 0 storage allocated here (w x h, no data)
// and always use linear filtering with edge clamping.
// For all other images, only the parameters are set: no pixel storage is specified here.
// 'mipCount' is not used by this implementation.
static void GAL_CreateTexture(image_t* image, int mipCount, int w, int h)
{
	GLuint textureId;
	glGenTextures(1, &textureId);
	image->texnum = (textureHandle_t)textureId;

	BindImage(0, image);
	SetDebugName(GL_TEXTURE, textureId, image->name);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, GetMaxAnisotropy(image));

	const bool isLightmapAtlas = (image->flags & IMG_LMATLAS) != 0;
	if(isLightmapAtlas)
	{
		// storage only, the content is uploaded later
		glTexImage2D(
			GL_TEXTURE_2D, 0, GetTextureInternalFormat(image->format),
			w, h, 0, GetTextureFormat(image->format), GL_UNSIGNED_BYTE, NULL);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
		return;
	}

	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GetTextureWrapMode(image->wrapClampMode));
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GetTextureWrapMode(image->wrapClampMode));

	// tri-linear filtering unless overridden below
	GLint minFilter = GL_LINEAR_MIPMAP_LINEAR;
	GLint magFilter = GL_LINEAR;

	const bool nearestRequested = Q_stricmp(r_textureMode->string, "GL_NEAREST") == 0;
	const bool nearestAllowed = (image->flags & (IMG_EXTLMATLAS | IMG_NOPICMIP)) == 0;
	if(nearestRequested && nearestAllowed)
	{
		minFilter = GL_NEAREST;
		magFilter = GL_NEAREST;
	}
	else if((image->flags & IMG_NOMIPMAP) != 0)
	{
		minFilter = GL_LINEAR;
	}

	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, minFilter);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, magFilter);
}

// Uploads pixel data to one mip level of an image.
// Images flagged IMG_LMATLAS get a sub-rectangle update at (x, y).
// All other images get the entire level re-specified with size w x h ('x' and 'y' are not used).
static void GAL_UpdateTexture(image_t* image, int mip, int x, int y, int w, int h, const void* data)
{
	BindImage(0, image);

	const GLint level = (GLint)mip;

	if((image->flags & IMG_LMATLAS) == 0)
	{
		glTexImage2D(
			GL_TEXTURE_2D, level, GetTextureInternalFormat(image->format),
			w, h, 0, GetTextureFormat(image->format), GL_UNSIGNED_BYTE, data);
		return;
	}

	glTexSubImage2D(
		GL_TEXTURE_2D, level, x, y, w, h,
		GetTextureFormat(image->format), GL_UNSIGNED_BYTE, data);
}

// Updates the content of a scratch image with RGBA data of size w x h.
// A size change re-specifies the texture (and updates image->width/height),
// otherwise the pixels are only re-uploaded when 'dirty' is set.
static void GAL_UpdateScratch(image_t* image, int w, int h, const void* data, qbool dirty)
{
	BindImage(0, image);

	const bool sameSize = image->width == w && image->height == h;
	if(sameSize)
	{
		if(dirty)
		{
			// just subimage upload it so that drivers can tell we are going to be changing
			// it and don't try and do a texture compression
			glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, data);
		}
		return;
	}

	// the scratch image isn't in the format we want: specify it as a new texture
	glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB8, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, data);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
	image->width = w;
	image->height = h;
}

// Creates an RGBA8 texture for an image and generates all of its mip levels with compute shaders.
//
// image     : receives the new texture handle; its width/height define the size of destination level 0
// mipCount  : number of levels in the source mip chain, level 0 included
// mipOffset : number of leading source levels that are computed but not stored,
//             i.e. source level i is written to destination level (i - mipOffset)
// w, h      : size of the source level 0 data
// mip0      : source level 0 pixels, uploaded as GL_RGBA / GL_UNSIGNED_BYTE
//
// Work flow:
// 1. the source data is uploaded into gl.mipGen.textures[2]
// 2. CPID_GAMMA_TO_LINEAR writes a RGBA16F copy of it into gl.mipGen.textures[0]
// 3. for each following level, CPID_DOWN_SAMPLE runs twice (X-axis, then Y-axis),
//    going from textures[0] to textures[1] and back to textures[0]
// 4. CPID_LINEAR_TO_GAMMA writes each stored level into the destination texture
//
// The program bound on entry is bound again on exit.
static void GAL_CreateTextureEx(image_t* image, int mipCount, int mipOffset, int w, int h, const void* mip0)
{
	enum { GroupSize = 8, GroupMask = GroupSize - 1 };

	assert(image->format == TF_RGBA8);
	assert(GetTextureInternalFormat(image->format) == GL_RGBA8);

	// remember what program we had bound before...
	GLint previousProgram = 0;
	glGetIntegerv(GL_CURRENT_PROGRAM, &previousProgram);

	// create the texture with all mip levels
	GLuint id;
	glGenTextures(1, &id);
	image->texnum = (textureHandle_t)id;
	BindTexture(0, id);
	glTexStorage2D(GL_TEXTURE_2D, mipCount - mipOffset, GL_RGBA8, image->width, image->height);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GetTextureWrapMode(image->wrapClampMode));
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GetTextureWrapMode(image->wrapClampMode));
	if(Q_stricmp(r_textureMode->string, "GL_NEAREST") == 0 &&
	   (image->flags & (IMG_LMATLAS | IMG_EXTLMATLAS | IMG_NOPICMIP)) == 0)
	{
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
	}
	else
	{
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
	}
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, GetMaxAnisotropy(image));
	SetDebugName(GL_TEXTURE, id, image->name);

	// upload source mip level 0
	BindTexture(0, gl.mipGen.textures[2]);
	glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, mip0);

	// create a linear color space copy of source mip 0
	glUseProgram(gl.mipGen.programs[CPID_GAMMA_TO_LINEAR].program);
	glUniform1f(0, r_mipGenGamma->value);
	glBindImageTexture(0, gl.mipGen.textures[2], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA8);
	glBindImageTexture(1, gl.mipGen.textures[0], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA16F);
	glDispatchCompute((w + GroupMask) / GroupSize, (h + GroupMask) / GroupSize, 1);

	// copy to destination mip 0 now if needed
	if(mipOffset == 0)
	{
		glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
		glUseProgram(gl.mipGen.programs[CPID_LINEAR_TO_GAMMA].program);
		glUniform1f(0, r_intensity->value);
		glUniform4fv(1, 1, r_mipBlendColors[0]);
		glUniform1f(2, 1.0f / r_mipGenGamma->value);
		glBindImageTexture(0, gl.mipGen.textures[0], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16F);
		glBindImageTexture(1, id, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8);
		glDispatchCompute((w + GroupMask) / GroupSize, (h + GroupMask) / GroupSize, 1);
	}

	for(int i = 1; i < mipCount; ++i)
	{
		// w1 x h1 is the size of the previous level, w x h the size of the level being generated
		const int w1 = w;
		const int h1 = h;
		w = max(w / 2, 1);
		h = max(h / 2, 1);

		// down-sample on the X-axis
		glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
		glUseProgram(gl.mipGen.programs[CPID_DOWN_SAMPLE].program);
		glUniform4fv(0, 1, tr.mipFilter);
		glUniform2i(1, w1 - 1, h1 - 1); // maxSize
		glUniform2i(2, w1 / w, 1); // scale
		glUniform2i(3, 1, 0); // offset
		glUniform1ui(4, image->wrapClampMode == TW_CLAMP_TO_EDGE ? 1 : 0);
		glBindImageTexture(0, gl.mipGen.textures[0], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16F);
		glBindImageTexture(1, gl.mipGen.textures[1], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA16F);
		glDispatchCompute((w + GroupMask) / GroupSize, (h1 + GroupMask) / GroupSize, 1);

		// down-sample on the Y-axis
		glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
		glUseProgram(gl.mipGen.programs[CPID_DOWN_SAMPLE].program);
		glUniform4fv(0, 1, tr.mipFilter);
		glUniform2i(1, w - 1, h1 - 1); // maxSize
		glUniform2i(2, 1, h1 / h); // scale
		glUniform2i(3, 0, 1); // offset
		glUniform1ui(4, image->wrapClampMode == TW_CLAMP_TO_EDGE ? 1 : 0);
		glBindImageTexture(0, gl.mipGen.textures[1], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16F);
		glBindImageTexture(1, gl.mipGen.textures[0], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA16F);
		glDispatchCompute((w + GroupMask) / GroupSize, (h + GroupMask) / GroupSize, 1);

		// source levels below mipOffset are only intermediate results
		const int destMip = i - mipOffset;
		if(destMip >= 0)
		{
			// copy the gamma-corrected result to the desired mip slice
			glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
			glUseProgram(gl.mipGen.programs[CPID_LINEAR_TO_GAMMA].program);
			glUniform1f(0, r_intensity->value);
			glUniform4fv(1, 1, r_mipBlendColors[r_colorMipLevels->integer ? destMip : 0]);
			glUniform1f(2, 1.0f / r_mipGenGamma->value);
			glBindImageTexture(0, gl.mipGen.textures[0], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16F);
			glBindImageTexture(1, id, destMip, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8);
			glDispatchCompute((w + GroupMask) / GroupSize, (h + GroupMask) / GroupSize, 1);
		}
	}

	// Image stores are not automatically visible to other kinds of accesses:
	// - the destination texture gets sampled through regular texture fetches when drawing
	// - the scratch textures get read/written again by the next call to this function
	// Without this barrier, nothing orders those accesses after the dispatches above.
	glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);

	// restore program
	glUseProgram(previousProgram);
}

static void GAL_BeginDynamicLight()
{
	Pipeline* const pipeline = &gl.pipelines[PID_DYNAMIC_LIGHT];
	const dlight_t* const dl = tess.light;

	ApplyPipeline(PID_DYNAMIC_LIGHT);

	glUniform3fv(pipeline->uniformLocations[DU_EYE_POS], 1, backEnd.orient.viewOrigin);
	glUniform3fv(pipeline->uniformLocations[DU_LIGHT_POS], 1, dl->transformed);
	glUniform4f(pipeline->uniformLocations[DU_LIGHT_COLOR_RADIUS], dl->color[0], dl->color[1], dl->color[2], 1.0f / Square(dl->radius));
}

static void GAL_ReadPixels(int x, int y, int w, int h, int alignment, colorSpace_t colorSpace, void* out)
{
	const GLenum format = colorSpace == CS_BGR ? GL_BGR : GL_RGBA;
	glPixelStorei(GL_PACK_ALIGNMENT, alignment);
	glReadPixels(x, y, w, h, format, GL_UNSIGNED_BYTE, out);
	glPixelStorei(GL_PACK_ALIGNMENT, 1);
}

static void GAL_Begin2D()
{
	ApplyPipeline(PID_GENERIC);
	R_MakeIdentityMatrix(gl.modelViewMatrix);
	R_MakeOrthoProjectionMatrix(gl.projectionMatrix, glConfig.vidWidth, glConfig.vidHeight);
	ApplyViewportAndScissor(0, 0, glConfig.vidWidth, glConfig.vidHeight);
	ApplyClipPlane(qfalse);
	ApplyState(GLS_DEFAULT_2D, CT_TWO_SIDED, qfalse);

	// in case the generic pipeline was already active before calling this function
	gl.pipelines[PID_GENERIC].uniformsDirty[GU_MODELVIEW] = qtrue;
	gl.pipelines[PID_GENERIC].uniformsDirty[GU_PROJECTION] = qtrue;
	gl.pipelines[PID_GENERIC].uniformsDirty[GU_CLIP_PLANE] = qfalse; // not used
}

// Stores a new model-view matrix and flags the model-view uniform
// of the currently active pipeline (and only that one) as dirty.
static void GAL_SetModelViewMatrix(const float* matrix)
{
	memcpy(gl.modelViewMatrix, matrix, sizeof(gl.modelViewMatrix));

	switch(gl.pipelineId)
	{
		case PID_GENERIC:
			gl.pipelines[PID_GENERIC].uniformsDirty[GU_MODELVIEW] = qtrue;
			break;

		case PID_DYNAMIC_LIGHT:
			gl.pipelines[PID_DYNAMIC_LIGHT].uniformsDirty[DU_MODELVIEW] = qtrue;
			break;

		case PID_SOFT_SPRITE:
			gl.pipelines[PID_SOFT_SPRITE].uniformsDirty[SU_MODELVIEW] = qtrue;
			break;

		default:
			break;
	}
}

// Sets the depth range by forwarding zNear and zFar, unchanged, to glDepthRange.
static void GAL_SetDepthRange(double zNear, double zFar)
{
	glDepthRange(zNear, zFar);
}

// Returns a printable description of a geometry upload strategy.
// Values that aren't recognized yield "?".
static const char* GetMappingTypeName(MappingType type)
{
	const char* name = "?";

	switch(type)
	{
		case MT_SUBDATA:
			name = "glBufferSubData";
			break;

		case MT_PERSISTENT:
			name = "glMapBufferRange + GL_MAP_PERSISTENT_BIT";
			break;

		case MT_UNSYNC:
			name = "glMapBufferRange + GL_MAP_UNSYNCHRONIZED_BIT";
			break;

		case MT_AMDPIN:
			name = "glBufferData + GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD";
			break;

		default:
			break;
	}

	return name;
}

// Prints the geometry upload strategy currently in use (gl.mappingType).
static void GAL_PrintInfo()
{
	const char* const strategyName = GetMappingTypeName(gl.mappingType);
	ri.Printf(PRINT_ALL, "Geometry upload strategy: %s\n", strategyName);
}

// Fills the graphics API layer's function table with this back-end's entry points.
// Always returns qtrue.
qbool GAL_GetGL3(graphicsAPILayer_t* rb)
{
	// life time
	rb->Init = GAL_Init;
	rb->ShutDown = GAL_ShutDown;
	rb->PrintInfo = GAL_PrintInfo;

	// frame and view set-up
	rb->BeginFrame = GAL_BeginFrame;
	rb->EndFrame = GAL_EndFrame;
	rb->Begin2D = GAL_Begin2D;
	rb->Begin3D = GAL_Begin3D;
	rb->BeginSkyAndClouds = GAL_BeginSkyAndClouds;
	rb->EndSkyAndClouds = GAL_EndSkyAndClouds;
	rb->BeginDynamicLight = GAL_BeginDynamicLight;
	rb->SetModelViewMatrix = GAL_SetModelViewMatrix;
	rb->SetDepthRange = GAL_SetDepthRange;

	// textures
	rb->CreateTexture = GAL_CreateTexture;
	rb->CreateTextureEx = GAL_CreateTextureEx;
	rb->UpdateTexture = GAL_UpdateTexture;
	rb->UpdateScratch = GAL_UpdateScratch;

	// drawing and read-back
	rb->Draw = GAL_Draw;
	rb->ReadPixels = GAL_ReadPixels;

	return qtrue;
}