// cnq3/code/renderer/tr_backend_gl3.cpp
// 2827 lines, 83 KiB, C++

/*
===========================================================================
Copyright (C) 2019-2020 Gian 'myT' Schellenbaum
This file is part of Challenge Quake 3 (CNQ3).
Challenge Quake 3 is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.
Challenge Quake 3 is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
===========================================================================
*/
// OpenGL 3.2+ rendering back-end
#include "tr_local.h"
#include "GL/glew.h"
#if defined(_WIN32)
#include <Windows.h>
#else
#include <sys/mman.h>
#include <unistd.h>
#endif
/*
Current info:
- OpenGL 3.2 minimum
- GLSL 1.40 minimum
- fancy mip-map generation requires:
  - OpenGL 4.3 (or equivalent extensions)
  - GLSL 4.30
- alpha to coverage requires GLSL 4.00
- depth fade with MSAA requires GLSL 4.00
Vertex and index data streaming notes:
- everyone: persistent coherent buffer mapping is the best option whenever available
- nVidia : unsynchronized mapping is very slow, even without threaded driver optimization
- Intel : glBufferSubData is painfully slow, even with immutable storage guarantees and full-range updates
- AMD : if persistent coherent buffer mapping isn't available, AMD_pinned_memory is the best option
- AMD : if neither persistent coherent buffer mapping nor AMD_pinned_memory, then pick glBufferSubData to be safe
- AMD : glBufferSubData is slower than unsynchronized mapping with modern drivers
- AMD : unsynchronized mapping drops off the performance cliff with old drivers
Known issues:
- nVidia GeForce GTX 1070 - Windows 7 - drivers 430.64
  once the GL2 back-end is used, performance crashes when switching to the GL3 back-end
- AMD Radeon HD 6950 - Windows 10 Pro version 10.0.16299 build 16299 - drivers 15.201.1151.1008
  with r_gpuMipGen 1, performance collapses big time (confirmed: whenever glTexStorage2D is called)
- AMD Radeon R7 360 - Windows 7 - drivers 14.502.0.0
  with r_gpuMipGen 1, the GPU-generated mips are corrupted (not confirmed: broken barrier implementation?)
*/
// @NOTE: MAX_VERTEXES and MAX_INDEXES are *per frame*
// LARGEBUFFER_MAX_FRAMES also sizes ArrayBuffer::fences (one GLsync per slot)
// NOTE(review): presumably the number of frames that can be in flight - confirm against the buffer code
#define LARGEBUFFER_MAX_FRAMES 4
#define LARGEBUFFER_MAX_VERTEXES 131072
// the index budget is 8 times the vertex budget
#define LARGEBUFFER_MAX_INDEXES (LARGEBUFFER_MAX_VERTEXES * 8)
// this is the highest maximum we'll ever report
#define MAX_GPU_TEXTURE_SIZE 2048
// Identifies a graphics pipeline (program + vertex layout).
// Values are used as indices into OpenGL3::pipelines.
enum PipelineId
{
PID_GENERIC,
PID_DYNAMIC_LIGHT,
PID_SOFT_SPRITE,
PID_POST_PROCESS,
PID_COUNT // number of graphics pipelines; CreateComputeProgram passes it to CreateShader as "none of the above"
};
// Selects what HandleError does with a message.
enum ErrorMode
{
EM_FATAL, // forwarded to ri.Error with ERR_FATAL
EM_PRINT, // forwarded to ri.Printf with PRINT_ERROR
EM_SILENT // the message is dropped
};
// Identifies a vertex attribute stream.
// Values index OpenGL3::arrayBuffers and Pipeline::arrayBuffers and are also used
// directly as the generic vertex attribute index (glBindAttribLocation / glEnableVertexAttribArray).
enum VertexBufferId
{
VB_POSITION,
VB_NORMAL,
VB_TEXCOORD,
VB_TEXCOORD2,
VB_COLOR,
VB_COUNT
};
// Alpha test modes.
// NOTE(review): the numeric values (0..3) appear to match the 'alphaTest' uint compared
// against uint(1)/uint(2)/uint(3) in the fragment shaders - confirm where the uniform is uploaded.
enum AlphaTest
{
AT_ALWAYS,
AT_GREATER_THAN_0,
AT_LESS_THAN_HALF,
AT_GREATER_OR_EQUAL_TO_HALF
};
// OpenGL object names making up one linked program.
// Graphics programs fill vertexShader and fragmentShader (CreateGraphicsProgram),
// compute programs fill computeShader (CreateComputeProgram).
struct Program
{
GLuint vertexShader;
GLuint fragmentShader;
GLuint computeShader;
GLuint program; // name returned by glCreateProgram
};
// A GPU buffer holding one vertex attribute stream or the index stream,
// along with its attribute layout and streaming state.
struct ArrayBuffer
{
GLuint buffer; // OpenGL buffer object name
GLint componentCount; // 'size' argument of glVertexAttribPointer
GLenum dataType; // 'type' argument of glVertexAttribPointer
GLboolean normalized; // 'normalized' argument of glVertexAttribPointer
int capacity; // item count; capacity * itemSize is the byte size used for pinned allocations
int itemSize; // bytes per item; also passed as the stride to glVertexAttribPointer
int writeIndex;
int readIndex;
qbool indexBuffer;
// persistent mapping:
byte* mappedData; // also holds the memory from AllocatePinnedMemory
int pinnedByteCount; // when using AMD_pinned_memory
GLsync fences[LARGEBUFFER_MAX_FRAMES]; // NULL means uninitialized / invalid
int writeRangeIndex;
};
// Per-pipeline description of one vertex attribute stream.
struct PipelineArrayBuffer
{
const char* attribName; // GLSL attribute name bound with glBindAttribLocation before linking
qbool enabled; // whether the pipeline uses this stream (drives glEnable/DisableVertexAttribArray)
};
// A frame buffer object and the textures attached to it.
struct FrameBuffer
{
GLuint fbo; // frame buffer object name
GLuint color; // texture attached to GL_COLOR_ATTACHMENT0
GLuint depthStencil; // texture attached to GL_DEPTH_STENCIL_ATTACHMENT
qbool multiSampled; // qtrue when created by FBO_CreateMS
qbool hasDepthStencil;
qbool hasColor;
};
// Uniform slots of the generic pipeline.
// NOTE(review): presumably indices into Pipeline::uniformNames / uniformLocations / uniformsDirty - confirm.
enum GenericUniform
{
GU_MODELVIEW,
GU_PROJECTION,
GU_CLIP_PLANE,
GU_ALPHA_TEX,
GU_GREYSCALE,
GU_GAMMA_BRIGHT_NOISE_SEED, // only defined when dithering is enabled
GU_A2C_ALPHA_BOOST, // only defined when alpha to coverage is enabled
GU_COUNT
};
// Uniform slots of the dynamic light pipeline.
// DU_COUNT is the largest uniform count and defines MAX_UNIFORM_COUNT.
enum DynamicLightUniform
{
DU_MODELVIEW,
DU_PROJECTION,
DU_CLIP_PLANE,
DU_LIGHT_POS,
DU_EYE_POS,
DU_LIGHT_COLOR_RADIUS,
DU_OPAQUE,
DU_INTENSITY,
DU_GREYSCALE,
DU_COUNT
};
// Uniform slots of the soft sprite (depth fade) pipeline.
enum SoftSpriteUniform
{
SU_MODELVIEW,
SU_PROJECTION,
SU_CLIP_PLANE,
SU_ALPHA_TEST,
SU_DIST_OFFSET,
SU_COLOR_SCALE,
SU_COLOR_BIAS,
SU_GREYSCALE,
SU_COUNT
};
// Uniform slots of the post-process pipeline.
enum PostUniform
{
PU_BRIGHT_GAMMA_GREY,
PU_COUNT
};
// yes, one could use some template meta-programming horror for this...
// MAX_UNIFORM_COUNT sizes the per-pipeline uniform arrays in Pipeline.
#define MAX_UNIFORM_COUNT DU_COUNT
// Compile-time checks: each array gets a size of -1 (a compile error)
// if MAX_UNIFORM_COUNT is smaller than the corresponding uniform count.
static const char UniformCountLargeEnoughG[(int)MAX_UNIFORM_COUNT >= (int)GU_COUNT ? 1 : -1] = { '\0' };
static const char UniformCountLargeEnoughD[(int)MAX_UNIFORM_COUNT >= (int)DU_COUNT ? 1 : -1] = { '\0' };
static const char UniformCountLargeEnoughS[(int)MAX_UNIFORM_COUNT >= (int)SU_COUNT ? 1 : -1] = { '\0' };
static const char UniformCountLargeEnoughU[(int)MAX_UNIFORM_COUNT >= (int)PU_COUNT ? 1 : -1] = { '\0' };
// A graphics pipeline: the linked program, its uniform and sampler locations
// and the vertex attribute streams it consumes.
struct Pipeline
{
Program program;
const char* uniformNames[MAX_UNIFORM_COUNT];
GLint uniformLocations[MAX_UNIFORM_COUNT];
qbool uniformsDirty[MAX_UNIFORM_COUNT]; // all entries are set by ApplyPipeline when the pipeline becomes current
GLint textureLocations[2]; // sampler uniform locations; ApplyPipeline assigns them texture units 0 and 1
PipelineArrayBuffer arrayBuffers[VB_COUNT]; // indexed by VertexBufferId
};
// Identifies a compute program of the mip-map generator.
// Values index MipMapGenerator::programs.
enum ComputePipelineId
{
CPID_GAMMA_TO_LINEAR,
CPID_LINEAR_TO_GAMMA,
CPID_DOWN_SAMPLE,
CPID_COUNT
};
// Resources of the compute-shader based mip-map generator.
struct MipMapGenerator
{
Program programs[CPID_COUNT]; // indexed by ComputePipelineId
GLuint textures[3]; // 0,1=float16 2=uint8
};
// Strategy used to stream vertex and index data to the GPU
// (see the "Vertex and index data streaming notes" at the top of the file).
enum MappingType
{
MT_SUBDATA, // glBufferSubData
MT_UNSYNC, // glMapBufferRange with GL_MAP_UNSYNCHRONIZED_BIT
MT_PERSISTENT, // glMapBufferRange with GL_MAP_PERSISTENT_BIT and GL_MAP_COHERENT_BIT
MT_AMDPIN // glBufferData with GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD
};
// All the state of the GL3 back-end: cached OpenGL state, GPU resources and settings.
// A single file-scope instance, 'gl', is declared right after the struct.
struct OpenGL3
{
char log[8192]; // scratch buffer receiving shader and program info logs
int maxTextureSize;
float modelViewMatrix[16];
float projectionMatrix[16];
float clipPlane[4];
qbool enableClipPlane;
qbool prevEnableClipPlane;
AlphaTest alphaTest;
qbool dlOpaque;
float dlIntensity;
float depthFadeScale[4];
float depthFadeBias[4];
float depthFadeDist;
float depthFadeOffset;
float greyscale;
ArrayBuffer arrayBuffers[VB_COUNT]; // indexed by VertexBufferId
ArrayBuffer indexBuffer;
GLuint boundTextures[2]; // GL_TEXTURE_2D binding cache, one entry per texture unit (see BindTexture)
int activeTextureSlot; // active texture unit cache (see ApplyActiveTexture)
cullType_t cullType;
unsigned int srcBlendBits;
unsigned int dstBlendBits;
qbool enableDepthTest;
GLenum depthFunc;
GLboolean enableDepthWrite;
GLenum polygonMode;
qbool enablePolygonOffset;
texEnv_t texEnv;
qbool enableAlphaToCoverage;
FrameBuffer fbMS; // multi-sampled render target, only created when fbMSEnabled is set
FrameBuffer fbSS[2]; // single-sampled render targets
unsigned int fbReadIndex; // indexes fbSS
qbool fbMSEnabled; // set by FBO_Init: r_msaa >= 2 and r_colorMipLevels == 0
Pipeline pipelines[PID_COUNT]; // indexed by PipelineId
PipelineId pipelineId; // pipeline last applied by ApplyPipeline
MappingType mappingType;
ErrorMode errorMode; // read by HandleError
MipMapGenerator mipGen;
GLuint timerQueries[8];
qbool queryStarted[8];
int queryWriteIndex;
int queryReadIndex;
};
static OpenGL3 gl;
// GLSL code that CreateShader inserts in front of every fragment shader.
// MakeGreyscale blends 'color' towards its luma (weights 0.299, 0.587, 0.114)
// by 'amount' and leaves alpha untouched.
static const char* shared_fs =
"vec4 MakeGreyscale(vec4 color, float amount)\n"
"{\n"
" float grey = dot(color.rgb, vec3(0.299, 0.587, 0.114));\n"
" vec4 result = mix(color, vec4(grey, grey, grey, color.a), amount);\n"
" return result;\n"
"}\n"
"\n";
// Generic pipeline vertex shader:
// transforms the position to view space then clip space, writes gl_ClipDistance[0]
// from the view-space position and the clip plane, and forwards both sets of
// texture coordinates and the color with centroid interpolation.
static const char* generic_vs =
// a good way to test warning reports with r_verbose 1
//"#extension DOESNTEXISTLOL:warn\n"
//----------------------------------
"uniform mat4 modelView;\n"
"uniform mat4 projection;\n"
"uniform vec4 clipPlane;\n"
"\n"
"in vec4 position;\n"
"in vec2 texCoords1;\n"
"in vec2 texCoords2;\n"
"in vec4 color;\n"
"\n"
"centroid out vec2 texCoords1FS;\n"
"centroid out vec2 texCoords2FS;\n"
"centroid out vec4 colorFS;\n"
"\n"
"void main()\n"
"{\n"
" vec4 positionVS = modelView * vec4(position.xyz, 1);\n"
" gl_Position = projection * positionVS;\n"
" gl_ClipDistance[0] = dot(positionVS, clipPlane);\n"
" texCoords1FS = texCoords1;\n"
" texCoords2FS = texCoords2;\n"
" colorFS = color;\n"
"}\n";
// Generic pipeline fragment shader:
// - combines the two textures and the vertex color according to texEnv (alphaTex.y)
// - optionally adds dithering noise (CNQ3_DITHER)
// - applies the alpha test (alphaTex.x), either by discarding fragments or,
//   with alpha to coverage (CNQ3_A2C), by rewriting the output alpha
// - applies the greyscale filter
// CreateShader always defines CNQ3_A2C and CNQ3_DITHER to 0 or 1,
// so they must be tested with #if and never with #ifdef.
static const char* generic_fs =
"uniform sampler2D texture1;\n"
"uniform sampler2D texture2;\n"
"\n"
"uniform uvec2 alphaTex;\n"
"#define alphaTest alphaTex.x\n"
"#define texEnv alphaTex.y\n"
"uniform float greyscale;\n"
"#if CNQ3_DITHER\n"
"uniform vec4 gammaBrightNoiseSeed;\n"
"#define invGamma gammaBrightNoiseSeed.x\n"
"#define invBrightness gammaBrightNoiseSeed.y\n"
"#define noiseScale gammaBrightNoiseSeed.z\n"
"#define seed gammaBrightNoiseSeed.w\n"
"#endif\n"
"#if CNQ3_A2C\n"
"uniform float alphaBoost;\n"
"#endif\n"
"\n"
"centroid in vec2 texCoords1FS;\n"
"centroid in vec2 texCoords2FS;\n"
"centroid in vec4 colorFS;\n"
"\n"
"out vec4 fragColor;\n"
"\n"
"#if CNQ3_DITHER\n"
"float Hash(vec2 v)\n"
"{\n"
" // this is from Morgan McGuire's 'Hashed Alpha Testing' paper\n"
" return fract(1.0e4 * sin(17.0 * v.x + 0.1 * v.y) + (0.1 + abs(sin(13.0 * v.y + v.x))));\n"
"}\n"
"\n"
"float Linearize(float color)\n"
"{\n"
" return pow(abs(color * invBrightness), invGamma) * sign(color);\n"
"}\n"
"\n"
"vec4 Dither(vec4 color, vec3 position)\n"
"{\n"
" vec2 newSeed = position.xy + vec2(0.6849, 0.6849) * seed + vec2(position.z, position.z);\n"
" float noise = (noiseScale / 255.0) * Linearize(Hash(newSeed) - 0.5);\n"
"\n"
" return color + vec4(noise, noise, noise, 0.0);\n"
"}\n"
"#endif\n"
"\n"
"#if CNQ3_A2C\n"
"float CorrectAlpha(float threshold, float alpha, vec2 tc)\n"
"{\n"
" vec2 size = vec2(textureSize(texture1, 0));\n"
" if(min(size.x, size.y) <= 8.0)\n"
" return alpha >= threshold ? 1.0 : 0.0;\n"
" alpha *= 1.0 + alphaBoost * textureQueryLod(texture1, tc).x;\n"
" vec2 dtc = fwidth(tc * size);\n"
" float recScale = max(0.25 * (dtc.x + dtc.y), 1.0 / 16384.0);\n"
" float scale = max(1.0 / recScale, 1.0);\n"
" float ac = threshold + (alpha - threshold) * scale;\n"
"\n"
" return ac;\n"
"}\n"
"#endif\n"
"\n"
"void main()\n"
"{\n"
" vec4 p = texture(texture1, texCoords1FS);\n"
" vec4 s = texture(texture2, texCoords2FS);\n"
" vec4 r;\n"
" if(texEnv == uint(1))\n"
" r = colorFS * s * p;\n"
" else if(texEnv == uint(2))\n"
" r = s; // use input.color or not?\n"
" else if(texEnv == uint(3))\n"
" r = colorFS * vec4(p.rgb * (1 - s.a) + s.rgb * s.a, p.a);\n"
" else if(texEnv == uint(4))\n"
" r = colorFS * vec4(p.rgb + s.rgb, p.a * s.a);\n"
" else // texEnv == 0\n"
" r = colorFS * p;\n"
"\n"
"#if CNQ3_DITHER\n"
" r = Dither(r, gl_FragCoord.xyz);\n"
"#endif\n"
"\n"
"#if CNQ3_A2C\n"
" if(alphaTest == uint(1))\n"
" r.a = r.a > 0.0 ? 1.0 : 0.0;\n"
" if(alphaTest == uint(2))\n"
" r.a = CorrectAlpha(uintBitsToFloat(0x3F000001), 1.0 - r.a, texCoords1FS);\n"
" else if(alphaTest == uint(3))\n"
" r.a = CorrectAlpha(0.5, r.a, texCoords1FS);\n"
"#else\n"
" if( (alphaTest == uint(1) && r.a == 0.0) ||\n"
" (alphaTest == uint(2) && r.a >= 0.5) ||\n"
" (alphaTest == uint(3) && r.a < 0.5))\n"
" discard;\n"
"#endif\n"
"\n"
" fragColor = MakeGreyscale(r, greyscale);\n"
"}\n";
// Dynamic light pipeline vertex shader:
// same position and clip distance handling as the generic vertex shader,
// and also outputs the normal plus the un-normalized vectors from the vertex
// to the light (L) and to the eye (V), computed from osLightPos and osEyePos.
static const char* dl_vs =
"uniform mat4 modelView;\n"
"uniform mat4 projection;\n"
"uniform vec4 clipPlane;\n"
"uniform vec3 osLightPos;\n"
"uniform vec3 osEyePos;\n"
"\n"
"in vec4 position;\n"
"in vec4 normal;\n"
"in vec2 texCoords1;\n"
"\n"
"out vec3 normalFS;\n"
"out vec2 texCoords1FS;\n"
"out vec3 L;\n"
"out vec3 V;\n"
"\n"
"void main()\n"
"{\n"
" vec4 positionVS = modelView * vec4(position.xyz, 1);\n"
" gl_Position = projection * positionVS;\n"
" gl_ClipDistance[0] = dot(positionVS, clipPlane);\n"
" normalFS = normal.xyz;\n"
" texCoords1FS = texCoords1;\n"
" L = osLightPos - position.xyz;\n"
" V = osEyePos - position.xyz;\n"
"}\n";
// Dynamic light pipeline fragment shader:
// - samples the (greyscale-filtered) diffuse texture
// - computes a distance fall-off from lightColorRadius.w and the squared light distance,
//   smoothed by BezierEase
// - adds a two-sided diffuse term (N.L) and a specular term (N.H, exponent 16, scaled by 0.25)
// - outputs a color pre-multiplied by alpha, where alpha = mix(opaque, 1.0, base.a)
// The texture is sampled with texture() like in all the other shaders:
// texture2D() has been deprecated since GLSL 1.30 and isn't available in newer core profiles.
static const char* dl_fs =
"uniform sampler2D texture1;\n"
"\n"
"uniform vec4 lightColorRadius;\n"
"uniform float opaque;\n"
"uniform float intensity;\n"
"uniform float greyscale;\n"
"\n"
"in vec3 normalFS;\n"
"in vec2 texCoords1FS;\n"
"in vec3 L;\n"
"in vec3 V;\n"
"\n"
"out vec4 fragColor;\n"
"\n"
"float BezierEase(float t)\n"
"{\n"
" return t * t * (3.0 - 2.0 * t);\n"
"}\n"
"\n"
"void main()\n"
"{\n"
" vec4 base = MakeGreyscale(texture(texture1, texCoords1FS), greyscale);\n"
" vec3 nL = normalize(L);\n"
" vec3 nV = normalize(V);\n"
"\n"
" // light intensity\n"
" float intensFactor = min(dot(L, L) * lightColorRadius.w, 1.0);\n"
" vec3 intens = lightColorRadius.rgb * BezierEase(1.0 - sqrt(intensFactor));\n"
"\n"
" // specular reflection term (N.H)\n"
" float specFactor = min(abs(dot(normalFS, normalize(nL + nV))), 1.0);\n"
" float spec = pow(specFactor, 16.0) * 0.25;\n"
"\n"
" // Lambertian diffuse reflection term (N.L)\n"
" float diffuse = min(abs(dot(normalFS, nL)), 1.0);\n"
" vec3 color = (base.rgb * vec3(diffuse) + vec3(spec)) * intens * intensity;\n"
" float alpha = mix(opaque, 1.0, base.a);\n"
"\n"
" vec4 r = vec4(color.rgb * alpha, alpha);\n"
" fragColor = r;\n"
"}\n";
// Soft sprite (depth fade) pipeline vertex shader:
// same position and clip distance handling as the generic vertex shader,
// and also outputs the positive view-space depth (-positionVS.z) and the two
// projection matrix terms the fragment shader needs to linearize depth values.
static const char* sprite_vs =
"uniform mat4 modelView;\n"
"uniform mat4 projection;\n"
"uniform vec4 clipPlane;\n"
"\n"
"in vec4 position;\n"
"in vec2 texCoords1;\n"
"in vec4 color;\n"
"\n"
"out vec2 texCoords1FS;\n"
"out vec4 colorFS;\n"
"out float depthVS;\n"
"out vec2 proj22_32;\n"
"\n"
"void main()\n"
"{\n"
" vec4 positionVS = modelView * vec4(position.xyz, 1);\n"
" gl_Position = projection * positionVS;\n"
" gl_ClipDistance[0] = dot(positionVS, clipPlane);\n"
" texCoords1FS = texCoords1;\n"
" colorFS = color;\n"
" depthVS = -positionVS.z;\n"
" proj22_32 = vec2(-projection[2][2], projection[3][2]);\n"
"}\n";
// Soft sprite (depth fade) pipeline fragment shader:
// - applies the alpha test by discarding fragments
// - fetches the scene depth at the fragment's position from texture2
//   (a multi-sampled texture read per sample when CNQ3_MSAA is 1)
// - linearizes it and compares it to the sprite's own depth minus 'offset'
// - uses the difference, scaled by 'distance' and shaped by Contrast, to blend
//   between (r * colorScale + colorBias) and the unmodified color r
// - applies the greyscale filter
static const char* sprite_fs =
"uniform sampler2D texture1; // diffuse texture\n"
"#if CNQ3_MSAA\n"
"uniform sampler2DMS texture2; // depth texture\n"
"#else\n"
"uniform sampler2D texture2; // depth texture\n"
"#endif\n"
"\n"
"uniform uint alphaTest;\n"
"uniform vec2 distOffset;\n"
"uniform vec4 colorScale;\n"
"uniform vec4 colorBias;\n"
"uniform float greyscale;\n"
"#define distance distOffset.x\n"
"#define offset distOffset.y\n"
"\n"
"in vec2 texCoords1FS;\n"
"in vec4 colorFS;\n"
"in float depthVS;\n"
"in vec2 proj22_32;\n"
"#define proj22 proj22_32.x\n"
"#define proj32 proj22_32.y\n"
"\n"
"out vec4 fragColor;\n"
"\n"
"float LinearDepth(float zwDepth)\n"
"{\n"
" return proj32 / (zwDepth - proj22);\n"
"}\n"
"\n"
"float Contrast(float d, float power)\n"
"{\n"
" bool aboveHalf = d > 0.5;\n"
" float base = clamp(2.0 * (aboveHalf ? (1.0 - d) : d), 0.0, 1.0);\n"
" float r = 0.5 * pow(base, power);\n"
"\n"
" return aboveHalf ? (1.0 - r) : r;\n"
"}\n"
"\n"
"void main()\n"
"{\n"
" vec4 r = colorFS * texture(texture1, texCoords1FS);\n"
" if( (alphaTest == uint(1) && r.a == 0.0) ||\n"
" (alphaTest == uint(2) && r.a >= 0.5) ||\n"
" (alphaTest == uint(3) && r.a < 0.5))\n"
" discard;\n"
"\n"
"#if CNQ3_MSAA\n"
" float depthSRaw = texelFetch(texture2, ivec2(gl_FragCoord.xy), gl_SampleID).r;\n"
"#else\n"
" float depthSRaw = texelFetch(texture2, ivec2(gl_FragCoord.xy), 0).r;\n"
"#endif\n"
" float depthS = LinearDepth(depthSRaw * 2.0 - 1.0);\n"
" float depthP = depthVS - offset;\n"
" float scale = Contrast((depthS - depthP) * distance, 2.0);\n"
" vec4 r2 = mix(r * colorScale + colorBias, r, scale);\n"
" fragColor = MakeGreyscale(r2, greyscale);\n"
"}\n";
// Post-process pipeline vertex shader: it has no vertex inputs and derives
// everything from gl_VertexID. Vertex IDs 0, 1, 2 produce the clip-space positions
// (-1,-1), (-1,3), (3,-1) with texture coordinates (0,0), (0,2), (2,0),
// i.e. one triangle covering the whole [-1,1] clip-space square.
// NOTE(review): presumably drawn as a single 3-vertex triangle - confirm at the draw call.
static const char* post_vs =
"out vec2 texCoords1FS;\n"
"\n"
"void main()\n"
"{\n"
" gl_Position = vec4(\n"
" float(gl_VertexID / 2) * 4.0 - 1.0,\n"
" float(gl_VertexID % 2) * 4.0 - 1.0,\n"
" 0.0,\n"
" 1.0);\n"
" texCoords1FS = vec2(\n"
" float(gl_VertexID / 2) * 2.0,\n"
" float(gl_VertexID % 2) * 2.0);\n"
"}\n";
// Post-process pipeline fragment shader:
// raises the sampled color to the power 'gamma', multiplies it by 'brightness',
// then applies the greyscale filter; the output alpha is always 1.
// The three settings are packed into a single vec3 uniform.
static const char* post_fs =
"uniform sampler2D texture1;\n"
"\n"
"uniform vec3 brightGammaGrey;\n"
"#define brightness brightGammaGrey.x\n"
"#define gamma brightGammaGrey.y\n"
"#define greyscale brightGammaGrey.z\n"
"\n"
"in vec2 texCoords1FS;\n"
"\n"
"out vec4 fragColor;\n"
"\n"
"void main()\n"
"{\n"
" vec3 base = texture(texture1, texCoords1FS).rgb;\n"
" vec3 gc = pow(base, vec3(gamma)) * brightness;\n"
" fragColor = MakeGreyscale(vec4(gc.rgb, 1.0), greyscale);\n"
"}\n";
// Mip-map generation compute shader, 8x8 threads per work group:
// reads an rgba8 texel, raises R, G and B to the power 'gamma' (alpha is copied as is)
// and writes the result to the rgba16f destination image at the same coordinates.
static const char* gammaToLinear_cs =
"layout (binding = 0, rgba8) readonly uniform image2D srcTex;\n"
"layout (binding = 1, rgba16f) writeonly uniform image2D dstTex;\n"
"\n"
"layout (location = 0) uniform float gamma;\n"
"\n"
"layout (local_size_x = 8, local_size_y = 8) in;\n"
"\n"
"void main()\n"
"{\n"
" ivec2 coords = ivec2(gl_GlobalInvocationID);\n"
" vec4 inV = imageLoad(srcTex, coords);\n"
" vec4 outV = vec4(pow(inV.x, gamma), pow(inV.y, gamma), pow(inV.z, gamma), inV.a);\n"
" imageStore(dstTex, coords, outV);\n"
"}\n";
// Mip-map generation compute shader, 8x8 threads per work group:
// reads an rgba16f texel, blends its color with the average of itself and blendColor.rgb
// using blendColor.a as the blend factor, raises R, G and B to the power 'invGamma',
// multiplies by 'intensity' and writes the result (alpha copied as is) to the rgba8 image.
static const char* linearToGamma_cs =
// yes, intensity *should* be done in light-linear space
// but we keep the old behavior for consistency...
"layout (binding = 0, rgba16f) readonly uniform image2D srcTex;\n"
"layout (binding = 1, rgba8) writeonly uniform image2D dstTex;\n"
"\n"
"layout (location = 0) uniform float intensity;\n"
"layout (location = 1) uniform vec4 blendColor;\n"
"layout (location = 2) uniform float invGamma;\n"
"\n"
"layout (local_size_x = 8, local_size_y = 8) in;\n"
"\n"
"void main()\n"
"{\n"
" ivec2 coords = ivec2(gl_GlobalInvocationID);\n"
" vec4 in0 = imageLoad(srcTex, coords);\n"
" vec3 in1 = 0.5 * (in0.rgb + blendColor.rgb);\n"
" vec3 inV = mix(in0.rgb, in1.rgb, blendColor.a);\n"
" vec3 out0 = vec3(pow(inV.r, invGamma), pow(inV.g, invGamma), pow(inV.b, invGamma));\n"
" vec3 out1 = out0 * intensity;\n"
" vec4 outV = vec4(out1, in0.a);\n"
" imageStore(dstTex, coords, outV);\n"
"}\n";
// Mip-map generation compute shader, 8x8 threads per work group:
// each invocation writes one destination texel as the weighted sum of 8 source texels
// taken along 'offset' around (destination coordinates * scale), with the symmetric
// weights x, y, z, w, w, z, y, x.
// FixCoords clamps coordinates to [0, maxSize] when clampMode is non-zero
// and otherwise computes (c & maxSize).
// NOTE(review): the bit-wise AND only wraps correctly if maxSize is a power of two minus one
// per axis - confirm at the call site.
// The two center taps (base and base + offset) don't go through FixCoords.
static const char* downSample_cs =
"layout (binding = 0, rgba16f) readonly uniform image2D srcTex;\n"
"layout (binding = 1, rgba16f) writeonly uniform image2D dstTex;\n"
"\n"
"layout (location = 0) uniform vec4 weights;\n"
"layout (location = 1) uniform ivec2 maxSize;\n"
"layout (location = 2) uniform ivec2 scale;\n"
"layout (location = 3) uniform ivec2 offset;\n"
"layout (location = 4) uniform uint clampMode; // 0 = repeat\n"
"\n"
"layout (local_size_x = 8, local_size_y = 8) in;\n"
"\n"
"ivec2 FixCoords(ivec2 c)\n"
"{\n"
"if(clampMode > 0)\n"
" {\n"
" // clamp\n"
" return clamp(c, ivec2(0, 0), maxSize);\n"
" }\n"
"\n"
" // repeat\n"
" return c & maxSize;\n"
"}\n"
"\n"
"void main()\n"
"{\n"
"ivec2 dstTC = ivec2(gl_GlobalInvocationID);\n"
" ivec2 base = ivec2(gl_GlobalInvocationID) * scale;\n"
" vec4 r = vec4(0, 0, 0, 0);\n"
" r += imageLoad(srcTex, FixCoords(base - offset * 3)) * weights.x;\n"
" r += imageLoad(srcTex, FixCoords(base - offset * 2)) * weights.y;\n"
" r += imageLoad(srcTex, FixCoords(base - offset )) * weights.z;\n"
" r += imageLoad(srcTex, base ) * weights.w;\n"
" r += imageLoad(srcTex, base + offset ) * weights.w;\n"
" r += imageLoad(srcTex, FixCoords(base + offset * 2)) * weights.z;\n"
" r += imageLoad(srcTex, FixCoords(base + offset * 3)) * weights.y;\n"
" r += imageLoad(srcTex, FixCoords(base + offset * 4)) * weights.x;\n"
" imageStore(dstTex, dstTC, r);\n"
"}\n";
// Translates a CNQ3 render target color format (RTCF_*) into the OpenGL
// internal format, pixel format and component type used to create the texture.
// Unknown values are treated like RTCF_R8G8B8A8.
void GL_GetRenderTargetFormat(GLenum* internalFormat, GLenum* format, GLenum* type, int cnq3Format)
{
	// the pixel format is the same for every supported layout
	*format = GL_BGRA;

	if(cnq3Format == RTCF_R10G10B10A2)
	{
		*internalFormat = GL_RGB10_A2;
		*type = GL_UNSIGNED_INT_2_10_10_10_REV;
		return;
	}

	if(cnq3Format == RTCF_R16G16B16A16)
	{
		*internalFormat = GL_RGBA16;
		*type = GL_UNSIGNED_SHORT;
		return;
	}

	// RTCF_R8G8B8A8 and anything we don't recognize
	*internalFormat = GL_RGBA8;
	*type = GL_UNSIGNED_BYTE;
}
// Page-aligned system memory allocation for ArrayBuffer (used with AMD_pinned_memory).
//
// AllocatePinnedMemory reserves capacity * itemSize bytes rounded up to the page size.
// On success, mappedData points to the memory and pinnedByteCount is its byte size.
// On failure, mappedData is NULL and pinnedByteCount is 0 on every platform.
//
// FreePinnedMemory releases the memory (if any) and resets both fields.
#if defined(_WIN32)
static void AllocatePinnedMemory(ArrayBuffer* buffer)
{
	const int byteCount = PAD(buffer->capacity * buffer->itemSize, 4096);
	// VirtualAlloc returns NULL on failure
	buffer->mappedData = (byte*)VirtualAlloc(NULL, byteCount, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
	buffer->pinnedByteCount = buffer->mappedData != NULL ? byteCount : 0;
}
static void FreePinnedMemory(ArrayBuffer* buffer)
{
	if(buffer->mappedData != NULL)
	{
		VirtualFree(buffer->mappedData, 0, MEM_RELEASE);
	}
	buffer->mappedData = NULL;
	buffer->pinnedByteCount = 0;
}
#else
static void AllocatePinnedMemory(ArrayBuffer* buffer)
{
	const int pageSizeSC = (int)sysconf(_SC_PAGE_SIZE);
	const int pageSize = pageSizeSC > 0 ? pageSizeSC : 4096;
	const int byteCount = PAD(buffer->capacity * buffer->itemSize, pageSize);
	void* const memory = mmap(NULL, byteCount, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if(memory == MAP_FAILED)
	{
		// mmap signals failure with MAP_FAILED, which is *not* NULL:
		// never let that value escape as if it were a valid pointer
		buffer->mappedData = NULL;
		buffer->pinnedByteCount = 0;
		return;
	}
	buffer->mappedData = (byte*)memory;
	buffer->pinnedByteCount = byteCount;
}
static void FreePinnedMemory(ArrayBuffer* buffer)
{
	if(buffer->mappedData != NULL)
	{
		munmap(buffer->mappedData, buffer->pinnedByteCount);
	}
	buffer->mappedData = NULL;
	buffer->pinnedByteCount = 0;
}
#endif
static void HandleError(const char* message)
{
if(gl.errorMode == EM_FATAL)
{
ri.Error(ERR_FATAL, message);
}
else if(gl.errorMode == EM_PRINT)
{
ri.Printf(PRINT_ERROR, message);
}
}
// Attaches a human-readable label to an OpenGL object for debugging tools.
// Does nothing when neither OpenGL 4.3 nor KHR_debug is available.
// identifier must be one of:
// GL_BUFFER, GL_SHADER, GL_PROGRAM, GL_VERTEX_ARRAY, GL_QUERY, GL_SAMPLER, GL_TEXTURE,
// GL_RENDERBUFFER, GL_FRAMEBUFFER, GL_PROGRAM_PIPELINE, GL_TRANSFORM_FEEDBACK
static void SetDebugName(GLenum identifier, GLuint name, const char* string)
{
	if(!(GLEW_VERSION_4_3 || GLEW_KHR_debug))
	{
		return;
	}

	// a negative length means the label is null-terminated
	glObjectLabel(identifier, name, -1, string);
}
// Returns a lower-case display name for a shader stage, or "???" for unknown stages.
static const char* GetShaderTypeName(GLenum shaderType)
{
	if(shaderType == GL_VERTEX_SHADER)
	{
		return "vertex";
	}

	if(shaderType == GL_FRAGMENT_SHADER)
	{
		return "fragment";
	}

	if(shaderType == GL_COMPUTE_SHADER)
	{
		return "compute";
	}

	return "???";
}
// Compiles one shader stage.
//
// The final source is assembled from: a #version line, the CNQ3_A2C / CNQ3_DITHER / CNQ3_MSAA
// defines (always defined, to 0 or 1), shared_fs for fragment shaders, then shaderSource.
// - compute shaders use GLSL 4.30
// - shaders that enable alpha to coverage or depth fade with MSAA use GLSL 4.00
// - everything else uses GLSL 1.40
//
// shaderPtr    receives the shader object name, only written on success
// pipelineId   pipeline the shader belongs to (PID_COUNT for compute shaders)
// shaderType   GL_VERTEX_SHADER, GL_FRAGMENT_SHADER or GL_COMPUTE_SHADER
// shaderSource GLSL source without the #version line
// debugName    name used in messages and debug labels
//
// Returns qtrue when compilation succeeded.
// A failed compilation with a non-empty info log is always reported as fatal.
// With r_verbose set, the info log of a successful compilation is printed too.
static qbool CreateShader(GLuint* shaderPtr, PipelineId pipelineId, GLenum shaderType, const char* shaderSource, const char* debugName)
{
	// alpha to coverage now requires GLSL 4.00 for textureQueryLod
	// depth fade with MSAA now requires GLSL 4.00 for gl_SampleID
	const qbool enableA2C =
		pipelineId == PID_GENERIC &&
		shaderType == GL_FRAGMENT_SHADER &&
		glInfo.alphaToCoverageSupport;
	const qbool enableDithering =
		pipelineId == PID_GENERIC &&
		shaderType == GL_FRAGMENT_SHADER &&
		r_dither->integer;
	const qbool depthFadeWithMSAA =
		pipelineId == PID_SOFT_SPRITE &&
		shaderType == GL_FRAGMENT_SHADER &&
		glInfo.depthFadeSupport &&
		gl.fbMSEnabled;
	const char* const sourceArray[] =
	{
		shaderType == GL_COMPUTE_SHADER ? "#version 430\n" : (enableA2C || depthFadeWithMSAA ? "#version 400\n" : "#version 140\n"),
		"\n",
		enableA2C ? "#define CNQ3_A2C 1\n" : "#define CNQ3_A2C 0\n",
		enableDithering ? "#define CNQ3_DITHER 1\n" : "#define CNQ3_DITHER 0\n",
		depthFadeWithMSAA ? "#define CNQ3_MSAA 1\n" : "#define CNQ3_MSAA 0\n",
		shaderType == GL_FRAGMENT_SHADER ? shared_fs : "",
		shaderSource
	};
	const char* const typeName = GetShaderTypeName(shaderType);

	GLuint shader = glCreateShader(shaderType);
	glShaderSource(shader, ARRAY_LEN(sourceArray), sourceArray, NULL);
	glCompileShader(shader);

	GLint result = GL_FALSE;
	glGetShaderiv(shader, GL_COMPILE_STATUS, &result);
	const qbool success = result == GL_TRUE;
	if(success)
	{
		*shaderPtr = shader;
		SetDebugName(GL_SHADER, shader, va("%s %s shader", debugName, typeName));
	}

	if(!success || r_verbose->integer)
	{
		GLint logLength = 0;
		glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &logLength);
		if(logLength > 0)
		{
			glGetShaderInfoLog(shader, sizeof(gl.log), NULL, gl.log);
			// temporarily override the error mode: failures are fatal,
			// the log of a successful compilation is only printed
			const ErrorMode em = gl.errorMode;
			gl.errorMode = success ? EM_PRINT : EM_FATAL;
			if(success)
			{
				// the shader compiled fine: this is only the driver's log (e.g. warnings),
				// so don't claim that the compilation failed
				HandleError(va("'%s' %s shader compilation log: %s\n", debugName, typeName, gl.log));
			}
			else
			{
				HandleError(va("'%s' %s shader compilation failed: %s\n", debugName, typeName, gl.log));
			}
			gl.errorMode = em;
		}
		else if(!success)
		{
			HandleError(va("'%s' %s shader compilation failed\n", debugName, typeName));
		}
	}

	return success;
}
// Checks the link status of an already linked program and reports the outcome.
//
// prog      program to check (prog->program must have been linked already)
// debugName name used in messages and the debug label
//
// Returns qtrue when the link succeeded.
// A failed link with a non-empty info log is always reported as fatal.
// With r_verbose set, the info log of a successful link is printed too.
static qbool FinalizeProgram(Program* prog, const char* debugName)
{
	GLint result = GL_FALSE;
	glGetProgramiv(prog->program, GL_LINK_STATUS, &result);
	const qbool success = result == GL_TRUE;
	if(success)
	{
		SetDebugName(GL_PROGRAM, prog->program, va("%s program", debugName));
	}

	if(!success || r_verbose->integer)
	{
		GLint logLength = 0;
		glGetProgramiv(prog->program, GL_INFO_LOG_LENGTH, &logLength);
		if(logLength > 0)
		{
			glGetProgramInfoLog(prog->program, sizeof(gl.log), NULL, gl.log);
			// temporarily override the error mode: failures are fatal,
			// the log of a successful link is only printed
			const ErrorMode em = gl.errorMode;
			gl.errorMode = success ? EM_PRINT : EM_FATAL;
			if(success)
			{
				// the program linked fine: this is only the driver's log (e.g. warnings),
				// so don't claim that the link failed
				HandleError(va("'%s' program link log: %s\n", debugName, gl.log));
			}
			else
			{
				HandleError(va("'%s' program link failed: %s\n", debugName, gl.log));
			}
			gl.errorMode = em;
		}
		else if(!success)
		{
			HandleError(va("'%s' program link failed\n", debugName));
		}
	}

	return success;
}
// Builds the program of a graphics pipeline from vertex and fragment shader sources.
// The attribute locations of the pipeline's enabled vertex streams are bound
// to their VertexBufferId before linking.
// Returns qfalse as soon as a shader fails to compile, else the link result.
static qbool CreateGraphicsProgram(PipelineId pipelineId, const char* vs, const char* fs, const char* debugName)
{
	Pipeline* const pipeline = &gl.pipelines[pipelineId];
	Program* const prog = &pipeline->program;

	// the fragment shader is only compiled if the vertex shader compiled
	if(!CreateShader(&prog->vertexShader, pipelineId, GL_VERTEX_SHADER, vs, debugName))
	{
		return qfalse;
	}
	if(!CreateShader(&prog->fragmentShader, pipelineId, GL_FRAGMENT_SHADER, fs, debugName))
	{
		return qfalse;
	}

	prog->program = glCreateProgram();
	glAttachShader(prog->program, prog->vertexShader);
	glAttachShader(prog->program, prog->fragmentShader);

	// glBindAttribLocation must be called before the program gets linked
	for(int vb = 0; vb < VB_COUNT; ++vb)
	{
		const PipelineArrayBuffer* const stream = &pipeline->arrayBuffers[vb];
		if(!stream->enabled)
		{
			continue;
		}
		glBindAttribLocation(prog->program, vb, stream->attribName);
	}

	glLinkProgram(prog->program);

	return FinalizeProgram(prog, debugName);
}
// Builds a compute program from a single compute shader source.
// Returns qfalse if the shader fails to compile, else the link result.
static qbool CreateComputeProgram(Program* prog, const char* cs, const char* debugName)
{
	// PID_COUNT: compute shaders don't belong to any graphics pipeline
	const qbool compiled = CreateShader(&prog->computeShader, PID_COUNT, GL_COMPUTE_SHADER, cs, debugName);
	if(!compiled)
	{
		return qfalse;
	}

	const GLuint program = glCreateProgram();
	prog->program = program;
	glAttachShader(program, prog->computeShader);
	glLinkProgram(program);

	return FinalizeProgram(prog, debugName);
}
// Allocates the storage of the multi-sampled color texture currently bound to
// GL_TEXTURE_2D_MULTISAMPLE, using the render target format from r_rtColorFormat.
// Starts from the r_msaa sample count (capped by the driver-reported maximum when
// it can be queried) and lowers it one step at a time until the allocation succeeds.
// Raises a fatal error if no sample count works.
// samples receives the sample count that was finally used.
static void CreateColorTextureStorageMS(int* samples)
{
	GLenum internalFormat, format, type;
	GL_GetRenderTargetFormat(&internalFormat, &format, &type, r_rtColorFormat->integer);

	int sampleCount = r_msaa->integer;

	while(glGetError() != GL_NO_ERROR) {} // clear the error queue

	if(GLEW_VERSION_4_2 || GLEW_ARB_internalformat_query)
	{
		GLint maxSampleCount = 0;
		glGetInternalformativ(GL_TEXTURE_2D_MULTISAMPLE, internalFormat, GL_SAMPLES, 1, &maxSampleCount);
		// only trust the queried value if the query itself succeeded
		if(glGetError() == GL_NO_ERROR && (int)maxSampleCount < sampleCount)
		{
			sampleCount = (int)maxSampleCount;
		}
	}

	// first attempt with the requested sample count...
	glTexImage2DMultisample(GL_TEXTURE_2D_MULTISAMPLE, sampleCount, internalFormat, glConfig.vidWidth, glConfig.vidHeight, GL_TRUE);
	GLenum errorCode = glGetError();

	// ...then keep lowering it for as long as the driver refuses
	while(errorCode != GL_NO_ERROR && sampleCount != 0)
	{
		--sampleCount;
		glTexImage2DMultisample(GL_TEXTURE_2D_MULTISAMPLE, sampleCount, internalFormat, glConfig.vidWidth, glConfig.vidHeight, GL_TRUE);
		errorCode = glGetError();
	}

	if(errorCode != GL_NO_ERROR)
	{
		ri.Error(ERR_FATAL, "Failed to create multi-sampled texture storage (error 0x%X)\n", (unsigned int)errorCode);
	}

	*samples = sampleCount;
}
// Creates a single-sampled frame buffer of the size of the render resolution.
// fb           receives the object names and attachment flags
// color        create and attach a color texture (format from r_rtColorFormat, linear filtering)
// depthStencil create and attach a 24-bit depth / 8-bit stencil texture (nearest filtering)
// name         prefix of the debug labels
// Raises a fatal error if the frame buffer is incomplete.
// Leaves frame buffer 0 bound.
static void FBO_CreateSS(FrameBuffer* fb, qbool color, qbool depthStencil, const char* name)
{
	const int width = glConfig.vidWidth;
	const int height = glConfig.vidHeight;

	if(depthStencil)
	{
		glGenTextures(1, &fb->depthStencil);
		glBindTexture(GL_TEXTURE_2D, fb->depthStencil);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
		glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, width, height, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, NULL);
		SetDebugName(GL_TEXTURE, fb->depthStencil, va("%s depth/stencil attachment", name));
	}

	if(color)
	{
		GLenum internalFormat, format, type;
		GL_GetRenderTargetFormat(&internalFormat, &format, &type, r_rtColorFormat->integer);

		glGenTextures(1, &fb->color);
		glBindTexture(GL_TEXTURE_2D, fb->color);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
		glTexImage2D(GL_TEXTURE_2D, 0, internalFormat, width, height, 0, format, type, NULL);
		SetDebugName(GL_TEXTURE, fb->color, va("%s color attachment 0", name));
	}

	// the textures exist: create the frame buffer and attach them
	glGenFramebuffers(1, &fb->fbo);
	glBindFramebuffer(GL_FRAMEBUFFER, fb->fbo);
	if(color)
	{
		glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb->color, 0);
	}
	if(depthStencil)
	{
		glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, fb->depthStencil, 0);
	}

	const GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
	if(status != GL_FRAMEBUFFER_COMPLETE)
	{
		ri.Error(ERR_FATAL, "Failed to create FBO (status 0x%X, error 0x%X)\n", (unsigned int)status, (unsigned int)glGetError());
	}

	SetDebugName(GL_FRAMEBUFFER, fb->fbo, va("%s frame buffer", name));
	glBindFramebuffer(GL_FRAMEBUFFER, 0);

	fb->multiSampled = qfalse;
	fb->hasDepthStencil = depthStencil;
	fb->hasColor = color;
}
static void FBO_CreateMS(int* sampleCount, FrameBuffer* fb, const char* name)
{
glGenFramebuffers(1, &fb->fbo);
glBindFramebuffer(GL_FRAMEBUFFER, fb->fbo);
glGenTextures(1, &fb->color);
glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, fb->color);
CreateColorTextureStorageMS(sampleCount);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D_MULTISAMPLE, fb->color, 0);
SetDebugName(GL_TEXTURE, fb->color, va("%s color attachment 0", name));
glGenTextures(1, &fb->depthStencil);
glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, fb->depthStencil);
glTexImage2DMultisample(GL_TEXTURE_2D_MULTISAMPLE, *sampleCount, GL_DEPTH24_STENCIL8, glConfig.vidWidth, glConfig.vidHeight, GL_TRUE);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D_MULTISAMPLE, fb->depthStencil, 0);
SetDebugName(GL_TEXTURE, fb->depthStencil, va("%s depth/stencil attachment", name));
const GLenum fboStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER);
if(fboStatus != GL_FRAMEBUFFER_COMPLETE)
{
ri.Error(ERR_FATAL, "Failed to create FBO (status 0x%X, error 0x%X)\n", (unsigned int)fboStatus, (unsigned int)glGetError());
}
SetDebugName(GL_FRAMEBUFFER, fb->fbo, va("%s frame buffer", name));
glBindFramebuffer(GL_FRAMEBUFFER, 0);
fb->multiSampled = qtrue;
fb->hasDepthStencil = qtrue;
fb->hasColor = qtrue;
}
static void FBO_Init()
{
gl.fbMSEnabled = r_msaa->integer >= 2 && r_colorMipLevels->integer == 0;
int finalSampleCount = 1;
if(gl.fbMSEnabled)
{
FBO_CreateMS(&finalSampleCount, &gl.fbMS, "main");
FBO_CreateSS(&gl.fbSS[0], qtrue, qfalse, "post-process #1");
FBO_CreateSS(&gl.fbSS[1], qtrue, qfalse, "post-process #2");
}
else
{
FBO_CreateSS(&gl.fbSS[0], qtrue, qtrue, "post-process #1");
FBO_CreateSS(&gl.fbSS[1], qtrue, qtrue, "post-process #2");
}
ri.Printf(PRINT_ALL, "MSAA: %d samples requested, %d selected\n", r_msaa->integer, finalSampleCount);
}
static void FBO_Bind(const FrameBuffer* fb)
{
glBindFramebuffer(GL_FRAMEBUFFER, fb->fbo);
glReadBuffer(GL_COLOR_ATTACHMENT0);
glDrawBuffer(GL_COLOR_ATTACHMENT0);
}
static void FBO_Bind()
{
if(gl.fbMSEnabled)
{
FBO_Bind(&gl.fbMS);
}
else
{
FBO_Bind(&gl.fbSS[gl.fbReadIndex]);
}
}
static void FBO_BlitToBackBuffer()
{
// fixing up the blit mode here to avoid unnecessary glClear calls
int blitMode = r_blitMode->integer;
if(r_mode->integer != VIDEOMODE_UPSCALE)
{
blitMode = BLITMODE_STRETCHED;
}
if(blitMode != BLITMODE_STRETCHED)
{
glBindFramebuffer(GL_FRAMEBUFFER, 0);
glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
glClear(GL_COLOR_BUFFER_BIT);
}
const FrameBuffer& fbo = gl.fbSS[gl.fbReadIndex];
glBindFramebuffer(GL_READ_FRAMEBUFFER, fbo.fbo);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
glReadBuffer(GL_COLOR_ATTACHMENT0);
glDrawBuffer(GL_BACK);
const int sw = glConfig.vidWidth;
const int sh = glConfig.vidHeight;
const int dw = glInfo.winWidth;
const int dh = glInfo.winHeight;
if(blitMode == BLITMODE_STRETCHED)
{
glBlitFramebuffer(0, 0, sw, sh, 0, 0, dw, dh, GL_COLOR_BUFFER_BIT, GL_LINEAR);
}
else if(blitMode == BLITMODE_CENTERED)
{
const int dx = (dw - sw) / 2;
const int dy = (dh - sh) / 2;
glBlitFramebuffer(0, 0, sw, sh, dx, dy, dx + sw, dy + sh, GL_COLOR_BUFFER_BIT, GL_LINEAR);
}
else // blitMode == BLITMODE_ASPECT
{
const float rx = (float)dw / (float)sw;
const float ry = (float)dh / (float)sh;
const float ar = min(rx, ry);
const int w = (int)(sw * ar);
const int h = (int)(sh * ar);
const int x = (dw - w) / 2;
const int y = (dh - h) / 2;
glBlitFramebuffer(0, 0, sw, sh, x, y, x + w, y + h, GL_COLOR_BUFFER_BIT, GL_LINEAR);
}
}
static void FBO_ResolveColor()
{
const FrameBuffer& r = gl.fbMS;
const FrameBuffer& d = gl.fbSS[gl.fbReadIndex];
glBindFramebuffer(GL_READ_FRAMEBUFFER, r.fbo);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, d.fbo);
glReadBuffer(GL_COLOR_ATTACHMENT0);
glDrawBuffer(GL_COLOR_ATTACHMENT0);
const int w = glConfig.vidWidth;
const int h = glConfig.vidHeight;
glBlitFramebuffer(0, 0, w, h, 0, 0, w, h, GL_COLOR_BUFFER_BIT, GL_LINEAR);
}
// Selects the active texture unit, skipping the OpenGL call
// when the cached unit is already the requested one.
static void ApplyActiveTexture(int slot)
{
	if(slot != gl.activeTextureSlot)
	{
		glActiveTexture(GL_TEXTURE0 + slot);
		gl.activeTextureSlot = slot;
	}
}
// Makes a graphics pipeline current, doing nothing if it already is:
// - binds the program
// - enables and sets up the vertex attribute streams the pipeline uses, disables the others
// - assigns texture units 0 and 1 to the two sampler uniforms
// - binds the multi-sampled depth texture to unit 1 for the soft sprite pipeline with MSAA
// - flags all of the pipeline's uniforms as dirty
// The active texture unit is 0 when the function returns.
static void ApplyPipeline(PipelineId pipelineId)
{
	if(gl.pipelineId == pipelineId)
	{
		return;
	}

	const qbool softSprite = pipelineId == PID_SOFT_SPRITE;

	// The depth fade pipeline is the only one reading from the depth texture
	// but doesn't write to it.
	// Any change to that pipeline requires a texture barrier with OpenGL 4.5+
	// to make sure we get valid data when reading the depth texture.
	// See "Feedback Loops Between Textures and the Framebuffer" in the specs.
	if(softSprite && (GLEW_VERSION_4_5 || GLEW_ARB_texture_barrier))
	{
		glTextureBarrier();
	}

	gl.pipelineId = pipelineId;
	Pipeline* const pipeline = &gl.pipelines[pipelineId];
	glUseProgram(pipeline->program.program);
	backEnd.pc3D[RB_SHADER_CHANGES]++;

	for(int vb = 0; vb < VB_COUNT; ++vb)
	{
		if(!pipeline->arrayBuffers[vb].enabled)
		{
			glDisableVertexAttribArray(vb);
			continue;
		}

		const ArrayBuffer* const stream = &gl.arrayBuffers[vb];
		glEnableVertexAttribArray(vb);
		glBindBuffer(GL_ARRAY_BUFFER, stream->buffer);
		glVertexAttribPointer(vb, stream->componentCount, stream->dataType, stream->normalized, stream->itemSize, (const void*)0);
	}

	glUniform1i(pipeline->textureLocations[0], 0);

	ApplyActiveTexture(1);
	if(softSprite && gl.fbMSEnabled)
	{
		// we don't have a "BindTextureMS" function for caching/tracking MS texture binds
		// since this is the only one we read from a fragment shader at the moment
		glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, gl.fbMS.depthStencil);
	}
	glUniform1i(pipeline->textureLocations[1], 1);
	ApplyActiveTexture(0);

	// every uniform has to be uploaded again for the newly bound program
	memset(pipeline->uniformsDirty, 0xFF, sizeof(pipeline->uniformsDirty));
}
// Translates the renderer's wrap mode into the GL wrap mode.
// Anything that isn't TW_CLAMP_TO_EDGE (TW_REPEAT included) maps to GL_REPEAT.
static GLint GetTextureWrapMode(textureWrap_t w)
{
	if(w == TW_CLAMP_TO_EDGE)
	{
		return GL_CLAMP_TO_EDGE;
	}

	return GL_REPEAT;
}
// Translates the renderer's texture format into the GL internal (storage) format.
// TF_RGBA8 is the only format with a dedicated case; every other value
// yields GL_RGBA8 as well.
static GLint GetTextureInternalFormat(textureFormat_t f)
{
	GLint internalFormat = GL_RGBA8;
	switch(f)
	{
		case TF_RGBA8:
			internalFormat = GL_RGBA8;
			break;

		default:
			break;
	}

	return internalFormat;
}
// Translates the renderer's texture format into the GL pixel data format.
// TF_RGBA8 is the only format with a dedicated case; every other value
// yields GL_RGBA as well.
static GLenum GetTextureFormat(textureFormat_t f)
{
	GLenum dataFormat = GL_RGBA;
	switch(f)
	{
		case TF_RGBA8:
			dataFormat = GL_RGBA;
			break;

		default:
			break;
	}

	return dataFormat;
}
// Binds a 2D texture to the given texture unit.
// gl.boundTextures caches the last texture bound per unit so that
// redundant binds (and the texture unit switch) are skipped.
static void BindTexture(int slot, GLuint texture)
{
	if(gl.boundTextures[slot] != texture)
	{
		ApplyActiveTexture(slot);
		glBindTexture(GL_TEXTURE_2D, texture);
		gl.boundTextures[slot] = texture;
	}
}
static void BindImage(int slot, const image_t* image)
{
const GLuint texture = (GLuint)image->texnum;
BindTexture(slot, texture);
}
// Uploads new pixels to the GL texture of an animated image.
// It is handed to R_UpdateAndGetBundleImage as its update callback.
//
// image : image whose texture gets updated; width/height are rewritten on resize
// w, h  : dimensions of the incoming pixel data
// data  : source pixels, read as GL_RGBA / GL_UNSIGNED_BYTE
// dirty : when the size is unchanged, the pixels are only re-uploaded if this is set
static void UpdateAnimatedImage(image_t* image, int w, int h, const byte* data, qbool dirty)
{
	const GLuint texture = (GLuint)image->texnum;

	// This bind doesn't go through BindTexture, so the bind cache of the
	// active texture unit has to be updated by hand. Otherwise a later
	// BindTexture call can be skipped even though another texture is bound.
	glBindTexture(GL_TEXTURE_2D, texture);
	gl.boundTextures[gl.activeTextureSlot] = texture;

	if(w != image->width || h != image->height)
	{
		// if the scratchImage isn't in the format we want, specify it as a new texture
		image->width = w;
		image->height = h;
		glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB8, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, data);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
		// GL_CLAMP is not a valid wrap mode in core profile contexts (GL_INVALID_ENUM),
		// GL_CLAMP_TO_EDGE is the supported equivalent
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
	}
	else if(dirty)
	{
		// otherwise, just update it
		glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, data);
	}
}
// Returns the image to use for the given texture bundle.
// The selection is delegated to R_UpdateAndGetBundleImage, which receives
// UpdateAnimatedImage as the callback for refreshing animated images.
static const image_t* GetBundleImage(const textureBundle_t* bundle)
{
	const image_t* const bundleImage = R_UpdateAndGetBundleImage(bundle, &UpdateAnimatedImage);

	return bundleImage;
}
static void BindBundle(int slot, const textureBundle_t* bundle)
{
BindImage(slot, GetBundleImage(bundle));
}
// Sets the viewport and the scissor rectangle to the same rectangle.
// x, y : lower-left corner as passed to glViewport/glScissor
// w, h : width and height of the rectangle
static void ApplyViewportAndScissor(int x, int y, int w, int h)
{
glViewport(x, y, w, h);
glScissor(x, y, w, h);
}
// Translates GLS_SRCBLEND_* state bits into the GL source blend factor.
// Unrecognized bits yield GL_ONE.
static GLenum GetSourceBlend(unsigned int bits)
{
	GLenum factor = GL_ONE;
	switch(bits)
	{
		case GLS_SRCBLEND_ZERO:                factor = GL_ZERO; break;
		case GLS_SRCBLEND_ONE:                 factor = GL_ONE; break;
		case GLS_SRCBLEND_DST_COLOR:           factor = GL_DST_COLOR; break;
		case GLS_SRCBLEND_ONE_MINUS_DST_COLOR: factor = GL_ONE_MINUS_DST_COLOR; break;
		case GLS_SRCBLEND_SRC_ALPHA:           factor = GL_SRC_ALPHA; break;
		case GLS_SRCBLEND_ONE_MINUS_SRC_ALPHA: factor = GL_ONE_MINUS_SRC_ALPHA; break;
		case GLS_SRCBLEND_DST_ALPHA:           factor = GL_DST_ALPHA; break;
		case GLS_SRCBLEND_ONE_MINUS_DST_ALPHA: factor = GL_ONE_MINUS_DST_ALPHA; break;
		case GLS_SRCBLEND_ALPHA_SATURATE:      factor = GL_SRC_ALPHA_SATURATE; break;
		default:                               break;
	}

	return factor;
}
// Translates GLS_DSTBLEND_* state bits into the GL destination blend factor.
// Unrecognized bits yield GL_ONE.
static GLenum GetDestinationBlend(unsigned int bits)
{
	GLenum factor = GL_ONE;
	switch(bits)
	{
		case GLS_DSTBLEND_ZERO:                factor = GL_ZERO; break;
		case GLS_DSTBLEND_ONE:                 factor = GL_ONE; break;
		case GLS_DSTBLEND_SRC_COLOR:           factor = GL_SRC_COLOR; break;
		case GLS_DSTBLEND_ONE_MINUS_SRC_COLOR: factor = GL_ONE_MINUS_SRC_COLOR; break;
		case GLS_DSTBLEND_SRC_ALPHA:           factor = GL_SRC_ALPHA; break;
		case GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA: factor = GL_ONE_MINUS_SRC_ALPHA; break;
		case GLS_DSTBLEND_DST_ALPHA:           factor = GL_DST_ALPHA; break;
		case GLS_DSTBLEND_ONE_MINUS_DST_ALPHA: factor = GL_ONE_MINUS_DST_ALPHA; break;
		default:                               break;
	}

	return factor;
}
// Translates GLS_ATEST_* state bits into the AlphaTest mode handed to the shaders.
// No bits set, or unrecognized bits, yield AT_ALWAYS.
static AlphaTest GetAlphaTest(unsigned int bits)
{
	AlphaTest mode = AT_ALWAYS;
	switch(bits)
	{
		case 0:               mode = AT_ALWAYS; break;
		case GLS_ATEST_GT_0:  mode = AT_GREATER_THAN_0; break;
		case GLS_ATEST_LT_80: mode = AT_LESS_THAN_HALF; break;
		case GLS_ATEST_GE_80: mode = AT_GREATER_OR_EQUAL_TO_HALF; break;
		default:              break;
	}

	return mode;
}
// Applies the face culling mode, skipping the GL calls when it matches the cached mode.
// CT_TWO_SIDED disables culling, CT_FRONT_SIDED culls GL_FRONT and
// any other value culls GL_BACK.
static void ApplyCullType(cullType_t cullType)
{
	if(cullType == gl.cullType)
	{
		return;
	}

	gl.cullType = cullType;
	if(cullType != CT_TWO_SIDED)
	{
		const GLenum culledFace = (cullType == CT_FRONT_SIDED) ? GL_FRONT : GL_BACK;
		glEnable(GL_CULL_FACE);
		glCullFace(culledFace);
	}
	else
	{
		glDisable(GL_CULL_FACE);
	}
}
static void ApplyBlendFunc(unsigned int srcBlendBits, unsigned int dstBlendBits)
{
if(srcBlendBits == gl.srcBlendBits && dstBlendBits == gl.dstBlendBits)
{
return;
}
gl.srcBlendBits = srcBlendBits;
gl.dstBlendBits = dstBlendBits;
if((srcBlendBits | dstBlendBits) == 0)
{
glDisable(GL_BLEND);
}
else
{
glEnable(GL_BLEND);
glBlendFunc(GetSourceBlend(srcBlendBits), GetDestinationBlend(dstBlendBits));
}
}
// Enables or disables depth testing, skipping the GL call when the cached state matches.
static void ApplyDepthTest(qbool enableDepthTest)
{
	if(enableDepthTest == gl.enableDepthTest)
	{
		return;
	}

	gl.enableDepthTest = enableDepthTest;
	if(!enableDepthTest)
	{
		glDisable(GL_DEPTH_TEST);
	}
	else
	{
		glEnable(GL_DEPTH_TEST);
	}
}
// Sets the depth comparison function, skipping the GL call when the cached value matches.
static void ApplyDepthFunc(GLenum depthFunc)
{
	if(depthFunc != gl.depthFunc)
	{
		gl.depthFunc = depthFunc;
		glDepthFunc(depthFunc);
	}
}
// Enables or disables depth writes, skipping the GL call when the cached state matches.
static void ApplyDepthMask(GLboolean enableDepthWrite)
{
	if(enableDepthWrite != gl.enableDepthWrite)
	{
		gl.enableDepthWrite = enableDepthWrite;
		glDepthMask(enableDepthWrite ? GL_TRUE : GL_FALSE);
	}
}
// Sets the polygon rasterization mode for both front and back faces,
// skipping the GL call when the cached mode matches.
static void ApplyPolygonMode(GLenum polygonMode)
{
	if(polygonMode != gl.polygonMode)
	{
		gl.polygonMode = polygonMode;
		glPolygonMode(GL_FRONT_AND_BACK, polygonMode);
	}
}
// Enables or disables the polygon offset for filled polygons,
// skipping the GL call when the cached state matches.
static void ApplyPolygonOffset(qbool enablePolygonOffset)
{
	if(enablePolygonOffset == gl.enablePolygonOffset)
	{
		return;
	}

	gl.enablePolygonOffset = enablePolygonOffset;
	if(!enablePolygonOffset)
	{
		glDisable(GL_POLYGON_OFFSET_FILL);
	}
	else
	{
		glEnable(GL_POLYGON_OFFSET_FILL);
	}
}
// Enables or disables the first user clip distance (GL_CLIP_DISTANCE0),
// skipping the GL call when the cached state matches.
static void ApplyClipPlane(qbool enableClipPlane)
{
	if(enableClipPlane == gl.enableClipPlane)
	{
		return;
	}

	gl.enableClipPlane = enableClipPlane;
	if(!enableClipPlane)
	{
		glDisable(GL_CLIP_DISTANCE0);
	}
	else
	{
		glEnable(GL_CLIP_DISTANCE0);
	}
}
// Applies the alpha test mode for the current pipeline.
// 1. Alpha to coverage is turned on only when it's supported, the generic pipeline
//    is current and an actual alpha test is requested; it's turned off otherwise.
// 2. When the mode changes, the uniform carrying it is flagged dirty for the
//    current pipeline (generic and soft sprite pipelines only).
static void ApplyAlphaTest(AlphaTest alphaTest)
{
	const qbool enableA2C = glInfo.alphaToCoverageSupport && gl.pipelineId == PID_GENERIC && alphaTest != AT_ALWAYS;
	if(enableA2C != gl.enableAlphaToCoverage)
	{
		gl.enableAlphaToCoverage = enableA2C;
		if(!enableA2C)
		{
			glDisable(GL_SAMPLE_ALPHA_TO_COVERAGE);
		}
		else
		{
			glEnable(GL_SAMPLE_ALPHA_TO_COVERAGE);
		}
	}

	if(alphaTest == gl.alphaTest)
	{
		return;
	}

	gl.alphaTest = alphaTest;
	switch(gl.pipelineId)
	{
		case PID_GENERIC:
			gl.pipelines[PID_GENERIC].uniformsDirty[GU_ALPHA_TEX] = qtrue;
			break;

		case PID_SOFT_SPRITE:
			gl.pipelines[PID_SOFT_SPRITE].uniformsDirty[SU_ALPHA_TEST] = qtrue;
			break;

		default:
			break;
	}
}
static void ApplyState(unsigned int stateBits, cullType_t cullType, qbool polygonOffset)
{
// fix up the cull mode for mirrors
if(backEnd.viewParms.isMirror)
{
if(cullType == CT_BACK_SIDED)
{
cullType = CT_FRONT_SIDED;
}
else if(cullType == CT_FRONT_SIDED)
{
cullType = CT_BACK_SIDED;
}
}
ApplyCullType(cullType);
const unsigned int srcBlendBits = stateBits & GLS_SRCBLEND_BITS;
const unsigned int dstBlendBits = stateBits & GLS_DSTBLEND_BITS;
ApplyBlendFunc(srcBlendBits, dstBlendBits);
const qbool disableDepthTest = ((stateBits & GLS_DEPTHTEST_DISABLE) != 0) || backEnd.projection2D;
ApplyDepthTest(!disableDepthTest);
const qbool depthFuncEqual = (stateBits & GLS_DEPTHFUNC_EQUAL) != 0;
ApplyDepthFunc(depthFuncEqual ? GL_EQUAL : GL_LEQUAL);
const qbool enableDepthWrite = (stateBits & GLS_DEPTHMASK_TRUE) != 0 && gl.pipelineId != PID_SOFT_SPRITE;
ApplyDepthMask(enableDepthWrite ? GL_TRUE : GL_FALSE);
const qbool wireFrame = (stateBits & GLS_POLYMODE_LINE) ? 1 : 0;
ApplyPolygonMode(wireFrame ? GL_LINE : GL_FILL);
ApplyPolygonOffset(polygonOffset);
ApplyAlphaTest(GetAlphaTest(stateBits & GLS_ATEST_BITS));
}
// Records the texture environment used to combine the two textures.
// When the value changes while the generic pipeline is current, the uniform
// carrying it (GU_ALPHA_TEX) is flagged dirty.
static void ApplyTexEnv(texEnv_t texEnv)
{
	const qbool changed = texEnv != gl.texEnv;
	gl.texEnv = texEnv;

	if(changed && gl.pipelineId == PID_GENERIC)
	{
		gl.pipelines[PID_GENERIC].uniformsDirty[GU_ALPHA_TEX] = qtrue;
	}
}
// Intentionally empty: the argument is unnamed and ignored.
// It is called where a vertex buffer is used without uploading new data to it.
static void BindVertexArray(VertexBufferId)
{
}
// Rewinds the write cursor to the start of the buffer's current write range,
// then blocks until the fence guarding that range (if there is one) is signaled.
// The fence is deleted and its slot reset to NULL once it is signaled.
// A GL_WAIT_FAILED result is reported through ri.Error with ERR_FATAL.
static void Buffer_WaitForRange(ArrayBuffer* buffer)
{
// each of the LARGEBUFFER_MAX_FRAMES ranges spans capacity / LARGEBUFFER_MAX_FRAMES items
buffer->writeIndex = buffer->writeRangeIndex * (buffer->capacity / LARGEBUFFER_MAX_FRAMES);
GLsync& fence = buffer->fences[buffer->writeRangeIndex];
// no fence in this slot: nothing to wait for
if(fence == NULL)
{
return;
}
// the first attempt is a plain poll: no flush and a zero time-out
GLbitfield waitFlags = 0;
GLuint64 waitDurationNS = 0;
for(;;)
{
GLenum waitRet = glClientWaitSync(fence, waitFlags, waitDurationNS);
if(waitRet == GL_ALREADY_SIGNALED || waitRet == GL_CONDITION_SATISFIED)
{
glDeleteSync(fence);
fence = NULL;
return;
}
if(waitRet == GL_WAIT_FAILED)
{
ri.Error(ERR_FATAL, "glClientWaitSync failed with GL_WAIT_FAILED\n");
}
// after the first time, we need to start flushing and wait as long as necessary
// (each further attempt waits for up to 1e9 nanoseconds, i.e. 1 second)
waitFlags = GL_SYNC_FLUSH_COMMANDS_BIT;
waitDurationNS = 1e9;
}
}
// Inserts a fence for the buffer's current write range, then advances
// to the next range (wrapping around) and moves the write cursor to its start.
// The fence slot of the current range is expected to be empty.
static void Buffer_LockRange(ArrayBuffer* buffer)
{
	GLsync& rangeFence = buffer->fences[buffer->writeRangeIndex];
	assert(rangeFence == NULL);
	if(rangeFence == NULL)
	{
		rangeFence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
		assert(rangeFence != NULL);
	}

	const int rangeLength = buffer->capacity / LARGEBUFFER_MAX_FRAMES;
	buffer->writeRangeIndex = (buffer->writeRangeIndex + 1) % LARGEBUFFER_MAX_FRAMES;
	buffer->writeIndex = buffer->writeRangeIndex * rangeLength;
}
// Waits on the current write range of every vertex buffer, then of the index buffer.
static void Buffers_Wait()
{
	int vb = 0;
	while(vb < VB_COUNT)
	{
		Buffer_WaitForRange(&gl.arrayBuffers[vb]);
		++vb;
	}

	Buffer_WaitForRange(&gl.indexBuffer);
}
// Fences the current write range of every vertex buffer, then of the index buffer.
static void Buffers_Lock()
{
	int vb = 0;
	while(vb < VB_COUNT)
	{
		Buffer_LockRange(&gl.arrayBuffers[vb]);
		++vb;
	}

	Buffer_LockRange(&gl.indexBuffer);
}
// if qtrue, we have a large buffer for multiple frames and use fences for synchronization
// that's the case for the persistent, unsynchronized and AMD pinned memory mapping types
static qbool MappingType_UsesLargeBuffers()
{
	switch(gl.mappingType)
	{
		case MT_PERSISTENT:
		case MT_UNSYNC:
		case MT_AMDPIN:
			return qtrue;

		default:
			return qfalse;
	}
}
// Copies itemCount items from data into the given vertex or index buffer
// and sets buffer->readIndex to the item index the GPU should read from.
// The buffer is expected to be bound to its target already.
//
// Large buffer modes: the items are appended at buffer->writeIndex.
// When they would spill into the next range, the current range gets fenced,
// the write cursor jumps to the start of the next range and we wait for it to be free.
// Other modes: the items are written at the start of the buffer with glBufferSubData.
static void UploadGeometry(ArrayBuffer* buffer, const void* data, int itemCount)
{
const GLenum target = buffer->indexBuffer ? GL_ELEMENT_ARRAY_BUFFER : GL_ARRAY_BUFFER;
if(MappingType_UsesLargeBuffers())
{
const int rangeLength = buffer->capacity / LARGEBUFFER_MAX_FRAMES;
// index of the range the last uploaded item would land in
const int endRangeIndex = (buffer->writeIndex + itemCount - 1) / rangeLength;
#if defined(_DEBUG)
// sanity checks: the upload must start in the current or previous range
// and end in the current or next range
assert(endRangeIndex == buffer->writeRangeIndex ||
endRangeIndex == buffer->writeRangeIndex + 1 ||
(endRangeIndex == 0 && buffer->writeRangeIndex == LARGEBUFFER_MAX_FRAMES - 1));
const int startRangeIndex = buffer->writeIndex == 0 ? 0 : ((buffer->writeIndex - 1) / rangeLength);
assert(startRangeIndex == buffer->writeRangeIndex ||
startRangeIndex == (buffer->writeRangeIndex + LARGEBUFFER_MAX_FRAMES - 1) % LARGEBUFFER_MAX_FRAMES);
#endif
// not enough room left in the current range: fence it and move on to the next one
// (Buffer_LockRange advances writeRangeIndex and resets writeIndex)
if(endRangeIndex == buffer->writeRangeIndex + 1)
{
Buffer_LockRange(buffer);
Buffer_WaitForRange(buffer);
}
void* mappedData = NULL;
if(gl.mappingType == MT_UNSYNC)
{
// map only the byte range we're about to write, without implicit synchronization
mappedData = glMapBufferRange(target, buffer->writeIndex * buffer->itemSize, itemCount * buffer->itemSize, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
if(mappedData == NULL)
{
ri.Error(ERR_FATAL, "Couldn't map buffer\n");
}
}
else
{
// the other large buffer modes write through the pointer kept in buffer->mappedData
mappedData = buffer->mappedData + buffer->writeIndex * buffer->itemSize;
}
memcpy(mappedData, data, itemCount * buffer->itemSize);
if(gl.mappingType == MT_UNSYNC)
{
glUnmapBuffer(target);
}
// the draw call reads from where we just wrote, the next upload goes right after
buffer->readIndex = buffer->writeIndex;
buffer->writeIndex += itemCount;
}
else
{
glBufferSubData(target, (GLintptr)0, itemCount * buffer->itemSize, data);
buffer->readIndex = 0;
}
}
// Uploads tess.numVertexes items from data into the given vertex buffer.
// In large buffer modes, the data lands at a different offset for each upload,
// so the vertex attribute pointer is re-specified to start at that offset.
static void UploadVertexArray(VertexBufferId vbid, const void* data)
{
	ArrayBuffer& vb = gl.arrayBuffers[vbid];
	glBindBuffer(GL_ARRAY_BUFFER, vb.buffer);
	UploadGeometry(&vb, data, tess.numVertexes);

	if(!MappingType_UsesLargeBuffers())
	{
		return;
	}

	const GLintptr byteOffset = (GLintptr)(vb.readIndex * vb.itemSize);
	glVertexAttribPointer(vbid, vb.componentCount, vb.dataType, vb.normalized, vb.itemSize, (const GLvoid*)byteOffset);
}
// Uploads indexCount indices from data into the index buffer.
static void UploadIndices(const void* data, int indexCount)
{
	// @NOTE: we only have 1 index buffer and it's already bound,
	// so there is no need to call glBindBuffer with GL_ELEMENT_ARRAY_BUFFER here
	UploadGeometry(&gl.indexBuffer, data, indexCount);
}
// Creates the GL buffer object of a vertex or index buffer, allocates its storage
// according to gl.mappingType and leaves it bound to its target.
// buffer->capacity and buffer->itemSize must be set by the caller beforehand.
// Failures in the persistent and AMD pinned memory paths are reported
// through ri.Error with ERR_FATAL.
static void CreateGeometryBufferStorage(ArrayBuffer* buffer)
{
const GLenum target = buffer->indexBuffer ? GL_ELEMENT_ARRAY_BUFFER : GL_ARRAY_BUFFER;
if(gl.mappingType == MT_PERSISTENT)
{
// immutable storage that is mapped once and stays mapped (persistent + coherent)
glGenBuffers(1, &buffer->buffer);
glBindBuffer(target, buffer->buffer);
const GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
glBufferStorage(target, buffer->capacity * buffer->itemSize, NULL, flags);
buffer->mappedData = (byte*)glMapBufferRange(target, 0, buffer->capacity * buffer->itemSize, flags);
if(buffer->mappedData == NULL)
{
ri.Error(ERR_FATAL, "Couldn't map buffer storage\n");
}
}
else if(gl.mappingType == MT_AMDPIN)
{
// the GL calls below are checked with glGetError, so start with an empty queue
while(glGetError() != GL_NO_ERROR) {} // clear the error queue
GLenum errorCode = GL_NO_ERROR;
// AllocatePinnedMemory is expected to set buffer->mappedData and buffer->pinnedByteCount
AllocatePinnedMemory(buffer);
if(buffer->mappedData == NULL)
{
ri.Error(ERR_FATAL, "Couldn't allocate buffer storage\n");
}
glGenBuffers(1, &buffer->buffer);
glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, buffer->buffer);
if((errorCode = glGetError()) != GL_NO_ERROR)
{
ri.Error(ERR_FATAL, "glBindBuffer GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD failed with error code: 0x%08X\n", (unsigned int)errorCode);
}
// hand our own memory block over to the GL buffer object
glBufferData(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, buffer->pinnedByteCount, buffer->mappedData, GL_DYNAMIC_DRAW);
if((errorCode = glGetError()) != GL_NO_ERROR)
{
ri.Error(ERR_FATAL, "glBufferData GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD failed with error code: 0x%08X\n", (unsigned int)errorCode);
}
// from here on, the buffer is used through its regular target
glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0);
glBindBuffer(target, buffer->buffer);
}
else
{
// remaining mapping types: regular mutable storage with no initial data
glGenBuffers(1, &buffer->buffer);
glBindBuffer(target, buffer->buffer);
glBufferData(target, buffer->capacity * buffer->itemSize, NULL, GL_DYNAMIC_DRAW);
}
}
static void DrawElements(int indexCount)
{
glDrawElements(GL_TRIANGLES, indexCount, GL_UNSIGNED_INT, (const GLvoid*)(GLintptr)(gl.indexBuffer.readIndex * gl.indexBuffer.itemSize));
backEnd.pc3D[RB_DRAW_CALLS]++;
}
// Puts the GL context into the back-end's default state and
// resets the state cache in "gl" so that it mirrors what was just set.
static void SetDefaultState()
{
	glViewport(0, 0, glConfig.vidWidth, glConfig.vidHeight);
	glScissor(0, 0, glConfig.vidWidth, glConfig.vidHeight);
	glEnable(GL_DEPTH_TEST);
	glEnable(GL_SCISSOR_TEST);
	glEnable(GL_BLEND);
	glDisable(GL_CULL_FACE);
	glDisable(GL_POLYGON_OFFSET_FILL);
	// @NOTE: this call used to be issued twice, once is enough
	glPixelStorei(GL_PACK_ALIGNMENT, 1);
	glDepthFunc(GL_LEQUAL);
	glDepthMask(GL_FALSE);
	glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
	glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
	glCullFace(GL_FRONT);
	glPolygonOffset(-1.0f, -1.0f);
	glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
	glClearDepth(1.0f);
	glActiveTexture(GL_TEXTURE0);
	glDisable(GL_CLIP_DISTANCE0);
	glDisable(GL_SAMPLE_ALPHA_TO_COVERAGE);
	glDisable(GL_DEPTH_CLAMP);

	// GLuint(-1) makes sure the first bind on each texture unit is never skipped
	gl.boundTextures[0] = GLuint(-1);
	gl.boundTextures[1] = GLuint(-1);
	gl.activeTextureSlot = 0;

	// the cached values below must match the GL calls above
	gl.cullType = CT_TWO_SIDED;
	gl.srcBlendBits = GLS_SRCBLEND_SRC_ALPHA;
	gl.dstBlendBits = GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA;
	gl.enableDepthTest = qtrue;
	gl.depthFunc = GL_LEQUAL;
	gl.enableDepthWrite = GL_FALSE;
	gl.polygonMode = GL_FILL;
	gl.enablePolygonOffset = qfalse;
	gl.enableClipPlane = qfalse;
	gl.enableAlphaToCoverage = qfalse;
}
static qbool InitCompute()
{
while(glGetError() != GL_NO_ERROR) {} // clear the error queue
glGenTextures(ARRAY_LEN(gl.mipGen.textures), gl.mipGen.textures);
glBindTexture(GL_TEXTURE_2D, gl.mipGen.textures[0]);
SetDebugName(GL_TEXTURE, gl.mipGen.textures[0], "mip-gen float16 texture #1");
glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA16F, gl.maxTextureSize, gl.maxTextureSize);
glBindTexture(GL_TEXTURE_2D, gl.mipGen.textures[1]);
SetDebugName(GL_TEXTURE, gl.mipGen.textures[1], "mip-gen float16 texture #2");
glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA16F, gl.maxTextureSize, gl.maxTextureSize);
glBindTexture(GL_TEXTURE_2D, gl.mipGen.textures[2]);
SetDebugName(GL_TEXTURE, gl.mipGen.textures[2], "mip-gen uint8 texture");
glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, gl.maxTextureSize, gl.maxTextureSize);
if(glGetError() != GL_NO_ERROR)
{
HandleError("Failed to allocate storage for the mip-map generation textures\n");
return qfalse;
}
if(!CreateComputeProgram(&gl.mipGen.programs[CPID_GAMMA_TO_LINEAR], gammaToLinear_cs, "gamma to linear") ||
!CreateComputeProgram(&gl.mipGen.programs[CPID_LINEAR_TO_GAMMA], linearToGamma_cs, "linear to gamma") ||
!CreateComputeProgram(&gl.mipGen.programs[CPID_DOWN_SAMPLE], downSample_cs, "down sample"))
{
HandleError("Failed to compile compute shaders for GPU-side mip-map generation\n");
return qfalse;
}
return qtrue;
}
// Picks the geometry streaming method from r_gl3_geoStream and the available extensions.
// Explicit requests are honored first: sub-data and unsynchronized mapping always,
// AMD pinned memory and persistent mapping only when supported.
// Otherwise (automatic mode or unsupported request), the order of preference is:
// persistent mapping (automatic mode only), AMD pinned memory,
// unsynchronized mapping on Intel renderers, sub-data.
static MappingType GetMappingTypeFromCvar()
{
	const int mode = r_gl3_geoStream->integer;

	if(mode == GL3MAP_SUBDATA)
	{
		return MT_SUBDATA;
	}

	if(mode == GL3MAP_MAPUNSYNC)
	{
		return MT_UNSYNC;
	}

	if(mode == GL3MAP_AMDPIN && GLEW_AMD_pinned_memory)
	{
		return MT_AMDPIN;
	}

	if((mode == GL3MAP_AUTO || mode == GL3MAP_MAPPERS) && (GLEW_VERSION_4_4 || GLEW_ARB_buffer_storage))
	{
		return MT_PERSISTENT;
	}

	if(GLEW_AMD_pinned_memory)
	{
		return MT_AMDPIN;
	}

	// glGetString returns NULL on error and strstr must never be handed a NULL pointer
	const char* const renderer = (const char*)glGetString(GL_RENDERER);
	if(renderer != NULL && strstr(renderer, "Intel") != NULL)
	{
		return MT_UNSYNC;
	}

	return MT_SUBDATA;
}
// Generates the names of all the timer query objects in gl.timerQueries.
static void InitQueries()
{
	glGenQueries(ARRAY_LEN(gl.timerQueries), gl.timerQueries);
}
static void BeginQueries()
{
glBeginQuery(GL_TIME_ELAPSED, gl.timerQueries[gl.queryWriteIndex]);
gl.queryStarted[gl.queryWriteIndex] = qtrue;
}
static void EndQueries()
{
// finish this frame
glEndQuery(GL_TIME_ELAPSED);
gl.queryWriteIndex = (gl.queryWriteIndex + 1) % ARRAY_LEN(gl.timerQueries);
// try to grab a previous frame's result
if(gl.queryStarted[gl.queryReadIndex])
{
const GLuint query = gl.timerQueries[gl.queryReadIndex];
backEnd.pc3D[RB_USEC_GPU] = 0;
GLint done = GL_FALSE;
glGetQueryObjectiv(query, GL_QUERY_RESULT_AVAILABLE, &done);
if(done != GL_FALSE)
{
GLint durationNS = 0;
glGetQueryObjectiv(query, GL_QUERY_RESULT, &durationNS);
if(durationNS > 0)
{
backEnd.pc3D[RB_USEC_GPU] = durationNS / 1000;
}
gl.queryReadIndex = (gl.queryReadIndex + 1) % ARRAY_LEN(gl.timerQueries);
}
}
}
// One-time initialization of the back-end's private state ("gl"):
// texture size limits, frame buffers, geometry streaming method, vertex/index buffer layouts,
// pipeline descriptions (attributes and uniform names), GLSL programs, buffer storage,
// uniform locations, optional GPU-side mip-map generation and timer queries.
// The generic pipeline is the current one when this function returns.
static void Init()
{
// start from a clean slate: every field of the private state is zeroed
memset(&gl, 0, sizeof(gl));
// clamp the driver's limit to our own; fall back to our own limit if the query yields nothing useful
GLint maxTextureSize = 0;
glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize);
gl.maxTextureSize = maxTextureSize > 0 ? min((int)maxTextureSize, MAX_GPU_TEXTURE_SIZE) : MAX_GPU_TEXTURE_SIZE;
glConfig.unused_maxTextureSize = gl.maxTextureSize;
glInfo.maxTextureSize = gl.maxTextureSize;
glInfo.depthFadeSupport = r_depthFade->integer == 1;
FBO_Init();
// alpha to coverage is only offered when the multi-sample frame buffer is enabled
if(gl.fbMSEnabled && r_alphaToCoverage->integer)
{
glInfo.alphaToCoverageSupport = qtrue;
}
// buffer capacities: a single batch by default, multiple frames worth of data in large buffer modes
int maxVertexCount = SHADER_MAX_VERTEXES;
int maxIndexCount = SHADER_MAX_INDEXES;
gl.mappingType = GetMappingTypeFromCvar();
if(MappingType_UsesLargeBuffers())
{
maxVertexCount = LARGEBUFFER_MAX_VERTEXES * LARGEBUFFER_MAX_FRAMES;
maxIndexCount = LARGEBUFFER_MAX_INDEXES * LARGEBUFFER_MAX_FRAMES;
}
// vertex buffer layouts: item size in bytes, component count, component type, normalization
gl.arrayBuffers[VB_POSITION].capacity = maxVertexCount;
gl.arrayBuffers[VB_POSITION].itemSize = sizeof(tess.xyz[0]);
gl.arrayBuffers[VB_POSITION].componentCount = 4;
gl.arrayBuffers[VB_POSITION].dataType = GL_FLOAT;
gl.arrayBuffers[VB_POSITION].normalized = GL_FALSE;
gl.arrayBuffers[VB_NORMAL].capacity = maxVertexCount;
gl.arrayBuffers[VB_NORMAL].itemSize = sizeof(tess.normal[0]);
gl.arrayBuffers[VB_NORMAL].componentCount = 4;
gl.arrayBuffers[VB_NORMAL].dataType = GL_FLOAT;
gl.arrayBuffers[VB_NORMAL].normalized = GL_FALSE;
gl.arrayBuffers[VB_TEXCOORD].capacity = maxVertexCount;
gl.arrayBuffers[VB_TEXCOORD].itemSize = sizeof(tess.svars[0].texcoords[0]);
gl.arrayBuffers[VB_TEXCOORD].componentCount = 2;
gl.arrayBuffers[VB_TEXCOORD].dataType = GL_FLOAT;
gl.arrayBuffers[VB_TEXCOORD].normalized = GL_FALSE;
gl.arrayBuffers[VB_TEXCOORD2].capacity = maxVertexCount;
gl.arrayBuffers[VB_TEXCOORD2].itemSize = sizeof(tess.svars[0].texcoords[0]);
gl.arrayBuffers[VB_TEXCOORD2].componentCount = 2;
gl.arrayBuffers[VB_TEXCOORD2].dataType = GL_FLOAT;
gl.arrayBuffers[VB_TEXCOORD2].normalized = GL_FALSE;
// colors are 4 unsigned bytes, normalized
gl.arrayBuffers[VB_COLOR].capacity = maxVertexCount;
gl.arrayBuffers[VB_COLOR].itemSize = sizeof(tess.svars[0].colors[0]);
gl.arrayBuffers[VB_COLOR].componentCount = 4;
gl.arrayBuffers[VB_COLOR].dataType = GL_UNSIGNED_BYTE;
gl.arrayBuffers[VB_COLOR].normalized = GL_TRUE;
gl.indexBuffer.capacity = maxIndexCount;
gl.indexBuffer.itemSize = sizeof(tess.indexes[0]);
gl.indexBuffer.indexBuffer = qtrue;
// generic pipeline: vertex attributes and uniform names
gl.pipelines[PID_GENERIC].arrayBuffers[VB_POSITION].enabled = qtrue;
gl.pipelines[PID_GENERIC].arrayBuffers[VB_POSITION].attribName = "position";
gl.pipelines[PID_GENERIC].arrayBuffers[VB_TEXCOORD].enabled = qtrue;
gl.pipelines[PID_GENERIC].arrayBuffers[VB_TEXCOORD].attribName = "texCoords1";
gl.pipelines[PID_GENERIC].arrayBuffers[VB_TEXCOORD2].enabled = qtrue;
gl.pipelines[PID_GENERIC].arrayBuffers[VB_TEXCOORD2].attribName = "texCoords2";
gl.pipelines[PID_GENERIC].arrayBuffers[VB_COLOR].enabled = qtrue;
gl.pipelines[PID_GENERIC].arrayBuffers[VB_COLOR].attribName = "color";
gl.pipelines[PID_GENERIC].uniformNames[GU_MODELVIEW] = "modelView";
gl.pipelines[PID_GENERIC].uniformNames[GU_PROJECTION] = "projection";
gl.pipelines[PID_GENERIC].uniformNames[GU_CLIP_PLANE] = "clipPlane";
gl.pipelines[PID_GENERIC].uniformNames[GU_ALPHA_TEX] = "alphaTex";
gl.pipelines[PID_GENERIC].uniformNames[GU_GREYSCALE] = "greyscale";
gl.pipelines[PID_GENERIC].uniformNames[GU_GAMMA_BRIGHT_NOISE_SEED] = "gammaBrightNoiseSeed";
gl.pipelines[PID_GENERIC].uniformNames[GU_A2C_ALPHA_BOOST] = "alphaBoost";
// dynamic light pipeline: vertex attributes and uniform names
gl.pipelines[PID_DYNAMIC_LIGHT].arrayBuffers[VB_POSITION].enabled = qtrue;
gl.pipelines[PID_DYNAMIC_LIGHT].arrayBuffers[VB_POSITION].attribName = "position";
gl.pipelines[PID_DYNAMIC_LIGHT].arrayBuffers[VB_NORMAL].enabled = qtrue;
gl.pipelines[PID_DYNAMIC_LIGHT].arrayBuffers[VB_NORMAL].attribName = "normal";
gl.pipelines[PID_DYNAMIC_LIGHT].arrayBuffers[VB_TEXCOORD].enabled = qtrue;
gl.pipelines[PID_DYNAMIC_LIGHT].arrayBuffers[VB_TEXCOORD].attribName = "texCoords1";
gl.pipelines[PID_DYNAMIC_LIGHT].uniformNames[DU_MODELVIEW] = "modelView";
gl.pipelines[PID_DYNAMIC_LIGHT].uniformNames[DU_PROJECTION] = "projection";
gl.pipelines[PID_DYNAMIC_LIGHT].uniformNames[DU_CLIP_PLANE] = "clipPlane";
gl.pipelines[PID_DYNAMIC_LIGHT].uniformNames[DU_LIGHT_POS] = "osLightPos";
gl.pipelines[PID_DYNAMIC_LIGHT].uniformNames[DU_EYE_POS] = "osEyePos";
gl.pipelines[PID_DYNAMIC_LIGHT].uniformNames[DU_LIGHT_COLOR_RADIUS] = "lightColorRadius";
gl.pipelines[PID_DYNAMIC_LIGHT].uniformNames[DU_OPAQUE] = "opaque";
gl.pipelines[PID_DYNAMIC_LIGHT].uniformNames[DU_INTENSITY] = "intensity";
gl.pipelines[PID_DYNAMIC_LIGHT].uniformNames[DU_GREYSCALE] = "greyscale";
// soft sprite (depth fade) pipeline: vertex attributes and uniform names
gl.pipelines[PID_SOFT_SPRITE].arrayBuffers[VB_POSITION].enabled = qtrue;
gl.pipelines[PID_SOFT_SPRITE].arrayBuffers[VB_POSITION].attribName = "position";
gl.pipelines[PID_SOFT_SPRITE].arrayBuffers[VB_TEXCOORD].enabled = qtrue;
gl.pipelines[PID_SOFT_SPRITE].arrayBuffers[VB_TEXCOORD].attribName = "texCoords1";
gl.pipelines[PID_SOFT_SPRITE].arrayBuffers[VB_COLOR].enabled = qtrue;
gl.pipelines[PID_SOFT_SPRITE].arrayBuffers[VB_COLOR].attribName = "color";
gl.pipelines[PID_SOFT_SPRITE].uniformNames[SU_MODELVIEW] = "modelView";
gl.pipelines[PID_SOFT_SPRITE].uniformNames[SU_PROJECTION] = "projection";
gl.pipelines[PID_SOFT_SPRITE].uniformNames[SU_CLIP_PLANE] = "clipPlane";
gl.pipelines[PID_SOFT_SPRITE].uniformNames[SU_ALPHA_TEST] = "alphaTest";
gl.pipelines[PID_SOFT_SPRITE].uniformNames[SU_DIST_OFFSET] = "distOffset";
gl.pipelines[PID_SOFT_SPRITE].uniformNames[SU_COLOR_SCALE] = "colorScale";
gl.pipelines[PID_SOFT_SPRITE].uniformNames[SU_COLOR_BIAS] = "colorBias";
gl.pipelines[PID_SOFT_SPRITE].uniformNames[SU_GREYSCALE] = "greyscale";
// post-process pipeline: no vertex attribute is enabled, a single uniform
gl.pipelines[PID_POST_PROCESS].uniformNames[PU_BRIGHT_GAMMA_GREY] = "brightGammaGrey";
CreateGraphicsProgram(PID_GENERIC, generic_vs, generic_fs, "generic");
CreateGraphicsProgram(PID_DYNAMIC_LIGHT, dl_vs, dl_fs, "dynamic light");
CreateGraphicsProgram(PID_SOFT_SPRITE, sprite_vs, sprite_fs, "soft sprite");
CreateGraphicsProgram(PID_POST_PROCESS, post_vs, post_fs, "post-process");
// a single vertex array object is created and left bound; its name isn't kept
GLuint vertexArray;
glGenVertexArrays(1, &vertexArray);
glBindVertexArray(vertexArray);
// the index buffer gets bound here and the code in UploadIndices relies on it staying bound
CreateGeometryBufferStorage(&gl.indexBuffer);
for(int i = 0; i < VB_COUNT; ++i)
{
CreateGeometryBufferStorage(&gl.arrayBuffers[i]);
}
// look up the sampler and uniform locations of every pipeline
for(int p = 0; p < PID_COUNT; ++p)
{
Pipeline* pipeline = &gl.pipelines[p];
pipeline->textureLocations[0] = glGetUniformLocation(pipeline->program.program, "texture1");
pipeline->textureLocations[1] = glGetUniformLocation(pipeline->program.program, "texture2");
for(int i = 0; i < ARRAY_LEN(pipeline->uniformLocations); ++i)
{
if(pipeline->uniformNames[i] != NULL)
{
pipeline->uniformLocations[i] = glGetUniformLocation(pipeline->program.program, pipeline->uniformNames[i]);
#if defined(_DEBUG)
// these 2 uniforms are allowed to be missing when the matching feature is turned off
if((p == PID_GENERIC && i == GU_GAMMA_BRIGHT_NOISE_SEED && r_dither->integer == 0) ||
(p == PID_GENERIC && i == GU_A2C_ALPHA_BOOST && !glInfo.alphaToCoverageSupport))
{
continue;
}
assert(pipeline->uniformLocations[i] != -1);
#endif
}
}
}
// GPU-side mip-map generation is optional: errors are only printed while setting it up
if(r_gpuMipGen->integer && (GLEW_VERSION_4_3 || (GLEW_ARB_compute_shader && GLEW_ARB_texture_storage && GLEW_ARB_shader_image_load_store && GLEW_ARB_copy_image)))
{
gl.errorMode = EM_PRINT;
glInfo.mipGenSupport = InitCompute();
gl.errorMode = EM_FATAL;
}
// PID_COUNT isn't a valid pipeline, so the ApplyPipeline call below can't be skipped
gl.pipelineId = PID_COUNT;
ApplyPipeline(PID_GENERIC);
InitQueries();
}
// Fills in glConfig: the vendor, renderer and version strings come from the driver,
// the extension list is left empty and the "unused_" fields get fixed values
// (except for unused_isFullscreen, which mirrors r_fullscreen).
static void InitGLConfig()
{
// @NOTE: could use glGetStringi in a loop to grab the extension list, but it's useless either way
Q_strncpyz(glConfig.vendor_string, (const char*)glGetString(GL_VENDOR), sizeof(glConfig.vendor_string));
Q_strncpyz(glConfig.renderer_string, (const char*)glGetString(GL_RENDERER), sizeof(glConfig.renderer_string));
Q_strncpyz(glConfig.version_string, (const char*)glGetString(GL_VERSION), sizeof(glConfig.version_string));
Q_strncpyz(glConfig.extensions_string, "", sizeof(glConfig.extensions_string));
glConfig.unused_maxTextureSize = MAX_GPU_TEXTURE_SIZE;
glConfig.unused_maxActiveTextures = 0;
glConfig.unused_driverType = 0; // ICD
glConfig.unused_hardwareType = 0; // generic
glConfig.unused_deviceSupportsGamma = qtrue;
glConfig.unused_textureCompression = 0; // no compression
glConfig.unused_textureEnvAddAvailable = qtrue;
glConfig.unused_displayFrequency = 0;
glConfig.unused_isFullscreen = !!r_fullscreen->integer;
glConfig.unused_stereoEnabled = qfalse;
glConfig.unused_smpActive = qfalse;
}
// Fills in glInfo with its initial values: our own texture size limit,
// the driver's anisotropy limit (0 without the anisotropic filtering extension)
// and all the optional features flagged as unsupported.
static void InitGLInfo()
{
	glInfo.maxTextureSize = MAX_GPU_TEXTURE_SIZE;

	if(!GLEW_EXT_texture_filter_anisotropic)
	{
		glInfo.maxAnisotropy = 0;
	}
	else
	{
		glGetIntegerv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &glInfo.maxAnisotropy);
	}

	glInfo.depthFadeSupport = qfalse;
	glInfo.mipGenSupport = qfalse;
	glInfo.alphaToCoverageSupport = qfalse;
}
// Back-end entry point for initialization. Always returns qtrue.
// The full initialization (window/context, glConfig, glInfo, private state) only runs
// when glConfig.vidWidth is 0; the default GL state is applied on every call.
// A pending GL error at the end is printed, not treated as a failure.
static qbool GAL_Init()
{
	const qbool firstInit = glConfig.vidWidth == 0;
	if(firstInit)
	{
		// the order of these calls can not be changed
		Sys_V_Init(GAL_GL3);
		if(!GLEW_VERSION_3_2)
		{
			ri.Error(ERR_FATAL, "OpenGL 3.2 is required by the selected back-end!\n");
		}
		InitGLConfig();
		InitGLInfo();
		Init();

		// apply the current V-Sync option after the first rendered frame
		r_swapInterval->modified = qtrue;
	}

	SetDefaultState();

	const int errorCode = glGetError();
	if(errorCode != GL_NO_ERROR)
	{
		ri.Printf(PRINT_ALL, "glGetError() = 0x%x\n", errorCode);
	}

	return qtrue;
}
// Back-end entry point for shutting down.
// Deletes the GL textures of all registered images, empties the image list
// and invalidates the texture bind cache.
// On a full shut down with AMD pinned memory, our own memory buffers are freed too.
static void GAL_ShutDown(qbool fullShutDown)
{
	for(int imageIndex = 0; imageIndex < tr.numImages; ++imageIndex)
	{
		const GLuint name = (GLuint)tr.images[imageIndex]->texnum;
		glDeleteTextures(1, &name);
	}
	tr.numImages = 0;
	memset(tr.images, 0, sizeof(tr.images));

	gl.boundTextures[0] = GLuint(-1);
	gl.boundTextures[1] = GLuint(-1);

	if(!fullShutDown || gl.mappingType != MT_AMDPIN)
	{
		return;
	}

	// We flush the command queue and wait for all commands to be done executing
	// to make sure the GPU is done accessing our own memory buffers.
	// We could also have used a fence instead.
	glFlush();
	glFinish();

	// Now that it's safe to do so, free our memory buffers.
	for(int vb = 0; vb < ARRAY_LEN(gl.arrayBuffers); ++vb)
	{
		FreePinnedMemory(&gl.arrayBuffers[vb]);
	}
	FreePinnedMemory(&gl.indexBuffer);
}
static void GAL_BeginFrame()
{
BeginQueries();
FBO_Bind();
ApplyViewportAndScissor(0, 0, glConfig.vidWidth, glConfig.vidHeight);
if(r_clear->integer)
{
glClearColor(1.0f, 0.0f, 0.5f, 1.0f);
}
else
{
glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
}
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
if(MappingType_UsesLargeBuffers())
{
Buffers_Wait();
}
}
// Back-end entry point called at the end of a frame.
// Fences the geometry buffers (large buffer modes), resolves the multi-sample color buffer
// (when enabled), runs the post-process pass from one frame buffer of the gl.fbSS pair
// into the other, blits the result to the back buffer and ends the GPU timer query.
static void GAL_EndFrame()
{
if(MappingType_UsesLargeBuffers())
{
Buffers_Lock();
}
if(gl.fbMSEnabled)
{
FBO_ResolveColor();
}
ApplyPipeline(PID_POST_PROCESS);
ApplyState(GLS_DEPTHTEST_DISABLE, CT_TWO_SIDED, qfalse);
ApplyViewportAndScissor(0, 0, glConfig.vidWidth, glConfig.vidHeight);
// the post-process pass samples the color texture of the frame buffer at the read index
BindTexture(0, gl.fbSS[gl.fbReadIndex].color);
Pipeline* const pipeline = &gl.pipelines[PID_POST_PROCESS];
// x: brightness, y: inverse gamma, z: greyscale
glUniform3f(pipeline->uniformLocations[PU_BRIGHT_GAMMA_GREY], r_brightness->value, 1.0f / r_gamma->value, r_greyscale->value);
// flip the index: the other frame buffer of the pair is the one passed to FBO_Bind
// and the index now designates the frame buffer holding the post-processed image
gl.fbReadIndex ^= 1;
FBO_Bind(&gl.fbSS[gl.fbReadIndex]);
// 3 vertices and no enabled vertex attribute for this pipeline
// NOTE(review): presumably a full-screen triangle generated by the vertex shader - confirm in post_vs
glDrawArrays(GL_TRIANGLES, 0, 3);
backEnd.pc3D[RB_DRAW_CALLS]++;
// the blit targets the window, whose size can differ from the render resolution
ApplyViewportAndScissor(0, 0, glInfo.winWidth, glInfo.winHeight);
FBO_BlitToBackBuffer();
EndQueries();
}
// Draws the current batch ("tess") with the generic pipeline:
// dirty uniforms first, then positions and indices once for the whole batch,
// then 1 draw call per shader stage (or per pair of stages when multi-texturing)
// and 1 more draw call for the fog pass when tess.drawFog is set.
static void DrawGeneric()
{
Pipeline* const pipeline = &gl.pipelines[PID_GENERIC];
// only upload the uniforms flagged dirty, then clear their flags
if(pipeline->uniformsDirty[GU_MODELVIEW])
{
glUniformMatrix4fv(pipeline->uniformLocations[GU_MODELVIEW], 1, GL_FALSE, gl.modelViewMatrix);
pipeline->uniformsDirty[GU_MODELVIEW] = qfalse;
}
if(pipeline->uniformsDirty[GU_PROJECTION])
{
glUniformMatrix4fv(pipeline->uniformLocations[GU_PROJECTION], 1, GL_FALSE, gl.projectionMatrix);
pipeline->uniformsDirty[GU_PROJECTION] = qfalse;
}
if(pipeline->uniformsDirty[GU_CLIP_PLANE])
{
glUniform4fv(pipeline->uniformLocations[GU_CLIP_PLANE], 1, gl.clipPlane);
pipeline->uniformsDirty[GU_CLIP_PLANE] = qfalse;
}
// this uniform can be missing from the program (location -1), in which case it's skipped
if(pipeline->uniformsDirty[GU_GAMMA_BRIGHT_NOISE_SEED] &&
pipeline->uniformLocations[GU_GAMMA_BRIGHT_NOISE_SEED] != -1)
{
// x: inverse gamma, y: inverse brightness,
// z: dither strength (0 in 2D), w: random value in [0;1]
glUniform4f(
pipeline->uniformLocations[GU_GAMMA_BRIGHT_NOISE_SEED],
1.0f / r_gamma->value,
1.0f / r_brightness->value,
backEnd.projection2D ? 0.0f : r_ditherStrength->value,
(float)rand() / (float)RAND_MAX);
pipeline->uniformsDirty[GU_GAMMA_BRIGHT_NOISE_SEED] = qfalse;
}
// this uniform can be missing from the program too
if(pipeline->uniformsDirty[GU_A2C_ALPHA_BOOST] &&
pipeline->uniformLocations[GU_A2C_ALPHA_BOOST] != -1)
{
glUniform1f(pipeline->uniformLocations[GU_A2C_ALPHA_BOOST], r_alphaToCoverageMipBoost->value);
pipeline->uniformsDirty[GU_A2C_ALPHA_BOOST] = qfalse;
}
// the greyscale value is per batch, so it's also compared against the last value uploaded
if(pipeline->uniformsDirty[GU_GREYSCALE] ||
tess.greyscale != gl.greyscale)
{
glUniform1f(pipeline->uniformLocations[GU_GREYSCALE], tess.greyscale);
gl.greyscale = tess.greyscale;
pipeline->uniformsDirty[GU_GREYSCALE] = qfalse;
}
// positions and indices are shared by all the stages
UploadVertexArray(VB_POSITION, tess.xyz);
UploadIndices(tess.indexes, tess.numIndexes);
for(int i = 0; i < tess.shader->numStages; ++i)
{
const shaderStage_t* const stage = tess.xstages[i];
ApplyState(stage->stateBits, tess.shader->cullType, tess.shader->polygonOffset);
UploadVertexArray(VB_TEXCOORD, tess.svars[i].texcoordsptr);
UploadVertexArray(VB_COLOR, tess.svars[i].colors);
BindBundle(0, &stage->bundle);
if(stage->mtStages == 0)
{
// single texture: the white image goes in the second texture unit
BindImage(1, tr.whiteImage);
BindVertexArray(VB_TEXCOORD2);
ApplyTexEnv(TE_DISABLED);
}
else
{
// multi-texture: the next stage supplies the second texture, its texture coordinates
// and the texture environment, so it's consumed here and skipped by the loop
const shaderStage_t* const stage2 = tess.xstages[i + 1];
BindBundle(1, &stage2->bundle);
UploadVertexArray(VB_TEXCOORD2, tess.svars[i + 1].texcoordsptr);
ApplyTexEnv(stage2->mtEnv);
++i;
}
// ApplyState and ApplyTexEnv flag this uniform dirty when the alpha test or texture environment changes
if(pipeline->uniformsDirty[GU_ALPHA_TEX])
{
glUniform2ui(pipeline->uniformLocations[GU_ALPHA_TEX], gl.alphaTest, gl.texEnv);
pipeline->uniformsDirty[GU_ALPHA_TEX] = qfalse;
}
DrawElements(tess.numIndexes);
}
// fog pass: same geometry, drawn with the fog image and the fog texture coordinates and colors
if(tess.drawFog)
{
ApplyState(tess.fogStateBits, tess.shader->cullType, tess.shader->polygonOffset);
UploadVertexArray(VB_TEXCOORD, tess.svarsFog.texcoordsptr);
BindVertexArray(VB_TEXCOORD2);
UploadVertexArray(VB_COLOR, tess.svarsFog.colors);
BindImage(0, tr.fogImage);
BindImage(1, tr.whiteImage);
ApplyTexEnv(TE_DISABLED);
if(pipeline->uniformsDirty[GU_ALPHA_TEX])
{
glUniform2ui(pipeline->uniformLocations[GU_ALPHA_TEX], gl.alphaTest, gl.texEnv);
pipeline->uniformsDirty[GU_ALPHA_TEX] = qfalse;
}
DrawElements(tess.numIndexes);
}
}
static void DrawDynamicLight()
{
Pipeline* const pipeline = &gl.pipelines[PID_DYNAMIC_LIGHT];
const int stageIndex = tess.shader->lightingStages[ST_DIFFUSE];
const shaderStage_t* stage = tess.xstages[stageIndex];
UploadVertexArray(VB_POSITION, tess.xyz);
UploadVertexArray(VB_NORMAL, tess.normal);
UploadVertexArray(VB_TEXCOORD, tess.svars[stageIndex].texcoordsptr);
UploadIndices(tess.dlIndexes, tess.dlNumIndexes);
ApplyState(backEnd.dlStateBits, tess.shader->cullType, tess.shader->polygonOffset);
BindBundle(0, &stage->bundle);
if(backEnd.dlOpaque != gl.dlOpaque)
{
gl.dlOpaque = backEnd.dlOpaque;
pipeline->uniformsDirty[DU_OPAQUE] = qtrue;
}
if(backEnd.dlIntensity != gl.dlIntensity)
{
gl.dlIntensity = backEnd.dlIntensity;
pipeline->uniformsDirty[DU_INTENSITY] = qtrue;
}
if(tess.greyscale != gl.greyscale)
{
gl.greyscale = tess.greyscale;
pipeline->uniformsDirty[DU_GREYSCALE] = qtrue;
}
if(pipeline->uniformsDirty[DU_MODELVIEW])
{
glUniformMatrix4fv(pipeline->uniformLocations[DU_MODELVIEW], 1, GL_FALSE, gl.modelViewMatrix);
}
if(pipeline->uniformsDirty[DU_PROJECTION])
{
glUniformMatrix4fv(pipeline->uniformLocations[DU_PROJECTION], 1, GL_FALSE, gl.projectionMatrix);
}
if(pipeline->uniformsDirty[DU_CLIP_PLANE])
{
glUniform4fv(pipeline->uniformLocations[DU_CLIP_PLANE], 1, gl.clipPlane);
}
if(pipeline->uniformsDirty[DU_OPAQUE])
{
glUniform1f(pipeline->uniformLocations[DU_OPAQUE], gl.dlOpaque ? 1.0f : 0.0f);
}
if(pipeline->uniformsDirty[DU_INTENSITY])
{
glUniform1f(pipeline->uniformLocations[DU_INTENSITY], gl.dlIntensity);
}
if(pipeline->uniformsDirty[DU_GREYSCALE])
{
glUniform1f(pipeline->uniformLocations[DU_GREYSCALE], tess.greyscale);
}
memset(pipeline->uniformsDirty, 0, sizeof(pipeline->uniformsDirty));
DrawElements(tess.dlNumIndexes);
}
static void DrawDepthFade()
{
Pipeline* const pipeline = &gl.pipelines[PID_SOFT_SPRITE];
if(pipeline->uniformsDirty[SU_PROJECTION])
{
glUniformMatrix4fv(pipeline->uniformLocations[SU_PROJECTION], 1, GL_FALSE, gl.projectionMatrix);
pipeline->uniformsDirty[SU_PROJECTION] = qfalse;
}
if(pipeline->uniformsDirty[SU_MODELVIEW])
{
glUniformMatrix4fv(pipeline->uniformLocations[SU_MODELVIEW], 1, GL_FALSE, gl.modelViewMatrix);
pipeline->uniformsDirty[SU_MODELVIEW] = qfalse;
}
if(pipeline->uniformsDirty[SU_CLIP_PLANE])
{
glUniform4fv(pipeline->uniformLocations[SU_CLIP_PLANE], 1, gl.clipPlane);
pipeline->uniformsDirty[SU_CLIP_PLANE] = qfalse;
}
if(pipeline->uniformsDirty[SU_COLOR_SCALE] ||
memcmp(gl.depthFadeScale, r_depthFadeScale[tess.shader->dfType], sizeof(gl.depthFadeScale)) != 0)
{
glUniform4fv(pipeline->uniformLocations[SU_COLOR_SCALE], 1, r_depthFadeScale[tess.shader->dfType]);
memcpy(gl.depthFadeScale, r_depthFadeScale[tess.shader->dfType], sizeof(gl.depthFadeScale));
pipeline->uniformsDirty[SU_COLOR_SCALE] = qfalse;
}
if(pipeline->uniformsDirty[SU_COLOR_BIAS] ||
memcmp(gl.depthFadeBias, r_depthFadeBias[tess.shader->dfType], sizeof(gl.depthFadeBias)) != 0)
{
glUniform4fv(pipeline->uniformLocations[SU_COLOR_BIAS], 1, r_depthFadeBias[tess.shader->dfType]);
memcpy(gl.depthFadeBias, r_depthFadeBias[tess.shader->dfType], sizeof(gl.depthFadeBias));
pipeline->uniformsDirty[SU_COLOR_BIAS] = qfalse;
}
if(pipeline->uniformsDirty[SU_DIST_OFFSET] ||
tess.shader->dfInvDist != gl.depthFadeDist ||
tess.shader->dfBias != gl.depthFadeOffset)
{
glUniform2f(pipeline->uniformLocations[SU_DIST_OFFSET], tess.shader->dfInvDist, tess.shader->dfBias);
gl.depthFadeDist = tess.shader->dfInvDist;
gl.depthFadeOffset = tess.shader->dfBias;
pipeline->uniformsDirty[SU_DIST_OFFSET] = qfalse;
}
if(pipeline->uniformsDirty[SU_GREYSCALE] ||
tess.greyscale != gl.greyscale)
{
glUniform1f(pipeline->uniformLocations[SU_GREYSCALE], tess.greyscale);
gl.greyscale = tess.greyscale;
pipeline->uniformsDirty[SU_GREYSCALE] = qfalse;
}
UploadVertexArray(VB_POSITION, tess.xyz);
for(int i = 0; i < tess.shader->numStages; ++i)
{
const shaderStage_t* stage = tess.xstages[i];
// We have already made sure (in theory) we won't have depth writes enabled
// to avoid "feedback loops" on the depth texture, resulting in undefined behavior.
// See "Feedback Loops Between Textures and the Framebuffer" in the GL specs.
// However, this is not enough for OpenGL 4.5+, where glTextureBarrier is needed too
// because caching means a feedback loop can happen across draw calls.
assert((stage->stateBits & GLS_DEPTHTEST_DISABLE) == 0);
ApplyState(stage->stateBits, tess.shader->cullType, tess.shader->polygonOffset);
UploadVertexArray(VB_TEXCOORD, tess.svars[i].texcoordsptr);
UploadVertexArray(VB_COLOR, tess.svars[i].colors);
UploadIndices(tess.indexes, tess.numIndexes);
if(pipeline->uniformsDirty[SU_ALPHA_TEST])
{
glUniform1ui(pipeline->uniformLocations[SU_ALPHA_TEST], gl.alphaTest);
pipeline->uniformsDirty[SU_ALPHA_TEST] = qfalse;
}
BindBundle(0, &stage->bundle);
if(!gl.fbMSEnabled)
{
BindTexture(1, gl.fbSS[gl.fbReadIndex].depthStencil);
}
DrawElements(tess.numIndexes);
}
}
// Draws the current tessellation: activates the pipeline matching the draw type
// and calls the corresponding draw function. Other draw types are ignored.
static void GAL_Draw(drawType_t type)
{
	switch(type)
	{
		case DT_GENERIC:
			ApplyPipeline(PID_GENERIC);
			DrawGeneric();
			break;

		case DT_DYNAMIC_LIGHT:
			ApplyPipeline(PID_DYNAMIC_LIGHT);
			DrawDynamicLight();
			break;

		case DT_SOFT_SPRITE:
			ApplyPipeline(PID_SOFT_SPRITE);
			DrawDepthFade();
			break;

		default:
			break;
	}
}
static void GAL_Begin3D()
{
ApplyPipeline(PID_GENERIC);
R_MakeIdentityMatrix(gl.modelViewMatrix);
memcpy(gl.projectionMatrix, backEnd.viewParms.projectionMatrix, sizeof(gl.projectionMatrix));
ApplyViewportAndScissor(backEnd.viewParms.viewportX, backEnd.viewParms.viewportY, backEnd.viewParms.viewportWidth, backEnd.viewParms.viewportHeight);
if(backEnd.viewParms.isPortal)
{
float plane[4];
plane[0] = backEnd.viewParms.portalPlane.normal[0];
plane[1] = backEnd.viewParms.portalPlane.normal[1];
plane[2] = backEnd.viewParms.portalPlane.normal[2];
plane[3] = backEnd.viewParms.portalPlane.dist;
float plane2[4];
plane2[0] = DotProduct(backEnd.viewParms.orient.axis[0], plane);
plane2[1] = DotProduct(backEnd.viewParms.orient.axis[1], plane);
plane2[2] = DotProduct(backEnd.viewParms.orient.axis[2], plane);
plane2[3] = DotProduct(plane, backEnd.viewParms.orient.origin) - plane[3];
float* o = plane;
const float* m = s_flipMatrix;
const float* v = plane2;
o[0] = m[0] * v[0] + m[4] * v[1] + m[8] * v[2] + m[12] * v[3];
o[1] = m[1] * v[0] + m[5] * v[1] + m[9] * v[2] + m[13] * v[3];
o[2] = m[2] * v[0] + m[6] * v[1] + m[10] * v[2] + m[14] * v[3];
o[3] = m[3] * v[0] + m[7] * v[1] + m[11] * v[2] + m[15] * v[3];
memcpy(gl.clipPlane, plane, sizeof(gl.clipPlane));
ApplyClipPlane(qtrue);
}
else
{
memset(gl.clipPlane, 0, sizeof(gl.clipPlane));
ApplyClipPlane(qfalse);
}
ApplyState(GLS_DEFAULT, CT_TWO_SIDED, qfalse);
GLbitfield clearBits = GL_DEPTH_BUFFER_BIT;
if(backEnd.refdef.rdflags & RDF_HYPERSPACE)
{
clearBits |= GL_COLOR_BUFFER_BIT;
const float c = RB_HyperspaceColor();
glClearColor(c, c, c, 1.0f);
}
else if(r_fastsky->integer && !(backEnd.refdef.rdflags & RDF_NOWORLDMODEL))
{
clearBits |= GL_COLOR_BUFFER_BIT;
glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
}
glClear(clearBits);
// in case the generic pipeline was already active before calling this function
gl.pipelines[PID_GENERIC].uniformsDirty[GU_PROJECTION] = qtrue;
gl.pipelines[PID_GENERIC].uniformsDirty[GU_MODELVIEW] = qtrue;
gl.pipelines[PID_GENERIC].uniformsDirty[GU_CLIP_PLANE] = qtrue;
}
// Starts sky/clouds rendering:
// - remembers the current clip plane enable state in gl.prevEnableClipPlane
// - disables the clip plane
// - collapses the depth range so that both its near and far values are 'depth'
// The previous state is restored by GAL_EndSkyAndClouds.
static void GAL_BeginSkyAndClouds(double depth)
{
gl.prevEnableClipPlane = gl.enableClipPlane;
ApplyClipPlane(qfalse);
glDepthRange(depth, depth);
}
// Ends sky/clouds rendering:
// - resets the depth range to [0, 1]
// - re-applies the clip plane enable state saved in gl.prevEnableClipPlane
static void GAL_EndSkyAndClouds()
{
glDepthRange(0.0, 1.0);
ApplyClipPlane(gl.prevEnableClipPlane);
}
// Returns the maximum anisotropy to use for the given image.
// The result is 1 when the image has the IMG_NOAF flag or when either
// glInfo.maxAnisotropy or r_ext_max_anisotropy is below 2.
// Otherwise, it's the smaller of those two values.
static int GetMaxAnisotropy(image_t* image)
{
	if((image->flags & IMG_NOAF) != 0)
	{
		return 1;
	}

	if(glInfo.maxAnisotropy < 2 || r_ext_max_anisotropy->integer < 2)
	{
		return 1;
	}

	return min(r_ext_max_anisotropy->integer, glInfo.maxAnisotropy);
}
// Creates the GL texture object of an image and sets up its sampling parameters.
//
// image    : receives the new texture handle in texnum
// mipCount : not used by this function
// w, h     : only used for IMG_LMATLAS images, whose level 0 gets allocated here without any data
//
// For all other images, no storage is specified by this function:
// only the wrap modes and the filters are set.
static void GAL_CreateTexture(image_t* image, int mipCount, int w, int h)
{
	GLuint texture;
	glGenTextures(1, &texture);
	image->texnum = (textureHandle_t)texture;
	BindImage(0, image);
	SetDebugName(GL_TEXTURE, texture, image->name);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, GetMaxAnisotropy(image));

	if(image->flags & IMG_LMATLAS)
	{
		// allocated up front, always bilinear and clamped
		glTexImage2D(GL_TEXTURE_2D, 0, GetTextureInternalFormat(image->format), w, h, 0, GetTextureFormat(image->format), GL_UNSIGNED_BYTE, NULL);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
		return;
	}

	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GetTextureWrapMode(image->wrapClampMode));
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GetTextureWrapMode(image->wrapClampMode));

	// tri-linear by default
	int minFilter = GL_LINEAR_MIPMAP_LINEAR;
	int magFilter = GL_LINEAR;
	if(Q_stricmp(r_textureMode->string, "GL_NEAREST") == 0 &&
		(image->flags & (IMG_EXTLMATLAS | IMG_NOPICMIP)) == 0)
	{
		// point sampling was requested and this image isn't exempt from it
		minFilter = GL_NEAREST;
		magFilter = GL_NEAREST;
	}
	else if(image->flags & IMG_NOMIPMAP)
	{
		minFilter = GL_LINEAR;
	}

	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, minFilter);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, magFilter);
}
// Uploads pixel data to one mip level of an image.
// IMG_LMATLAS images get the w x h rectangle at (x, y) updated in place.
// For all other images, the entire level is (re-)specified with size w x h; x and y are not used.
static void GAL_UpdateTexture(image_t* image, int mip, int x, int y, int w, int h, const void* data)
{
	BindImage(0, image);

	if((image->flags & IMG_LMATLAS) == 0)
	{
		glTexImage2D(GL_TEXTURE_2D, (GLint)mip, GetTextureInternalFormat(image->format), w, h, 0, GetTextureFormat(image->format), GL_UNSIGNED_BYTE, data);
		return;
	}

	glTexSubImage2D(GL_TEXTURE_2D, (GLint)mip, x, y, w, h, GetTextureFormat(image->format), GL_UNSIGNED_BYTE, data);
}
// Updates a scratch image with w x h RGBA pixel data.
// When the size differs from the image's current size, the texture is specified anew
// (and the image's width and height are updated), no matter what 'dirty' says.
// When the size is the same, the pixels are only uploaded if 'dirty' is set.
static void GAL_UpdateScratch(image_t* image, int w, int h, const void* data, qbool dirty)
{
	BindImage(0, image);

	if(w == image->width && h == image->height)
	{
		if(dirty)
		{
			// a sub-image upload lets drivers know the content is going to keep changing,
			// which keeps them from attempting texture compression
			glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, data);
		}
		return;
	}

	// the scratch image doesn't have the size we want: specify it as a new texture
	image->width = w;
	image->height = h;
	glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB8, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, data);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
}
// Creates an RGBA8 texture with immutable storage for 'image' and generates its mip levels
// on the GPU with compute shaders: gamma-to-linear conversion of the source,
// separable down-sampling (X-axis, then Y-axis) and linear-to-gamma conversion into each destination level.
//
// image     : receives the new texture handle in texnum; its width and height size the destination level 0
// mipCount  : number of levels in the source mip chain, whose level 0 is w x h
// mipOffset : number of leading source levels that aren't stored; source level i is written to destination level i - mipOffset
// w, h      : dimensions of the source level 0
// mip0      : RGBA pixel data (8 bits per channel) of the source level 0
//
// The program that was bound on entry is bound again before returning.
static void GAL_CreateTextureEx(image_t* image, int mipCount, int mipOffset, int w, int h, const void* mip0)
{
	// compute work group size along X and Y, must match what the dispatch counts below assume
	enum { GroupSize = 8, GroupMask = GroupSize - 1 };

	assert(image->format == TF_RGBA8);
	assert(GetTextureInternalFormat(image->format) == GL_RGBA8);

	// remember what program we had bound before...
	GLint previousProgram = 0;
	glGetIntegerv(GL_CURRENT_PROGRAM, &previousProgram);

	// create the texture with all mip levels
	GLuint id;
	glGenTextures(1, &id);
	image->texnum = (textureHandle_t)id;
	BindTexture(0, id);
	glTexStorage2D(GL_TEXTURE_2D, mipCount - mipOffset, GL_RGBA8, image->width, image->height);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GetTextureWrapMode(image->wrapClampMode));
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GetTextureWrapMode(image->wrapClampMode));
	if(Q_stricmp(r_textureMode->string, "GL_NEAREST") == 0 &&
		(image->flags & (IMG_LMATLAS | IMG_EXTLMATLAS | IMG_NOPICMIP)) == 0)
	{
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
	}
	else
	{
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
	}
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, GetMaxAnisotropy(image));
	SetDebugName(GL_TEXTURE, id, image->name);

	// upload source mip level 0
	BindTexture(0, gl.mipGen.textures[2]);
	glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, mip0);

	// create a linear color space copy of source mip 0
	glUseProgram(gl.mipGen.programs[CPID_GAMMA_TO_LINEAR].program);
	glUniform1f(0, r_mipGenGamma->value);
	glBindImageTexture(0, gl.mipGen.textures[2], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA8);
	glBindImageTexture(1, gl.mipGen.textures[0], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA16F);
	glDispatchCompute((w + GroupMask) / GroupSize, (h + GroupMask) / GroupSize, 1);

	// copy to destination mip 0 now if needed
	if(mipOffset == 0)
	{
		glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
		glUseProgram(gl.mipGen.programs[CPID_LINEAR_TO_GAMMA].program);
		glUniform1f(0, r_intensity->value);
		glUniform4fv(1, 1, r_mipBlendColors[0]);
		glUniform1f(2, 1.0f / r_mipGenGamma->value);
		glBindImageTexture(0, gl.mipGen.textures[0], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16F);
		glBindImageTexture(1, id, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8);
		glDispatchCompute((w + GroupMask) / GroupSize, (h + GroupMask) / GroupSize, 1);
	}

	// textures[0] always holds the linear-space result of the previous level,
	// textures[1] is the intermediate target of the X-axis pass
	for(int i = 1; i < mipCount; ++i)
	{
		const int w1 = w;
		const int h1 = h;
		w = max(w / 2, 1);
		h = max(h / 2, 1);

		// down-sample on the X-axis
		glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
		glUseProgram(gl.mipGen.programs[CPID_DOWN_SAMPLE].program);
		glUniform4fv(0, 1, tr.mipFilter);
		glUniform2i(1, w1 - 1, h1 - 1); // maxSize
		glUniform2i(2, w1 / w, 1); // scale
		glUniform2i(3, 1, 0); // offset
		glUniform1ui(4, image->wrapClampMode == TW_CLAMP_TO_EDGE ? 1 : 0);
		glBindImageTexture(0, gl.mipGen.textures[0], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16F);
		glBindImageTexture(1, gl.mipGen.textures[1], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA16F);
		glDispatchCompute((w + GroupMask) / GroupSize, (h1 + GroupMask) / GroupSize, 1);

		// down-sample on the Y-axis
		glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
		glUseProgram(gl.mipGen.programs[CPID_DOWN_SAMPLE].program);
		glUniform4fv(0, 1, tr.mipFilter);
		glUniform2i(1, w - 1, h1 - 1); // maxSize
		glUniform2i(2, 1, h1 / h); // scale
		glUniform2i(3, 0, 1); // offset
		glUniform1ui(4, image->wrapClampMode == TW_CLAMP_TO_EDGE ? 1 : 0);
		glBindImageTexture(0, gl.mipGen.textures[1], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16F);
		glBindImageTexture(1, gl.mipGen.textures[0], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA16F);
		glDispatchCompute((w + GroupMask) / GroupSize, (h + GroupMask) / GroupSize, 1);

		const int destMip = i - mipOffset;
		if(destMip >= 0)
		{
			// copy the gamma-corrected result to the desired mip slice
			glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
			glUseProgram(gl.mipGen.programs[CPID_LINEAR_TO_GAMMA].program);
			glUniform1f(0, r_intensity->value);
			glUniform4fv(1, 1, r_mipBlendColors[r_colorMipLevels->integer ? destMip : 0]);
			glUniform1f(2, 1.0f / r_mipGenGamma->value);
			glBindImageTexture(0, gl.mipGen.textures[0], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16F);
			glBindImageTexture(1, id, destMip, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8);
			glDispatchCompute((w + GroupMask) / GroupSize, (h + GroupMask) / GroupSize, 1);
		}
	}

	// The destination mip levels were written with image stores, which are incoherent:
	// without this barrier, texture fetches issued by later draw calls
	// are not guaranteed to see the data written by the dispatches above.
	glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);

	// restore program
	glUseProgram(previousProgram);
}
static void GAL_BeginDynamicLight()
{
Pipeline* const pipeline = &gl.pipelines[PID_DYNAMIC_LIGHT];
const dlight_t* const dl = tess.light;
ApplyPipeline(PID_DYNAMIC_LIGHT);
glUniform3fv(pipeline->uniformLocations[DU_EYE_POS], 1, backEnd.orient.viewOrigin);
glUniform3fv(pipeline->uniformLocations[DU_LIGHT_POS], 1, dl->transformed);
glUniform4f(pipeline->uniformLocations[DU_LIGHT_COLOR_RADIUS], dl->color[0], dl->color[1], dl->color[2], 1.0f / Square(dl->radius));
}
static void GAL_ReadPixels(int x, int y, int w, int h, int alignment, colorSpace_t colorSpace, void* out)
{
const GLenum format = colorSpace == CS_BGR ? GL_BGR : GL_RGBA;
glPixelStorei(GL_PACK_ALIGNMENT, alignment);
glReadPixels(x, y, w, h, format, GL_UNSIGNED_BYTE, out);
glPixelStorei(GL_PACK_ALIGNMENT, 1);
}
// Prepares the back-end for 2D rendering:
// - activates the generic pipeline
// - uses an identity model-view matrix and an orthographic projection sized to the video mode
// - covers the entire video mode with the viewport and scissor rectangle
// - disables the clip plane and applies the default 2D state
static void GAL_Begin2D()
{
	const int screenWidth = glConfig.vidWidth;
	const int screenHeight = glConfig.vidHeight;

	ApplyPipeline(PID_GENERIC);

	R_MakeIdentityMatrix(gl.modelViewMatrix);
	R_MakeOrthoProjectionMatrix(gl.projectionMatrix, screenWidth, screenHeight);

	ApplyViewportAndScissor(0, 0, screenWidth, screenHeight);
	ApplyClipPlane(qfalse);
	ApplyState(GLS_DEFAULT_2D, CT_TWO_SIDED, qfalse);

	// the generic pipeline might have been active before this call already,
	// so the matrices have to be flagged explicitly
	gl.pipelines[PID_GENERIC].uniformsDirty[GU_MODELVIEW] = qtrue;
	gl.pipelines[PID_GENERIC].uniformsDirty[GU_PROJECTION] = qtrue;
	gl.pipelines[PID_GENERIC].uniformsDirty[GU_CLIP_PLANE] = qfalse; // not used
}
// Stores a copy of the model-view matrix and flags the model-view uniform
// of the currently active pipeline (generic, dynamic light or soft sprite) as dirty.
// Only the active pipeline gets flagged; nothing is flagged for any other pipeline id.
static void GAL_SetModelViewMatrix(const float* matrix)
{
	memcpy(gl.modelViewMatrix, matrix, sizeof(gl.modelViewMatrix));

	switch(gl.pipelineId)
	{
		case PID_GENERIC:
			gl.pipelines[PID_GENERIC].uniformsDirty[GU_MODELVIEW] = qtrue;
			break;

		case PID_DYNAMIC_LIGHT:
			gl.pipelines[PID_DYNAMIC_LIGHT].uniformsDirty[DU_MODELVIEW] = qtrue;
			break;

		case PID_SOFT_SPRITE:
			gl.pipelines[PID_SOFT_SPRITE].uniformsDirty[SU_MODELVIEW] = qtrue;
			break;

		default:
			break;
	}
}
// Sets the depth range by forwarding zNear and zFar to glDepthRange as they are.
static void GAL_SetDepthRange(double zNear, double zFar)
{
glDepthRange(zNear, zFar);
}
// Returns a printable description of the GL mechanism behind a buffer mapping type.
// Unknown types yield "?".
static const char* GetMappingTypeName(MappingType type)
{
	if(type == MT_SUBDATA)
	{
		return "glBufferSubData";
	}

	if(type == MT_PERSISTENT)
	{
		return "glMapBufferRange + GL_MAP_PERSISTENT_BIT";
	}

	if(type == MT_UNSYNC)
	{
		return "glMapBufferRange + GL_MAP_UNSYNCHRONIZED_BIT";
	}

	if(type == MT_AMDPIN)
	{
		return "glBufferData + GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD";
	}

	return "?";
}
// Prints the name of the geometry upload strategy currently in use (gl.mappingType).
static void GAL_PrintInfo()
{
	const char* const strategyName = GetMappingTypeName(gl.mappingType);
	ri.Printf(PRINT_ALL, "Geometry upload strategy: %s\n", strategyName);
}
// Fills the graphics API layer's function table with the entry points of this back-end.
// Always returns qtrue.
qbool GAL_GetGL3(graphicsAPILayer_t* rb)
{
	// life time and info
	rb->Init = GAL_Init;
	rb->ShutDown = GAL_ShutDown;
	rb->PrintInfo = GAL_PrintInfo;

	// frames and passes
	rb->BeginFrame = GAL_BeginFrame;
	rb->EndFrame = GAL_EndFrame;
	rb->Begin2D = GAL_Begin2D;
	rb->Begin3D = GAL_Begin3D;
	rb->BeginSkyAndClouds = GAL_BeginSkyAndClouds;
	rb->EndSkyAndClouds = GAL_EndSkyAndClouds;
	rb->BeginDynamicLight = GAL_BeginDynamicLight;

	// textures
	rb->CreateTexture = GAL_CreateTexture;
	rb->CreateTextureEx = GAL_CreateTextureEx;
	rb->UpdateTexture = GAL_UpdateTexture;
	rb->UpdateScratch = GAL_UpdateScratch;

	// drawing and state
	rb->Draw = GAL_Draw;
	rb->SetModelViewMatrix = GAL_SetModelViewMatrix;
	rb->SetDepthRange = GAL_SetDepthRange;
	rb->ReadPixels = GAL_ReadPixels;

	return qtrue;
}