From 78cf275a62e5b8ba0300b3f64527687d33972c73 Mon Sep 17 00:00:00 2001 From: myT Date: Thu, 8 Oct 2020 03:55:40 +0200 Subject: [PATCH] fixed depth fade with MSAA in the GL3 backend --- changelog.txt | 9 +- code/renderer/tr_backend_gl2.cpp | 42 ++++++- code/renderer/tr_backend_gl3.cpp | 191 +++++++++++++++++-------------- 3 files changed, 143 insertions(+), 99 deletions(-) diff --git a/changelog.txt b/changelog.txt index 4f4ebfb..1b6a581 100644 --- a/changelog.txt +++ b/changelog.txt @@ -7,6 +7,9 @@ DD Mmm 20 - 1.53 add: r_alphaToCoverageMipBoost <0.0 to 0.5> (default: 0.125) boosts the alpha value of higher mip levels with A2C enabled, it prevents alpha-tested surfaces from fading (too much) in the distance +chg: with r_backend GL3, depth fade with MSAA now requires GLSL 4.00 at a minimum + depth access is now correctly multi-sampled (less aliasing) and performance has improved as well + chg: with r_backend GL3, alpha to coverage now requires GLSL 4.00 at a minimum fix: with r_backend D3D11, some device reset scenarios caused fatal errors instead of video restarts @@ -1002,12 +1005,6 @@ exp: not confirmed: broken barrier implementation? fix: use D3D11 or upgrade drivers act: can't fix (root cause unknown) -iss: with GL3, depth faded surfaces break anti-aliasing -exp: can't fetch from the right sample index because render buffers - are used as FBO attachments instead of textures -fix: use D3D11 -act: will get fixed - iss: strong gamma correction breaks anti-aliasing exp: MSAA resolve happens before the post-process pass act: will get fixed for D3D11 and GL3 diff --git a/code/renderer/tr_backend_gl2.cpp b/code/renderer/tr_backend_gl2.cpp index 9b22534..3056383 100644 --- a/code/renderer/tr_backend_gl2.cpp +++ b/code/renderer/tr_backend_gl2.cpp @@ -52,7 +52,6 @@ static void GAL_Begin2D(); static GLint GetTexEnv( texEnv_t texEnv ); void GL_GetRenderTargetFormat( GLenum* internalFormat, GLenum* format, GLenum* type, int cnq3Format ); -void GL_CreateColorRenderBufferStorageMS( int* samples ); struct GLSL_Program { @@ -334,8 +333,8 @@ static const char* dynLightFS = struct FrameBuffer { GLuint fbo; - GLuint color; // texture if MS, buffer if SS - GLuint depthStencil; // texture if MS, buffer if SS + GLuint color; // texture if SS, renderbuffer if MS + GLuint depthStencil; // texture if SS, renderbuffer if MS qbool multiSampled; qbool hasDepthStencil; }; @@ -390,6 +389,41 @@ static void GL2_CheckError( const char* call, const char* function, const char* } +static void GL2_CreateColorRenderBufferStorageMS( int* samples ) +{ + GLenum internalFormat, format, type; + GL_GetRenderTargetFormat( &internalFormat, &format, &type, r_rtColorFormat->integer ); + + int sampleCount = r_msaa->integer; + while ( glGetError() != GL_NO_ERROR ) {} // clear the error queue + + if ( GLEW_VERSION_4_2 || GLEW_ARB_internalformat_query ) + { + GLint maxSampleCount = 0; + glGetInternalformativ( GL_RENDERBUFFER, internalFormat, GL_SAMPLES, 1, &maxSampleCount ); + if ( glGetError() == GL_NO_ERROR ) + sampleCount = min(sampleCount, (int)maxSampleCount); + } + + GLenum errorCode = GL_NO_ERROR; + for ( ;; ) + { + // @NOTE: when the sample count is invalid, the error code is GL_INVALID_OPERATION + glRenderbufferStorageMultisample( GL_RENDERBUFFER, sampleCount, internalFormat, glConfig.vidWidth, glConfig.vidHeight ); + errorCode = glGetError(); + if ( errorCode == GL_NO_ERROR || sampleCount == 0 ) + break; + + --sampleCount; + } + + if ( errorCode != GL_NO_ERROR ) + ri.Error( ERR_FATAL, "Failed to create multi-sampled render buffer storage (error 0x%X)\n", (unsigned int)errorCode ); + + *samples = sampleCount; +} + + static qbool GL2_FBO_CreateSS( FrameBuffer& fb, qbool depthStencil ) { while ( glGetError() != GL_NO_ERROR ) {} // clear the error queue @@ -441,7 +475,7 @@ static qbool GL2_FBO_CreateMS( int* sampleCount, FrameBuffer& fb ) GL(glGenRenderbuffers( 1, &fb.color )); GL(glBindRenderbuffer( GL_RENDERBUFFER, fb.color )); - GL_CreateColorRenderBufferStorageMS( sampleCount ); + GL2_CreateColorRenderBufferStorageMS( sampleCount ); GL(glFramebufferRenderbuffer( GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, fb.color )); GL(glGenRenderbuffers( 1, &fb.depthStencil )); diff --git a/code/renderer/tr_backend_gl3.cpp b/code/renderer/tr_backend_gl3.cpp index 0cd94d6..fce68da 100644 --- a/code/renderer/tr_backend_gl3.cpp +++ b/code/renderer/tr_backend_gl3.cpp @@ -35,10 +35,11 @@ along with Challenge Quake 3. If not, see . Current info: - OpenGL 3.2 minimum - GLSL 1.40 minimum -- fancy mip-map generations requires: +- fancy mip-map generation requires: - OpenGL 4.3 (or equivalent extensions) - GLSL 4.30 -- alpha to coverage requires GLSL 4.00 +- alpha to coverage requires GLSL 4.00 +- depth fade with MSAA requires GLSL 4.00 Vertex and index data streaming notes: - everyone: persistent coherent buffer mapping is the best option whenever available @@ -137,8 +138,8 @@ struct PipelineArrayBuffer struct FrameBuffer { GLuint fbo; - GLuint color; // texture if MS, buffer if SS - GLuint depthStencil; // texture if MS, buffer if SS + GLuint color; + GLuint depthStencil; qbool multiSampled; qbool hasDepthStencil; qbool hasColor; @@ -260,7 +261,6 @@ struct OpenGL3 qbool enableAlphaToCoverage; FrameBuffer fbMS; - FrameBuffer fbSSDepth; // resolved depth/stencil from fbMS FrameBuffer fbSS[2]; unsigned int fbReadIndex; // indexes fbSS qbool fbMSEnabled; @@ -503,7 +503,11 @@ static const char* sprite_vs = static const char* sprite_fs = "uniform sampler2D texture1; // diffuse texture\n" +"#if CNQ3_MSAA\n" +"uniform sampler2DMS texture2; // depth texture\n" +"#else\n" "uniform sampler2D texture2; // depth texture\n" +"#endif\n" "\n" "uniform uint alphaTest;\n" "uniform vec2 distOffset;\n" @@ -543,7 +547,11 @@ static const char* sprite_fs = " (alphaTest == uint(3) && r.a < 0.5))\n" " discard;\n" "\n" +"#if CNQ3_MSAA\n" +" float depthSRaw = texelFetch(texture2, ivec2(gl_FragCoord.xy), gl_SampleID).r;\n" +"#else\n" " float depthSRaw = texelFetch(texture2, ivec2(gl_FragCoord.xy), 0).r;\n" +"#endif\n" " float depthS = LinearDepth(depthSRaw * 2.0 - 1.0);\n" " float depthP = depthVS - offset;\n" " float scale = Contrast((depthS - depthP) * distance, 2.0);\n" @@ -693,46 +701,6 @@ void GL_GetRenderTargetFormat(GLenum* internalFormat, GLenum* format, GLenum* ty } } -void GL_CreateColorRenderBufferStorageMS(int* samples) -{ - GLenum internalFormat, format, type; - GL_GetRenderTargetFormat(&internalFormat, &format, &type, r_rtColorFormat->integer); - - int sampleCount = r_msaa->integer; - while(glGetError() != GL_NO_ERROR) {} // clear the error queue - - if(GLEW_VERSION_4_2 || GLEW_ARB_internalformat_query) - { - GLint maxSampleCount = 0; - glGetInternalformativ(GL_RENDERBUFFER, internalFormat, GL_SAMPLES, 1, &maxSampleCount); - if(glGetError() == GL_NO_ERROR) - { - sampleCount = min(sampleCount, (int)maxSampleCount); - } - } - - GLenum errorCode = GL_NO_ERROR; - for(;;) - { - // @NOTE: when the sample count is invalid, the error code is GL_INVALID_OPERATION - glRenderbufferStorageMultisample(GL_RENDERBUFFER, sampleCount, internalFormat, glConfig.vidWidth, glConfig.vidHeight); - errorCode = glGetError(); - if(errorCode == GL_NO_ERROR || sampleCount == 0) - { - break; - } - - --sampleCount; - } - - if(errorCode != GL_NO_ERROR) - { - ri.Error(ERR_FATAL, "Failed to create multi-sampled render buffer storage (error 0x%X)\n", (unsigned int)errorCode); - } - - *samples = sampleCount; -} - #if defined(_WIN32) static void AllocatePinnedMemory(ArrayBuffer* buffer) @@ -805,7 +773,8 @@ static const char* GetShaderTypeName(GLenum shaderType) static qbool CreateShader(GLuint* shaderPtr, PipelineId pipelineId, GLenum shaderType, const char* shaderSource, const char* debugName) { - // A2C now requires GLSL 4.00 for textureQueryLod + // alpha to coverage now requires GLSL 4.00 for textureQueryLod + // depth fade with MSAA now requires GLSL 4.00 for gl_SampleID const qbool enableA2C = pipelineId == PID_GENERIC && shaderType == GL_FRAGMENT_SHADER && @@ -814,12 +783,18 @@ static qbool CreateShader(GLuint* shaderPtr, PipelineId pipelineId, GLenum shade pipelineId == PID_GENERIC && shaderType == GL_FRAGMENT_SHADER && r_dither->integer; - const char* sourceArray[] = + const qbool depthFadeWithMSAA = + pipelineId == PID_SOFT_SPRITE && + shaderType == GL_FRAGMENT_SHADER && + glInfo.depthFadeSupport && + gl.fbMSEnabled; + const char* const sourceArray[] = { - shaderType == GL_COMPUTE_SHADER ? "#version 430\n" : (enableA2C ? "#version 400\n" : "#version 140\n"), + shaderType == GL_COMPUTE_SHADER ? "#version 430\n" : (enableA2C || depthFadeWithMSAA ? "#version 400\n" : "#version 140\n"), "\n", enableA2C ? "#define CNQ3_A2C 1\n" : "#define CNQ3_A2C 0\n", enableDithering ? "#define CNQ3_DITHER 1\n" : "#define CNQ3_DITHER 0\n", + depthFadeWithMSAA ? "#define CNQ3_MSAA 1\n" : "#define CNQ3_MSAA 0\n", shaderSource }; @@ -930,7 +905,44 @@ static qbool CreateComputeProgram(Program* prog, const char* cs, const char* deb return FinalizeProgram(prog, debugName); } -extern void GL_GetRenderTargetFormat(GLenum* internalFormat, GLenum* format, GLenum* type, int cnq3Format); +static void CreateColorTextureStorageMS(int* samples) +{ + GLenum internalFormat, format, type; + GL_GetRenderTargetFormat(&internalFormat, &format, &type, r_rtColorFormat->integer); + + int sampleCount = r_msaa->integer; + while(glGetError() != GL_NO_ERROR) {} // clear the error queue + + if(GLEW_VERSION_4_2 || GLEW_ARB_internalformat_query) + { + GLint maxSampleCount = 0; + glGetInternalformativ(GL_TEXTURE_2D_MULTISAMPLE, internalFormat, GL_SAMPLES, 1, &maxSampleCount); + if(glGetError() == GL_NO_ERROR) + { + sampleCount = min(sampleCount, (int)maxSampleCount); + } + } + + GLenum errorCode = GL_NO_ERROR; + for(;;) + { + glTexImage2DMultisample(GL_TEXTURE_2D_MULTISAMPLE, sampleCount, internalFormat, glConfig.vidWidth, glConfig.vidHeight, GL_TRUE); + errorCode = glGetError(); + if(errorCode == GL_NO_ERROR || sampleCount == 0) + { + break; + } + + --sampleCount; + } + + if(errorCode != GL_NO_ERROR) + { + ri.Error(ERR_FATAL, "Failed to create multi-sampled texture storage (error 0x%X)\n", (unsigned int)errorCode); + } + + *samples = sampleCount; +} static void FBO_CreateSS(FrameBuffer* fb, qbool color, qbool depthStencil, const char* name) { @@ -986,17 +998,17 @@ static void FBO_CreateMS(int* sampleCount, FrameBuffer* fb, const char* name) glGenFramebuffers(1, &fb->fbo); glBindFramebuffer(GL_FRAMEBUFFER, fb->fbo); - glGenRenderbuffers(1, &fb->color); - glBindRenderbuffer(GL_RENDERBUFFER, fb->color); - GL_CreateColorRenderBufferStorageMS(sampleCount); - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, fb->color); - SetDebugName(GL_RENDERBUFFER, fb->color, va("%s color attachment 0", name)); + glGenTextures(1, &fb->color); + glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, fb->color); + CreateColorTextureStorageMS(sampleCount); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D_MULTISAMPLE, fb->color, 0); + SetDebugName(GL_TEXTURE, fb->color, va("%s color attachment 0", name)); - glGenRenderbuffers(1, &fb->depthStencil); - glBindRenderbuffer(GL_RENDERBUFFER, fb->depthStencil); - glRenderbufferStorageMultisample(GL_RENDERBUFFER, *sampleCount, GL_DEPTH24_STENCIL8, glConfig.vidWidth, glConfig.vidHeight); - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_RENDERBUFFER, fb->depthStencil); - SetDebugName(GL_RENDERBUFFER, fb->depthStencil, va("%s depth/stencil attachment", name)); + glGenTextures(1, &fb->depthStencil); + glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, fb->depthStencil); + glTexImage2DMultisample(GL_TEXTURE_2D_MULTISAMPLE, *sampleCount, GL_DEPTH24_STENCIL8, glConfig.vidWidth, glConfig.vidHeight, GL_TRUE); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D_MULTISAMPLE, fb->depthStencil, 0); + SetDebugName(GL_TEXTURE, fb->depthStencil, va("%s depth/stencil attachment", name)); const GLenum fboStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER); if(fboStatus != GL_FRAMEBUFFER_COMPLETE) @@ -1020,7 +1032,6 @@ static void FBO_Init() if(gl.fbMSEnabled) { FBO_CreateMS(&finalSampleCount, &gl.fbMS, "main"); - FBO_CreateSS(&gl.fbSSDepth, qfalse, qtrue, "depth resolve"); FBO_CreateSS(&gl.fbSS[0], qtrue, qfalse, "post-process #1"); FBO_CreateSS(&gl.fbSS[1], qtrue, qfalse, "post-process #2"); } @@ -1115,18 +1126,6 @@ static void FBO_ResolveColor() glBlitFramebuffer(0, 0, w, h, 0, 0, w, h, GL_COLOR_BUFFER_BIT, GL_LINEAR); } -static void FBO_ResolveDepth() -{ - const FrameBuffer& r = gl.fbMS; - const FrameBuffer& d = gl.fbSSDepth; - glBindFramebuffer(GL_READ_FRAMEBUFFER, r.fbo); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, d.fbo); - - const int w = glConfig.vidWidth; - const int h = glConfig.vidHeight; - glBlitFramebuffer(0, 0, w, h, 0, 0, w, h, GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT, GL_NEAREST); -} - static void ApplyPipeline(PipelineId pipelineId) { if(pipelineId == gl.pipelineId) @@ -1134,6 +1133,17 @@ static void ApplyPipeline(PipelineId pipelineId) return; } + // The depth fade pipeline is the only one reading from the depth texture + // but doesn't write to it. + // Any change to that pipeline requires a texture barrier with OpenGL 4.5+ + // to make sure we get valid data when reading the depth texture. + // See "Feedback Loops Between Textures and the Framebuffer" in the specs. + if((GLEW_VERSION_4_5 || GLEW_ARB_texture_barrier) && + pipelineId == PID_SOFT_SPRITE) + { + glTextureBarrier(); + } + gl.pipelineId = pipelineId; Pipeline* const pipeline = &gl.pipelines[pipelineId]; @@ -1154,20 +1164,14 @@ static void ApplyPipeline(PipelineId pipelineId) } } - if(pipelineId == PID_SOFT_SPRITE && gl.fbMSEnabled) - { - // This is not how it should be done and will counter the benefits of MSAA. - // To do this right, we need to bind the FBO's depth attachment to the shader and for that, - // we need multi-sampled textures as FBO attachments instead of multi-sampled render buffers. - // We also need the shader to use gl_SampleID, which changes our minimum requirements. - // Because of all these changes and lack of testing time, - // I'll do the necessary changes after the 1.52 release to avoid problems. - FBO_ResolveDepth(); - FBO_Bind(); - } - glUniform1i(pipeline->textureLocations[0], 0); glActiveTexture(GL_TEXTURE1); + if(pipelineId == PID_SOFT_SPRITE && gl.fbMSEnabled) + { + // we don't have a "BindTextureMS" function for caching/tracking MS texture binds + // since this is the only one we read from a fragment shader at the moment + glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, gl.fbMS.depthStencil); + } glUniform1i(pipeline->textureLocations[1], 1); glActiveTexture(GL_TEXTURE0); @@ -1862,6 +1866,7 @@ static void Init() gl.maxTextureSize = maxTextureSize > 0 ? min((int)maxTextureSize, MAX_GPU_TEXTURE_SIZE) : MAX_GPU_TEXTURE_SIZE; glConfig.unused_maxTextureSize = gl.maxTextureSize; glInfo.maxTextureSize = gl.maxTextureSize; + glInfo.depthFadeSupport = r_depthFade->integer == 1; FBO_Init(); if(gl.fbMSEnabled && r_alphaToCoverage->integer) @@ -1998,8 +2003,6 @@ static void Init() gl.errorMode = EM_FATAL; } - glInfo.depthFadeSupport = r_depthFade->integer == 1; - gl.pipelineId = PID_COUNT; ApplyPipeline(PID_GENERIC); @@ -2359,6 +2362,13 @@ static void DrawDepthFade() { const shaderStage_t* stage = tess.xstages[i]; + // We have already made sure (in theory) we won't have depth writes enabled + // to avoid "feedback loops" on the depth texture, resulting in undefined behavior. + // See "Feedback Loops Between Textures and the Framebuffer" in the GL specs. + // However, this is not enough for OpenGL 4.5+, where glTextureBarrier is needed too + // because caching means a feedback loop can happen across draw calls. + assert((stage->stateBits & GLS_DEPTHTEST_DISABLE) == 0); + ApplyState(stage->stateBits, tess.shader->cullType, tess.shader->polygonOffset); UploadVertexArray(VB_TEXCOORD, tess.svars[i].texcoordsptr); @@ -2372,9 +2382,12 @@ static void DrawDepthFade() } BindBundle(0, &stage->bundle); - glActiveTexture(GL_TEXTURE1); - BindTexture(1, gl.fbMSEnabled ? gl.fbSSDepth.depthStencil : gl.fbSS[gl.fbReadIndex].depthStencil); - glActiveTexture(GL_TEXTURE0); + if(!gl.fbMSEnabled) + { + glActiveTexture(GL_TEXTURE1); + BindTexture(1, gl.fbSS[gl.fbReadIndex].depthStencil); + glActiveTexture(GL_TEXTURE0); + } DrawElements(tess.numIndexes); }