From 9931fab6817f51bbdc09a93589033e3dbabd8c58 Mon Sep 17 00:00:00 2001 From: Robert Beckebans Date: Sun, 10 May 2020 15:05:46 +0200 Subject: [PATCH] Improved SSAO performance with Blue Noise --- RELEASE-NOTES.md | 2 ++ base/renderprogs/AmbientOcclusion_AO.ps.hlsl | 25 ++++++++++++++++- base/renderprogs/interactionSM.ps.hlsl | 2 +- neo/renderer/OpenGL/RenderDebug_GL.cpp | 15 +++++----- neo/renderer/RenderBackend.cpp | 29 +++++++++++++++----- neo/renderer/RenderProgs_embedded.h | 27 ++++++++++++++++-- neo/renderer/RenderSystem_init.cpp | 2 +- 7 files changed, 83 insertions(+), 19 deletions(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 028afe61..4bab25bc 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -79,6 +79,8 @@ The main goal is that the new content looks the same in RBDOOM-3-BFG as in Blend * Improved Shadow Mapping performance by reducing the number of taps from 12 to 6 and keeping a good quality using dithering the result with Blue Noise magic by Alan Wolfe +* Improved Screen Space Ambient Occlusion performance by enhancing the quality with Blue Noise and skipping the expensive extra bilateral filtering pass + * Updated idRenderLog to support RenderDoc and Nvidia's Nsight and only issue OpenGL or Vulkan debug commands if the debug extensions are detected. Reference: https://devblogs.nvidia.com/best-practices-gpu-performance-events/ * Artistic Style C++ beautifier configuration has slightly changed to work closer to Clang Format's behaviour diff --git a/base/renderprogs/AmbientOcclusion_AO.ps.hlsl b/base/renderprogs/AmbientOcclusion_AO.ps.hlsl index 2c886cd1..47ff72ec 100644 --- a/base/renderprogs/AmbientOcclusion_AO.ps.hlsl +++ b/base/renderprogs/AmbientOcclusion_AO.ps.hlsl @@ -94,6 +94,7 @@ const float projScale = 500.0; // *INDENT-OFF* uniform sampler2D samp0 : register( s0 ); // view normal/roughness uniform sampler2D samp1 : register( s1 ); // view depth +uniform sampler2D samp2 : register( s2 ); // blue noise 256 #define CS_Z_buffer samp1 @@ -109,7 +110,22 @@ struct PS_OUT // *INDENT-ON* +float BlueNoise( float2 n, float x ) +{ + float noise = tex2D( samp2, ( n.xy / 256.0 ) ).r; +#if TEMPORALLY_VARY_TAPS + noise = fract( noise + 0.61803398875 * rpJitterTexOffset.z * x ); +#else + noise = fract( noise ); +#endif + + noise = RemapNoiseTriErp( noise ); + + //noise = noise * 2.0 - 1.0; + + return noise; +} /** Used for packing Z into the GB channels */ // float CSZToKey( float z ) @@ -390,13 +406,20 @@ void main( PS_IN fragment, out PS_OUT result ) } #endif +#if 1 + float randomPatternRotationAngle = BlueNoise( ssP.xy, 10.0 ) * 10.0; + //float randomPatternRotationAngle = InterleavedGradientNoise( ssP.xy ) * 10.0; +#else + // Hash function used in the HPG12 AlchemyAO paper float randomPatternRotationAngle = float( ( ( 3 * ssP.x ) ^ ( ssP.y + ssP.x * ssP.y ) ) #if TEMPORALLY_VARY_TAPS - + rpJitterTexOffset.x + + rpJitterTexOffset.z #endif ) * 10.0; +#endif + // Choose the screen-space sample radius // proportional to the projected area of the sphere float ssDiskRadius = -projScale * radius / C.z; diff --git a/base/renderprogs/interactionSM.ps.hlsl b/base/renderprogs/interactionSM.ps.hlsl index 6d7f3c45..cfc8aa01 100644 --- a/base/renderprogs/interactionSM.ps.hlsl +++ b/base/renderprogs/interactionSM.ps.hlsl @@ -303,7 +303,7 @@ void main( PS_IN fragment, out PS_OUT result ) float shadow = 0.0; // RB: casting a float to int and using it as index can really kill the performance ... - float numSamples = 6.0; //int(rpScreenCorrectionFactor.w); + float numSamples = 6.0; float stepSize = 1.0 / numSamples; //float4 jitterTC = ( fragment.position * rpScreenCorrectionFactor ) + rpJitterTexOffset; diff --git a/neo/renderer/OpenGL/RenderDebug_GL.cpp b/neo/renderer/OpenGL/RenderDebug_GL.cpp index 4f9c6b23..74648364 100644 --- a/neo/renderer/OpenGL/RenderDebug_GL.cpp +++ b/neo/renderer/OpenGL/RenderDebug_GL.cpp @@ -2697,7 +2697,7 @@ void idRenderBackend::DBG_TestImage() } // Set State - GL_State( GLS_DEPTHFUNC_ALWAYS | GLS_SRCBLEND_ONE | GLS_DSTBLEND_ZERO ); + GL_State( GLS_SRCBLEND_ONE | GLS_DSTBLEND_ZERO | GLS_DEPTHMASK | GLS_DEPTHFUNC_ALWAYS | GLS_CULL_TWOSIDED ); // Set Parms float texS[4] = { 1.0f, 0.0f, 0.0f, 0.0f }; @@ -2708,6 +2708,7 @@ void idRenderBackend::DBG_TestImage() float texGenEnabled[4] = { 0, 0, 0, 0 }; renderProgManager.SetRenderParm( RENDERPARM_TEXGEN_0_ENABLED, texGenEnabled ); +#if 1 // not really necessary but just for clarity const float screenWidth = 1.0f; const float screenHeight = 1.0f; @@ -2716,7 +2717,7 @@ void idRenderBackend::DBG_TestImage() float scale[16] = { 0 }; scale[0] = w; // scale - scale[5] = h; // scale + scale[5] = -h; // scale scale[12] = halfScreenWidth - ( halfScreenWidth * w ); // translate scale[13] = halfScreenHeight - ( halfScreenHeight * h ); // translate scale[10] = 1.0f; @@ -2737,11 +2738,10 @@ void idRenderBackend::DBG_TestImage() float projMatrixTranspose[16]; R_MatrixTranspose( finalOrtho, projMatrixTranspose ); renderProgManager.SetRenderParms( RENDERPARM_MVPMATRIX_X, projMatrixTranspose, 4 ); - -// glMatrixMode( GL_PROJECTION ); -// glLoadMatrixf( finalOrtho ); -// glMatrixMode( GL_MODELVIEW ); -// glLoadIdentity(); +#else + // draw texture over entire screen + RB_SetMVP( renderMatrix_identity ); +#endif // Set Color GL_Color( 1, 1, 1, 1 ); @@ -2767,6 +2767,7 @@ void idRenderBackend::DBG_TestImage() // Draw! DrawElementsWithCounters( &testImageSurface ); + //DrawElementsWithCounters( &unitSquareSurface ); } // RB begin diff --git a/neo/renderer/RenderBackend.cpp b/neo/renderer/RenderBackend.cpp index 7d710289..ac1ef6bf 100644 --- a/neo/renderer/RenderBackend.cpp +++ b/neo/renderer/RenderBackend.cpp @@ -2068,7 +2068,7 @@ void idRenderBackend::AmbientPass( const drawSurf_t* const* drawSurfs, int numDr } else { - if( r_forceAmbient.GetFloat() <= 0 || r_skipAmbient.GetBool() ) + if( r_forceAmbient.GetFloat() <= 0 )//|| r_skipAmbient.GetBool() ) { // clear gbuffer GL_Clear( true, false, false, 0, 0.0f, 0.0f, 0.0f, 1.0f, false ); @@ -4990,19 +4990,31 @@ void idRenderBackend::DrawScreenSpaceAmbientOcclusion( const viewDef_t* _viewDef #endif SetVertexParms( RENDERPARM_MODELMATRIX_X, viewDef->unprojectionToCameraRenderMatrix[0], 4 ); + const static int BLUENOISE_SIZE = 256; + const float jitterSampleScale = 1.0f; + + float jitterTexScale[4]; + jitterTexScale[0] = r_shadowMapJitterScale.GetFloat() * jitterSampleScale; // TODO shadow buffer size fraction shadowMapSize / maxShadowMapSize + jitterTexScale[1] = r_shadowMapJitterScale.GetFloat() * jitterSampleScale; + jitterTexScale[2] = -r_shadowMapBiasScale.GetFloat(); + jitterTexScale[3] = 0.0f; + SetFragmentParm( RENDERPARM_JITTERTEXSCALE, jitterTexScale ); // rpJitterTexScale + float jitterTexOffset[4]; + jitterTexOffset[0] = 1.0f / BLUENOISE_SIZE; + jitterTexOffset[1] = 1.0f / BLUENOISE_SIZE; + if( r_shadowMapRandomizeJitter.GetBool() ) { - jitterTexOffset[0] = ( rand() & 255 ) / 255.0; - jitterTexOffset[1] = ( rand() & 255 ) / 255.0; + jitterTexOffset[2] = Sys_Milliseconds() / 1000.0f; + jitterTexOffset[3] = tr.frameCount % 64; } else { - jitterTexOffset[0] = 0; - jitterTexOffset[1] = 0; + jitterTexOffset[2] = 0.0f; + jitterTexOffset[3] = 0.0f; } - jitterTexOffset[2] = viewDef->renderView.time[0] * 0.001f; - jitterTexOffset[3] = 0.0f; + SetFragmentParm( RENDERPARM_JITTERTEXOFFSET, jitterTexOffset ); // rpJitterTexOffset GL_SelectTexture( 0 ); @@ -5018,6 +5030,9 @@ void idRenderBackend::DrawScreenSpaceAmbientOcclusion( const viewDef_t* _viewDef globalImages->currentDepthImage->Bind(); } + GL_SelectTexture( 2 ); + globalImages->blueNoiseImage256->Bind(); + DrawElementsWithCounters( &unitSquareSurface ); if( r_ssaoFiltering.GetBool() ) diff --git a/neo/renderer/RenderProgs_embedded.h b/neo/renderer/RenderProgs_embedded.h index ffae2712..41c0fe4c 100644 --- a/neo/renderer/RenderProgs_embedded.h +++ b/neo/renderer/RenderProgs_embedded.h @@ -2798,6 +2798,7 @@ static const cgShaderDef_t cg_renderprogs[] = "// *INDENT-OFF*\n" "uniform sampler2D samp0 : register( s0 ); // view normal/roughness\n" "uniform sampler2D samp1 : register( s1 ); // view depth\n" + "uniform sampler2D samp2 : register( s2 ); // blue noise 256\n" "\n" "#define CS_Z_buffer samp1\n" "\n" @@ -2813,7 +2814,22 @@ static const cgShaderDef_t cg_renderprogs[] = "// *INDENT-ON*\n" "\n" "\n" + "float BlueNoise( float2 n, float x )\n" + "{\n" + " float noise = tex2D( samp2, ( n.xy / 256.0 ) ).r;\n" "\n" + "#if TEMPORALLY_VARY_TAPS\n" + " noise = fract( noise + 0.61803398875 * rpJitterTexOffset.z * x );\n" + "#else\n" + " noise = fract( noise );\n" + "#endif\n" + "\n" + " noise = RemapNoiseTriErp( noise );\n" + "\n" + " //noise = noise * 2.0 - 1.0;\n" + "\n" + " return noise;\n" + "}\n" "\n" "/** Used for packing Z into the GB channels */\n" "// float CSZToKey( float z )\n" @@ -3094,13 +3110,20 @@ static const cgShaderDef_t cg_renderprogs[] = " }\n" "#endif\n" "\n" + "#if 1\n" + " float randomPatternRotationAngle = BlueNoise( ssP.xy, 10.0 ) * 10.0;\n" + " //float randomPatternRotationAngle = InterleavedGradientNoise( ssP.xy ) * 10.0;\n" + "#else\n" + "\n" " // Hash function used in the HPG12 AlchemyAO paper\n" " float randomPatternRotationAngle = float( ( ( 3 * ssP.x ) ^ ( ssP.y + ssP.x * ssP.y ) )\n" "#if TEMPORALLY_VARY_TAPS\n" - " + rpJitterTexOffset.x\n" + " + rpJitterTexOffset.z\n" "#endif\n" " ) * 10.0;\n" "\n" + "#endif\n" + "\n" " // Choose the screen-space sample radius\n" " // proportional to the projected area of the sphere\n" " float ssDiskRadius = -projScale * radius / C.z;\n" @@ -10068,7 +10091,7 @@ static const cgShaderDef_t cg_renderprogs[] = " float shadow = 0.0;\n" "\n" " // RB: casting a float to int and using it as index can really kill the performance ...\n" - " float numSamples = 6.0; //int(rpScreenCorrectionFactor.w);\n" + " float numSamples = 6.0;\n" " float stepSize = 1.0 / numSamples;\n" "\n" " //float4 jitterTC = ( fragment.position * rpScreenCorrectionFactor ) + rpJitterTexOffset;\n" diff --git a/neo/renderer/RenderSystem_init.cpp b/neo/renderer/RenderSystem_init.cpp index a0bdce49..07e6f230 100644 --- a/neo/renderer/RenderSystem_init.cpp +++ b/neo/renderer/RenderSystem_init.cpp @@ -293,7 +293,7 @@ idCVar r_ssgiFiltering( "r_ssgiFiltering", "1", CVAR_RENDERER | CVAR_BOOL, "" ); idCVar r_useSSAO( "r_useSSAO", "1", CVAR_RENDERER | CVAR_ARCHIVE | CVAR_BOOL, "use screen space ambient occlusion to darken corners" ); idCVar r_ssaoDebug( "r_ssaoDebug", "0", CVAR_RENDERER | CVAR_INTEGER, "" ); -idCVar r_ssaoFiltering( "r_ssaoFiltering", "1", CVAR_RENDERER | CVAR_BOOL, "" ); +idCVar r_ssaoFiltering( "r_ssaoFiltering", "0", CVAR_RENDERER | CVAR_BOOL, "" ); idCVar r_useHierarchicalDepthBuffer( "r_useHierarchicalDepthBuffer", "1", CVAR_RENDERER | CVAR_BOOL, "" ); idCVar r_usePBR( "r_usePBR", "1", CVAR_RENDERER | CVAR_ARCHIVE | CVAR_BOOL, "use PBR and Image Based Lighting instead of old Quake 4 style ambient lighting" );