Improved SSAO performance with Blue Noise

This commit is contained in:
Robert Beckebans 2020-05-10 15:05:46 +02:00
parent d5e8380a5f
commit 9931fab681
7 changed files with 83 additions and 19 deletions

View file

@ -79,6 +79,8 @@ The main goal is that the new content looks the same in RBDOOM-3-BFG as in Blend
* Improved Shadow Mapping performance by reducing the number of taps from 12 to 6 while keeping good quality by dithering the result with Blue Noise magic by Alan Wolfe
* Improved Screen Space Ambient Occlusion performance by enhancing the quality with Blue Noise and skipping the expensive extra bilateral filtering pass
* Updated idRenderLog to support RenderDoc and Nvidia's Nsight and only issue OpenGL or Vulkan debug commands if the debug extensions are detected. Reference: https://devblogs.nvidia.com/best-practices-gpu-performance-events/
* Artistic Style C++ beautifier configuration has slightly changed to work closer to Clang Format's behaviour

View file

@ -94,6 +94,7 @@ const float projScale = 500.0;
// *INDENT-OFF*
uniform sampler2D samp0 : register( s0 ); // view normal/roughness
uniform sampler2D samp1 : register( s1 ); // view depth
uniform sampler2D samp2 : register( s2 ); // blue noise 256
#define CS_Z_buffer samp1
@ -109,7 +110,22 @@ struct PS_OUT
// *INDENT-ON*
/**
 * Looks up a blue noise value for the position n and returns it after
 * passing it through RemapNoiseTriErp().
 *
 * n - lookup position; divided by 256.0 to form the texture coordinate into samp2
 * x - multiplier applied to the time based offset when TEMPORALLY_VARY_TAPS is enabled
 */
float BlueNoise( float2 n, float x )
{
	// samp2 holds the blue noise texture, only the red channel is used
	float2 noiseTC = n.xy / 256.0;
	float noise = tex2D( samp2, noiseTC ).r;

#if TEMPORALLY_VARY_TAPS
	// shift the value by a multiple of 0.61803398875 (fractional part of the golden ratio)
	// scaled by rpJitterTexOffset.z and x; fract() below wraps the sum
	noise += 0.61803398875 * rpJitterTexOffset.z * x;
#endif

	// NOTE: an additional "noise * 2.0 - 1.0" remap is intentionally not applied here
	return RemapNoiseTriErp( fract( noise ) );
}
/** Used for packing Z into the GB channels */
// float CSZToKey( float z )
@ -390,13 +406,20 @@ void main( PS_IN fragment, out PS_OUT result )
}
#endif
#if 1
float randomPatternRotationAngle = BlueNoise( ssP.xy, 10.0 ) * 10.0;
//float randomPatternRotationAngle = InterleavedGradientNoise( ssP.xy ) * 10.0;
#else
// Hash function used in the HPG12 AlchemyAO paper
float randomPatternRotationAngle = float( ( ( 3 * ssP.x ) ^ ( ssP.y + ssP.x * ssP.y ) )
#if TEMPORALLY_VARY_TAPS
+ rpJitterTexOffset.x
+ rpJitterTexOffset.z
#endif
) * 10.0;
#endif
// Choose the screen-space sample radius
// proportional to the projected area of the sphere
float ssDiskRadius = -projScale * radius / C.z;

View file

@ -303,7 +303,7 @@ void main( PS_IN fragment, out PS_OUT result )
float shadow = 0.0;
// RB: casting a float to int and using it as index can really kill the performance ...
float numSamples = 6.0; //int(rpScreenCorrectionFactor.w);
float numSamples = 6.0;
float stepSize = 1.0 / numSamples;
//float4 jitterTC = ( fragment.position * rpScreenCorrectionFactor ) + rpJitterTexOffset;

View file

@ -2697,7 +2697,7 @@ void idRenderBackend::DBG_TestImage()
}
// Set State
GL_State( GLS_DEPTHFUNC_ALWAYS | GLS_SRCBLEND_ONE | GLS_DSTBLEND_ZERO );
GL_State( GLS_SRCBLEND_ONE | GLS_DSTBLEND_ZERO | GLS_DEPTHMASK | GLS_DEPTHFUNC_ALWAYS | GLS_CULL_TWOSIDED );
// Set Parms
float texS[4] = { 1.0f, 0.0f, 0.0f, 0.0f };
@ -2708,6 +2708,7 @@ void idRenderBackend::DBG_TestImage()
float texGenEnabled[4] = { 0, 0, 0, 0 };
renderProgManager.SetRenderParm( RENDERPARM_TEXGEN_0_ENABLED, texGenEnabled );
#if 1
// not really necessary but just for clarity
const float screenWidth = 1.0f;
const float screenHeight = 1.0f;
@ -2716,7 +2717,7 @@ void idRenderBackend::DBG_TestImage()
float scale[16] = { 0 };
scale[0] = w; // scale
scale[5] = h; // scale
scale[5] = -h; // scale
scale[12] = halfScreenWidth - ( halfScreenWidth * w ); // translate
scale[13] = halfScreenHeight - ( halfScreenHeight * h ); // translate
scale[10] = 1.0f;
@ -2737,11 +2738,10 @@ void idRenderBackend::DBG_TestImage()
float projMatrixTranspose[16];
R_MatrixTranspose( finalOrtho, projMatrixTranspose );
renderProgManager.SetRenderParms( RENDERPARM_MVPMATRIX_X, projMatrixTranspose, 4 );
// glMatrixMode( GL_PROJECTION );
// glLoadMatrixf( finalOrtho );
// glMatrixMode( GL_MODELVIEW );
// glLoadIdentity();
#else
// draw texture over entire screen
RB_SetMVP( renderMatrix_identity );
#endif
// Set Color
GL_Color( 1, 1, 1, 1 );
@ -2767,6 +2767,7 @@ void idRenderBackend::DBG_TestImage()
// Draw!
DrawElementsWithCounters( &testImageSurface );
//DrawElementsWithCounters( &unitSquareSurface );
}
// RB begin

View file

@ -2068,7 +2068,7 @@ void idRenderBackend::AmbientPass( const drawSurf_t* const* drawSurfs, int numDr
}
else
{
if( r_forceAmbient.GetFloat() <= 0 || r_skipAmbient.GetBool() )
if( r_forceAmbient.GetFloat() <= 0 )//|| r_skipAmbient.GetBool() )
{
// clear gbuffer
GL_Clear( true, false, false, 0, 0.0f, 0.0f, 0.0f, 1.0f, false );
@ -4990,19 +4990,31 @@ void idRenderBackend::DrawScreenSpaceAmbientOcclusion( const viewDef_t* _viewDef
#endif
SetVertexParms( RENDERPARM_MODELMATRIX_X, viewDef->unprojectionToCameraRenderMatrix[0], 4 );
const static int BLUENOISE_SIZE = 256;
const float jitterSampleScale = 1.0f;
float jitterTexScale[4];
jitterTexScale[0] = r_shadowMapJitterScale.GetFloat() * jitterSampleScale; // TODO shadow buffer size fraction shadowMapSize / maxShadowMapSize
jitterTexScale[1] = r_shadowMapJitterScale.GetFloat() * jitterSampleScale;
jitterTexScale[2] = -r_shadowMapBiasScale.GetFloat();
jitterTexScale[3] = 0.0f;
SetFragmentParm( RENDERPARM_JITTERTEXSCALE, jitterTexScale ); // rpJitterTexScale
float jitterTexOffset[4];
jitterTexOffset[0] = 1.0f / BLUENOISE_SIZE;
jitterTexOffset[1] = 1.0f / BLUENOISE_SIZE;
if( r_shadowMapRandomizeJitter.GetBool() )
{
jitterTexOffset[0] = ( rand() & 255 ) / 255.0;
jitterTexOffset[1] = ( rand() & 255 ) / 255.0;
jitterTexOffset[2] = Sys_Milliseconds() / 1000.0f;
jitterTexOffset[3] = tr.frameCount % 64;
}
else
{
jitterTexOffset[0] = 0;
jitterTexOffset[1] = 0;
jitterTexOffset[2] = 0.0f;
jitterTexOffset[3] = 0.0f;
}
jitterTexOffset[2] = viewDef->renderView.time[0] * 0.001f;
jitterTexOffset[3] = 0.0f;
SetFragmentParm( RENDERPARM_JITTERTEXOFFSET, jitterTexOffset ); // rpJitterTexOffset
GL_SelectTexture( 0 );
@ -5018,6 +5030,9 @@ void idRenderBackend::DrawScreenSpaceAmbientOcclusion( const viewDef_t* _viewDef
globalImages->currentDepthImage->Bind();
}
GL_SelectTexture( 2 );
globalImages->blueNoiseImage256->Bind();
DrawElementsWithCounters( &unitSquareSurface );
if( r_ssaoFiltering.GetBool() )

View file

@ -2798,6 +2798,7 @@ static const cgShaderDef_t cg_renderprogs[] =
"// *INDENT-OFF*\n"
"uniform sampler2D samp0 : register( s0 ); // view normal/roughness\n"
"uniform sampler2D samp1 : register( s1 ); // view depth\n"
"uniform sampler2D samp2 : register( s2 ); // blue noise 256\n"
"\n"
"#define CS_Z_buffer samp1\n"
"\n"
@ -2813,7 +2814,22 @@ static const cgShaderDef_t cg_renderprogs[] =
"// *INDENT-ON*\n"
"\n"
"\n"
"float BlueNoise( float2 n, float x )\n"
"{\n"
" float noise = tex2D( samp2, ( n.xy / 256.0 ) ).r;\n"
"\n"
"#if TEMPORALLY_VARY_TAPS\n"
" noise = fract( noise + 0.61803398875 * rpJitterTexOffset.z * x );\n"
"#else\n"
" noise = fract( noise );\n"
"#endif\n"
"\n"
" noise = RemapNoiseTriErp( noise );\n"
"\n"
" //noise = noise * 2.0 - 1.0;\n"
"\n"
" return noise;\n"
"}\n"
"\n"
"/** Used for packing Z into the GB channels */\n"
"// float CSZToKey( float z )\n"
@ -3094,13 +3110,20 @@ static const cgShaderDef_t cg_renderprogs[] =
" }\n"
"#endif\n"
"\n"
"#if 1\n"
" float randomPatternRotationAngle = BlueNoise( ssP.xy, 10.0 ) * 10.0;\n"
" //float randomPatternRotationAngle = InterleavedGradientNoise( ssP.xy ) * 10.0;\n"
"#else\n"
"\n"
" // Hash function used in the HPG12 AlchemyAO paper\n"
" float randomPatternRotationAngle = float( ( ( 3 * ssP.x ) ^ ( ssP.y + ssP.x * ssP.y ) )\n"
"#if TEMPORALLY_VARY_TAPS\n"
" + rpJitterTexOffset.x\n"
" + rpJitterTexOffset.z\n"
"#endif\n"
" ) * 10.0;\n"
"\n"
"#endif\n"
"\n"
" // Choose the screen-space sample radius\n"
" // proportional to the projected area of the sphere\n"
" float ssDiskRadius = -projScale * radius / C.z;\n"
@ -10068,7 +10091,7 @@ static const cgShaderDef_t cg_renderprogs[] =
" float shadow = 0.0;\n"
"\n"
" // RB: casting a float to int and using it as index can really kill the performance ...\n"
" float numSamples = 6.0; //int(rpScreenCorrectionFactor.w);\n"
" float numSamples = 6.0;\n"
" float stepSize = 1.0 / numSamples;\n"
"\n"
" //float4 jitterTC = ( fragment.position * rpScreenCorrectionFactor ) + rpJitterTexOffset;\n"

View file

@ -293,7 +293,7 @@ idCVar r_ssgiFiltering( "r_ssgiFiltering", "1", CVAR_RENDERER | CVAR_BOOL, "" );
idCVar r_useSSAO( "r_useSSAO", "1", CVAR_RENDERER | CVAR_ARCHIVE | CVAR_BOOL, "use screen space ambient occlusion to darken corners" );
idCVar r_ssaoDebug( "r_ssaoDebug", "0", CVAR_RENDERER | CVAR_INTEGER, "" );
idCVar r_ssaoFiltering( "r_ssaoFiltering", "1", CVAR_RENDERER | CVAR_BOOL, "" );
idCVar r_ssaoFiltering( "r_ssaoFiltering", "0", CVAR_RENDERER | CVAR_BOOL, "" );
idCVar r_useHierarchicalDepthBuffer( "r_useHierarchicalDepthBuffer", "1", CVAR_RENDERER | CVAR_BOOL, "" );
idCVar r_usePBR( "r_usePBR", "1", CVAR_RENDERER | CVAR_ARCHIVE | CVAR_BOOL, "use PBR and Image Based Lighting instead of old Quake 4 style ambient lighting" );