Added high performance timer queries for OpenGL

This commit is contained in:
Robert Beckebans 2020-05-09 16:48:55 +02:00
parent d3bc29d510
commit f9a1c5ff36
6 changed files with 111 additions and 37 deletions

View file

@ -319,8 +319,10 @@ static void R_CheckPortableExtensions()
// GL_ARB_occlusion_query
glConfig.occlusionQueryAvailable = GLEW_ARB_occlusion_query != 0;
// GL_ARB_timer_query
glConfig.timerQueryAvailable = ( GLEW_ARB_timer_query != 0 || GLEW_EXT_timer_query != 0 ) && ( glConfig.vendor != VENDOR_INTEL || r_skipIntelWorkarounds.GetBool() ) && glConfig.driverType != GLDRV_OPENGL_MESA;
// GL_ARB_timer_query using the DSA interface
//glConfig.timerQueryAvailable = ( GLEW_ARB_timer_query != 0 || GLEW_EXT_timer_query != 0 ) && ( glConfig.vendor != VENDOR_INTEL || r_skipIntelWorkarounds.GetBool() ) && glConfig.driverType != GLDRV_OPENGL_MESA;
glConfig.timerQueryAvailable = ( GLEW_ARB_direct_state_access != 0 && GLEW_ARB_timer_query != 0 );
// GREMEDY_string_marker
glConfig.gremedyStringMarkerAvailable = GLEW_GREMEDY_string_marker != 0;
@ -753,6 +755,11 @@ void idRenderBackend::GL_BlockingSwapBuffers()
GLimp_SwapBuffers();
// RB: at this time the image is presented on the screen
glcontext.frameCounter++;
glcontext.frameParity = glcontext.frameCounter % NUM_FRAME_DATA;
const int beforeFence = Sys_Milliseconds();
if( r_showSwapBuffers.GetBool() && beforeFence - beforeSwap > 1 )
{
@ -1825,8 +1832,14 @@ idRenderBackend::idRenderBackend
*/
idRenderBackend::idRenderBackend()
{
	// Zero the GL timer query bookkeeping: the query object names and the
	// per-frame "issued" counters that gate the result read back.
	memset( glcontext.renderLogMainBlockTimeQueryIssued, 0, sizeof( glcontext.renderLogMainBlockTimeQueryIssued ) );
	memset( glcontext.renderLogMainBlockTimeQueryIds, 0, sizeof( glcontext.renderLogMainBlockTimeQueryIds ) );

	// Clear the per texture unit and stencil operation state.
	memset( glcontext.stencilOperations, 0, sizeof( glcontext.stencilOperations ) );
	memset( glcontext.tmu, 0, sizeof( glcontext.tmu ) );

	// No frame has been presented yet.
	glcontext.frameParity = 0;
	glcontext.frameCounter = 0;
}
/*
@ -2162,7 +2175,7 @@ void idRenderBackend::StereoRenderExecuteBackEndCommands( const emptyCommand_t*
// stop rendering on this thread
uint64 backEndFinishTime = Sys_Microseconds();
pc.totalMicroSec = backEndFinishTime - backEndStartTime;
pc.cpuTotalMicroSec = backEndFinishTime - backEndStartTime;
}
/*

View file

@ -3414,7 +3414,7 @@ idRenderBackend::DrawInteractions
*/
void idRenderBackend::DrawInteractions( const viewDef_t* _viewDef )
{
if( r_skipInteractions.GetBool() )
if( r_skipInteractions.GetBool() || viewDef->viewLights == NULL )
{
return;
}
@ -5978,7 +5978,8 @@ void idRenderBackend::PostProcess( const void* data )
return;
}
RENDERLOG_PRINTF( "---------- RB_PostProcess() ----------\n" );
renderLog.OpenMainBlock( MRB_POSTPROCESS );
renderLog.OpenBlock( "Render_PostProcessing", colorBlue );
// FIXME
#if !defined(USE_VULKAN)
@ -6077,7 +6078,6 @@ void idRenderBackend::PostProcess( const void* data )
#endif
}
#if 1
if( r_useFilmicPostProcessEffects.GetBool() )
{
globalImages->currentRenderImage->CopyFramebuffer( viewport.x1, viewport.y1, viewport.GetWidth(), viewport.GetHeight() );
@ -6122,12 +6122,12 @@ void idRenderBackend::PostProcess( const void* data )
// Draw
DrawElementsWithCounters( &unitSquareSurface );
}
#endif
GL_SelectTexture( 0 );
renderProgManager.Unbind();
#endif
//renderLog.CloseBlock();
renderLog.CloseBlock();
renderLog.CloseMainBlock();
}

View file

@ -220,13 +220,15 @@ extern vulkanContext_t vkcontext;
// State of the OpenGL backend, shared through the global glcontext instance.
struct glContext_t
{
// bool bAnisotropicFilterAvailable;
// bool bTextureLODBiasAvailable;
// float maxTextureAnisotropy;
// incremented in GL_BlockingSwapBuffers() right after GLimp_SwapBuffers()
uint64 frameCounter;
// frameCounter % NUM_FRAME_DATA, selects the per-frame row of the query arrays below
uint32 frameParity;
tmu_t tmu[ MAX_MULTITEXTURE_UNITS ];
uint64 stencilOperations[ STENCIL_FACE_NUM ];
// GL_TIMESTAMP query objects used with glQueryCounter(),
// indexed [ frameParity ][ block * 2 + 0 ] for the begin and [ block * 2 + 1 ] for the end of a main block
GLuint renderLogMainBlockTimeQueryIds[ NUM_FRAME_DATA ][ MRB_TOTAL_QUERIES ];
// incremented whenever the matching timestamp query is issued; the read back only fetches
// results for entries > 0 and resets the row of the frame it read to 0 afterwards
uint32 renderLogMainBlockTimeQueryIssued[ NUM_FRAME_DATA ][ MRB_TOTAL_QUERIES ];
};
extern glContext_t glcontext;

View file

@ -882,8 +882,6 @@ public:
idRenderBackend backend;
unsigned timerQueryId; // for GL_TIME_ELAPSED_EXT queries
private:
bool bInitialized;
};

View file

@ -62,8 +62,6 @@ const char* renderLogMainBlockLabels[] =
#if defined( USE_VULKAN )
compile_time_assert( NUM_TIMESTAMP_QUERIES >= ( MRB_TOTAL_QUERIES ) );
#else
static GLuint renderLogMainBlockTimeQueryIds[MRB_MAX];
#endif
extern uint64 Sys_Microseconds();
@ -594,9 +592,6 @@ idRenderLog::idRenderLog
*/
// On non-Vulkan builds this zeroes the file-local renderLogMainBlockTimeQueryIds array;
// on Vulkan builds the body is empty.
// NOTE(review): this listing also shows glcontext.renderLogMainBlockTimeQueryIds being zeroed in
// idRenderBackend::idRenderBackend() - confirm which of the two arrays is still in use.
idRenderLog::idRenderLog()
{
#if !defined(USE_VULKAN)
memset( renderLogMainBlockTimeQueryIds, 0, sizeof( renderLogMainBlockTimeQueryIds ) );
#endif
}
#if 1
@ -608,9 +603,9 @@ idRenderLog::OpenMainBlock
*/
void idRenderLog::OpenMainBlock( renderLogMainBlock_t block )
{
#if defined( USE_VULKAN )
mainBlock = block;
#if defined( USE_VULKAN )
if( vkcontext.queryIndex[ vkcontext.frameParity ] >= ( NUM_TIMESTAMP_QUERIES - 1 ) )
{
return;
@ -621,18 +616,19 @@ void idRenderLog::OpenMainBlock( renderLogMainBlock_t block )
uint32 queryIndex = vkcontext.queryAssignedIndex[ vkcontext.frameParity ][ mainBlock * 2 + 0 ] = vkcontext.queryIndex[ vkcontext.frameParity ]++;
vkCmdWriteTimestamp( commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, queryPool, queryIndex );
#else
/*
if( glConfig.timerQueryAvailable )
{
if( renderLogMainBlockTimeQueryIds[block] == 0 )
if( glcontext.renderLogMainBlockTimeQueryIds[ glcontext.frameParity ][ block * 2 ] == 0 )
{
glGenQueries( 1, &renderLogMainBlockTimeQueryIds[block] );
glCreateQueries( GL_TIMESTAMP, 2, &glcontext.renderLogMainBlockTimeQueryIds[ glcontext.frameParity ][ block * 2 ] );
}
glBeginQuery( GL_TIME_ELAPSED_EXT, renderLogMainBlockTimeQueryIds[block] );
glQueryCounter( glcontext.renderLogMainBlockTimeQueryIds[ glcontext.frameParity ][ block * 2 + 0 ], GL_TIMESTAMP );
glcontext.renderLogMainBlockTimeQueryIssued[ glcontext.frameParity ][ block * 2 + 0 ]++;
}
*/
#endif
}
@ -655,8 +651,14 @@ void idRenderLog::CloseMainBlock()
uint32 queryIndex = vkcontext.queryAssignedIndex[ vkcontext.frameParity ][ mainBlock * 2 + 1 ] = vkcontext.queryIndex[ vkcontext.frameParity ]++;
vkCmdWriteTimestamp( commandBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, queryPool, queryIndex );
#else
//glEndQuery( GL_TIME_ELAPSED_EXT );
if( glConfig.timerQueryAvailable )
{
glQueryCounter( glcontext.renderLogMainBlockTimeQueryIds[ glcontext.frameParity ][ mainBlock * 2 + 1 ], GL_TIMESTAMP );
glcontext.renderLogMainBlockTimeQueryIssued[ glcontext.frameParity ][ mainBlock * 2 + 1 ]++;
}
#endif
}

View file

@ -133,13 +133,17 @@ void idRenderSystemLocal::RenderCommandBuffers( const emptyCommand_t* const cmdH
#if !defined(USE_VULKAN)
if( glConfig.timerQueryAvailable )
{
if( tr.timerQueryId == 0 )
if( glcontext.renderLogMainBlockTimeQueryIds[ glcontext.frameParity ][ MRB_GPU_TIME ] == 0 )
{
glGenQueries( 1, & tr.timerQueryId );
glCreateQueries( GL_TIMESTAMP, 2, &glcontext.renderLogMainBlockTimeQueryIds[ glcontext.frameParity ][MRB_GPU_TIME ] );
}
glBeginQuery( GL_TIME_ELAPSED_EXT, tr.timerQueryId );
glQueryCounter( glcontext.renderLogMainBlockTimeQueryIds[ glcontext.frameParity ][ MRB_GPU_TIME * 2 + 0 ], GL_TIMESTAMP );
backend.ExecuteBackEndCommands( cmdHead );
glEndQuery( GL_TIME_ELAPSED_EXT );
glQueryCounter( glcontext.renderLogMainBlockTimeQueryIds[ glcontext.frameParity ][ MRB_GPU_TIME * 2 + 1 ], GL_TIMESTAMP );
glcontext.renderLogMainBlockTimeQueryIssued[ glcontext.frameParity ][ MRB_GPU_TIME * 2 + 1 ]++;
glFlush();
}
else
@ -672,18 +676,73 @@ void idRenderSystemLocal::SwapCommandBuffers_FinishRendering(
// read back the start and end timer queries from the previous frame
if( glConfig.timerQueryAvailable )
{
// RB: 64 bit fixes, changed int64 to GLuint64EXT
GLuint64EXT drawingTimeNanoseconds = 0;
// RB end
GLuint64EXT gpuStartNanoseconds = 0;
GLuint64EXT gpuEndNanoseconds = 0;
if( tr.timerQueryId != 0 )
if( glcontext.renderLogMainBlockTimeQueryIssued[ glcontext.frameParity ^ 1 ][ MRB_GPU_TIME * 2 + 1 ] > 0 )
{
glGetQueryObjectui64vEXT( tr.timerQueryId, GL_QUERY_RESULT, &drawingTimeNanoseconds );
glGetQueryObjectui64vEXT( glcontext.renderLogMainBlockTimeQueryIds[ glcontext.frameParity ^ 1 ][ MRB_GPU_TIME * 2 + 0], GL_QUERY_RESULT, &gpuStartNanoseconds );
glGetQueryObjectui64vEXT( glcontext.renderLogMainBlockTimeQueryIds[ glcontext.frameParity ^ 1 ][ MRB_GPU_TIME * 2 + 1], GL_QUERY_RESULT, &gpuEndNanoseconds );
backend.pc.gpuMicroSec = ( gpuEndNanoseconds - gpuStartNanoseconds ) / 1000;
if( gpuMicroSec != NULL )
{
*gpuMicroSec = backend.pc.gpuMicroSec;
}
}
if( gpuMicroSec != NULL )
if( glcontext.renderLogMainBlockTimeQueryIssued[ glcontext.frameParity ^ 1 ][ MRB_FILL_DEPTH_BUFFER * 2 + 1 ] > 0 )
{
*gpuMicroSec = drawingTimeNanoseconds / 1000;
glGetQueryObjectui64vEXT( glcontext.renderLogMainBlockTimeQueryIds[ glcontext.frameParity ^ 1 ][ MRB_FILL_DEPTH_BUFFER * 2 + 0], GL_QUERY_RESULT, &gpuStartNanoseconds );
glGetQueryObjectui64vEXT( glcontext.renderLogMainBlockTimeQueryIds[ glcontext.frameParity ^ 1 ][ MRB_FILL_DEPTH_BUFFER * 2 + 1], GL_QUERY_RESULT, &gpuEndNanoseconds );
backend.pc.gpuDepthMicroSec = ( gpuEndNanoseconds - gpuStartNanoseconds ) / 1000;
}
if( glcontext.renderLogMainBlockTimeQueryIssued[ glcontext.frameParity ^ 1 ][ MRB_SSAO_PASS * 2 + 1 ] > 0 )
{
glGetQueryObjectui64vEXT( glcontext.renderLogMainBlockTimeQueryIds[ glcontext.frameParity ^ 1 ][ MRB_SSAO_PASS * 2 + 0], GL_QUERY_RESULT, &gpuStartNanoseconds );
glGetQueryObjectui64vEXT( glcontext.renderLogMainBlockTimeQueryIds[ glcontext.frameParity ^ 1 ][ MRB_SSAO_PASS * 2 + 1], GL_QUERY_RESULT, &gpuEndNanoseconds );
backend.pc.gpuScreenSpaceAmbientOcclusionMicroSec = ( gpuEndNanoseconds - gpuStartNanoseconds ) / 1000;
}
if( glcontext.renderLogMainBlockTimeQueryIssued[ glcontext.frameParity ^ 1 ][ MRB_AMBIENT_PASS * 2 + 1 ] > 0 )
{
glGetQueryObjectui64vEXT( glcontext.renderLogMainBlockTimeQueryIds[ glcontext.frameParity ^ 1 ][ MRB_AMBIENT_PASS * 2 + 0], GL_QUERY_RESULT, &gpuStartNanoseconds );
glGetQueryObjectui64vEXT( glcontext.renderLogMainBlockTimeQueryIds[ glcontext.frameParity ^ 1 ][ MRB_AMBIENT_PASS * 2 + 1], GL_QUERY_RESULT, &gpuEndNanoseconds );
backend.pc.gpuAmbientPassMicroSec = ( gpuEndNanoseconds - gpuStartNanoseconds ) / 1000;
}
if( glcontext.renderLogMainBlockTimeQueryIssued[ glcontext.frameParity ^ 1 ][ MRB_DRAW_INTERACTIONS * 2 + 1 ] > 0 )
{
glGetQueryObjectui64vEXT( glcontext.renderLogMainBlockTimeQueryIds[ glcontext.frameParity ^ 1 ][ MRB_DRAW_INTERACTIONS * 2 + 0], GL_QUERY_RESULT, &gpuStartNanoseconds );
glGetQueryObjectui64vEXT( glcontext.renderLogMainBlockTimeQueryIds[ glcontext.frameParity ^ 1 ][ MRB_DRAW_INTERACTIONS * 2 + 1], GL_QUERY_RESULT, &gpuEndNanoseconds );
backend.pc.gpuInteractionsMicroSec = ( gpuEndNanoseconds - gpuStartNanoseconds ) / 1000;
}
if( glcontext.renderLogMainBlockTimeQueryIssued[ glcontext.frameParity ^ 1 ][ MRB_DRAW_SHADER_PASSES * 2 + 1 ] > 0 )
{
glGetQueryObjectui64vEXT( glcontext.renderLogMainBlockTimeQueryIds[ glcontext.frameParity ^ 1 ][ MRB_DRAW_SHADER_PASSES * 2 + 0], GL_QUERY_RESULT, &gpuStartNanoseconds );
glGetQueryObjectui64vEXT( glcontext.renderLogMainBlockTimeQueryIds[ glcontext.frameParity ^ 1 ][ MRB_DRAW_SHADER_PASSES * 2 + 1], GL_QUERY_RESULT, &gpuEndNanoseconds );
backend.pc.gpuShaderPassMicroSec = ( gpuEndNanoseconds - gpuStartNanoseconds ) / 1000;
}
if( glcontext.renderLogMainBlockTimeQueryIssued[ glcontext.frameParity ^ 1 ][ MRB_POSTPROCESS * 2 + 1 ] > 0 )
{
glGetQueryObjectui64vEXT( glcontext.renderLogMainBlockTimeQueryIds[ glcontext.frameParity ^ 1 ][ MRB_POSTPROCESS * 2 + 0], GL_QUERY_RESULT, &gpuStartNanoseconds );
glGetQueryObjectui64vEXT( glcontext.renderLogMainBlockTimeQueryIds[ glcontext.frameParity ^ 1 ][ MRB_POSTPROCESS * 2 + 1], GL_QUERY_RESULT, &gpuEndNanoseconds );
backend.pc.gpuPostProcessingMicroSec = ( gpuEndNanoseconds - gpuStartNanoseconds ) / 1000;
}
for( int i = 0; i < MRB_TOTAL_QUERIES; i++ )
{
glcontext.renderLogMainBlockTimeQueryIssued[ glcontext.frameParity ^ 1 ][ i ] = 0;
}
}
#endif