From d3bc29d510e082e11f13b77afa07f647d8463cf8 Mon Sep 17 00:00:00 2001 From: Robert Beckebans Date: Tue, 5 May 2020 01:21:29 +0200 Subject: [PATCH] Added GPU timer queries for Vulkan --- neo/framework/Common.cpp | 2 +- neo/framework/Common_local.h | 38 +++++++ neo/framework/Console.cpp | 113 +++++++++++-------- neo/imgui/BFGimguiImpl.cpp | 2 +- neo/renderer/RenderBackend.cpp | 8 +- neo/renderer/RenderBackend.h | 12 ++- neo/renderer/RenderLog.cpp | 46 +++++--- neo/renderer/RenderLog.h | 17 +-- neo/renderer/RenderSystem.cpp | 12 ++- neo/renderer/RenderSystem.h | 20 ++-- neo/renderer/Vulkan/RenderBackend_VK.cpp | 132 ++++++++++++++++++++++- neo/renderer/Vulkan/qvk.h | 1 + neo/renderer/tr_frontend_addlights.cpp | 2 +- neo/renderer/tr_frontend_addmodels.cpp | 2 +- 14 files changed, 319 insertions(+), 88 deletions(-) diff --git a/neo/framework/Common.cpp b/neo/framework/Common.cpp index 28d47a3a..4b2cbbe3 100644 --- a/neo/framework/Common.cpp +++ b/neo/framework/Common.cpp @@ -75,7 +75,7 @@ idCVar com_forceGenericSIMD( "com_forceGenericSIMD", "0", CVAR_BOOL | CVAR_SYSTE idCVar com_developer( "developer", "0", CVAR_BOOL | CVAR_SYSTEM | CVAR_NOCHEAT, "developer mode" ); idCVar com_speeds( "com_speeds", "0", CVAR_BOOL | CVAR_SYSTEM | CVAR_NOCHEAT, "show engine timings" ); -// DG: support "com_showFPS 2" for fps-only view like in classic doom3 => make it CVAR_INTEGER +// DG: support "com_showFPS 1" for fps-only view like in classic doom3 => make it CVAR_INTEGER idCVar com_showFPS( "com_showFPS", "0", CVAR_INTEGER | CVAR_SYSTEM | CVAR_ARCHIVE | CVAR_NOCHEAT, "show frames rendered per second. 0: off 1: default bfg values, 2: only show FPS (classic view)" ); // DG end idCVar com_showMemoryUsage( "com_showMemoryUsage", "0", CVAR_BOOL | CVAR_SYSTEM | CVAR_NOCHEAT, "show total and per frame memory usage" ); diff --git a/neo/framework/Common_local.h b/neo/framework/Common_local.h index 4ee5259b..7367a6a1 100644 --- a/neo/framework/Common_local.h +++ b/neo/framework/Common_local.h @@ -330,6 +330,44 @@ public: { return time_gpu; } + + // RB begin + uint64 GetRendererGpuEarlyZMicroseconds() const + { + return stats_backend.gpuDepthMicroSec; + } + + uint64 GetRendererGpuSSAOMicroseconds() const + { + return stats_backend.gpuScreenSpaceAmbientOcclusionMicroSec; + } + + uint64 GetRendererGpuSSRMicroseconds() const + { + return stats_backend.gpuScreenSpaceReflectionsMicroSec; + } + + uint64 GetRendererGpuAmbientPassMicroseconds() const + { + return stats_backend.gpuAmbientPassMicroSec; + } + + uint64 GetRendererGpuInteractionsMicroseconds() const + { + return stats_backend.gpuInteractionsMicroSec; + } + + uint64 GetRendererGpuShaderPassMicroseconds() const + { + return stats_backend.gpuShaderPassMicroSec; + } + + uint64 GetRendererGpuPostProcessingMicroseconds() const + { + return stats_backend.gpuPostProcessingMicroSec; + } + // RB end + // foresthale 2014-05-30: a special binarize pacifier has to be shown in // some cases, which includes filename and ETA information, note that // the progress function takes 0-1 float, not 0-100, and can be called diff --git a/neo/framework/Console.cpp b/neo/framework/Console.cpp index add45fd4..43619819 100644 --- a/neo/framework/Console.cpp +++ b/neo/framework/Console.cpp @@ -209,16 +209,17 @@ void idConsoleLocal::DrawTextRightAlign( float x, float& y, const char* text, .. idConsoleLocal::DrawFPS ================== */ -#define FPS_FRAMES 90 +#define FPS_FRAMES 6 +#define FPS_FRAMES_HISTORY 90 float idConsoleLocal::DrawFPS( float y ) { static float previousTimes[FPS_FRAMES]; - static float previousTimesNormalized[FPS_FRAMES]; + static float previousTimesNormalized[FPS_FRAMES_HISTORY]; static int index; static int previous; static int valuesOffset = 0; - bool renderImGuiPerfWindow = ImGuiHook::IsReadyToRender() && ( com_showFPS.GetInteger() == 1 ); + bool renderImGuiPerfWindow = ImGuiHook::IsReadyToRender() && ( com_showFPS.GetInteger() > 1 ); // don't use serverTime, because that will be drifting to // correct for internet lag changes, timescales, timedemos, etc @@ -231,8 +232,8 @@ float idConsoleLocal::DrawFPS( float y ) const float milliSecondsPerFrame = 1000.0f / com_engineHz_latched; previousTimes[index % FPS_FRAMES] = frameTime; - previousTimesNormalized[index % FPS_FRAMES] = frameTime / milliSecondsPerFrame; - valuesOffset = ( valuesOffset + 1 ) % FPS_FRAMES; + previousTimesNormalized[index % FPS_FRAMES_HISTORY] = frameTime / milliSecondsPerFrame; + valuesOffset = ( valuesOffset + 1 ) % FPS_FRAMES_HISTORY; index++; if( index > FPS_FRAMES ) { @@ -252,7 +253,7 @@ float idConsoleLocal::DrawFPS( float y ) const char* s = va( "%ifps", fps ); int w = strlen( s ) * BIGCHAR_WIDTH; - if( com_showFPS.GetInteger() == 2 ) + if( com_showFPS.GetInteger() == 1 ) { renderSystem->DrawBigStringExt( LOCALSAFE_RIGHT - w, idMath::Ftoi( y ) + 2, s, colorWhite, true ); } @@ -260,8 +261,8 @@ float idConsoleLocal::DrawFPS( float y ) y += BIGCHAR_HEIGHT + 4; - // DG: "com_showFPS 2" means: show FPS only, like in classic doom3 - if( com_showFPS.GetInteger() == 2 ) + // DG: "com_showFPS 1" means: show FPS only, like in classic doom3 + if( com_showFPS.GetInteger() == 1 ) { return y; } @@ -279,6 +280,13 @@ float idConsoleLocal::DrawFPS( float y ) const uint64 rendererShadowsTime = commonLocal.GetRendererShadowsMicroseconds(); const uint64 rendererGPUIdleTime = commonLocal.GetRendererIdleMicroseconds(); const uint64 rendererGPUTime = commonLocal.GetRendererGPUMicroseconds(); + const uint64 rendererGPUEarlyZTime = commonLocal.GetRendererGpuEarlyZMicroseconds(); + const uint64 rendererGPU_SSAOTime = commonLocal.GetRendererGpuSSAOMicroseconds(); + const uint64 rendererGPU_SSRTime = commonLocal.GetRendererGpuSSRMicroseconds(); + const uint64 rendererGPUAmbientPassTime = commonLocal.GetRendererGpuAmbientPassMicroseconds(); + const uint64 rendererGPUInteractionsTime = commonLocal.GetRendererGpuInteractionsMicroseconds(); + const uint64 rendererGPUShaderPassesTime = commonLocal.GetRendererGpuShaderPassMicroseconds(); + const uint64 rendererGPUPostProcessingTime = commonLocal.GetRendererGpuPostProcessingMicroseconds(); const int maxTime = 16 * 1000; #if 1 @@ -286,8 +294,15 @@ float idConsoleLocal::DrawFPS( float y ) // RB: use ImGui to show more detailed stats about the scene loads if( ImGuiHook::IsReadyToRender() ) { - int32 statsWindowWidth = 550; - int32 statsWindowHeight = 320; // 290 without the frame plot + // start smaller + int32 statsWindowWidth = 320; + int32 statsWindowHeight = 260; + + if( com_showFPS.GetInteger() > 2 ) + { + statsWindowWidth = 550; + statsWindowHeight = 370; + } ImVec2 pos; pos.x = renderSystem->GetWidth() - statsWindowWidth; @@ -341,63 +356,71 @@ float idConsoleLocal::DrawFPS( float y ) int width = renderSystem->GetWidth(); int height = renderSystem->GetHeight(); - ImGui::TextColored( colorGreen, "API: %s, AA[%i, %i]: %s, %s", API, width, height, aaMode, resolutionText.c_str() ); + ImGui::TextColored( colorCyan, "API: %s, AA[%i, %i]: %s, %s", API, width, height, aaMode, resolutionText.c_str() ); - ImGui::TextColored( colorYellow, "GENERAL: views:%i draws:%i tris:%i (shdw:%i)", + ImGui::TextColored( colorLtGrey, "GENERAL: views:%i draws:%i tris:%i (shdw:%i)", commonLocal.stats_frontend.c_numViews, commonLocal.stats_backend.c_drawElements + commonLocal.stats_backend.c_shadowElements, ( commonLocal.stats_backend.c_drawIndexes + commonLocal.stats_backend.c_shadowIndexes ) / 3, commonLocal.stats_backend.c_shadowIndexes / 3 ); - ImGui::TextColored( colorLtGrey, "DYNAMIC: callback:%i md5:%i dfrmVerts:%i dfrmTris:%i tangTris:%i guis:%i", - commonLocal.stats_frontend.c_entityDefCallbacks, - commonLocal.stats_frontend.c_generateMd5, - commonLocal.stats_frontend.c_deformedVerts, - commonLocal.stats_frontend.c_deformedIndexes / 3, - commonLocal.stats_frontend.c_tangentIndexes / 3, - commonLocal.stats_frontend.c_guiSurfs - ); + if( com_showFPS.GetInteger() > 2 ) + { + ImGui::TextColored( colorLtGrey, "DYNAMIC: callback:%i md5:%i dfrmVerts:%i dfrmTris:%i tangTris:%i guis:%i", + commonLocal.stats_frontend.c_entityDefCallbacks, + commonLocal.stats_frontend.c_generateMd5, + commonLocal.stats_frontend.c_deformedVerts, + commonLocal.stats_frontend.c_deformedIndexes / 3, + commonLocal.stats_frontend.c_tangentIndexes / 3, + commonLocal.stats_frontend.c_guiSurfs + ); - //ImGui::Text( "Cull: %i box in %i box out\n", - // commonLocal.stats_frontend.c_box_cull_in, commonLocal.stats_frontend.c_box_cull_out ); + //ImGui::Text( "Cull: %i box in %i box out\n", + // commonLocal.stats_frontend.c_box_cull_in, commonLocal.stats_frontend.c_box_cull_out ); - ImGui::TextColored( colorLtGrey, "ADDMODEL: callback:%i createInteractions:%i createShadowVolumes:%i", - commonLocal.stats_frontend.c_entityDefCallbacks, - commonLocal.stats_frontend.c_createInteractions, - commonLocal.stats_frontend.c_createShadowVolumes ); + ImGui::TextColored( colorLtGrey, "ADDMODEL: callback:%i createInteractions:%i createShadowVolumes:%i", + commonLocal.stats_frontend.c_entityDefCallbacks, + commonLocal.stats_frontend.c_createInteractions, + commonLocal.stats_frontend.c_createShadowVolumes ); - ImGui::TextColored( colorLtGrey, "viewEntities:%i shadowEntities:%i viewLights:%i\n", commonLocal.stats_frontend.c_visibleViewEntities, - commonLocal.stats_frontend.c_shadowViewEntities, - commonLocal.stats_frontend.c_viewLights ); + ImGui::TextColored( colorLtGrey, "viewEntities:%i shadowEntities:%i viewLights:%i\n", commonLocal.stats_frontend.c_visibleViewEntities, + commonLocal.stats_frontend.c_shadowViewEntities, + commonLocal.stats_frontend.c_viewLights ); - ImGui::TextColored( colorLtGrey, "UPDATES: entityUpdates:%i entityRefs:%i lightUpdates:%i lightRefs:%i\n", - commonLocal.stats_frontend.c_entityUpdates, commonLocal.stats_frontend.c_entityReferences, - commonLocal.stats_frontend.c_lightUpdates, commonLocal.stats_frontend.c_lightReferences ); + ImGui::TextColored( colorLtGrey, "UPDATES: entityUpdates:%i entityRefs:%i lightUpdates:%i lightRefs:%i\n", + commonLocal.stats_frontend.c_entityUpdates, commonLocal.stats_frontend.c_entityReferences, + commonLocal.stats_frontend.c_lightUpdates, commonLocal.stats_frontend.c_lightReferences ); + } //ImGui::Text( "frameData: %i (%i)\n", frameData->frameMemoryAllocated.GetValue(), frameData->highWaterAllocated ); - ImGui::Spacing(); - ImGui::Spacing(); + //ImGui::Spacing(); + //ImGui::Spacing(); ImGui::Spacing(); - //ImGui::TextColored( colorCyan, "Average FPS %i", fps ); - + if( com_showFPS.GetInteger() > 2 ) { const char* overlay = va( "Average FPS %i", fps ); - ImGui::PlotLines( "Relative\nFrametime ms", previousTimesNormalized, FPS_FRAMES, valuesOffset, overlay, -10.0f, 10.0f, ImVec2( 0, 50 ) ); + ImGui::PlotLines( "Relative\nFrametime ms", previousTimesNormalized, FPS_FRAMES_HISTORY, valuesOffset, overlay, -10.0f, 10.0f, ImVec2( 0, 50 ) ); + } + else + { + ImGui::TextColored( colorYellow, "Average FPS %i", fps ); } ImGui::Spacing(); - - ImGui::TextColored( gameThreadTotalTime > maxTime ? colorRed : colorWhite, "G+RF: %5llu us", gameThreadTotalTime ); - ImGui::TextColored( gameThreadGameTime > maxTime ? colorRed : colorWhite, "G: %5llu us", gameThreadGameTime ); - ImGui::TextColored( gameThreadRenderTime > maxTime ? colorRed : colorWhite, "RF: %5llu us", gameThreadRenderTime ); - ImGui::TextColored( rendererBackEndTime > maxTime ? colorRed : colorWhite, "RB: %5llu us", rendererBackEndTime ); - ImGui::TextColored( rendererShadowsTime > maxTime ? colorRed : colorWhite, "SHADOWS: %5llu us", rendererShadowsTime ); - ImGui::TextColored( rendererGPUIdleTime > maxTime ? colorRed : colorWhite, "IDLE: %5llu us", rendererGPUIdleTime ); - ImGui::TextColored( rendererGPUTime > maxTime ? colorRed : colorWhite, "GPU: %5llu us", rendererGPUTime ); + ImGui::TextColored( colorMdGrey, "CPU GPU" ); + ImGui::TextColored( gameThreadTotalTime > maxTime ? colorRed : colorWhite, "Game+RF: %5llu us EarlyZ: %5llu us", gameThreadTotalTime, rendererGPUEarlyZTime ); + ImGui::TextColored( gameThreadGameTime > maxTime ? colorRed : colorWhite, "Game: %5llu us SSAO: %5llu us", gameThreadGameTime, rendererGPU_SSAOTime ); + ImGui::TextColored( gameThreadRenderTime > maxTime ? colorRed : colorWhite, "RF: %5llu us SSR: %5llu us", gameThreadRenderTime, rendererGPU_SSRTime ); + ImGui::TextColored( rendererBackEndTime > maxTime ? colorRed : colorWhite, "RB: %5llu us AmbientPass: %5llu us", rendererBackEndTime, rendererGPUAmbientPassTime ); + ImGui::TextColored( rendererShadowsTime > maxTime ? colorRed : colorWhite, "Shadows: %5llu us Interactions: %5llu us", rendererShadowsTime, rendererGPUInteractionsTime ); + ImGui::TextColored( rendererGPUShaderPassesTime > maxTime ? colorRed : colorWhite, " ShaderPass: %5llu us", rendererGPUShaderPassesTime ); + ImGui::TextColored( rendererGPUPostProcessingTime > maxTime ? colorRed : colorWhite, " PostFX: %5llu us", rendererGPUPostProcessingTime ); + ImGui::TextColored( rendererGPUIdleTime > maxTime ? colorRed : colorWhite, " Idle: %5llu us", rendererGPUIdleTime ); + ImGui::TextColored( rendererGPUTime > maxTime ? colorRed : colorWhite, " Total: %5llu us", rendererGPUTime ); ImGui::End(); } diff --git a/neo/imgui/BFGimguiImpl.cpp b/neo/imgui/BFGimguiImpl.cpp index 76f2c27b..c7cd7d5d 100644 --- a/neo/imgui/BFGimguiImpl.cpp +++ b/neo/imgui/BFGimguiImpl.cpp @@ -206,7 +206,7 @@ void SetClipboardText( void*, const char* text ) bool ShowWindows() { - return ( ImGuiTools::AreEditorsActive() || imgui_showDemoWindow.GetBool() || com_showFPS.GetInteger() > 0 ); + return ( ImGuiTools::AreEditorsActive() || imgui_showDemoWindow.GetBool() || com_showFPS.GetInteger() > 1 ); } bool UseInput() diff --git a/neo/renderer/RenderBackend.cpp b/neo/renderer/RenderBackend.cpp index 00cd10bd..5f1d21df 100644 --- a/neo/renderer/RenderBackend.cpp +++ b/neo/renderer/RenderBackend.cpp @@ -1636,7 +1636,7 @@ void idRenderBackend::RenderInteractions( const drawSurf_t* surfList, const view } uint64 end = Sys_Microseconds(); - pc.shadowMicroSec += end - start; + pc.cpuShadowMicroSec += end - start; } const idMaterial* surfaceShader = walk->material; @@ -2569,7 +2569,7 @@ void idRenderBackend::StencilShadowPass( const drawSurf_t* drawSurfs, const view } uint64 end = Sys_Microseconds(); - pc.shadowMicroSec += end - start; + pc.cpuShadowMicroSec += end - start; } if( drawSurf->numIndexes == 0 ) @@ -3219,7 +3219,7 @@ void idRenderBackend::ShadowMapPass( const drawSurf_t* drawSurfs, const viewLigh } uint64 end = Sys_Microseconds(); - pc.shadowMicroSec += end - start; + pc.cpuShadowMicroSec += end - start; } #endif @@ -5431,7 +5431,7 @@ void idRenderBackend::ExecuteBackEndCommands( const emptyCommand_t* cmds ) // stop rendering on this thread uint64 backEndFinishTime = Sys_Microseconds(); - pc.totalMicroSec = backEndFinishTime - backEndStartTime; + pc.cpuTotalMicroSec = backEndFinishTime - backEndStartTime; if( r_debugRenderToTexture.GetInteger() == 1 ) { diff --git a/neo/renderer/RenderBackend.h b/neo/renderer/RenderBackend.h index 265bd525..8850f1d9 100644 --- a/neo/renderer/RenderBackend.h +++ b/neo/renderer/RenderBackend.h @@ -4,7 +4,7 @@ Doom 3 BFG Edition GPL Source Code Copyright (C) 1993-2012 id Software LLC, a ZeniMax Media company. Copyright (C) 2016-2017 Dustin Land -Copyright (C) 2017-2019 Robert Beckebans +Copyright (C) 2017-2020 Robert Beckebans This file is part of the Doom 3 BFG Edition GPL Source Code ("Doom 3 BFG Edition Source Code"). @@ -31,7 +31,7 @@ If you have questions concerning this license or the applicable additional terms #ifndef __RENDERER_BACKEND_H__ #define __RENDERER_BACKEND_H__ - +#include "RenderLog.h" bool GL_CheckErrors_( const char* filename, int line ); #if 1 // !defined(RETAIL) @@ -204,6 +204,14 @@ struct vulkanContext_t int currentImageParm; idArray< idImage*, MAX_IMAGE_PARMS > imageParms; + + //typedef uint32 QueryTuple[2]; + + // GPU timestamp queries + idArray< uint32, NUM_FRAME_DATA > queryIndex; + idArray< idArray< uint32, MRB_TOTAL_QUERIES >, NUM_FRAME_DATA > queryAssignedIndex; + idArray< idArray< uint64, NUM_TIMESTAMP_QUERIES >, NUM_FRAME_DATA > queryResults; + idArray< VkQueryPool, NUM_FRAME_DATA > queryPools; }; extern vulkanContext_t vkcontext; diff --git a/neo/renderer/RenderLog.cpp b/neo/renderer/RenderLog.cpp index 0d47b3c8..4744160f 100644 --- a/neo/renderer/RenderLog.cpp +++ b/neo/renderer/RenderLog.cpp @@ -44,7 +44,7 @@ static const int LOG_LEVEL_EVERYTHING = 2; const char* renderLogMainBlockLabels[] = { - ASSERT_ENUM_STRING( MRB_NONE, 0 ), + ASSERT_ENUM_STRING( MRB_GPU_TIME, 0 ), ASSERT_ENUM_STRING( MRB_BEGIN_DRAWING_VIEW, 1 ), ASSERT_ENUM_STRING( MRB_FILL_DEPTH_BUFFER, 2 ), ASSERT_ENUM_STRING( MRB_FILL_GEOMETRY_BUFFER, 3 ), // RB @@ -57,15 +57,12 @@ const char* renderLogMainBlockLabels[] = ASSERT_ENUM_STRING( MRB_DRAW_DEBUG_TOOLS, 10 ), ASSERT_ENUM_STRING( MRB_CAPTURE_COLORBUFFER, 11 ), ASSERT_ENUM_STRING( MRB_POSTPROCESS, 12 ), - ASSERT_ENUM_STRING( MRB_GPU_SYNC, 13 ), - ASSERT_ENUM_STRING( MRB_END_FRAME, 14 ), - ASSERT_ENUM_STRING( MRB_BINK_FRAME, 15 ), - ASSERT_ENUM_STRING( MRB_BINK_NEXT_FRAME, 16 ), - ASSERT_ENUM_STRING( MRB_TOTAL, 17 ), - ASSERT_ENUM_STRING( MRB_MAX, 18 ) + ASSERT_ENUM_STRING( MRB_TOTAL, 13 ) }; -#if !defined(USE_VULKAN) +#if defined( USE_VULKAN ) + compile_time_assert( NUM_TIMESTAMP_QUERIES >= ( MRB_TOTAL_QUERIES ) ); +#else static GLuint renderLogMainBlockTimeQueryIds[MRB_MAX]; #endif @@ -93,7 +90,7 @@ struct pixEvent_t idCVar r_pix( "r_pix", "0", CVAR_INTEGER, "print GPU/CPU event timing" ); -#if !defined(USE_VULKAN) +#if !defined( USE_VULKAN ) static const int MAX_PIX_EVENTS = 256; // defer allocation of this until needed, so we don't waste lots of memory pixEvent_t* pixEvents; // [MAX_PIX_EVENTS] @@ -602,7 +599,7 @@ idRenderLog::idRenderLog() #endif } -#if 0 +#if 1 /* ======================== @@ -612,8 +609,20 @@ idRenderLog::OpenMainBlock void idRenderLog::OpenMainBlock( renderLogMainBlock_t block ) { #if defined( USE_VULKAN ) - // TODO + mainBlock = block; + + if( vkcontext.queryIndex[ vkcontext.frameParity ] >= ( NUM_TIMESTAMP_QUERIES - 1 ) ) + { + return; + } + + VkCommandBuffer commandBuffer = vkcontext.commandBuffer[ vkcontext.frameParity ]; + VkQueryPool queryPool = vkcontext.queryPools[ vkcontext.frameParity ]; + + uint32 queryIndex = vkcontext.queryAssignedIndex[ vkcontext.frameParity ][ mainBlock * 2 + 0 ] = vkcontext.queryIndex[ vkcontext.frameParity ]++; + vkCmdWriteTimestamp( commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, queryPool, queryIndex ); #else + /* if( glConfig.timerQueryAvailable ) { if( renderLogMainBlockTimeQueryIds[block] == 0 ) @@ -623,6 +632,7 @@ void idRenderLog::OpenMainBlock( renderLogMainBlock_t block ) glBeginQuery( GL_TIME_ELAPSED_EXT, renderLogMainBlockTimeQueryIds[block] ); } + */ #endif } @@ -634,9 +644,19 @@ idRenderLog::CloseMainBlock void idRenderLog::CloseMainBlock() { #if defined( USE_VULKAN ) - // TODO + + if( vkcontext.queryIndex[ vkcontext.frameParity ] >= ( NUM_TIMESTAMP_QUERIES - 1 ) ) + { + return; + } + + VkCommandBuffer commandBuffer = vkcontext.commandBuffer[ vkcontext.frameParity ]; + VkQueryPool queryPool = vkcontext.queryPools[ vkcontext.frameParity ]; + + uint32 queryIndex = vkcontext.queryAssignedIndex[ vkcontext.frameParity ][ mainBlock * 2 + 1 ] = vkcontext.queryIndex[ vkcontext.frameParity ]++; + vkCmdWriteTimestamp( commandBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, queryPool, queryIndex ); #else - glEndQuery( GL_TIME_ELAPSED_EXT ); + //glEndQuery( GL_TIME_ELAPSED_EXT ); #endif } diff --git a/neo/renderer/RenderLog.h b/neo/renderer/RenderLog.h index b8c1bc32..628fdbf0 100644 --- a/neo/renderer/RenderLog.h +++ b/neo/renderer/RenderLog.h @@ -41,7 +41,8 @@ Contains the RenderLog declaration. enum renderLogMainBlock_t { - MRB_NONE, + // each block will require to allocate 2 GPU query timestamps + MRB_GPU_TIME, MRB_BEGIN_DRAWING_VIEW, MRB_FILL_DEPTH_BUFFER, MRB_FILL_GEOMETRY_BUFFER, @@ -54,12 +55,9 @@ enum renderLogMainBlock_t MRB_DRAW_DEBUG_TOOLS, MRB_CAPTURE_COLORBUFFER, MRB_POSTPROCESS, - MRB_GPU_SYNC, - MRB_END_FRAME, - MRB_BINK_FRAME, - MRB_BINK_NEXT_FRAME, MRB_TOTAL, - MRB_MAX + + MRB_TOTAL_QUERIES = MRB_TOTAL * 2, }; // these are used to make sure each Indent() is properly paired with an Outdent() @@ -182,6 +180,9 @@ performance rendering in retail builds. */ class idRenderLog { +private: + renderLogMainBlock_t mainBlock; + public: idRenderLog(); @@ -195,8 +196,8 @@ public: void OpenBlock( const char* label, const idVec4& color = colorBlack ); void CloseBlock(); - void OpenMainBlock( renderLogMainBlock_t block ) {} - void CloseMainBlock() {} + void OpenMainBlock( renderLogMainBlock_t block );// {} + void CloseMainBlock();// {} void Indent( renderLogIndentLabel_t label = RENDER_LOG_INDENT_DEFAULT ) {} void Outdent( renderLogIndentLabel_t label = RENDER_LOG_INDENT_DEFAULT ) {} diff --git a/neo/renderer/RenderSystem.cpp b/neo/renderer/RenderSystem.cpp index e3f408e7..ca730c55 100644 --- a/neo/renderer/RenderSystem.cpp +++ b/neo/renderer/RenderSystem.cpp @@ -663,7 +663,12 @@ void idRenderSystemLocal::SwapCommandBuffers_FinishRendering( backend.GL_BlockingSwapBuffers(); } -#if !defined(USE_VULKAN) +#if defined(USE_VULKAN) + if( gpuMicroSec != NULL ) + { + *gpuMicroSec = backend.pc.gpuMicroSec; + } +#else // read back the start and end timer queries from the previous frame if( glConfig.timerQueryAvailable ) { @@ -675,6 +680,7 @@ void idRenderSystemLocal::SwapCommandBuffers_FinishRendering( { glGetQueryObjectui64vEXT( tr.timerQueryId, GL_QUERY_RESULT, &drawingTimeNanoseconds ); } + if( gpuMicroSec != NULL ) { *gpuMicroSec = drawingTimeNanoseconds / 1000; @@ -692,12 +698,12 @@ void idRenderSystemLocal::SwapCommandBuffers_FinishRendering( if( backEndMicroSec != NULL ) { - *backEndMicroSec = backend.pc.totalMicroSec; + *backEndMicroSec = backend.pc.cpuTotalMicroSec; } if( shadowMicroSec != NULL ) { - *shadowMicroSec = backend.pc.shadowMicroSec; + *shadowMicroSec = backend.pc.cpuShadowMicroSec; } // RB: TODO clean up the above and just pass entire backend and performance stats before they get cleared diff --git a/neo/renderer/RenderSystem.h b/neo/renderer/RenderSystem.h index 8a89a324..d37ed5e6 100644 --- a/neo/renderer/RenderSystem.h +++ b/neo/renderer/RenderSystem.h @@ -109,9 +109,7 @@ enum antiAliasingMode_t ANTI_ALIASING_MSAA_8X }; -/* -** performanceCounters_t -*/ +// CPU counters and timers struct performanceCounters_t { int c_box_cull_in; @@ -136,9 +134,11 @@ struct performanceCounters_t int c_entityReferences; int c_lightReferences; int c_guiSurfs; - int frontEndMicroSec; // sum of time in all RE_RenderScene's in a frame + + uint64 frontEndMicroSec; // sum of time in all RE_RenderScene's in a frame }; +// CPU & GPU counters and timers struct backEndCounters_t { int c_surfaces; @@ -154,8 +154,16 @@ struct backEndCounters_t float c_overDraw; - int totalMicroSec; // total microseconds for backend run - int shadowMicroSec; + uint64 cpuTotalMicroSec; // total microseconds for backend run + uint64 cpuShadowMicroSec; + uint64 gpuDepthMicroSec; + uint64 gpuScreenSpaceAmbientOcclusionMicroSec; + uint64 gpuScreenSpaceReflectionsMicroSec; + uint64 gpuAmbientPassMicroSec; + uint64 gpuInteractionsMicroSec; + uint64 gpuShaderPassMicroSec; + uint64 gpuPostProcessingMicroSec; + uint64 gpuMicroSec; }; // RB end diff --git a/neo/renderer/Vulkan/RenderBackend_VK.cpp b/neo/renderer/Vulkan/RenderBackend_VK.cpp index e6751be8..25f8123c 100644 --- a/neo/renderer/Vulkan/RenderBackend_VK.cpp +++ b/neo/renderer/Vulkan/RenderBackend_VK.cpp @@ -1014,7 +1014,23 @@ static void CreateSemaphores() } } +/* +=============== +idRenderBackend::CreateQueryPool +=============== +*/ +static void CreateQueryPool() +{ + VkQueryPoolCreateInfo createInfo = {}; + createInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; + createInfo.queryType = VK_QUERY_TYPE_TIMESTAMP; + createInfo.queryCount = NUM_TIMESTAMP_QUERIES; + for( int i = 0; i < NUM_FRAME_DATA; ++i ) + { + ID_VK_CHECK( vkCreateQueryPool( vkcontext.device, &createInfo, NULL, &vkcontext.queryPools[ i ] ) ); + } +} /* ============= @@ -1390,8 +1406,17 @@ static void ClearContext() vkcontext.frameBuffers.Zero(); vkcontext.acquireSemaphores.Zero(); vkcontext.renderCompleteSemaphores.Zero(); + vkcontext.currentImageParm = 0; vkcontext.imageParms.Zero(); + + vkcontext.queryIndex.Zero(); + for( int i = 0; i < NUM_FRAME_DATA; ++i ) + { + vkcontext.queryAssignedIndex[ i ].Zero(); + vkcontext.queryResults[ i ].Zero(); + } + vkcontext.queryPools.Zero(); } /* @@ -1445,8 +1470,8 @@ void idRenderBackend::Init() glConfig.driverType = GLDRV_VULKAN; glConfig.gpuSkinningAvailable = true; - idLib::Printf( "Creating Vulkan Instance...\n" ); // create the Vulkan instance and enable validation layers + idLib::Printf( "Creating Vulkan Instance...\n" ); CreateVulkanInstance(); // create the windowing interface @@ -1470,6 +1495,10 @@ void idRenderBackend::Init() idLib::Printf( "Creating semaphores...\n" ); CreateSemaphores(); + // Create Query Pool + idLib::Printf( "Creating query pool...\n" ); + CreateQueryPool(); + // Create Command Pool idLib::Printf( "Creating command pool...\n" ); CreateCommandPool(); @@ -1571,6 +1600,12 @@ void idRenderBackend::Shutdown() // Destroy Command Pool vkDestroyCommandPool( vkcontext.device, vkcontext.commandPool, NULL ); + // Destroy Query Pools + for( int i = 0; i < NUM_FRAME_DATA; ++i ) + { + vkDestroyQueryPool( vkcontext.device, vkcontext.queryPools[ i ], NULL ); + } + // Destroy Semaphores for( int i = 0; i < NUM_FRAME_DATA; ++i ) { @@ -1896,20 +1931,108 @@ void idRenderBackend::GL_StartFrame() #endif stagingManager.Flush(); + // reset descriptor pool renderProgManager.StartFrame(); + // fetch GPU timer queries of last frame + VkQueryPool queryPool = vkcontext.queryPools[ vkcontext.frameParity ]; + idArray< uint64, NUM_TIMESTAMP_QUERIES >& results = vkcontext.queryResults[ vkcontext.frameParity ]; + idArray< uint32, MRB_TOTAL_QUERIES >& assignedIndex = vkcontext.queryAssignedIndex[ vkcontext.frameParity ]; + if( assignedIndex[ MRB_GPU_TIME + 1 ] > 0 ) + { + int lastValidQuery = assignedIndex[ MRB_GPU_TIME + 1 ]; + int numQueries = lastValidQuery + 1; + + if( numQueries <= NUM_TIMESTAMP_QUERIES ) + { + vkGetQueryPoolResults( vkcontext.device, queryPool, MRB_GPU_TIME, numQueries, + results.ByteSize(), results.Ptr(), sizeof( uint64 ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT ); + + const uint64 gpuStart = results[ assignedIndex[ MRB_GPU_TIME * 2 + 0 ] ]; + const uint64 gpuEnd = results[ assignedIndex[ MRB_GPU_TIME * 2 + 1 ] ]; + const uint64 tick = ( 1000 * 1000 * 1000 ) / vkcontext.gpu->props.limits.timestampPeriod; + pc.gpuMicroSec = ( ( gpuEnd - gpuStart ) * 1000 * 1000 ) / tick; + + if( assignedIndex[ MRB_FILL_DEPTH_BUFFER * 2 + 1 ] > 0 ) + { + const uint64 gpuStart = results[ assignedIndex[ MRB_FILL_DEPTH_BUFFER * 2 + 0 ] ]; + const uint64 gpuEnd = results[ assignedIndex[ MRB_FILL_DEPTH_BUFFER * 2 + 1 ] ]; + const uint64 tick = ( 1000 * 1000 * 1000 ) / vkcontext.gpu->props.limits.timestampPeriod; + pc.gpuDepthMicroSec = ( ( gpuEnd - gpuStart ) * 1000 * 1000 ) / tick; + } + + if( assignedIndex[ MRB_SSAO_PASS * 2 + 1 ] > 0 ) + { + const uint64 gpuStart = results[ assignedIndex[ MRB_SSAO_PASS * 2 + 0 ] ]; + const uint64 gpuEnd = results[ assignedIndex[ MRB_SSAO_PASS * 2 + 1 ] ]; + const uint64 tick = ( 1000 * 1000 * 1000 ) / vkcontext.gpu->props.limits.timestampPeriod; + pc.gpuScreenSpaceAmbientOcclusionMicroSec = ( ( gpuEnd - gpuStart ) * 1000 * 1000 ) / tick; + } + + if( assignedIndex[ MRB_AMBIENT_PASS * 2 + 1 ] > 0 ) + { + const uint64 gpuStart = results[ assignedIndex[ MRB_AMBIENT_PASS * 2 + 0 ] ]; + const uint64 gpuEnd = results[ assignedIndex[ MRB_AMBIENT_PASS * 2 + 1 ] ]; + const uint64 tick = ( 1000 * 1000 * 1000 ) / vkcontext.gpu->props.limits.timestampPeriod; + pc.gpuAmbientPassMicroSec = ( ( gpuEnd - gpuStart ) * 1000 * 1000 ) / tick; + } + + if( assignedIndex[ MRB_DRAW_INTERACTIONS * 2 + 1 ] > 0 ) + { + const uint64 gpuStart = results[ assignedIndex[ MRB_DRAW_INTERACTIONS * 2 + 0 ] ]; + const uint64 gpuEnd = results[ assignedIndex[ MRB_DRAW_INTERACTIONS * 2 + 1 ] ]; + const uint64 tick = ( 1000 * 1000 * 1000 ) / vkcontext.gpu->props.limits.timestampPeriod; + pc.gpuInteractionsMicroSec = ( ( gpuEnd - gpuStart ) * 1000 * 1000 ) / tick; + } + + if( assignedIndex[ MRB_DRAW_SHADER_PASSES * 2 + 1 ] > 0 ) + { + const uint64 gpuStart = results[ assignedIndex[ MRB_DRAW_SHADER_PASSES * 2 + 0 ] ]; + const uint64 gpuEnd = results[ assignedIndex[ MRB_DRAW_SHADER_PASSES * 2 + 1 ] ]; + const uint64 tick = ( 1000 * 1000 * 1000 ) / vkcontext.gpu->props.limits.timestampPeriod; + pc.gpuShaderPassMicroSec = ( ( gpuEnd - gpuStart ) * 1000 * 1000 ) / tick; + } + + if( assignedIndex[ MRB_POSTPROCESS * 2 + 1 ] > 0 ) + { + const uint64 gpuStart = results[ assignedIndex[ MRB_POSTPROCESS * 2 + 0 ] ]; + const uint64 gpuEnd = results[ assignedIndex[ MRB_POSTPROCESS * 2 + 1 ] ]; + const uint64 tick = ( 1000 * 1000 * 1000 ) / vkcontext.gpu->props.limits.timestampPeriod; + pc.gpuPostProcessingMicroSec = ( ( gpuEnd - gpuStart ) * 1000 * 1000 ) / tick; + } + } + } + + // reset query indices for current frame + vkcontext.queryIndex[ vkcontext.frameParity ] = 0; + + for( int i = 0; i < MRB_TOTAL_QUERIES; i++ ) + { + vkcontext.queryAssignedIndex[ vkcontext.frameParity ][ i ] = 0; + } + + VkCommandBuffer commandBuffer = vkcontext.commandBuffer[ vkcontext.frameParity ]; + + // begin command buffer VkCommandBufferBeginInfo commandBufferBeginInfo = {}; commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - ID_VK_CHECK( vkBeginCommandBuffer( vkcontext.commandBuffer[ vkcontext.frameParity ], &commandBufferBeginInfo ) ); + ID_VK_CHECK( vkBeginCommandBuffer( commandBuffer, &commandBufferBeginInfo ) ); + // reset timer queries + vkCmdResetQueryPool( commandBuffer, queryPool, 0, NUM_TIMESTAMP_QUERIES ); + + uint32 queryIndex = vkcontext.queryAssignedIndex[ vkcontext.frameParity ][ MRB_GPU_TIME * 2 + 0 ] = vkcontext.queryIndex[ vkcontext.frameParity ]++; + vkCmdWriteTimestamp( commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, queryPool, queryIndex ); + + // begin initial render pass VkRenderPassBeginInfo renderPassBeginInfo = {}; renderPassBeginInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; renderPassBeginInfo.renderPass = vkcontext.renderPass; renderPassBeginInfo.framebuffer = vkcontext.frameBuffers[ vkcontext.currentSwapIndex ]; renderPassBeginInfo.renderArea.extent = vkcontext.swapchainExtent; - vkCmdBeginRenderPass( vkcontext.commandBuffer[ vkcontext.frameParity ], &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE ); + vkCmdBeginRenderPass( commandBuffer, &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE ); } /* @@ -1921,6 +2044,9 @@ void idRenderBackend::GL_EndFrame() { VkCommandBuffer commandBuffer = vkcontext.commandBuffer[ vkcontext.frameParity ]; + uint32 queryIndex = vkcontext.queryAssignedIndex[ vkcontext.frameParity ][ MRB_GPU_TIME * 2 + 1 ] = vkcontext.queryIndex[ vkcontext.frameParity ]++; + vkCmdWriteTimestamp( commandBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, vkcontext.queryPools[ vkcontext.frameParity ], queryIndex ); + vkCmdEndRenderPass( commandBuffer ); // Transition our swap image to present. diff --git a/neo/renderer/Vulkan/qvk.h b/neo/renderer/Vulkan/qvk.h index ced1d88e..cb1a7a3d 100644 --- a/neo/renderer/Vulkan/qvk.h +++ b/neo/renderer/Vulkan/qvk.h @@ -78,6 +78,7 @@ static const int MAX_DESC_SET_WRITES = 32; static const int MAX_DESC_SET_UNIFORMS = 48; static const int MAX_IMAGE_PARMS = 16; static const int MAX_UBO_PARMS = 2; +static const int NUM_TIMESTAMP_QUERIES = 32; // VK_EXT_debug_marker extern PFN_vkDebugMarkerSetObjectTagEXT qvkDebugMarkerSetObjectTagEXT; diff --git a/neo/renderer/tr_frontend_addlights.cpp b/neo/renderer/tr_frontend_addlights.cpp index 6831f9bb..ebfbd332 100644 --- a/neo/renderer/tr_frontend_addlights.cpp +++ b/neo/renderer/tr_frontend_addlights.cpp @@ -675,7 +675,7 @@ void R_AddLights() } int end = Sys_Microseconds(); - tr.backend.pc.shadowMicroSec += end - start; + tr.backend.pc.cpuShadowMicroSec += end - start; } } diff --git a/neo/renderer/tr_frontend_addmodels.cpp b/neo/renderer/tr_frontend_addmodels.cpp index 627ff9cc..3d09e054 100644 --- a/neo/renderer/tr_frontend_addmodels.cpp +++ b/neo/renderer/tr_frontend_addmodels.cpp @@ -1406,7 +1406,7 @@ void R_AddModels() } int end = Sys_Microseconds(); - tr.backend.pc.shadowMicroSec += end - start; + tr.backend.pc.cpuShadowMicroSec += end - start; } }