Add CPU / GPU usage % to HUD overlay and display MoltenVK's Metal encoding time when available

(cherry picked from commit 54759765ff384bd5a3109d8c801e50952c97c4aa)
This commit is contained in:
Stephen Saunders 2023-10-04 12:33:32 -04:00
parent 7bd543f699
commit 6e8f74154f
4 changed files with 56 additions and 21 deletions

View file

@ -8,4 +8,5 @@ cd xcode-debug
# note 3: env variable MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1 enables MoltenVK's image view swizzle which may be required on older macOS versions or hardware (see vulkaninfo)
# note 4: env variable MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS=0 disables synchronous queue submits which is optimal for the synchronization method used by the game
# note 5: env variable MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=2 enables MoltenVK's use of Metal argument buffers only if VK_EXT_descriptor_indexing is enabled
cmake -G Xcode -DCMAKE_BUILD_TYPE=Debug -DCMAKE_XCODE_GENERATE_SCHEME=ON -DCMAKE_XCODE_SCHEME_ENVIRONMENT="MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1;MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS=0;MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=2" -DCMAKE_XCODE_SCHEME_ENABLE_GPU_API_VALIDATION=OFF -DCMAKE_SUPPRESS_REGENERATION=ON -DOPENAL_LIBRARY=/usr/local/opt/openal-soft/lib/libopenal.dylib -DOPENAL_INCLUDE_DIR=/usr/local/opt/openal-soft/include ../neo -DCMAKE_POLICY_DEFAULT_CMP0142=NEW -Wno-dev
# note 6: env variable MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA=1.0 disables MoltenVK's timestampPeriod lowpass filter for non-Apple GPUs
cmake -G Xcode -DCMAKE_BUILD_TYPE=Debug -DCMAKE_XCODE_GENERATE_SCHEME=ON -DCMAKE_XCODE_SCHEME_ENVIRONMENT="MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1;MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS=0;MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=2;MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA=1.0" -DCMAKE_XCODE_SCHEME_ENABLE_GPU_API_VALIDATION=OFF -DCMAKE_SUPPRESS_REGENERATION=ON -DOPENAL_LIBRARY=/usr/local/opt/openal-soft/lib/libopenal.dylib -DOPENAL_INCLUDE_DIR=/usr/local/opt/openal-soft/include ../neo -DCMAKE_POLICY_DEFAULT_CMP0142=NEW -Wno-dev

View file

@ -380,14 +380,15 @@ public:
// RB end
// SRS start
uint64 GetRendererStartFrameSyncMicroseconds() const
void SetRendererMvkEncodeMicroseconds( uint64 mvkEncodeMicroSeconds )
{
return mainFrameTiming.finishSyncTime - mainFrameTiming.startSyncTime;
mvkEncodeMicroSec = mvkEncodeMicroSeconds;
return;
}
uint64 GetRendererEndFrameSyncMicroseconds() const
uint64 GetRendererMvkEncodeMicroseconds() const
{
return mainFrameTiming.finishSyncTime_EndFrame - mainFrameTiming.startRenderTime;
return mvkEncodeMicroSec;
}
// SRS end
@ -603,6 +604,8 @@ private:
// RB: r_speeds counters
backEndCounters_t stats_backend;
performanceCounters_t stats_frontend;
// SRS - MoltenVK's Vulkan to Metal command buffer encoding time in microseconds; initialized to 0 so that non-macOS platforms (Windows and Linux), which never set it, report zero
uint64 mvkEncodeMicroSec = 0;
// Used during loading screens
int lastPacifierSessionTime;

View file

@ -290,6 +290,7 @@ float idConsoleLocal::DrawFPS( float y )
const uint64 rendererBackEndTime = commonLocal.GetRendererBackEndMicroseconds();
const uint64 rendererShadowsTime = commonLocal.GetRendererShadowsMicroseconds();
const uint64 rendererMvkEncodeTime = commonLocal.GetRendererMvkEncodeMicroseconds();
const uint64 rendererGPUTime = commonLocal.GetRendererGPUMicroseconds();
const uint64 rendererGPUEarlyZTime = commonLocal.GetRendererGpuEarlyZMicroseconds();
const uint64 rendererGPU_SSAOTime = commonLocal.GetRendererGpuSSAOMicroseconds();
@ -322,7 +323,7 @@ float idConsoleLocal::DrawFPS( float y )
{
// start smaller
int32 statsWindowWidth = 320;
int32 statsWindowHeight = 315;
int32 statsWindowHeight = 330;
if( com_showFPS.GetInteger() > 2 )
{
@ -494,12 +495,20 @@ float idConsoleLocal::DrawFPS( float y )
ImGui::TextColored( gameThreadRenderTime > maxTime ? colorRed : colorWhite, "RF: %5llu us SSR: %5llu us", gameThreadRenderTime, rendererGPU_SSRTime );
ImGui::TextColored( rendererBackEndTime > maxTime ? colorRed : colorWhite, "RB: %5llu us Ambient Pass: %5llu us", rendererBackEndTime, rendererGPUAmbientPassTime );
ImGui::TextColored( rendererGPUShadowAtlasTime > maxTime ? colorRed : colorWhite, "Shadows: %5llu us Shadow Atlas: %5llu us", rendererShadowsTime, rendererGPUShadowAtlasTime );
#if defined(__APPLE__) && defined( USE_MoltenVK )
// SRS - For more recent versions of MoltenVK with enhanced performance statistics (v1.2.6 and later), display the Vulkan to Metal encoding thread time on macOS
// rendererMvkEncodeTime is an unsigned 64-bit value, so it is printed with %llu like the other unsigned timers (was %lld, a signed/unsigned format mismatch)
ImGui::TextColored( rendererMvkEncodeTime > maxTime || rendererGPUInteractionsTime > maxTime ? colorRed : colorWhite, "Encode: %5llu us Interactions: %5llu us", rendererMvkEncodeTime, rendererGPUInteractionsTime );
// With the Encode row taking the Interactions slot, the Sync time moves down to share a row with Shader Pass
ImGui::TextColored( rendererGPUShaderPassesTime > maxTime ? colorRed : colorWhite, "Sync: %5lld us Shader Pass: %5llu us", frameSyncTime, rendererGPUShaderPassesTime );
#else
// Without MoltenVK there is no encode timer: Sync shares the Interactions row and the Shader Pass row has no left-hand column
ImGui::TextColored( rendererGPUInteractionsTime > maxTime ? colorRed : colorWhite, "Sync: %5lld us Interactions: %5llu us", frameSyncTime, rendererGPUInteractionsTime );
ImGui::TextColored( rendererGPUShaderPassesTime > maxTime ? colorRed : colorWhite, " Shader Pass: %5llu us", rendererGPUShaderPassesTime );
#endif
ImGui::TextColored( rendererGPU_TAATime > maxTime ? colorRed : colorWhite, " TAA: %5llu us", rendererGPU_TAATime );
ImGui::TextColored( rendererGPUPostProcessingTime > maxTime ? colorRed : colorWhite, " PostFX: %5llu us", rendererGPUPostProcessingTime );
ImGui::TextColored( frameBusyTime > maxTime || rendererGPUTime > maxTime ? colorRed : colorWhite, "Total: %5lld us Total: %5lld us", frameBusyTime, rendererGPUTime );
ImGui::TextColored( colorWhite, "Idle: %5lld us Idle: %5lld us", frameIdleTime, rendererGPUIdleTime );
// SRS - Show CPU and GPU overall usage statistics
ImGui::TextColored( colorWhite, "Usage: %3.0f %% Usage: %3.0f %%", float( frameBusyTime - frameSyncTime ) / float( frameBusyTime + frameIdleTime ) * 100.0, float( rendererGPUTime ) / float( rendererGPUTime + rendererGPUIdleTime ) * 100.0 );
ImGui::End();
}

View file

@ -38,7 +38,7 @@
// SRS - optionally needed for MoltenVK runtime config visibility
#if defined(__APPLE__) && defined( USE_MoltenVK )
#include <MoltenVK/vk_mvk_moltenvk.h>
#include "framework/Common_local.h"
idCVar r_mvkSynchronousQueueSubmits( "r_mvkSynchronousQueueSubmits", "0", CVAR_BOOL | CVAR_INIT, "Use MoltenVK's synchronous queue submit option." );
#endif
#include <nvrhi/validation.h>
@ -210,8 +210,8 @@ private:
},
// layers
{
#if defined(__APPLE__)
// SRS - synchronization2 not supported natively on MoltenVK, use layer implementation instead
#if defined(__APPLE__) && !defined( USE_MoltenVK )
// SRS - Enable synchronization2 layer when using Vulkan loader and MoltenVK version unknown
"VK_LAYER_KHRONOS_synchronization2"
#endif
},
@ -1157,30 +1157,42 @@ bool DeviceManager_VK::CreateDeviceAndSwapChain()
deviceFeatures2.setPNext( &portabilityFeatures );
m_VulkanPhysicalDevice.getFeatures2( &deviceFeatures2 );
MVKConfiguration pConfig;
size_t pConfigSize = sizeof( pConfig );
MVKConfiguration mvkConfig;
size_t mvkConfigSize = sizeof( mvkConfig );
vkGetMoltenVKConfigurationMVK( m_VulkanInstance, &pConfig, &pConfigSize );
vkGetMoltenVKConfigurationMVK( m_VulkanInstance, &mvkConfig, &mvkConfigSize );
// SRS - Set MoltenVK's synchronous queue submit option for vkQueueSubmit() & vkQueuePresentKHR()
pConfig.synchronousQueueSubmits = r_mvkSynchronousQueueSubmits.GetBool() ? VK_TRUE : VK_FALSE;
vkSetMoltenVKConfigurationMVK( m_VulkanInstance, &pConfig, &pConfigSize );
if( mvkConfig.synchronousQueueSubmits == VK_TRUE && !r_mvkSynchronousQueueSubmits.GetBool() )
{
idLib::Printf( "Disabled MoltenVK's synchronous queue submits...\n" );
mvkConfig.synchronousQueueSubmits = VK_FALSE;
vkSetMoltenVKConfigurationMVK( m_VulkanInstance, &mvkConfig, &mvkConfigSize );
}
// SRS - If we don't have native image view swizzle, enable MoltenVK's image view swizzle feature
if( portabilityFeatures.imageViewFormatSwizzle == VK_FALSE )
{
idLib::Printf( "Enabling MoltenVK's image view swizzle...\n" );
pConfig.fullImageViewSwizzle = VK_TRUE;
vkSetMoltenVKConfigurationMVK( m_VulkanInstance, &pConfig, &pConfigSize );
idLib::Printf( "Enabled MoltenVK's image view swizzle...\n" );
mvkConfig.fullImageViewSwizzle = VK_TRUE;
vkSetMoltenVKConfigurationMVK( m_VulkanInstance, &mvkConfig, &mvkConfigSize );
}
// SRS - Turn MoltenVK's Metal argument buffer feature on for descriptor indexing only
if( pConfig.useMetalArgumentBuffers == MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_NEVER )
if( mvkConfig.useMetalArgumentBuffers == MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_NEVER )
{
idLib::Printf( "Enabling MoltenVK's Metal argument buffers for descriptor indexing...\n" );
pConfig.useMetalArgumentBuffers = MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_DESCRIPTOR_INDEXING;
vkSetMoltenVKConfigurationMVK( m_VulkanInstance, &pConfig, &pConfigSize );
idLib::Printf( "Enabled MoltenVK's Metal argument buffers for descriptor indexing...\n" );
mvkConfig.useMetalArgumentBuffers = MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_DESCRIPTOR_INDEXING;
vkSetMoltenVKConfigurationMVK( m_VulkanInstance, &mvkConfig, &mvkConfigSize );
}
#if MVK_VERSION >= MVK_MAKE_VERSION( 1, 2, 6 )
// SRS - Disable MoltenVK's timestampPeriod filter for HUD / Optick profiler timing calibration
mvkConfig.timestampPeriodLowPassAlpha = 1.0;
// SRS - Enable MoltenVK's performance tracking for display of Metal encoding timer on macOS
mvkConfig.performanceTracking = VK_TRUE;
vkSetMoltenVKConfigurationMVK( m_VulkanInstance, &mvkConfig, &mvkConfigSize );
#endif
#endif
CHECK( createDevice() );
@ -1305,6 +1317,16 @@ void DeviceManager_VK::DestroyDeviceAndSwapChain()
void DeviceManager_VK::BeginFrame()
{
#if defined(__APPLE__) && defined( USE_MoltenVK )
#if MVK_VERSION >= MVK_MAKE_VERSION( 1, 2, 6 )
// SRS - fetch MoltenVK's Vulkan to Metal encoding time for the previous frame
// Zero-initialize the statistics so that a failed or partial fill by MoltenVK yields an encode time of 0 rather than indeterminate values
MVKPerformanceStatistics mvkPerfStats = {};
size_t mvkPerfStatsSize = sizeof( mvkPerfStats );
vkGetPerformanceStatisticsMVK( m_VulkanDevice, &mvkPerfStats, &mvkPerfStatsSize );
// Encode time = latest command buffer submit duration minus latest CAMetalDrawable retrieval duration, clamped at zero.
// The 1000.0 factor converts to microseconds (presumably from MoltenVK's milliseconds - confirm against the MoltenVK header).
commonLocal.SetRendererMvkEncodeMicroseconds( uint64( Max( 0.0, mvkPerfStats.queue.submitCommandBuffers.latest - mvkPerfStats.queue.retrieveCAMetalDrawable.latest ) * 1000.0 ) );
#endif
#endif
const vk::Result res = m_VulkanDevice.acquireNextImageKHR( m_SwapChain,
std::numeric_limits<uint64_t>::max(), // timeout
m_PresentSemaphore,