From 59b4555bc9bfd9d54ee223275637681f013d2bc9 Mon Sep 17 00:00:00 2001 From: Stephen Saunders Date: Mon, 27 Mar 2023 15:51:43 -0400 Subject: [PATCH] Optimize macOS frame sync / fps by enabling MoltenVK asynchronous queue submission --- neo/cmake-xcode-debug.sh | 4 ++-- neo/sys/DeviceManager.h | 2 +- neo/sys/DeviceManager_VK.cpp | 23 +++++++++++++++++++---- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/neo/cmake-xcode-debug.sh b/neo/cmake-xcode-debug.sh index 6194aeed..df7738a5 100755 --- a/neo/cmake-xcode-debug.sh +++ b/neo/cmake-xcode-debug.sh @@ -6,6 +6,6 @@ cd xcode-debug # however, if ZERO_CHECK is reenabled **must** add VULKAN_SDK location to Xcode Custom Paths (under Prefs/Locations) otherwise build failures may occur # note 2: policy CMAKE_POLICY_DEFAULT_CMP0142=NEW suppresses non-existant per-config suffixes on Xcode library search paths, works for cmake version 3.25 and later # note 3: env variable MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1 enables MoltenVK's image view swizzle which may be required on older macOS versions or hardware (see vulkaninfo) -# note 4: env variable MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS=1 enforces synchronous queue submits which is required for the synchronization method used by the game +# note 4: env variable MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS=0 disables synchronous queue submits which is optimal for the synchronization method used by the game # note 5: env variable MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=2 enables MoltenVK's use of Metal argument buffers only if VK_EXT_descriptor_indexing is enabled -cmake -G Xcode -DCMAKE_BUILD_TYPE=Debug -DCMAKE_XCODE_GENERATE_SCHEME=ON -DCMAKE_XCODE_SCHEME_ENVIRONMENT="MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1;MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS=1;MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=2" -DCMAKE_XCODE_SCHEME_ENABLE_GPU_API_VALIDATION=OFF -DCMAKE_SUPPRESS_REGENERATION=ON -DOPENAL_LIBRARY=/usr/local/opt/openal-soft/lib/libopenal.dylib 
-DOPENAL_INCLUDE_DIR=/usr/local/opt/openal-soft/include ../neo -DCMAKE_POLICY_DEFAULT_CMP0142=NEW -Wno-dev +cmake -G Xcode -DCMAKE_BUILD_TYPE=Debug -DCMAKE_XCODE_GENERATE_SCHEME=ON -DCMAKE_XCODE_SCHEME_ENVIRONMENT="MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1;MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS=0;MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=2" -DCMAKE_XCODE_SCHEME_ENABLE_GPU_API_VALIDATION=OFF -DCMAKE_SUPPRESS_REGENERATION=ON -DOPENAL_LIBRARY=/usr/local/opt/openal-soft/lib/libopenal.dylib -DOPENAL_INCLUDE_DIR=/usr/local/opt/openal-soft/include ../neo -DCMAKE_POLICY_DEFAULT_CMP0142=NEW -Wno-dev diff --git a/neo/sys/DeviceManager.h b/neo/sys/DeviceManager.h index f881b00c..dab8e31d 100644 --- a/neo/sys/DeviceManager.h +++ b/neo/sys/DeviceManager.h @@ -53,7 +53,7 @@ struct DeviceCreationParameters uint32_t backBufferHeight = 720; uint32_t backBufferSampleCount = 1; // optional HDR Framebuffer MSAA uint32_t refreshRate = 0; - uint32_t swapChainBufferCount = 3; // SRS - hardcode to 3 for Vsync and linux surfaceCaps.minImageCount + uint32_t swapChainBufferCount = 3; // SRS - hardcode to 3 for Vsync modes and linux surfaceCaps.minImageCount = 3 nvrhi::Format swapChainFormat = nvrhi::Format::RGBA8_UNORM; // RB: don't do the sRGB gamma ramp with the swapchain uint32_t swapChainSampleCount = 1; uint32_t swapChainSampleQuality = 0; diff --git a/neo/sys/DeviceManager_VK.cpp b/neo/sys/DeviceManager_VK.cpp index ad3912a4..9ab3e6fa 100644 --- a/neo/sys/DeviceManager_VK.cpp +++ b/neo/sys/DeviceManager_VK.cpp @@ -273,6 +273,7 @@ private: nvrhi::DeviceHandle m_ValidationLayer; nvrhi::CommandListHandle m_BarrierCommandList; + std::queue<vk::Semaphore> m_PresentSemaphoreQueue; vk::Semaphore m_PresentSemaphore; nvrhi::EventQueryHandle m_FrameWaitQuery; @@ -1126,8 +1127,8 @@ bool DeviceManager_VK::CreateDeviceAndSwapChain() vkGetMoltenVKConfigurationMVK( m_VulkanInstance, &pConfig, &pConfigSize ); - // SRS - Enforce synchronous queue submission for vkQueueSubmit() & vkQueuePresentKHR() - 
pConfig.synchronousQueueSubmits = VK_TRUE; + // SRS - Disable synchronous queue submission for vkQueueSubmit() & vkQueuePresentKHR() + pConfig.synchronousQueueSubmits = VK_FALSE; vkSetMoltenVKConfigurationMVK( m_VulkanInstance, &pConfig, &pConfigSize ); // SRS - If we don't have native image view swizzle, enable MoltenVK's image view swizzle feature @@ -1186,7 +1187,12 @@ bool DeviceManager_VK::CreateDeviceAndSwapChain() m_BarrierCommandList = m_NvrhiDevice->createCommandList(); - m_PresentSemaphore = m_VulkanDevice.createSemaphore( vk::SemaphoreCreateInfo() ); + // SRS - Give each frame its own semaphore in case of overlap (e.g. MoltenVK async queue submit) + for( int i = 0; i < NUM_FRAME_DATA; i++ ) + { + m_PresentSemaphoreQueue.push( m_VulkanDevice.createSemaphore( vk::SemaphoreCreateInfo() ) ); + } + m_PresentSemaphore = m_PresentSemaphoreQueue.front(); m_FrameWaitQuery = m_NvrhiDevice->createEventQuery(); m_NvrhiDevice->setEventQuery( m_FrameWaitQuery, nvrhi::CommandQueue::Graphics ); @@ -1202,7 +1208,11 @@ void DeviceManager_VK::DestroyDeviceAndSwapChain() m_FrameWaitQuery = nullptr; - m_VulkanDevice.destroySemaphore( m_PresentSemaphore ); + for( int i = 0; i < NUM_FRAME_DATA; i++ ) + { + m_VulkanDevice.destroySemaphore( m_PresentSemaphoreQueue.front() ); + m_PresentSemaphoreQueue.pop(); + } m_PresentSemaphore = vk::Semaphore(); m_BarrierCommandList = nullptr; @@ -1283,6 +1293,11 @@ void DeviceManager_VK::Present() const vk::Result res = m_PresentQueue.presentKHR( &info ); assert( res == vk::Result::eSuccess || res == vk::Result::eErrorOutOfDateKHR || res == vk::Result::eSuboptimalKHR ); + // SRS - Cycle the semaphore queue and setup m_PresentSemaphore for the next frame + m_PresentSemaphoreQueue.pop(); + m_PresentSemaphoreQueue.push( m_PresentSemaphore ); + m_PresentSemaphore = m_PresentSemaphoreQueue.front(); + #if !defined(__APPLE__) || !defined( USE_MoltenVK ) // SRS - validation layer is present only when the vulkan loader + layers are enabled (i.e. 
not MoltenVK standalone) if( m_DeviceParams.enableDebugRuntime )