Optimize macOS frame sync / fps by enabling MoltenVK asynchronous queue submission

This commit is contained in:
Stephen Saunders 2023-03-27 15:51:43 -04:00
parent 4207e22e10
commit 59b4555bc9
3 changed files with 22 additions and 7 deletions

View file

@ -6,6 +6,6 @@ cd xcode-debug
# however, if ZERO_CHECK is reenabled **must** add VULKAN_SDK location to Xcode Custom Paths (under Prefs/Locations) otherwise build failures may occur
# note 2: policy CMAKE_POLICY_DEFAULT_CMP0142=NEW suppresses non-existent per-config suffixes on Xcode library search paths, works for cmake version 3.25 and later
# note 3: env variable MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1 enables MoltenVK's image view swizzle which may be required on older macOS versions or hardware (see vulkaninfo)
# note 4: env variable MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS=1 enforces synchronous queue submits which is required for the synchronization method used by the game
# note 4: env variable MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS=0 disables synchronous queue submits which is optimal for the synchronization method used by the game
# note 5: env variable MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=2 enables MoltenVK's use of Metal argument buffers only if VK_EXT_descriptor_indexing is enabled
cmake -G Xcode -DCMAKE_BUILD_TYPE=Debug -DCMAKE_XCODE_GENERATE_SCHEME=ON -DCMAKE_XCODE_SCHEME_ENVIRONMENT="MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1;MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS=1;MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=2" -DCMAKE_XCODE_SCHEME_ENABLE_GPU_API_VALIDATION=OFF -DCMAKE_SUPPRESS_REGENERATION=ON -DOPENAL_LIBRARY=/usr/local/opt/openal-soft/lib/libopenal.dylib -DOPENAL_INCLUDE_DIR=/usr/local/opt/openal-soft/include ../neo -DCMAKE_POLICY_DEFAULT_CMP0142=NEW -Wno-dev
cmake -G Xcode -DCMAKE_BUILD_TYPE=Debug -DCMAKE_XCODE_GENERATE_SCHEME=ON -DCMAKE_XCODE_SCHEME_ENVIRONMENT="MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1;MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS=0;MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=2" -DCMAKE_XCODE_SCHEME_ENABLE_GPU_API_VALIDATION=OFF -DCMAKE_SUPPRESS_REGENERATION=ON -DOPENAL_LIBRARY=/usr/local/opt/openal-soft/lib/libopenal.dylib -DOPENAL_INCLUDE_DIR=/usr/local/opt/openal-soft/include ../neo -DCMAKE_POLICY_DEFAULT_CMP0142=NEW -Wno-dev

View file

@ -53,7 +53,7 @@ struct DeviceCreationParameters
uint32_t backBufferHeight = 720;
uint32_t backBufferSampleCount = 1; // optional HDR Framebuffer MSAA
uint32_t refreshRate = 0;
uint32_t swapChainBufferCount = 3; // SRS - hardcode to 3 for Vsync and linux surfaceCaps.minImageCount
uint32_t swapChainBufferCount = 3; // SRS - hardcode to 3 for Vsync modes and linux surfaceCaps.minImageCount = 3
nvrhi::Format swapChainFormat = nvrhi::Format::RGBA8_UNORM; // RB: don't do the sRGB gamma ramp with the swapchain
uint32_t swapChainSampleCount = 1;
uint32_t swapChainSampleQuality = 0;

View file

@ -273,6 +273,7 @@ private:
nvrhi::DeviceHandle m_ValidationLayer;
nvrhi::CommandListHandle m_BarrierCommandList;
std::queue<vk::Semaphore> m_PresentSemaphoreQueue;
vk::Semaphore m_PresentSemaphore;
nvrhi::EventQueryHandle m_FrameWaitQuery;
@ -1126,8 +1127,8 @@ bool DeviceManager_VK::CreateDeviceAndSwapChain()
vkGetMoltenVKConfigurationMVK( m_VulkanInstance, &pConfig, &pConfigSize );
// SRS - Enforce synchronous queue submission for vkQueueSubmit() & vkQueuePresentKHR()
pConfig.synchronousQueueSubmits = VK_TRUE;
// SRS - Disable synchronous queue submission for vkQueueSubmit() & vkQueuePresentKHR()
pConfig.synchronousQueueSubmits = VK_FALSE;
vkSetMoltenVKConfigurationMVK( m_VulkanInstance, &pConfig, &pConfigSize );
// SRS - If we don't have native image view swizzle, enable MoltenVK's image view swizzle feature
@ -1186,7 +1187,12 @@ bool DeviceManager_VK::CreateDeviceAndSwapChain()
m_BarrierCommandList = m_NvrhiDevice->createCommandList();
m_PresentSemaphore = m_VulkanDevice.createSemaphore( vk::SemaphoreCreateInfo() );
// SRS - Give each frame its own semaphore in case of overlap (e.g. MoltenVK async queue submit)
for( int i = 0; i < NUM_FRAME_DATA; i++ )
{
m_PresentSemaphoreQueue.push( m_VulkanDevice.createSemaphore( vk::SemaphoreCreateInfo() ) );
}
m_PresentSemaphore = m_PresentSemaphoreQueue.front();
m_FrameWaitQuery = m_NvrhiDevice->createEventQuery();
m_NvrhiDevice->setEventQuery( m_FrameWaitQuery, nvrhi::CommandQueue::Graphics );
@ -1202,7 +1208,11 @@ void DeviceManager_VK::DestroyDeviceAndSwapChain()
m_FrameWaitQuery = nullptr;
m_VulkanDevice.destroySemaphore( m_PresentSemaphore );
for( int i = 0; i < NUM_FRAME_DATA; i++ )
{
m_VulkanDevice.destroySemaphore( m_PresentSemaphoreQueue.front() );
m_PresentSemaphoreQueue.pop();
}
m_PresentSemaphore = vk::Semaphore();
m_BarrierCommandList = nullptr;
@ -1283,6 +1293,11 @@ void DeviceManager_VK::Present()
const vk::Result res = m_PresentQueue.presentKHR( &info );
assert( res == vk::Result::eSuccess || res == vk::Result::eErrorOutOfDateKHR || res == vk::Result::eSuboptimalKHR );
// SRS - Cycle the semaphore queue and setup m_PresentSemaphore for the next frame
m_PresentSemaphoreQueue.pop();
m_PresentSemaphoreQueue.push( m_PresentSemaphore );
m_PresentSemaphore = m_PresentSemaphoreQueue.front();
#if !defined(__APPLE__) || !defined( USE_MoltenVK )
// SRS - validation layer is present only when the vulkan loader + layers are enabled (i.e. not MoltenVK standalone)
if( m_DeviceParams.enableDebugRuntime )