Use command queue sync method vs. device sync for higher performance / frame rate

This commit is contained in:
Stephen Saunders 2023-02-28 18:02:45 -05:00
parent 0ac23b5f94
commit f0c61a3f55
6 changed files with 37 additions and 49 deletions

View file

@ -5,6 +5,7 @@ cd xcode-debug
# note 1: remove or set -DCMAKE_SUPPRESS_REGENERATION=OFF to reenable ZERO_CHECK target which checks for CMakeLists.txt changes and re-runs CMake before builds
# however, if ZERO_CHECK is reenabled you **must** add the VULKAN_SDK location to Xcode Custom Paths (under Prefs/Locations), otherwise build failures may occur
# note 2: policy CMAKE_POLICY_DEFAULT_CMP0142=NEW suppresses non-existent per-config suffixes on Xcode library search paths, works for cmake version 3.25 and later
# note 3: env variable MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1 enables MoltenVK's image view swizzle which may be required on older macOS versions or hardware (see vulkaninfo)
# note 4: env variable MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=2 enables MoltenVK's use of Metal argument buffers only if VK_EXT_descriptor_indexing is enabled
cmake -G Xcode -DCMAKE_BUILD_TYPE=Debug -DCMAKE_XCODE_GENERATE_SCHEME=ON -DCMAKE_XCODE_SCHEME_ENVIRONMENT="MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1;MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=2" -DCMAKE_SUPPRESS_REGENERATION=ON -DOPENAL_LIBRARY=/usr/local/opt/openal-soft/lib/libopenal.dylib -DOPENAL_INCLUDE_DIR=/usr/local/opt/openal-soft/include ../neo -DCMAKE_POLICY_DEFAULT_CMP0142=NEW -Wno-dev
# note 3: env variable MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1 enables MoltenVK's image view swizzle which may be required on older macOS versions or hardware (see vulkaninfo)
# note 4: env variable MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS=1 enforces synchronous queue submits which is required for the synchronization method used by the game
# note 5: env variable MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=2 enables MoltenVK's use of Metal argument buffers only if VK_EXT_descriptor_indexing is enabled
cmake -G Xcode -DCMAKE_BUILD_TYPE=Debug -DCMAKE_XCODE_GENERATE_SCHEME=ON -DCMAKE_XCODE_SCHEME_ENVIRONMENT="MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1;MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS=1;MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=2" -DCMAKE_SUPPRESS_REGENERATION=ON -DOPENAL_LIBRARY=/usr/local/opt/openal-soft/lib/libopenal.dylib -DOPENAL_INCLUDE_DIR=/usr/local/opt/openal-soft/include ../neo -DCMAKE_POLICY_DEFAULT_CMP0142=NEW -Wno-dev

View file

@ -90,8 +90,9 @@ const int MAX_EXPRESSION_REGISTERS = 4096;
// everything that is needed by the backend needs
// to be double buffered to allow it to run in
// parallel on a dual cpu machine
#if defined(__APPLE__) && ( defined( USE_VULKAN ) || defined( USE_NVRHI ) )
#if ( defined(__APPLE__) && defined( USE_VULKAN ) ) || defined( USE_NVRHI )
// SRS - macOS MoltenVK/Metal needs triple buffering for full screen to work properly
// SRS - use triple buffering for NVRHI with command queue event query sync method
const uint32 NUM_FRAME_DATA = 3;
#else
const uint32 NUM_FRAME_DATA = 2;

View file

@ -1597,14 +1597,16 @@ We want to exit this with the GPU idle, right at vsync
void idRenderBackend::GL_BlockingSwapBuffers()
{
// Make sure that all frames have finished rendering
deviceManager->GetDevice()->waitForIdle();
// Release all in-flight references to the render targets
deviceManager->GetDevice()->runGarbageCollection();
// SRS - device-level sync kills perf by serializing command queue processing (CPU) and rendering (GPU)
// - instead, use alternative sync method (based on command queue event queries) inside Present()
//deviceManager->GetDevice()->waitForIdle();
// Present to the swap chain.
deviceManager->Present();
// Release all in-flight references to the render targets
deviceManager->GetDevice()->runGarbageCollection();
renderLog.EndFrame();
if( deviceManager->GetGraphicsAPI() == nvrhi::GraphicsAPI::VULKAN )

View file

@ -61,7 +61,6 @@ struct DeviceCreationParameters
nvrhi::Format swapChainFormat = nvrhi::Format::RGBA8_UNORM; // RB: don't do the sRGB gamma ramp with the swapchain
uint32_t swapChainSampleCount = 1;
uint32_t swapChainSampleQuality = 0;
uint32_t maxFramesInFlight = 2;
bool enableDebugRuntime = false;
bool enableNvrhiValidationLayer = false;
bool vsyncEnabled = false;

View file

@ -61,6 +61,7 @@ class DeviceManager_DX12 : public DeviceManager
std::vector<nvrhi::TextureHandle> m_RhiSwapChainBuffers;
RefCountPtr<ID3D12Fence> m_FrameFence;
std::vector<HANDLE> m_FrameFenceEvents;
nvrhi::EventQueryHandle m_FrameWaitQuery;
UINT64 m_FrameCount = 1;
@ -447,6 +448,9 @@ bool DeviceManager_DX12::CreateDeviceAndSwapChain()
m_FrameFenceEvents.push_back( CreateEvent( nullptr, false, true, NULL ) );
}
m_FrameWaitQuery = nvrhiDevice->createEventQuery();
nvrhiDevice->setEventQuery( m_FrameWaitQuery, nvrhi::CommandQueue::Graphics );
return true;
}
@ -459,6 +463,8 @@ void DeviceManager_DX12::DestroyDeviceAndSwapChain()
nvrhiDevice = nullptr;
m_FrameWaitQuery = nullptr;
for( auto fenceEvent : m_FrameFenceEvents )
{
WaitForSingleObject( fenceEvent, INFINITE );
@ -648,6 +654,11 @@ void DeviceManager_DX12::Present()
// SRS - Don't change deviceParms.vsyncEnabled here, simply test for vsync mode 2 to set DXGI SyncInterval
m_SwapChain->Present( deviceParms.vsyncEnabled && r_swapInterval.GetInteger() == 2 ? 1 : 0, presentFlags );
// SRS - Sync on previous frame's command queue completion vs. waitForIdle() on whole device
nvrhiDevice->waitEventQuery( m_FrameWaitQuery );
nvrhiDevice->resetEventQuery( m_FrameWaitQuery );
nvrhiDevice->setEventQuery( m_FrameWaitQuery, nvrhi::CommandQueue::Graphics );
m_FrameFence->SetEventOnCompletion( m_FrameCount, m_FrameFenceEvents[bufferIndex] );
m_GraphicsQueue->Signal( m_FrameFence, m_FrameCount );
m_FrameCount++;

View file

@ -272,8 +272,7 @@ private:
nvrhi::CommandListHandle m_BarrierCommandList;
vk::Semaphore m_PresentSemaphore;
std::queue<nvrhi::EventQueryHandle> m_FramesInFlight;
std::vector<nvrhi::EventQueryHandle> m_QueryPool;
nvrhi::EventQueryHandle m_FrameWaitQuery;
// SRS - flag indicating support for eFifoRelaxed surface presentation (r_swapInterval = 1) mode
bool enablePModeFifoRelaxed = false;
@ -1124,6 +1123,10 @@ bool DeviceManager_VK::CreateDeviceAndSwapChain()
vkGetMoltenVKConfigurationMVK( m_VulkanInstance, &pConfig, &pConfigSize );
// SRS - Enforce synchronous queue submission for vkQueueSubmit() & vkQueuePresentKHR()
pConfig.synchronousQueueSubmits = VK_TRUE;
vkSetMoltenVKConfigurationMVK( m_VulkanInstance, &pConfig, &pConfigSize );
// SRS - If we don't have native image view swizzle, enable MoltenVK's image view swizzle feature
if( portabilityFeatures.imageViewFormatSwizzle == VK_FALSE )
{
@ -1182,6 +1185,9 @@ bool DeviceManager_VK::CreateDeviceAndSwapChain()
m_PresentSemaphore = m_VulkanDevice.createSemaphore( vk::SemaphoreCreateInfo() );
m_FrameWaitQuery = m_NvrhiDevice->createEventQuery();
m_NvrhiDevice->setEventQuery( m_FrameWaitQuery, nvrhi::CommandQueue::Graphics );
#undef CHECK
return true;
@ -1191,25 +1197,13 @@ void DeviceManager_VK::DestroyDeviceAndSwapChain()
{
destroySwapChain();
m_FrameWaitQuery = nullptr;
m_VulkanDevice.destroySemaphore( m_PresentSemaphore );
m_PresentSemaphore = vk::Semaphore();
m_BarrierCommandList = nullptr;
while( m_FramesInFlight.size() > 0 )
{
auto query = m_FramesInFlight.front();
m_FramesInFlight.pop();
query = nullptr;
}
if( !m_QueryPool.empty() )
{
auto query = m_QueryPool.back();
m_QueryPool.pop_back();
query = nullptr;
}
m_NvrhiDevice = nullptr;
m_ValidationLayer = nullptr;
m_RendererString.clear();
@ -1294,37 +1288,17 @@ void DeviceManager_VK::Present()
}
else
{
#ifndef _WIN32
if( deviceParms.vsyncEnabled )
{
m_PresentQueue.waitIdle();
}
#endif
while( m_FramesInFlight.size() > deviceParms.maxFramesInFlight )
{
auto query = m_FramesInFlight.front();
m_FramesInFlight.pop();
m_NvrhiDevice->waitEventQuery( query );
m_QueryPool.push_back( query );
}
nvrhi::EventQueryHandle query;
if( !m_QueryPool.empty() )
{
query = m_QueryPool.back();
m_QueryPool.pop_back();
}
// SRS - Sync on previous frame's command queue completion vs. waitForIdle() on whole device
else
{
query = m_NvrhiDevice->createEventQuery();
m_NvrhiDevice->waitEventQuery( m_FrameWaitQuery );
m_NvrhiDevice->resetEventQuery( m_FrameWaitQuery );
m_NvrhiDevice->setEventQuery( m_FrameWaitQuery, nvrhi::CommandQueue::Graphics );
}
m_NvrhiDevice->resetEventQuery( query );
m_NvrhiDevice->setEventQuery( query, nvrhi::CommandQueue::Graphics );
m_FramesInFlight.push( query );
}
}