mirror of
https://github.com/id-Software/DOOM-3-BFG.git
synced 2025-03-14 22:50:45 +00:00
Use command queue sync method vs. device sync for higher performance / frame rate
This commit is contained in:
parent
0ac23b5f94
commit
f0c61a3f55
6 changed files with 37 additions and 49 deletions
|
@ -5,6 +5,7 @@ cd xcode-debug
|
|||
# note 1: remove or set -DCMAKE_SUPPRESS_REGENERATION=OFF to reenable ZERO_CHECK target which checks for CMakeLists.txt changes and re-runs CMake before builds
|
||||
# however, if ZERO_CHECK is reenabled you **must** add the VULKAN_SDK location to Xcode Custom Paths (under Prefs/Locations), otherwise build failures may occur
|
||||
# note 2: policy CMAKE_POLICY_DEFAULT_CMP0142=NEW suppresses non-existent per-config suffixes on Xcode library search paths, works for cmake version 3.25 and later
|
||||
#note 3: env variable MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1 enables MoltenVK's image view swizzle which may be required on older macOS versions or hardware (see vulkaninfo)
|
||||
# note 4: env variable MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=2 enables MoltenVK's use of Metal argument buffers only if VK_EXT_descriptor_indexing is enabled
|
||||
cmake -G Xcode -DCMAKE_BUILD_TYPE=Debug -DCMAKE_XCODE_GENERATE_SCHEME=ON -DCMAKE_XCODE_SCHEME_ENVIRONMENT="MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1;MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=2" -DCMAKE_SUPPRESS_REGENERATION=ON -DOPENAL_LIBRARY=/usr/local/opt/openal-soft/lib/libopenal.dylib -DOPENAL_INCLUDE_DIR=/usr/local/opt/openal-soft/include ../neo -DCMAKE_POLICY_DEFAULT_CMP0142=NEW -Wno-dev
|
||||
# note 3: env variable MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1 enables MoltenVK's image view swizzle which may be required on older macOS versions or hardware (see vulkaninfo)
|
||||
# note 4: env variable MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS=1 enforces synchronous queue submits which is required for the synchronization method used by the game
|
||||
# note 5: env variable MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=2 enables MoltenVK's use of Metal argument buffers only if VK_EXT_descriptor_indexing is enabled
|
||||
cmake -G Xcode -DCMAKE_BUILD_TYPE=Debug -DCMAKE_XCODE_GENERATE_SCHEME=ON -DCMAKE_XCODE_SCHEME_ENVIRONMENT="MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1;MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS=1;MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=2" -DCMAKE_SUPPRESS_REGENERATION=ON -DOPENAL_LIBRARY=/usr/local/opt/openal-soft/lib/libopenal.dylib -DOPENAL_INCLUDE_DIR=/usr/local/opt/openal-soft/include ../neo -DCMAKE_POLICY_DEFAULT_CMP0142=NEW -Wno-dev
|
||||
|
|
|
@ -90,8 +90,9 @@ const int MAX_EXPRESSION_REGISTERS = 4096;
|
|||
// everything that is needed by the backend needs
|
||||
// to be double buffered to allow it to run in
|
||||
// parallel on a dual cpu machine
|
||||
#if defined(__APPLE__) && ( defined( USE_VULKAN ) || defined( USE_NVRHI ) )
|
||||
#if ( defined(__APPLE__) && defined( USE_VULKAN ) ) || defined( USE_NVRHI )
|
||||
// SRS - macOS MoltenVK/Metal needs triple buffering for full screen to work properly
|
||||
// SRS - use triple buffering for NVRHI with command queue event query sync method
|
||||
const uint32 NUM_FRAME_DATA = 3;
|
||||
#else
|
||||
const uint32 NUM_FRAME_DATA = 2;
|
||||
|
|
|
@ -1597,14 +1597,16 @@ We want to exit this with the GPU idle, right at vsync
|
|||
void idRenderBackend::GL_BlockingSwapBuffers()
|
||||
{
|
||||
// Make sure that all frames have finished rendering
|
||||
deviceManager->GetDevice()->waitForIdle();
|
||||
|
||||
// Release all in-flight references to the render targets
|
||||
deviceManager->GetDevice()->runGarbageCollection();
|
||||
// SRS - device-level sync kills perf by serializing command queue processing (CPU) and rendering (GPU)
|
||||
// - instead, use alternative sync method (based on command queue event queries) inside Present()
|
||||
//deviceManager->GetDevice()->waitForIdle();
|
||||
|
||||
// Present to the swap chain.
|
||||
deviceManager->Present();
|
||||
|
||||
// Release all in-flight references to the render targets
|
||||
deviceManager->GetDevice()->runGarbageCollection();
|
||||
|
||||
renderLog.EndFrame();
|
||||
|
||||
if( deviceManager->GetGraphicsAPI() == nvrhi::GraphicsAPI::VULKAN )
|
||||
|
|
|
@ -61,7 +61,6 @@ struct DeviceCreationParameters
|
|||
nvrhi::Format swapChainFormat = nvrhi::Format::RGBA8_UNORM; // RB: don't do the sRGB gamma ramp with the swapchain
|
||||
uint32_t swapChainSampleCount = 1;
|
||||
uint32_t swapChainSampleQuality = 0;
|
||||
uint32_t maxFramesInFlight = 2;
|
||||
bool enableDebugRuntime = false;
|
||||
bool enableNvrhiValidationLayer = false;
|
||||
bool vsyncEnabled = false;
|
||||
|
|
|
@ -61,6 +61,7 @@ class DeviceManager_DX12 : public DeviceManager
|
|||
std::vector<nvrhi::TextureHandle> m_RhiSwapChainBuffers;
|
||||
RefCountPtr<ID3D12Fence> m_FrameFence;
|
||||
std::vector<HANDLE> m_FrameFenceEvents;
|
||||
nvrhi::EventQueryHandle m_FrameWaitQuery;
|
||||
|
||||
UINT64 m_FrameCount = 1;
|
||||
|
||||
|
@ -447,6 +448,9 @@ bool DeviceManager_DX12::CreateDeviceAndSwapChain()
|
|||
m_FrameFenceEvents.push_back( CreateEvent( nullptr, false, true, NULL ) );
|
||||
}
|
||||
|
||||
m_FrameWaitQuery = nvrhiDevice->createEventQuery();
|
||||
nvrhiDevice->setEventQuery( m_FrameWaitQuery, nvrhi::CommandQueue::Graphics );
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -459,6 +463,8 @@ void DeviceManager_DX12::DestroyDeviceAndSwapChain()
|
|||
|
||||
nvrhiDevice = nullptr;
|
||||
|
||||
m_FrameWaitQuery = nullptr;
|
||||
|
||||
for( auto fenceEvent : m_FrameFenceEvents )
|
||||
{
|
||||
WaitForSingleObject( fenceEvent, INFINITE );
|
||||
|
@ -648,6 +654,11 @@ void DeviceManager_DX12::Present()
|
|||
// SRS - Don't change deviceParms.vsyncEnabled here, simply test for vsync mode 2 to set DXGI SyncInterval
|
||||
m_SwapChain->Present( deviceParms.vsyncEnabled && r_swapInterval.GetInteger() == 2 ? 1 : 0, presentFlags );
|
||||
|
||||
// SRS - Sync on previous frame's command queue completion vs. waitForIdle() on whole device
|
||||
nvrhiDevice->waitEventQuery( m_FrameWaitQuery );
|
||||
nvrhiDevice->resetEventQuery( m_FrameWaitQuery );
|
||||
nvrhiDevice->setEventQuery( m_FrameWaitQuery, nvrhi::CommandQueue::Graphics );
|
||||
|
||||
m_FrameFence->SetEventOnCompletion( m_FrameCount, m_FrameFenceEvents[bufferIndex] );
|
||||
m_GraphicsQueue->Signal( m_FrameFence, m_FrameCount );
|
||||
m_FrameCount++;
|
||||
|
|
|
@ -272,8 +272,7 @@ private:
|
|||
nvrhi::CommandListHandle m_BarrierCommandList;
|
||||
vk::Semaphore m_PresentSemaphore;
|
||||
|
||||
std::queue<nvrhi::EventQueryHandle> m_FramesInFlight;
|
||||
std::vector<nvrhi::EventQueryHandle> m_QueryPool;
|
||||
nvrhi::EventQueryHandle m_FrameWaitQuery;
|
||||
|
||||
// SRS - flag indicating support for eFifoRelaxed surface presentation (r_swapInterval = 1) mode
|
||||
bool enablePModeFifoRelaxed = false;
|
||||
|
@ -1124,6 +1123,10 @@ bool DeviceManager_VK::CreateDeviceAndSwapChain()
|
|||
|
||||
vkGetMoltenVKConfigurationMVK( m_VulkanInstance, &pConfig, &pConfigSize );
|
||||
|
||||
// SRS - Enforce synchronous queue submission for vkQueueSubmit() & vkQueuePresentKHR()
|
||||
pConfig.synchronousQueueSubmits = VK_TRUE;
|
||||
vkSetMoltenVKConfigurationMVK( m_VulkanInstance, &pConfig, &pConfigSize );
|
||||
|
||||
// SRS - If we don't have native image view swizzle, enable MoltenVK's image view swizzle feature
|
||||
if( portabilityFeatures.imageViewFormatSwizzle == VK_FALSE )
|
||||
{
|
||||
|
@ -1182,6 +1185,9 @@ bool DeviceManager_VK::CreateDeviceAndSwapChain()
|
|||
|
||||
m_PresentSemaphore = m_VulkanDevice.createSemaphore( vk::SemaphoreCreateInfo() );
|
||||
|
||||
m_FrameWaitQuery = m_NvrhiDevice->createEventQuery();
|
||||
m_NvrhiDevice->setEventQuery( m_FrameWaitQuery, nvrhi::CommandQueue::Graphics );
|
||||
|
||||
#undef CHECK
|
||||
|
||||
return true;
|
||||
|
@ -1191,25 +1197,13 @@ void DeviceManager_VK::DestroyDeviceAndSwapChain()
|
|||
{
|
||||
destroySwapChain();
|
||||
|
||||
m_FrameWaitQuery = nullptr;
|
||||
|
||||
m_VulkanDevice.destroySemaphore( m_PresentSemaphore );
|
||||
m_PresentSemaphore = vk::Semaphore();
|
||||
|
||||
m_BarrierCommandList = nullptr;
|
||||
|
||||
while( m_FramesInFlight.size() > 0 )
|
||||
{
|
||||
auto query = m_FramesInFlight.front();
|
||||
m_FramesInFlight.pop();
|
||||
query = nullptr;
|
||||
}
|
||||
|
||||
if( !m_QueryPool.empty() )
|
||||
{
|
||||
auto query = m_QueryPool.back();
|
||||
m_QueryPool.pop_back();
|
||||
query = nullptr;
|
||||
}
|
||||
|
||||
m_NvrhiDevice = nullptr;
|
||||
m_ValidationLayer = nullptr;
|
||||
m_RendererString.clear();
|
||||
|
@ -1294,37 +1288,17 @@ void DeviceManager_VK::Present()
|
|||
}
|
||||
else
|
||||
{
|
||||
#ifndef _WIN32
|
||||
if( deviceParms.vsyncEnabled )
|
||||
{
|
||||
m_PresentQueue.waitIdle();
|
||||
}
|
||||
#endif
|
||||
|
||||
while( m_FramesInFlight.size() > deviceParms.maxFramesInFlight )
|
||||
{
|
||||
auto query = m_FramesInFlight.front();
|
||||
m_FramesInFlight.pop();
|
||||
|
||||
m_NvrhiDevice->waitEventQuery( query );
|
||||
|
||||
m_QueryPool.push_back( query );
|
||||
}
|
||||
|
||||
nvrhi::EventQueryHandle query;
|
||||
if( !m_QueryPool.empty() )
|
||||
{
|
||||
query = m_QueryPool.back();
|
||||
m_QueryPool.pop_back();
|
||||
}
|
||||
// SRS - Sync on previous frame's command queue completion vs. waitForIdle() on whole device
|
||||
else
|
||||
{
|
||||
query = m_NvrhiDevice->createEventQuery();
|
||||
m_NvrhiDevice->waitEventQuery( m_FrameWaitQuery );
|
||||
m_NvrhiDevice->resetEventQuery( m_FrameWaitQuery );
|
||||
m_NvrhiDevice->setEventQuery( m_FrameWaitQuery, nvrhi::CommandQueue::Graphics );
|
||||
}
|
||||
|
||||
m_NvrhiDevice->resetEventQuery( query );
|
||||
m_NvrhiDevice->setEventQuery( query, nvrhi::CommandQueue::Graphics );
|
||||
m_FramesInFlight.push( query );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue