Merge remote-tracking branch 'SRSaunders/hiz-optick-fixes'

This commit is contained in:
Robert Beckebans 2025-02-06 17:01:09 +01:00
commit d085838d7f
9 changed files with 61 additions and 18 deletions

View file

@ -18,6 +18,6 @@ fi
# note 2: policy CMAKE_POLICY_DEFAULT_CMP0142=NEW suppresses non-existent per-config suffixes on Xcode library search paths, works for cmake version 3.25 and later
# note 3: env variable MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1 enables MoltenVK's image view swizzle which may be required on older macOS versions or hardware (see vulkaninfo) - only used for VulkanSDK < 1.3.275
# note 4: env variable MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS=0 disables synchronous queue submits which is optimal for the synchronization method used by the game - only used for VulkanSDK < 1.3.275
# note 5: env variable MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=1 enables MoltenVK's use of Metal argument buffers - needed for descriptor resource scaling and VulkanSDK < 1.3.275
# note 5: env variable MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=0 disables MoltenVK's use of Metal argument buffers - only used for VulkanSDK < 1.3.275
# note 6: env variable MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA=1.0 disables MoltenVK's timestampPeriod lowpass filter for non-Apple GPUs - only used for VulkanSDK < 1.3.275
cmake -G Xcode -DCMAKE_BUILD_TYPE=Debug -DCMAKE_XCODE_GENERATE_SCHEME=ON -DCMAKE_XCODE_SCHEME_ENVIRONMENT="MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1;MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS=0;MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=1;MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA=1.0" -DCMAKE_XCODE_SCHEME_ENABLE_GPU_API_VALIDATION=OFF -DCMAKE_SUPPRESS_REGENERATION=ON -DOPENAL_LIBRARY=$OPENAL_PREFIX/lib/libopenal.dylib -DOPENAL_INCLUDE_DIR=$OPENAL_PREFIX/include ../neo -DCMAKE_POLICY_DEFAULT_CMP0142=NEW -Wno-dev
cmake -G Xcode -DCMAKE_BUILD_TYPE=Debug -DCMAKE_XCODE_GENERATE_SCHEME=ON -DCMAKE_XCODE_SCHEME_ENVIRONMENT="MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1;MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS=0;MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=0;MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA=1.0" -DCMAKE_XCODE_SCHEME_ENABLE_GPU_API_VALIDATION=OFF -DCMAKE_SUPPRESS_REGENERATION=ON -DOPENAL_LIBRARY=$OPENAL_PREFIX/lib/libopenal.dylib -DOPENAL_INCLUDE_DIR=$OPENAL_PREFIX/include ../neo -DCMAKE_POLICY_DEFAULT_CMP0142=NEW -Wno-dev

View file

@ -5738,6 +5738,7 @@ void idRenderBackend::DrawViewInternal( const viewDef_t* _viewDef, const int ste
//-------------------------------------------------
if( R_UseHiZ() && is3D )
{
OPTICK_GPU_EVENT( "Render_HiZ" );
renderLog.OpenBlock( "Render_HiZ" );
commandList->clearTextureFloat( globalImages->hierarchicalZbufferImage->GetTextureHandle(), nvrhi::AllSubresources, nvrhi::Color( 1.f ) );
@ -5802,8 +5803,10 @@ void idRenderBackend::DrawViewInternal( const viewDef_t* _viewDef, const int ste
//-------------------------------------------------
// resolve the screen for SSR
//-------------------------------------------------
if( is3D && r_useSSR.GetBool() )
if( is3D && r_useSSR.GetBool() && R_UseHiZ() )
{
OPTICK_GPU_EVENT( "Resolve_Screen4SSR" );
if( R_GetMSAASamples() > 1 )
{
renderLog.OpenBlock( "Resolve to _currentRender" );

View file

@ -80,7 +80,14 @@ enum graphicsVendor_t
VENDOR_NVIDIA,
VENDOR_AMD,
VENDOR_INTEL,
VENDOR_APPLE // SRS - Added support for Apple GPUs
VENDOR_APPLE, // SRS - Added support for Apple GPUs
VENDOR_OTHER
};
// Coarse classification of the GPU selected by the device manager; stored in glconfig_t::gpuType
enum graphicsGpuType_t
{
// the selected adapter was classified as a discrete GPU
GPU_TYPE_DISCRETE,
// any adapter not classified as discrete (e.g. integrated GPUs)
GPU_TYPE_OTHER
};
#define ID_MSAA 0
@ -181,6 +188,7 @@ struct backEndCounters_t
struct glconfig_t
{
graphicsVendor_t vendor;
graphicsGpuType_t gpuType;
// int maxTextureSize; // TODO
// int maxTextureCoords; // TODO

View file

@ -364,6 +364,13 @@ bool R_UseTemporalAA()
// Reports whether the hierarchical Z-buffer (HiZ) path should be used.
// Returns the r_useHierarchicalDepthBuffer cvar, except where a known driver problem forces HiZ off.
bool R_UseHiZ()
{
#if defined(__linux__)
	// SRS - Disable HiZ to work-around Linux driver issues on Intel iGPUs
	const bool isIntelNonDiscrete = ( glConfig.vendor == VENDOR_INTEL ) && ( glConfig.gpuType == GPU_TYPE_OTHER );
	if( isIntelNonDiscrete )
	{
		return false;
	}
#endif

	// no platform-specific override applies, so defer to the user setting
	const bool hiZRequested = r_useHierarchicalDepthBuffer.GetBool();
	return hiZRequested;
}

View file

@ -59,6 +59,23 @@ void DeviceManager::GetWindowDimensions( int& width, int& height )
height = m_DeviceParams.backBufferHeight;
}
// Maps the numeric vendor ID reported by the graphics API for the selected adapter
// to the engine's graphicsVendor_t; unrecognized IDs map to VENDOR_OTHER.
graphicsVendor_t DeviceManager::getGPUVendor( uint32_t vendorID ) const
{
	if( vendorID == 0x10DE )
	{
		return VENDOR_NVIDIA;
	}

	if( vendorID == 0x1002 )
	{
		return VENDOR_AMD;
	}

	if( vendorID == 0x8086 )
	{
		return VENDOR_INTEL;
	}

	if( vendorID == 0x106B )
	{
		return VENDOR_APPLE;
	}

	// no known vendor matched
	return VENDOR_OTHER;
}
void DeviceManager::BackBufferResizing()
{
Framebuffer::Shutdown();

View file

@ -159,7 +159,6 @@ protected:
void* windowInstance;
void* windowHandle;
bool m_windowVisible = false;
bool isNvidia = false;
DeviceCreationParameters m_DeviceParams;
@ -171,6 +170,8 @@ protected:
DeviceManager() = default;
graphicsVendor_t getGPUVendor( uint32_t vendorID ) const;
void BackBufferResizing();
void BackBufferResized();

View file

@ -106,11 +106,6 @@ private:
void ReleaseRenderTargets();
};
// Returns true when the given adapter vendor ID equals 0x10DE (the ID treated as NVIDIA)
static bool IsNvDeviceID( UINT id )
{
	const bool matchesNvidia = ( id == 0x10DE );
	return matchesNvidia;
}
// Find an adapter whose name contains the given string.
static RefCountPtr<IDXGIAdapter> FindAdapter( const std::wstring& targetName )
{
@ -277,7 +272,9 @@ bool DeviceManager_DX12::CreateDeviceAndSwapChain()
}
m_RendererString = ss.str();
isNvidia = IsNvDeviceID( aDesc.VendorId );
glConfig.vendor = getGPUVendor( aDesc.VendorId );
// SRS - Intel iGPUs typically allocate 128 MB for Dedicated UMA, set threshold at 512 MB to potentially handle other iGPUs (e.g. AMD APUs)
glConfig.gpuType = aDesc.DedicatedVideoMemory > 0x20000000 ? GPU_TYPE_DISCRETE : GPU_TYPE_OTHER;
}
/*
// SRS - Don't center window here for DX12 only, instead use portable initialization in CreateWindowDeviceAndSwapChain() within win_glimp.cpp

View file

@ -50,8 +50,9 @@
#endif
#endif
#if defined( VK_EXT_layer_settings ) || defined( USE_MoltenVK )
// SRS - Disable MoltenVK's Synchronous Queue Submits for better performance, and Metal Argument Buffers to avoid HiZ compute shader issues on Apple Silicon
idCVar r_mvkSynchronousQueueSubmits( "r_mvkSynchronousQueueSubmits", "0", CVAR_BOOL | CVAR_INIT | CVAR_NEW, "Use MoltenVK's synchronous queue submit option." );
idCVar r_mvkUseMetalArgumentBuffers( "r_mvkUseMetalArgumentBuffers", "1", CVAR_INTEGER | CVAR_INIT | CVAR_NEW, "Use MoltenVK's Metal argument buffers option (0=Off, 1=On)", 0, 1 );
idCVar r_mvkUseMetalArgumentBuffers( "r_mvkUseMetalArgumentBuffers", "0", CVAR_INTEGER | CVAR_INIT | CVAR_NEW, "Use MoltenVK's Metal argument buffers option (0=Off, 1=On)", 0, 1 );
#endif
#endif
#include <nvrhi/validation.h>
@ -780,12 +781,16 @@ bool DeviceManager_VK::pickPhysicalDevice()
// pick the first discrete GPU if it exists, otherwise the first integrated GPU
if( !discreteGPUs.empty() )
{
glConfig.vendor = getGPUVendor( discreteGPUs[0].getProperties().vendorID );
glConfig.gpuType = GPU_TYPE_DISCRETE;
m_VulkanPhysicalDevice = discreteGPUs[0];
return true;
}
if( !otherGPUs.empty() )
{
glConfig.vendor = getGPUVendor( otherGPUs[0].getProperties().vendorID );
glConfig.gpuType = GPU_TYPE_OTHER;
m_VulkanPhysicalDevice = otherGPUs[0];
return true;
}
@ -1650,9 +1655,10 @@ void DeviceManager_VK::Present()
OPTICK_STORAGE_EVENT( mvkSubmitEventStorage, mvkSubmitEventDesc, mvkPreviousSubmitTime, mvkPreviousSubmitTime + mvkPreviousSubmitWaitTime );
OPTICK_STORAGE_TAG( mvkSubmitEventStorage, mvkPreviousSubmitTime + mvkPreviousSubmitWaitTime / 2, "Frame", idLib::frameNumber - 2 );
// SRS - select latest acquire time if hashes match and we didn't retrieve a new image, otherwise select previous acquire time
// SRS - select latest acquire time if hashes match and we didn't retrieve a new image, or vsync is on, or other high-load conditions
double mvkLatestAcquireHash = mvkPerfStats.queue.retrieveCAMetalDrawable.latest + mvkPerfStats.queue.retrieveCAMetalDrawable.previous;
int64_t mvkAcquireWaitTime = mvkLatestAcquireHash == mvkPreviousAcquireHash ? mvkPerfStats.queue.retrieveCAMetalDrawable.latest * 1000000.0 : mvkPerfStats.queue.retrieveCAMetalDrawable.previous * 1000000.0;
bool useLatestAcquire = ( mvkLatestAcquireHash != mvkPreviousAcquireHash ) && ( mvkPerfStats.queue.waitSubmitCommandBuffers.latest > mvkPerfStats.queue.waitSubmitCommandBuffers.previous || mvkPerfStats.queue.commandBufferEncoding.latest > mvkPerfStats.queue.commandBufferEncoding.previous ) && ( mvkPerfStats.queue.retrieveCAMetalDrawable.latest > mvkPerfStats.queue.retrieveCAMetalDrawable.previous );
int64_t mvkAcquireWaitTime = mvkLatestAcquireHash == mvkPreviousAcquireHash || r_swapInterval.GetInteger() > 0 || useLatestAcquire ? mvkPerfStats.queue.retrieveCAMetalDrawable.latest * 1000000.0 : mvkPerfStats.queue.retrieveCAMetalDrawable.previous * 1000000.0;
// SRS - select latest presented frame if we are running synchronous, otherwise select previous presented frame as reference
int64_t mvkAcquireStartTime = mvkPreviousSubmitTime + mvkPreviousSubmitWaitTime;
@ -1668,15 +1674,16 @@ void DeviceManager_VK::Present()
OPTICK_STORAGE_EVENT( mvkAcquireEventStorage, mvkAcquireEventDesc, mvkAcquireStartTime, mvkAcquireStartTime + mvkAcquireWaitTime );
OPTICK_STORAGE_TAG( mvkAcquireEventStorage, mvkAcquireStartTime + mvkAcquireWaitTime / 2, "Frame", frameNumberTag );
// SRS - when Optick is active, use MoltenVK's previous encoding time to select game command buffer vs. Optick's command buffer
// SRS - when Optick is active, use max of MoltenVK's latest/previous encoding time to select game command buffer vs. Optick's command buffer
int64_t mvkEncodeStartTime = mvkAcquireStartTime + mvkAcquireWaitTime;
mvkEncodeTime = Max( int64_t( 0 ), int64_t( mvkPerfStats.queue.commandBufferEncoding.previous * 1000000.0 ) - mvkAcquireWaitTime );
mvkEncodeTime = Max( mvkPerfStats.queue.commandBufferEncoding.latest, mvkPerfStats.queue.commandBufferEncoding.previous ) * 1000000.0;
mvkEncodeTime = ( mvkEncodeTime > mvkAcquireWaitTime ) && ( ( mvkPerfStats.queue.commandBufferEncoding.previous > mvkPerfStats.queue.commandBufferEncoding.latest && Max( mvkPreviousSubmitWaitTime, int64_t( mvkPerfStats.queue.waitSubmitCommandBuffers.previous * 1000000.0 ) ) > int64_t( mvkPerfStats.queue.waitSubmitCommandBuffers.latest * 1000000.0 ) ) || useLatestAcquire ) ? mvkEncodeTime - mvkAcquireWaitTime : mvkEncodeTime;
// SRS - create custom Optick event that displays MoltenVK's Vulkan-to-Metal encoding time
OPTICK_STORAGE_EVENT( mvkEncodeEventStorage, mvkEncodeEventDesc, mvkEncodeStartTime, mvkEncodeStartTime + mvkEncodeTime );
OPTICK_STORAGE_TAG( mvkEncodeEventStorage, mvkEncodeStartTime + mvkEncodeTime / 2, "Frame", frameNumberTag );
mvkPreviousSubmitWaitTime = mvkPerfStats.queue.waitSubmitCommandBuffers.latest * 1000000.0;
mvkPreviousSubmitWaitTime = Min( mvkPerfStats.queue.waitSubmitCommandBuffers.latest, mvkPerfStats.queue.waitSubmitCommandBuffers.previous ) * 1000000.0;
mvkPreviousAcquireHash = mvkLatestAcquireHash;
}
#endif

View file

@ -452,7 +452,7 @@ main
*/
int main( int argc, const char** argv )
{
extern idCVar r_useGPUSkinning;
//extern idCVar r_useGPUSkinning;
// DG: needed for Sys_ReLaunch()
cmdargc = argc;
@ -483,6 +483,8 @@ int main( int argc, const char** argv )
common->Init( 0, NULL, NULL );
}
// SRS - GPU skinning on Apple Silicon now works for recent builds and/or drivers
#if 0
// SRS - Determine the machine name, e.g. "x86_64" or "arm64"
// Might be cleaner in posix Sys_Init(), but only needed on
// macOS and all the required sys includes are located here.
@ -497,6 +499,7 @@ int main( int argc, const char** argv )
r_useGPUSkinning.SetInteger( 0 );
}
Mem_Free( machineName );
#endif
Posix_LateInit();