mirror of
https://github.com/id-Software/DOOM-3-BFG.git
synced 2025-03-13 22:22:05 +00:00
Merge remote-tracking branch 'SRSaunders/hiz-optick-fixes'
This commit is contained in:
commit
d085838d7f
9 changed files with 61 additions and 18 deletions
|
@ -18,6 +18,6 @@ fi
|
|||
# note 2: policy CMAKE_POLICY_DEFAULT_CMP0142=NEW suppresses non-existent per-config suffixes on Xcode library search paths, works for cmake version 3.25 and later
|
||||
# note 3: env variable MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1 enables MoltenVK's image view swizzle which may be required on older macOS versions or hardware (see vulkaninfo) - only used for VulkanSDK < 1.3.275
|
||||
# note 4: env variable MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS=0 disables synchronous queue submits which is optimal for the synchronization method used by the game - only used for VulkanSDK < 1.3.275
|
||||
# note 5: env variable MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=1 enables MoltenVK's use of Metal argument buffers - needed for descriptor resource scaling and VulkanSDK < 1.3.275
|
||||
# note 5: env variable MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=0 disables MoltenVK's use of Metal argument buffers - only used for VulkanSDK < 1.3.275
|
||||
# note 6: env variable MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA=1.0 disables MoltenVK's timestampPeriod lowpass filter for non-Apple GPUs - only used for VulkanSDK < 1.3.275
|
||||
cmake -G Xcode -DCMAKE_BUILD_TYPE=Debug -DCMAKE_XCODE_GENERATE_SCHEME=ON -DCMAKE_XCODE_SCHEME_ENVIRONMENT="MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1;MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS=0;MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=1;MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA=1.0" -DCMAKE_XCODE_SCHEME_ENABLE_GPU_API_VALIDATION=OFF -DCMAKE_SUPPRESS_REGENERATION=ON -DOPENAL_LIBRARY=$OPENAL_PREFIX/lib/libopenal.dylib -DOPENAL_INCLUDE_DIR=$OPENAL_PREFIX/include ../neo -DCMAKE_POLICY_DEFAULT_CMP0142=NEW -Wno-dev
|
||||
cmake -G Xcode -DCMAKE_BUILD_TYPE=Debug -DCMAKE_XCODE_GENERATE_SCHEME=ON -DCMAKE_XCODE_SCHEME_ENVIRONMENT="MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1;MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS=0;MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=0;MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA=1.0" -DCMAKE_XCODE_SCHEME_ENABLE_GPU_API_VALIDATION=OFF -DCMAKE_SUPPRESS_REGENERATION=ON -DOPENAL_LIBRARY=$OPENAL_PREFIX/lib/libopenal.dylib -DOPENAL_INCLUDE_DIR=$OPENAL_PREFIX/include ../neo -DCMAKE_POLICY_DEFAULT_CMP0142=NEW -Wno-dev
|
||||
|
|
|
@ -5738,6 +5738,7 @@ void idRenderBackend::DrawViewInternal( const viewDef_t* _viewDef, const int ste
|
|||
//-------------------------------------------------
|
||||
if( R_UseHiZ() && is3D )
|
||||
{
|
||||
OPTICK_GPU_EVENT( "Render_HiZ" );
|
||||
renderLog.OpenBlock( "Render_HiZ" );
|
||||
|
||||
commandList->clearTextureFloat( globalImages->hierarchicalZbufferImage->GetTextureHandle(), nvrhi::AllSubresources, nvrhi::Color( 1.f ) );
|
||||
|
@ -5802,8 +5803,10 @@ void idRenderBackend::DrawViewInternal( const viewDef_t* _viewDef, const int ste
|
|||
//-------------------------------------------------
|
||||
// resolve the screen for SSR
|
||||
//-------------------------------------------------
|
||||
if( is3D && r_useSSR.GetBool() )
|
||||
if( is3D && r_useSSR.GetBool() && R_UseHiZ() )
|
||||
{
|
||||
OPTICK_GPU_EVENT( "Resolve_Screen4SSR" );
|
||||
|
||||
if( R_GetMSAASamples() > 1 )
|
||||
{
|
||||
renderLog.OpenBlock( "Resolve to _currentRender" );
|
||||
|
|
|
@ -80,7 +80,14 @@ enum graphicsVendor_t
|
|||
VENDOR_NVIDIA,
|
||||
VENDOR_AMD,
|
||||
VENDOR_INTEL,
|
||||
VENDOR_APPLE // SRS - Added support for Apple GPUs
|
||||
VENDOR_APPLE, // SRS - Added support for Apple GPUs
|
||||
VENDOR_OTHER
|
||||
};
|
||||
|
||||
// Coarse classification of the selected GPU, stored in glconfig_t::gpuType
// alongside the vendor.
enum graphicsGpuType_t
|
||||
{
|
||||
// GPU identified as discrete by the device manager
GPU_TYPE_DISCRETE,
|
||||
// any GPU not identified as discrete (presumably integrated GPUs - confirm against the device managers)
GPU_TYPE_OTHER
|
||||
};
|
||||
|
||||
#define ID_MSAA 0
|
||||
|
@ -181,6 +188,7 @@ struct backEndCounters_t
|
|||
struct glconfig_t
|
||||
{
|
||||
graphicsVendor_t vendor;
|
||||
graphicsGpuType_t gpuType;
|
||||
|
||||
// int maxTextureSize; // TODO
|
||||
// int maxTextureCoords; // TODO
|
||||
|
|
|
@ -364,6 +364,13 @@ bool R_UseTemporalAA()
|
|||
// Reports whether the hierarchical depth buffer (HiZ) path should be used.
// Returns the value of the r_useHierarchicalDepthBuffer cvar, except on Linux
// builds where an Intel GPU classified as GPU_TYPE_OTHER always yields false.
bool R_UseHiZ()
|
||||
{
|
||||
// TODO check for driver problems here
|
||||
#if defined(__linux__)
|
||||
if( glConfig.vendor == VENDOR_INTEL && glConfig.gpuType == GPU_TYPE_OTHER )
|
||||
{
|
||||
// SRS - Disable HiZ to work-around Linux driver issues on Intel iGPUs
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
// no platform override applies: defer to the user-controlled cvar
return r_useHierarchicalDepthBuffer.GetBool();
|
||||
}
|
||||
|
||||
|
|
|
@ -59,6 +59,23 @@ void DeviceManager::GetWindowDimensions( int& width, int& height )
|
|||
height = m_DeviceParams.backBufferHeight;
|
||||
}
|
||||
|
||||
// Translates a numeric adapter vendor ID into the engine's graphicsVendor_t.
// vendorID: ID reported by the graphics API for the adapter (presumably the
//           PCI vendor ID - confirm against the DX12 / Vulkan callers).
// Returns the matching vendor enum, or VENDOR_OTHER for any unrecognized ID.
graphicsVendor_t DeviceManager::getGPUVendor( uint32_t vendorID ) const
|
||||
{
|
||||
switch( vendorID )
|
||||
{
|
||||
case 0x10DE:
|
||||
return VENDOR_NVIDIA;
|
||||
case 0x1002:
|
||||
return VENDOR_AMD;
|
||||
case 0x8086:
|
||||
return VENDOR_INTEL;
|
||||
case 0x106B:
|
||||
return VENDOR_APPLE;
|
||||
default:
|
||||
// any ID not listed above
return VENDOR_OTHER;
|
||||
}
|
||||
}
|
||||
|
||||
void DeviceManager::BackBufferResizing()
|
||||
{
|
||||
Framebuffer::Shutdown();
|
||||
|
|
|
@ -159,7 +159,6 @@ protected:
|
|||
void* windowInstance;
|
||||
void* windowHandle;
|
||||
bool m_windowVisible = false;
|
||||
bool isNvidia = false;
|
||||
|
||||
DeviceCreationParameters m_DeviceParams;
|
||||
|
||||
|
@ -171,6 +170,8 @@ protected:
|
|||
|
||||
DeviceManager() = default;
|
||||
|
||||
graphicsVendor_t getGPUVendor( uint32_t vendorID ) const;
|
||||
|
||||
void BackBufferResizing();
|
||||
void BackBufferResized();
|
||||
|
||||
|
|
|
@ -106,11 +106,6 @@ private:
|
|||
void ReleaseRenderTargets();
|
||||
};
|
||||
|
||||
// Returns true when the given ID equals 0x10DE, the value treated as the
// NVIDIA vendor ID in this code base.
static bool IsNvDeviceID( UINT id )
|
||||
{
|
||||
return id == 0x10DE;
|
||||
}
|
||||
|
||||
// Find an adapter whose name contains the given string.
|
||||
static RefCountPtr<IDXGIAdapter> FindAdapter( const std::wstring& targetName )
|
||||
{
|
||||
|
@ -277,7 +272,9 @@ bool DeviceManager_DX12::CreateDeviceAndSwapChain()
|
|||
}
|
||||
m_RendererString = ss.str();
|
||||
|
||||
isNvidia = IsNvDeviceID( aDesc.VendorId );
|
||||
glConfig.vendor = getGPUVendor( aDesc.VendorId );
|
||||
// SRS - Intel iGPUs typically allocate 128 MB for Dedicated UMA, set threshold at 512 MB to potentially handle other iGPUs (e.g. AMD APUs)
|
||||
glConfig.gpuType = aDesc.DedicatedVideoMemory > 0x20000000 ? GPU_TYPE_DISCRETE : GPU_TYPE_OTHER;
|
||||
}
|
||||
/*
|
||||
// SRS - Don't center window here for DX12 only, instead use portable initialization in CreateWindowDeviceAndSwapChain() within win_glimp.cpp
|
||||
|
|
|
@ -50,8 +50,9 @@
|
|||
#endif
|
||||
#endif
|
||||
#if defined( VK_EXT_layer_settings ) || defined( USE_MoltenVK )
|
||||
// SRS - Disable MoltenVK's Synchronous Queue Submits for better performance, and Metal Argument Buffers to avoid HiZ compute shader issues on Apple Silicon
|
||||
idCVar r_mvkSynchronousQueueSubmits( "r_mvkSynchronousQueueSubmits", "0", CVAR_BOOL | CVAR_INIT | CVAR_NEW, "Use MoltenVK's synchronous queue submit option." );
|
||||
idCVar r_mvkUseMetalArgumentBuffers( "r_mvkUseMetalArgumentBuffers", "1", CVAR_INTEGER | CVAR_INIT | CVAR_NEW, "Use MoltenVK's Metal argument buffers option (0=Off, 1=On)", 0, 1 );
|
||||
idCVar r_mvkUseMetalArgumentBuffers( "r_mvkUseMetalArgumentBuffers", "0", CVAR_INTEGER | CVAR_INIT | CVAR_NEW, "Use MoltenVK's Metal argument buffers option (0=Off, 1=On)", 0, 1 );
|
||||
#endif
|
||||
#endif
|
||||
#include <nvrhi/validation.h>
|
||||
|
@ -780,12 +781,16 @@ bool DeviceManager_VK::pickPhysicalDevice()
|
|||
// pick the first discrete GPU if it exists, otherwise the first integrated GPU
|
||||
if( !discreteGPUs.empty() )
|
||||
{
|
||||
glConfig.vendor = getGPUVendor( discreteGPUs[0].getProperties().vendorID );
|
||||
glConfig.gpuType = GPU_TYPE_DISCRETE;
|
||||
m_VulkanPhysicalDevice = discreteGPUs[0];
|
||||
return true;
|
||||
}
|
||||
|
||||
if( !otherGPUs.empty() )
|
||||
{
|
||||
glConfig.vendor = getGPUVendor( otherGPUs[0].getProperties().vendorID );
|
||||
glConfig.gpuType = GPU_TYPE_OTHER;
|
||||
m_VulkanPhysicalDevice = otherGPUs[0];
|
||||
return true;
|
||||
}
|
||||
|
@ -1650,9 +1655,10 @@ void DeviceManager_VK::Present()
|
|||
OPTICK_STORAGE_EVENT( mvkSubmitEventStorage, mvkSubmitEventDesc, mvkPreviousSubmitTime, mvkPreviousSubmitTime + mvkPreviousSubmitWaitTime );
|
||||
OPTICK_STORAGE_TAG( mvkSubmitEventStorage, mvkPreviousSubmitTime + mvkPreviousSubmitWaitTime / 2, "Frame", idLib::frameNumber - 2 );
|
||||
|
||||
// SRS - select latest acquire time if hashes match and we didn't retrieve a new image, otherwise select previous acquire time
|
||||
// SRS - select latest acquire time if hashes match and we didn't retrieve a new image, or vsync is on, or other high-load conditions
|
||||
double mvkLatestAcquireHash = mvkPerfStats.queue.retrieveCAMetalDrawable.latest + mvkPerfStats.queue.retrieveCAMetalDrawable.previous;
|
||||
int64_t mvkAcquireWaitTime = mvkLatestAcquireHash == mvkPreviousAcquireHash ? mvkPerfStats.queue.retrieveCAMetalDrawable.latest * 1000000.0 : mvkPerfStats.queue.retrieveCAMetalDrawable.previous * 1000000.0;
|
||||
bool useLatestAcquire = ( mvkLatestAcquireHash != mvkPreviousAcquireHash ) && ( mvkPerfStats.queue.waitSubmitCommandBuffers.latest > mvkPerfStats.queue.waitSubmitCommandBuffers.previous || mvkPerfStats.queue.commandBufferEncoding.latest > mvkPerfStats.queue.commandBufferEncoding.previous ) && ( mvkPerfStats.queue.retrieveCAMetalDrawable.latest > mvkPerfStats.queue.retrieveCAMetalDrawable.previous );
|
||||
int64_t mvkAcquireWaitTime = mvkLatestAcquireHash == mvkPreviousAcquireHash || r_swapInterval.GetInteger() > 0 || useLatestAcquire ? mvkPerfStats.queue.retrieveCAMetalDrawable.latest * 1000000.0 : mvkPerfStats.queue.retrieveCAMetalDrawable.previous * 1000000.0;
|
||||
|
||||
// SRS - select latest presented frame if we are running synchronous, otherwise select previous presented frame as reference
|
||||
int64_t mvkAcquireStartTime = mvkPreviousSubmitTime + mvkPreviousSubmitWaitTime;
|
||||
|
@ -1668,15 +1674,16 @@ void DeviceManager_VK::Present()
|
|||
OPTICK_STORAGE_EVENT( mvkAcquireEventStorage, mvkAcquireEventDesc, mvkAcquireStartTime, mvkAcquireStartTime + mvkAcquireWaitTime );
|
||||
OPTICK_STORAGE_TAG( mvkAcquireEventStorage, mvkAcquireStartTime + mvkAcquireWaitTime / 2, "Frame", frameNumberTag );
|
||||
|
||||
// SRS - when Optick is active, use MoltenVK's previous encoding time to select game command buffer vs. Optick's command buffer
|
||||
// SRS - when Optick is active, use max of MoltenVK's latest/previous encoding time to select game command buffer vs. Optick's command buffer
|
||||
int64_t mvkEncodeStartTime = mvkAcquireStartTime + mvkAcquireWaitTime;
|
||||
mvkEncodeTime = Max( int64_t( 0 ), int64_t( mvkPerfStats.queue.commandBufferEncoding.previous * 1000000.0 ) - mvkAcquireWaitTime );
|
||||
mvkEncodeTime = Max( mvkPerfStats.queue.commandBufferEncoding.latest, mvkPerfStats.queue.commandBufferEncoding.previous ) * 1000000.0;
|
||||
mvkEncodeTime = ( mvkEncodeTime > mvkAcquireWaitTime ) && ( ( mvkPerfStats.queue.commandBufferEncoding.previous > mvkPerfStats.queue.commandBufferEncoding.latest && Max( mvkPreviousSubmitWaitTime, int64_t( mvkPerfStats.queue.waitSubmitCommandBuffers.previous * 1000000.0 ) ) > int64_t( mvkPerfStats.queue.waitSubmitCommandBuffers.latest * 1000000.0 ) ) || useLatestAcquire ) ? mvkEncodeTime - mvkAcquireWaitTime : mvkEncodeTime;
|
||||
|
||||
// SRS - create custom Optick event that displays MoltenVK's Vulkan-to-Metal encoding time
|
||||
OPTICK_STORAGE_EVENT( mvkEncodeEventStorage, mvkEncodeEventDesc, mvkEncodeStartTime, mvkEncodeStartTime + mvkEncodeTime );
|
||||
OPTICK_STORAGE_TAG( mvkEncodeEventStorage, mvkEncodeStartTime + mvkEncodeTime / 2, "Frame", frameNumberTag );
|
||||
|
||||
mvkPreviousSubmitWaitTime = mvkPerfStats.queue.waitSubmitCommandBuffers.latest * 1000000.0;
|
||||
mvkPreviousSubmitWaitTime = Min( mvkPerfStats.queue.waitSubmitCommandBuffers.latest, mvkPerfStats.queue.waitSubmitCommandBuffers.previous ) * 1000000.0;
|
||||
mvkPreviousAcquireHash = mvkLatestAcquireHash;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -452,7 +452,7 @@ main
|
|||
*/
|
||||
int main( int argc, const char** argv )
|
||||
{
|
||||
extern idCVar r_useGPUSkinning;
|
||||
//extern idCVar r_useGPUSkinning;
|
||||
|
||||
// DG: needed for Sys_ReLaunch()
|
||||
cmdargc = argc;
|
||||
|
@ -483,6 +483,8 @@ int main( int argc, const char** argv )
|
|||
common->Init( 0, NULL, NULL );
|
||||
}
|
||||
|
||||
// SRS - GPU skinning on Apple Silicon now works for recent builds and/or drivers
|
||||
#if 0
|
||||
// SRS - Determine the machine name, e.g. "x86_64" or "arm64"
|
||||
// Might be cleaner in posix Sys_Init(), but only needed on
|
||||
// macOS and all the required sys includes are located here.
|
||||
|
@ -497,6 +499,7 @@ int main( int argc, const char** argv )
|
|||
r_useGPUSkinning.SetInteger( 0 );
|
||||
}
|
||||
Mem_Free( machineName );
|
||||
#endif
|
||||
|
||||
Posix_LateInit();
|
||||
|
||||
|
|
Loading…
Reference in a new issue