From e4021513d1b0b18b0cb006fef19890211038f201 Mon Sep 17 00:00:00 2001 From: Stephen Saunders Date: Fri, 28 Jan 2022 10:28:54 -0500 Subject: [PATCH] Fixes for: Vulkan+BinkDec validation errors, BinkDec replay in PDAs, macOS Vulkan+BinkDec stall, VK_EXT_debug_utils handling, Vulkan gfxInfo on Windows & Linux, idVulkanAllocator::Free(), idCinematicLocal::GetStartTime(), ImGui timings for timedemo first pass (cherry picked from commit 2a93ad2380374e1a327f7af4f8e71c27b2b51528) --- neo/cmake-xcode-vulkan-debug.sh | 8 +- neo/framework/Common.cpp | 4 +- neo/framework/Common_demos.cpp | 25 ++++-- neo/framework/common_frame.cpp | 3 + neo/renderer/Cinematic.cpp | 97 +++++++++++++++++------- neo/renderer/RenderBackend.h | 1 + neo/renderer/RenderLog.cpp | 38 +++++----- neo/renderer/Vulkan/Allocator_VK.cpp | 14 +++- neo/renderer/Vulkan/BufferObject_VK.cpp | 34 ++++++--- neo/renderer/Vulkan/Image_VK.cpp | 16 ++++ neo/renderer/Vulkan/RenderBackend_VK.cpp | 92 ++++++++++++++-------- 11 files changed, 230 insertions(+), 102 deletions(-) diff --git a/neo/cmake-xcode-vulkan-debug.sh b/neo/cmake-xcode-vulkan-debug.sh index 21a37918..31464dcc 100755 --- a/neo/cmake-xcode-vulkan-debug.sh +++ b/neo/cmake-xcode-vulkan-debug.sh @@ -2,6 +2,8 @@ cd .. rm -rf xcode-vulkan-debug mkdir xcode-vulkan-debug cd xcode-vulkan-debug -# remove or set -DCMAKE_SUPPRESS_REGENERATION=OFF to reenable ZERO_CHECK target which checks for CMakeLists.txt changes and re-runs CMake before builds -# however, if ZERO_CHECK is reenabled **must** add VULKAN_SDK location to Xcode Custom Paths (under Prefs/Locations) otherwise build failures may occur -cmake -G Xcode -DCMAKE_BUILD_TYPE=Debug -DSDL2=ON -DUSE_VULKAN=ON -DSPIRV_SHADERC=OFF -DCMAKE_XCODE_GENERATE_SCHEME=ON -DCMAKE_XCODE_SCHEME_ENVIRONMENT="MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1" -DCMAKE_SUPPRESS_REGENERATION=ON -DOPENAL_LIBRARY=/usr/local/opt/openal-soft/lib/libopenal.dylib -DOPENAL_INCLUDE_DIR=/usr/local/opt/openal-soft/include ../neo -Wno-dev +# note 1: remove or set -DCMAKE_SUPPRESS_REGENERATION=OFF to reenable ZERO_CHECK target which checks for CMakeLists.txt changes and re-runs CMake before builds +# however, if ZERO_CHECK is reenabled **must** add VULKAN_SDK location to Xcode Custom Paths (under Prefs/Locations) otherwise build failures may occur +# note 2: env variable MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1 enables imageViewFormatSwizzle which may be required on older macOS versions or hardware (see vulkaninfo) +# note 3: env variable VK_LAYER_MESSAGE_ID_FILTER=0xb408bc0b suppresses validation layer error messages caused by exceeding maxSamplerAllocationCount limit +cmake -G Xcode -DCMAKE_BUILD_TYPE=Debug -DSDL2=ON -DUSE_VULKAN=ON -DSPIRV_SHADERC=OFF -DCMAKE_XCODE_GENERATE_SCHEME=ON -DCMAKE_XCODE_SCHEME_ENVIRONMENT="MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE=1;VK_LAYER_MESSAGE_ID_FILTER=0xb408bc0b" -DCMAKE_SUPPRESS_REGENERATION=ON -DOPENAL_LIBRARY=/usr/local/opt/openal-soft/lib/libopenal.dylib -DOPENAL_INCLUDE_DIR=/usr/local/opt/openal-soft/include ../neo -Wno-dev diff --git a/neo/framework/Common.cpp b/neo/framework/Common.cpp index a55b02e1..cf66e3c6 100644 --- a/neo/framework/Common.cpp +++ b/neo/framework/Common.cpp @@ -931,8 +931,8 @@ void idCommonLocal::RenderBink( const char* path ) material->Parse( materialText.c_str(), materialText.Length(), false ); material->ResetCinematicTime( Sys_Milliseconds() ); - // RB: FFmpeg might return the wrong play length so I changed the intro video to play max 30 seconds until finished - int cinematicLength = 30000; //material->CinematicLength(); + // SRS - Restored original calculation after implementing idCinematicLocal::GetStartTime() and fixing animationLength in idCinematicLocal::InitFromBinkDecFile() + int cinematicLength = material->CinematicLength(); int mouseEvents[MAX_MOUSE_EVENTS][2]; bool escapeEvent = false; diff --git a/neo/framework/Common_demos.cpp b/neo/framework/Common_demos.cpp index d8e04d80..36fb0b78 100644 --- a/neo/framework/Common_demos.cpp +++ b/neo/framework/Common_demos.cpp @@ -312,6 +312,7 @@ idCommonLocal::TimeRenderDemo */ void idCommonLocal::TimeRenderDemo( const char* demoName, bool twice, bool quit ) { + extern idCVar com_smp; idStr demo = demoName; StartPlayingRenderDemo( demo ); @@ -319,16 +320,30 @@ void idCommonLocal::TimeRenderDemo( const char* demoName, bool twice, bool quit if( twice && readDemo ) { timeDemo = TD_YES; // SRS - Set timeDemo to TD_YES to disable time demo playback pause when window not in focus + + int smp_mode = com_smp.GetInteger(); + com_smp.SetInteger( 0 ); // SRS - First pass of timedemo is effectively in com_smp == 0 mode, so set this for ImGui timings to be correct while( readDemo ) { -// const bool captureToImage = false; -// UpdateScreen( captureToImage ); - BusyWait(); // SRS - Call BusyWait() vs. UpdateScreen() to avoid Pump() timeout messages in console - AdvanceRenderDemo( true ); - eventLoop->RunEventLoop(); // SRS - Run event loop to allow keyboard escape to cancel first pass of the demo + BusyWait(); // SRS - BusyWait() calls UpdateScreen() which draws and renders out-of-sequence but still supports frame timing + commonLocal.frameTiming.finishSyncTime_EndFrame = Sys_Microseconds(); + commonLocal.mainFrameTiming = commonLocal.frameTiming; + // ** End of current logical frame ** + + // ** Start of next logical frame ** + commonLocal.frameTiming.startSyncTime = Sys_Microseconds(); + commonLocal.frameTiming.finishSyncTime = commonLocal.frameTiming.startSyncTime; + commonLocal.frameTiming.startGameTime = commonLocal.frameTiming.finishSyncTime; + + AdvanceRenderDemo( true ); // SRS - Advance demo commands to manually run the next game frame during first pass of the timedemo + commonLocal.frameTiming.finishGameTime = Sys_Microseconds(); + + eventLoop->RunEventLoop( false ); // SRS - Run event loop (with no commands) to allow keyboard escape to cancel first pass of the timedemo } + com_smp.SetInteger( smp_mode ); // SRS - Restore original com_smp mode before second pass of timedemo which runs within normal rendering loop + StartPlayingRenderDemo( demo ); } diff --git a/neo/framework/common_frame.cpp b/neo/framework/common_frame.cpp index 40e33782..d419d963 100644 --- a/neo/framework/common_frame.cpp +++ b/neo/framework/common_frame.cpp @@ -427,6 +427,7 @@ void idCommonLocal::UpdateScreen( bool captureToImage, bool releaseMouse ) // build all the draw commands without running a new game tic Draw(); + frameTiming.finishDrawTime = Sys_Microseconds(); // SRS - Added frame timing for out-of-sequence updates (e.g. used in timedemo "twice" mode) // foresthale 2014-03-01: note: the only place that has captureToImage=true is idAutoRender::StartBackgroundAutoSwaps if( captureToImage ) @@ -438,7 +439,9 @@ void idCommonLocal::UpdateScreen( bool captureToImage, bool releaseMouse ) const emptyCommand_t* cmd = renderSystem->SwapCommandBuffers( &time_frontend, &time_backend, &time_shadows, &time_gpu, &stats_backend, &stats_frontend ); // get the GPU busy with new commands + frameTiming.startRenderTime = Sys_Microseconds(); // SRS - Added frame timing for out-of-sequence updates (e.g. used in timedemo "twice" mode) renderSystem->RenderCommandBuffers( cmd ); + frameTiming.finishRenderTime = Sys_Microseconds(); // SRS - Added frame timing for out-of-sequence updates (e.g. used in timedemo "twice" mode) insideUpdateScreen = false; } diff --git a/neo/renderer/Cinematic.cpp b/neo/renderer/Cinematic.cpp index d5187453..3dabbb2c 100644 --- a/neo/renderer/Cinematic.cpp +++ b/neo/renderer/Cinematic.cpp @@ -90,6 +90,9 @@ public: bool IsPlaying() const; // RB end virtual void Close(); + // SRS begin + virtual int GetStartTime(); + // SRS end virtual void ResetTime( int time ); private: @@ -116,6 +119,7 @@ private: void BinkDecReset(); YUVbuffer yuvBuffer; + bool hasFrame; int framePos; int numFrames; idImage* imgY; @@ -332,7 +336,7 @@ idCinematic::GetStartTime */ int idCinematic::GetStartTime() { - return -1; + return -1; // SRS - this is just the abstract virtual method } /* @@ -422,11 +426,13 @@ idCinematicLocal::idCinematicLocal() video_stream_index = -1; img_convert_ctx = NULL; hasFrame = false; + framePos = -1; #endif #ifdef USE_BINKDEC binkHandle.isValid = false; binkHandle.instanceIndex = -1; // whatever this is, it now has a deterministic value + hasFrame = false; framePos = -1; numFrames = 0; @@ -622,25 +628,23 @@ bool idCinematicLocal::InitFromFFMPEGFile( const char* qpath, bool amilooping ) float durationSec = static_cast( fmt_ctx->streams[video_stream_index]->duration ) * static_cast( ticksPerFrame ) / static_cast( avr.den ); animationLength = durationSec * 1000; frameRate = av_q2d( fmt_ctx->streams[video_stream_index]->avg_frame_rate ); - buf = NULL; - hasFrame = false; - framePos = -1; common->Printf( "Loaded FFMPEG file: '%s', looping=%d%dx%d, %f FPS, %f sec\n", qpath, looping, CIN_WIDTH, CIN_HEIGHT, frameRate, durationSec ); - image = ( byte* )Mem_Alloc( CIN_WIDTH * CIN_HEIGHT * 4 * 2, TAG_CINEMATIC ); + + image = ( byte* )Mem_Alloc( CIN_WIDTH * CIN_HEIGHT * 4 * 2, TAG_CINEMATIC ); avpicture_fill( ( AVPicture* )frame2, image, AV_PIX_FMT_BGR32, CIN_WIDTH, CIN_HEIGHT ); if( img_convert_ctx ) { sws_freeContext( img_convert_ctx ); } img_convert_ctx = sws_getContext( dec_ctx->width, dec_ctx->height, dec_ctx->pix_fmt, CIN_WIDTH, CIN_HEIGHT, AV_PIX_FMT_BGR32, SWS_BICUBIC, NULL, NULL, NULL ); - status = FMV_PLAY; - - startTime = 0; + + buf = NULL; + status = FMV_PLAY; + hasFrame = false; + framePos = -1; ImageForTime( 0 ); status = ( looping ) ? FMV_PLAY : FMV_IDLE; - //startTime = Sys_Milliseconds(); - return true; } #endif @@ -720,18 +724,18 @@ bool idCinematicLocal::InitFromBinkDecFile( const char* qpath, bool amilooping ) frameRate = Bink_GetFrameRate( binkHandle ); numFrames = Bink_GetNumFrames( binkHandle ); - float durationSec = frameRate * numFrames; - animationLength = durationSec; - buf = NULL; + float durationSec = numFrames / frameRate; // SRS - fixed Bink durationSec calculation + animationLength = durationSec * 1000; // SRS - animationLength is in milliseconds + common->Printf( "Loaded BinkDec file: '%s', looping=%d, %dx%d, %f FPS, %f sec\n", qpath, looping, CIN_WIDTH, CIN_HEIGHT, frameRate, durationSec ); + + memset( yuvBuffer, 0, sizeof( yuvBuffer ) ); - common->Printf( "Loaded BinkDec file: '%s', looping=%d%dx%d, %f FPS, %f sec\n", qpath, looping, CIN_WIDTH, CIN_HEIGHT, frameRate, durationSec ); - - status = FMV_PLAY; - - startTime = Sys_Milliseconds(); - memset( yuvBuffer, 0, sizeof( yuvBuffer ) ); - framePos = -1; - ImageForTime( 0 ); // SRS - Was missing initial call to ImageForTime() - fixes validation errors when using Vulkan renderer + buf = NULL; + status = FMV_PLAY; + hasFrame = false; // SRS - Implemented hasFrame for BinkDec behaviour consistency with FFMPEG + framePos = -1; + ImageForTime( 0 ); // SRS - Was missing initial call to ImageForTime() - fixes validation errors when using Vulkan renderer + status = ( looping ) ? FMV_PLAY : FMV_IDLE; // SRS - Update status based on looping flag return true; } @@ -793,6 +797,7 @@ bool idCinematicLocal::InitFromFile( const char* qpath, bool amilooping ) #elif defined(USE_BINKDEC) idStr temp = fileName.StripFileExtension() + ".bik"; animationLength = 0; + hasFrame = false; RoQShutdown(); fileName = temp; //idLib::Warning( "New filename: '%s'\n", fileName.c_str() ); @@ -879,10 +884,15 @@ void idCinematicLocal::Close() } #endif #ifdef USE_BINKDEC - if( !isRoQ && binkHandle.isValid ) + hasFrame = false; + + if( !isRoQ ) { - memset( yuvBuffer, 0 , sizeof( yuvBuffer ) ); - Bink_Close( binkHandle ); + if( binkHandle.isValid ) + { + memset( yuvBuffer, 0 , sizeof( yuvBuffer ) ); + Bink_Close( binkHandle ); + } status = FMV_EOF; } #endif @@ -905,6 +915,18 @@ bool idCinematicLocal::IsPlaying() const } // RB end +// SRS - Implement virtual method to override abstract virtual method +/* +============== + idCinematicLocal::GetStartTime +============== +*/ +int idCinematicLocal::GetStartTime() +{ + return startTime; +} +// SRS end + /* ============== idCinematicLocal::ResetTime @@ -1168,13 +1190,14 @@ cinData_t idCinematicLocal::ImageForTimeFFMPEG( int thisTime ) #ifdef USE_BINKDEC cinData_t idCinematicLocal::ImageForTimeBinkDec( int thisTime ) { - cinData_t cinData = {0}; + cinData_t cinData; if( thisTime <= 0 ) { thisTime = Sys_Milliseconds(); } + memset( &cinData, 0, sizeof( cinData ) ); if( r_skipDynamicTextures.GetBool() || status == FMV_EOF || status == FMV_IDLE ) { return cinData; @@ -1186,9 +1209,13 @@ cinData_t idCinematicLocal::ImageForTimeBinkDec( int thisTime ) return cinData; } - if( startTime == -1 ) + // SRS - Implement hasFrame so BinkDec startTime is handled the same as with FFMPEG + if( ( !hasFrame ) || startTime == -1 ) { - BinkDecReset(); + if( startTime == -1 ) + { + BinkDecReset(); + } startTime = thisTime; } @@ -1216,7 +1243,14 @@ cinData_t idCinematicLocal::ImageForTimeBinkDec( int thisTime ) } } - if( desiredFrame == framePos ) + // SRS - Enable video replay within PDAs + if( desiredFrame < framePos ) + { + BinkDecReset(); + } + // SRS end + + if( hasFrame && desiredFrame == framePos ) { cinData.imageWidth = CIN_WIDTH; cinData.imageHeight = CIN_HEIGHT; @@ -1260,6 +1294,11 @@ cinData_t idCinematicLocal::ImageForTimeBinkDec( int thisTime ) } else if( h < CIN_HEIGHT ) { +#if defined(__APPLE__) && defined(USE_VULKAN) + // SRS - For U and V channels on OSX Vulkan use full height image to work around stall that occurs with half-height chroma planes + // when exiting levels or returning from demo playback - depends on OSX-specific logic inside Vulkan version of SubImageUpload() + h = CIN_HEIGHT; +#else // the U and V channels have a lower resolution than the Y channel // (or the logical video resolution), so use the aspect ratio to // calculate the real height @@ -1268,6 +1307,7 @@ cinData_t idCinematicLocal::ImageForTimeBinkDec( int thisTime ) { h = hExp; } +#endif } if( img->GetUploadWidth() != w || img->GetUploadHeight() != h ) @@ -1280,6 +1320,7 @@ cinData_t idCinematicLocal::ImageForTimeBinkDec( int thisTime ) img->SubImageUpload( 0, 0, 0, 0, w, h, yuvBuffer[i].data ); } + hasFrame = true; cinData.imageY = imgY; cinData.imageCr = imgCr; cinData.imageCb = imgCb; diff --git a/neo/renderer/RenderBackend.h b/neo/renderer/RenderBackend.h index 6570c119..1165431f 100644 --- a/neo/renderer/RenderBackend.h +++ b/neo/renderer/RenderBackend.h @@ -164,6 +164,7 @@ struct vulkanContext_t bool debugMarkerSupportAvailable; bool debugUtilsSupportAvailable; + bool deviceProperties2Available; // SRS - For getting device properties in support of gfxInfo // selected GPU gpuInfo_t* gpu; diff --git a/neo/renderer/RenderLog.cpp b/neo/renderer/RenderLog.cpp index c95575ea..5e77e6c8 100644 --- a/neo/renderer/RenderLog.cpp +++ b/neo/renderer/RenderLog.cpp @@ -110,7 +110,20 @@ void PC_BeginNamedEvent( const char* szName, const idVec4& color ) #if defined( USE_VULKAN ) // start an annotated group of calls under the this name - if( vkcontext.debugMarkerSupportAvailable ) + // SRS - Prefer VK_EXT_debug_utils over VK_EXT_debug_marker/VK_EXT_debug_report (deprecated by VK_EXT_debug_utils) + if( vkcontext.debugUtilsSupportAvailable ) + { + VkDebugUtilsLabelEXT label = {}; + label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; + label.pLabelName = szName; + label.color[0] = color.x; + label.color[1] = color.y; + label.color[2] = color.z; + label.color[3] = color.w; + + qvkCmdBeginDebugUtilsLabelEXT( vkcontext.commandBuffer[ vkcontext.frameParity ], &label ); + } + else if( vkcontext.debugMarkerSupportAvailable ) { VkDebugMarkerMarkerInfoEXT label = {}; label.sType = VK_STRUCTURE_TYPE_DEBUG_MARKER_MARKER_INFO_EXT; @@ -122,18 +135,6 @@ void PC_BeginNamedEvent( const char* szName, const idVec4& color ) qvkCmdDebugMarkerBeginEXT( vkcontext.commandBuffer[ vkcontext.frameParity ], &label ); } - else if( vkcontext.debugUtilsSupportAvailable ) - { - VkDebugUtilsLabelEXT label = {}; - label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; - label.pLabelName = szName; - label.color[0] = color.x; - label.color[1] = color.y; - label.color[2] = color.z; - label.color[3] = color.w; - - qvkCmdBeginDebugUtilsLabelEXT( vkcontext.commandBuffer[ vkcontext.frameParity ], &label ); - } #else // RB: colors are not supported in OpenGL @@ -190,14 +191,15 @@ PC_EndNamedEvent void PC_EndNamedEvent() { #if defined( USE_VULKAN ) - if( vkcontext.debugMarkerSupportAvailable ) + // SRS - Prefer VK_EXT_debug_utils over VK_EXT_debug_marker/VK_EXT_debug_report (deprecated by VK_EXT_debug_utils) + if( vkcontext.debugUtilsSupportAvailable ) + { + qvkCmdEndDebugUtilsLabelEXT( vkcontext.commandBuffer[ vkcontext.frameParity ] ); + } + else if( vkcontext.debugMarkerSupportAvailable ) { qvkCmdDebugMarkerEndEXT( vkcontext.commandBuffer[ vkcontext.frameParity ] ); } - else if( vkcontext.debugUtilsSupportAvailable ) - { - qvkCmdEndDebugUtilsLabelEXT( vkcontext.commandBuffer[ vkcontext.frameParity ] ); - } #else // only do this if RBDOOM-3-BFG was started by RenderDoc or some similar tool if( glConfig.gremedyStringMarkerAvailable && glConfig.khronosDebugAvailable ) diff --git a/neo/renderer/Vulkan/Allocator_VK.cpp b/neo/renderer/Vulkan/Allocator_VK.cpp index 5c684617..0980c5bc 100644 --- a/neo/renderer/Vulkan/Allocator_VK.cpp +++ b/neo/renderer/Vulkan/Allocator_VK.cpp @@ -65,6 +65,7 @@ uint32 FindMemoryTypeIndex( const uint32 memoryTypeBits, const vulkanMemoryUsage VkMemoryPropertyFlags required = 0; VkMemoryPropertyFlags preferred = 0; + VkMemoryHeapFlags avoid = 0; switch( usage ) { @@ -77,10 +78,14 @@ uint32 FindMemoryTypeIndex( const uint32 memoryTypeBits, const vulkanMemoryUsage case VULKAN_MEMORY_USAGE_CPU_TO_GPU: required |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; preferred |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + // SRS - Make sure preferred memory type does not have VK_MEMORY_HEAP_MULTI_INSTANCE_BIT set, otherwise get validation errors when mapping memory + avoid |= VK_MEMORY_HEAP_MULTI_INSTANCE_BIT; break; case VULKAN_MEMORY_USAGE_GPU_TO_CPU: required |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; preferred |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + // SRS - Make sure preferred memory type does not have VK_MEMORY_HEAP_MULTI_INSTANCE_BIT set, otherwise get validation errors when mapping memory + avoid |= VK_MEMORY_HEAP_MULTI_INSTANCE_BIT; break; default: idLib::FatalError( "idVulkanAllocator::AllocateFromPools: Unknown memory usage." ); @@ -99,7 +104,8 @@ uint32 FindMemoryTypeIndex( const uint32 memoryTypeBits, const vulkanMemoryUsage continue; } - if( ( properties & preferred ) != preferred ) + // SRS - Make sure preferred memory type does not have any avoid heap flags set + if( ( properties & preferred ) != preferred || ( physicalMemoryProperties.memoryHeaps[ i ].flags & avoid ) != 0 ) { continue; } @@ -642,7 +648,11 @@ idVulkanAllocator::Free */ void idVulkanAllocator::Free( const vulkanAllocation_t allocation ) { - garbage[ garbageIndex ].Append( allocation ); + // SRS - Make sure we are trying to free an actual allocated block, otherwise skip + if( allocation.block != NULL ) + { + garbage[ garbageIndex ].Append( allocation ); + } } /* diff --git a/neo/renderer/Vulkan/BufferObject_VK.cpp b/neo/renderer/Vulkan/BufferObject_VK.cpp index fef27ce3..55e76933 100644 --- a/neo/renderer/Vulkan/BufferObject_VK.cpp +++ b/neo/renderer/Vulkan/BufferObject_VK.cpp @@ -105,10 +105,9 @@ bool idVertexBuffer::AllocBufferObject( const void* data, int allocSize, bufferU } else if( usage == BU_DYNAMIC ) { - // SRS - needed to ensure host coherency for MoltenVK on OSX < 10.15.6, otherwise black screen #if defined(__APPLE__) - vmaReq.usage = VMA_MEMORY_USAGE_UNKNOWN; - vmaReq.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + // SRS - VMA_MEMORY_USAGE_CPU_ONLY required for BU_DYNAMIC host coherency on OSX, otherwise black screen + vmaReq.usage = VMA_MEMORY_USAGE_CPU_ONLY; #else vmaReq.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; #endif @@ -121,10 +120,15 @@ bool idVertexBuffer::AllocBufferObject( const void* data, int allocSize, bufferU VkResult ret = vkCreateBuffer( vkcontext.device, &bufferCreateInfo, NULL, &apiObject ); assert( ret == VK_SUCCESS ); - VkMemoryRequirements memoryRequirements; + VkMemoryRequirements memoryRequirements = {}; vkGetBufferMemoryRequirements( vkcontext.device, apiObject, &memoryRequirements ); +#if defined(__APPLE__) + // SRS - VULKAN_MEMORY_USAGE_CPU_ONLY required for BU_DYNAMIC host coherency on OSX, otherwise black screen + vulkanMemoryUsage_t memUsage = ( usage == BU_STATIC ) ? VULKAN_MEMORY_USAGE_GPU_ONLY : VULKAN_MEMORY_USAGE_CPU_ONLY; +#else vulkanMemoryUsage_t memUsage = ( usage == BU_STATIC ) ? VULKAN_MEMORY_USAGE_GPU_ONLY : VULKAN_MEMORY_USAGE_CPU_TO_GPU; +#endif allocation = vulkanAllocator.Allocate( memoryRequirements.size, @@ -363,10 +367,9 @@ bool idIndexBuffer::AllocBufferObject( const void* data, int allocSize, bufferUs } else if( usage == BU_DYNAMIC ) { - // SRS - needed to ensure host coherency for MoltenVK on OSX < 10.15.6, otherwise black screen #if defined(__APPLE__) - vmaReq.usage = VMA_MEMORY_USAGE_UNKNOWN; - vmaReq.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + // SRS - VMA_MEMORY_USAGE_CPU_ONLY required for BU_DYNAMIC host coherency on OSX, otherwise black screen + vmaReq.usage = VMA_MEMORY_USAGE_CPU_ONLY; #else vmaReq.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; #endif @@ -379,10 +382,15 @@ bool idIndexBuffer::AllocBufferObject( const void* data, int allocSize, bufferUs VkResult ret = vkCreateBuffer( vkcontext.device, &bufferCreateInfo, NULL, &apiObject ); assert( ret == VK_SUCCESS ); - VkMemoryRequirements memoryRequirements; + VkMemoryRequirements memoryRequirements = {}; vkGetBufferMemoryRequirements( vkcontext.device, apiObject, &memoryRequirements ); +#if defined(__APPLE__) + // SRS - VULKAN_MEMORY_USAGE_CPU_ONLY required for BU_DYNAMIC host coherency on OSX, otherwise black screen + vulkanMemoryUsage_t memUsage = ( usage == BU_STATIC ) ? VULKAN_MEMORY_USAGE_GPU_ONLY : VULKAN_MEMORY_USAGE_CPU_ONLY; +#else vulkanMemoryUsage_t memUsage = ( usage == BU_STATIC ) ? VULKAN_MEMORY_USAGE_GPU_ONLY : VULKAN_MEMORY_USAGE_CPU_TO_GPU; +#endif allocation = vulkanAllocator.Allocate( memoryRequirements.size, @@ -622,10 +630,9 @@ bool idUniformBuffer::AllocBufferObject( const void* data, int allocSize, buffer } else if( usage == BU_DYNAMIC ) { - // SRS - needed to ensure host coherency for MoltenVK on OSX < 10.15.6, otherwise black screen #if defined(__APPLE__) - vmaReq.usage = VMA_MEMORY_USAGE_UNKNOWN; - vmaReq.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + // SRS - VMA_MEMORY_USAGE_CPU_ONLY required for BU_DYNAMIC host coherency on OSX, otherwise black screen + vmaReq.usage = VMA_MEMORY_USAGE_CPU_ONLY; #else vmaReq.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; #endif @@ -641,7 +648,12 @@ bool idUniformBuffer::AllocBufferObject( const void* data, int allocSize, buffer VkMemoryRequirements memoryRequirements = {}; vkGetBufferMemoryRequirements( vkcontext.device, apiObject, &memoryRequirements ); +#if defined(__APPLE__) + // SRS - VULKAN_MEMORY_USAGE_CPU_ONLY required for BU_DYNAMIC host coherency on OSX, otherwise black screen + vulkanMemoryUsage_t memUsage = ( usage == BU_STATIC ) ? VULKAN_MEMORY_USAGE_GPU_ONLY : VULKAN_MEMORY_USAGE_CPU_ONLY; +#else vulkanMemoryUsage_t memUsage = ( usage == BU_STATIC ) ? VULKAN_MEMORY_USAGE_GPU_ONLY : VULKAN_MEMORY_USAGE_CPU_TO_GPU; +#endif allocation = vulkanAllocator.Allocate( memoryRequirements.size, diff --git a/neo/renderer/Vulkan/Image_VK.cpp b/neo/renderer/Vulkan/Image_VK.cpp index 54b09129..a7e58b9d 100644 --- a/neo/renderer/Vulkan/Image_VK.cpp +++ b/neo/renderer/Vulkan/Image_VK.cpp @@ -718,6 +718,22 @@ void idImage::SubImageUpload( int mipLevel, int x, int y, int z, int width, int data[ i + 3 ] = 255; } } +#endif +#if defined(__APPLE__) && defined(USE_BINKDEC) + else if( opts.format == FMT_LUM8 && ( imgName == "_cinematicCr" || imgName == "_cinematicCb" ) ) + { + // SRS - When decoding YUV420 cinematics on OSX, copy and duplicate individual rows of half-height chroma planes into full-height planes + // This works around a stall that occurs with half-height planes when exiting levels or after demo playback (possible issue in MoltenVK??) + // ***IMPORTANT - Assumes that SubImageUpload() has been called with half-width and full-height parameters and a packed pic buffer *** + byte* imgData = ( byte* )pic; + int evenRow; + for( int i = 0; i < size / 2; i++ ) + { + evenRow = ( i / width ) * 2; + data[ evenRow * width + i % width ] = imgData[ i ]; // SRS - Copy image data into even-numbered rows of new chroma plane + data[ ( evenRow + 1 ) * width + i % width ] = imgData[ i ]; // SRS - Duplicate image data into odd-numbered rows of new chroma plane + } + } #endif else { diff --git a/neo/renderer/Vulkan/RenderBackend_VK.cpp b/neo/renderer/Vulkan/RenderBackend_VK.cpp index b3423213..c0f901d4 100644 --- a/neo/renderer/Vulkan/RenderBackend_VK.cpp +++ b/neo/renderer/Vulkan/RenderBackend_VK.cpp @@ -291,6 +291,28 @@ static void CreateVulkanInstance() } #endif + // SRS - Enumerate available Vulkan instance extensions and test for presence of VK_KHR_get_physical_device_properties2 and VK_EXT_debug_utils + idLib::Printf( "Getting available vulkan instance extensions...\n" ); + uint32 numInstanceExtensions; + ID_VK_CHECK( vkEnumerateInstanceExtensionProperties( NULL, &numInstanceExtensions, NULL ) ); + ID_VK_VALIDATE( numInstanceExtensions > 0, "vkEnumerateInstanceExtensionProperties returned zero extensions." ); + + idList< VkExtensionProperties > instanceExtensionProps; + instanceExtensionProps.SetNum( numInstanceExtensions ); + ID_VK_CHECK( vkEnumerateInstanceExtensionProperties( NULL, &numInstanceExtensions, instanceExtensionProps.Ptr() ) ); + ID_VK_VALIDATE( numInstanceExtensions > 0, "vkEnumerateInstanceExtensionProperties returned zero extensions." ); + + vkcontext.deviceProperties2Available = false; + for( int i = 0; i < numInstanceExtensions; i++ ) + { + if( idStr::Icmp( instanceExtensionProps[ i ].extensionName, VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME ) == 0 ) + { + vkcontext.instanceExtensions.AddUnique( VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME ); + vkcontext.deviceProperties2Available = true; + break; + } + } + vkcontext.debugUtilsSupportAvailable = false; if( enableLayers ) { @@ -299,24 +321,15 @@ static void CreateVulkanInstance() vkcontext.instanceExtensions.Append( g_debugInstanceExtensions[ i ] ); } - // SRS - Enumerate available Vulkan instance extensions and test for presence of VK_EXT_debug_utils - idLib::Printf( "Getting available vulkan instance extensions...\n" ); - uint32 numInstanceExtensions; - ID_VK_CHECK( vkEnumerateInstanceExtensionProperties( NULL, &numInstanceExtensions, NULL ) ); - ID_VK_VALIDATE( numInstanceExtensions > 0, "vkEnumerateInstanceExtensionProperties returned zero extensions." ); - - idList< VkExtensionProperties > instanceExtensionProps; - instanceExtensionProps.SetNum( numInstanceExtensions ); - ID_VK_CHECK( vkEnumerateInstanceExtensionProperties( NULL, &numInstanceExtensions, instanceExtensionProps.Ptr() ) ); - ID_VK_VALIDATE( numInstanceExtensions > 0, "vkEnumerateInstanceExtensionProperties returned zero extensions." ); - + idLib::Printf( "Number of available instance extensions\t%i\n", numInstanceExtensions ); + idLib::Printf( "Available Extension List: \n" ); for( int i = 0; i < numInstanceExtensions; i++ ) { + idLib::Printf( "\t%s\n", instanceExtensionProps[ i ].extensionName ); if( idStr::Icmp( instanceExtensionProps[ i ].extensionName, VK_EXT_DEBUG_UTILS_EXTENSION_NAME ) == 0 ) { vkcontext.instanceExtensions.AddUnique( VK_EXT_DEBUG_UTILS_EXTENSION_NAME ); vkcontext.debugUtilsSupportAvailable = true; - break; } } @@ -728,13 +741,9 @@ static void SelectPhysicalDevice() glConfig.renderer_string = gpu.props.deviceName; - uint32_t instanceVersion; - vkEnumerateInstanceVersion( &instanceVersion ); - static idStr version_string; version_string.Clear(); - version_string.Append( va( "Vulkan %i.%i.%i", VK_API_VERSION_MAJOR( instanceVersion ), VK_API_VERSION_MINOR( instanceVersion ), VK_API_VERSION_PATCH( instanceVersion ) ) ); - version_string.Append( va( " / API %i.%i.%i", VK_API_VERSION_MAJOR( gpu.props.apiVersion ), VK_API_VERSION_MINOR( gpu.props.apiVersion ), VK_API_VERSION_PATCH( gpu.props.apiVersion ) ) ); + version_string.Append( va( "Vulkan API %i.%i.%i", VK_API_VERSION_MAJOR( gpu.props.apiVersion ), VK_API_VERSION_MINOR( gpu.props.apiVersion ), VK_API_VERSION_PATCH( gpu.props.apiVersion ) ) ); static idStr extensions_string; extensions_string.Clear(); @@ -750,7 +759,7 @@ static void SelectPhysicalDevice() } glConfig.extensions_string = extensions_string.c_str(); - if( driverPropertiesAvailable ) + if( vkcontext.deviceProperties2Available && driverPropertiesAvailable ) { VkPhysicalDeviceProperties2 pProperties = {}; VkPhysicalDeviceDriverProperties pDriverProperties = {}; @@ -805,6 +814,19 @@ static void CreateLogicalDeviceAndQueues() portabilityFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PORTABILITY_SUBSET_FEATURES_KHR; vkGetPhysicalDeviceFeatures2( vkcontext.physicalDevice, &deviceFeatures2 ); +#if defined(USE_MoltenVK) + // SRS - Check if we have native image swizzling, and if not, enable MoltenVK's emulation + if( portabilityFeatures.imageViewFormatSwizzle == VK_FALSE ) + { + MVKConfiguration pConfig; + size_t pConfigSize = sizeof( pConfig ); + + idLib::Printf( "Enabling MoltenVK fullImageViewSwizzle...\n" ); + ID_VK_CHECK( vkGetMoltenVKConfigurationMVK( vkcontext.instance, &pConfig, &pConfigSize ) ); + pConfig.fullImageViewSwizzle = VK_TRUE; + ID_VK_CHECK( vkSetMoltenVKConfigurationMVK( vkcontext.instance, &pConfig, &pConfigSize ) ); + } +#endif #else VkPhysicalDeviceFeatures deviceFeatures = {}; deviceFeatures.textureCompressionBC = VK_TRUE; @@ -861,11 +883,12 @@ static void CreateLogicalDeviceAndQueues() if( vkcontext.debugUtilsSupportAvailable ) { - qvkQueueBeginDebugUtilsLabelEXT = ( PFN_vkQueueBeginDebugUtilsLabelEXT )vkGetDeviceProcAddr( vkcontext.device, "vkQueueBeginDebugUtilsLabelEXT" ); - qvkQueueEndDebugUtilsLabelEXT = ( PFN_vkQueueEndDebugUtilsLabelEXT )vkGetDeviceProcAddr( vkcontext.device, "vkQueueEndDebugUtilsLabelEXT" ); - qvkCmdBeginDebugUtilsLabelEXT = ( PFN_vkCmdBeginDebugUtilsLabelEXT )vkGetDeviceProcAddr( vkcontext.device, "vkCmdBeginDebugUtilsLabelEXT" ); - qvkCmdEndDebugUtilsLabelEXT = ( PFN_vkCmdEndDebugUtilsLabelEXT )vkGetDeviceProcAddr( vkcontext.device, "vkCmdEndDebugUtilsLabelEXT" ); - qvkCmdInsertDebugUtilsLabelEXT = ( PFN_vkCmdInsertDebugUtilsLabelEXT )vkGetDeviceProcAddr( vkcontext.device, "vkCmdInsertDebugUtilsLabelEXT" ); + // SRS - Since VK_EXT_debug_utils is an instance extension, must use vkGetInstanceProcAddr() vs vkGetDeviceProcAddr() + qvkQueueBeginDebugUtilsLabelEXT = ( PFN_vkQueueBeginDebugUtilsLabelEXT )vkGetInstanceProcAddr( vkcontext.instance, "vkQueueBeginDebugUtilsLabelEXT" ); + qvkQueueEndDebugUtilsLabelEXT = ( PFN_vkQueueEndDebugUtilsLabelEXT )vkGetInstanceProcAddr( vkcontext.instance, "vkQueueEndDebugUtilsLabelEXT" ); + qvkCmdBeginDebugUtilsLabelEXT = ( PFN_vkCmdBeginDebugUtilsLabelEXT )vkGetInstanceProcAddr( vkcontext.instance, "vkCmdBeginDebugUtilsLabelEXT" ); + qvkCmdEndDebugUtilsLabelEXT = ( PFN_vkCmdEndDebugUtilsLabelEXT )vkGetInstanceProcAddr( vkcontext.instance, "vkCmdEndDebugUtilsLabelEXT" ); + qvkCmdInsertDebugUtilsLabelEXT = ( PFN_vkCmdInsertDebugUtilsLabelEXT )vkGetInstanceProcAddr( vkcontext.instance, "vkCmdInsertDebugUtilsLabelEXT" ); } } @@ -1480,11 +1503,15 @@ ClearContext */ static void ClearContext() { +#if defined(VULKAN_USE_PLATFORM_SDL) + vkcontext.sdlWindow = nullptr; +#endif vkcontext.frameCounter = 0; vkcontext.frameParity = 0; vkcontext.jointCacheHandle = 0; vkcontext.instance = VK_NULL_HANDLE; vkcontext.physicalDevice = VK_NULL_HANDLE; + vkcontext.physicalDeviceFeatures = {}; vkcontext.device = VK_NULL_HANDLE; vkcontext.graphicsQueue = VK_NULL_HANDLE; vkcontext.presentQueue = VK_NULL_HANDLE; @@ -1494,6 +1521,9 @@ static void ClearContext() vkcontext.instanceExtensions.Clear(); vkcontext.deviceExtensions.Clear(); vkcontext.validationLayers.Clear(); + vkcontext.debugMarkerSupportAvailable = false; + vkcontext.debugUtilsSupportAvailable = false; + vkcontext.deviceProperties2Available = false; vkcontext.gpu = NULL; vkcontext.gpus.Clear(); vkcontext.commandPool = VK_NULL_HANDLE; @@ -1513,6 +1543,12 @@ static void ClearContext() vkcontext.currentSwapIndex = 0; vkcontext.msaaImage = VK_NULL_HANDLE; vkcontext.msaaImageView = VK_NULL_HANDLE; +#if defined( USE_AMD_ALLOCATOR ) + vkcontext.msaaVmaAllocation = NULL; + vkcontext.msaaAllocation = VmaAllocationInfo(); +#else + vkcontext.msaaAllocation = vulkanAllocation_t(); +#endif vkcontext.swapchainImages.Zero(); vkcontext.swapchainViews.Zero(); vkcontext.frameBuffers.Zero(); @@ -1588,16 +1624,6 @@ void idRenderBackend::Init() idLib::Printf( "Creating Vulkan Instance...\n" ); CreateVulkanInstance(); - // SRS - On macOS optionally set fullImageViewSwizzle to TRUE (instead of env var MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE = 1) -#if defined(__APPLE__) && defined(USE_MoltenVK) - MVKConfiguration pConfig; - size_t pConfigSize = sizeof( pConfig ); - - vkGetMoltenVKConfigurationMVK( vkcontext.instance, &pConfig, &pConfigSize ); - pConfig.fullImageViewSwizzle = VK_TRUE; - vkSetMoltenVKConfigurationMVK( vkcontext.instance, &pConfig, &pConfigSize ); -#endif - // create the windowing interface //#ifdef _WIN32 CreateSurface();