Merge remote-tracking branch 'SRSaunders/optick-vulkan'

Robert Beckebans 2023-07-14 09:44:55 +02:00
commit 33615ef541
19 changed files with 383 additions and 105 deletions

View file

@ -1677,6 +1677,15 @@ else()
ENDFOREACH(item)
endif()
# SRS - if using gcc compiler enable gnu extensions for ##__VA_ARGS__ support within optick profiler (i.e. __STRICT_ANSI__ not set)
if(CMAKE_COMPILER_IS_GNUCC)
set_source_files_properties(
${OPTICK_SOURCES}
PROPERTIES
COMPILE_FLAGS "-std=gnu++${CMAKE_CXX_STANDARD}"
)
endif()
GET_DIRECTORY_PROPERTY(_directory_flags DEFINITIONS)
LIST(APPEND _compiler_FLAGS ${_directory_flags})
SEPARATE_ARGUMENTS(_compiler_FLAGS)
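Note on the hunk above: the -std=gnu++${CMAKE_CXX_STANDARD} flag is needed because Optick's variadic macros rely on the GNU ", ##__VA_ARGS__" comma-swallowing extension; -std=c++XX defines __STRICT_ANSI__ (which the Optick sources check for), while -std=gnu++XX leaves it undefined. A minimal, standalone illustration of the idiom (assumed example, not part of the commit):

// sketch.cpp - illustrative only; builds with: g++ -std=gnu++14 sketch.cpp
#include <cstdio>

// With no variadic arguments, ", ##__VA_ARGS__" drops the trailing comma.
// Under strict ANSI mode (-std=c++XX with -pedantic) this form warns or fails.
#define LOG_EVENT(fmt, ...) std::printf(fmt "\n", ##__VA_ARGS__)

int main()
{
    LOG_EVENT("frame start");                   // zero variadic arguments - needs the GNU extension
    LOG_EVENT("frame %d took %d us", 42, 1337); // ordinary variadic call
    return 0;
}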
@ -1685,6 +1694,11 @@ else()
# we need to recreate the precompiled header for RBDoom3BFG
# (i.e. can't use the one created for idlib before)
# because some definitions (e.g. -D__IDLIB__ -D__DOOM_DLL__) differ
if(OPTICK)
LIST(APPEND _compiler_FLAGS -DUSE_OPTICK=1)
else()
LIST(APPEND _compiler_FLAGS -DUSE_OPTICK=0)
endif()
add_custom_target(precomp_header_rbdoom3bfg ALL
COMMAND ${CMAKE_CXX_COMPILER} ${_compiler_FLAGS} -x c++-header idlib/precompiled.h -o idlib/precompiled.h.gch
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}

View file

@ -90,7 +90,11 @@ be called directly in the foreground thread for comparison.
*/
int idGameThread::Run()
{
OPTICK_THREAD( "idGameThread" );
if( com_smp.GetBool() )
{
// SRS - label thread in smp mode only, otherwise CPU frame number is missing
OPTICK_THREAD( "idGameThread" );
}
commonLocal.frameTiming.startGameTime = Sys_Microseconds();

View file

@ -130,6 +130,11 @@ else()
SEPARATE_ARGUMENTS(_compiler_FLAGS)
if (USE_PRECOMPILED_HEADERS)
if(OPTICK)
LIST(APPEND _compiler_FLAGS -DUSE_OPTICK=1)
else()
LIST(APPEND _compiler_FLAGS -DUSE_OPTICK=0)
endif()
add_custom_target(precomp_header_idlib ALL
COMMAND ${CMAKE_CXX_COMPILER} ${_compiler_FLAGS} -x c++-header precompiled.h -o precompiled.h.gch
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}

View file

@ -68,7 +68,7 @@
#if defined(_MSC_VER)
#define OPTICK_ENABLE_GPU_VULKAN (OPTICK_ENABLE_GPU /*&& 0*/)
#else
#define OPTICK_ENABLE_GPU_VULKAN (0)
#define OPTICK_ENABLE_GPU_VULKAN (OPTICK_ENABLE_GPU /*&& 0*/)
#endif
#endif

View file

@ -103,11 +103,13 @@ OPTICK_DEFINE_HANDLE(VkCommandBuffer);
OPTICK_DEFINE_HANDLE(VkQueryPool);
OPTICK_DEFINE_HANDLE(VkCommandPool);
OPTICK_DEFINE_HANDLE(VkFence);
OPTICK_DEFINE_HANDLE(VkEvent);
struct VkPhysicalDeviceProperties;
struct VkQueryPoolCreateInfo;
struct VkAllocationCallbacks;
struct VkCommandPoolCreateInfo;
struct VkEventCreateInfo;
struct VkCommandBufferAllocateInfo;
struct VkFenceCreateInfo;
struct VkSubmitInfo;
@ -126,9 +128,14 @@ struct VkCommandBufferBeginInfo;
typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceProperties_)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties);
typedef int32_t (VKAPI_PTR *PFN_vkCreateQueryPool_)(VkDevice device, const VkQueryPoolCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkQueryPool* pQueryPool);
typedef int32_t (VKAPI_PTR *PFN_vkCreateCommandPool_)(VkDevice device, const VkCommandPoolCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkCommandPool* pCommandPool);
typedef int32_t (VKAPI_PTR *PFN_vkCreateEvent_)(VkDevice device, const VkEventCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkEvent* pEvent);
typedef int32_t (VKAPI_PTR *PFN_vkAllocateCommandBuffers_)(VkDevice device, const VkCommandBufferAllocateInfo* pAllocateInfo, VkCommandBuffer* pCommandBuffers);
typedef int32_t (VKAPI_PTR *PFN_vkCreateFence_)(VkDevice device, const VkFenceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkFence* pFence);
typedef void (VKAPI_PTR *PFN_vkCmdResetQueryPool_)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount);
typedef void (VKAPI_PTR *PFN_vkResetQueryPool_)(VkDevice device, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount);
typedef void (VKAPI_PTR *PFN_vkCmdWaitEvents_)(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents, uint32_t srcStageMask, uint32_t dstStageMask, uint32_t memoryBarrierCount, const void* pMemoryBarriers, uint32_t bufferMemoryBarrierCount, const void* pBufferMemoryBarriers, uint32_t imageMemoryBarrierCount, const void* pImageMemoryBarriers);
typedef int32_t (VKAPI_PTR *PFN_vkResetEvent_)(VkDevice device, VkEvent event);
typedef int32_t (VKAPI_PTR *PFN_vkSetEvent_)(VkDevice device, VkEvent event);
typedef int32_t (VKAPI_PTR *PFN_vkQueueSubmit_)(VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence);
typedef int32_t (VKAPI_PTR *PFN_vkWaitForFences_)(VkDevice device, uint32_t fenceCount, const VkFence* pFences, uint32_t waitAll, uint64_t timeout);
typedef int32_t (VKAPI_PTR *PFN_vkResetCommandBuffer_)(VkCommandBuffer commandBuffer, uint32_t flags);
@ -139,8 +146,10 @@ typedef int32_t (VKAPI_PTR *PFN_vkEndCommandBuffer_)(VkCommandBuffer commandBuff
typedef int32_t (VKAPI_PTR *PFN_vkResetFences_)(VkDevice device, uint32_t fenceCount, const VkFence* pFences);
typedef void (VKAPI_PTR *PFN_vkDestroyCommandPool_)(VkDevice device, VkCommandPool commandPool, const VkAllocationCallbacks* pAllocator);
typedef void (VKAPI_PTR *PFN_vkDestroyQueryPool_)(VkDevice device, VkQueryPool queryPool, const VkAllocationCallbacks* pAllocator);
typedef void (VKAPI_PTR *PFN_vkDestroyEvent_)(VkDevice device, VkEvent event, const VkAllocationCallbacks* pAllocator);
typedef void (VKAPI_PTR *PFN_vkDestroyFence_)(VkDevice device, VkFence fence, const VkAllocationCallbacks* pAllocator);
typedef void (VKAPI_PTR *PFN_vkFreeCommandBuffers_)(VkDevice device, VkCommandPool commandPool, uint32_t commandBufferCount, const VkCommandBuffer* pCommandBuffers);
typedef int32_t (VKAPI_PTR *PFN_vkGetPastPresentationTimingGOOGLE_)(VkDevice device, void* swapchain, uint32_t* pPresentationTimingCount, void* pPresentationTimings);
#if OPTICK_VKAPI_PTR_DEFINED
#undef VKAPI_PTR
@ -159,9 +168,14 @@ namespace Optick
PFN_vkGetPhysicalDeviceProperties_ vkGetPhysicalDeviceProperties;
PFN_vkCreateQueryPool_ vkCreateQueryPool;
PFN_vkCreateCommandPool_ vkCreateCommandPool;
PFN_vkCreateEvent_ vkCreateEvent;
PFN_vkAllocateCommandBuffers_ vkAllocateCommandBuffers;
PFN_vkCreateFence_ vkCreateFence;
PFN_vkCmdResetQueryPool_ vkCmdResetQueryPool;
PFN_vkResetQueryPool_ vkResetQueryPool;
PFN_vkCmdWaitEvents_ vkCmdWaitEvents;
PFN_vkResetEvent_ vkResetEvent;
PFN_vkSetEvent_ vkSetEvent;
PFN_vkQueueSubmit_ vkQueueSubmit;
PFN_vkWaitForFences_ vkWaitForFences;
PFN_vkResetCommandBuffer_ vkResetCommandBuffer;
@ -172,8 +186,10 @@ namespace Optick
PFN_vkResetFences_ vkResetFences;
PFN_vkDestroyCommandPool_ vkDestroyCommandPool;
PFN_vkDestroyQueryPool_ vkDestroyQueryPool;
PFN_vkDestroyEvent_ vkDestroyEvent;
PFN_vkDestroyFence_ vkDestroyFence;
PFN_vkFreeCommandBuffers_ vkFreeCommandBuffers;
PFN_vkGetPastPresentationTimingGOOGLE_ vkGetPastPresentationTimingGOOGLE;
};
// Source: http://msdn.microsoft.com/en-us/library/system.windows.media.colors(v=vs.110).aspx
@ -763,7 +779,7 @@ struct OPTICK_API GPUContext
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
OPTICK_API void InitGpuD3D12(ID3D12Device* device, ID3D12CommandQueue** cmdQueues, uint32_t numQueues);
OPTICK_API void InitGpuVulkan(VkDevice* vkDevices, VkPhysicalDevice* vkPhysicalDevices, VkQueue* vkQueues, uint32_t* cmdQueuesFamily, uint32_t numQueues, const VulkanFunctions* functions);
OPTICK_API void GpuFlip(void* swapChain);
OPTICK_API void GpuFlip(void* swapChain, uint32_t frameID = 0);
OPTICK_API GPUContext SetGpuContext(GPUContext context);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct OPTICK_API GPUContextScope
@ -780,6 +796,12 @@ struct OPTICK_API GPUContextScope
prevContext = SetGpuContext(GPUContext(cmdBuffer, queue, node));
}
// SRS - add typeless void* commandHandle prototype to support runtime selection of graphics API
GPUContextScope(void* commandHandle, GPUQueueType queue = GPU_QUEUE_GRAPHICS, int node = 0)
{
prevContext = SetGpuContext(GPUContext(commandHandle, queue, node));
}
~GPUContextScope()
{
SetGpuContext(prevContext);
@ -1041,7 +1063,7 @@ struct OptickApp
if (OPTICK_CONCAT(gpu_autogen_description_, __LINE__) == nullptr) OPTICK_CONCAT(gpu_autogen_description_, __LINE__) = ::Optick::EventDescription::Create( NAME, __FILE__, __LINE__ ); \
::Optick::GPUEvent OPTICK_CONCAT(gpu_autogen_event_, __LINE__)( *(OPTICK_CONCAT(gpu_autogen_description_, __LINE__)) ); \
#define OPTICK_GPU_FLIP(SWAP_CHAIN) ::Optick::GpuFlip(SWAP_CHAIN);
#define OPTICK_GPU_FLIP(...) ::Optick::GpuFlip(__VA_ARGS__);
/////////////////////////////////////////////////////////////////////////////////
// [Automation][Startup]
@ -1096,13 +1118,13 @@ struct OptickApp
#define OPTICK_STORAGE_PUSH(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START)
#define OPTICK_STORAGE_POP(STORAGE, CPU_TIMESTAMP_FINISH)
#define OPTICK_SET_STATE_CHANGED_CALLBACK(CALLBACK)
#define OPTICK_SET_MEMORY_ALLOCATOR(ALLOCATE_FUNCTION, DEALLOCATE_FUNCTION)
#define OPTICK_SET_MEMORY_ALLOCATOR(ALLOCATE_FUNCTION, DEALLOCATE_FUNCTION, INIT_THREAD_CALLBACK)
#define OPTICK_SHUTDOWN()
#define OPTICK_GPU_INIT_D3D12(DEVICE, CMD_QUEUES, NUM_CMD_QUEUS)
#define OPTICK_GPU_INIT_VULKAN(DEVICES, PHYSICAL_DEVICES, CMD_QUEUES, CMD_QUEUES_FAMILY, NUM_CMD_QUEUS, FUNCTIONS)
#define OPTICK_GPU_CONTEXT(...)
#define OPTICK_GPU_EVENT(NAME)
#define OPTICK_GPU_FLIP(SWAP_CHAIN)
#define OPTICK_GPU_FLIP(...)
#define OPTICK_UPDATE()
#define OPTICK_FRAME_FLIP(...)
#define OPTICK_FRAME_EVENT(FRAME_TYPE, ...)
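Taken together, the optick.h changes above add an optional frame ID to GpuFlip and a typeless GPUContextScope overload, so the engine can feed either a D3D12 command list or a Vulkan command buffer through the same macros. A hedged usage sketch follows; the function names and parameters are placeholders supplied by the caller, not engine or library API from this commit:

// Illustrative only - assumes optick.h is included, USE_OPTICK=1, and the GPU
// profiler was initialized via OPTICK_GPU_INIT_D3D12 / OPTICK_GPU_INIT_VULKAN.
static void ProfileRenderPass( void* commandHandle )    // ID3D12GraphicsCommandList* or VkCommandBuffer
{
    OPTICK_GPU_CONTEXT( commandHandle );    // resolves to the new typeless GPUContextScope constructor
    OPTICK_GPU_EVENT( "Render_Pass" );
    // ... record GPU work on the native command list / command buffer ...
}

static void PresentFrame( void* swapChain, uint32_t frameNumber )
{
    OPTICK_GPU_FLIP( swapChain, frameNumber );    // new form: tags the flip with an engine frame number
    // OPTICK_GPU_FLIP( swapChain );              // legacy single-argument call sites still compile (frameID defaults to 0)
    // ... present the swap chain ...
}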

View file

@ -1801,10 +1801,10 @@ OPTICK_API EventStorage* RegisterStorage(const char* name, uint64_t threadID, Th
return entry ? &entry->storage : nullptr;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
OPTICK_API void GpuFlip(void* swapChain)
OPTICK_API void GpuFlip(void* swapChain, uint32_t frameID)
{
if (GPUProfiler* gpuProfiler = Core::Get().gpuProfiler)
gpuProfiler->Flip(swapChain);
gpuProfiler->Flip(swapChain, frameID);
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
OPTICK_API GPUContext SetGpuContext(GPUContext context)

View file

@ -337,7 +337,8 @@ struct ThreadEntry
// https://github.com/ulricheck/optick/pull/1/commits/1e5e1919816a64f235caa0f4b0bf20495225b1fa
~ThreadEntry()
{
if((*threadTLS)!=nullptr)
// SRS - check threadTLS for null before dereferencing, not *threadTLS
if (threadTLS != nullptr)
{
*threadTLS = nullptr;
}
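The destructor fix above guards the outer pointer rather than the value it points to: the old check dereferenced threadTLS unconditionally, which crashes when threadTLS itself is null, while *threadTLS being null is a normal state. A minimal sketch of the distinction (assumed types, illustrative only):

#include <cstdio>

struct EventStorage {};

// threadTLS may be null if the thread-local slot was never registered;
// clearing the slot it points at is always safe once that guard passes.
static void ClearSlot(EventStorage** threadTLS)
{
    if (threadTLS != nullptr)   // guard the pointer before writing through it
        *threadTLS = nullptr;
}

int main()
{
    EventStorage storage;
    EventStorage* slot = &storage;
    ClearSlot(&slot);     // normal case: slot is reset to nullptr
    ClearSlot(nullptr);   // the old check would have dereferenced a null pointer here
    std::printf("slot is %s\n", slot == nullptr ? "cleared" : "set");
    return 0;
}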
@ -650,4 +651,4 @@ public:
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
}
#endif //USE_OPTICK
#endif //USE_OPTICK

View file

@ -61,7 +61,7 @@ namespace Optick
int64 Platform::GetTime()
{
struct timespec ts;
clock_gettime(CLOCK_REALTIME, &ts);
clock_gettime(CLOCK_MONOTONIC, &ts);
return ts.tv_sec * 1000000000LL + ts.tv_nsec;
}
}
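Switching from CLOCK_REALTIME to CLOCK_MONOTONIC keeps profiler timestamps immune to wall-clock adjustments (NTP steps, manual clock changes). A standalone sketch of the pattern (illustrative, not the file's surrounding code):

// monotonic_time.cpp - illustrative only; measures an interval with CLOCK_MONOTONIC.
#include <cstdint>
#include <cstdio>
#include <ctime>

static int64_t GetTimeNs()
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);   // monotonic: never jumps if the system clock is adjusted
    return ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

int main()
{
    const int64_t start = GetTimeNs();
    // ... work being timed ...
    const int64_t elapsed = GetTimeNs() - start;
    std::printf("elapsed: %lld ns\n", (long long)elapsed);
    return 0;
}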
@ -306,4 +306,4 @@ SymbolEngine* Platform::CreateSymbolEngine()
}
#endif //OPTICK_ENABLE_TRACING
#endif //USE_OPTICK
#endif //__APPLE_CC__
#endif //__APPLE_CC__

View file

@ -62,10 +62,35 @@ namespace Optick
{
std::lock_guard<std::recursive_mutex> lock(updateLock);
currentState = STATE_OFF;
// SRS - Resolve delayed GPU frame timestamps before dumping data
for (uint32_t nodeIndex = 0; nodeIndex < nodes.size(); ++nodeIndex)
{
Node* node = nodes[nodeIndex];
uint32_t nextFrameIndex = (frameNumber + 1 - NUM_FRAMES_DELAY) % NUM_FRAMES_DELAY;
QueryFrame& nextFrame = node->queryGpuframes[nextFrameIndex];
while (nextFrame.queryIndexStart != (uint32_t)-1 && nextFrame.queryIndexCount > 0 &&
nextFrameIndex != frameNumber % NUM_FRAMES_DELAY)
{
WaitForFrame(nodeIndex, (uint64_t)nextFrameIndex);
uint32_t resolveStart = nextFrame.queryIndexStart % MAX_QUERIES_COUNT;
uint32_t resolveFinish = resolveStart + nextFrame.queryIndexCount;
ResolveTimestamps(nodeIndex, resolveStart, std::min<uint32_t>(resolveFinish, MAX_QUERIES_COUNT) - resolveStart);
if (resolveFinish > MAX_QUERIES_COUNT)
ResolveTimestamps(nodeIndex, 0, resolveFinish - MAX_QUERIES_COUNT);
nextFrameIndex = (nextFrameIndex + 1) % NUM_FRAMES_DELAY;
nextFrame = node->queryGpuframes[nextFrameIndex];
}
}
}
void GPUProfiler::Dump(uint32 /*mode*/)
{
std::lock_guard<std::recursive_mutex> lock(updateLock);
for (size_t nodeIndex = 0; nodeIndex < nodes.size(); ++nodeIndex)
{
Node* node = nodes[nodeIndex];
@ -119,9 +144,9 @@ namespace Optick
return event;
}
EventData& GPUProfiler::AddVSyncEvent()
EventData& GPUProfiler::AddVSyncEvent(const char *eventName)
{
static const EventDescription* VSyncDescription = EventDescription::Create("VSync", __FILE__, __LINE__);
static const EventDescription* VSyncDescription = EventDescription::Create(eventName, __FILE__, __LINE__);
EventData& event = nodes[currentNode]->gpuEventStorage[GPU_QUEUE_VSYNC]->eventBuffer.Add();
event.description = VSyncDescription;
event.start = EventTime::INVALID_TIMESTAMP;
@ -139,6 +164,16 @@ namespace Optick
return tag;
}
TagData<uint32>& GPUProfiler::AddVSyncTag()
{
static const EventDescription* VSyncTagDescription = EventDescription::CreateShared("Frame");
TagData<uint32>& tag = nodes[currentNode]->gpuEventStorage[GPU_QUEUE_VSYNC]->tagU32Buffer.Add();
tag.description = VSyncTagDescription;
tag.timestamp = EventTime::INVALID_TIMESTAMP;
tag.data = 0;
return tag;
}
const char * GetGPUQueueName(GPUQueueType queue)
{
const char* GPUQueueToName[GPU_QUEUE_COUNT] = { "Graphics", "Compute", "Transfer", "VSync" };

View file

@ -31,6 +31,7 @@
#include <atomic>
#include <thread>
#include <queue>
#include <d3d12.h>
#include <dxgi.h>
@ -80,16 +81,14 @@ namespace Optick
ID3D12Resource* queryBuffer;
ID3D12Device* device;
// VSync Stats
// VSync / Present Stats
DXGI_FRAME_STATISTICS prevFrameStatistics;
std::queue<UINT> presentIdQueue;
std::queue<uint32_t> frameIdQueue;
//void UpdateRange(uint32_t start, uint32_t finish)
void InitNodeInternal(const char* nodeName, uint32_t nodeIndex, ID3D12CommandQueue* pCmdQueue);
void ResolveTimestamps(uint32_t startIndex, uint32_t count);
void WaitForFrame(uint64_t frameNumber);
public:
GPUProfilerD3D12();
~GPUProfilerD3D12();
@ -98,7 +97,7 @@ namespace Optick
void QueryTimestamp(ID3D12GraphicsCommandList* context, int64_t* outCpuTimestamp);
void Flip(IDXGISwapChain* swapChain);
void Flip(IDXGISwapChain* swapChain, uint32_t frameID);
// Interface implementation
@ -109,9 +108,13 @@ namespace Optick
QueryTimestamp((ID3D12GraphicsCommandList*)context, outCpuTimestamp);
}
void Flip(void* swapChain) override
void ResolveTimestamps(uint32_t nodeIndex, uint32_t startIndex, uint32_t count) override;
void WaitForFrame(uint32_t nodeIndex, uint64_t frameNumber) override;
void Flip(void* swapChain, uint32_t frameID) override
{
Flip(static_cast<IDXGISwapChain*>(swapChain));
Flip(static_cast<IDXGISwapChain*>(swapChain), frameID);
}
};
@ -241,11 +244,11 @@ namespace Optick
}
}
void GPUProfilerD3D12::ResolveTimestamps(uint32_t startIndex, uint32_t count)
void GPUProfilerD3D12::ResolveTimestamps(uint32_t nodeIndex, uint32_t startIndex, uint32_t count)
{
if (count)
{
Node* node = nodes[currentNode];
Node* node = nodes[nodeIndex];
D3D12_RANGE range = { sizeof(uint64_t)*startIndex, sizeof(uint64_t)*(startIndex + count) };
void* pData = nullptr;
@ -259,18 +262,18 @@ namespace Optick
}
}
void GPUProfilerD3D12::WaitForFrame(uint64_t frameNumberToWait)
void GPUProfilerD3D12::WaitForFrame(uint32_t nodeIndex, uint64_t frameNumberToWait)
{
OPTICK_EVENT();
NodePayload* payload = nodePayloads[currentNode];
NodePayload* payload = nodePayloads[nodeIndex];
while (frameNumberToWait > payload->syncFence->GetCompletedValue())
{
std::this_thread::sleep_for(std::chrono::milliseconds(1));
}
}
void GPUProfilerD3D12::Flip(IDXGISwapChain* swapChain)
void GPUProfilerD3D12::Flip(IDXGISwapChain* swapChain, uint32_t frameID)
{
OPTICK_CATEGORY("GPUProfilerD3D12::Flip", Category::Debug);
@ -328,38 +331,76 @@ namespace Optick
commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, 0, finishIndex, queryBuffer, 0);
}
}
else
{
// Initialize present / frame statistics
prevFrameStatistics = { 0 };
swapChain->GetFrameStatistics(&prevFrameStatistics);
while (!presentIdQueue.empty())
{
presentIdQueue.pop();
frameIdQueue.pop();
}
}
commandList->Close();
payload.commandQueue->ExecuteCommandLists(1, (ID3D12CommandList*const*)&commandList);
payload.commandQueue->Signal(payload.syncFence, frameNumber);
// Save presentID to frameID correlation for the next present's vsync tag
if (frameID > 0)
{
UINT prevPresentID = 0;
HRESULT result = swapChain->GetLastPresentCount(&prevPresentID);
if (result == S_OK)
{
presentIdQueue.push(prevPresentID + 1);
frameIdQueue.push(frameID);
}
}
// Process VSync / Presentation timing
DXGI_FRAME_STATISTICS currentFrameStatistics = { 0 };
HRESULT result = swapChain->GetFrameStatistics(&currentFrameStatistics);
if ((result == S_OK) && (currentFrameStatistics.SyncQPCTime.QuadPart > prevFrameStatistics.SyncQPCTime.QuadPart))
{
EventData& data = AddVSyncEvent("Present");
data.start = prevFrameStatistics.SyncQPCTime.QuadPart;
data.finish = currentFrameStatistics.SyncQPCTime.QuadPart;
while (!presentIdQueue.empty() && presentIdQueue.front() <= prevFrameStatistics.PresentCount)
{
if (presentIdQueue.front() == prevFrameStatistics.PresentCount)
{
TagData<uint32>& tag = AddVSyncTag();
tag.timestamp = prevFrameStatistics.SyncQPCTime.QuadPart;
tag.data = frameIdQueue.front();
}
presentIdQueue.pop();
frameIdQueue.pop();
}
prevFrameStatistics = currentFrameStatistics;
}
// Preparing Next Frame
// Try resolve timestamps for the current frame
if (frameNumber >= NUM_FRAMES_DELAY && nextFrame.queryIndexCount)
{
WaitForFrame(frameNumber + 1 - NUM_FRAMES_DELAY);
WaitForFrame(currentNode, (uint64_t)frameNumber + 1 - NUM_FRAMES_DELAY);
uint32_t resolveStart = nextFrame.queryIndexStart % MAX_QUERIES_COUNT;
uint32_t resolveFinish = resolveStart + nextFrame.queryIndexCount;
ResolveTimestamps(resolveStart, std::min<uint32_t>(resolveFinish, MAX_QUERIES_COUNT) - resolveStart);
ResolveTimestamps(currentNode, resolveStart, std::min<uint32_t>(resolveFinish, MAX_QUERIES_COUNT) - resolveStart);
if (resolveFinish > MAX_QUERIES_COUNT)
ResolveTimestamps(0, resolveFinish - MAX_QUERIES_COUNT);
ResolveTimestamps(currentNode, 0, resolveFinish - MAX_QUERIES_COUNT);
}
nextFrame.queryIndexStart = queryEnd;
nextFrame.queryIndexCount = 0;
// Process VSync
DXGI_FRAME_STATISTICS currentFrameStatistics = { 0 };
HRESULT result = swapChain->GetFrameStatistics(&currentFrameStatistics);
if ((result == S_OK) && (prevFrameStatistics.PresentCount + 1 == currentFrameStatistics.PresentCount))
{
EventData& data = AddVSyncEvent();
data.start = prevFrameStatistics.SyncQPCTime.QuadPart;
data.finish = currentFrameStatistics.SyncQPCTime.QuadPart;
}
prevFrameStatistics = currentFrameStatistics;
}
++frameNumber;
@ -399,4 +440,4 @@ namespace Optick
}
#endif //OPTICK_ENABLE_GPU_D3D12
#endif //USE_OPTICK
#endif //USE_OPTICK

View file

@ -61,7 +61,8 @@ namespace Optick
int64_t GetCPUTimestamp(int64_t gpuTimestamp)
{
return timestampCPU + (gpuTimestamp - timestampGPU) * frequencyCPU / frequencyGPU;
// SRS - Improve accuracy of GPU to CPU timestamp conversion by using floating point doubles
return timestampCPU + (int64_t)(double(gpuTimestamp - timestampGPU) * (double)frequencyCPU / (double)frequencyGPU);
}
ClockSynchronization() : frequencyCPU(0), frequencyGPU(0), timestampCPU(0), timestampGPU(0) {}
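For a rough sense of the conversion above, with assumed values frequencyCPU = 1,000,000,000 Hz (a nanosecond clock) and frequencyGPU = 19,200,000 Hz: a GPU delta of 1,000,000 ticks maps to 1,000,000 * 1e9 / 19.2e6 ≈ 52,083,333 ns. The double-precision form also sidesteps the int64 overflow the pure integer expression hits once delta * frequencyCPU exceeds 2^63, i.e. a delta of about 9.2e9 GPU ticks, which with these assumed rates is roughly 480 seconds of GPU time since the last clock calibration.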
@ -122,8 +123,9 @@ namespace Optick
void Reset();
EventData& AddFrameEvent();
EventData& AddVSyncEvent();
EventData& AddVSyncEvent(const char *eventName = "VSync");
TagData<uint32>& AddFrameTag();
TagData<uint32>& AddVSyncTag();
public:
GPUProfiler();
@ -141,7 +143,9 @@ namespace Optick
// Interface to implement
virtual ClockSynchronization GetClockSynchronization(uint32_t nodeIndex) = 0;
virtual void QueryTimestamp(void* context, int64_t* cpuTimestampOut) = 0;
virtual void Flip(void* swapChain) = 0;
virtual void ResolveTimestamps(uint32_t nodeIndex, uint32_t startIndex, uint32_t count) = 0;
virtual void WaitForFrame(uint32_t nodeIndex, uint64_t frameNumber) = 0;
virtual void Flip(void* swapChain, uint32_t frameID) = 0;
virtual ~GPUProfiler();
};

View file

@ -54,16 +54,18 @@ namespace Optick
VkQueue queue;
VkQueryPool queryPool;
VkCommandPool commandPool;
VkEvent event;
array<Frame, NUM_FRAMES_DELAY> frames;
NodePayload() : vulkanFunctions(), device(VK_NULL_HANDLE), physicalDevice(VK_NULL_HANDLE), queue(VK_NULL_HANDLE), queryPool(VK_NULL_HANDLE), commandPool(VK_NULL_HANDLE) {}
NodePayload() : vulkanFunctions(), device(VK_NULL_HANDLE), physicalDevice(VK_NULL_HANDLE), queue(VK_NULL_HANDLE), queryPool(VK_NULL_HANDLE), commandPool(VK_NULL_HANDLE), event(VK_NULL_HANDLE) {}
~NodePayload();
};
vector<NodePayload*> nodePayloads;
void ResolveTimestamps(VkCommandBuffer commandBuffer, uint32_t startIndex, uint32_t count);
void WaitForFrame(uint64_t frameNumber);
// VSync / Present Stats
uint64_t prevPresentTime;
uint32_t prevPresentID;
public:
GPUProfilerVulkan();
@ -71,6 +73,7 @@ namespace Optick
void InitDevice(VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount, const VulkanFunctions* functions);
void QueryTimestamp(VkCommandBuffer commandBuffer, int64_t* outCpuTimestamp);
void Flip(VkSwapchainKHR swapChain);
// Interface implementation
@ -81,7 +84,14 @@ namespace Optick
QueryTimestamp((VkCommandBuffer)context, outCpuTimestamp);
}
void Flip(void* swapChain) override;
void ResolveTimestamps(uint32_t nodeIndex, uint32_t startIndex, uint32_t count) override;
void WaitForFrame(uint32_t nodeIndex, uint64_t frameNumber) override;
void Flip(void* swapChain, uint32_t frameID) override
{
Flip(static_cast<VkSwapchainKHR>(swapChain));
}
};
void InitGpuVulkan(VkDevice* vkDevices, VkPhysicalDevice* vkPhysicalDevices, VkQueue* vkQueues, uint32_t* cmdQueuesFamily, uint32_t numQueues, const VulkanFunctions* functions)
@ -93,6 +103,8 @@ namespace Optick
GPUProfilerVulkan::GPUProfilerVulkan()
{
prevPresentTime = 0;
prevPresentID = 0;
}
void GPUProfilerVulkan::InitDevice(VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount, const VulkanFunctions* functions)
@ -107,9 +119,14 @@ namespace Optick
vkGetPhysicalDeviceProperties,
(PFN_vkCreateQueryPool_)vkCreateQueryPool,
(PFN_vkCreateCommandPool_)vkCreateCommandPool,
(PFN_vkCreateEvent_)vkCreateEvent,
(PFN_vkAllocateCommandBuffers_)vkAllocateCommandBuffers,
(PFN_vkCreateFence_)vkCreateFence,
vkCmdResetQueryPool,
vkResetQueryPool,
(PFN_vkCmdWaitEvents_)vkCmdWaitEvents,
(PFN_vkResetEvent_)vkResetEvent,
(PFN_vkSetEvent_)vkSetEvent,
(PFN_vkQueueSubmit_)vkQueueSubmit,
(PFN_vkWaitForFences_)vkWaitForFences,
(PFN_vkResetCommandBuffer_)vkResetCommandBuffer,
@ -120,8 +137,10 @@ namespace Optick
(PFN_vkResetFences_)vkResetFences,
vkDestroyCommandPool,
vkDestroyQueryPool,
vkDestroyEvent,
vkDestroyFence,
vkFreeCommandBuffers,
nullptr, // dynamically define vkGetPastPresentationTimingGOOGLE if VK_GOOGLE_display_timing extension available
};
}
@ -137,6 +156,11 @@ namespace Optick
commandPoolCreateInfo.pNext = 0;
commandPoolCreateInfo.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
VkEventCreateInfo eventCreateInfo;
eventCreateInfo.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO;
eventCreateInfo.pNext = 0;
eventCreateInfo.flags = 0;
nodes.resize(nodeCount);
nodePayloads.resize(nodeCount);
@ -150,6 +174,7 @@ namespace Optick
NodePayload* nodePayload = Memory::New<NodePayload>();
nodePayloads[i] = nodePayload;
nodePayload->vulkanFunctions = &vulkanFunctions;
nodePayload->vulkanFunctions->vkGetPastPresentationTimingGOOGLE = (PFN_vkGetPastPresentationTimingGOOGLE_)vkGetDeviceProcAddr(devices[i], "vkGetPastPresentationTimingGOOGLE");
nodePayload->device = devices[i];
nodePayload->physicalDevice = physicalDevices[i];
nodePayload->queue = cmdQueues[i];
@ -163,6 +188,10 @@ namespace Optick
OPTICK_ASSERT(r == VK_SUCCESS, "Failed");
(void)r;
r = (VkResult)(*vulkanFunctions.vkCreateEvent)(nodePayload->device, &eventCreateInfo, 0, &nodePayload->event);
OPTICK_ASSERT(r == VK_SUCCESS, "Failed");
(void)r;
for (uint32_t j = 0; j < nodePayload->frames.size(); ++j)
{
Frame& frame = nodePayload->frames[j];
@ -221,16 +250,16 @@ namespace Optick
}
}
void GPUProfilerVulkan::ResolveTimestamps(VkCommandBuffer commandBuffer, uint32_t startIndex, uint32_t count)
void GPUProfilerVulkan::ResolveTimestamps(uint32_t nodeIndex, uint32_t startIndex, uint32_t count)
{
if (count)
{
Node* node = nodes[currentNode];
Node* node = nodes[nodeIndex];
NodePayload* payload = nodePayloads[currentNode];
NodePayload* payload = nodePayloads[nodeIndex];
OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkGetQueryPoolResults)(payload->device, payload->queryPool, startIndex, count, 8 * count, &nodes[currentNode]->queryGpuTimestamps[startIndex], 8, VK_QUERY_RESULT_64_BIT));
(*vulkanFunctions.vkCmdResetQueryPool)(commandBuffer, payload->queryPool, startIndex, count);
OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkGetQueryPoolResults)(payload->device, payload->queryPool, startIndex, count, 8 * (size_t)count, &nodes[nodeIndex]->queryGpuTimestamps[startIndex], 8, VK_QUERY_RESULT_64_BIT));
(*vulkanFunctions.vkResetQueryPool)(payload->device, payload->queryPool, startIndex, count);
// Convert GPU timestamps => CPU Timestamps
for (uint32_t index = startIndex; index < startIndex + count; ++index)
@ -238,19 +267,19 @@ namespace Optick
}
}
void GPUProfilerVulkan::WaitForFrame(uint64_t frameNumberToWait)
void GPUProfilerVulkan::WaitForFrame(uint32_t nodeIndex, uint64_t frameNumberToWait)
{
OPTICK_EVENT();
int r = VK_SUCCESS;
do
{
NodePayload& payload = *nodePayloads[currentNode];
r = (*vulkanFunctions.vkWaitForFences)(nodePayloads[currentNode]->device, 1, &payload.frames[frameNumberToWait % payload.frames.size()].fence, 1, 1000 * 30);
NodePayload& payload = *nodePayloads[nodeIndex];
r = (*vulkanFunctions.vkWaitForFences)(nodePayloads[nodeIndex]->device, 1, &payload.frames[frameNumberToWait % payload.frames.size()].fence, 1, 1000 * 30);
} while (r != VK_SUCCESS);
}
void GPUProfilerVulkan::Flip(void* /*swapChain*/)
void GPUProfilerVulkan::Flip(VkSwapchainKHR swapChain)
{
OPTICK_CATEGORY("GPUProfilerVulkan::Flip", Category::Debug);
@ -276,6 +305,7 @@ namespace Optick
VkQueue queue = payload.queue;
(*vulkanFunctions.vkWaitForFences)(device, 1, &fence, 1, (uint64_t)-1);
(*vulkanFunctions.vkResetFences)(device, 1, &fence);
VkCommandBufferBeginInfo commandBufferBeginInfo;
commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
@ -283,7 +313,6 @@ namespace Optick
commandBufferBeginInfo.pInheritanceInfo = 0;
commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkBeginCommandBuffer)(commandBuffer, &commandBufferBeginInfo));
(*vulkanFunctions.vkResetFences)(device, 1, &fence);
if (EventData* frameEvent = currentFrame.frameEvent)
QueryTimestamp(commandBuffer, &frameEvent->finish);
@ -311,8 +340,16 @@ namespace Optick
if (queryBegin != (uint32_t)-1)
{
OPTICK_ASSERT(queryEnd - queryBegin <= MAX_QUERIES_COUNT, "Too many queries in one frame? Increase GPUProfiler::MAX_QUERIES_COUNT to fix the problem!");
currentFrame.queryIndexCount = queryEnd - queryBegin;
}
else
{
currentFrame.queryIndexStart = 0;
currentFrame.queryIndexCount = queryEnd;
prevPresentTime = 0;
prevPresentID = 0;
}
// Preparing Next Frame
// Try resolve timestamps for the current frame
@ -323,12 +360,44 @@ namespace Optick
if (startIndex < finishIndex)
{
ResolveTimestamps(commandBuffer, startIndex, finishIndex - startIndex);
ResolveTimestamps(currentNode, startIndex, finishIndex - startIndex);
}
else if (startIndex > finishIndex)
{
ResolveTimestamps(commandBuffer, startIndex, MAX_QUERIES_COUNT - startIndex);
ResolveTimestamps(commandBuffer, 0, finishIndex);
ResolveTimestamps(currentNode, startIndex, MAX_QUERIES_COUNT - startIndex);
ResolveTimestamps(currentNode, 0, finishIndex);
}
// SRS - Add Vulkan presentation / vsync timing if VK_GOOGLE_display_timing extension available
if (vulkanFunctions.vkGetPastPresentationTimingGOOGLE)
{
uint32_t queryPresentTimingCount = 0;
(*vulkanFunctions.vkGetPastPresentationTimingGOOGLE)(device, swapChain, &queryPresentTimingCount, nullptr);
if (queryPresentTimingCount > 0)
{
// Query Presentation Timing / VSync
vector<VkPastPresentationTimingGOOGLE> queryPresentTimings;
queryPresentTimings.resize(queryPresentTimingCount);
(*vulkanFunctions.vkGetPastPresentationTimingGOOGLE)(device, swapChain, &queryPresentTimingCount, &queryPresentTimings[0]);
for (uint32_t presentIndex = 0; presentIndex < queryPresentTimingCount; presentIndex++)
{
// Process Presentation Timing / VSync if swap image was actually presented (i.e. not dropped)
VkPastPresentationTimingGOOGLE presentTiming = queryPresentTimings[presentIndex];
if (presentTiming.actualPresentTime > prevPresentTime)
{
EventData& data = AddVSyncEvent("Present");
data.start = prevPresentTime;
data.finish = presentTiming.actualPresentTime;
TagData<uint32>& tag = AddVSyncTag();
tag.timestamp = prevPresentTime;
tag.data = prevPresentID;
prevPresentTime = presentTiming.actualPresentTime;
prevPresentID = presentTiming.presentID;
}
}
}
}
}
@ -357,10 +426,12 @@ namespace Optick
(*vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1);
(*vulkanFunctions.vkResetFences)(Device, 1, &Fence);
(*vulkanFunctions.vkResetEvent)(Device, nodePayloads[nodeIndex]->event);
(*vulkanFunctions.vkResetCommandBuffer)(CB, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT);
(*vulkanFunctions.vkBeginCommandBuffer)(CB, &commandBufferBeginInfo);
(*vulkanFunctions.vkCmdResetQueryPool)(CB, nodePayloads[nodeIndex]->queryPool, 0, 1);
(*vulkanFunctions.vkCmdWriteTimestamp)(CB, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, nodePayloads[nodeIndex]->queryPool, 0);
(*vulkanFunctions.vkCmdWaitEvents)(CB, 1, &nodePayloads[nodeIndex]->event, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_HOST_BIT | VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, nullptr, 0, nullptr, 0, nullptr);
(*vulkanFunctions.vkCmdWriteTimestamp)(CB, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, nodePayloads[nodeIndex]->queryPool, 0);
(*vulkanFunctions.vkEndCommandBuffer)(CB);
VkSubmitInfo submitInfo = {};
@ -373,22 +444,35 @@ namespace Optick
submitInfo.signalSemaphoreCount = 0;
submitInfo.pSignalSemaphores = nullptr;
(*vulkanFunctions.vkQueueSubmit)(nodePayloads[nodeIndex]->queue, 1, &submitInfo, Fence);
(*vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1);
// SRS - Improve GPU to CPU clock offset calibration by using Vulkan events
// thanks to cdwfs for concept at https://gist.github.com/cdwfs/4222ca09cb259f8dd50f7f2cf7d09179
std::this_thread::sleep_for(std::chrono::seconds(1));
(*vulkanFunctions.vkSetEvent)(Device, nodePayloads[nodeIndex]->event);
clock.timestampCPU = GetHighPrecisionTime();
(*vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1);
(*vulkanFunctions.vkResetFences)(Device, 1, &Fence);
clock.timestampGPU = 0;
(*vulkanFunctions.vkGetQueryPoolResults)(Device, nodePayloads[nodeIndex]->queryPool, 0, 1, 8, &clock.timestampGPU, 8, VK_QUERY_RESULT_64_BIT);
clock.timestampCPU = GetHighPrecisionTime();
clock.frequencyCPU = GetHighPrecisionFrequency();
// SRS - Improve GPU to CPU clock frequency scaling by using floating point doubles
clock.frequencyCPU = GetHighPrecisionFrequency();
VkPhysicalDeviceProperties Properties;
(*vulkanFunctions.vkGetPhysicalDeviceProperties)(nodePayloads[nodeIndex]->physicalDevice, &Properties);
clock.frequencyGPU = (uint64_t)(1000000000ll / Properties.limits.timestampPeriod);
clock.frequencyGPU = (int64_t)(1000000000.0 / (double)Properties.limits.timestampPeriod);
// SRS - Reset entire query pool to clear clock sync query + any leftover queries from previous run
(*vulkanFunctions.vkBeginCommandBuffer)(CB, &commandBufferBeginInfo);
(*vulkanFunctions.vkCmdResetQueryPool)(CB, nodePayloads[nodeIndex]->queryPool, 0, MAX_QUERIES_COUNT);
(*vulkanFunctions.vkEndCommandBuffer)(CB);
(*vulkanFunctions.vkQueueSubmit)(nodePayloads[nodeIndex]->queue, 1, &submitInfo, Fence);
return clock;
}
GPUProfilerVulkan::NodePayload::~NodePayload()
{
(*vulkanFunctions->vkDestroyEvent)(device, event, nullptr);
(*vulkanFunctions->vkDestroyCommandPool)(device, commandPool, nullptr);
(*vulkanFunctions->vkDestroyQueryPool)(device, queryPool, nullptr);
}
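Note on the frequencyGPU line in the hunk above: Vulkan's VkPhysicalDeviceLimits::timestampPeriod is expressed in nanoseconds per timestamp tick, so 1,000,000,000.0 / timestampPeriod gives ticks per second. For example (assumed values), timestampPeriod = 1.0 yields exactly 1 GHz and timestampPeriod ≈ 52.08 yields roughly 19.2 MHz; doing the division in double rather than in the previous mixed integer/float expression gives a more precise intermediate value before the int64_t cast.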
@ -419,4 +503,4 @@ namespace Optick
}
}
#endif //OPTICK_ENABLE_GPU_D3D12
#endif //USE_OPTICK
#endif //USE_OPTICK

View file

@ -792,10 +792,13 @@ void idRenderBackend::FillDepthBufferFast( drawSurf_t** drawSurfs, int numDrawSu
{
OPTICK_EVENT( "Render_FillDepthBufferFast" );
#if USE_OPTICK_GPU
OPTICK_GPU_CONTEXT( ( ID3D12GraphicsCommandList* ) commandList->getNativeObject( nvrhi::ObjectTypes::D3D12_GraphicsCommandList ) );
nvrhi::ObjectType commandObject = nvrhi::ObjectTypes::D3D12_GraphicsCommandList;
if( deviceManager->GetGraphicsAPI() == nvrhi::GraphicsAPI::VULKAN )
{
commandObject = nvrhi::ObjectTypes::VK_CommandBuffer;
}
OPTICK_GPU_CONTEXT( ( void* ) commandList->getNativeObject( commandObject ) );
OPTICK_GPU_EVENT( "Render_FillDepthBufferFast" );
#endif
if( numDrawSurfs == 0 )
{
@ -3357,10 +3360,13 @@ void idRenderBackend::ShadowAtlasPass( const viewDef_t* _viewDef )
OPTICK_EVENT( "Render_ShadowAtlas" );
#if USE_OPTICK_GPU
OPTICK_GPU_CONTEXT( ( ID3D12GraphicsCommandList* ) commandList->getNativeObject( nvrhi::ObjectTypes::D3D12_GraphicsCommandList ) );
nvrhi::ObjectType commandObject = nvrhi::ObjectTypes::D3D12_GraphicsCommandList;
if( deviceManager->GetGraphicsAPI() == nvrhi::GraphicsAPI::VULKAN )
{
commandObject = nvrhi::ObjectTypes::VK_CommandBuffer;
}
OPTICK_GPU_CONTEXT( ( void* ) commandList->getNativeObject( commandObject ) );
OPTICK_GPU_EVENT( "Render_ShadowAtlas" );
#endif
renderLog.OpenMainBlock( MRB_SHADOW_ATLAS_PASS );
renderLog.OpenBlock( "Render_ShadowAtlas", colorYellow );
@ -3670,10 +3676,13 @@ void idRenderBackend::DrawInteractions( const viewDef_t* _viewDef )
OPTICK_EVENT( "Render_Interactions" );
#if USE_OPTICK_GPU
OPTICK_GPU_CONTEXT( ( ID3D12GraphicsCommandList* ) commandList->getNativeObject( nvrhi::ObjectTypes::D3D12_GraphicsCommandList ) );
nvrhi::ObjectType commandObject = nvrhi::ObjectTypes::D3D12_GraphicsCommandList;
if( deviceManager->GetGraphicsAPI() == nvrhi::GraphicsAPI::VULKAN )
{
commandObject = nvrhi::ObjectTypes::VK_CommandBuffer;
}
OPTICK_GPU_CONTEXT( ( void* ) commandList->getNativeObject( commandObject ) );
OPTICK_GPU_EVENT( "Render_Interactions" );
#endif
renderLog.OpenMainBlock( MRB_DRAW_INTERACTIONS );
renderLog.OpenBlock( "Render_Interactions", colorYellow );
@ -5389,13 +5398,13 @@ void idRenderBackend::DrawViewInternal( const viewDef_t* _viewDef, const int ste
OPTICK_EVENT( "Backend_DrawViewInternal" );
OPTICK_TAG( "stereoEye", stereoEye );
#if USE_OPTICK_GPU
//uint32_t swapIndex = deviceManager->GetCurrentBackBufferIndex();
//idStr eventLabel;
//eventLabel.Format( "DrawView( frameIndex = %i, swapIndex = %i ) ", taaPass->GetFrameIndex(), swapIndex );
OPTICK_GPU_CONTEXT( ( ID3D12GraphicsCommandList* ) commandList->getNativeObject( nvrhi::ObjectTypes::D3D12_GraphicsCommandList ) );
nvrhi::ObjectType commandObject = nvrhi::ObjectTypes::D3D12_GraphicsCommandList;
if( deviceManager->GetGraphicsAPI() == nvrhi::GraphicsAPI::VULKAN )
{
commandObject = nvrhi::ObjectTypes::VK_CommandBuffer;
}
OPTICK_GPU_CONTEXT( ( void* ) commandList->getNativeObject( commandObject ) );
OPTICK_GPU_EVENT( "DrawView" );
#endif
renderLog.OpenBlock( "Render_DrawViewInternal", colorRed );

View file

@ -43,13 +43,6 @@ If you have questions concerning this license or the applicable additional terms
#include "PipelineCache.h"
#if USE_OPTICK
#define USE_OPTICK_GPU 0
#else
#define USE_OPTICK_GPU 0
#endif
struct tmu_t
{
unsigned int current2DMap;

View file

@ -459,6 +459,8 @@ bool DeviceManager_DX12::CreateDeviceAndSwapChain()
void DeviceManager_DX12::DestroyDeviceAndSwapChain()
{
OPTICK_SHUTDOWN();
m_RhiSwapChainBuffers.clear();
m_RendererString.clear();
@ -610,7 +612,7 @@ void DeviceManager_DX12::Present()
presentFlags |= DXGI_PRESENT_ALLOW_TEARING;
}
OPTICK_GPU_FLIP( m_SwapChain.Get() );
OPTICK_GPU_FLIP( m_SwapChain.Get(), idLib::frameNumber - 1 );
OPTICK_CATEGORY( "DX12_Present", Optick::Category::Wait );
// SRS - Don't change m_DeviceParams.vsyncEnabled here, simply test for vsync mode 2 to set DXGI SyncInterval

View file

@ -35,7 +35,7 @@
#include <sys/DeviceManager.h>
#include <nvrhi/vulkan.h>
// SRS - optionally needed for VK_MVK_MOLTENVK_EXTENSION_NAME and MoltenVK runtime config visibility
// SRS - optionally needed for MoltenVK runtime config visibility
#if defined(__APPLE__) && defined( USE_MoltenVK )
#include <MoltenVK/vk_mvk_moltenvk.h>
@ -181,10 +181,6 @@ private:
{
// instance
{
#if defined(__APPLE__) && defined( USE_MoltenVK )
// SRS - needed for using MoltenVK configuration on macOS (if USE_MoltenVK defined)
VK_MVK_MOLTENVK_EXTENSION_NAME,
#endif
VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME
},
// layers
@ -193,12 +189,9 @@ private:
{
VK_KHR_SWAPCHAIN_EXTENSION_NAME,
VK_KHR_MAINTENANCE1_EXTENSION_NAME,
#if defined(__APPLE__)
#if defined( VK_KHR_portability_subset )
VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME,
#endif
VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME,
#if defined(__APPLE__) && defined( VK_KHR_portability_subset )
// SRS - This is required for using the MoltenVK portability subset implementation on macOS
VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME
#endif
},
};
@ -229,6 +222,9 @@ private:
VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME,
VK_NV_MESH_SHADER_EXTENSION_NAME,
VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME,
#if USE_OPTICK
VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME,
#endif
VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME
},
};
@ -287,6 +283,8 @@ private:
bool enablePModeImmediate = false; // r_swapInterval = 0 (defaults to eFifo if not available)
bool enablePModeFifoRelaxed = false; // r_swapInterval = 1 (defaults to eFifo if not available)
// SRS - flag indicating support for presentation timing via VK_GOOGLE_display_timing extension
bool displayTimingEnabled = false;
private:
static VKAPI_ATTR VkBool32 VKAPI_CALL vulkanDebugCallback(
@ -778,6 +776,10 @@ bool DeviceManager_VK::createDevice()
{
sync2Supported = true;
}
else if( ext == VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME )
{
displayTimingEnabled = true;
}
}
std::unordered_set<int> uniqueQueueFamilies =
@ -828,6 +830,9 @@ bool DeviceManager_VK::createDevice()
#if defined(__APPLE__) && defined( VK_KHR_portability_subset )
auto portabilityFeatures = vk::PhysicalDevicePortabilitySubsetFeaturesKHR()
#if USE_OPTICK
.setEvents( true )
#endif
.setImageViewFormatSwizzle( true );
void* pNext = &portabilityFeatures;
@ -865,6 +870,9 @@ bool DeviceManager_VK::createDevice()
.setTimelineSemaphore( true )
.setShaderSampledImageArrayNonUniformIndexing( true )
.setBufferDeviceAddress( bufferAddressSupported )
#if USE_OPTICK
.setHostQueryReset( true )
#endif
.setPNext( pNext );
auto layerVec = stringSetToVector( enabledExtensions.layers );
@ -1226,11 +1234,17 @@ bool DeviceManager_VK::CreateDeviceAndSwapChain()
#undef CHECK
OPTICK_GPU_INIT_VULKAN( ( VkDevice* )&m_VulkanDevice, ( VkPhysicalDevice* )&m_VulkanPhysicalDevice, ( VkQueue* )&m_GraphicsQueue, ( uint32_t* )&m_GraphicsQueueFamily, 1, nullptr );
return true;
}
void DeviceManager_VK::DestroyDeviceAndSwapChain()
{
OPTICK_SHUTDOWN();
m_VulkanDevice.waitIdle();
m_FrameWaitQuery = nullptr;
for( int i = 0; i < m_SwapChainImages.size(); i++ )
@ -1310,12 +1324,30 @@ void DeviceManager_VK::EndFrame()
void DeviceManager_VK::Present()
{
OPTICK_GPU_FLIP( m_SwapChain );
OPTICK_CATEGORY( "Vulkan_Present", Optick::Category::Wait );
void* pNext = nullptr;
#if USE_OPTICK
// SRS - if display timing enabled, define the presentID for labeling the Optick GPU VSync / Present queue
vk::PresentTimeGOOGLE presentTime = vk::PresentTimeGOOGLE()
.setPresentID( idLib::frameNumber - 1 );
vk::PresentTimesInfoGOOGLE presentTimesInfo = vk::PresentTimesInfoGOOGLE()
.setSwapchainCount( 1 )
.setPTimes( &presentTime );
if( displayTimingEnabled )
{
pNext = &presentTimesInfo;
}
#endif
vk::PresentInfoKHR info = vk::PresentInfoKHR()
.setWaitSemaphoreCount( 1 )
.setPWaitSemaphores( &m_PresentSemaphore )
.setSwapchainCount( 1 )
.setPSwapchains( &m_SwapChain )
.setPImageIndices( &m_SwapChainIndex );
.setPImageIndices( &m_SwapChainIndex )
.setPNext( pNext );
const vk::Result res = m_PresentQueue.presentKHR( &info );
assert( res == vk::Result::eSuccess || res == vk::Result::eErrorOutOfDateKHR || res == vk::Result::eSuboptimalKHR );
@ -1338,6 +1370,8 @@ void DeviceManager_VK::Present()
{
if constexpr( NUM_FRAME_DATA > 2 )
{
OPTICK_CATEGORY( "Vulkan_Sync3", Optick::Category::Wait );
// SRS - For triple buffering, sync on previous frame's command queue completion
m_NvrhiDevice->waitEventQuery( m_FrameWaitQuery );
}
@ -1347,6 +1381,8 @@ void DeviceManager_VK::Present()
if constexpr( NUM_FRAME_DATA < 3 )
{
OPTICK_CATEGORY( "Vulkan_Sync2", Optick::Category::Wait );
// SRS - For double buffering, sync on current frame's command queue completion
m_NvrhiDevice->waitEventQuery( m_FrameWaitQuery );
}

View file

@ -521,6 +521,19 @@ int main( int argc, const char** argv )
Sys_Printf( "memory consistency checking enabled\n" );
#endif
// Setting memory allocators
OPTICK_SET_MEMORY_ALLOCATOR(
[]( size_t size ) -> void* { return operator new( size ); },
[]( void* p )
{
operator delete( p );
},
[]()
{
/* Do some TLS initialization here if needed */
}
);
Posix_EarlyInit();
if( argc > 1 )
@ -537,6 +550,8 @@ int main( int argc, const char** argv )
while( 1 )
{
OPTICK_FRAME( "MainThread" );
common->Frame();
}
}

View file

@ -457,6 +457,19 @@ int main( int argc, const char** argv )
cmdargv = argv;
// DG end
// Setting memory allocators
OPTICK_SET_MEMORY_ALLOCATOR(
[]( size_t size ) -> void* { return operator new( size ); },
[]( void* p )
{
operator delete( p );
},
[]()
{
/* Do some TLS initialization here if needed */
}
);
Posix_EarlyInit();
if( argc > 1 )
@ -472,6 +485,8 @@ int main( int argc, const char** argv )
while( 1 )
{
OPTICK_FRAME( "MainThread" );
common->Frame();
}
}

View file

@ -2051,8 +2051,6 @@ int WINAPI WinMain( HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLin
common->Frame();
}
OPTICK_SHUTDOWN();
// never gets here
return 0;
}