mirror of
https://github.com/id-Software/DOOM-3-BFG.git
synced 2025-04-24 10:38:53 +00:00
Merge remote-tracking branch 'SRSaunders/optick-vulkan'
This commit is contained in:
commit
33615ef541
19 changed files with 383 additions and 105 deletions
|
@ -1677,6 +1677,15 @@ else()
|
|||
ENDFOREACH(item)
|
||||
endif()
|
||||
|
||||
# SRS - if using gcc compiler enable gnu extensions for ##__VA_ARGS__ support within optick profiler (i.e. __STRICT_ANSI__ not set)
|
||||
if(CMAKE_COMPILER_IS_GNUCC)
|
||||
set_source_files_properties(
|
||||
${OPTICK_SOURCES}
|
||||
PROPERTIES
|
||||
COMPILE_FLAGS "-std=gnu++${CMAKE_CXX_STANDARD}"
|
||||
)
|
||||
endif()
|
||||
|
||||
GET_DIRECTORY_PROPERTY(_directory_flags DEFINITIONS)
|
||||
LIST(APPEND _compiler_FLAGS ${_directory_flags})
|
||||
SEPARATE_ARGUMENTS(_compiler_FLAGS)
|
||||
|
@ -1685,6 +1694,11 @@ else()
|
|||
# we need to recreate the precompiled header for RBDoom3BFG
|
||||
# (i.e. can't use the one created for idlib before)
|
||||
# because some definitions (e.g. -D__IDLIB__ -D__DOOM_DLL__) differ
|
||||
if(OPTICK)
|
||||
LIST(APPEND _compiler_FLAGS -DUSE_OPTICK=1)
|
||||
else()
|
||||
LIST(APPEND _compiler_FLAGS -DUSE_OPTICK=0)
|
||||
endif()
|
||||
add_custom_target(precomp_header_rbdoom3bfg ALL
|
||||
COMMAND ${CMAKE_CXX_COMPILER} ${_compiler_FLAGS} -x c++-header idlib/precompiled.h -o idlib/precompiled.h.gch
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
|
|
|
@ -90,7 +90,11 @@ be called directly in the foreground thread for comparison.
|
|||
*/
|
||||
int idGameThread::Run()
|
||||
{
|
||||
OPTICK_THREAD( "idGameThread" );
|
||||
if( com_smp.GetBool() )
|
||||
{
|
||||
// SRS - label thread in smp mode only, otherwise CPU frame number is missing
|
||||
OPTICK_THREAD( "idGameThread" );
|
||||
}
|
||||
|
||||
commonLocal.frameTiming.startGameTime = Sys_Microseconds();
|
||||
|
||||
|
|
|
@ -130,6 +130,11 @@ else()
|
|||
SEPARATE_ARGUMENTS(_compiler_FLAGS)
|
||||
|
||||
if (USE_PRECOMPILED_HEADERS)
|
||||
if(OPTICK)
|
||||
LIST(APPEND _compiler_FLAGS -DUSE_OPTICK=1)
|
||||
else()
|
||||
LIST(APPEND _compiler_FLAGS -DUSE_OPTICK=0)
|
||||
endif()
|
||||
add_custom_target(precomp_header_idlib ALL
|
||||
COMMAND ${CMAKE_CXX_COMPILER} ${_compiler_FLAGS} -x c++-header precompiled.h -o precompiled.h.gch
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
|
|
|
@ -68,7 +68,7 @@
|
|||
#if defined(_MSC_VER)
|
||||
#define OPTICK_ENABLE_GPU_VULKAN (OPTICK_ENABLE_GPU /*&& 0*/)
|
||||
#else
|
||||
#define OPTICK_ENABLE_GPU_VULKAN (0)
|
||||
#define OPTICK_ENABLE_GPU_VULKAN (OPTICK_ENABLE_GPU /*&& 0*/)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -103,11 +103,13 @@ OPTICK_DEFINE_HANDLE(VkCommandBuffer);
|
|||
OPTICK_DEFINE_HANDLE(VkQueryPool);
|
||||
OPTICK_DEFINE_HANDLE(VkCommandPool);
|
||||
OPTICK_DEFINE_HANDLE(VkFence);
|
||||
OPTICK_DEFINE_HANDLE(VkEvent);
|
||||
|
||||
struct VkPhysicalDeviceProperties;
|
||||
struct VkQueryPoolCreateInfo;
|
||||
struct VkAllocationCallbacks;
|
||||
struct VkCommandPoolCreateInfo;
|
||||
struct VkEventCreateInfo;
|
||||
struct VkCommandBufferAllocateInfo;
|
||||
struct VkFenceCreateInfo;
|
||||
struct VkSubmitInfo;
|
||||
|
@ -126,9 +128,14 @@ struct VkCommandBufferBeginInfo;
|
|||
typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceProperties_)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties);
|
||||
typedef int32_t (VKAPI_PTR *PFN_vkCreateQueryPool_)(VkDevice device, const VkQueryPoolCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkQueryPool* pQueryPool);
|
||||
typedef int32_t (VKAPI_PTR *PFN_vkCreateCommandPool_)(VkDevice device, const VkCommandPoolCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkCommandPool* pCommandPool);
|
||||
typedef int32_t (VKAPI_PTR *PFN_vkCreateEvent_)(VkDevice device, const VkEventCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkEvent* pEvent);
|
||||
typedef int32_t (VKAPI_PTR *PFN_vkAllocateCommandBuffers_)(VkDevice device, const VkCommandBufferAllocateInfo* pAllocateInfo, VkCommandBuffer* pCommandBuffers);
|
||||
typedef int32_t (VKAPI_PTR *PFN_vkCreateFence_)(VkDevice device, const VkFenceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkFence* pFence);
|
||||
typedef void (VKAPI_PTR *PFN_vkCmdResetQueryPool_)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount);
|
||||
typedef void (VKAPI_PTR *PFN_vkResetQueryPool_)(VkDevice device, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount);
|
||||
typedef void (VKAPI_PTR *PFN_vkCmdWaitEvents_)(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents, uint32_t srcStageMask, uint32_t dstStageMask, uint32_t memoryBarrierCount, const void* pMemoryBarriers, uint32_t bufferMemoryBarrierCount, const void* pBufferMemoryBarriers, uint32_t imageMemoryBarrierCount, const void* pImageMemoryBarriers);
|
||||
typedef int32_t (VKAPI_PTR *PFN_vkResetEvent_)(VkDevice device, VkEvent event);
|
||||
typedef int32_t (VKAPI_PTR *PFN_vkSetEvent_)(VkDevice device, VkEvent event);
|
||||
typedef int32_t (VKAPI_PTR *PFN_vkQueueSubmit_)(VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence);
|
||||
typedef int32_t (VKAPI_PTR *PFN_vkWaitForFences_)(VkDevice device, uint32_t fenceCount, const VkFence* pFences, uint32_t waitAll, uint64_t timeout);
|
||||
typedef int32_t (VKAPI_PTR *PFN_vkResetCommandBuffer_)(VkCommandBuffer commandBuffer, uint32_t flags);
|
||||
|
@ -139,8 +146,10 @@ typedef int32_t (VKAPI_PTR *PFN_vkEndCommandBuffer_)(VkCommandBuffer commandBuff
|
|||
typedef int32_t (VKAPI_PTR *PFN_vkResetFences_)(VkDevice device, uint32_t fenceCount, const VkFence* pFences);
|
||||
typedef void (VKAPI_PTR *PFN_vkDestroyCommandPool_)(VkDevice device, VkCommandPool commandPool, const VkAllocationCallbacks* pAllocator);
|
||||
typedef void (VKAPI_PTR *PFN_vkDestroyQueryPool_)(VkDevice device, VkQueryPool queryPool, const VkAllocationCallbacks* pAllocator);
|
||||
typedef void (VKAPI_PTR *PFN_vkDestroyEvent_)(VkDevice device, VkEvent event, const VkAllocationCallbacks* pAllocator);
|
||||
typedef void (VKAPI_PTR *PFN_vkDestroyFence_)(VkDevice device, VkFence fence, const VkAllocationCallbacks* pAllocator);
|
||||
typedef void (VKAPI_PTR *PFN_vkFreeCommandBuffers_)(VkDevice device, VkCommandPool commandPool, uint32_t commandBufferCount, const VkCommandBuffer* pCommandBuffers);
|
||||
typedef int32_t (VKAPI_PTR *PFN_vkGetPastPresentationTimingGOOGLE_)(VkDevice device, void* swapchain, uint32_t* pPresentationTimingCount, void* pPresentationTimings);
|
||||
|
||||
#if OPTICK_VKAPI_PTR_DEFINED
|
||||
#undef VKAPI_PTR
|
||||
|
@ -159,9 +168,14 @@ namespace Optick
|
|||
PFN_vkGetPhysicalDeviceProperties_ vkGetPhysicalDeviceProperties;
|
||||
PFN_vkCreateQueryPool_ vkCreateQueryPool;
|
||||
PFN_vkCreateCommandPool_ vkCreateCommandPool;
|
||||
PFN_vkCreateEvent_ vkCreateEvent;
|
||||
PFN_vkAllocateCommandBuffers_ vkAllocateCommandBuffers;
|
||||
PFN_vkCreateFence_ vkCreateFence;
|
||||
PFN_vkCmdResetQueryPool_ vkCmdResetQueryPool;
|
||||
PFN_vkResetQueryPool_ vkResetQueryPool;
|
||||
PFN_vkCmdWaitEvents_ vkCmdWaitEvents;
|
||||
PFN_vkResetEvent_ vkResetEvent;
|
||||
PFN_vkSetEvent_ vkSetEvent;
|
||||
PFN_vkQueueSubmit_ vkQueueSubmit;
|
||||
PFN_vkWaitForFences_ vkWaitForFences;
|
||||
PFN_vkResetCommandBuffer_ vkResetCommandBuffer;
|
||||
|
@ -172,8 +186,10 @@ namespace Optick
|
|||
PFN_vkResetFences_ vkResetFences;
|
||||
PFN_vkDestroyCommandPool_ vkDestroyCommandPool;
|
||||
PFN_vkDestroyQueryPool_ vkDestroyQueryPool;
|
||||
PFN_vkDestroyEvent_ vkDestroyEvent;
|
||||
PFN_vkDestroyFence_ vkDestroyFence;
|
||||
PFN_vkFreeCommandBuffers_ vkFreeCommandBuffers;
|
||||
PFN_vkGetPastPresentationTimingGOOGLE_ vkGetPastPresentationTimingGOOGLE;
|
||||
};
|
||||
|
||||
// Source: http://msdn.microsoft.com/en-us/library/system.windows.media.colors(v=vs.110).aspx
|
||||
|
@ -763,7 +779,7 @@ struct OPTICK_API GPUContext
|
|||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
OPTICK_API void InitGpuD3D12(ID3D12Device* device, ID3D12CommandQueue** cmdQueues, uint32_t numQueues);
|
||||
OPTICK_API void InitGpuVulkan(VkDevice* vkDevices, VkPhysicalDevice* vkPhysicalDevices, VkQueue* vkQueues, uint32_t* cmdQueuesFamily, uint32_t numQueues, const VulkanFunctions* functions);
|
||||
OPTICK_API void GpuFlip(void* swapChain);
|
||||
OPTICK_API void GpuFlip(void* swapChain, uint32_t frameID = 0);
|
||||
OPTICK_API GPUContext SetGpuContext(GPUContext context);
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
struct OPTICK_API GPUContextScope
|
||||
|
@ -780,6 +796,12 @@ struct OPTICK_API GPUContextScope
|
|||
prevContext = SetGpuContext(GPUContext(cmdBuffer, queue, node));
|
||||
}
|
||||
|
||||
// SRS - add typeless void* commandHandle prototype to support runtime selection of graphics API
|
||||
GPUContextScope(void* commandHandle, GPUQueueType queue = GPU_QUEUE_GRAPHICS, int node = 0)
|
||||
{
|
||||
prevContext = SetGpuContext(GPUContext(commandHandle, queue, node));
|
||||
}
|
||||
|
||||
~GPUContextScope()
|
||||
{
|
||||
SetGpuContext(prevContext);
|
||||
|
@ -1041,7 +1063,7 @@ struct OptickApp
|
|||
if (OPTICK_CONCAT(gpu_autogen_description_, __LINE__) == nullptr) OPTICK_CONCAT(gpu_autogen_description_, __LINE__) = ::Optick::EventDescription::Create( NAME, __FILE__, __LINE__ ); \
|
||||
::Optick::GPUEvent OPTICK_CONCAT(gpu_autogen_event_, __LINE__)( *(OPTICK_CONCAT(gpu_autogen_description_, __LINE__)) ); \
|
||||
|
||||
#define OPTICK_GPU_FLIP(SWAP_CHAIN) ::Optick::GpuFlip(SWAP_CHAIN);
|
||||
#define OPTICK_GPU_FLIP(...) ::Optick::GpuFlip(__VA_ARGS__);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
// [Automation][Startup]
|
||||
|
@ -1096,13 +1118,13 @@ struct OptickApp
|
|||
#define OPTICK_STORAGE_PUSH(STORAGE, DESCRIPTION, CPU_TIMESTAMP_START)
|
||||
#define OPTICK_STORAGE_POP(STORAGE, CPU_TIMESTAMP_FINISH)
|
||||
#define OPTICK_SET_STATE_CHANGED_CALLBACK(CALLBACK)
|
||||
#define OPTICK_SET_MEMORY_ALLOCATOR(ALLOCATE_FUNCTION, DEALLOCATE_FUNCTION)
|
||||
#define OPTICK_SET_MEMORY_ALLOCATOR(ALLOCATE_FUNCTION, DEALLOCATE_FUNCTION, INIT_THREAD_CALLBACK)
|
||||
#define OPTICK_SHUTDOWN()
|
||||
#define OPTICK_GPU_INIT_D3D12(DEVICE, CMD_QUEUES, NUM_CMD_QUEUS)
|
||||
#define OPTICK_GPU_INIT_VULKAN(DEVICES, PHYSICAL_DEVICES, CMD_QUEUES, CMD_QUEUES_FAMILY, NUM_CMD_QUEUS, FUNCTIONS)
|
||||
#define OPTICK_GPU_CONTEXT(...)
|
||||
#define OPTICK_GPU_EVENT(NAME)
|
||||
#define OPTICK_GPU_FLIP(SWAP_CHAIN)
|
||||
#define OPTICK_GPU_FLIP(...)
|
||||
#define OPTICK_UPDATE()
|
||||
#define OPTICK_FRAME_FLIP(...)
|
||||
#define OPTICK_FRAME_EVENT(FRAME_TYPE, ...)
|
||||
|
|
|
@ -1801,10 +1801,10 @@ OPTICK_API EventStorage* RegisterStorage(const char* name, uint64_t threadID, Th
|
|||
return entry ? &entry->storage : nullptr;
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
OPTICK_API void GpuFlip(void* swapChain)
|
||||
OPTICK_API void GpuFlip(void* swapChain, uint32_t frameID)
|
||||
{
|
||||
if (GPUProfiler* gpuProfiler = Core::Get().gpuProfiler)
|
||||
gpuProfiler->Flip(swapChain);
|
||||
gpuProfiler->Flip(swapChain, frameID);
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
OPTICK_API GPUContext SetGpuContext(GPUContext context)
|
||||
|
|
|
@ -337,7 +337,8 @@ struct ThreadEntry
|
|||
// https://github.com/ulricheck/optick/pull/1/commits/1e5e1919816a64f235caa0f4b0bf20495225b1fa
|
||||
~ThreadEntry()
|
||||
{
|
||||
if((*threadTLS)!=nullptr)
|
||||
// SRS - check threadTLS for null before dereferencing, not *threadTLS
|
||||
if (threadTLS != nullptr)
|
||||
{
|
||||
*threadTLS = nullptr;
|
||||
}
|
||||
|
@ -650,4 +651,4 @@ public:
|
|||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
}
|
||||
|
||||
#endif //USE_OPTICK
|
||||
#endif //USE_OPTICK
|
||||
|
|
|
@ -61,7 +61,7 @@ namespace Optick
|
|||
int64 Platform::GetTime()
|
||||
{
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_REALTIME, &ts);
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
return ts.tv_sec * 1000000000LL + ts.tv_nsec;
|
||||
}
|
||||
}
|
||||
|
@ -306,4 +306,4 @@ SymbolEngine* Platform::CreateSymbolEngine()
|
|||
}
|
||||
#endif //OPTICK_ENABLE_TRACING
|
||||
#endif //USE_OPTICK
|
||||
#endif //__APPLE_CC__
|
||||
#endif //__APPLE_CC__
|
||||
|
|
|
@ -62,10 +62,35 @@ namespace Optick
|
|||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(updateLock);
|
||||
currentState = STATE_OFF;
|
||||
|
||||
// SRS - Resolve delayed GPU frame timestamps before dumping data
|
||||
for (uint32_t nodeIndex = 0; nodeIndex < nodes.size(); ++nodeIndex)
|
||||
{
|
||||
Node* node = nodes[nodeIndex];
|
||||
|
||||
uint32_t nextFrameIndex = (frameNumber + 1 - NUM_FRAMES_DELAY) % NUM_FRAMES_DELAY;
|
||||
QueryFrame& nextFrame = node->queryGpuframes[nextFrameIndex];
|
||||
|
||||
while (nextFrame.queryIndexStart != (uint32_t)-1 && nextFrame.queryIndexCount > 0 &&
|
||||
nextFrameIndex != frameNumber % NUM_FRAMES_DELAY)
|
||||
{
|
||||
WaitForFrame(nodeIndex, (uint64_t)nextFrameIndex);
|
||||
|
||||
uint32_t resolveStart = nextFrame.queryIndexStart % MAX_QUERIES_COUNT;
|
||||
uint32_t resolveFinish = resolveStart + nextFrame.queryIndexCount;
|
||||
ResolveTimestamps(nodeIndex, resolveStart, std::min<uint32_t>(resolveFinish, MAX_QUERIES_COUNT) - resolveStart);
|
||||
if (resolveFinish > MAX_QUERIES_COUNT)
|
||||
ResolveTimestamps(nodeIndex, 0, resolveFinish - MAX_QUERIES_COUNT);
|
||||
|
||||
nextFrameIndex = (nextFrameIndex + 1) % NUM_FRAMES_DELAY;
|
||||
nextFrame = node->queryGpuframes[nextFrameIndex];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GPUProfiler::Dump(uint32 /*mode*/)
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(updateLock);
|
||||
for (size_t nodeIndex = 0; nodeIndex < nodes.size(); ++nodeIndex)
|
||||
{
|
||||
Node* node = nodes[nodeIndex];
|
||||
|
@ -119,9 +144,9 @@ namespace Optick
|
|||
return event;
|
||||
}
|
||||
|
||||
EventData& GPUProfiler::AddVSyncEvent()
|
||||
EventData& GPUProfiler::AddVSyncEvent(const char *eventName)
|
||||
{
|
||||
static const EventDescription* VSyncDescription = EventDescription::Create("VSync", __FILE__, __LINE__);
|
||||
static const EventDescription* VSyncDescription = EventDescription::Create(eventName, __FILE__, __LINE__);
|
||||
EventData& event = nodes[currentNode]->gpuEventStorage[GPU_QUEUE_VSYNC]->eventBuffer.Add();
|
||||
event.description = VSyncDescription;
|
||||
event.start = EventTime::INVALID_TIMESTAMP;
|
||||
|
@ -139,6 +164,16 @@ namespace Optick
|
|||
return tag;
|
||||
}
|
||||
|
||||
TagData<uint32>& GPUProfiler::AddVSyncTag()
|
||||
{
|
||||
static const EventDescription* VSyncTagDescription = EventDescription::CreateShared("Frame");
|
||||
TagData<uint32>& tag = nodes[currentNode]->gpuEventStorage[GPU_QUEUE_VSYNC]->tagU32Buffer.Add();
|
||||
tag.description = VSyncTagDescription;
|
||||
tag.timestamp = EventTime::INVALID_TIMESTAMP;
|
||||
tag.data = 0;
|
||||
return tag;
|
||||
}
|
||||
|
||||
const char * GetGPUQueueName(GPUQueueType queue)
|
||||
{
|
||||
const char* GPUQueueToName[GPU_QUEUE_COUNT] = { "Graphics", "Compute", "Transfer", "VSync" };
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
|
||||
#include <atomic>
|
||||
#include <thread>
|
||||
#include <queue>
|
||||
|
||||
#include <d3d12.h>
|
||||
#include <dxgi.h>
|
||||
|
@ -80,16 +81,14 @@ namespace Optick
|
|||
ID3D12Resource* queryBuffer;
|
||||
ID3D12Device* device;
|
||||
|
||||
// VSync Stats
|
||||
// VSync / Present Stats
|
||||
DXGI_FRAME_STATISTICS prevFrameStatistics;
|
||||
std::queue<UINT> presentIdQueue;
|
||||
std::queue<uint32_t> frameIdQueue;
|
||||
|
||||
//void UpdateRange(uint32_t start, uint32_t finish)
|
||||
void InitNodeInternal(const char* nodeName, uint32_t nodeIndex, ID3D12CommandQueue* pCmdQueue);
|
||||
|
||||
void ResolveTimestamps(uint32_t startIndex, uint32_t count);
|
||||
|
||||
void WaitForFrame(uint64_t frameNumber);
|
||||
|
||||
public:
|
||||
GPUProfilerD3D12();
|
||||
~GPUProfilerD3D12();
|
||||
|
@ -98,7 +97,7 @@ namespace Optick
|
|||
|
||||
void QueryTimestamp(ID3D12GraphicsCommandList* context, int64_t* outCpuTimestamp);
|
||||
|
||||
void Flip(IDXGISwapChain* swapChain);
|
||||
void Flip(IDXGISwapChain* swapChain, uint32_t frameID);
|
||||
|
||||
|
||||
// Interface implementation
|
||||
|
@ -109,9 +108,13 @@ namespace Optick
|
|||
QueryTimestamp((ID3D12GraphicsCommandList*)context, outCpuTimestamp);
|
||||
}
|
||||
|
||||
void Flip(void* swapChain) override
|
||||
void ResolveTimestamps(uint32_t nodeIndex, uint32_t startIndex, uint32_t count) override;
|
||||
|
||||
void WaitForFrame(uint32_t nodeIndex, uint64_t frameNumber) override;
|
||||
|
||||
void Flip(void* swapChain, uint32_t frameID) override
|
||||
{
|
||||
Flip(static_cast<IDXGISwapChain*>(swapChain));
|
||||
Flip(static_cast<IDXGISwapChain*>(swapChain), frameID);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -241,11 +244,11 @@ namespace Optick
|
|||
}
|
||||
}
|
||||
|
||||
void GPUProfilerD3D12::ResolveTimestamps(uint32_t startIndex, uint32_t count)
|
||||
void GPUProfilerD3D12::ResolveTimestamps(uint32_t nodeIndex, uint32_t startIndex, uint32_t count)
|
||||
{
|
||||
if (count)
|
||||
{
|
||||
Node* node = nodes[currentNode];
|
||||
Node* node = nodes[nodeIndex];
|
||||
|
||||
D3D12_RANGE range = { sizeof(uint64_t)*startIndex, sizeof(uint64_t)*(startIndex + count) };
|
||||
void* pData = nullptr;
|
||||
|
@ -259,18 +262,18 @@ namespace Optick
|
|||
}
|
||||
}
|
||||
|
||||
void GPUProfilerD3D12::WaitForFrame(uint64_t frameNumberToWait)
|
||||
void GPUProfilerD3D12::WaitForFrame(uint32_t nodeIndex, uint64_t frameNumberToWait)
|
||||
{
|
||||
OPTICK_EVENT();
|
||||
|
||||
NodePayload* payload = nodePayloads[currentNode];
|
||||
NodePayload* payload = nodePayloads[nodeIndex];
|
||||
while (frameNumberToWait > payload->syncFence->GetCompletedValue())
|
||||
{
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
}
|
||||
}
|
||||
|
||||
void GPUProfilerD3D12::Flip(IDXGISwapChain* swapChain)
|
||||
void GPUProfilerD3D12::Flip(IDXGISwapChain* swapChain, uint32_t frameID)
|
||||
{
|
||||
OPTICK_CATEGORY("GPUProfilerD3D12::Flip", Category::Debug);
|
||||
|
||||
|
@ -328,38 +331,76 @@ namespace Optick
|
|||
commandList->ResolveQueryData(payload.queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, 0, finishIndex, queryBuffer, 0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Initialize present / frame statistics
|
||||
prevFrameStatistics = { 0 };
|
||||
swapChain->GetFrameStatistics(&prevFrameStatistics);
|
||||
|
||||
while (!presentIdQueue.empty())
|
||||
{
|
||||
presentIdQueue.pop();
|
||||
frameIdQueue.pop();
|
||||
}
|
||||
}
|
||||
|
||||
commandList->Close();
|
||||
|
||||
payload.commandQueue->ExecuteCommandLists(1, (ID3D12CommandList*const*)&commandList);
|
||||
payload.commandQueue->Signal(payload.syncFence, frameNumber);
|
||||
|
||||
// Save presentID to frameID correlation for the next present's vsync tag
|
||||
if (frameID > 0)
|
||||
{
|
||||
UINT prevPresentID = 0;
|
||||
HRESULT result = swapChain->GetLastPresentCount(&prevPresentID);
|
||||
if (result == S_OK)
|
||||
{
|
||||
presentIdQueue.push(prevPresentID + 1);
|
||||
frameIdQueue.push(frameID);
|
||||
}
|
||||
}
|
||||
|
||||
// Process VSync / Presentation timing
|
||||
DXGI_FRAME_STATISTICS currentFrameStatistics = { 0 };
|
||||
HRESULT result = swapChain->GetFrameStatistics(&currentFrameStatistics);
|
||||
if ((result == S_OK) && (currentFrameStatistics.SyncQPCTime.QuadPart > prevFrameStatistics.SyncQPCTime.QuadPart))
|
||||
{
|
||||
EventData& data = AddVSyncEvent("Present");
|
||||
data.start = prevFrameStatistics.SyncQPCTime.QuadPart;
|
||||
data.finish = currentFrameStatistics.SyncQPCTime.QuadPart;
|
||||
|
||||
while (!presentIdQueue.empty() && presentIdQueue.front() <= prevFrameStatistics.PresentCount)
|
||||
{
|
||||
if (presentIdQueue.front() == prevFrameStatistics.PresentCount)
|
||||
{
|
||||
TagData<uint32>& tag = AddVSyncTag();
|
||||
tag.timestamp = prevFrameStatistics.SyncQPCTime.QuadPart;
|
||||
tag.data = frameIdQueue.front();
|
||||
}
|
||||
|
||||
presentIdQueue.pop();
|
||||
frameIdQueue.pop();
|
||||
}
|
||||
|
||||
prevFrameStatistics = currentFrameStatistics;
|
||||
}
|
||||
|
||||
// Preparing Next Frame
|
||||
// Try resolve timestamps for the current frame
|
||||
if (frameNumber >= NUM_FRAMES_DELAY && nextFrame.queryIndexCount)
|
||||
{
|
||||
WaitForFrame(frameNumber + 1 - NUM_FRAMES_DELAY);
|
||||
WaitForFrame(currentNode, (uint64_t)frameNumber + 1 - NUM_FRAMES_DELAY);
|
||||
|
||||
uint32_t resolveStart = nextFrame.queryIndexStart % MAX_QUERIES_COUNT;
|
||||
uint32_t resolveFinish = resolveStart + nextFrame.queryIndexCount;
|
||||
ResolveTimestamps(resolveStart, std::min<uint32_t>(resolveFinish, MAX_QUERIES_COUNT) - resolveStart);
|
||||
ResolveTimestamps(currentNode, resolveStart, std::min<uint32_t>(resolveFinish, MAX_QUERIES_COUNT) - resolveStart);
|
||||
if (resolveFinish > MAX_QUERIES_COUNT)
|
||||
ResolveTimestamps(0, resolveFinish - MAX_QUERIES_COUNT);
|
||||
ResolveTimestamps(currentNode, 0, resolveFinish - MAX_QUERIES_COUNT);
|
||||
}
|
||||
|
||||
nextFrame.queryIndexStart = queryEnd;
|
||||
nextFrame.queryIndexCount = 0;
|
||||
|
||||
// Process VSync
|
||||
DXGI_FRAME_STATISTICS currentFrameStatistics = { 0 };
|
||||
HRESULT result = swapChain->GetFrameStatistics(&currentFrameStatistics);
|
||||
if ((result == S_OK) && (prevFrameStatistics.PresentCount + 1 == currentFrameStatistics.PresentCount))
|
||||
{
|
||||
EventData& data = AddVSyncEvent();
|
||||
data.start = prevFrameStatistics.SyncQPCTime.QuadPart;
|
||||
data.finish = currentFrameStatistics.SyncQPCTime.QuadPart;
|
||||
}
|
||||
prevFrameStatistics = currentFrameStatistics;
|
||||
}
|
||||
|
||||
++frameNumber;
|
||||
|
@ -399,4 +440,4 @@ namespace Optick
|
|||
}
|
||||
|
||||
#endif //OPTICK_ENABLE_GPU_D3D12
|
||||
#endif //USE_OPTICK
|
||||
#endif //USE_OPTICK
|
||||
|
|
|
@ -61,7 +61,8 @@ namespace Optick
|
|||
|
||||
int64_t GetCPUTimestamp(int64_t gpuTimestamp)
|
||||
{
|
||||
return timestampCPU + (gpuTimestamp - timestampGPU) * frequencyCPU / frequencyGPU;
|
||||
// SRS - Improve accuracy of GPU to CPU timestamp conversion by using floating point doubles
|
||||
return timestampCPU + (int64_t)(double(gpuTimestamp - timestampGPU) * (double)frequencyCPU / (double)frequencyGPU);
|
||||
}
|
||||
|
||||
ClockSynchronization() : frequencyCPU(0), frequencyGPU(0), timestampCPU(0), timestampGPU(0) {}
|
||||
|
@ -122,8 +123,9 @@ namespace Optick
|
|||
void Reset();
|
||||
|
||||
EventData& AddFrameEvent();
|
||||
EventData& AddVSyncEvent();
|
||||
EventData& AddVSyncEvent(const char *eventName = "VSync");
|
||||
TagData<uint32>& AddFrameTag();
|
||||
TagData<uint32>& AddVSyncTag();
|
||||
|
||||
public:
|
||||
GPUProfiler();
|
||||
|
@ -141,7 +143,9 @@ namespace Optick
|
|||
// Interface to implement
|
||||
virtual ClockSynchronization GetClockSynchronization(uint32_t nodeIndex) = 0;
|
||||
virtual void QueryTimestamp(void* context, int64_t* cpuTimestampOut) = 0;
|
||||
virtual void Flip(void* swapChain) = 0;
|
||||
virtual void ResolveTimestamps(uint32_t nodeIndex, uint32_t startIndex, uint32_t count) = 0;
|
||||
virtual void WaitForFrame(uint32_t nodeIndex, uint64_t frameNumber) = 0;
|
||||
virtual void Flip(void* swapChain, uint32_t frameID) = 0;
|
||||
|
||||
virtual ~GPUProfiler();
|
||||
};
|
||||
|
|
|
@ -54,16 +54,18 @@ namespace Optick
|
|||
VkQueue queue;
|
||||
VkQueryPool queryPool;
|
||||
VkCommandPool commandPool;
|
||||
VkEvent event;
|
||||
|
||||
array<Frame, NUM_FRAMES_DELAY> frames;
|
||||
|
||||
NodePayload() : vulkanFunctions(), device(VK_NULL_HANDLE), physicalDevice(VK_NULL_HANDLE), queue(VK_NULL_HANDLE), queryPool(VK_NULL_HANDLE), commandPool(VK_NULL_HANDLE) {}
|
||||
NodePayload() : vulkanFunctions(), device(VK_NULL_HANDLE), physicalDevice(VK_NULL_HANDLE), queue(VK_NULL_HANDLE), queryPool(VK_NULL_HANDLE), commandPool(VK_NULL_HANDLE), event(VK_NULL_HANDLE) {}
|
||||
~NodePayload();
|
||||
};
|
||||
vector<NodePayload*> nodePayloads;
|
||||
|
||||
void ResolveTimestamps(VkCommandBuffer commandBuffer, uint32_t startIndex, uint32_t count);
|
||||
void WaitForFrame(uint64_t frameNumber);
|
||||
// VSync / Present Stats
|
||||
uint64_t prevPresentTime;
|
||||
uint32_t prevPresentID;
|
||||
|
||||
public:
|
||||
GPUProfilerVulkan();
|
||||
|
@ -71,6 +73,7 @@ namespace Optick
|
|||
|
||||
void InitDevice(VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount, const VulkanFunctions* functions);
|
||||
void QueryTimestamp(VkCommandBuffer commandBuffer, int64_t* outCpuTimestamp);
|
||||
void Flip(VkSwapchainKHR swapChain);
|
||||
|
||||
|
||||
// Interface implementation
|
||||
|
@ -81,7 +84,14 @@ namespace Optick
|
|||
QueryTimestamp((VkCommandBuffer)context, outCpuTimestamp);
|
||||
}
|
||||
|
||||
void Flip(void* swapChain) override;
|
||||
void ResolveTimestamps(uint32_t nodeIndex, uint32_t startIndex, uint32_t count) override;
|
||||
|
||||
void WaitForFrame(uint32_t nodeIndex, uint64_t frameNumber) override;
|
||||
|
||||
void Flip(void* swapChain, uint32_t frameID) override
|
||||
{
|
||||
Flip(static_cast<VkSwapchainKHR>(swapChain));
|
||||
}
|
||||
};
|
||||
|
||||
void InitGpuVulkan(VkDevice* vkDevices, VkPhysicalDevice* vkPhysicalDevices, VkQueue* vkQueues, uint32_t* cmdQueuesFamily, uint32_t numQueues, const VulkanFunctions* functions)
|
||||
|
@ -93,6 +103,8 @@ namespace Optick
|
|||
|
||||
GPUProfilerVulkan::GPUProfilerVulkan()
|
||||
{
|
||||
prevPresentTime = 0;
|
||||
prevPresentID = 0;
|
||||
}
|
||||
|
||||
void GPUProfilerVulkan::InitDevice(VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount, const VulkanFunctions* functions)
|
||||
|
@ -107,9 +119,14 @@ namespace Optick
|
|||
vkGetPhysicalDeviceProperties,
|
||||
(PFN_vkCreateQueryPool_)vkCreateQueryPool,
|
||||
(PFN_vkCreateCommandPool_)vkCreateCommandPool,
|
||||
(PFN_vkCreateEvent_)vkCreateEvent,
|
||||
(PFN_vkAllocateCommandBuffers_)vkAllocateCommandBuffers,
|
||||
(PFN_vkCreateFence_)vkCreateFence,
|
||||
vkCmdResetQueryPool,
|
||||
vkResetQueryPool,
|
||||
(PFN_vkCmdWaitEvents_)vkCmdWaitEvents,
|
||||
(PFN_vkResetEvent_)vkResetEvent,
|
||||
(PFN_vkSetEvent_)vkSetEvent,
|
||||
(PFN_vkQueueSubmit_)vkQueueSubmit,
|
||||
(PFN_vkWaitForFences_)vkWaitForFences,
|
||||
(PFN_vkResetCommandBuffer_)vkResetCommandBuffer,
|
||||
|
@ -120,8 +137,10 @@ namespace Optick
|
|||
(PFN_vkResetFences_)vkResetFences,
|
||||
vkDestroyCommandPool,
|
||||
vkDestroyQueryPool,
|
||||
vkDestroyEvent,
|
||||
vkDestroyFence,
|
||||
vkFreeCommandBuffers,
|
||||
nullptr, // dynamically define vkGetPastPresentationTimingGOOGLE if VK_GOOGLE_display_timing extension available
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -137,6 +156,11 @@ namespace Optick
|
|||
commandPoolCreateInfo.pNext = 0;
|
||||
commandPoolCreateInfo.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
|
||||
|
||||
VkEventCreateInfo eventCreateInfo;
|
||||
eventCreateInfo.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO;
|
||||
eventCreateInfo.pNext = 0;
|
||||
eventCreateInfo.flags = 0;
|
||||
|
||||
nodes.resize(nodeCount);
|
||||
nodePayloads.resize(nodeCount);
|
||||
|
||||
|
@ -150,6 +174,7 @@ namespace Optick
|
|||
NodePayload* nodePayload = Memory::New<NodePayload>();
|
||||
nodePayloads[i] = nodePayload;
|
||||
nodePayload->vulkanFunctions = &vulkanFunctions;
|
||||
nodePayload->vulkanFunctions->vkGetPastPresentationTimingGOOGLE = (PFN_vkGetPastPresentationTimingGOOGLE_)vkGetDeviceProcAddr(devices[i], "vkGetPastPresentationTimingGOOGLE");
|
||||
nodePayload->device = devices[i];
|
||||
nodePayload->physicalDevice = physicalDevices[i];
|
||||
nodePayload->queue = cmdQueues[i];
|
||||
|
@ -163,6 +188,10 @@ namespace Optick
|
|||
OPTICK_ASSERT(r == VK_SUCCESS, "Failed");
|
||||
(void)r;
|
||||
|
||||
r = (VkResult)(*vulkanFunctions.vkCreateEvent)(nodePayload->device, &eventCreateInfo, 0, &nodePayload->event);
|
||||
OPTICK_ASSERT(r == VK_SUCCESS, "Failed");
|
||||
(void)r;
|
||||
|
||||
for (uint32_t j = 0; j < nodePayload->frames.size(); ++j)
|
||||
{
|
||||
Frame& frame = nodePayload->frames[j];
|
||||
|
@ -221,16 +250,16 @@ namespace Optick
|
|||
}
|
||||
}
|
||||
|
||||
void GPUProfilerVulkan::ResolveTimestamps(VkCommandBuffer commandBuffer, uint32_t startIndex, uint32_t count)
|
||||
void GPUProfilerVulkan::ResolveTimestamps(uint32_t nodeIndex, uint32_t startIndex, uint32_t count)
|
||||
{
|
||||
if (count)
|
||||
{
|
||||
Node* node = nodes[currentNode];
|
||||
Node* node = nodes[nodeIndex];
|
||||
|
||||
NodePayload* payload = nodePayloads[currentNode];
|
||||
NodePayload* payload = nodePayloads[nodeIndex];
|
||||
|
||||
OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkGetQueryPoolResults)(payload->device, payload->queryPool, startIndex, count, 8 * count, &nodes[currentNode]->queryGpuTimestamps[startIndex], 8, VK_QUERY_RESULT_64_BIT));
|
||||
(*vulkanFunctions.vkCmdResetQueryPool)(commandBuffer, payload->queryPool, startIndex, count);
|
||||
OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkGetQueryPoolResults)(payload->device, payload->queryPool, startIndex, count, 8 * (size_t)count, &nodes[nodeIndex]->queryGpuTimestamps[startIndex], 8, VK_QUERY_RESULT_64_BIT));
|
||||
(*vulkanFunctions.vkResetQueryPool)(payload->device, payload->queryPool, startIndex, count);
|
||||
|
||||
// Convert GPU timestamps => CPU Timestamps
|
||||
for (uint32_t index = startIndex; index < startIndex + count; ++index)
|
||||
|
@ -238,19 +267,19 @@ namespace Optick
|
|||
}
|
||||
}
|
||||
|
||||
void GPUProfilerVulkan::WaitForFrame(uint64_t frameNumberToWait)
|
||||
void GPUProfilerVulkan::WaitForFrame(uint32_t nodeIndex, uint64_t frameNumberToWait)
|
||||
{
|
||||
OPTICK_EVENT();
|
||||
|
||||
int r = VK_SUCCESS;
|
||||
do
|
||||
{
|
||||
NodePayload& payload = *nodePayloads[currentNode];
|
||||
r = (*vulkanFunctions.vkWaitForFences)(nodePayloads[currentNode]->device, 1, &payload.frames[frameNumberToWait % payload.frames.size()].fence, 1, 1000 * 30);
|
||||
NodePayload& payload = *nodePayloads[nodeIndex];
|
||||
r = (*vulkanFunctions.vkWaitForFences)(nodePayloads[nodeIndex]->device, 1, &payload.frames[frameNumberToWait % payload.frames.size()].fence, 1, 1000 * 30);
|
||||
} while (r != VK_SUCCESS);
|
||||
}
|
||||
|
||||
void GPUProfilerVulkan::Flip(void* /*swapChain*/)
|
||||
void GPUProfilerVulkan::Flip(VkSwapchainKHR swapChain)
|
||||
{
|
||||
OPTICK_CATEGORY("GPUProfilerVulkan::Flip", Category::Debug);
|
||||
|
||||
|
@ -276,6 +305,7 @@ namespace Optick
|
|||
VkQueue queue = payload.queue;
|
||||
|
||||
(*vulkanFunctions.vkWaitForFences)(device, 1, &fence, 1, (uint64_t)-1);
|
||||
(*vulkanFunctions.vkResetFences)(device, 1, &fence);
|
||||
|
||||
VkCommandBufferBeginInfo commandBufferBeginInfo;
|
||||
commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
||||
|
@ -283,7 +313,6 @@ namespace Optick
|
|||
commandBufferBeginInfo.pInheritanceInfo = 0;
|
||||
commandBufferBeginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
|
||||
OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkBeginCommandBuffer)(commandBuffer, &commandBufferBeginInfo));
|
||||
(*vulkanFunctions.vkResetFences)(device, 1, &fence);
|
||||
|
||||
if (EventData* frameEvent = currentFrame.frameEvent)
|
||||
QueryTimestamp(commandBuffer, &frameEvent->finish);
|
||||
|
@ -311,8 +340,16 @@ namespace Optick
|
|||
|
||||
if (queryBegin != (uint32_t)-1)
|
||||
{
|
||||
OPTICK_ASSERT(queryEnd - queryBegin <= MAX_QUERIES_COUNT, "Too many queries in one frame? Increase GPUProfiler::MAX_QUERIES_COUNT to fix the problem!");
|
||||
currentFrame.queryIndexCount = queryEnd - queryBegin;
|
||||
}
|
||||
else
|
||||
{
|
||||
currentFrame.queryIndexStart = 0;
|
||||
currentFrame.queryIndexCount = queryEnd;
|
||||
prevPresentTime = 0;
|
||||
prevPresentID = 0;
|
||||
}
|
||||
|
||||
// Preparing Next Frame
|
||||
// Try resolve timestamps for the current frame
|
||||
|
@ -323,12 +360,44 @@ namespace Optick
|
|||
|
||||
if (startIndex < finishIndex)
|
||||
{
|
||||
ResolveTimestamps(commandBuffer, startIndex, finishIndex - startIndex);
|
||||
ResolveTimestamps(currentNode, startIndex, finishIndex - startIndex);
|
||||
}
|
||||
else if (startIndex > finishIndex)
|
||||
{
|
||||
ResolveTimestamps(commandBuffer, startIndex, MAX_QUERIES_COUNT - startIndex);
|
||||
ResolveTimestamps(commandBuffer, 0, finishIndex);
|
||||
ResolveTimestamps(currentNode, startIndex, MAX_QUERIES_COUNT - startIndex);
|
||||
ResolveTimestamps(currentNode, 0, finishIndex);
|
||||
}
|
||||
|
||||
// SRS - Add Vulkan presentation / vsync timing if VK_GOOGLE_display_timing extension available
|
||||
if (vulkanFunctions.vkGetPastPresentationTimingGOOGLE)
|
||||
{
|
||||
uint32_t queryPresentTimingCount = 0;
|
||||
(*vulkanFunctions.vkGetPastPresentationTimingGOOGLE)(device, swapChain, &queryPresentTimingCount, nullptr);
|
||||
if (queryPresentTimingCount > 0)
|
||||
{
|
||||
// Query Presentation Timing / VSync
|
||||
vector<VkPastPresentationTimingGOOGLE> queryPresentTimings;
|
||||
queryPresentTimings.resize(queryPresentTimingCount);
|
||||
(*vulkanFunctions.vkGetPastPresentationTimingGOOGLE)(device, swapChain, &queryPresentTimingCount, &queryPresentTimings[0]);
|
||||
for (uint32_t presentIndex = 0; presentIndex < queryPresentTimingCount; presentIndex++)
|
||||
{
|
||||
// Process Presentation Timing / VSync if swap image was actually presented (i.e. not dropped)
|
||||
VkPastPresentationTimingGOOGLE presentTiming = queryPresentTimings[presentIndex];
|
||||
if (presentTiming.actualPresentTime > prevPresentTime)
|
||||
{
|
||||
EventData& data = AddVSyncEvent("Present");
|
||||
data.start = prevPresentTime;
|
||||
data.finish = presentTiming.actualPresentTime;
|
||||
|
||||
TagData<uint32>& tag = AddVSyncTag();
|
||||
tag.timestamp = prevPresentTime;
|
||||
tag.data = prevPresentID;
|
||||
|
||||
prevPresentTime = presentTiming.actualPresentTime;
|
||||
prevPresentID = presentTiming.presentID;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -357,10 +426,12 @@ namespace Optick
|
|||
|
||||
(*vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1);
|
||||
(*vulkanFunctions.vkResetFences)(Device, 1, &Fence);
|
||||
(*vulkanFunctions.vkResetEvent)(Device, nodePayloads[nodeIndex]->event);
|
||||
(*vulkanFunctions.vkResetCommandBuffer)(CB, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT);
|
||||
(*vulkanFunctions.vkBeginCommandBuffer)(CB, &commandBufferBeginInfo);
|
||||
(*vulkanFunctions.vkCmdResetQueryPool)(CB, nodePayloads[nodeIndex]->queryPool, 0, 1);
|
||||
(*vulkanFunctions.vkCmdWriteTimestamp)(CB, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, nodePayloads[nodeIndex]->queryPool, 0);
|
||||
(*vulkanFunctions.vkCmdWaitEvents)(CB, 1, &nodePayloads[nodeIndex]->event, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_HOST_BIT | VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, nullptr, 0, nullptr, 0, nullptr);
|
||||
(*vulkanFunctions.vkCmdWriteTimestamp)(CB, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, nodePayloads[nodeIndex]->queryPool, 0);
|
||||
(*vulkanFunctions.vkEndCommandBuffer)(CB);
|
||||
|
||||
VkSubmitInfo submitInfo = {};
|
||||
|
@ -373,22 +444,35 @@ namespace Optick
|
|||
submitInfo.signalSemaphoreCount = 0;
|
||||
submitInfo.pSignalSemaphores = nullptr;
|
||||
(*vulkanFunctions.vkQueueSubmit)(nodePayloads[nodeIndex]->queue, 1, &submitInfo, Fence);
|
||||
(*vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1);
|
||||
|
||||
// SRS - Improve GPU to CPU clock offset calibration by using Vulkan events
|
||||
// thanks to cdwfs for concept at https://gist.github.com/cdwfs/4222ca09cb259f8dd50f7f2cf7d09179
|
||||
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||
(*vulkanFunctions.vkSetEvent)(Device, nodePayloads[nodeIndex]->event);
|
||||
clock.timestampCPU = GetHighPrecisionTime();
|
||||
(*vulkanFunctions.vkWaitForFences)(Device, 1, &Fence, 1, (uint64_t)-1);
|
||||
(*vulkanFunctions.vkResetFences)(Device, 1, &Fence);
|
||||
clock.timestampGPU = 0;
|
||||
(*vulkanFunctions.vkGetQueryPoolResults)(Device, nodePayloads[nodeIndex]->queryPool, 0, 1, 8, &clock.timestampGPU, 8, VK_QUERY_RESULT_64_BIT);
|
||||
clock.timestampCPU = GetHighPrecisionTime();
|
||||
clock.frequencyCPU = GetHighPrecisionFrequency();
|
||||
|
||||
// SRS - Improve GPU to CPU clock frequency scaling by using floating point doubles
|
||||
clock.frequencyCPU = GetHighPrecisionFrequency();
|
||||
VkPhysicalDeviceProperties Properties;
|
||||
(*vulkanFunctions.vkGetPhysicalDeviceProperties)(nodePayloads[nodeIndex]->physicalDevice, &Properties);
|
||||
clock.frequencyGPU = (uint64_t)(1000000000ll / Properties.limits.timestampPeriod);
|
||||
clock.frequencyGPU = (int64_t)(1000000000.0 / (double)Properties.limits.timestampPeriod);
|
||||
|
||||
// SRS - Reset entire query pool to clear clock sync query + any leftover queries from previous run
|
||||
(*vulkanFunctions.vkBeginCommandBuffer)(CB, &commandBufferBeginInfo);
|
||||
(*vulkanFunctions.vkCmdResetQueryPool)(CB, nodePayloads[nodeIndex]->queryPool, 0, MAX_QUERIES_COUNT);
|
||||
(*vulkanFunctions.vkEndCommandBuffer)(CB);
|
||||
(*vulkanFunctions.vkQueueSubmit)(nodePayloads[nodeIndex]->queue, 1, &submitInfo, Fence);
|
||||
|
||||
return clock;
|
||||
}
|
||||
|
||||
GPUProfilerVulkan::NodePayload::~NodePayload()
|
||||
{
|
||||
(*vulkanFunctions->vkDestroyEvent)(device, event, nullptr);
|
||||
(*vulkanFunctions->vkDestroyCommandPool)(device, commandPool, nullptr);
|
||||
(*vulkanFunctions->vkDestroyQueryPool)(device, queryPool, nullptr);
|
||||
}
|
||||
|
@ -419,4 +503,4 @@ namespace Optick
|
|||
}
|
||||
}
|
||||
#endif //OPTICK_ENABLE_GPU_D3D12
|
||||
#endif //USE_OPTICK
|
||||
#endif //USE_OPTICK
|
||||
|
|
|
@ -792,10 +792,13 @@ void idRenderBackend::FillDepthBufferFast( drawSurf_t** drawSurfs, int numDrawSu
|
|||
{
|
||||
OPTICK_EVENT( "Render_FillDepthBufferFast" );
|
||||
|
||||
#if USE_OPTICK_GPU
|
||||
OPTICK_GPU_CONTEXT( ( ID3D12GraphicsCommandList* ) commandList->getNativeObject( nvrhi::ObjectTypes::D3D12_GraphicsCommandList ) );
|
||||
nvrhi::ObjectType commandObject = nvrhi::ObjectTypes::D3D12_GraphicsCommandList;
|
||||
if( deviceManager->GetGraphicsAPI() == nvrhi::GraphicsAPI::VULKAN )
|
||||
{
|
||||
commandObject = nvrhi::ObjectTypes::VK_CommandBuffer;
|
||||
}
|
||||
OPTICK_GPU_CONTEXT( ( void* ) commandList->getNativeObject( commandObject ) );
|
||||
OPTICK_GPU_EVENT( "Render_FillDepthBufferFast" );
|
||||
#endif
|
||||
|
||||
if( numDrawSurfs == 0 )
|
||||
{
|
||||
|
@ -3357,10 +3360,13 @@ void idRenderBackend::ShadowAtlasPass( const viewDef_t* _viewDef )
|
|||
|
||||
OPTICK_EVENT( "Render_ShadowAtlas" );
|
||||
|
||||
#if USE_OPTICK_GPU
|
||||
OPTICK_GPU_CONTEXT( ( ID3D12GraphicsCommandList* ) commandList->getNativeObject( nvrhi::ObjectTypes::D3D12_GraphicsCommandList ) );
|
||||
nvrhi::ObjectType commandObject = nvrhi::ObjectTypes::D3D12_GraphicsCommandList;
|
||||
if( deviceManager->GetGraphicsAPI() == nvrhi::GraphicsAPI::VULKAN )
|
||||
{
|
||||
commandObject = nvrhi::ObjectTypes::VK_CommandBuffer;
|
||||
}
|
||||
OPTICK_GPU_CONTEXT( ( void* ) commandList->getNativeObject( commandObject ) );
|
||||
OPTICK_GPU_EVENT( "Render_ShadowAtlas" );
|
||||
#endif
|
||||
|
||||
renderLog.OpenMainBlock( MRB_SHADOW_ATLAS_PASS );
|
||||
renderLog.OpenBlock( "Render_ShadowAtlas", colorYellow );
|
||||
|
@ -3670,10 +3676,13 @@ void idRenderBackend::DrawInteractions( const viewDef_t* _viewDef )
|
|||
|
||||
OPTICK_EVENT( "Render_Interactions" );
|
||||
|
||||
#if USE_OPTICK_GPU
|
||||
OPTICK_GPU_CONTEXT( ( ID3D12GraphicsCommandList* ) commandList->getNativeObject( nvrhi::ObjectTypes::D3D12_GraphicsCommandList ) );
|
||||
nvrhi::ObjectType commandObject = nvrhi::ObjectTypes::D3D12_GraphicsCommandList;
|
||||
if( deviceManager->GetGraphicsAPI() == nvrhi::GraphicsAPI::VULKAN )
|
||||
{
|
||||
commandObject = nvrhi::ObjectTypes::VK_CommandBuffer;
|
||||
}
|
||||
OPTICK_GPU_CONTEXT( ( void* ) commandList->getNativeObject( commandObject ) );
|
||||
OPTICK_GPU_EVENT( "Render_Interactions" );
|
||||
#endif
|
||||
|
||||
renderLog.OpenMainBlock( MRB_DRAW_INTERACTIONS );
|
||||
renderLog.OpenBlock( "Render_Interactions", colorYellow );
|
||||
|
@ -5389,13 +5398,13 @@ void idRenderBackend::DrawViewInternal( const viewDef_t* _viewDef, const int ste
|
|||
OPTICK_EVENT( "Backend_DrawViewInternal" );
|
||||
OPTICK_TAG( "stereoEye", stereoEye );
|
||||
|
||||
#if USE_OPTICK_GPU
|
||||
//uint32_t swapIndex = deviceManager->GetCurrentBackBufferIndex();
|
||||
//idStr eventLabel;
|
||||
//eventLabel.Format( "DrawView( frameIndex = %i, swapIndex = %i ) ", taaPass->GetFrameIndex(), swapIndex );
|
||||
OPTICK_GPU_CONTEXT( ( ID3D12GraphicsCommandList* ) commandList->getNativeObject( nvrhi::ObjectTypes::D3D12_GraphicsCommandList ) );
|
||||
nvrhi::ObjectType commandObject = nvrhi::ObjectTypes::D3D12_GraphicsCommandList;
|
||||
if( deviceManager->GetGraphicsAPI() == nvrhi::GraphicsAPI::VULKAN )
|
||||
{
|
||||
commandObject = nvrhi::ObjectTypes::VK_CommandBuffer;
|
||||
}
|
||||
OPTICK_GPU_CONTEXT( ( void* ) commandList->getNativeObject( commandObject ) );
|
||||
OPTICK_GPU_EVENT( "DrawView" );
|
||||
#endif
|
||||
|
||||
renderLog.OpenBlock( "Render_DrawViewInternal", colorRed );
|
||||
|
||||
|
|
|
@ -43,13 +43,6 @@ If you have questions concerning this license or the applicable additional terms
|
|||
|
||||
#include "PipelineCache.h"
|
||||
|
||||
|
||||
#if USE_OPTICK
|
||||
#define USE_OPTICK_GPU 0
|
||||
#else
|
||||
#define USE_OPTICK_GPU 0
|
||||
#endif
|
||||
|
||||
struct tmu_t
|
||||
{
|
||||
unsigned int current2DMap;
|
||||
|
|
|
@ -459,6 +459,8 @@ bool DeviceManager_DX12::CreateDeviceAndSwapChain()
|
|||
|
||||
void DeviceManager_DX12::DestroyDeviceAndSwapChain()
|
||||
{
|
||||
OPTICK_SHUTDOWN();
|
||||
|
||||
m_RhiSwapChainBuffers.clear();
|
||||
m_RendererString.clear();
|
||||
|
||||
|
@ -610,7 +612,7 @@ void DeviceManager_DX12::Present()
|
|||
presentFlags |= DXGI_PRESENT_ALLOW_TEARING;
|
||||
}
|
||||
|
||||
OPTICK_GPU_FLIP( m_SwapChain.Get() );
|
||||
OPTICK_GPU_FLIP( m_SwapChain.Get(), idLib::frameNumber - 1 );
|
||||
OPTICK_CATEGORY( "DX12_Present", Optick::Category::Wait );
|
||||
|
||||
// SRS - Don't change m_DeviceParams.vsyncEnabled here, simply test for vsync mode 2 to set DXGI SyncInterval
|
||||
|
|
|
@ -35,7 +35,7 @@
|
|||
#include <sys/DeviceManager.h>
|
||||
|
||||
#include <nvrhi/vulkan.h>
|
||||
// SRS - optionally needed for VK_MVK_MOLTENVK_EXTENSION_NAME and MoltenVK runtime config visibility
|
||||
// SRS - optionally needed for MoltenVK runtime config visibility
|
||||
#if defined(__APPLE__) && defined( USE_MoltenVK )
|
||||
#include <MoltenVK/vk_mvk_moltenvk.h>
|
||||
|
||||
|
@ -181,10 +181,6 @@ private:
|
|||
{
|
||||
// instance
|
||||
{
|
||||
#if defined(__APPLE__) && defined( USE_MoltenVK )
|
||||
// SRS - needed for using MoltenVK configuration on macOS (if USE_MoltenVK defined)
|
||||
VK_MVK_MOLTENVK_EXTENSION_NAME,
|
||||
#endif
|
||||
VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME
|
||||
},
|
||||
// layers
|
||||
|
@ -193,12 +189,9 @@ private:
|
|||
{
|
||||
VK_KHR_SWAPCHAIN_EXTENSION_NAME,
|
||||
VK_KHR_MAINTENANCE1_EXTENSION_NAME,
|
||||
#if defined(__APPLE__)
|
||||
#if defined( VK_KHR_portability_subset )
|
||||
VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME,
|
||||
#endif
|
||||
VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
|
||||
VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME,
|
||||
#if defined(__APPLE__) && defined( VK_KHR_portability_subset )
|
||||
// SRS - This is required for using the MoltenVK portability subset implementation on macOS
|
||||
VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME
|
||||
#endif
|
||||
},
|
||||
};
|
||||
|
@ -229,6 +222,9 @@ private:
|
|||
VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME,
|
||||
VK_NV_MESH_SHADER_EXTENSION_NAME,
|
||||
VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME,
|
||||
#if USE_OPTICK
|
||||
VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME,
|
||||
#endif
|
||||
VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME
|
||||
},
|
||||
};
|
||||
|
@ -287,6 +283,8 @@ private:
|
|||
bool enablePModeImmediate = false; // r_swapInterval = 0 (defaults to eFifo if not available)
|
||||
bool enablePModeFifoRelaxed = false; // r_swapInterval = 1 (defaults to eFifo if not available)
|
||||
|
||||
// SRS - flag indicating support for presentation timing via VK_GOOGLE_display_timing extension
|
||||
bool displayTimingEnabled = false;
|
||||
|
||||
private:
|
||||
static VKAPI_ATTR VkBool32 VKAPI_CALL vulkanDebugCallback(
|
||||
|
@ -778,6 +776,10 @@ bool DeviceManager_VK::createDevice()
|
|||
{
|
||||
sync2Supported = true;
|
||||
}
|
||||
else if( ext == VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME )
|
||||
{
|
||||
displayTimingEnabled = true;
|
||||
}
|
||||
}
|
||||
|
||||
std::unordered_set<int> uniqueQueueFamilies =
|
||||
|
@ -828,6 +830,9 @@ bool DeviceManager_VK::createDevice()
|
|||
|
||||
#if defined(__APPLE__) && defined( VK_KHR_portability_subset )
|
||||
auto portabilityFeatures = vk::PhysicalDevicePortabilitySubsetFeaturesKHR()
|
||||
#if USE_OPTICK
|
||||
.setEvents( true )
|
||||
#endif
|
||||
.setImageViewFormatSwizzle( true );
|
||||
|
||||
void* pNext = &portabilityFeatures;
|
||||
|
@ -865,6 +870,9 @@ bool DeviceManager_VK::createDevice()
|
|||
.setTimelineSemaphore( true )
|
||||
.setShaderSampledImageArrayNonUniformIndexing( true )
|
||||
.setBufferDeviceAddress( bufferAddressSupported )
|
||||
#if USE_OPTICK
|
||||
.setHostQueryReset( true )
|
||||
#endif
|
||||
.setPNext( pNext );
|
||||
|
||||
auto layerVec = stringSetToVector( enabledExtensions.layers );
|
||||
|
@ -1226,11 +1234,17 @@ bool DeviceManager_VK::CreateDeviceAndSwapChain()
|
|||
|
||||
#undef CHECK
|
||||
|
||||
OPTICK_GPU_INIT_VULKAN( ( VkDevice* )&m_VulkanDevice, ( VkPhysicalDevice* )&m_VulkanPhysicalDevice, ( VkQueue* )&m_GraphicsQueue, ( uint32_t* )&m_GraphicsQueueFamily, 1, nullptr );
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void DeviceManager_VK::DestroyDeviceAndSwapChain()
|
||||
{
|
||||
OPTICK_SHUTDOWN();
|
||||
|
||||
m_VulkanDevice.waitIdle();
|
||||
|
||||
m_FrameWaitQuery = nullptr;
|
||||
|
||||
for( int i = 0; i < m_SwapChainImages.size(); i++ )
|
||||
|
@ -1310,12 +1324,30 @@ void DeviceManager_VK::EndFrame()
|
|||
|
||||
void DeviceManager_VK::Present()
|
||||
{
|
||||
OPTICK_GPU_FLIP( m_SwapChain );
|
||||
OPTICK_CATEGORY( "Vulkan_Present", Optick::Category::Wait );
|
||||
|
||||
void* pNext = nullptr;
|
||||
#if USE_OPTICK
|
||||
// SRS - if display timing enabled, define the presentID for labeling the Optick GPU VSync / Present queue
|
||||
vk::PresentTimeGOOGLE presentTime = vk::PresentTimeGOOGLE()
|
||||
.setPresentID( idLib::frameNumber - 1 );
|
||||
vk::PresentTimesInfoGOOGLE presentTimesInfo = vk::PresentTimesInfoGOOGLE()
|
||||
.setSwapchainCount( 1 )
|
||||
.setPTimes( &presentTime );
|
||||
if( displayTimingEnabled )
|
||||
{
|
||||
pNext = &presentTimesInfo;
|
||||
}
|
||||
#endif
|
||||
|
||||
vk::PresentInfoKHR info = vk::PresentInfoKHR()
|
||||
.setWaitSemaphoreCount( 1 )
|
||||
.setPWaitSemaphores( &m_PresentSemaphore )
|
||||
.setSwapchainCount( 1 )
|
||||
.setPSwapchains( &m_SwapChain )
|
||||
.setPImageIndices( &m_SwapChainIndex );
|
||||
.setPImageIndices( &m_SwapChainIndex )
|
||||
.setPNext( pNext );
|
||||
|
||||
const vk::Result res = m_PresentQueue.presentKHR( &info );
|
||||
assert( res == vk::Result::eSuccess || res == vk::Result::eErrorOutOfDateKHR || res == vk::Result::eSuboptimalKHR );
|
||||
|
@ -1338,6 +1370,8 @@ void DeviceManager_VK::Present()
|
|||
{
|
||||
if constexpr( NUM_FRAME_DATA > 2 )
|
||||
{
|
||||
OPTICK_CATEGORY( "Vulkan_Sync3", Optick::Category::Wait );
|
||||
|
||||
// SRS - For triple buffering, sync on previous frame's command queue completion
|
||||
m_NvrhiDevice->waitEventQuery( m_FrameWaitQuery );
|
||||
}
|
||||
|
@ -1347,6 +1381,8 @@ void DeviceManager_VK::Present()
|
|||
|
||||
if constexpr( NUM_FRAME_DATA < 3 )
|
||||
{
|
||||
OPTICK_CATEGORY( "Vulkan_Sync2", Optick::Category::Wait );
|
||||
|
||||
// SRS - For double buffering, sync on current frame's command queue completion
|
||||
m_NvrhiDevice->waitEventQuery( m_FrameWaitQuery );
|
||||
}
|
||||
|
|
|
@ -521,6 +521,19 @@ int main( int argc, const char** argv )
|
|||
Sys_Printf( "memory consistency checking enabled\n" );
|
||||
#endif
|
||||
|
||||
// Setting memory allocators
|
||||
OPTICK_SET_MEMORY_ALLOCATOR(
|
||||
[]( size_t size ) -> void* { return operator new( size ); },
|
||||
[]( void* p )
|
||||
{
|
||||
operator delete( p );
|
||||
},
|
||||
[]()
|
||||
{
|
||||
/* Do some TLS initialization here if needed */
|
||||
}
|
||||
);
|
||||
|
||||
Posix_EarlyInit();
|
||||
|
||||
if( argc > 1 )
|
||||
|
@ -537,6 +550,8 @@ int main( int argc, const char** argv )
|
|||
|
||||
while( 1 )
|
||||
{
|
||||
OPTICK_FRAME( "MainThread" );
|
||||
|
||||
common->Frame();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -457,6 +457,19 @@ int main( int argc, const char** argv )
|
|||
cmdargv = argv;
|
||||
// DG end
|
||||
|
||||
// Setting memory allocators
|
||||
OPTICK_SET_MEMORY_ALLOCATOR(
|
||||
[]( size_t size ) -> void* { return operator new( size ); },
|
||||
[]( void* p )
|
||||
{
|
||||
operator delete( p );
|
||||
},
|
||||
[]()
|
||||
{
|
||||
/* Do some TLS initialization here if needed */
|
||||
}
|
||||
);
|
||||
|
||||
Posix_EarlyInit();
|
||||
|
||||
if( argc > 1 )
|
||||
|
@ -472,6 +485,8 @@ int main( int argc, const char** argv )
|
|||
|
||||
while( 1 )
|
||||
{
|
||||
OPTICK_FRAME( "MainThread" );
|
||||
|
||||
common->Frame();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2051,8 +2051,6 @@ int WINAPI WinMain( HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLin
|
|||
common->Frame();
|
||||
}
|
||||
|
||||
OPTICK_SHUTDOWN();
|
||||
|
||||
// never gets here
|
||||
return 0;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue