Redo Optick D3D12 VSync / Present queue labeling using measured rather than calculated frame latency

This commit is contained in:
Stephen Saunders 2023-05-23 08:19:53 -04:00
parent 6bb8c1feeb
commit 22dc020401
5 changed files with 59 additions and 61 deletions

View file

@ -779,7 +779,7 @@ struct OPTICK_API GPUContext
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
OPTICK_API void InitGpuD3D12(ID3D12Device* device, ID3D12CommandQueue** cmdQueues, uint32_t numQueues);
OPTICK_API void InitGpuVulkan(VkDevice* vkDevices, VkPhysicalDevice* vkPhysicalDevices, VkQueue* vkQueues, uint32_t* cmdQueuesFamily, uint32_t numQueues, const VulkanFunctions* functions);
OPTICK_API void GpuFlip(void* swapChain);
OPTICK_API void GpuFlip(void* swapChain, uint32_t frameID = 0);
OPTICK_API GPUContext SetGpuContext(GPUContext context);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct OPTICK_API GPUContextScope
@ -1063,7 +1063,7 @@ struct OptickApp
if (OPTICK_CONCAT(gpu_autogen_description_, __LINE__) == nullptr) OPTICK_CONCAT(gpu_autogen_description_, __LINE__) = ::Optick::EventDescription::Create( NAME, __FILE__, __LINE__ ); \
::Optick::GPUEvent OPTICK_CONCAT(gpu_autogen_event_, __LINE__)( *(OPTICK_CONCAT(gpu_autogen_description_, __LINE__)) ); \
#define OPTICK_GPU_FLIP(SWAP_CHAIN) ::Optick::GpuFlip(SWAP_CHAIN);
#define OPTICK_GPU_FLIP(...) ::Optick::GpuFlip(__VA_ARGS__);
/////////////////////////////////////////////////////////////////////////////////
// [Automation][Startup]
@ -1124,7 +1124,7 @@ struct OptickApp
#define OPTICK_GPU_INIT_VULKAN(DEVICES, PHYSICAL_DEVICES, CMD_QUEUES, CMD_QUEUES_FAMILY, NUM_CMD_QUEUS, FUNCTIONS)
#define OPTICK_GPU_CONTEXT(...)
#define OPTICK_GPU_EVENT(NAME)
#define OPTICK_GPU_FLIP(SWAP_CHAIN)
#define OPTICK_GPU_FLIP(...)
#define OPTICK_UPDATE()
#define OPTICK_FRAME_FLIP(...)
#define OPTICK_FRAME_EVENT(FRAME_TYPE, ...)

View file

@ -1801,10 +1801,10 @@ OPTICK_API EventStorage* RegisterStorage(const char* name, uint64_t threadID, Th
return entry ? &entry->storage : nullptr;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
OPTICK_API void GpuFlip(void* swapChain)
OPTICK_API void GpuFlip(void* swapChain, uint32_t frameID)
{
if (GPUProfiler* gpuProfiler = Core::Get().gpuProfiler)
gpuProfiler->Flip(swapChain);
gpuProfiler->Flip(swapChain, frameID);
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
OPTICK_API GPUContext SetGpuContext(GPUContext context)

View file

@ -73,9 +73,6 @@ namespace Optick
ID3D12Fence* syncFence;
array<Frame, NUM_FRAMES_DELAY> frames;
std::queue<uint32_t> presentIDs;
std::queue<uint32_t> frameIDs;
NodePayload() : commandQueue(nullptr), queryHeap(nullptr), syncFence(nullptr) {}
~NodePayload();
};
@ -84,9 +81,10 @@ namespace Optick
ID3D12Resource* queryBuffer;
ID3D12Device* device;
// VSync Stats
// VSync / Present Stats
DXGI_FRAME_STATISTICS prevFrameStatistics;
UINT swapChainLatency;
std::queue<UINT> presentIdQueue;
std::queue<uint32_t> frameIdQueue;
//void UpdateRange(uint32_t start, uint32_t finish)
void InitNodeInternal(const char* nodeName, uint32_t nodeIndex, ID3D12CommandQueue* pCmdQueue);
@ -99,7 +97,7 @@ namespace Optick
void QueryTimestamp(ID3D12GraphicsCommandList* context, int64_t* outCpuTimestamp);
void Flip(IDXGISwapChain* swapChain);
void Flip(IDXGISwapChain* swapChain, uint32_t frameID);
// Interface implementation
@ -114,9 +112,9 @@ namespace Optick
void WaitForFrame(uint32_t nodeIndex, uint64_t frameNumber) override;
void Flip(void* swapChain) override
void Flip(void* swapChain, uint32_t frameID) override
{
Flip(static_cast<IDXGISwapChain*>(swapChain));
Flip(static_cast<IDXGISwapChain*>(swapChain), frameID);
}
};
@ -275,7 +273,7 @@ namespace Optick
}
}
void GPUProfilerD3D12::Flip(IDXGISwapChain* swapChain)
void GPUProfilerD3D12::Flip(IDXGISwapChain* swapChain, uint32_t frameID)
{
OPTICK_CATEGORY("GPUProfilerD3D12::Flip", Category::Debug);
@ -286,9 +284,6 @@ namespace Optick
if (currentState == STATE_RUNNING)
{
UINT currentPresentID;
swapChain->GetLastPresentCount(&currentPresentID);
Node& node = *nodes[currentNode];
NodePayload& payload = *nodePayloads[currentNode];
@ -338,25 +333,15 @@ namespace Optick
}
else
{
DXGI_SWAP_CHAIN_DESC swapChainDesc;
swapChain->GetDesc(&swapChainDesc);
IDXGISwapChain2* swapChain2;
swapChain->QueryInterface(IID_PPV_ARGS(&swapChain2));
HRESULT result = swapChain2->GetMaximumFrameLatency(&swapChainLatency);
if (result == S_OK)
swapChainLatency = std::min(swapChainLatency, swapChainDesc.BufferCount + 1);
else
swapChainLatency = swapChainDesc.BufferCount + 1;
while (!payload.presentIDs.empty())
{
payload.presentIDs.pop();
payload.frameIDs.pop();
}
// Initialize present / frame statistics
prevFrameStatistics = { 0 };
swapChain->GetFrameStatistics(&prevFrameStatistics);
while (!presentIdQueue.empty())
{
presentIdQueue.pop();
frameIdQueue.pop();
}
}
commandList->Close();
@ -364,32 +349,38 @@ namespace Optick
payload.commandQueue->ExecuteCommandLists(1, (ID3D12CommandList*const*)&commandList);
payload.commandQueue->Signal(payload.syncFence, frameNumber);
payload.presentIDs.push(currentPresentID + 1 + swapChainLatency);
payload.frameIDs.push(Core::Get().GetCurrentFrame(FrameType::CPU));
// Save presentID to frameID correlation for the next present's vsync tag
if (frameID > 0)
{
UINT prevPresentID = 0;
HRESULT result = swapChain->GetLastPresentCount(&prevPresentID);
if (result == S_OK)
{
presentIdQueue.push(prevPresentID + 1);
frameIdQueue.push(frameID);
}
}
// Process VSync
// Process VSync / Presentation timing
DXGI_FRAME_STATISTICS currentFrameStatistics = { 0 };
HRESULT result = swapChain->GetFrameStatistics(&currentFrameStatistics);
if ((result == S_OK) && (prevFrameStatistics.PresentCount < currentFrameStatistics.PresentCount))
if ((result == S_OK) && (currentFrameStatistics.SyncQPCTime.QuadPart > prevFrameStatistics.SyncQPCTime.QuadPart))
{
EventData& data = AddVSyncEvent("Present");
data.start = prevFrameStatistics.SyncQPCTime.QuadPart;
data.finish = currentFrameStatistics.SyncQPCTime.QuadPart;
uint32_t droppedFrames = 0;
while (!payload.presentIDs.empty() && payload.presentIDs.front() <= prevFrameStatistics.PresentCount)
while (!presentIdQueue.empty() && presentIdQueue.front() <= prevFrameStatistics.PresentCount)
{
if (payload.presentIDs.front() == prevFrameStatistics.PresentCount)
if (presentIdQueue.front() == prevFrameStatistics.PresentCount)
{
TagData<uint32>& tag = AddVSyncTag();
tag.timestamp = prevFrameStatistics.SyncQPCTime.QuadPart;
tag.data = payload.frameIDs.front() + currentPresentID - currentFrameStatistics.PresentCount + droppedFrames;
tag.data = frameIdQueue.front();
}
else
droppedFrames++;
payload.presentIDs.pop();
payload.frameIDs.pop();
presentIdQueue.pop();
frameIdQueue.pop();
}
prevFrameStatistics = currentFrameStatistics;

View file

@ -145,7 +145,7 @@ namespace Optick
virtual void QueryTimestamp(void* context, int64_t* cpuTimestampOut) = 0;
virtual void ResolveTimestamps(uint32_t nodeIndex, uint32_t startIndex, uint32_t count) = 0;
virtual void WaitForFrame(uint32_t nodeIndex, uint64_t frameNumber) = 0;
virtual void Flip(void* swapChain) = 0;
virtual void Flip(void* swapChain, uint32_t frameID) = 0;
virtual ~GPUProfiler();
};

View file

@ -58,20 +58,22 @@ namespace Optick
array<Frame, NUM_FRAMES_DELAY> frames;
uint64_t presentTime;
uint32_t presentID;
NodePayload() : vulkanFunctions(), device(VK_NULL_HANDLE), physicalDevice(VK_NULL_HANDLE), queue(VK_NULL_HANDLE), queryPool(VK_NULL_HANDLE), commandPool(VK_NULL_HANDLE), event(VK_NULL_HANDLE) {}
~NodePayload();
};
vector<NodePayload*> nodePayloads;
// VSync / Present Stats
uint64_t prevPresentTime;
uint32_t prevPresentID;
public:
GPUProfilerVulkan();
~GPUProfilerVulkan();
void InitDevice(VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount, const VulkanFunctions* functions);
void QueryTimestamp(VkCommandBuffer commandBuffer, int64_t* outCpuTimestamp);
void Flip(VkSwapchainKHR swapChain);
// Interface implementation
@ -86,7 +88,10 @@ namespace Optick
void WaitForFrame(uint32_t nodeIndex, uint64_t frameNumber) override;
void Flip(void* swapChain) override;
void Flip(void* swapChain, uint32_t frameID) override
{
Flip(static_cast<VkSwapchainKHR>(swapChain));
}
};
void InitGpuVulkan(VkDevice* vkDevices, VkPhysicalDevice* vkPhysicalDevices, VkQueue* vkQueues, uint32_t* cmdQueuesFamily, uint32_t numQueues, const VulkanFunctions* functions)
@ -98,6 +103,8 @@ namespace Optick
// Starts with no previous present recorded: both the last present timestamp
// and the last present ID are zero.
GPUProfilerVulkan::GPUProfilerVulkan()
: prevPresentTime(0)
, prevPresentID(0)
{
}
void GPUProfilerVulkan::InitDevice(VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount, const VulkanFunctions* functions)
@ -251,7 +258,7 @@ namespace Optick
NodePayload* payload = nodePayloads[nodeIndex];
OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkGetQueryPoolResults)(payload->device, payload->queryPool, startIndex, count, 8 * count, &nodes[nodeIndex]->queryGpuTimestamps[startIndex], 8, VK_QUERY_RESULT_64_BIT));
OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkGetQueryPoolResults)(payload->device, payload->queryPool, startIndex, count, 8 * (size_t)count, &nodes[nodeIndex]->queryGpuTimestamps[startIndex], 8, VK_QUERY_RESULT_64_BIT));
(*vulkanFunctions.vkResetQueryPool)(payload->device, payload->queryPool, startIndex, count);
// Convert GPU timestamps => CPU Timestamps
@ -272,7 +279,7 @@ namespace Optick
} while (r != VK_SUCCESS);
}
void GPUProfilerVulkan::Flip(void* swapChain)
void GPUProfilerVulkan::Flip(VkSwapchainKHR swapChain)
{
OPTICK_CATEGORY("GPUProfilerVulkan::Flip", Category::Debug);
@ -340,8 +347,8 @@ namespace Optick
{
currentFrame.queryIndexStart = 0;
currentFrame.queryIndexCount = queryEnd;
payload.presentTime = 0;
payload.presentID = 0;
prevPresentTime = 0;
prevPresentID = 0;
}
// Preparing Next Frame
@ -376,18 +383,18 @@ namespace Optick
{
// Process Presentation Timing / VSync if swap image was actually presented (i.e. not dropped)
VkPastPresentationTimingGOOGLE presentTiming = queryPresentTimings[presentIndex];
if (presentTiming.actualPresentTime > payload.presentTime)
if (presentTiming.actualPresentTime > prevPresentTime)
{
EventData& data = AddVSyncEvent("Present");
data.start = payload.presentTime;
data.start = prevPresentTime;
data.finish = presentTiming.actualPresentTime;
TagData<uint32>& tag = AddVSyncTag();
tag.timestamp = payload.presentTime;
tag.data = payload.presentID;
tag.timestamp = prevPresentTime;
tag.data = prevPresentID;
payload.presentTime = presentTiming.actualPresentTime;
payload.presentID = presentTiming.presentID;
prevPresentTime = presentTiming.actualPresentTime;
prevPresentID = presentTiming.presentID;
}
}
}