mirror of
https://github.com/id-Software/DOOM-3-BFG.git
synced 2025-03-14 22:50:45 +00:00
Redo Optick D3D12 VSync / Present queue labeling using measured vs. calculated frame latency
This commit is contained in:
parent
6bb8c1feeb
commit
22dc020401
5 changed files with 59 additions and 61 deletions
|
@ -779,7 +779,7 @@ struct OPTICK_API GPUContext
|
|||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
OPTICK_API void InitGpuD3D12(ID3D12Device* device, ID3D12CommandQueue** cmdQueues, uint32_t numQueues);
|
||||
OPTICK_API void InitGpuVulkan(VkDevice* vkDevices, VkPhysicalDevice* vkPhysicalDevices, VkQueue* vkQueues, uint32_t* cmdQueuesFamily, uint32_t numQueues, const VulkanFunctions* functions);
|
||||
OPTICK_API void GpuFlip(void* swapChain);
|
||||
OPTICK_API void GpuFlip(void* swapChain, uint32_t frameID = 0);
|
||||
OPTICK_API GPUContext SetGpuContext(GPUContext context);
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
struct OPTICK_API GPUContextScope
|
||||
|
@ -1063,7 +1063,7 @@ struct OptickApp
|
|||
if (OPTICK_CONCAT(gpu_autogen_description_, __LINE__) == nullptr) OPTICK_CONCAT(gpu_autogen_description_, __LINE__) = ::Optick::EventDescription::Create( NAME, __FILE__, __LINE__ ); \
|
||||
::Optick::GPUEvent OPTICK_CONCAT(gpu_autogen_event_, __LINE__)( *(OPTICK_CONCAT(gpu_autogen_description_, __LINE__)) ); \
|
||||
|
||||
#define OPTICK_GPU_FLIP(SWAP_CHAIN) ::Optick::GpuFlip(SWAP_CHAIN);
|
||||
#define OPTICK_GPU_FLIP(...) ::Optick::GpuFlip(__VA_ARGS__);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
// [Automation][Startup]
|
||||
|
@ -1124,7 +1124,7 @@ struct OptickApp
|
|||
#define OPTICK_GPU_INIT_VULKAN(DEVICES, PHYSICAL_DEVICES, CMD_QUEUES, CMD_QUEUES_FAMILY, NUM_CMD_QUEUS, FUNCTIONS)
|
||||
#define OPTICK_GPU_CONTEXT(...)
|
||||
#define OPTICK_GPU_EVENT(NAME)
|
||||
#define OPTICK_GPU_FLIP(SWAP_CHAIN)
|
||||
#define OPTICK_GPU_FLIP(...)
|
||||
#define OPTICK_UPDATE()
|
||||
#define OPTICK_FRAME_FLIP(...)
|
||||
#define OPTICK_FRAME_EVENT(FRAME_TYPE, ...)
|
||||
|
|
|
@ -1801,10 +1801,10 @@ OPTICK_API EventStorage* RegisterStorage(const char* name, uint64_t threadID, Th
|
|||
return entry ? &entry->storage : nullptr;
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
OPTICK_API void GpuFlip(void* swapChain)
|
||||
OPTICK_API void GpuFlip(void* swapChain, uint32_t frameID)
|
||||
{
|
||||
if (GPUProfiler* gpuProfiler = Core::Get().gpuProfiler)
|
||||
gpuProfiler->Flip(swapChain);
|
||||
gpuProfiler->Flip(swapChain, frameID);
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
OPTICK_API GPUContext SetGpuContext(GPUContext context)
|
||||
|
|
|
@ -73,9 +73,6 @@ namespace Optick
|
|||
ID3D12Fence* syncFence;
|
||||
array<Frame, NUM_FRAMES_DELAY> frames;
|
||||
|
||||
std::queue<uint32_t> presentIDs;
|
||||
std::queue<uint32_t> frameIDs;
|
||||
|
||||
NodePayload() : commandQueue(nullptr), queryHeap(nullptr), syncFence(nullptr) {}
|
||||
~NodePayload();
|
||||
};
|
||||
|
@ -84,9 +81,10 @@ namespace Optick
|
|||
ID3D12Resource* queryBuffer;
|
||||
ID3D12Device* device;
|
||||
|
||||
// VSync Stats
|
||||
// VSync / Present Stats
|
||||
DXGI_FRAME_STATISTICS prevFrameStatistics;
|
||||
UINT swapChainLatency;
|
||||
std::queue<UINT> presentIdQueue;
|
||||
std::queue<uint32_t> frameIdQueue;
|
||||
|
||||
//void UpdateRange(uint32_t start, uint32_t finish)
|
||||
void InitNodeInternal(const char* nodeName, uint32_t nodeIndex, ID3D12CommandQueue* pCmdQueue);
|
||||
|
@ -99,7 +97,7 @@ namespace Optick
|
|||
|
||||
void QueryTimestamp(ID3D12GraphicsCommandList* context, int64_t* outCpuTimestamp);
|
||||
|
||||
void Flip(IDXGISwapChain* swapChain);
|
||||
void Flip(IDXGISwapChain* swapChain, uint32_t frameID);
|
||||
|
||||
|
||||
// Interface implementation
|
||||
|
@ -114,9 +112,9 @@ namespace Optick
|
|||
|
||||
void WaitForFrame(uint32_t nodeIndex, uint64_t frameNumber) override;
|
||||
|
||||
void Flip(void* swapChain) override
|
||||
void Flip(void* swapChain, uint32_t frameID) override
|
||||
{
|
||||
Flip(static_cast<IDXGISwapChain*>(swapChain));
|
||||
Flip(static_cast<IDXGISwapChain*>(swapChain), frameID);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -275,7 +273,7 @@ namespace Optick
|
|||
}
|
||||
}
|
||||
|
||||
void GPUProfilerD3D12::Flip(IDXGISwapChain* swapChain)
|
||||
void GPUProfilerD3D12::Flip(IDXGISwapChain* swapChain, uint32_t frameID)
|
||||
{
|
||||
OPTICK_CATEGORY("GPUProfilerD3D12::Flip", Category::Debug);
|
||||
|
||||
|
@ -286,9 +284,6 @@ namespace Optick
|
|||
|
||||
if (currentState == STATE_RUNNING)
|
||||
{
|
||||
UINT currentPresentID;
|
||||
swapChain->GetLastPresentCount(&currentPresentID);
|
||||
|
||||
Node& node = *nodes[currentNode];
|
||||
NodePayload& payload = *nodePayloads[currentNode];
|
||||
|
||||
|
@ -338,25 +333,15 @@ namespace Optick
|
|||
}
|
||||
else
|
||||
{
|
||||
DXGI_SWAP_CHAIN_DESC swapChainDesc;
|
||||
swapChain->GetDesc(&swapChainDesc);
|
||||
|
||||
IDXGISwapChain2* swapChain2;
|
||||
swapChain->QueryInterface(IID_PPV_ARGS(&swapChain2));
|
||||
HRESULT result = swapChain2->GetMaximumFrameLatency(&swapChainLatency);
|
||||
if (result == S_OK)
|
||||
swapChainLatency = std::min(swapChainLatency, swapChainDesc.BufferCount + 1);
|
||||
else
|
||||
swapChainLatency = swapChainDesc.BufferCount + 1;
|
||||
|
||||
while (!payload.presentIDs.empty())
|
||||
{
|
||||
payload.presentIDs.pop();
|
||||
payload.frameIDs.pop();
|
||||
}
|
||||
|
||||
// Initialize present / frame statistics
|
||||
prevFrameStatistics = { 0 };
|
||||
swapChain->GetFrameStatistics(&prevFrameStatistics);
|
||||
|
||||
while (!presentIdQueue.empty())
|
||||
{
|
||||
presentIdQueue.pop();
|
||||
frameIdQueue.pop();
|
||||
}
|
||||
}
|
||||
|
||||
commandList->Close();
|
||||
|
@ -364,32 +349,38 @@ namespace Optick
|
|||
payload.commandQueue->ExecuteCommandLists(1, (ID3D12CommandList*const*)&commandList);
|
||||
payload.commandQueue->Signal(payload.syncFence, frameNumber);
|
||||
|
||||
payload.presentIDs.push(currentPresentID + 1 + swapChainLatency);
|
||||
payload.frameIDs.push(Core::Get().GetCurrentFrame(FrameType::CPU));
|
||||
// Save presentID to frameID correlation for the next present's vsync tag
|
||||
if (frameID > 0)
|
||||
{
|
||||
UINT prevPresentID = 0;
|
||||
HRESULT result = swapChain->GetLastPresentCount(&prevPresentID);
|
||||
if (result == S_OK)
|
||||
{
|
||||
presentIdQueue.push(prevPresentID + 1);
|
||||
frameIdQueue.push(frameID);
|
||||
}
|
||||
}
|
||||
|
||||
// Process VSync
|
||||
// Process VSync / Presentation timing
|
||||
DXGI_FRAME_STATISTICS currentFrameStatistics = { 0 };
|
||||
HRESULT result = swapChain->GetFrameStatistics(&currentFrameStatistics);
|
||||
if ((result == S_OK) && (prevFrameStatistics.PresentCount < currentFrameStatistics.PresentCount))
|
||||
if ((result == S_OK) && (currentFrameStatistics.SyncQPCTime.QuadPart > prevFrameStatistics.SyncQPCTime.QuadPart))
|
||||
{
|
||||
EventData& data = AddVSyncEvent("Present");
|
||||
data.start = prevFrameStatistics.SyncQPCTime.QuadPart;
|
||||
data.finish = currentFrameStatistics.SyncQPCTime.QuadPart;
|
||||
|
||||
uint32_t droppedFrames = 0;
|
||||
while (!payload.presentIDs.empty() && payload.presentIDs.front() <= prevFrameStatistics.PresentCount)
|
||||
while (!presentIdQueue.empty() && presentIdQueue.front() <= prevFrameStatistics.PresentCount)
|
||||
{
|
||||
if (payload.presentIDs.front() == prevFrameStatistics.PresentCount)
|
||||
if (presentIdQueue.front() == prevFrameStatistics.PresentCount)
|
||||
{
|
||||
TagData<uint32>& tag = AddVSyncTag();
|
||||
tag.timestamp = prevFrameStatistics.SyncQPCTime.QuadPart;
|
||||
tag.data = payload.frameIDs.front() + currentPresentID - currentFrameStatistics.PresentCount + droppedFrames;
|
||||
tag.data = frameIdQueue.front();
|
||||
}
|
||||
else
|
||||
droppedFrames++;
|
||||
|
||||
payload.presentIDs.pop();
|
||||
payload.frameIDs.pop();
|
||||
presentIdQueue.pop();
|
||||
frameIdQueue.pop();
|
||||
}
|
||||
|
||||
prevFrameStatistics = currentFrameStatistics;
|
||||
|
|
|
@ -145,7 +145,7 @@ namespace Optick
|
|||
virtual void QueryTimestamp(void* context, int64_t* cpuTimestampOut) = 0;
|
||||
virtual void ResolveTimestamps(uint32_t nodeIndex, uint32_t startIndex, uint32_t count) = 0;
|
||||
virtual void WaitForFrame(uint32_t nodeIndex, uint64_t frameNumber) = 0;
|
||||
virtual void Flip(void* swapChain) = 0;
|
||||
virtual void Flip(void* swapChain, uint32_t frameID) = 0;
|
||||
|
||||
virtual ~GPUProfiler();
|
||||
};
|
||||
|
|
|
@ -58,20 +58,22 @@ namespace Optick
|
|||
|
||||
array<Frame, NUM_FRAMES_DELAY> frames;
|
||||
|
||||
uint64_t presentTime;
|
||||
uint32_t presentID;
|
||||
|
||||
NodePayload() : vulkanFunctions(), device(VK_NULL_HANDLE), physicalDevice(VK_NULL_HANDLE), queue(VK_NULL_HANDLE), queryPool(VK_NULL_HANDLE), commandPool(VK_NULL_HANDLE), event(VK_NULL_HANDLE) {}
|
||||
~NodePayload();
|
||||
};
|
||||
vector<NodePayload*> nodePayloads;
|
||||
|
||||
// VSync / Present Stats
|
||||
uint64_t prevPresentTime;
|
||||
uint32_t prevPresentID;
|
||||
|
||||
public:
|
||||
GPUProfilerVulkan();
|
||||
~GPUProfilerVulkan();
|
||||
|
||||
void InitDevice(VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount, const VulkanFunctions* functions);
|
||||
void QueryTimestamp(VkCommandBuffer commandBuffer, int64_t* outCpuTimestamp);
|
||||
void Flip(VkSwapchainKHR swapChain);
|
||||
|
||||
|
||||
// Interface implementation
|
||||
|
@ -86,7 +88,10 @@ namespace Optick
|
|||
|
||||
void WaitForFrame(uint32_t nodeIndex, uint64_t frameNumber) override;
|
||||
|
||||
void Flip(void* swapChain) override;
|
||||
void Flip(void* swapChain, uint32_t frameID) override
|
||||
{
|
||||
Flip(static_cast<VkSwapchainKHR>(swapChain));
|
||||
}
|
||||
};
|
||||
|
||||
void InitGpuVulkan(VkDevice* vkDevices, VkPhysicalDevice* vkPhysicalDevices, VkQueue* vkQueues, uint32_t* cmdQueuesFamily, uint32_t numQueues, const VulkanFunctions* functions)
|
||||
|
@ -98,6 +103,8 @@ namespace Optick
|
|||
|
||||
GPUProfilerVulkan::GPUProfilerVulkan()
|
||||
{
|
||||
prevPresentTime = 0;
|
||||
prevPresentID = 0;
|
||||
}
|
||||
|
||||
void GPUProfilerVulkan::InitDevice(VkDevice* devices, VkPhysicalDevice* physicalDevices, VkQueue* cmdQueues, uint32_t* cmdQueuesFamily, uint32_t nodeCount, const VulkanFunctions* functions)
|
||||
|
@ -251,7 +258,7 @@ namespace Optick
|
|||
|
||||
NodePayload* payload = nodePayloads[nodeIndex];
|
||||
|
||||
OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkGetQueryPoolResults)(payload->device, payload->queryPool, startIndex, count, 8 * count, &nodes[nodeIndex]->queryGpuTimestamps[startIndex], 8, VK_QUERY_RESULT_64_BIT));
|
||||
OPTICK_VK_CHECK((VkResult)(*vulkanFunctions.vkGetQueryPoolResults)(payload->device, payload->queryPool, startIndex, count, 8 * (size_t)count, &nodes[nodeIndex]->queryGpuTimestamps[startIndex], 8, VK_QUERY_RESULT_64_BIT));
|
||||
(*vulkanFunctions.vkResetQueryPool)(payload->device, payload->queryPool, startIndex, count);
|
||||
|
||||
// Convert GPU timestamps => CPU Timestamps
|
||||
|
@ -272,7 +279,7 @@ namespace Optick
|
|||
} while (r != VK_SUCCESS);
|
||||
}
|
||||
|
||||
void GPUProfilerVulkan::Flip(void* swapChain)
|
||||
void GPUProfilerVulkan::Flip(VkSwapchainKHR swapChain)
|
||||
{
|
||||
OPTICK_CATEGORY("GPUProfilerVulkan::Flip", Category::Debug);
|
||||
|
||||
|
@ -340,8 +347,8 @@ namespace Optick
|
|||
{
|
||||
currentFrame.queryIndexStart = 0;
|
||||
currentFrame.queryIndexCount = queryEnd;
|
||||
payload.presentTime = 0;
|
||||
payload.presentID = 0;
|
||||
prevPresentTime = 0;
|
||||
prevPresentID = 0;
|
||||
}
|
||||
|
||||
// Preparing Next Frame
|
||||
|
@ -376,18 +383,18 @@ namespace Optick
|
|||
{
|
||||
// Process Presentation Timing / VSync if swap image was actually presented (i.e. not dropped)
|
||||
VkPastPresentationTimingGOOGLE presentTiming = queryPresentTimings[presentIndex];
|
||||
if (presentTiming.actualPresentTime > payload.presentTime)
|
||||
if (presentTiming.actualPresentTime > prevPresentTime)
|
||||
{
|
||||
EventData& data = AddVSyncEvent("Present");
|
||||
data.start = payload.presentTime;
|
||||
data.start = prevPresentTime;
|
||||
data.finish = presentTiming.actualPresentTime;
|
||||
|
||||
TagData<uint32>& tag = AddVSyncTag();
|
||||
tag.timestamp = payload.presentTime;
|
||||
tag.data = payload.presentID;
|
||||
tag.timestamp = prevPresentTime;
|
||||
tag.data = prevPresentID;
|
||||
|
||||
payload.presentTime = presentTiming.actualPresentTime;
|
||||
payload.presentID = presentTiming.presentID;
|
||||
prevPresentTime = presentTiming.actualPresentTime;
|
||||
prevPresentID = presentTiming.presentID;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue