added RHI support for ranged buffer uploads

renamed StaticUnorderedArray to StaticArray
This commit is contained in:
myT 2024-07-02 01:55:35 +02:00
parent aa88622348
commit 385a75c9cd
6 changed files with 100 additions and 27 deletions

View file

@ -407,7 +407,7 @@ private:
FrameData frameData[RTFrameCount]; FrameData frameData[RTFrameCount];
BLASBuffers staticBLASBuffers[BLASBucket::Count] = {}; BLASBuffers staticBLASBuffers[BLASBucket::Count] = {};
StaticUnorderedArray<TLASInstanceDesc, 2 * BLASBucket::Count> tlasInstanceDescs; StaticArray<TLASInstanceDesc, 2 * BLASBucket::Count> tlasInstanceDescs;
uint32_t staticTLASInstanceCount = 0; uint32_t staticTLASInstanceCount = 0;
}; };

View file

@ -130,7 +130,7 @@ struct EntityTracker
} }
private: private:
typedef RHI::StaticUnorderedArray<EntityData, 16384> EntityArray; typedef RHI::StaticArray<EntityData, 16384> EntityArray;
EntityArray entities[2]; EntityArray entities[2];
EntityArray* currEnts = &entities[0]; EntityArray* currEnts = &entities[0];
EntityArray* prevEnts = &entities[1]; EntityArray* prevEnts = &entities[1];

View file

@ -276,7 +276,9 @@ namespace RHI
uint32_t uavIndex; uint32_t uavIndex;
bool mapped; bool mapped;
bool uploading; bool uploading;
UINT64 uploadByteOffset; UINT64 uploadSrcByteOffset;
UINT64 uploadDestByteOffset;
UINT64 uploadByteCount;
bool shortLifeTime = false; bool shortLifeTime = false;
}; };
@ -372,7 +374,7 @@ namespace RHI
{ {
void Create(); void Create();
void Release(); void Release();
uint8_t* BeginBufferUpload(HBuffer buffer); uint8_t* BeginBufferUpload(HBuffer buffer, uint32_t destByteOffset, uint32_t byteCount);
void EndBufferUpload(HBuffer buffer); void EndBufferUpload(HBuffer buffer);
void BeginTextureUpload(MappedTexture& mappedTexture, HTexture texture); void BeginTextureUpload(MappedTexture& mappedTexture, HTexture texture);
void EndTextureUpload(); void EndTextureUpload();
@ -635,9 +637,9 @@ namespace RHI
LinearAllocator tempStringAllocator; LinearAllocator tempStringAllocator;
UploadManager upload; UploadManager upload;
ReadbackManager readback; ReadbackManager readback;
StaticUnorderedArray<HTexture, MAX_DRAWIMAGES> texturesToTransition; StaticArray<HTexture, MAX_DRAWIMAGES> texturesToTransition;
StaticUnorderedArray<HBuffer, 64> buffersToTransition; StaticArray<HBuffer, 64> buffersToTransition;
StaticUnorderedArray<BufferToDelete, 64> buffersToDelete; StaticArray<BufferToDelete, 64> buffersToDelete;
FrameQueries frameQueries[FrameCount]; FrameQueries frameQueries[FrameCount];
ResolvedQueries resolvedQueries; ResolvedQueries resolvedQueries;
PIX pix; PIX pix;
@ -743,7 +745,12 @@ namespace RHI
} }
// fatal error mode always on for now // fatal error mode always on for now
ri.Error(ERR_FATAL, "'%s' failed with code 0x%08X (%s)\n", function, (unsigned int)hr, GetSystemErrorString(hr)); const char* const errorMessage = GetSystemErrorString(hr);
if(IsDebuggerPresent())
{
__debugbreak();
}
ri.Error(ERR_FATAL, "'%s' failed with code 0x%08X (%s)\n", function, (unsigned int)hr, errorMessage);
return false; return false;
} }
@ -958,7 +965,7 @@ namespace RHI
COM_RELEASE(commandAllocator); COM_RELEASE(commandAllocator);
} }
uint8_t* UploadManager::BeginBufferUpload(HBuffer userHBuffer) uint8_t* UploadManager::BeginBufferUpload(HBuffer userHBuffer, uint32_t destByteOffset, uint32_t byteCount)
{ {
Q_assert(bufferUploadCounter >= 0); Q_assert(bufferUploadCounter >= 0);
bufferUploadCounter++; bufferUploadCounter++;
@ -968,20 +975,29 @@ namespace RHI
} }
Buffer& userBuffer = rhi.buffers.Get(userHBuffer); Buffer& userBuffer = rhi.buffers.Get(userHBuffer);
Buffer& uploadBuffer = rhi.buffers.Get(uploadHBuffer);
Q_assert(!userBuffer.uploading); Q_assert(!userBuffer.uploading);
if(byteCount == 0)
{
Q_assert(destByteOffset == 0);
destByteOffset = 0;
byteCount = min(userBuffer.desc.byteCount, uploadBuffer.desc.byteCount);
}
Q_assert(destByteOffset + byteCount <= userBuffer.desc.byteCount);
uint8_t* mapped = NULL; uint8_t* mapped = NULL;
Q_assert(userBuffer.desc.memoryUsage != MemoryUsage::Readback); Q_assert(userBuffer.desc.memoryUsage != MemoryUsage::Readback);
if(userBuffer.desc.memoryUsage == MemoryUsage::GPU && if(userBuffer.desc.memoryUsage == MemoryUsage::GPU &&
rhi.umaPool == NULL) rhi.umaPool == NULL)
{ {
const uint32_t uploadByteCount = userBuffer.desc.byteCount; WaitToStartUploading(byteCount);
WaitToStartUploading(uploadByteCount);
mapped = mappedBuffer + bufferByteOffset; mapped = mappedBuffer + bufferByteOffset;
userBuffer.uploadByteOffset = bufferByteOffset; userBuffer.uploadSrcByteOffset = bufferByteOffset;
userBuffer.uploadDestByteOffset = destByteOffset;
userBuffer.uploadByteCount = byteCount;
bufferByteOffset = AlignUp<uint32_t>(bufferByteOffset + uploadByteCount, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); bufferByteOffset = AlignUp<uint32_t>(bufferByteOffset + byteCount, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
if(multiBufferUpload) if(multiBufferUpload)
{ {
@ -1008,19 +1024,21 @@ namespace RHI
Buffer& userBuffer = rhi.buffers.Get(userHBuffer); Buffer& userBuffer = rhi.buffers.Get(userHBuffer);
Q_assert(userBuffer.uploading); Q_assert(userBuffer.uploading);
Buffer& uploadBuffer = rhi.buffers.Get(uploadHBuffer);
if(!userBuffer.mapped) if(!userBuffer.mapped)
{ {
D3D(commandList->Reset(commandAllocator, NULL)); D3D(commandList->Reset(commandAllocator, NULL));
const UINT64 byteCount = min(userBuffer.desc.byteCount, uploadBuffer.desc.byteCount);
#if defined(RHI_ENABLE_AFTERMATH) #if defined(RHI_ENABLE_AFTERMATH)
const uint64_t byteCount = (uint64_t)userBuffer.uploadByteCount;
const char* const marker = va("Upload: CopyBufferRegion -> %s (%s)", userBuffer.desc.name, Com_FormatBytes(byteCount)); const char* const marker = va("Upload: CopyBufferRegion -> %s (%s)", userBuffer.desc.name, Com_FormatBytes(byteCount));
const GFSDK_Aftermath_Result result = GFSDK_Aftermath_SetEventMarker(rhi.aftermathUploadCommandList, marker, strlen(marker) + 1); const GFSDK_Aftermath_Result result = GFSDK_Aftermath_SetEventMarker(rhi.aftermathUploadCommandList, marker, strlen(marker) + 1);
Q_assert(result == GFSDK_Aftermath_Result_Success); Q_assert(result == GFSDK_Aftermath_Result_Success);
#endif #endif
commandList->CopyBufferRegion(userBuffer.buffer, 0, uploadBuffer.buffer, userBuffer.uploadByteOffset, byteCount); const Buffer& uploadBuffer = rhi.buffers.Get(uploadHBuffer);
commandList->CopyBufferRegion(
userBuffer.buffer, userBuffer.uploadDestByteOffset,
uploadBuffer.buffer, userBuffer.uploadSrcByteOffset,
userBuffer.uploadByteCount);
ID3D12CommandList* commandLists[] = { commandList }; ID3D12CommandList* commandLists[] = { commandList };
D3D(commandList->Close()); D3D(commandList->Close());
@ -3003,6 +3021,10 @@ namespace RHI
TableRow(2, "Shader model", model); TableRow(2, "Shader model", model);
} }
TableRow(2, "UMA", rhiInfo.isUMA ? "YES" : "NO");
TableRow(2, "Cache-coherent UMA", rhiInfo.isCacheCoherentUMA ? "YES" : "NO");
TableRow(2, "Barycentrics", rhiInfo.hasBarycentrics ? "YES" : "NO");
// the validation layer reports live objects at shutdown when NvAPI_D3D12_QueryCpuVisibleVidmem is called // the validation layer reports live objects at shutdown when NvAPI_D3D12_QueryCpuVisibleVidmem is called
#if defined(RHI_ENABLE_NVAPI) #if defined(RHI_ENABLE_NVAPI)
if(rhi.nvapiActive) if(rhi.nvapiActive)
@ -3710,6 +3732,7 @@ namespace RHI
rhiInfo.isCacheCoherentUMA = rhi.allocator->IsCacheCoherentUMA(); rhiInfo.isCacheCoherentUMA = rhi.allocator->IsCacheCoherentUMA();
rhiInfo.hasInlineRaytracing = hasInlineRaytracing; rhiInfo.hasInlineRaytracing = hasInlineRaytracing;
rhiInfo.hasBarycentrics = hasBarycentrics; rhiInfo.hasBarycentrics = hasBarycentrics;
rhiInfo.allocatedByteCount = 0;
rhi.initialized = true; rhi.initialized = true;
@ -3880,6 +3903,9 @@ namespace RHI
} }
#endif #endif
D3D12MA::Budget budget;
rhi.allocator->GetBudget(&budget, NULL);
rhiInfo.allocatedByteCount = (uint64_t)budget.UsageBytes;
DrawGUI(); DrawGUI();
Q_assert(rhi.commandList == rhi.mainCommandList); Q_assert(rhi.commandList == rhi.mainCommandList);
@ -3926,7 +3952,7 @@ namespace RHI
if(rhi.beginFrameCounter >= rhi.buffersToDelete[b].beginFrameCounter) if(rhi.beginFrameCounter >= rhi.buffersToDelete[b].beginFrameCounter)
{ {
DestroyBuffer(rhi.buffersToDelete[b].buffer); DestroyBuffer(rhi.buffersToDelete[b].buffer);
rhi.buffersToDelete.Remove(b); rhi.buffersToDelete.RemoveUnordered(b);
} }
else else
{ {
@ -5497,9 +5523,10 @@ namespace RHI
memcpy(gpuMicroSeconds, rhi.resolvedQueries.gpuMicroSeconds, rhi.resolvedQueries.durationQueryCount * sizeof(uint32_t)); memcpy(gpuMicroSeconds, rhi.resolvedQueries.gpuMicroSeconds, rhi.resolvedQueries.durationQueryCount * sizeof(uint32_t));
} }
uint8_t* BeginBufferUpload(HBuffer buffer) uint8_t* BeginBufferUpload(HBuffer buffer, uint32_t destByteOffset, uint32_t byteCount)
{ {
return rhi.upload.BeginBufferUpload(buffer); Q_assert(!IsNullHandle(buffer));
return rhi.upload.BeginBufferUpload(buffer, destByteOffset, byteCount);
} }
void EndBufferUpload(HBuffer buffer) void EndBufferUpload(HBuffer buffer)
@ -5509,6 +5536,7 @@ namespace RHI
// Public RHI entry point for starting a texture upload.
// Checks that the texture handle is not null before handing the request
// over to the upload manager, which fills in 'mappedTexture'.
void BeginTextureUpload(MappedTexture& mappedTexture, HTexture texture)
{
	const bool isValidHandle = !IsNullHandle(texture);
	Q_assert(isValidHandle);

	rhi.upload.BeginTextureUpload(mappedTexture, texture);
}

View file

@ -818,7 +818,7 @@ namespace RHI
uint32_t GetDurationCount(); uint32_t GetDurationCount();
void GetDurations(uint32_t* gpuMicroSeconds); void GetDurations(uint32_t* gpuMicroSeconds);
uint8_t* BeginBufferUpload(HBuffer buffer); uint8_t* BeginBufferUpload(HBuffer buffer, uint32_t destByteOffset = 0, uint32_t byteCount = 0);
void EndBufferUpload(HBuffer buffer); void EndBufferUpload(HBuffer buffer);
void BeginTextureUpload(MappedTexture& mappedTexture, HTexture texture); void BeginTextureUpload(MappedTexture& mappedTexture, HTexture texture);
@ -1012,9 +1012,9 @@ namespace RHI
}; };
template<typename T, uint32_t N> template<typename T, uint32_t N>
struct StaticUnorderedArray struct StaticArray
{ {
StaticUnorderedArray() StaticArray()
{ {
Clear(); Clear();
} }
@ -1033,7 +1033,23 @@ namespace RHI
void Remove(uint32_t index) void Remove(uint32_t index)
{ {
Q_assert(index < N); Q_assert(index < N);
if(count >= N) if(index >= N)
{
return;
}
if(index < count - 1)
{
const uint32_t moveCount = count - (index + 1);
memmove(&items[index], &items[index + 1], sizeof(T) * (size_t)moveCount);
}
count--;
}
void RemoveUnordered(uint32_t index)
{
Q_assert(index < N);
if(index >= N)
{ {
return; return;
} }
@ -1045,6 +1061,23 @@ namespace RHI
count--; count--;
} }
// Removes 'removalCount' consecutive items starting at 'index' and keeps the
// items that follow the removed range in their original order.
// The trailing items are shifted down with memmove, so T is assumed to be
// safe to relocate bytewise (TODO confirm for all instantiations).
// A range that is empty or not fully inside [0, count) leaves the array untouched.
void RemoveRange(uint32_t index, uint32_t removalCount)
{
	// the range is checked as "removalCount <= count - index" instead of
	// "index + removalCount <= count" because the sum is computed in uint32_t
	// and can wrap around, which would let an invalid range through
	Q_assert(index < count && removalCount <= count - index);
	if(index >= count || removalCount == 0 || removalCount > count - index)
	{
		return;
	}

	// number of items that live after the removed range
	const uint32_t moveCount = count - index - removalCount;
	if(moveCount > 0)
	{
		memmove(&items[index], &items[index + removalCount], sizeof(T) * (size_t)moveCount);
	}

	count -= removalCount;
}
void Clear() void Clear()
{ {
count = 0; count = 0;
@ -1064,13 +1097,24 @@ namespace RHI
return items[index]; return items[index];
} }
// Returns true when the number of stored items equals the capacity N.
bool IsFull() const
{
	const bool atCapacity = (N == count);
	return atCapacity;
}
// Returns true when the array currently stores no items.
bool IsEmpty() const
{
	const bool hasItems = (count != 0);
	return !hasItems;
}
private: private:
StaticUnorderedArray(const StaticUnorderedArray<T, N>&); StaticArray(const StaticArray<T, N>&);
void operator=(const StaticUnorderedArray<T, N>&); void operator=(const StaticArray<T, N>&);
public: public:
T items[N]; T items[N];
uint32_t count; uint32_t count;
const uint32_t capacity = N;
}; };
template<typename T, uint32_t Invalid> template<typename T, uint32_t Invalid>

View file

@ -1924,7 +1924,7 @@ void RE_DrawMainMenuBarInfo()
frameCount = 0; frameCount = 0;
} }
const char* const info = va("%s | %3d FPS", rhiInfo.adapter, displayedFPS); const char* const info = va("%s | %s | %3d FPS", rhiInfo.adapter, Com_FormatBytes(rhiInfo.allocatedByteCount), displayedFPS);
const float offset = ImGui::GetWindowWidth() - ImGui::CalcTextSize("___").x - ImGui::CalcTextSize(info).x; const float offset = ImGui::GetWindowWidth() - ImGui::CalcTextSize("___").x - ImGui::CalcTextSize(info).x;
ImGui::SameLine(offset); ImGui::SameLine(offset);
ImGui::Text(info); ImGui::Text(info);

View file

@ -1766,6 +1766,7 @@ struct RHIInfo
{ {
char name[MAX_QPATH]; char name[MAX_QPATH];
char adapter[MAX_QPATH]; char adapter[MAX_QPATH];
uint64_t allocatedByteCount;
qbool hasTearing; qbool hasTearing;
qbool hasBaseVRS; qbool hasBaseVRS;
qbool hasExtendedVRS; qbool hasExtendedVRS;