added RHI support for ranged buffer uploads

renamed StaticUnorderedArray to StaticArray
This commit is contained in:
myT 2024-07-02 01:55:35 +02:00
parent aa88622348
commit 385a75c9cd
6 changed files with 100 additions and 27 deletions

View file

@ -407,7 +407,7 @@ private:
FrameData frameData[RTFrameCount];
BLASBuffers staticBLASBuffers[BLASBucket::Count] = {};
StaticUnorderedArray<TLASInstanceDesc, 2 * BLASBucket::Count> tlasInstanceDescs;
StaticArray<TLASInstanceDesc, 2 * BLASBucket::Count> tlasInstanceDescs;
uint32_t staticTLASInstanceCount = 0;
};

View file

@ -130,7 +130,7 @@ struct EntityTracker
}
private:
typedef RHI::StaticUnorderedArray<EntityData, 16384> EntityArray;
typedef RHI::StaticArray<EntityData, 16384> EntityArray;
EntityArray entities[2];
EntityArray* currEnts = &entities[0];
EntityArray* prevEnts = &entities[1];

View file

@ -276,7 +276,9 @@ namespace RHI
uint32_t uavIndex;
bool mapped;
bool uploading;
UINT64 uploadByteOffset;
UINT64 uploadSrcByteOffset;
UINT64 uploadDestByteOffset;
UINT64 uploadByteCount;
bool shortLifeTime = false;
};
@ -372,7 +374,7 @@ namespace RHI
{
void Create();
void Release();
uint8_t* BeginBufferUpload(HBuffer buffer);
uint8_t* BeginBufferUpload(HBuffer buffer, uint32_t destByteOffset, uint32_t byteCount);
void EndBufferUpload(HBuffer buffer);
void BeginTextureUpload(MappedTexture& mappedTexture, HTexture texture);
void EndTextureUpload();
@ -635,9 +637,9 @@ namespace RHI
LinearAllocator tempStringAllocator;
UploadManager upload;
ReadbackManager readback;
StaticUnorderedArray<HTexture, MAX_DRAWIMAGES> texturesToTransition;
StaticUnorderedArray<HBuffer, 64> buffersToTransition;
StaticUnorderedArray<BufferToDelete, 64> buffersToDelete;
StaticArray<HTexture, MAX_DRAWIMAGES> texturesToTransition;
StaticArray<HBuffer, 64> buffersToTransition;
StaticArray<BufferToDelete, 64> buffersToDelete;
FrameQueries frameQueries[FrameCount];
ResolvedQueries resolvedQueries;
PIX pix;
@ -743,7 +745,12 @@ namespace RHI
}
// fatal error mode always on for now
ri.Error(ERR_FATAL, "'%s' failed with code 0x%08X (%s)\n", function, (unsigned int)hr, GetSystemErrorString(hr));
const char* const errorMessage = GetSystemErrorString(hr);
if(IsDebuggerPresent())
{
__debugbreak();
}
ri.Error(ERR_FATAL, "'%s' failed with code 0x%08X (%s)\n", function, (unsigned int)hr, errorMessage);
return false;
}
@ -958,7 +965,7 @@ namespace RHI
COM_RELEASE(commandAllocator);
}
uint8_t* UploadManager::BeginBufferUpload(HBuffer userHBuffer)
uint8_t* UploadManager::BeginBufferUpload(HBuffer userHBuffer, uint32_t destByteOffset, uint32_t byteCount)
{
Q_assert(bufferUploadCounter >= 0);
bufferUploadCounter++;
@ -968,20 +975,29 @@ namespace RHI
}
Buffer& userBuffer = rhi.buffers.Get(userHBuffer);
Buffer& uploadBuffer = rhi.buffers.Get(uploadHBuffer);
Q_assert(!userBuffer.uploading);
if(byteCount == 0)
{
Q_assert(destByteOffset == 0);
destByteOffset = 0;
byteCount = min(userBuffer.desc.byteCount, uploadBuffer.desc.byteCount);
}
Q_assert(destByteOffset + byteCount <= userBuffer.desc.byteCount);
uint8_t* mapped = NULL;
Q_assert(userBuffer.desc.memoryUsage != MemoryUsage::Readback);
if(userBuffer.desc.memoryUsage == MemoryUsage::GPU &&
rhi.umaPool == NULL)
{
const uint32_t uploadByteCount = userBuffer.desc.byteCount;
WaitToStartUploading(uploadByteCount);
WaitToStartUploading(byteCount);
mapped = mappedBuffer + bufferByteOffset;
userBuffer.uploadByteOffset = bufferByteOffset;
userBuffer.uploadSrcByteOffset = bufferByteOffset;
userBuffer.uploadDestByteOffset = destByteOffset;
userBuffer.uploadByteCount = byteCount;
bufferByteOffset = AlignUp<uint32_t>(bufferByteOffset + uploadByteCount, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
bufferByteOffset = AlignUp<uint32_t>(bufferByteOffset + byteCount, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
if(multiBufferUpload)
{
@ -1008,19 +1024,21 @@ namespace RHI
Buffer& userBuffer = rhi.buffers.Get(userHBuffer);
Q_assert(userBuffer.uploading);
Buffer& uploadBuffer = rhi.buffers.Get(uploadHBuffer);
if(!userBuffer.mapped)
{
D3D(commandList->Reset(commandAllocator, NULL));
const UINT64 byteCount = min(userBuffer.desc.byteCount, uploadBuffer.desc.byteCount);
#if defined(RHI_ENABLE_AFTERMATH)
const uint64_t byteCount = (uint64_t)userBuffer.uploadByteCount;
const char* const marker = va("Upload: CopyBufferRegion -> %s (%s)", userBuffer.desc.name, Com_FormatBytes(byteCount));
const GFSDK_Aftermath_Result result = GFSDK_Aftermath_SetEventMarker(rhi.aftermathUploadCommandList, marker, strlen(marker) + 1);
Q_assert(result == GFSDK_Aftermath_Result_Success);
#endif
commandList->CopyBufferRegion(userBuffer.buffer, 0, uploadBuffer.buffer, userBuffer.uploadByteOffset, byteCount);
const Buffer& uploadBuffer = rhi.buffers.Get(uploadHBuffer);
commandList->CopyBufferRegion(
userBuffer.buffer, userBuffer.uploadDestByteOffset,
uploadBuffer.buffer, userBuffer.uploadSrcByteOffset,
userBuffer.uploadByteCount);
ID3D12CommandList* commandLists[] = { commandList };
D3D(commandList->Close());
@ -3003,6 +3021,10 @@ namespace RHI
TableRow(2, "Shader model", model);
}
TableRow(2, "UMA", rhiInfo.isUMA ? "YES" : "NO");
TableRow(2, "Cache-coherent UMA", rhiInfo.isCacheCoherentUMA ? "YES" : "NO");
TableRow(2, "Barycentrics", rhiInfo.hasBarycentrics ? "YES" : "NO");
// the validation layer reports live objects at shutdown when NvAPI_D3D12_QueryCpuVisibleVidmem is called
#if defined(RHI_ENABLE_NVAPI)
if(rhi.nvapiActive)
@ -3710,6 +3732,7 @@ namespace RHI
rhiInfo.isCacheCoherentUMA = rhi.allocator->IsCacheCoherentUMA();
rhiInfo.hasInlineRaytracing = hasInlineRaytracing;
rhiInfo.hasBarycentrics = hasBarycentrics;
rhiInfo.allocatedByteCount = 0;
rhi.initialized = true;
@ -3880,6 +3903,9 @@ namespace RHI
}
#endif
D3D12MA::Budget budget;
rhi.allocator->GetBudget(&budget, NULL);
rhiInfo.allocatedByteCount = (uint64_t)budget.UsageBytes;
DrawGUI();
Q_assert(rhi.commandList == rhi.mainCommandList);
@ -3926,7 +3952,7 @@ namespace RHI
if(rhi.beginFrameCounter >= rhi.buffersToDelete[b].beginFrameCounter)
{
DestroyBuffer(rhi.buffersToDelete[b].buffer);
rhi.buffersToDelete.Remove(b);
rhi.buffersToDelete.RemoveUnordered(b);
}
else
{
@ -5497,9 +5523,10 @@ namespace RHI
memcpy(gpuMicroSeconds, rhi.resolvedQueries.gpuMicroSeconds, rhi.resolvedQueries.durationQueryCount * sizeof(uint32_t));
}
// RHI entry point that starts a buffer upload by forwarding to the upload manager (rhi.upload).
// Returns whatever pointer UploadManager::BeginBufferUpload hands back for the caller to write into.
// destByteOffset / byteCount select the destination range inside the buffer; the manager treats
// byteCount == 0 as "no explicit range" and requires destByteOffset to be 0 in that case.
uint8_t* BeginBufferUpload(HBuffer buffer)
uint8_t* BeginBufferUpload(HBuffer buffer, uint32_t destByteOffset, uint32_t byteCount)
{
return rhi.upload.BeginBufferUpload(buffer);
// a null handle is a caller error: catch it here, before the upload manager looks the buffer up
Q_assert(!IsNullHandle(buffer));
return rhi.upload.BeginBufferUpload(buffer, destByteOffset, byteCount);
}
void EndBufferUpload(HBuffer buffer)
@ -5509,6 +5536,7 @@ namespace RHI
// RHI entry point that starts a texture upload.
// Checks (debug builds) that the texture handle isn't null, then lets the upload manager
// fill in mappedTexture for the given texture.
void BeginTextureUpload(MappedTexture& mappedTexture, HTexture texture)
{
	// a null handle is a caller error
	Q_assert(!IsNullHandle(texture));

	UploadManager& uploadManager = rhi.upload;
	uploadManager.BeginTextureUpload(mappedTexture, texture);
}

View file

@ -818,7 +818,7 @@ namespace RHI
uint32_t GetDurationCount();
void GetDurations(uint32_t* gpuMicroSeconds);
uint8_t* BeginBufferUpload(HBuffer buffer);
uint8_t* BeginBufferUpload(HBuffer buffer, uint32_t destByteOffset = 0, uint32_t byteCount = 0);
void EndBufferUpload(HBuffer buffer);
void BeginTextureUpload(MappedTexture& mappedTexture, HTexture texture);
@ -1012,9 +1012,9 @@ namespace RHI
};
template<typename T, uint32_t N>
struct StaticUnorderedArray
struct StaticArray
{
StaticUnorderedArray()
StaticArray()
{
Clear();
}
@ -1033,7 +1033,23 @@ namespace RHI
// Removes the item at `index` and keeps the remaining items in their original order.
// The items that follow `index` are shifted down by one slot with a raw byte move (memmove).
// An index that doesn't refer to a live item (index >= count) asserts in debug builds
// and leaves the array untouched otherwise.
void Remove(uint32_t index)
{
	// validate against the live item count rather than the capacity N:
	// - an index in [count, N) must not decrement count and drop the last item
	// - on an empty array, the unsigned expression `count - 1` would wrap around
	Q_assert(index < count);
	if(index >= count)
	{
		return;
	}

	// number of items located after the removed slot (0 when removing the last item)
	const uint32_t moveCount = count - (index + 1);
	if(moveCount > 0)
	{
		memmove(&items[index], &items[index + 1], sizeof(T) * (size_t)moveCount);
	}

	count--;
}
void RemoveUnordered(uint32_t index)
{
Q_assert(index < N);
if(index >= N)
{
return;
}
@ -1045,6 +1061,23 @@ namespace RHI
count--;
}
// Removes `removalCount` consecutive items starting at `index` and keeps the remaining items in order.
// The items located after the removed range are shifted down with a raw byte move (memmove).
// A removalCount of 0, or a range that doesn't fit inside the live items, leaves the array untouched
// (the invalid cases also assert in debug builds).
void RemoveRange(uint32_t index, uint32_t removalCount)
{
	Q_assert(index < count);
	// compare removalCount against the space left after index instead of computing
	// index + removalCount up front: that 32-bit unsigned sum can wrap around and
	// make an oversized range look valid
	Q_assert(index <= count && removalCount <= count - index);
	if(removalCount == 0 || index >= count || removalCount > count - index)
	{
		return;
	}

	// safe to add now: index + removalCount <= count
	const uint32_t tailIndex = index + removalCount;
	const uint32_t moveCount = count - tailIndex;
	if(moveCount > 0)
	{
		memmove(&items[index], &items[tailIndex], sizeof(T) * (size_t)moveCount);
	}

	count -= removalCount;
}
void Clear()
{
count = 0;
@ -1064,13 +1097,24 @@ namespace RHI
return items[index];
}
// Tells whether the number of stored items (count) equals the compile-time capacity N.
bool IsFull() const
{
	const bool atCapacity = (N == count);
	return atCapacity;
}
// Tells whether the array currently stores no items (count is 0).
bool IsEmpty() const
{
	const bool noItems = (0 == count);
	return noItems;
}
private:
StaticUnorderedArray(const StaticUnorderedArray<T, N>&);
void operator=(const StaticUnorderedArray<T, N>&);
StaticArray(const StaticArray<T, N>&);
void operator=(const StaticArray<T, N>&);
public:
T items[N];
uint32_t count;
const uint32_t capacity = N;
};
template<typename T, uint32_t Invalid>

View file

@ -1924,7 +1924,7 @@ void RE_DrawMainMenuBarInfo()
frameCount = 0;
}
const char* const info = va("%s | %3d FPS", rhiInfo.adapter, displayedFPS);
const char* const info = va("%s | %s | %3d FPS", rhiInfo.adapter, Com_FormatBytes(rhiInfo.allocatedByteCount), displayedFPS);
const float offset = ImGui::GetWindowWidth() - ImGui::CalcTextSize("___").x - ImGui::CalcTextSize(info).x;
ImGui::SameLine(offset);
ImGui::Text(info);

View file

@ -1766,6 +1766,7 @@ struct RHIInfo
{
char name[MAX_QPATH];
char adapter[MAX_QPATH];
uint64_t allocatedByteCount;
qbool hasTearing;
qbool hasBaseVRS;
qbool hasExtendedVRS;