added world shader tracing

This commit is contained in:
myT 2023-06-18 02:42:59 +02:00
parent 3b6a3a5019
commit f8567ecba0
8 changed files with 178 additions and 19 deletions

View file

@ -47,8 +47,20 @@ struct WorldVertexRC
struct WorldPixelRC
{
// general
uint32_t stageIndices[8];
float greyscale;
float pad0;
float pad1;
float pad2;
// r_shaderTrace - dynamically enabled
uint32_t shaderTrace;
uint32_t shaderIndex;
uint32_t frameIndex;
uint32_t centerPixel; // x | (y << 16)
// r_dither - statically enabled
float frameSeed;
float noiseScale;
float invGamma;
@ -291,6 +303,7 @@ struct World
{
void Init();
void BeginFrame();
void EndFrame();
void Begin();
void End();
void DrawPrePass();
@ -363,6 +376,10 @@ struct World
HPipeline fogInsidePipeline;
HBuffer boxVertexBuffer;
HBuffer boxIndexBuffer;
// shader trace
HBuffer traceRenderBuffer;
HBuffer traceReadbackBuffer;
};
struct UI

View file

@ -206,6 +206,7 @@ void GRP::Init()
desc.samplerVisibility = ShaderStages::PixelBit;
desc.genericVisibility = ShaderStages::PixelBit;
desc.AddRange(DescriptorType::Texture, 0, MAX_DRAWIMAGES * 2);
desc.AddRange(DescriptorType::RWBuffer, MAX_DRAWIMAGES * 2, 1);
rootSignatureDesc = desc;
rootSignature = CreateRootSignature(desc);
@ -309,6 +310,7 @@ void GRP::EndFrame()
R_DrawGUI();
imgui.Draw();
post.Draw();
world.EndFrame();
RHI::EndFrame();
if(rhie.presentToPresentUS > 0)

View file

@ -262,6 +262,23 @@ void World::Init()
memcpy(mapped, vertices, sizeof(vertices));
EndBufferUpload(boxVertexBuffer);
}
//
// shader trace
//
{
BufferDesc desc("shader trace opaque", 2 * sizeof(uint32_t), ResourceStates::UnorderedAccessBit);
traceRenderBuffer = CreateBuffer(desc);
DescriptorTableUpdate update;
update.SetRWBuffers(1, &traceRenderBuffer, MAX_DRAWIMAGES * 2);
UpdateDescriptorTable(grp.descriptorTable, update);
}
{
BufferDesc desc("shader trace opaque readback", 2 * sizeof(uint32_t), ResourceStates::Common);
desc.memoryUsage = MemoryUsage::Readback;
traceReadbackBuffer = CreateBuffer(desc);
}
}
//
@ -309,6 +326,33 @@ void World::BeginFrame()
psoChangeCount = 0; // read by the GUI code
}
void World::EndFrame()
{
tr.tracedWorldShaderIndex = -1;
if(tr.traceWorldShader && tr.world != NULL)
{
// schedule a GPU -> CPU transfer
{
BufferBarrier barrier(traceRenderBuffer, ResourceStates::CopySourceBit);
CmdBarrier(0, NULL, 1, &barrier);
}
CmdCopyBuffer(traceReadbackBuffer, traceRenderBuffer);
{
BufferBarrier barrier(traceRenderBuffer, ResourceStates::UnorderedAccessBit);
CmdBarrier(0, NULL, 1, &barrier);
}
// grab last frame's result
uint32_t* shaderIndices = (uint32_t*)MapBuffer(traceReadbackBuffer);
const uint32_t shaderIndex = shaderIndices[RHI::GetFrameIndex() ^ 1];
UnmapBuffer(traceReadbackBuffer);
if(shaderIndex < (uint32_t)tr.numShaders)
{
tr.tracedWorldShaderIndex = (int)shaderIndex;
}
}
}
void World::Begin()
{
grp.renderMode = RenderMode::World;
@ -501,6 +545,10 @@ void World::EndBatch()
pixelRC.noiseScale = r_ditherStrength->value;
pixelRC.invBrightness = 1.0f / r_brightness->value;
pixelRC.invGamma = 1.0f / r_gamma->value;
pixelRC.shaderTrace = (uint32_t)!!tr.traceWorldShader;
pixelRC.shaderIndex = (uint32_t)shader->index;
pixelRC.frameIndex = RHI::GetFrameIndex();
pixelRC.centerPixel = (glConfig.vidWidth / 2) | ((glConfig.vidHeight / 2) << 16);
for(int s = 0; s < pipeline.numStages; ++s)
{
const image_t* image = GetBundleImage(shader->stages[pipeline.firstStage + s]->bundle);

View file

@ -165,13 +165,22 @@ VOut vs(VIn input)
cbuffer RootConstants
{
// @TODO: 16 bits per stage: low 12 = texture, high 4 = sampler
//uint stageIndices[4];
// low 16 = texture, high 16 = sampler
uint4 stageIndices0;
// general
uint4 stageIndices0; // low 16 = texture, high 16 = sampler
uint4 stageIndices1;
float greyscale;
float pad0;
float pad1;
float pad2;
// shader trace
uint shaderTrace;
uint shaderIndex;
uint frameIndex;
uint centerPixel; // x | (y << 16)
#if DITHER
// dither
float frameSeed;
float noiseScale;
float invGamma;
@ -181,6 +190,7 @@ cbuffer RootConstants
Texture2D textures2D[4096] : register(t0);
SamplerState samplers[96] : register(s0);
RWByteAddressBuffer shaderIndexBuffer : register(u0);
#define GLS_SRCBLEND_ZERO 0x00000001
#define GLS_SRCBLEND_ONE 0x00000002
@ -328,6 +338,17 @@ float4 ps(VOut input) : SV_Target
dst = Dither(dst, input.position.xyz, frameSeed, noiseScale, invBrightness, invGamma);
#endif
if(shaderTrace)
{
// we only store the shader index of 1 pixel
uint2 fragmentCoords = uint2(input.position.xy);
uint2 centerCoords = uint2(centerPixel & 0xFFFF, centerPixel >> 16);
if(all(fragmentCoords == centerCoords))
{
shaderIndexBuffer.Store(frameIndex * 4, shaderIndex);
}
}
return dst;
}

View file

@ -2988,14 +2988,23 @@ namespace RHI
{
D3D12_SHADER_RESOURCE_VIEW_DESC srv = {};
srv.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
//srv.Format = DXGI_FORMAT_UNKNOWN; // @TODO: structured buffer
srv.Format = DXGI_FORMAT_R32_TYPELESS;
srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
srv.Buffer.FirstElement = 0;
srv.Buffer.NumElements = rhiDesc.byteCount / 4;
srv.Buffer.StructureByteStride = 0;
//srv.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; // @TODO: structured buffer
srv.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
if(rhiDesc.structureByteCount > 0)
{
srv.Format = DXGI_FORMAT_UNKNOWN;
srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
srv.Buffer.NumElements = rhiDesc.byteCount / rhiDesc.structureByteCount;
srv.Buffer.StructureByteStride = rhiDesc.structureByteCount;
srv.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE;
}
else
{
srv.Format = DXGI_FORMAT_R32_TYPELESS;
srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
srv.Buffer.NumElements = rhiDesc.byteCount / 4;
srv.Buffer.StructureByteStride = 0;
srv.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
}
srvIndex = rhi.descHeapGeneric.CreateSRV(resource, srv);
}
@ -3009,19 +3018,28 @@ namespace RHI
}
uint32_t uavIndex = InvalidDescriptorIndex;
// @TODO:
/*if(rhiDesc.initialState & ResourceStates::UnorderedAccessBit)
if(rhiDesc.initialState & ResourceStates::UnorderedAccessBit)
{
D3D12_UNORDERED_ACCESS_VIEW_DESC uav = { 0 };
uav.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
uav.Format = DXGI_FORMAT_R8G8B8A8_UNORM; // @TODO: is this field needed?
uav.Buffer.CounterOffsetInBytes = ;
uav.Buffer.CounterOffsetInBytes = 0;
uav.Buffer.FirstElement = 0;
uav.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; // flag RAW?
uav.Buffer.NumElements = ;
uav.Buffer.StructureByteStride = ;
if(rhiDesc.structureByteCount > 0)
{
uav.Format = DXGI_FORMAT_UNKNOWN;
uav.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE;
uav.Buffer.NumElements = rhiDesc.byteCount / rhiDesc.structureByteCount;
uav.Buffer.StructureByteStride = rhiDesc.structureByteCount;
}
else
{
uav.Format = DXGI_FORMAT_R32_TYPELESS;
uav.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
uav.Buffer.NumElements = rhiDesc.byteCount / 4;
uav.Buffer.StructureByteStride = 0;
}
uavIndex = rhi.descHeapGeneric.CreateUAV(resource, uav);
}*/
}
Buffer buffer = {};
buffer.desc = rhiDesc;
@ -3531,9 +3549,19 @@ namespace RHI
for(uint32_t i = 0; i < update.resourceCount; ++i)
{
const Texture& texture = rhi.textures.Get(update.textures[i]);
Q_assert(texture.srvIndex != InvalidDescriptorIndex);
CopyDescriptor(table.genericHeap, update.firstIndex + i, rhi.descHeapGeneric, texture.srvIndex);
}
}
else if(update.type == DescriptorType::RWBuffer && table.genericHeap)
{
for(uint32_t i = 0; i < update.resourceCount; ++i)
{
const Buffer& buffer = rhi.buffers.Get(update.buffers[i]);
Q_assert(buffer.uavIndex != InvalidDescriptorIndex);
CopyDescriptor(table.genericHeap, update.firstIndex + i, rhi.descHeapGeneric, buffer.uavIndex);
}
}
else if(update.type == DescriptorType::RWTexture && table.genericHeap)
{
uint32_t destIndex = update.firstIndex;
@ -3556,6 +3584,7 @@ namespace RHI
for(uint32_t m = start; m < end; ++m)
{
Q_assert(texture.mips[m].uavIndex != InvalidDescriptorIndex);
CopyDescriptor(table.genericHeap, destIndex++, rhi.descHeapGeneric, texture.mips[m].uavIndex);
}
}
@ -3566,6 +3595,7 @@ namespace RHI
{
Handle htype, index, gen;
DecomposeHandle(&htype, &index, &gen, update.samplers[i].v);
Q_assert(index != InvalidDescriptorIndex);
CopyDescriptor(table.samplerHeap, update.firstIndex + i, rhi.descHeapSamplers, index);
}
}
@ -4223,6 +4253,16 @@ namespace RHI
rhi.commandList->OMSetStencilRef((UINT)stencilRef);
}
// Records a buffer-to-buffer copy on the current command list.
// Both buffers are addressed from offset 0 and the number of bytes copied
// is the smaller of the two buffers' byte counts.
// No resource barriers are issued here.
void CmdCopyBuffer(HBuffer dest, HBuffer source)
{
	Q_assert(CanWriteCommands());

	const Buffer& destBuffer = rhi.buffers.Get(dest);
	const Buffer& sourceBuffer = rhi.buffers.Get(source);

	// clamp to the smaller buffer so neither side is accessed out of range
	const UINT64 copyByteCount =
		sourceBuffer.desc.byteCount < destBuffer.desc.byteCount ?
		sourceBuffer.desc.byteCount :
		destBuffer.desc.byteCount;

	rhi.commandList->CopyBufferRegion(destBuffer.buffer, 0, sourceBuffer.buffer, 0, copyByteCount);
}
uint32_t GetDurationCount()
{
return rhi.resolvedQueries.durationQueryCount;

View file

@ -439,6 +439,7 @@ namespace RHI
ResourceStates::Flags initialState = ResourceStates::Common;
MemoryUsage::Id memoryUsage = MemoryUsage::GPU;
bool committedResource = false;
uint32_t structureByteCount = 0; // > 0 means structured buffer, == 0 means byte address buffer
};
struct TextureDesc
@ -713,6 +714,7 @@ namespace RHI
void CmdBeginDebugLabel(const char* name, float r = 1.0f, float g = 1.0f, float b = 1.0f);
void CmdEndDebugLabel();
void CmdSetStencilReference(uint8_t stencilRef);
void CmdCopyBuffer(HBuffer dest, HBuffer source);
#if 0
void CmdClearUAV(HTexture htexture, uint32_t mip);

View file

@ -520,6 +520,32 @@ static void DrawShaderList()
ClearShaderReplacements();
}
if(tr.world != NULL)
{
if(tr.traceWorldShader)
{
if(ImGui::Button("Disable world shader tracing"))
{
tr.traceWorldShader = qfalse;
}
if((uint32_t)tr.tracedWorldShaderIndex < (uint32_t)tr.numShaders)
{
shader_t* shader = tr.shaders[tr.tracedWorldShaderIndex];
if(ImGui::Selectable(va("%s##world_shader_trace", shader->name), false))
{
OpenShaderDetails(shader);
}
}
}
else
{
if(ImGui::Button("Enable world shader tracing"))
{
tr.traceWorldShader = qtrue;
}
}
}
static char filter[256];
DrawFilter(filter, sizeof(filter));

View file

@ -951,6 +951,9 @@ typedef struct {
shader_t* shaders[MAX_SHADERS];
shader_t* sortedShaders[MAX_SHADERS];
qbool traceWorldShader;
int tracedWorldShaderIndex;
int numSkins;
skin_t* skins[MAX_SKINS];