added world shader tracing

This commit is contained in:
myT 2023-06-18 02:42:59 +02:00
parent 3b6a3a5019
commit f8567ecba0
8 changed files with 178 additions and 19 deletions

View File

@ -47,8 +47,20 @@ struct WorldVertexRC
struct WorldPixelRC struct WorldPixelRC
{ {
// general
uint32_t stageIndices[8]; uint32_t stageIndices[8];
float greyscale; float greyscale;
float pad0;
float pad1;
float pad2;
// r_shaderTrace - dynamically enabled
uint32_t shaderTrace;
uint32_t shaderIndex;
uint32_t frameIndex;
uint32_t centerPixel; // x | (y << 16)
// r_dither - statically enabled
float frameSeed; float frameSeed;
float noiseScale; float noiseScale;
float invGamma; float invGamma;
@ -291,6 +303,7 @@ struct World
{ {
void Init(); void Init();
void BeginFrame(); void BeginFrame();
void EndFrame();
void Begin(); void Begin();
void End(); void End();
void DrawPrePass(); void DrawPrePass();
@ -363,6 +376,10 @@ struct World
HPipeline fogInsidePipeline; HPipeline fogInsidePipeline;
HBuffer boxVertexBuffer; HBuffer boxVertexBuffer;
HBuffer boxIndexBuffer; HBuffer boxIndexBuffer;
// shader trace
HBuffer traceRenderBuffer;
HBuffer traceReadbackBuffer;
}; };
struct UI struct UI

View File

@ -206,6 +206,7 @@ void GRP::Init()
desc.samplerVisibility = ShaderStages::PixelBit; desc.samplerVisibility = ShaderStages::PixelBit;
desc.genericVisibility = ShaderStages::PixelBit; desc.genericVisibility = ShaderStages::PixelBit;
desc.AddRange(DescriptorType::Texture, 0, MAX_DRAWIMAGES * 2); desc.AddRange(DescriptorType::Texture, 0, MAX_DRAWIMAGES * 2);
desc.AddRange(DescriptorType::RWBuffer, MAX_DRAWIMAGES * 2, 1);
rootSignatureDesc = desc; rootSignatureDesc = desc;
rootSignature = CreateRootSignature(desc); rootSignature = CreateRootSignature(desc);
@ -309,6 +310,7 @@ void GRP::EndFrame()
R_DrawGUI(); R_DrawGUI();
imgui.Draw(); imgui.Draw();
post.Draw(); post.Draw();
world.EndFrame();
RHI::EndFrame(); RHI::EndFrame();
if(rhie.presentToPresentUS > 0) if(rhie.presentToPresentUS > 0)

View File

@ -262,6 +262,23 @@ void World::Init()
memcpy(mapped, vertices, sizeof(vertices)); memcpy(mapped, vertices, sizeof(vertices));
EndBufferUpload(boxVertexBuffer); EndBufferUpload(boxVertexBuffer);
} }
//
// shader trace
//
{
BufferDesc desc("shader trace opaque", 2 * sizeof(uint32_t), ResourceStates::UnorderedAccessBit);
traceRenderBuffer = CreateBuffer(desc);
DescriptorTableUpdate update;
update.SetRWBuffers(1, &traceRenderBuffer, MAX_DRAWIMAGES * 2);
UpdateDescriptorTable(grp.descriptorTable, update);
}
{
BufferDesc desc("shader trace opaque readback", 2 * sizeof(uint32_t), ResourceStates::Common);
desc.memoryUsage = MemoryUsage::Readback;
traceReadbackBuffer = CreateBuffer(desc);
}
} }
// //
@ -309,6 +326,33 @@ void World::BeginFrame()
psoChangeCount = 0; // read by the GUI code psoChangeCount = 0; // read by the GUI code
} }
void World::EndFrame()
{
tr.tracedWorldShaderIndex = -1;
if(tr.traceWorldShader && tr.world != NULL)
{
// schedule a GPU -> CPU transfer
{
BufferBarrier barrier(traceRenderBuffer, ResourceStates::CopySourceBit);
CmdBarrier(0, NULL, 1, &barrier);
}
CmdCopyBuffer(traceReadbackBuffer, traceRenderBuffer);
{
BufferBarrier barrier(traceRenderBuffer, ResourceStates::UnorderedAccessBit);
CmdBarrier(0, NULL, 1, &barrier);
}
// grab last frame's result
uint32_t* shaderIndices = (uint32_t*)MapBuffer(traceReadbackBuffer);
const uint32_t shaderIndex = shaderIndices[RHI::GetFrameIndex() ^ 1];
UnmapBuffer(traceReadbackBuffer);
if(shaderIndex < (uint32_t)tr.numShaders)
{
tr.tracedWorldShaderIndex = (int)shaderIndex;
}
}
}
void World::Begin() void World::Begin()
{ {
grp.renderMode = RenderMode::World; grp.renderMode = RenderMode::World;
@ -501,6 +545,10 @@ void World::EndBatch()
pixelRC.noiseScale = r_ditherStrength->value; pixelRC.noiseScale = r_ditherStrength->value;
pixelRC.invBrightness = 1.0f / r_brightness->value; pixelRC.invBrightness = 1.0f / r_brightness->value;
pixelRC.invGamma = 1.0f / r_gamma->value; pixelRC.invGamma = 1.0f / r_gamma->value;
pixelRC.shaderTrace = (uint32_t)!!tr.traceWorldShader;
pixelRC.shaderIndex = (uint32_t)shader->index;
pixelRC.frameIndex = RHI::GetFrameIndex();
pixelRC.centerPixel = (glConfig.vidWidth / 2) | ((glConfig.vidHeight / 2) << 16);
for(int s = 0; s < pipeline.numStages; ++s) for(int s = 0; s < pipeline.numStages; ++s)
{ {
const image_t* image = GetBundleImage(shader->stages[pipeline.firstStage + s]->bundle); const image_t* image = GetBundleImage(shader->stages[pipeline.firstStage + s]->bundle);

View File

@ -165,13 +165,22 @@ VOut vs(VIn input)
cbuffer RootConstants cbuffer RootConstants
{ {
// @TODO: 16 bits per stage: low 12 = texture, high 4 = sampler // general
//uint stageIndices[4]; uint4 stageIndices0; // low 16 = texture, high 16 = sampler
// low 16 = texture, high 16 = sampler
uint4 stageIndices0;
uint4 stageIndices1; uint4 stageIndices1;
float greyscale; float greyscale;
float pad0;
float pad1;
float pad2;
// shader trace
uint shaderTrace;
uint shaderIndex;
uint frameIndex;
uint centerPixel; // x | (y << 16)
#if DITHER #if DITHER
// dither
float frameSeed; float frameSeed;
float noiseScale; float noiseScale;
float invGamma; float invGamma;
@ -181,6 +190,7 @@ cbuffer RootConstants
Texture2D textures2D[4096] : register(t0); Texture2D textures2D[4096] : register(t0);
SamplerState samplers[96] : register(s0); SamplerState samplers[96] : register(s0);
RWByteAddressBuffer shaderIndexBuffer : register(u0);
#define GLS_SRCBLEND_ZERO 0x00000001 #define GLS_SRCBLEND_ZERO 0x00000001
#define GLS_SRCBLEND_ONE 0x00000002 #define GLS_SRCBLEND_ONE 0x00000002
@ -328,6 +338,17 @@ float4 ps(VOut input) : SV_Target
dst = Dither(dst, input.position.xyz, frameSeed, noiseScale, invBrightness, invGamma); dst = Dither(dst, input.position.xyz, frameSeed, noiseScale, invBrightness, invGamma);
#endif #endif
if(shaderTrace)
{
// we only store the shader index of 1 pixel
uint2 fragmentCoords = uint2(input.position.xy);
uint2 centerCoords = uint2(centerPixel & 0xFFFF, centerPixel >> 16);
if(all(fragmentCoords == centerCoords))
{
shaderIndexBuffer.Store(frameIndex * 4, shaderIndex);
}
}
return dst; return dst;
} }

View File

@ -2988,14 +2988,23 @@ namespace RHI
{ {
D3D12_SHADER_RESOURCE_VIEW_DESC srv = {}; D3D12_SHADER_RESOURCE_VIEW_DESC srv = {};
srv.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; srv.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
//srv.Format = DXGI_FORMAT_UNKNOWN; // @TODO: structured buffer
srv.Format = DXGI_FORMAT_R32_TYPELESS;
srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
srv.Buffer.FirstElement = 0; srv.Buffer.FirstElement = 0;
srv.Buffer.NumElements = rhiDesc.byteCount / 4; if(rhiDesc.structureByteCount > 0)
srv.Buffer.StructureByteStride = 0; {
//srv.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; // @TODO: structured buffer srv.Format = DXGI_FORMAT_UNKNOWN;
srv.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
srv.Buffer.NumElements = rhiDesc.byteCount / rhiDesc.structureByteCount;
srv.Buffer.StructureByteStride = rhiDesc.structureByteCount;
srv.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE;
}
else
{
srv.Format = DXGI_FORMAT_R32_TYPELESS;
srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
srv.Buffer.NumElements = rhiDesc.byteCount / 4;
srv.Buffer.StructureByteStride = 0;
srv.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
}
srvIndex = rhi.descHeapGeneric.CreateSRV(resource, srv); srvIndex = rhi.descHeapGeneric.CreateSRV(resource, srv);
} }
@ -3009,19 +3018,28 @@ namespace RHI
} }
uint32_t uavIndex = InvalidDescriptorIndex; uint32_t uavIndex = InvalidDescriptorIndex;
// @TODO: if(rhiDesc.initialState & ResourceStates::UnorderedAccessBit)
/*if(rhiDesc.initialState & ResourceStates::UnorderedAccessBit)
{ {
D3D12_UNORDERED_ACCESS_VIEW_DESC uav = { 0 }; D3D12_UNORDERED_ACCESS_VIEW_DESC uav = { 0 };
uav.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; uav.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
uav.Format = DXGI_FORMAT_R8G8B8A8_UNORM; // @TODO: is this field needed? uav.Buffer.CounterOffsetInBytes = 0;
uav.Buffer.CounterOffsetInBytes = ;
uav.Buffer.FirstElement = 0; uav.Buffer.FirstElement = 0;
uav.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; // flag RAW? if(rhiDesc.structureByteCount > 0)
uav.Buffer.NumElements = ; {
uav.Buffer.StructureByteStride = ; uav.Format = DXGI_FORMAT_UNKNOWN;
uav.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE;
uav.Buffer.NumElements = rhiDesc.byteCount / rhiDesc.structureByteCount;
uav.Buffer.StructureByteStride = rhiDesc.structureByteCount;
}
else
{
uav.Format = DXGI_FORMAT_R32_TYPELESS;
uav.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
uav.Buffer.NumElements = rhiDesc.byteCount / 4;
uav.Buffer.StructureByteStride = 0;
}
uavIndex = rhi.descHeapGeneric.CreateUAV(resource, uav); uavIndex = rhi.descHeapGeneric.CreateUAV(resource, uav);
}*/ }
Buffer buffer = {}; Buffer buffer = {};
buffer.desc = rhiDesc; buffer.desc = rhiDesc;
@ -3531,9 +3549,19 @@ namespace RHI
for(uint32_t i = 0; i < update.resourceCount; ++i) for(uint32_t i = 0; i < update.resourceCount; ++i)
{ {
const Texture& texture = rhi.textures.Get(update.textures[i]); const Texture& texture = rhi.textures.Get(update.textures[i]);
Q_assert(texture.srvIndex != InvalidDescriptorIndex);
CopyDescriptor(table.genericHeap, update.firstIndex + i, rhi.descHeapGeneric, texture.srvIndex); CopyDescriptor(table.genericHeap, update.firstIndex + i, rhi.descHeapGeneric, texture.srvIndex);
} }
} }
else if(update.type == DescriptorType::RWBuffer && table.genericHeap)
{
for(uint32_t i = 0; i < update.resourceCount; ++i)
{
const Buffer& buffer = rhi.buffers.Get(update.buffers[i]);
Q_assert(buffer.uavIndex != InvalidDescriptorIndex);
CopyDescriptor(table.genericHeap, update.firstIndex + i, rhi.descHeapGeneric, buffer.uavIndex);
}
}
else if(update.type == DescriptorType::RWTexture && table.genericHeap) else if(update.type == DescriptorType::RWTexture && table.genericHeap)
{ {
uint32_t destIndex = update.firstIndex; uint32_t destIndex = update.firstIndex;
@ -3556,6 +3584,7 @@ namespace RHI
for(uint32_t m = start; m < end; ++m) for(uint32_t m = start; m < end; ++m)
{ {
Q_assert(texture.mips[m].uavIndex != InvalidDescriptorIndex);
CopyDescriptor(table.genericHeap, destIndex++, rhi.descHeapGeneric, texture.mips[m].uavIndex); CopyDescriptor(table.genericHeap, destIndex++, rhi.descHeapGeneric, texture.mips[m].uavIndex);
} }
} }
@ -3566,6 +3595,7 @@ namespace RHI
{ {
Handle htype, index, gen; Handle htype, index, gen;
DecomposeHandle(&htype, &index, &gen, update.samplers[i].v); DecomposeHandle(&htype, &index, &gen, update.samplers[i].v);
Q_assert(index != InvalidDescriptorIndex);
CopyDescriptor(table.samplerHeap, update.firstIndex + i, rhi.descHeapSamplers, index); CopyDescriptor(table.samplerHeap, update.firstIndex + i, rhi.descHeapSamplers, index);
} }
} }
@ -4223,6 +4253,16 @@ namespace RHI
rhi.commandList->OMSetStencilRef((UINT)stencilRef); rhi.commandList->OMSetStencilRef((UINT)stencilRef);
} }
// Records a buffer-to-buffer copy on the current command list.
// Both buffers are copied from/to offset 0 and the number of bytes copied
// is the smaller of the two buffers' byte counts.
void CmdCopyBuffer(HBuffer dest, HBuffer source)
{
	Q_assert(CanWriteCommands());

	const Buffer& destBuffer = rhi.buffers.Get(dest);
	const Buffer& sourceBuffer = rhi.buffers.Get(source);

	// clamp to the smaller buffer so the copy stays within both resources
	const UINT64 copyByteCount = min(sourceBuffer.desc.byteCount, destBuffer.desc.byteCount);
	rhi.commandList->CopyBufferRegion(destBuffer.buffer, 0, sourceBuffer.buffer, 0, copyByteCount);
}
uint32_t GetDurationCount() uint32_t GetDurationCount()
{ {
return rhi.resolvedQueries.durationQueryCount; return rhi.resolvedQueries.durationQueryCount;

View File

@ -439,6 +439,7 @@ namespace RHI
ResourceStates::Flags initialState = ResourceStates::Common; ResourceStates::Flags initialState = ResourceStates::Common;
MemoryUsage::Id memoryUsage = MemoryUsage::GPU; MemoryUsage::Id memoryUsage = MemoryUsage::GPU;
bool committedResource = false; bool committedResource = false;
uint32_t structureByteCount = 0; // > 0 means structured buffer, == 0 means byte address buffer
}; };
struct TextureDesc struct TextureDesc
@ -713,6 +714,7 @@ namespace RHI
void CmdBeginDebugLabel(const char* name, float r = 1.0f, float g = 1.0f, float b = 1.0f); void CmdBeginDebugLabel(const char* name, float r = 1.0f, float g = 1.0f, float b = 1.0f);
void CmdEndDebugLabel(); void CmdEndDebugLabel();
void CmdSetStencilReference(uint8_t stencilRef); void CmdSetStencilReference(uint8_t stencilRef);
void CmdCopyBuffer(HBuffer dest, HBuffer source);
#if 0 #if 0
void CmdClearUAV(HTexture htexture, uint32_t mip); void CmdClearUAV(HTexture htexture, uint32_t mip);

View File

@ -520,6 +520,32 @@ static void DrawShaderList()
ClearShaderReplacements(); ClearShaderReplacements();
} }
if(tr.world != NULL)
{
if(tr.traceWorldShader)
{
if(ImGui::Button("Disable world shader tracing"))
{
tr.traceWorldShader = qfalse;
}
if((uint32_t)tr.tracedWorldShaderIndex < (uint32_t)tr.numShaders)
{
shader_t* shader = tr.shaders[tr.tracedWorldShaderIndex];
if(ImGui::Selectable(va("%s##world_shader_trace", shader->name), false))
{
OpenShaderDetails(shader);
}
}
}
else
{
if(ImGui::Button("Enable world shader tracing"))
{
tr.traceWorldShader = qtrue;
}
}
}
static char filter[256]; static char filter[256];
DrawFilter(filter, sizeof(filter)); DrawFilter(filter, sizeof(filter));

View File

@ -951,6 +951,9 @@ typedef struct {
shader_t* shaders[MAX_SHADERS]; shader_t* shaders[MAX_SHADERS];
shader_t* sortedShaders[MAX_SHADERS]; shader_t* sortedShaders[MAX_SHADERS];
qbool traceWorldShader;
int tracedWorldShaderIndex;
int numSkins; int numSkins;
skin_t* skins[MAX_SKINS]; skin_t* skins[MAX_SKINS];