diff --git a/code/renderer/grp_local.h b/code/renderer/grp_local.h index d325e57..22f34bb 100644 --- a/code/renderer/grp_local.h +++ b/code/renderer/grp_local.h @@ -47,8 +47,20 @@ struct WorldVertexRC struct WorldPixelRC { + // general uint32_t stageIndices[8]; float greyscale; + float pad0; + float pad1; + float pad2; + + // r_shaderTrace - dynamically enabled + uint32_t shaderTrace; + uint32_t shaderIndex; + uint32_t frameIndex; + uint32_t centerPixel; // x | (y << 16) + + // r_dither - statically enabled float frameSeed; float noiseScale; float invGamma; @@ -291,6 +303,7 @@ struct World { void Init(); void BeginFrame(); + void EndFrame(); void Begin(); void End(); void DrawPrePass(); @@ -363,6 +376,10 @@ struct World HPipeline fogInsidePipeline; HBuffer boxVertexBuffer; HBuffer boxIndexBuffer; + + // shader trace + HBuffer traceRenderBuffer; + HBuffer traceReadbackBuffer; }; struct UI diff --git a/code/renderer/grp_main.cpp b/code/renderer/grp_main.cpp index 7e33dde..7e618b4 100644 --- a/code/renderer/grp_main.cpp +++ b/code/renderer/grp_main.cpp @@ -206,6 +206,7 @@ void GRP::Init() desc.samplerVisibility = ShaderStages::PixelBit; desc.genericVisibility = ShaderStages::PixelBit; desc.AddRange(DescriptorType::Texture, 0, MAX_DRAWIMAGES * 2); + desc.AddRange(DescriptorType::RWBuffer, MAX_DRAWIMAGES * 2, 1); rootSignatureDesc = desc; rootSignature = CreateRootSignature(desc); @@ -309,6 +310,7 @@ void GRP::EndFrame() R_DrawGUI(); imgui.Draw(); post.Draw(); + world.EndFrame(); RHI::EndFrame(); if(rhie.presentToPresentUS > 0) diff --git a/code/renderer/grp_world.cpp b/code/renderer/grp_world.cpp index e256926..544e2d2 100644 --- a/code/renderer/grp_world.cpp +++ b/code/renderer/grp_world.cpp @@ -262,6 +262,23 @@ void World::Init() memcpy(mapped, vertices, sizeof(vertices)); EndBufferUpload(boxVertexBuffer); } + + // + // shader trace + // + { + BufferDesc desc("shader trace opaque", 2 * sizeof(uint32_t), ResourceStates::UnorderedAccessBit); + traceRenderBuffer = CreateBuffer(desc); + + DescriptorTableUpdate update; + update.SetRWBuffers(1, &traceRenderBuffer, MAX_DRAWIMAGES * 2); + UpdateDescriptorTable(grp.descriptorTable, update); + } + { + BufferDesc desc("shader trace opaque readback", 2 * sizeof(uint32_t), ResourceStates::Common); + desc.memoryUsage = MemoryUsage::Readback; + traceReadbackBuffer = CreateBuffer(desc); + } } // @@ -309,6 +326,33 @@ void World::BeginFrame() psoChangeCount = 0; // read by the GUI code } +void World::EndFrame() +{ + tr.tracedWorldShaderIndex = -1; + if(tr.traceWorldShader && tr.world != NULL) + { + // schedule a GPU -> CPU transfer + { + BufferBarrier barrier(traceRenderBuffer, ResourceStates::CopySourceBit); + CmdBarrier(0, NULL, 1, &barrier); + } + CmdCopyBuffer(traceReadbackBuffer, traceRenderBuffer); + { + BufferBarrier barrier(traceRenderBuffer, ResourceStates::UnorderedAccessBit); + CmdBarrier(0, NULL, 1, &barrier); + } + + // grab last frame's result + uint32_t* shaderIndices = (uint32_t*)MapBuffer(traceReadbackBuffer); + const uint32_t shaderIndex = shaderIndices[RHI::GetFrameIndex() ^ 1]; + UnmapBuffer(traceReadbackBuffer); + if(shaderIndex < (uint32_t)tr.numShaders) + { + tr.tracedWorldShaderIndex = (int)shaderIndex; + } + } +} + void World::Begin() { grp.renderMode = RenderMode::World; @@ -501,6 +545,10 @@ void World::EndBatch() pixelRC.noiseScale = r_ditherStrength->value; pixelRC.invBrightness = 1.0f / r_brightness->value; pixelRC.invGamma = 1.0f / r_gamma->value; + pixelRC.shaderTrace = (uint32_t)!!tr.traceWorldShader; + pixelRC.shaderIndex = (uint32_t)shader->index; + pixelRC.frameIndex = RHI::GetFrameIndex(); + pixelRC.centerPixel = (glConfig.vidWidth / 2) | ((glConfig.vidHeight / 2) << 16); for(int s = 0; s < pipeline.numStages; ++s) { const image_t* image = GetBundleImage(shader->stages[pipeline.firstStage + s]->bundle); diff --git a/code/renderer/hlsl/uber_shader.hlsl b/code/renderer/hlsl/uber_shader.hlsl index a4f8075..ac870b4 100644 --- a/code/renderer/hlsl/uber_shader.hlsl +++ b/code/renderer/hlsl/uber_shader.hlsl @@ -165,13 +165,22 @@ VOut vs(VIn input) cbuffer RootConstants { - // @TODO: 16 bits per stage: low 12 = texture, high 4 = sampler - //uint stageIndices[4]; - // low 16 = texture, high 16 = sampler - uint4 stageIndices0; + // general + uint4 stageIndices0; // low 16 = texture, high 16 = sampler uint4 stageIndices1; float greyscale; + float pad0; + float pad1; + float pad2; + + // shader trace + uint shaderTrace; + uint shaderIndex; + uint frameIndex; + uint centerPixel; // x | (y << 16) + #if DITHER + // dither float frameSeed; float noiseScale; float invGamma; @@ -181,6 +190,7 @@ cbuffer RootConstants Texture2D textures2D[4096] : register(t0); SamplerState samplers[96] : register(s0); +RWByteAddressBuffer shaderIndexBuffer : register(u0); #define GLS_SRCBLEND_ZERO 0x00000001 #define GLS_SRCBLEND_ONE 0x00000002 @@ -328,6 +338,17 @@ float4 ps(VOut input) : SV_Target dst = Dither(dst, input.position.xyz, frameSeed, noiseScale, invBrightness, invGamma); #endif + if(shaderTrace) + { + // we only store the shader index of 1 pixel + uint2 fragmentCoords = uint2(input.position.xy); + uint2 centerCoords = uint2(centerPixel & 0xFFFF, centerPixel >> 16); + if(all(fragmentCoords == centerCoords)) + { + shaderIndexBuffer.Store(frameIndex * 4, shaderIndex); + } + } + return dst; } diff --git a/code/renderer/rhi_d3d12.cpp b/code/renderer/rhi_d3d12.cpp index 2342191..45ba184 100644 --- a/code/renderer/rhi_d3d12.cpp +++ b/code/renderer/rhi_d3d12.cpp @@ -2988,14 +2988,23 @@ namespace RHI { D3D12_SHADER_RESOURCE_VIEW_DESC srv = {}; srv.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; - //srv.Format = DXGI_FORMAT_UNKNOWN; // @TODO: structured buffer - srv.Format = DXGI_FORMAT_R32_TYPELESS; - srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; srv.Buffer.FirstElement = 0; - srv.Buffer.NumElements = rhiDesc.byteCount / 4; - srv.Buffer.StructureByteStride = 0; - //srv.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; // @TODO: structured buffer - srv.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; + if(rhiDesc.structureByteCount > 0) + { + srv.Format = DXGI_FORMAT_UNKNOWN; + srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srv.Buffer.NumElements = rhiDesc.byteCount / rhiDesc.structureByteCount; + srv.Buffer.StructureByteStride = rhiDesc.structureByteCount; + srv.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; + } + else + { + srv.Format = DXGI_FORMAT_R32_TYPELESS; + srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srv.Buffer.NumElements = rhiDesc.byteCount / 4; + srv.Buffer.StructureByteStride = 0; + srv.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; + } srvIndex = rhi.descHeapGeneric.CreateSRV(resource, srv); } @@ -3009,19 +3018,28 @@ namespace RHI } uint32_t uavIndex = InvalidDescriptorIndex; - // @TODO: - /*if(rhiDesc.initialState & ResourceStates::UnorderedAccessBit) + if(rhiDesc.initialState & ResourceStates::UnorderedAccessBit) { D3D12_UNORDERED_ACCESS_VIEW_DESC uav = { 0 }; uav.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; - uav.Format = DXGI_FORMAT_R8G8B8A8_UNORM; // @TODO: is this field needed? - uav.Buffer.CounterOffsetInBytes = ; + uav.Buffer.CounterOffsetInBytes = 0; uav.Buffer.FirstElement = 0; - uav.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; // flag RAW? - uav.Buffer.NumElements = ; - uav.Buffer.StructureByteStride = ; + if(rhiDesc.structureByteCount > 0) + { + uav.Format = DXGI_FORMAT_UNKNOWN; + uav.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; + uav.Buffer.NumElements = rhiDesc.byteCount / rhiDesc.structureByteCount; + uav.Buffer.StructureByteStride = rhiDesc.structureByteCount; + } + else + { + uav.Format = DXGI_FORMAT_R32_TYPELESS; + uav.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; + uav.Buffer.NumElements = rhiDesc.byteCount / 4; + uav.Buffer.StructureByteStride = 0; + } uavIndex = rhi.descHeapGeneric.CreateUAV(resource, uav); - }*/ + } Buffer buffer = {}; buffer.desc = rhiDesc; @@ -3531,9 +3549,19 @@ namespace RHI for(uint32_t i = 0; i < update.resourceCount; ++i) { const Texture& texture = rhi.textures.Get(update.textures[i]); + Q_assert(texture.srvIndex != InvalidDescriptorIndex); CopyDescriptor(table.genericHeap, update.firstIndex + i, rhi.descHeapGeneric, texture.srvIndex); } } + else if(update.type == DescriptorType::RWBuffer && table.genericHeap) + { + for(uint32_t i = 0; i < update.resourceCount; ++i) + { + const Buffer& buffer = rhi.buffers.Get(update.buffers[i]); + Q_assert(buffer.uavIndex != InvalidDescriptorIndex); + CopyDescriptor(table.genericHeap, update.firstIndex + i, rhi.descHeapGeneric, buffer.uavIndex); + } + } else if(update.type == DescriptorType::RWTexture && table.genericHeap) { uint32_t destIndex = update.firstIndex; @@ -3556,6 +3584,7 @@ namespace RHI for(uint32_t m = start; m < end; ++m) { + Q_assert(texture.mips[m].uavIndex != InvalidDescriptorIndex); CopyDescriptor(table.genericHeap, destIndex++, rhi.descHeapGeneric, texture.mips[m].uavIndex); } } @@ -3566,6 +3595,7 @@ namespace RHI { Handle htype, index, gen; DecomposeHandle(&htype, &index, &gen, update.samplers[i].v); + Q_assert(index != InvalidDescriptorIndex); CopyDescriptor(table.samplerHeap, update.firstIndex + i, rhi.descHeapSamplers, index); } } @@ -4223,6 +4253,16 @@ namespace RHI rhi.commandList->OMSetStencilRef((UINT)stencilRef); } + void CmdCopyBuffer(HBuffer dest, HBuffer source) + { + Q_assert(CanWriteCommands()); + + const Buffer& dst = rhi.buffers.Get(dest); + const Buffer& src = rhi.buffers.Get(source); + const UINT64 byteCount = min(src.desc.byteCount, dst.desc.byteCount); + rhi.commandList->CopyBufferRegion(dst.buffer, 0, src.buffer, 0, byteCount); + } + uint32_t GetDurationCount() { return rhi.resolvedQueries.durationQueryCount; diff --git a/code/renderer/rhi_local.h b/code/renderer/rhi_local.h index b0730e6..4efab07 100644 --- a/code/renderer/rhi_local.h +++ b/code/renderer/rhi_local.h @@ -439,6 +439,7 @@ namespace RHI ResourceStates::Flags initialState = ResourceStates::Common; MemoryUsage::Id memoryUsage = MemoryUsage::GPU; bool committedResource = false; + uint32_t structureByteCount = 0; // > 0 means structured buffer, == 0 means byte address buffer }; struct TextureDesc @@ -713,6 +714,7 @@ namespace RHI void CmdBeginDebugLabel(const char* name, float r = 1.0f, float g = 1.0f, float b = 1.0f); void CmdEndDebugLabel(); void CmdSetStencilReference(uint8_t stencilRef); + void CmdCopyBuffer(HBuffer dest, HBuffer source); #if 0 void CmdClearUAV(HTexture htexture, uint32_t mip); diff --git a/code/renderer/tr_gui.cpp b/code/renderer/tr_gui.cpp index 2444584..b72717d 100644 --- a/code/renderer/tr_gui.cpp +++ b/code/renderer/tr_gui.cpp @@ -520,6 +520,32 @@ static void DrawShaderList() ClearShaderReplacements(); } + if(tr.world != NULL) + { + if(tr.traceWorldShader) + { + if(ImGui::Button("Disable world shader tracing")) + { + tr.traceWorldShader = qfalse; + } + if((uint32_t)tr.tracedWorldShaderIndex < (uint32_t)tr.numShaders) + { + shader_t* shader = tr.shaders[tr.tracedWorldShaderIndex]; + if(ImGui::Selectable(va("%s##world_shader_trace", shader->name), false)) + { + OpenShaderDetails(shader); + } + } + } + else + { + if(ImGui::Button("Enable world shader tracing")) + { + tr.traceWorldShader = qtrue; + } + } + } + static char filter[256]; DrawFilter(filter, sizeof(filter)); diff --git a/code/renderer/tr_local.h b/code/renderer/tr_local.h index 7760a2b..ac75885 100644 --- a/code/renderer/tr_local.h +++ b/code/renderer/tr_local.h @@ -951,6 +951,9 @@ typedef struct { shader_t* shaders[MAX_SHADERS]; shader_t* sortedShaders[MAX_SHADERS]; + qbool traceWorldShader; + int tracedWorldShaderIndex; + int numSkins; skin_t* skins[MAX_SKINS];