/* =========================================================================== Copyright (C) 2023-2024 Gian 'myT' Schellenbaum This file is part of Challenge Quake 3 (CNQ3). Challenge Quake 3 is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. Challenge Quake 3 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Challenge Quake 3. If not, see . =========================================================================== */ // Shared Rendering Pipeline - core functionality #include "srp_local.h" #include "../client/cl_imgui.h" #include "shaders/crp/world.h.hlsli" extern IRenderPipeline* grpp; extern IRenderPipeline* crpp; SRP srp; IRenderPipeline* renderPipeline; static ImPlotPoint FrameTimeGetter(int index, void*) { const FrameStats& fs = srp.frameStats; const int realIndex = (fs.frameIndex + index) % fs.frameCount; const float value = fs.p2pMS[realIndex]; ImPlotPoint p; p.x = index; p.y = value; return p; } static void UpdateAnimatedImage(image_t* image, int w, int h, const byte* data, qbool dirty) { if(w != image->width || h != image->height) { // @TODO: ? /*image->width = w; image->height = h; CreateTexture(&d3d.textures[image->texnum], image, 1, w, h); GAL_UpdateTexture(image, 0, 0, 0, w, h, data);*/ } else if(dirty) { // @TODO: ? //GAL_UpdateTexture(image, 0, 0, 0, w, h, data); } } const image_t* GetBundleImage(const textureBundle_t& bundle) { return R_UpdateAndGetBundleImage(&bundle, &UpdateAnimatedImage); } uint32_t GetBaseSamplerIndex(textureWrap_t wrap, TextureFilter::Id filter, uint32_t minLOD) { Q_assert((uint32_t)wrap < TW_COUNT); Q_assert((uint32_t)filter < TextureFilter::Count); const uint32_t baseIndex = (uint32_t)filter + (uint32_t)TextureFilter::Count * (uint32_t)wrap + (uint32_t)TextureFilter::Count * (uint32_t)TW_COUNT * minLOD; return baseIndex; } uint32_t GetSamplerIndex(textureWrap_t wrap, TextureFilter::Id filter, uint32_t minLOD) { const uint32_t baseIndex = GetBaseSamplerIndex(wrap, filter, minLOD); const uint32_t descIndex = renderPipeline->GetSamplerDescriptorIndexFromBaseIndex(baseIndex); return descIndex; } uint32_t GetSamplerIndex(const image_t* image) { TextureFilter::Id filter = TextureFilter::Anisotropic; if(r_lego->integer && srp.renderMode == RenderMode::World && (image->flags & (IMG_LMATLAS | IMG_EXTLMATLAS | IMG_NOPICMIP)) == 0) { filter = TextureFilter::Point; } else if((image->flags & IMG_NOAF) != 0 || srp.renderMode != RenderMode::World) { filter = TextureFilter::Linear; } int minLOD = 0; if(srp.renderMode == RenderMode::World && (image->flags & IMG_NOPICMIP) == 0) { minLOD = Com_ClampInt(0, MaxTextureMips - 1, r_picmip->integer); } return GetSamplerIndex(image->wrapClampMode, filter, (uint32_t)minLOD); } void ReadTextureImage(void* outPixels, HTexture hreadback, int w, int h, int alignment, colorSpace_t colorSpace) { MappedTexture mapped; BeginTextureReadback(mapped, hreadback); byte* const out0 = (byte*)outPixels; const byte* const in0 = mapped.mappedData; if(colorSpace == CS_RGBA) { const int dstRowSizeNoPadding = w * 4; mapped.dstRowByteCount = AlignUp(dstRowSizeNoPadding, alignment); for(int y = 0; y < mapped.rowCount; ++y) { byte* out = out0 + (mapped.rowCount - 1 - y) * mapped.dstRowByteCount; const byte* in = in0 + y * mapped.srcRowByteCount; memcpy(out, in, dstRowSizeNoPadding); } } else if(colorSpace == CS_BGR) { mapped.dstRowByteCount = AlignUp(w * 3, alignment); for(int y = 0; y < mapped.rowCount; ++y) { byte* out = out0 + (mapped.rowCount - 1 - y) * mapped.dstRowByteCount; const byte* in = in0 + y * mapped.srcRowByteCount; for(int x = 0; x < mapped.columnCount; ++x) { out[2] = in[0]; out[1] = in[1]; out[0] = in[2]; out += 3; in += 4; } } } else { Q_assert(!"Unsupported color space"); } EndTextureReadback(); } void UpdateEntityData(bool& depthHack, int entityNum, double originalTime) { depthHack = false; if(entityNum != ENTITYNUM_WORLD) { backEnd.currentEntity = &backEnd.refdef.entities[entityNum]; if(backEnd.currentEntity->intShaderTime) { backEnd.refdef.floatTime = originalTime - (double)backEnd.currentEntity->e.shaderTime.iShaderTime / 1000.0; } else { backEnd.refdef.floatTime = originalTime - backEnd.currentEntity->e.shaderTime.fShaderTime; } // we have to reset the shaderTime as well otherwise image animations start // from the wrong frame tess.shaderTime = backEnd.refdef.floatTime - tess.shader->timeOffset; // set up the transformation matrix R_RotateForEntity(backEnd.currentEntity, &backEnd.viewParms, &backEnd.orient, backEnd.modelMatrix); if(backEnd.currentEntity->e.renderfx & RF_DEPTHHACK) { depthHack = true; } } else { backEnd.currentEntity = &tr.worldEntity; backEnd.refdef.floatTime = originalTime; backEnd.orient = backEnd.viewParms.world; R_MakeIdentityMatrix(backEnd.modelMatrix); // we have to reset the shaderTime as well otherwise image animations on // the world (like water) continue with the wrong frame tess.shaderTime = backEnd.refdef.floatTime - tess.shader->timeOffset; } } cullType_t GetMirrorredCullType(cullType_t cullType) { switch(cullType) { case CT_BACK_SIDED: return CT_FRONT_SIDED; case CT_FRONT_SIDED: return CT_BACK_SIDED; default: return CT_TWO_SIDED; } } uint32_t AlphaTestShaderConstFromStateBits(unsigned int stateBits) { stateBits &= GLS_ATEST_BITS; switch(stateBits) { case GLS_ATEST_GE_80: return ATEST_GE_HALF; case GLS_ATEST_GT_0: return ATEST_GT_0; case GLS_ATEST_LT_80: return ATEST_LT_HALF; default: return ATEST_NONE; } } void R_SelectRenderPipeline() { if(r_pipeline->integer == 0) { renderPipeline = grpp; } else { renderPipeline = crpp; } } void FrameStats::EndFrame() { frameCount = min(frameCount + 1, (int)MaxFrames); frameIndex = (frameIndex + 1) % MaxFrames; Com_StatsFromArray(p2pMS, frameCount, temp, &p2pStats); } void RenderPassStats::EndFrame(uint32_t cpu, uint32_t gpu) { static uint32_t tempSamples[MaxStatsFrameCount]; samplesCPU[index] = cpu; samplesGPU[index] = gpu; count = min(count + 1, (uint32_t)MaxStatsFrameCount); index = (index + 1) % MaxStatsFrameCount; Com_StatsFromArray((const int*)samplesCPU, count, (int*)tempSamples, &statsCPU); Com_StatsFromArray((const int*)samplesGPU, count, (int*)tempSamples, &statsGPU); } uint32_t SRP::BeginRenderPass(const char* name, float r, float g, float b) { if(!enableRenderPassQueries) { CmdBeginDebugLabel(name, r, g, b); return 0xDEADBEEF; } RenderPassFrame& f = renderPasses[tr.frameCount % FrameCount]; if(f.count >= ARRAY_LEN(f.passes)) { Q_assert(0); return UINT32_MAX; } CmdBeginDebugLabel(name, r, g, b); const uint32_t index = f.count++; RenderPassQueries& q = f.passes[index]; Q_strncpyz(q.name, name, sizeof(q.name)); q.cpuStartUS = Sys_Microseconds(); q.queryIndex = CmdBeginDurationQuery(); return index; } void SRP::EndRenderPass(uint32_t index) { if(!enableRenderPassQueries) { CmdEndDebugLabel(); return; } Q_assert(index != 0xDEADBEEF); // enableRenderPassQueries should still be false! RenderPassFrame& f = renderPasses[tr.frameCount % FrameCount]; if(index >= f.count) { Q_assert(0); return; } CmdEndDebugLabel(); RenderPassQueries& q = f.passes[index]; q.cpuDurationUS = (uint32_t)(Sys_Microseconds() - q.cpuStartUS); CmdEndDurationQuery(q.queryIndex); } void SRP::DrawGUI() { uint32_t durations[MaxDurationQueries]; GetDurations(durations); wholeFrameStats.EndFrame(rhie.renderToPresentUS, durations[0]); const RenderPassFrame& currFrame = renderPasses[(tr.frameCount % FrameCount) ^ 1]; RenderPassFrame& tempFrame = tempRenderPasses; // see if the render pass list is the same as the previous frame's bool sameRenderPass = true; if(currFrame.count == tempRenderPasses.count) { for(uint32_t p = 0; p < currFrame.count; ++p) { if(Q_stricmp(currFrame.passes[p].name, tempRenderPasses.passes[p].name) != 0) { sameRenderPass = false; break; } } } else { sameRenderPass = false; } // write out the displayed timings into the temp buffer tempFrame.count = currFrame.count; if(sameRenderPass) { for(uint32_t p = 0; p < currFrame.count; ++p) { const uint32_t index = currFrame.passes[p].queryIndex; if(index < MaxDurationQueries) { renderPassStats[p].EndFrame(currFrame.passes[p].cpuDurationUS, durations[index]); tempFrame.passes[p].gpuDurationUS = renderPassStats[p].statsGPU.median; tempFrame.passes[p].cpuDurationUS = renderPassStats[p].statsCPU.median; } } } else { for(uint32_t p = 0; p < currFrame.count; ++p) { const uint32_t index = currFrame.passes[p].queryIndex; if(index < MaxDurationQueries) { tempFrame.passes[p].gpuDurationUS = durations[index]; tempFrame.passes[p].cpuDurationUS = currFrame.passes[p].cpuDurationUS; } } } static bool breakdownActive = false; ToggleBooleanWithShortcut(breakdownActive, ImGuiKey_F); GUI_AddMainMenuItem(GUI_MainMenu::Perf, "Frame breakdown", "Ctrl+F", &breakdownActive); if(breakdownActive) { if(ImGui::Begin("Frame breakdown", &breakdownActive, ImGuiWindowFlags_AlwaysAutoResize)) { if(BeginTable("Frame breakdown", 3)) { TableHeader(3, "Pass", "GPU [us]", "CPU [us]"); TableRow(3, "Whole frame", va("%d", (int)wholeFrameStats.statsGPU.median), va("%d", (int)wholeFrameStats.statsCPU.median)); for(uint32_t p = 0; p < currFrame.count; ++p) { const RenderPassQueries& rp = tempFrame.passes[p]; if(rp.queryIndex < MaxDurationQueries) { TableRow(3, rp.name, va("%d", (int)rp.gpuDurationUS), va("%d", (int)rp.cpuDurationUS)); } } ImGui::EndTable(); } if(psoStatsValid) { ImGui::Text("PSO count: %d", (int)psoCount); ImGui::Text("PSO changes: %d", (int)psoChangeCount); } } ImGui::End(); } // save the current render pass list in the temp buffer memcpy(&tempFrame, &currFrame, sizeof(tempFrame)); static bool frameTimeActive = false; GUI_AddMainMenuItem(GUI_MainMenu::Perf, "Frame stats", NULL, &frameTimeActive); if(frameTimeActive) { if(ImGui::Begin("Frame stats", &frameTimeActive, ImGuiWindowFlags_AlwaysAutoResize)) { if(BeginTable("Frame stats", 2)) { const FrameStats& fs = frameStats; const stats_t& s = fs.p2pStats; TableRow2("Skipped frames", fs.skippedFrames); TableRow2("Frame time target", rhie.targetFrameDurationMS); TableRow2("Frame time average", s.average); TableRow2("Frame time std dev.", s.stdDev); TableRow2("Input to render", (float)rhie.inputToRenderUS / 1000.0f); TableRow2("Input to present", (float)rhie.inputToPresentUS / 1000.0f); ImGui::EndTable(); } } ImGui::End(); } static bool graphsActive = false; ToggleBooleanWithShortcut(graphsActive, ImGuiKey_G); GUI_AddMainMenuItem(GUI_MainMenu::Perf, "Frame time graphs", "Ctrl+G", &graphsActive); if(graphsActive) { const int windowFlags = ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_NoBackground | ImGuiWindowFlags_NoMove; ImGui::SetNextWindowSize(ImVec2(glConfig.vidWidth, glConfig.vidHeight / 2), ImGuiCond_Always); ImGui::SetNextWindowPos(ImVec2(0, glConfig.vidHeight / 2), ImGuiCond_Always); if(ImGui::Begin("Frame time graphs", &graphsActive, windowFlags)) { const FrameStats& fs = frameStats; const double target = (double)rhie.targetFrameDurationMS; static bool autoFit = false; ImGui::Checkbox("Auto-fit", &autoFit); if(ImPlot::BeginPlot("Frame Times", ImVec2(-1, -1), ImPlotFlags_NoInputs)) { const int axisFlags = 0; // ImPlotAxisFlags_NoTickLabels const int axisFlagsY = axisFlags | (autoFit ? ImPlotAxisFlags_AutoFit : 0); ImPlot::SetupAxes(NULL, NULL, axisFlags, axisFlagsY); ImPlot::SetupAxisLimits(ImAxis_X1, 0, FrameStats::MaxFrames, ImGuiCond_Always); if(!autoFit) { ImPlot::SetupAxisLimits(ImAxis_Y1, max(target - 2.0, 0.0), target + 2.0, ImGuiCond_Always); } ImPlot::SetNextFillStyle(IMPLOT_AUTO_COL, 1.0f); ImPlot::SetNextLineStyle(IMPLOT_AUTO_COL, 1.0f); ImPlot::PlotInfLines("Target", &target, 1, ImPlotInfLinesFlags_Horizontal); ImPlot::SetNextFillStyle(IMPLOT_AUTO_COL, 1.0f); ImPlot::SetNextLineStyle(IMPLOT_AUTO_COL, 1.0f); ImPlot::PlotLineG("Frame Time", &FrameTimeGetter, NULL, fs.frameCount, ImPlotLineFlags_None); ImPlot::EndPlot(); } } ImGui::End(); } GUI_DrawMainMenu(); R_DrawGUI(); } void SRP::BeginFrame() { srp.renderPasses[tr.frameCount % FrameCount].count = 0; R_SetColorMappings(); // nothing is bound to the command list yet! srp.renderMode = RenderMode::None; } void SRP::EndFrame() { tr.tracedWorldShaderIndex = -1; if(tr.traceWorldShader && tr.world != NULL) { CmdBeginBarrier(); CmdBufferBarrier(traceRenderBuffer, ResourceStates::CopySourceBit); CmdEndBarrier(); // schedule a GPU -> CPU transfer CmdCopyBuffer(traceReadbackBuffer, traceRenderBuffer); CmdBeginBarrier(); CmdBufferBarrier(traceRenderBuffer, ResourceStates::UnorderedAccessBit); CmdEndBarrier(); // grab the currently available result uint32_t* const shaderIndices = (uint32_t*)MapBuffer(traceReadbackBuffer); const uint32_t shaderIndex = *shaderIndices; UnmapBuffer(traceReadbackBuffer); if(shaderIndex < (uint32_t)tr.numShaders) { tr.tracedWorldShaderIndex = (int)shaderIndex; } } RHI::EndFrame(); if(rhie.presentToPresentUS > 0) { frameStats.p2pMS[frameStats.frameIndex] = (float)rhie.presentToPresentUS / 1000.0f; frameStats.EndFrame(); } else { frameStats.skippedFrames++; } if(backEnd.renderFrame) { Sys_V_EndFrame(); } } void SRP::CreateShaderTraceBuffers() { { BufferDesc desc("shader trace render", sizeof(uint32_t), ResourceStates::UnorderedAccessBit); traceRenderBuffer = CreateBuffer(desc); } { BufferDesc desc("shader trace readback", sizeof(uint32_t), ResourceStates::Common); desc.memoryUsage = MemoryUsage::Readback; traceReadbackBuffer = CreateBuffer(desc); } }