cnq3/code/renderer/srp_main.cpp

539 lines
14 KiB
C++

/*
===========================================================================
Copyright (C) 2023-2024 Gian 'myT' Schellenbaum
This file is part of Challenge Quake 3 (CNQ3).
Challenge Quake 3 is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.
Challenge Quake 3 is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
===========================================================================
*/
// Shared Rendering Pipeline - core functionality
#include "srp_local.h"
#include "../client/cl_imgui.h"
#include "shaders/crp/world.h.hlsli"
extern IRenderPipeline* grpp;
extern IRenderPipeline* crpp;
SRP srp;
IRenderPipeline* renderPipeline;
static ImPlotPoint FrameTimeGetter(int index, void*)
{
const FrameStats& fs = srp.frameStats;
const int realIndex = (fs.frameIndex + index) % fs.frameCount;
const float value = fs.p2pMS[realIndex];
ImPlotPoint p;
p.x = index;
p.y = value;
return p;
}
static void UpdateAnimatedImage(image_t* image, int w, int h, const byte* data, qbool dirty)
{
if(w != image->width || h != image->height)
{
// @TODO: ?
/*image->width = w;
image->height = h;
CreateTexture(&d3d.textures[image->texnum], image, 1, w, h);
GAL_UpdateTexture(image, 0, 0, 0, w, h, data);*/
}
else if(dirty)
{
// @TODO: ?
//GAL_UpdateTexture(image, 0, 0, 0, w, h, data);
}
}
const image_t* GetBundleImage(const textureBundle_t& bundle)
{
return R_UpdateAndGetBundleImage(&bundle, &UpdateAnimatedImage);
}
uint32_t GetBaseSamplerIndex(textureWrap_t wrap, TextureFilter::Id filter, uint32_t minLOD)
{
Q_assert((uint32_t)wrap < TW_COUNT);
Q_assert((uint32_t)filter < TextureFilter::Count);
const uint32_t baseIndex =
(uint32_t)filter +
(uint32_t)TextureFilter::Count * (uint32_t)wrap +
(uint32_t)TextureFilter::Count * (uint32_t)TW_COUNT * minLOD;
return baseIndex;
}
uint32_t GetSamplerIndex(textureWrap_t wrap, TextureFilter::Id filter, uint32_t minLOD)
{
const uint32_t baseIndex = GetBaseSamplerIndex(wrap, filter, minLOD);
const uint32_t descIndex = renderPipeline->GetSamplerDescriptorIndexFromBaseIndex(baseIndex);
return descIndex;
}
uint32_t GetSamplerIndex(const image_t* image)
{
TextureFilter::Id filter = TextureFilter::Anisotropic;
if(r_lego->integer &&
srp.renderMode == RenderMode::World &&
(image->flags & (IMG_LMATLAS | IMG_EXTLMATLAS | IMG_NOPICMIP)) == 0)
{
filter = TextureFilter::Point;
}
else if((image->flags & IMG_NOAF) != 0 ||
srp.renderMode != RenderMode::World)
{
filter = TextureFilter::Linear;
}
int minLOD = 0;
if(srp.renderMode == RenderMode::World &&
(image->flags & IMG_NOPICMIP) == 0)
{
minLOD = Com_ClampInt(0, MaxTextureMips - 1, r_picmip->integer);
}
return GetSamplerIndex(image->wrapClampMode, filter, (uint32_t)minLOD);
}
void ReadTextureImage(void* outPixels, HTexture hreadback, int w, int h, int alignment, colorSpace_t colorSpace)
{
MappedTexture mapped;
BeginTextureReadback(mapped, hreadback);
byte* const out0 = (byte*)outPixels;
const byte* const in0 = mapped.mappedData;
if(colorSpace == CS_RGBA)
{
const int dstRowSizeNoPadding = w * 4;
mapped.dstRowByteCount = AlignUp(dstRowSizeNoPadding, alignment);
for(int y = 0; y < mapped.rowCount; ++y)
{
byte* out = out0 + (mapped.rowCount - 1 - y) * mapped.dstRowByteCount;
const byte* in = in0 + y * mapped.srcRowByteCount;
memcpy(out, in, dstRowSizeNoPadding);
}
}
else if(colorSpace == CS_BGR)
{
mapped.dstRowByteCount = AlignUp(w * 3, alignment);
for(int y = 0; y < mapped.rowCount; ++y)
{
byte* out = out0 + (mapped.rowCount - 1 - y) * mapped.dstRowByteCount;
const byte* in = in0 + y * mapped.srcRowByteCount;
for(int x = 0; x < mapped.columnCount; ++x)
{
out[2] = in[0];
out[1] = in[1];
out[0] = in[2];
out += 3;
in += 4;
}
}
}
else
{
Q_assert(!"Unsupported color space");
}
EndTextureReadback();
}
void UpdateEntityData(bool& depthHack, int entityNum, double originalTime)
{
depthHack = false;
if(entityNum != ENTITYNUM_WORLD)
{
backEnd.currentEntity = &backEnd.refdef.entities[entityNum];
if(backEnd.currentEntity->intShaderTime)
{
backEnd.refdef.floatTime = originalTime - (double)backEnd.currentEntity->e.shaderTime.iShaderTime / 1000.0;
}
else
{
backEnd.refdef.floatTime = originalTime - backEnd.currentEntity->e.shaderTime.fShaderTime;
}
// we have to reset the shaderTime as well otherwise image animations start
// from the wrong frame
tess.shaderTime = backEnd.refdef.floatTime - tess.shader->timeOffset;
// set up the transformation matrix
R_RotateForEntity(backEnd.currentEntity, &backEnd.viewParms, &backEnd.orient);
if(backEnd.currentEntity->e.renderfx & RF_DEPTHHACK)
{
depthHack = true;
}
}
else
{
backEnd.currentEntity = &tr.worldEntity;
backEnd.refdef.floatTime = originalTime;
backEnd.orient = backEnd.viewParms.world;
// we have to reset the shaderTime as well otherwise image animations on
// the world (like water) continue with the wrong frame
tess.shaderTime = backEnd.refdef.floatTime - tess.shader->timeOffset;
}
}
cullType_t GetMirrorredCullType(cullType_t cullType)
{
switch(cullType)
{
case CT_BACK_SIDED: return CT_FRONT_SIDED;
case CT_FRONT_SIDED: return CT_BACK_SIDED;
default: return CT_TWO_SIDED;
}
}
uint32_t AlphaTestShaderConstFromStateBits(unsigned int stateBits)
{
stateBits &= GLS_ATEST_BITS;
switch(stateBits)
{
case GLS_ATEST_GE_80: return ATEST_GE_HALF;
case GLS_ATEST_GT_0: return ATEST_GT_0;
case GLS_ATEST_LT_80: return ATEST_LT_HALF;
default: return ATEST_NONE;
}
}
void R_SelectRenderPipeline()
{
if(r_pipeline->integer == 0)
{
renderPipeline = grpp;
}
else
{
renderPipeline = crpp;
}
}
void FrameStats::EndFrame()
{
frameCount = min(frameCount + 1, (int)MaxFrames);
frameIndex = (frameIndex + 1) % MaxFrames;
Com_StatsFromArray(p2pMS, frameCount, temp, &p2pStats);
}
void RenderPassStats::EndFrame(uint32_t cpu, uint32_t gpu)
{
static uint32_t tempSamples[MaxStatsFrameCount];
samplesCPU[index] = cpu;
samplesGPU[index] = gpu;
count = min(count + 1, (uint32_t)MaxStatsFrameCount);
index = (index + 1) % MaxStatsFrameCount;
Com_StatsFromArray((const int*)samplesCPU, count, (int*)tempSamples, &statsCPU);
Com_StatsFromArray((const int*)samplesGPU, count, (int*)tempSamples, &statsGPU);
}
uint32_t SRP::BeginRenderPass(const char* name, float r, float g, float b)
{
if(!enableRenderPassQueries)
{
CmdBeginDebugLabel(name, r, g, b);
return 0xDEADBEEF;
}
RenderPassFrame& f = renderPasses[tr.frameCount % FrameCount];
if(f.count >= ARRAY_LEN(f.passes))
{
Q_assert(0);
return UINT32_MAX;
}
CmdBeginDebugLabel(name, r, g, b);
const uint32_t index = f.count++;
RenderPassQueries& q = f.passes[index];
Q_strncpyz(q.name, name, sizeof(q.name));
q.cpuStartUS = Sys_Microseconds();
q.queryIndex = CmdBeginDurationQuery();
return index;
}
void SRP::EndRenderPass(uint32_t index)
{
if(!enableRenderPassQueries)
{
CmdEndDebugLabel();
return;
}
Q_assert(index != 0xDEADBEEF); // enableRenderPassQueries should still be false!
RenderPassFrame& f = renderPasses[tr.frameCount % FrameCount];
if(index >= f.count)
{
Q_assert(0);
return;
}
CmdEndDebugLabel();
RenderPassQueries& q = f.passes[index];
q.cpuDurationUS = (uint32_t)(Sys_Microseconds() - q.cpuStartUS);
CmdEndDurationQuery(q.queryIndex);
}
void SRP::DrawGUI()
{
uint32_t durations[MaxDurationQueries];
GetDurations(durations);
wholeFrameStats.EndFrame(rhie.renderToPresentUS, durations[0]);
const RenderPassFrame& currFrame = renderPasses[(tr.frameCount % FrameCount) ^ 1];
RenderPassFrame& tempFrame = tempRenderPasses;
// see if the render pass list is the same as the previous frame's
bool sameRenderPass = true;
if(currFrame.count == tempRenderPasses.count)
{
for(uint32_t p = 0; p < currFrame.count; ++p)
{
if(Q_stricmp(currFrame.passes[p].name, tempRenderPasses.passes[p].name) != 0)
{
sameRenderPass = false;
break;
}
}
}
else
{
sameRenderPass = false;
}
// write out the displayed timings into the temp buffer
tempFrame.count = currFrame.count;
if(sameRenderPass)
{
for(uint32_t p = 0; p < currFrame.count; ++p)
{
const uint32_t index = currFrame.passes[p].queryIndex;
if(index < MaxDurationQueries)
{
renderPassStats[p].EndFrame(currFrame.passes[p].cpuDurationUS, durations[index]);
tempFrame.passes[p].gpuDurationUS = renderPassStats[p].statsGPU.median;
tempFrame.passes[p].cpuDurationUS = renderPassStats[p].statsCPU.median;
}
}
}
else
{
for(uint32_t p = 0; p < currFrame.count; ++p)
{
const uint32_t index = currFrame.passes[p].queryIndex;
if(index < MaxDurationQueries)
{
tempFrame.passes[p].gpuDurationUS = durations[index];
tempFrame.passes[p].cpuDurationUS = currFrame.passes[p].cpuDurationUS;
}
}
}
static bool breakdownActive = false;
ToggleBooleanWithShortcut(breakdownActive, ImGuiKey_F);
GUI_AddMainMenuItem(GUI_MainMenu::Perf, "Frame breakdown", "Ctrl+F", &breakdownActive);
if(breakdownActive)
{
if(ImGui::Begin("Frame breakdown", &breakdownActive, ImGuiWindowFlags_AlwaysAutoResize))
{
if(BeginTable("Frame breakdown", 3))
{
TableHeader(3, "Pass", "GPU [us]", "CPU [us]");
TableRow(3, "Whole frame",
va("%d", (int)wholeFrameStats.statsGPU.median),
va("%d", (int)wholeFrameStats.statsCPU.median));
for(uint32_t p = 0; p < currFrame.count; ++p)
{
const RenderPassQueries& rp = tempFrame.passes[p];
if(rp.queryIndex < MaxDurationQueries)
{
TableRow(3, rp.name,
va("%d", (int)rp.gpuDurationUS),
va("%d", (int)rp.cpuDurationUS));
}
}
ImGui::EndTable();
}
if(psoStatsValid)
{
ImGui::Text("PSO count: %d", (int)psoCount);
ImGui::Text("PSO changes: %d", (int)psoChangeCount);
}
}
ImGui::End();
}
// save the current render pass list in the temp buffer
memcpy(&tempFrame, &currFrame, sizeof(tempFrame));
static bool frameTimeActive = false;
GUI_AddMainMenuItem(GUI_MainMenu::Perf, "Frame stats", NULL, &frameTimeActive);
if(frameTimeActive)
{
if(ImGui::Begin("Frame stats", &frameTimeActive, ImGuiWindowFlags_AlwaysAutoResize))
{
if(BeginTable("Frame stats", 2))
{
const FrameStats& fs = frameStats;
const stats_t& s = fs.p2pStats;
TableRow2("Skipped frames", fs.skippedFrames);
TableRow2("Frame time target", rhie.targetFrameDurationMS);
TableRow2("Frame time average", s.average);
TableRow2("Frame time std dev.", s.stdDev);
TableRow2("Input to render", (float)rhie.inputToRenderUS / 1000.0f);
TableRow2("Input to present", (float)rhie.inputToPresentUS / 1000.0f);
ImGui::EndTable();
}
}
ImGui::End();
}
static bool graphsActive = false;
ToggleBooleanWithShortcut(graphsActive, ImGuiKey_G);
GUI_AddMainMenuItem(GUI_MainMenu::Perf, "Frame time graphs", "Ctrl+G", &graphsActive);
if(graphsActive)
{
const int windowFlags =
ImGuiWindowFlags_NoDecoration |
ImGuiWindowFlags_NoBackground |
ImGuiWindowFlags_NoMove;
ImGui::SetNextWindowSize(ImVec2(glConfig.vidWidth, glConfig.vidHeight / 2), ImGuiCond_Always);
ImGui::SetNextWindowPos(ImVec2(0, glConfig.vidHeight / 2), ImGuiCond_Always);
if(ImGui::Begin("Frame time graphs", &graphsActive, windowFlags))
{
const FrameStats& fs = frameStats;
const double target = (double)rhie.targetFrameDurationMS;
static bool autoFit = false;
ImGui::Checkbox("Auto-fit", &autoFit);
if(ImPlot::BeginPlot("Frame Times", ImVec2(-1, -1), ImPlotFlags_NoInputs))
{
const int axisFlags = 0; // ImPlotAxisFlags_NoTickLabels
const int axisFlagsY = axisFlags | (autoFit ? ImPlotAxisFlags_AutoFit : 0);
ImPlot::SetupAxes(NULL, NULL, axisFlags, axisFlagsY);
ImPlot::SetupAxisLimits(ImAxis_X1, 0, FrameStats::MaxFrames, ImGuiCond_Always);
if(!autoFit)
{
ImPlot::SetupAxisLimits(ImAxis_Y1, max(target - 2.0, 0.0), target + 2.0, ImGuiCond_Always);
}
ImPlot::SetNextFillStyle(IMPLOT_AUTO_COL, 1.0f);
ImPlot::SetNextLineStyle(IMPLOT_AUTO_COL, 1.0f);
ImPlot::PlotInfLines("Target", &target, 1, ImPlotInfLinesFlags_Horizontal);
ImPlot::SetNextFillStyle(IMPLOT_AUTO_COL, 1.0f);
ImPlot::SetNextLineStyle(IMPLOT_AUTO_COL, 1.0f);
ImPlot::PlotLineG("Frame Time", &FrameTimeGetter, NULL, fs.frameCount, ImPlotLineFlags_None);
ImPlot::EndPlot();
}
}
ImGui::End();
}
GUI_DrawMainMenu();
R_DrawGUI();
}
void SRP::BeginFrame()
{
srp.renderPasses[tr.frameCount % FrameCount].count = 0;
R_SetColorMappings();
// nothing is bound to the command list yet!
srp.renderMode = RenderMode::None;
}
void SRP::EndFrame()
{
tr.tracedWorldShaderIndex = -1;
if(tr.traceWorldShader && tr.world != NULL)
{
// schedule a GPU -> CPU transfer
{
BufferBarrier barrier(traceRenderBuffer, ResourceStates::CopySourceBit);
CmdBarrier(0, NULL, 1, &barrier);
}
CmdCopyBuffer(traceReadbackBuffer, traceRenderBuffer);
{
BufferBarrier barrier(traceRenderBuffer, ResourceStates::UnorderedAccessBit);
CmdBarrier(0, NULL, 1, &barrier);
}
// grab last frame's result
uint32_t* shaderIndices = (uint32_t*)MapBuffer(traceReadbackBuffer);
const uint32_t shaderIndex = shaderIndices[RHI::GetFrameIndex() ^ 1];
UnmapBuffer(traceReadbackBuffer);
if(shaderIndex < (uint32_t)tr.numShaders)
{
tr.tracedWorldShaderIndex = (int)shaderIndex;
}
}
RHI::EndFrame();
if(rhie.presentToPresentUS > 0)
{
frameStats.p2pMS[frameStats.frameIndex] = (float)rhie.presentToPresentUS / 1000.0f;
frameStats.EndFrame();
}
else
{
frameStats.skippedFrames++;
}
if(backEnd.renderFrame)
{
Sys_V_EndFrame();
}
}
void SRP::CreateShaderTraceBuffers()
{
{
BufferDesc desc("shader trace opaque", 2 * sizeof(uint32_t), ResourceStates::UnorderedAccessBit);
traceRenderBuffer = CreateBuffer(desc);
}
{
BufferDesc desc("shader trace opaque readback", 2 * sizeof(uint32_t), ResourceStates::Common);
desc.memoryUsage = MemoryUsage::Readback;
traceReadbackBuffer = CreateBuffer(desc);
}
}