mirror of
https://bitbucket.org/CPMADevs/cnq3
synced 2024-11-26 22:11:51 +00:00
539 lines
14 KiB
C++
539 lines
14 KiB
C++
/*
|
|
===========================================================================
|
|
Copyright (C) 2023-2024 Gian 'myT' Schellenbaum
|
|
|
|
This file is part of Challenge Quake 3 (CNQ3).
|
|
|
|
Challenge Quake 3 is free software; you can redistribute it
|
|
and/or modify it under the terms of the GNU General Public License as
|
|
published by the Free Software Foundation; either version 2 of the License,
|
|
or (at your option) any later version.
|
|
|
|
Challenge Quake 3 is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
|
|
===========================================================================
|
|
*/
|
|
// Shared Rendering Pipeline - core functionality
|
|
|
|
|
|
#include "srp_local.h"
|
|
#include "../client/cl_imgui.h"
|
|
#include "shaders/crp/world.h.hlsli"
|
|
|
|
|
|
// render pipeline implementations, declared here and defined elsewhere;
// R_SelectRenderPipeline picks grpp when r_pipeline is 0 and crpp otherwise
extern IRenderPipeline* grpp;
|
|
extern IRenderPipeline* crpp;
|
|
|
|
// shared rendering pipeline state used by the functions in this file
SRP srp;
|
|
// the pipeline currently in use, assigned by R_SelectRenderPipeline
IRenderPipeline* renderPipeline;
|
|
|
|
|
|
// Getter callback passed to ImPlot::PlotLineG: turns a sample index into
// a plot point (x = sample index, y = present-to-present time of that sample)
static ImPlotPoint FrameTimeGetter(int index, void*)
{
	const FrameStats& stats = srp.frameStats;

	// offset by the current frame index and wrap around the stored sample count
	const int slot = (stats.frameIndex + index) % stats.frameCount;
	const float frameTimeMS = stats.p2pMS[slot];

	ImPlotPoint point;
	point.x = index;
	point.y = frameTimeMS;

	return point;
}
|
|
|
|
// Update callback handed to R_UpdateAndGetBundleImage.
// It is currently a stub: both the size-change case and the dirty case
// are recognized but no texture update is performed for either.
static void UpdateAnimatedImage(image_t* image, int w, int h, const byte* data, qbool dirty)
{
	const bool sizeChanged = w != image->width || h != image->height;
	if(sizeChanged)
	{
		// @TODO: ?
		//image->width = w;
		//image->height = h;
		//CreateTexture(&d3d.textures[image->texnum], image, 1, w, h);
		//GAL_UpdateTexture(image, 0, 0, 0, w, h, data);
		return;
	}

	if(dirty)
	{
		// @TODO: ?
		//GAL_UpdateTexture(image, 0, 0, 0, w, h, data);
	}
}
|
|
|
|
|
|
// Returns the image R_UpdateAndGetBundleImage resolves for the given bundle,
// using UpdateAnimatedImage as the update callback.
const image_t* GetBundleImage(const textureBundle_t& bundle)
{
	const image_t* const image = R_UpdateAndGetBundleImage(&bundle, &UpdateAnimatedImage);

	return image;
}
|
|
|
|
// Flattens a (wrap mode, filter, min LOD) triple into a single sampler index.
// The filter varies fastest, then the wrap mode, then the min LOD.
uint32_t GetBaseSamplerIndex(textureWrap_t wrap, TextureFilter::Id filter, uint32_t minLOD)
{
	const uint32_t wrapIndex = (uint32_t)wrap;
	const uint32_t filterIndex = (uint32_t)filter;
	Q_assert(wrapIndex < TW_COUNT);
	Q_assert(filterIndex < TextureFilter::Count);

	const uint32_t filterCount = (uint32_t)TextureFilter::Count;
	const uint32_t wrapCount = (uint32_t)TW_COUNT;

	// same value as filter + filterCount * wrap + filterCount * wrapCount * minLOD
	return filterIndex + filterCount * (wrapIndex + wrapCount * minLOD);
}
|
|
|
|
// Maps a (wrap mode, filter, min LOD) triple to the sampler descriptor index
// of the active render pipeline.
uint32_t GetSamplerIndex(textureWrap_t wrap, TextureFilter::Id filter, uint32_t minLOD)
{
	// the pipeline translates the pipeline-independent base index into its own descriptor index
	return renderPipeline->GetSamplerDescriptorIndexFromBaseIndex(GetBaseSamplerIndex(wrap, filter, minLOD));
}
|
|
|
|
uint32_t GetSamplerIndex(const image_t* image)
|
|
{
|
|
TextureFilter::Id filter = TextureFilter::Anisotropic;
|
|
if(r_lego->integer &&
|
|
srp.renderMode == RenderMode::World &&
|
|
(image->flags & (IMG_LMATLAS | IMG_EXTLMATLAS | IMG_NOPICMIP)) == 0)
|
|
{
|
|
filter = TextureFilter::Point;
|
|
}
|
|
else if((image->flags & IMG_NOAF) != 0 ||
|
|
srp.renderMode != RenderMode::World)
|
|
{
|
|
filter = TextureFilter::Linear;
|
|
}
|
|
|
|
int minLOD = 0;
|
|
if(srp.renderMode == RenderMode::World &&
|
|
(image->flags & IMG_NOPICMIP) == 0)
|
|
{
|
|
minLOD = Com_ClampInt(0, MaxTextureMips - 1, r_picmip->integer);
|
|
}
|
|
|
|
return GetSamplerIndex(image->wrapClampMode, filter, (uint32_t)minLOD);
|
|
}
|
|
|
|
void ReadTextureImage(void* outPixels, HTexture hreadback, int w, int h, int alignment, colorSpace_t colorSpace)
|
|
{
|
|
MappedTexture mapped;
|
|
BeginTextureReadback(mapped, hreadback);
|
|
|
|
byte* const out0 = (byte*)outPixels;
|
|
const byte* const in0 = mapped.mappedData;
|
|
|
|
if(colorSpace == CS_RGBA)
|
|
{
|
|
const int dstRowSizeNoPadding = w * 4;
|
|
mapped.dstRowByteCount = AlignUp(dstRowSizeNoPadding, alignment);
|
|
|
|
for(int y = 0; y < mapped.rowCount; ++y)
|
|
{
|
|
byte* out = out0 + (mapped.rowCount - 1 - y) * mapped.dstRowByteCount;
|
|
const byte* in = in0 + y * mapped.srcRowByteCount;
|
|
memcpy(out, in, dstRowSizeNoPadding);
|
|
}
|
|
}
|
|
else if(colorSpace == CS_BGR)
|
|
{
|
|
mapped.dstRowByteCount = AlignUp(w * 3, alignment);
|
|
|
|
for(int y = 0; y < mapped.rowCount; ++y)
|
|
{
|
|
byte* out = out0 + (mapped.rowCount - 1 - y) * mapped.dstRowByteCount;
|
|
const byte* in = in0 + y * mapped.srcRowByteCount;
|
|
for(int x = 0; x < mapped.columnCount; ++x)
|
|
{
|
|
out[2] = in[0];
|
|
out[1] = in[1];
|
|
out[0] = in[2];
|
|
out += 3;
|
|
in += 4;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
Q_assert(!"Unsupported color space");
|
|
}
|
|
|
|
EndTextureReadback();
|
|
}
|
|
|
|
void UpdateEntityData(bool& depthHack, int entityNum, double originalTime)
|
|
{
|
|
depthHack = false;
|
|
|
|
if(entityNum != ENTITYNUM_WORLD)
|
|
{
|
|
backEnd.currentEntity = &backEnd.refdef.entities[entityNum];
|
|
if(backEnd.currentEntity->intShaderTime)
|
|
{
|
|
backEnd.refdef.floatTime = originalTime - (double)backEnd.currentEntity->e.shaderTime.iShaderTime / 1000.0;
|
|
}
|
|
else
|
|
{
|
|
backEnd.refdef.floatTime = originalTime - backEnd.currentEntity->e.shaderTime.fShaderTime;
|
|
}
|
|
// we have to reset the shaderTime as well otherwise image animations start
|
|
// from the wrong frame
|
|
tess.shaderTime = backEnd.refdef.floatTime - tess.shader->timeOffset;
|
|
|
|
// set up the transformation matrix
|
|
R_RotateForEntity(backEnd.currentEntity, &backEnd.viewParms, &backEnd.orient);
|
|
|
|
if(backEnd.currentEntity->e.renderfx & RF_DEPTHHACK)
|
|
{
|
|
depthHack = true;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
backEnd.currentEntity = &tr.worldEntity;
|
|
backEnd.refdef.floatTime = originalTime;
|
|
backEnd.orient = backEnd.viewParms.world;
|
|
// we have to reset the shaderTime as well otherwise image animations on
|
|
// the world (like water) continue with the wrong frame
|
|
tess.shaderTime = backEnd.refdef.floatTime - tess.shader->timeOffset;
|
|
}
|
|
}
|
|
|
|
// Swaps front-sided and back-sided culling; any other value maps to two-sided.
cullType_t GetMirrorredCullType(cullType_t cullType)
{
	if(cullType == CT_BACK_SIDED)
	{
		return CT_FRONT_SIDED;
	}

	if(cullType == CT_FRONT_SIDED)
	{
		return CT_BACK_SIDED;
	}

	return CT_TWO_SIDED;
}
|
|
|
|
uint32_t AlphaTestShaderConstFromStateBits(unsigned int stateBits)
|
|
{
|
|
stateBits &= GLS_ATEST_BITS;
|
|
switch(stateBits)
|
|
{
|
|
case GLS_ATEST_GE_80: return ATEST_GE_HALF;
|
|
case GLS_ATEST_GT_0: return ATEST_GT_0;
|
|
case GLS_ATEST_LT_80: return ATEST_LT_HALF;
|
|
default: return ATEST_NONE;
|
|
}
|
|
}
|
|
|
|
void R_SelectRenderPipeline()
|
|
{
|
|
if(r_pipeline->integer == 0)
|
|
{
|
|
renderPipeline = grpp;
|
|
}
|
|
else
|
|
{
|
|
renderPipeline = crpp;
|
|
}
|
|
}
|
|
|
|
|
|
void FrameStats::EndFrame()
|
|
{
|
|
frameCount = min(frameCount + 1, (int)MaxFrames);
|
|
frameIndex = (frameIndex + 1) % MaxFrames;
|
|
Com_StatsFromArray(p2pMS, frameCount, temp, &p2pStats);
|
|
}
|
|
|
|
|
|
void RenderPassStats::EndFrame(uint32_t cpu, uint32_t gpu)
|
|
{
|
|
static uint32_t tempSamples[MaxStatsFrameCount];
|
|
samplesCPU[index] = cpu;
|
|
samplesGPU[index] = gpu;
|
|
count = min(count + 1, (uint32_t)MaxStatsFrameCount);
|
|
index = (index + 1) % MaxStatsFrameCount;
|
|
Com_StatsFromArray((const int*)samplesCPU, count, (int*)tempSamples, &statsCPU);
|
|
Com_StatsFromArray((const int*)samplesGPU, count, (int*)tempSamples, &statsGPU);
|
|
}
|
|
|
|
|
|
uint32_t SRP::BeginRenderPass(const char* name, float r, float g, float b)
|
|
{
|
|
if(!enableRenderPassQueries)
|
|
{
|
|
CmdBeginDebugLabel(name, r, g, b);
|
|
return 0xDEADBEEF;
|
|
}
|
|
|
|
RenderPassFrame& f = renderPasses[tr.frameCount % FrameCount];
|
|
if(f.count >= ARRAY_LEN(f.passes))
|
|
{
|
|
Q_assert(0);
|
|
return UINT32_MAX;
|
|
}
|
|
|
|
CmdBeginDebugLabel(name, r, g, b);
|
|
|
|
const uint32_t index = f.count++;
|
|
RenderPassQueries& q = f.passes[index];
|
|
Q_strncpyz(q.name, name, sizeof(q.name));
|
|
q.cpuStartUS = Sys_Microseconds();
|
|
q.queryIndex = CmdBeginDurationQuery();
|
|
|
|
return index;
|
|
}
|
|
|
|
void SRP::EndRenderPass(uint32_t index)
|
|
{
|
|
if(!enableRenderPassQueries)
|
|
{
|
|
CmdEndDebugLabel();
|
|
return;
|
|
}
|
|
|
|
Q_assert(index != 0xDEADBEEF); // enableRenderPassQueries should still be false!
|
|
RenderPassFrame& f = renderPasses[tr.frameCount % FrameCount];
|
|
if(index >= f.count)
|
|
{
|
|
Q_assert(0);
|
|
return;
|
|
}
|
|
|
|
CmdEndDebugLabel();
|
|
|
|
RenderPassQueries& q = f.passes[index];
|
|
q.cpuDurationUS = (uint32_t)(Sys_Microseconds() - q.cpuStartUS);
|
|
CmdEndDurationQuery(q.queryIndex);
|
|
}
|
|
|
|
void SRP::DrawGUI()
|
|
{
|
|
uint32_t durations[MaxDurationQueries];
|
|
GetDurations(durations);
|
|
|
|
wholeFrameStats.EndFrame(rhie.renderToPresentUS, durations[0]);
|
|
|
|
const RenderPassFrame& currFrame = renderPasses[(tr.frameCount % FrameCount) ^ 1];
|
|
RenderPassFrame& tempFrame = tempRenderPasses;
|
|
|
|
// see if the render pass list is the same as the previous frame's
|
|
bool sameRenderPass = true;
|
|
if(currFrame.count == tempRenderPasses.count)
|
|
{
|
|
for(uint32_t p = 0; p < currFrame.count; ++p)
|
|
{
|
|
if(Q_stricmp(currFrame.passes[p].name, tempRenderPasses.passes[p].name) != 0)
|
|
{
|
|
sameRenderPass = false;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
sameRenderPass = false;
|
|
}
|
|
|
|
// write out the displayed timings into the temp buffer
|
|
tempFrame.count = currFrame.count;
|
|
if(sameRenderPass)
|
|
{
|
|
for(uint32_t p = 0; p < currFrame.count; ++p)
|
|
{
|
|
const uint32_t index = currFrame.passes[p].queryIndex;
|
|
if(index < MaxDurationQueries)
|
|
{
|
|
renderPassStats[p].EndFrame(currFrame.passes[p].cpuDurationUS, durations[index]);
|
|
tempFrame.passes[p].gpuDurationUS = renderPassStats[p].statsGPU.median;
|
|
tempFrame.passes[p].cpuDurationUS = renderPassStats[p].statsCPU.median;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for(uint32_t p = 0; p < currFrame.count; ++p)
|
|
{
|
|
const uint32_t index = currFrame.passes[p].queryIndex;
|
|
if(index < MaxDurationQueries)
|
|
{
|
|
tempFrame.passes[p].gpuDurationUS = durations[index];
|
|
tempFrame.passes[p].cpuDurationUS = currFrame.passes[p].cpuDurationUS;
|
|
}
|
|
}
|
|
}
|
|
|
|
static bool breakdownActive = false;
|
|
ToggleBooleanWithShortcut(breakdownActive, ImGuiKey_F);
|
|
GUI_AddMainMenuItem(GUI_MainMenu::Perf, "Frame breakdown", "Ctrl+F", &breakdownActive);
|
|
if(breakdownActive)
|
|
{
|
|
if(ImGui::Begin("Frame breakdown", &breakdownActive, ImGuiWindowFlags_AlwaysAutoResize))
|
|
{
|
|
if(BeginTable("Frame breakdown", 3))
|
|
{
|
|
TableHeader(3, "Pass", "GPU [us]", "CPU [us]");
|
|
|
|
TableRow(3, "Whole frame",
|
|
va("%d", (int)wholeFrameStats.statsGPU.median),
|
|
va("%d", (int)wholeFrameStats.statsCPU.median));
|
|
|
|
for(uint32_t p = 0; p < currFrame.count; ++p)
|
|
{
|
|
const RenderPassQueries& rp = tempFrame.passes[p];
|
|
if(rp.queryIndex < MaxDurationQueries)
|
|
{
|
|
TableRow(3, rp.name,
|
|
va("%d", (int)rp.gpuDurationUS),
|
|
va("%d", (int)rp.cpuDurationUS));
|
|
}
|
|
}
|
|
|
|
ImGui::EndTable();
|
|
}
|
|
|
|
if(psoStatsValid)
|
|
{
|
|
ImGui::Text("PSO count: %d", (int)psoCount);
|
|
ImGui::Text("PSO changes: %d", (int)psoChangeCount);
|
|
}
|
|
}
|
|
ImGui::End();
|
|
}
|
|
|
|
// save the current render pass list in the temp buffer
|
|
memcpy(&tempFrame, &currFrame, sizeof(tempFrame));
|
|
|
|
static bool frameTimeActive = false;
|
|
GUI_AddMainMenuItem(GUI_MainMenu::Perf, "Frame stats", NULL, &frameTimeActive);
|
|
if(frameTimeActive)
|
|
{
|
|
if(ImGui::Begin("Frame stats", &frameTimeActive, ImGuiWindowFlags_AlwaysAutoResize))
|
|
{
|
|
if(BeginTable("Frame stats", 2))
|
|
{
|
|
const FrameStats& fs = frameStats;
|
|
const stats_t& s = fs.p2pStats;
|
|
TableRow2("Skipped frames", fs.skippedFrames);
|
|
TableRow2("Frame time target", rhie.targetFrameDurationMS);
|
|
TableRow2("Frame time average", s.average);
|
|
TableRow2("Frame time std dev.", s.stdDev);
|
|
TableRow2("Input to render", (float)rhie.inputToRenderUS / 1000.0f);
|
|
TableRow2("Input to present", (float)rhie.inputToPresentUS / 1000.0f);
|
|
|
|
ImGui::EndTable();
|
|
}
|
|
}
|
|
ImGui::End();
|
|
}
|
|
|
|
static bool graphsActive = false;
|
|
ToggleBooleanWithShortcut(graphsActive, ImGuiKey_G);
|
|
GUI_AddMainMenuItem(GUI_MainMenu::Perf, "Frame time graphs", "Ctrl+G", &graphsActive);
|
|
if(graphsActive)
|
|
{
|
|
const int windowFlags =
|
|
ImGuiWindowFlags_NoDecoration |
|
|
ImGuiWindowFlags_NoBackground |
|
|
ImGuiWindowFlags_NoMove;
|
|
ImGui::SetNextWindowSize(ImVec2(glConfig.vidWidth, glConfig.vidHeight / 2), ImGuiCond_Always);
|
|
ImGui::SetNextWindowPos(ImVec2(0, glConfig.vidHeight / 2), ImGuiCond_Always);
|
|
if(ImGui::Begin("Frame time graphs", &graphsActive, windowFlags))
|
|
{
|
|
const FrameStats& fs = frameStats;
|
|
const double target = (double)rhie.targetFrameDurationMS;
|
|
|
|
static bool autoFit = false;
|
|
ImGui::Checkbox("Auto-fit", &autoFit);
|
|
|
|
if(ImPlot::BeginPlot("Frame Times", ImVec2(-1, -1), ImPlotFlags_NoInputs))
|
|
{
|
|
const int axisFlags = 0; // ImPlotAxisFlags_NoTickLabels
|
|
const int axisFlagsY = axisFlags | (autoFit ? ImPlotAxisFlags_AutoFit : 0);
|
|
ImPlot::SetupAxes(NULL, NULL, axisFlags, axisFlagsY);
|
|
ImPlot::SetupAxisLimits(ImAxis_X1, 0, FrameStats::MaxFrames, ImGuiCond_Always);
|
|
if(!autoFit)
|
|
{
|
|
ImPlot::SetupAxisLimits(ImAxis_Y1, max(target - 2.0, 0.0), target + 2.0, ImGuiCond_Always);
|
|
}
|
|
|
|
ImPlot::SetNextFillStyle(IMPLOT_AUTO_COL, 1.0f);
|
|
ImPlot::SetNextLineStyle(IMPLOT_AUTO_COL, 1.0f);
|
|
ImPlot::PlotInfLines("Target", &target, 1, ImPlotInfLinesFlags_Horizontal);
|
|
|
|
ImPlot::SetNextFillStyle(IMPLOT_AUTO_COL, 1.0f);
|
|
ImPlot::SetNextLineStyle(IMPLOT_AUTO_COL, 1.0f);
|
|
ImPlot::PlotLineG("Frame Time", &FrameTimeGetter, NULL, fs.frameCount, ImPlotLineFlags_None);
|
|
|
|
ImPlot::EndPlot();
|
|
}
|
|
}
|
|
ImGui::End();
|
|
}
|
|
|
|
GUI_DrawMainMenu();
|
|
R_DrawGUI();
|
|
}
|
|
|
|
void SRP::BeginFrame()
|
|
{
|
|
srp.renderPasses[tr.frameCount % FrameCount].count = 0;
|
|
R_SetColorMappings();
|
|
|
|
// nothing is bound to the command list yet!
|
|
srp.renderMode = RenderMode::None;
|
|
}
|
|
|
|
void SRP::EndFrame()
|
|
{
|
|
tr.tracedWorldShaderIndex = -1;
|
|
if(tr.traceWorldShader && tr.world != NULL)
|
|
{
|
|
// schedule a GPU -> CPU transfer
|
|
{
|
|
BufferBarrier barrier(traceRenderBuffer, ResourceStates::CopySourceBit);
|
|
CmdBarrier(0, NULL, 1, &barrier);
|
|
}
|
|
CmdCopyBuffer(traceReadbackBuffer, traceRenderBuffer);
|
|
{
|
|
BufferBarrier barrier(traceRenderBuffer, ResourceStates::UnorderedAccessBit);
|
|
CmdBarrier(0, NULL, 1, &barrier);
|
|
}
|
|
|
|
// grab last frame's result
|
|
uint32_t* shaderIndices = (uint32_t*)MapBuffer(traceReadbackBuffer);
|
|
const uint32_t shaderIndex = shaderIndices[RHI::GetFrameIndex() ^ 1];
|
|
UnmapBuffer(traceReadbackBuffer);
|
|
if(shaderIndex < (uint32_t)tr.numShaders)
|
|
{
|
|
tr.tracedWorldShaderIndex = (int)shaderIndex;
|
|
}
|
|
}
|
|
|
|
RHI::EndFrame();
|
|
|
|
if(rhie.presentToPresentUS > 0)
|
|
{
|
|
frameStats.p2pMS[frameStats.frameIndex] = (float)rhie.presentToPresentUS / 1000.0f;
|
|
frameStats.EndFrame();
|
|
}
|
|
else
|
|
{
|
|
frameStats.skippedFrames++;
|
|
}
|
|
|
|
if(backEnd.renderFrame)
|
|
{
|
|
Sys_V_EndFrame();
|
|
}
|
|
}
|
|
|
|
void SRP::CreateShaderTraceBuffers()
|
|
{
|
|
{
|
|
BufferDesc desc("shader trace opaque", 2 * sizeof(uint32_t), ResourceStates::UnorderedAccessBit);
|
|
traceRenderBuffer = CreateBuffer(desc);
|
|
}
|
|
|
|
{
|
|
BufferDesc desc("shader trace opaque readback", 2 * sizeof(uint32_t), ResourceStates::Common);
|
|
desc.memoryUsage = MemoryUsage::Readback;
|
|
traceReadbackBuffer = CreateBuffer(desc);
|
|
}
|
|
}
|