cnq3/code/renderer/crp_main.cpp
2024-11-04 00:23:09 +01:00

1185 lines
38 KiB
C++

/*
===========================================================================
Copyright (C) 2023-2024 Gian 'myT' Schellenbaum
This file is part of Challenge Quake 3 (CNQ3).
Challenge Quake 3 is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.
Challenge Quake 3 is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
===========================================================================
*/
// Cinematic Rendering Pipeline - main interface
#include "crp_local.h"
#include "../client/cl_imgui.h"
#include "shaders/crp/oit.h.hlsli"
#include "shaders/crp/scene_view.h.hlsli"
#include "compshaders/crp/fullscreen.h"
#include "compshaders/crp/blit.h"
#include "compshaders/crp/ui.h"
#include "compshaders/crp/imgui.h"
#include "compshaders/crp/nuklear.h"
#include "compshaders/crp/mip_1.h"
#include "compshaders/crp/mip_2.h"
#include "compshaders/crp/mip_3.h"
#include "compshaders/crp/depth_pyramid.h"
#include "compshaders/crp/im3d_points.h"
#include "compshaders/crp/im3d_lines.h"
#include "compshaders/crp/im3d_triangles.h"
// Compile-time sizing constants for the scene view GPU buffers.
struct SceneViewConst
{
enum Constants
{
MaxViews = 1024, // maximum number of SceneView entries per upload buffer
LightBytes = sizeof(DynamicLight), // byte size of one DynamicLight
StructBytes = sizeof(SceneView), // byte size of one SceneView entry
BufferBytes = MaxViews * StructBytes // byte size of a full upload buffer
};
};
// Index block for the depth pyramid pass, packed to 4-byte alignment.
// NOTE(review): the "RC" suffix presumably stands for root constants
// (as with the BlitRC struct used by CRP::Blit) - confirm at the usage site.
#pragma pack(push, 4)
struct DepthPyramidRC
{
uint32_t destTextureIndices[7]; // presumably one destination index per depth pyramid mip level - TODO confirm
uint32_t depthTextureIndex;
uint32_t depthSamplerIndex;
};
#pragma pack(pop)
// the one and only Cinematic Rendering Pipeline instance
CRP crp;
// the same instance, exposed through the generic render pipeline interface
IRenderPipeline* crpp = &crp;
// CVar pointers, bound to their registration data by the crp_cvars table below
// depth of field - common settings
cvar_t* crp_dof;
cvar_t* crp_dof_overlay;
cvar_t* crp_dof_blades;
cvar_t* crp_dof_angle;
// depth of field - gather mode
cvar_t* crp_gatherDof_focusNearDist;
cvar_t* crp_gatherDof_focusNearRange;
cvar_t* crp_gatherDof_focusFarDist;
cvar_t* crp_gatherDof_focusFarRange;
cvar_t* crp_gatherDof_brightness;
// depth of field - accumulation mode
cvar_t* crp_accumDof_focusDist;
cvar_t* crp_accumDof_radius;
cvar_t* crp_accumDof_samples;
cvar_t* crp_accumDof_preview;
// motion blur
cvar_t* crp_mblur;
cvar_t* crp_mblur_exposure;
// lighting
cvar_t* crp_sunlight;
cvar_t* crp_volLight;
// debugging helpers
cvar_t* crp_drawNormals;
cvar_t* crp_updateRTAS;
// generic debug values, only registered when _DEBUG is defined
cvar_t* crp_debug0;
cvar_t* crp_debug1;
cvar_t* crp_debug2;
cvar_t* crp_debug3;
// Registration table for all the CRP CVars, consumed by ri.Cvar_RegisterTable in CRP::Init.
// Each entry binds a cvar_t pointer to its name, default value, flags, type,
// value range, console help text and GUI description.
// The registered name of every entry matches the name of its pointer variable.
static const cvarTableItem_t crp_cvars[] =
{
	{
		&crp_dof, "crp_dof", "1", CVAR_ARCHIVE, CVART_INTEGER, "0", "2",
		"enables depth of field\n"
		S_COLOR_VAL " 0 " S_COLOR_HELP "= Disabled\n"
		S_COLOR_VAL " 1 " S_COLOR_HELP "= Gather (fast, more flexible, issues with transparency)\n"
		S_COLOR_VAL " 2 " S_COLOR_HELP "= Accumulation (slow, less flexible, great IQ)\n",
		"DoF mode", CVARCAT_GRAPHICS, "Depth of field mode", "",
		CVAR_GUI_VALUE("0", "Disabled", "")
		CVAR_GUI_VALUE("1", "Gather", "Fast, lower IQ")
		CVAR_GUI_VALUE("2", "Accumulation", "Very slow, great IQ")
	},
	{
		&crp_dof_overlay, "crp_dof_overlay", "0", CVAR_ARCHIVE, CVART_INTEGER, "0", "2",
		"debug overlay mode\n"
		S_COLOR_VAL " 0 " S_COLOR_HELP "= Disabled\n"
		S_COLOR_VAL " 1 " S_COLOR_HELP "= Colorized Blur\n"
		S_COLOR_VAL " 2 " S_COLOR_HELP "= Focus Plane",
		"DoF overlay mode", CVARCAT_GRAPHICS, "Debug overlay mode", "",
		CVAR_GUI_VALUE("0", "Disabled", "")
		CVAR_GUI_VALUE("1", "Colorized Blur", "")
		CVAR_GUI_VALUE("2", "Focus Plane", "")
	},
	{
		&crp_dof_blades, "crp_dof_blades", "6", CVAR_ARCHIVE, CVART_FLOAT, "0", "16",
		"aperture blade count\n"
		"Set to less than 3 for a disk shape.",
		"DoF blade count", CVARCAT_GRAPHICS, "Aperture blade count", "Set to less than 3 for a disk shape."
	},
	{
		&crp_dof_angle, "crp_dof_angle", "20", CVAR_ARCHIVE, CVART_FLOAT, "0", "360", "aperture angle, in degrees",
		"DoF aperture angle", CVARCAT_GRAPHICS, "Aperture angle, in degrees", ""
	},
	{
		&crp_accumDof_focusDist, "crp_accumDof_focusDist", "256", CVAR_ARCHIVE, CVART_FLOAT, "2", "2048", "focus distance",
		"Accum DoF focus distance", CVARCAT_GRAPHICS, "Focus distance", ""
	},
	{
		// the registered name now matches the variable name, like every other entry
		&crp_accumDof_radius, "crp_accumDof_radius", "0.1", CVAR_ARCHIVE, CVART_FLOAT, "0.001", "20", "aperture radius in world units",
		"Accum DoF aperture radius", CVARCAT_GRAPHICS, "Aperture radius in world units", ""
	},
	{
		&crp_accumDof_samples, "crp_accumDof_samples", "2", CVAR_ARCHIVE, CVART_INTEGER, "1", "12",
		"per-axis sampling density\n"
		"Density N means (2N + 1)(2N + 1) scene renders in total.",
		"Accum DoF sample count", CVARCAT_GRAPHICS, "Per-axis sampling density", "Density N means (2N + 1)^2 scene renders in total."
	},
	{
		&crp_accumDof_preview, "crp_accumDof_preview", "0", CVAR_ARCHIVE, CVART_INTEGER, "0", "2",
		"low-res preview mode\n"
		S_COLOR_VAL " 0 " S_COLOR_HELP "= Disabled\n"
		S_COLOR_VAL " 1 " S_COLOR_HELP "= 1/4 pixel count, 9 samples total\n"
		S_COLOR_VAL " 2 " S_COLOR_HELP "= 1/16 pixel count, 25 samples total",
		"Accum DoF preview mode", CVARCAT_GRAPHICS, "Low-resolution preview modes", "",
		CVAR_GUI_VALUE("0", "Disabled", "")
		CVAR_GUI_VALUE("1", "1/4 pixel count", "9 samples total")
		CVAR_GUI_VALUE("2", "1/16 pixel count", "25 samples total")
	},
	{
		&crp_gatherDof_focusNearDist, "crp_gatherDof_focusNearDist", "192", CVAR_ARCHIVE, CVART_FLOAT, "1", "2048", "near focus distance",
		"Gather DoF near focus distance", CVARCAT_GRAPHICS, "Near focus distance", ""
	},
	{
		&crp_gatherDof_focusNearRange, "crp_gatherDof_focusNearRange", "256", CVAR_ARCHIVE, CVART_FLOAT, "1", "2048", "near focus range",
		"Gather DoF near focus range", CVARCAT_GRAPHICS, "Near focus range", ""
	},
	{
		&crp_gatherDof_focusFarDist, "crp_gatherDof_focusFarDist", "512", CVAR_ARCHIVE, CVART_FLOAT, "1", "2048", "far focus distance",
		"Gather DoF far focus distance", CVARCAT_GRAPHICS, "Far focus distance", ""
	},
	{
		&crp_gatherDof_focusFarRange, "crp_gatherDof_focusFarRange", "384", CVAR_ARCHIVE, CVART_FLOAT, "1", "2048", "far focus range",
		"Gather DoF far focus range", CVARCAT_GRAPHICS, "Far focus range", ""
	},
	{
		&crp_gatherDof_brightness, "crp_gatherDof_brightness", "2", CVAR_ARCHIVE, CVART_FLOAT, "0", "8", "blur brightness weight",
		"Gather DoF bokeh brightness", CVARCAT_GRAPHICS, "Blur brightness weight", ""
	},
	{
		&crp_mblur, "crp_mblur", "0", CVAR_ARCHIVE, CVART_INTEGER, "0", "3",
		"motion blur mode\n"
		S_COLOR_VAL " 0 " S_COLOR_HELP "= Disabled\n"
		S_COLOR_VAL " 1 " S_COLOR_HELP "= Camera only\n"
		S_COLOR_VAL " 2 " S_COLOR_HELP "= Object only\n"
		S_COLOR_VAL " 3 " S_COLOR_HELP "= Camera + Object",
		"Motion blur mode", CVARCAT_GRAPHICS, "", "",
		CVAR_GUI_VALUE("0", "Disabled", "")
		CVAR_GUI_VALUE("1", "Camera only", "")
		CVAR_GUI_VALUE("2", "Object only", "")
		CVAR_GUI_VALUE("3", "Camera + Object", "")
	},
	{
		&crp_mblur_exposure, "crp_mblur_exposure", "0.5", CVAR_ARCHIVE, CVART_FLOAT, "0", "1",
		"motion blur scale\n"
		"This is the exposure time in percentage of frame time.",
		"Motion blur exposure", CVARCAT_GRAPHICS, "Exposure time in percentage of frame time", ""
	},
	{
		&crp_sunlight, "crp_sunlight", "1", CVAR_ARCHIVE, CVART_BOOL, NULL, NULL, "sunlight",
		"Sunlight", CVARCAT_GRAPHICS, "Sunlight on non-lightmapped surfaces", ""
	},
	{
		&crp_volLight, "crp_volLight", "1", CVAR_ARCHIVE, CVART_BOOL, NULL, NULL, "volumetric light",
		"Volumetric light", CVARCAT_GRAPHICS, "Sunlight scattering through the air", ""
	},
	{
		&crp_drawNormals, "crp_drawNormals", "0", CVAR_TEMP, CVART_BOOL, NULL, NULL, "draws vertex normals",
		"Draw vertex normals", CVARCAT_GRAPHICS | CVARCAT_DEBUGGING, "", ""
	},
	{
		&crp_updateRTAS, "crp_updateRTAS", "1", CVAR_TEMP, CVART_BOOL, NULL, NULL, "enables RTAS builds every frame",
		"Enable RTAS builds", CVARCAT_GRAPHICS | CVARCAT_DEBUGGING, "Allows raytracing acceleration structure updates", ""
	}
	// the generic debug values only exist in debug builds
#if defined(_DEBUG)
	,
	{
		&crp_debug0, "crp_debug0", "0", CVAR_TEMP, CVART_FLOAT, "0", "1", "debug value 0",
		"Debug value 0", CVARCAT_GRAPHICS | CVARCAT_DEBUGGING, "", ""
	},
	{
		&crp_debug1, "crp_debug1", "0", CVAR_TEMP, CVART_FLOAT, "0", "1", "debug value 1",
		"Debug value 1", CVARCAT_GRAPHICS | CVARCAT_DEBUGGING, "", ""
	},
	{
		&crp_debug2, "crp_debug2", "0", CVAR_TEMP, CVART_FLOAT, "0", "1", "debug value 2",
		"Debug value 2", CVARCAT_GRAPHICS | CVARCAT_DEBUGGING, "", ""
	},
	{
		&crp_debug3, "crp_debug3", "0", CVAR_TEMP, CVART_FLOAT, "0", "1", "debug value 3",
		"Debug value 3", CVARCAT_GRAPHICS | CVARCAT_DEBUGGING, "", ""
	}
#endif
};
// forward declarations of the console command handlers referenced by the table below
static void FreezeFrame_f();
static void FreezeFrameMB_f();
// Console commands of the CRP, registered through ri.Cmd_RegisterTable in CRP::Init.
static const cmdTableItem_t crp_cmds[] =
{
{ "cin_freezeFrame", FreezeFrame_f, NULL, "toggles freeze frame mode" },
{ "cin_freezeFrameMB", FreezeFrameMB_f, NULL, "toggles freeze frame mode for MB" }
};
// Handler of the "cin_freezeFrame" console command.
// Leaves freeze frame mode when a frame is currently frozen (either variant),
// otherwise flags a freeze as pending.
static void FreezeFrame_f()
{
	const bool isFrozen =
		crp.freezeFrame == FreezeFrame::Active ||
		crp.freezeFrame == FreezeFrame::ActiveBeforeMB;

	crp.freezeFrame = isFrozen ? FreezeFrame::Inactive : FreezeFrame::Pending;
}
// Handler of the "cin_freezeFrameMB" console command.
// Leaves freeze frame mode when a frame is currently frozen (either variant),
// otherwise flags a "before motion blur" freeze as pending.
static void FreezeFrameMB_f()
{
	const bool isFrozen =
		crp.freezeFrame == FreezeFrame::Active ||
		crp.freezeFrame == FreezeFrame::ActiveBeforeMB;

	crp.freezeFrame = isFrozen ? FreezeFrame::Inactive : FreezeFrame::PendingBeforeMB;
}
// Looks up an image file through R_FindImageFile and returns its texture handle.
// When the lookup returns NULL, the texture of 'onFail' is returned instead
// ('onFail' defaults to tr.defaultImage and must not be NULL).
static HTexture LoadTexture(const char* name, int flags, textureWrap_t glWrapClampMode, const image_t* onFail = tr.defaultImage)
{
	const image_t* const found = R_FindImageFile(name, flags, glWrapClampMode);

	return found != NULL ? found->texture : onFail->texture;
}
// Fills 'rot' with a 3x3 rotation matrix built from the azimuth and inclination
// of the current sunlight direction (crp.sunlightData.direction), with zero roll.
// NOTE(review): judging by the name, this rotates the sun direction onto the Z axis - confirm.
static void SunToZMatrix(matrix3x3_t rot)
{
	float angles[2];
	DirectionToAzimuthInclination(angles, crp.sunlightData.direction);

	const float azi = -(angles[0] + M_PI / 2.0f);
	const float ele = M_PI - angles[1];
	const float rol = 0.0f;

	// evaluate each trigonometric term once
	const float cosAzi = cosf(azi);
	const float sinAzi = sinf(azi);
	const float cosEle = cosf(ele);
	const float sinEle = sinf(ele);
	const float cosRol = cosf(rol);
	const float sinRol = sinf(rol);

	rot[0] = cosRol * cosAzi - sinRol * cosEle * sinAzi;
	rot[1] = -cosRol * sinAzi - sinRol * cosEle * cosAzi;
	rot[2] = sinRol * sinEle;

	rot[3] = sinRol * cosAzi + cosRol * cosEle * sinAzi;
	rot[4] = -sinRol * sinAzi + cosRol * cosEle * cosAzi;
	rot[5] = -cosRol * sinEle;

	rot[6] = sinEle * sinAzi;
	rot[7] = sinEle * cosAzi;
	rot[8] = cosEle;
}
// Convenience overload: creates a short-lived compute pipeline
// from a debug name and the byte code of a compute shader.
HPipeline CreateComputePipeline(const char* name, const ShaderByteCode& shader)
{
	ComputePipelineDesc pipelineDesc(name);
	pipelineDesc.shader = shader;
	pipelineDesc.shortLifeTime = true;

	return CreateComputePipeline(pipelineDesc);
}
// Configures 'desc' for a full-screen pass: the shared full-screen vertex shader,
// the supplied pixel shader, a short life time and the post-process state.
void MakeFullScreenPipeline(GraphicsPipelineDesc& desc, const ShaderByteCode& pixelShader)
{
	desc.vertexShader = ShaderByteCode(g_fullscreen_vs);
	desc.pixelShader = pixelShader;
	desc.shortLifeTime = true;

	// applied last, after the fields above have been set
	desc.SetPostProcessState();
}
// Binds the cache to caller-provided entry storage and resets it.
// entries_       : storage for the cache entries
// maxEntryCount_ : number of entries available in that storage
void PSOCache::Init(Entry* entries_, uint32_t maxEntryCount_)
{
	// index 0 is reserved as the invalid index, hence the count starts at 1
	entryCount = 1;
	maxEntryCount = maxEntryCount_;
	entries = entries_;
}
int PSOCache::AddPipeline(const GraphicsPipelineDesc& desc, const char* name)
{
// we treat index 0 as invalid, so start at 1
for(uint32_t i = 1; i < entryCount; ++i)
{
Entry& entry = entries[i];
if(memcmp(&entry.desc, &desc, sizeof(desc)) == 0)
{
return (int)i;
}
}
ASSERT_OR_DIE(entryCount < maxEntryCount, "Not enough entries in the PSO cache");
GraphicsPipelineDesc namedDesc = desc;
namedDesc.name = name;
// @NOTE: we keep the original desc and its padding bytes for proper comparison results
const uint32_t index = entryCount++;
Entry& entry = entries[index];
memcpy(&entry.desc, &desc, sizeof(entry.desc));
entry.handle = CreateGraphicsPipeline(namedDesc);
return (int)index;
}
// Initializes the Cinematic Rendering Pipeline:
// registers the CVars and console commands, initializes the RHI,
// creates the samplers, render targets, G-buffer textures, OIT buffers,
// blit/depth pyramid pipelines and scene view buffers,
// then initializes every sub-component of the pipeline.
void CRP::Init()
{
ri.Cvar_RegisterTable(crp_cvars, ARRAY_LEN(crp_cvars));
ri.Cmd_RegisterTable(crp_cmds, ARRAY_LEN(crp_cmds));
InitDesc initDesc;
initDesc.directDescriptorHeapIndexing = true;
srp.firstInit = RHI::Init(initDesc);
srp.psoStatsValid = false;
// resources created on the first init only
if(srp.firstInit)
{
srp.CreateShaderTraceBuffers();
// one set of dynamic geometry buffers per frame index
for(uint32_t f = 0; f < FrameCount; ++f)
{
// the doubled index count is for the depth pre-pass
const int MaxDynamicVertexCount = 16 << 20;
const int MaxDynamicIndexCount = MaxDynamicVertexCount * 4;
GeoBuffers& db = dynBuffers[f];
db.Create(va("world #%d", f + 1), MaxDynamicVertexCount, MaxDynamicIndexCount);
}
}
// we recreate the samplers on every vid_restart to create the right level
// of anisotropy based on the latched CVar
// one sampler per (wrap mode, filter, mip level) combination
for(uint32_t w = 0; w < TW_COUNT; ++w)
{
for(uint32_t f = 0; f < TextureFilter::Count; ++f)
{
for(uint32_t m = 0; m < MaxTextureMips; ++m)
{
const textureWrap_t wrap = (textureWrap_t)w;
const TextureFilter::Id filter = (TextureFilter::Id)f;
const uint32_t s = GetBaseSamplerIndex(wrap, filter, m);
SamplerDesc desc(wrap, filter, (float)m);
desc.shortLifeTime = true;
samplers[s] = CreateSampler(desc);
samplerIndices[s] = RHI::GetSamplerIndex(samplers[s]);
}
}
}
// the 2 main render targets, the first one being the initial target
{
renderTargetFormat = TextureFormat::R16G16B16A16_Float;
TextureDesc desc("render target #1", glConfig.vidWidth, glConfig.vidHeight);
desc.initialState = ResourceStates::RenderTargetBit;
desc.allowedState = ResourceStates::RenderTargetBit | ResourceStates::PixelShaderAccessBit | ResourceStates::ComputeShaderAccessBit;
desc.SetClearColor(vec4_zero);
desc.committedResource = true;
desc.format = renderTargetFormat;
desc.shortLifeTime = true;
renderTargets[0] = RHI::CreateTexture(desc);
// same description for the second target, only the name differs
desc.name = "render target #2";
renderTargets[1] = RHI::CreateTexture(desc);
renderTargetIndex = 0;
renderTarget = renderTargets[0];
}
// texture holding the frozen frame, in the render target format
{
TextureDesc desc("frozen frame", glConfig.vidWidth, glConfig.vidHeight);
desc.initialState = ResourceStates::RenderTargetBit;
desc.allowedState = ResourceStates::RenderTargetBit | ResourceStates::PixelShaderAccessBit | ResourceStates::ComputeShaderAccessBit;
desc.format = renderTargetFormat;
desc.shortLifeTime = true;
frozenTexture = RHI::CreateTexture(desc);
freezeFrame = FreezeFrame::Inactive;
}
// 8-bit render target that the final image gets blitted to for readback
{
TextureDesc desc("readback render target", glConfig.vidWidth, glConfig.vidHeight);
desc.initialState = ResourceStates::RenderTargetBit;
desc.allowedState = ResourceStates::RenderTargetBit | ResourceStates::PixelShaderAccessBit;
Vector4Clear(desc.clearColor);
desc.usePreferredClearValue = true;
desc.committedResource = true;
desc.format = TextureFormat::R8G8B8A8_UNorm;
desc.shortLifeTime = true;
readbackRenderTarget = RHI::CreateTexture(desc);
}
// OIT (order-independent transparency) index texture: one 32-bit unsigned integer per pixel
{
TextureDesc desc("OIT index", glConfig.vidWidth, glConfig.vidHeight);
desc.initialState = ResourceStates::UnorderedAccessBit;
desc.allowedState = ResourceStates::UnorderedAccessBit | ResourceStates::PixelShaderAccessBit | ResourceStates::ComputeShaderAccessBit;
desc.committedResource = true;
desc.format = TextureFormat::R32_UInt;
desc.shortLifeTime = true;
oitIndexTexture = RHI::CreateTexture(desc);
}
// OIT fragment buffer, sized from the resolution and OIT_AVG_FRAGMENTS_PER_PIXEL
uint32_t oitMaxFragmentCount = 0;
{
const int byteCountPerFragment = sizeof(OIT_Fragment);
const int fragmentCount = glConfig.vidWidth * glConfig.vidHeight * OIT_AVG_FRAGMENTS_PER_PIXEL;
const int byteCount = byteCountPerFragment * fragmentCount;
oitMaxFragmentCount = fragmentCount;
BufferDesc desc("OIT fragment", byteCount, ResourceStates::UnorderedAccessBit);
desc.committedResource = true;
desc.memoryUsage = MemoryUsage::GPU;
desc.structureByteCount = byteCountPerFragment;
desc.shortLifeTime = true;
oitFragmentBuffer = CreateBuffer(desc);
}
// OIT counter: a GPU buffer plus an upload buffer holding its initial values
{
const int byteCount = sizeof(OIT_Counter);
{
BufferDesc desc("OIT counter", byteCount, ResourceStates::UnorderedAccessBit);
desc.committedResource = true;
desc.memoryUsage = MemoryUsage::GPU;
desc.structureByteCount = byteCount;
desc.shortLifeTime = true;
oitCounterBuffer = CreateBuffer(desc);
}
{
BufferDesc desc("OIT counter staging", byteCount, ResourceStates::Common);
desc.committedResource = false;
desc.memoryUsage = MemoryUsage::Upload;
desc.structureByteCount = byteCount;
desc.shortLifeTime = true;
oitCounterStagingBuffer = CreateBuffer(desc);
// the initial counter values are written once, right here
uint32_t* dst = (uint32_t*)MapBuffer(oitCounterStagingBuffer);
dst[0] = 1; // fragment index 0 is the end-of-list value
dst[1] = oitMaxFragmentCount;
dst[2] = 0;
UnmapBuffer(oitCounterStagingBuffer);
}
}
// 32-bit floating point depth buffer
{
TextureDesc desc("depth buffer", glConfig.vidWidth, glConfig.vidHeight);
desc.committedResource = true;
desc.shortLifeTime = true;
desc.initialState = ResourceStates::DepthWriteBit;
desc.allowedState = ResourceStates::DepthAccessBits | ResourceStates::PixelShaderAccessBit | ResourceStates::ComputeShaderAccessBit;
desc.format = TextureFormat::Depth32_Float;
// NOTE(review): a depth clear value of 0 suggests a reversed depth range - confirm
desc.SetClearDepthStencil(0.0f, 0);
depthTexture = RHI::CreateTexture(desc);
}
// depth pyramid: 2-channel float texture with 7 mip levels
{
TextureDesc desc("depth pyramid", glConfig.vidWidth, glConfig.vidHeight, 7);
desc.shortLifeTime = true;
desc.initialState = ResourceStates::UnorderedAccessBit;
desc.allowedState = ResourceStates::UnorderedAccessBit | ResourceStates::PixelShaderAccessBit | ResourceStates::ComputeShaderAccessBit;
desc.format = TextureFormat::R32G32_Float;
depthMinMaxTexture = RHI::CreateTexture(desc);
}
// G-buffer: normals
{
TextureDesc desc("GBuffer normals", glConfig.vidWidth, glConfig.vidHeight);
desc.committedResource = true;
desc.shortLifeTime = true;
desc.initialState = ResourceStates::RenderTargetBit;
desc.allowedState = ResourceStates::RenderTargetBit | ResourceStates::PixelShaderAccessBit | ResourceStates::ComputeShaderAccessBit;
desc.format = TextureFormat::R16G16_SNorm;
desc.SetClearColor(vec4_zero);
normalTexture = RHI::CreateTexture(desc);
}
// G-buffer: raw motion vectors
{
TextureDesc desc("GBuffer raw motion vectors", glConfig.vidWidth, glConfig.vidHeight);
desc.committedResource = true;
desc.shortLifeTime = true;
desc.initialState = ResourceStates::RenderTargetBit;
desc.allowedState = ResourceStates::RenderTargetBit | ResourceStates::PixelShaderAccessBit | ResourceStates::ComputeShaderAccessBit;
desc.format = TextureFormat::R16G16_Float;
desc.SetClearColor(vec4_zero);
motionVectorTexture = RHI::CreateTexture(desc);
}
// G-buffer: motion vectors for motion blur (MB)
{
TextureDesc desc("GBuffer MB motion vectors", glConfig.vidWidth, glConfig.vidHeight);
desc.committedResource = true;
desc.shortLifeTime = true;
desc.initialState = ResourceStates::RenderTargetBit;
desc.allowedState = ResourceStates::RenderTargetBit | ResourceStates::PixelShaderAccessBit | ResourceStates::ComputeShaderAccessBit;
desc.format = TextureFormat::R16G16_Float;
desc.SetClearColor(vec4_zero);
motionVectorMBTexture = RHI::CreateTexture(desc);
}
// G-buffer: direct light and raw direct light (sunlight), sharing one description
{
TextureDesc desc("GBuffer direct light", glConfig.vidWidth, glConfig.vidHeight);
desc.committedResource = true;
desc.shortLifeTime = true;
desc.initialState = ResourceStates::RenderTargetBit;
desc.allowedState = ResourceStates::RenderTargetBit | ResourceStates::PixelShaderAccessBit | ResourceStates::ComputeShaderAccessBit;
desc.format = TextureFormat::R16G16B16A16_Float;
desc.SetClearColor(colorBlack);
lightTexture = RHI::CreateTexture(desc);
desc.name = "GBuffer raw direct light";
sunlightTexture = RHI::CreateTexture(desc);
}
// G-buffer: shading position
{
TextureDesc desc("GBuffer shading position", glConfig.vidWidth, glConfig.vidHeight);
desc.committedResource = true;
desc.shortLifeTime = true;
desc.initialState = ResourceStates::RenderTargetBit;
desc.allowedState = ResourceStates::RenderTargetBit | ResourceStates::PixelShaderAccessBit | ResourceStates::ComputeShaderAccessBit;
desc.format = TextureFormat::R32G32B32A32_Float;
desc.SetClearColor(vec4_zero);
shadingPositionTexture = RHI::CreateTexture(desc);
}
// blit pipelines: same shaders, one per destination format (LDR and HDR)
{
GraphicsPipelineDesc desc("blit LDR");
MakeFullScreenPipeline(desc, ShaderByteCode(g_blit_ps));
desc.AddRenderTarget(0, TextureFormat::R8G8B8A8_UNorm);
blitPipelineLDR = CreateGraphicsPipeline(desc);
desc.name = "blit HDR";
desc.renderTargets[0].format = TextureFormat::R16G16B16A16_Float;
blitPipelineHDR = CreateGraphicsPipeline(desc);
}
depthPyramidPipeline = CreateComputePipeline("Depth Pyramid", ShaderByteCode(g_depth_pyramid_cs));
// scene view upload buffers, each with room for SceneViewConst::MaxViews entries
{
BufferDesc desc("scene view upload #1", SceneViewConst::BufferBytes, ResourceStates::ShaderAccessBits);
desc.shortLifeTime = true;
desc.memoryUsage = MemoryUsage::Upload;
desc.structureByteCount = SceneViewConst::StructBytes;
sceneViewUploadBuffers[0] = CreateBuffer(desc);
desc.name = "scene view upload #2";
sceneViewUploadBuffers[1] = CreateBuffer(desc);
}
// scene view buffer holding a single SceneView entry
{
BufferDesc desc("scene view", SceneViewConst::StructBytes, ResourceStates::ShaderAccessBits);
desc.shortLifeTime = true;
desc.structureByteCount = SceneViewConst::StructBytes;
desc.useSrvIndex0 = true; // the one and only buffer allowed to be there
sceneViewBuffer = CreateBuffer(desc);
}
// shader byte code table for the Im3D renderer
ShaderByteCode im3dShaders[Im3D::Shader::Count];
im3dShaders[Im3D::Shader::PointVS] = ShaderByteCode(g_im3d_points_vs);
im3dShaders[Im3D::Shader::PointPS] = ShaderByteCode(g_im3d_points_ps);
im3dShaders[Im3D::Shader::LineVS] = ShaderByteCode(g_im3d_lines_vs);
im3dShaders[Im3D::Shader::LinePS] = ShaderByteCode(g_im3d_lines_ps);
im3dShaders[Im3D::Shader::TriangleVS] = ShaderByteCode(g_im3d_triangles_vs);
im3dShaders[Im3D::Shader::TrianglePS] = ShaderByteCode(g_im3d_triangles_ps);
// sub-component initialization
raytracing.Init();
ui.Init(true, ShaderByteCode(g_ui_vs), ShaderByteCode(g_ui_ps), renderTargetFormat, RHI_MAKE_NULL_HANDLE(), NULL);
imgui.Init(true, ShaderByteCode(g_imgui_vs), ShaderByteCode(g_imgui_ps), renderTargetFormat, RHI_MAKE_NULL_HANDLE(), NULL);
im3d.Init(true, im3dShaders, renderTargetFormat);
nuklear.Init(true, ShaderByteCode(g_nuklear_vs), ShaderByteCode(g_nuklear_ps), renderTargetFormat, RHI_MAKE_NULL_HANDLE(), NULL);
mipMapGen.Init(true, ShaderByteCode(g_mip_1_cs), ShaderByteCode(g_mip_2_cs), ShaderByteCode(g_mip_3_cs));
prepass.Init();
opaque.Init();
transp.Init();
transpResolve.Init();
toneMap.Init();
gatherDof.Init();
accumDof.Init();
motionBlur.Init();
magnifier.Init();
dynamicLights.Init();
sunlight.Init();
volumetricLight.Init();
gbufferViz.Init();
sunlightEditor.Init();
particleSystem.Init();
vdbManager.Init();
// whatever RHI::Init returned, the first init is over from here on
srp.firstInit = false;
}
// Loads the textures the pipeline needs from disk: the 2D blue noise and the blackbody textures.
// Both are loaded with picmip, mip-mapping, image manipulation and anisotropic filtering disabled.
void CRP::LoadResources()
{
	const int rawImageFlags = IMG_NOPICMIP | IMG_NOMIPMAP | IMG_NOIMANIP | IMG_NOAF;

	blueNoise2D = LoadTexture("textures/stbn_2d.tga", rawImageFlags, TW_REPEAT);
	blackbodyTexture = LoadTexture("textures/blackbody.tga", rawImageFlags, TW_CLAMP_TO_EDGE);
}
// Shuts the pipeline down by forwarding the request, and the fullShutDown flag, to the RHI.
void CRP::ShutDown(bool fullShutDown)
{
RHI::ShutDown(fullShutDown);
}
// Starts a new frame: resets the per-frame state, begins the frame
// on the sub-components and the RHI, then clears the first render target.
void CRP::BeginFrame()
{
// every frame starts out writing to the first render target
renderTargetIndex = 0;
renderTarget = renderTargets[0];
sceneViewIndex = 0;
srp.BeginFrame();
// have it be first so we can use ImGUI in the other components too
imgui.BeginFrame();
vdbManager.BeforeFrame();
// must be run outside of the RHI::BeginFrame/RHI::EndFrame pair
// an update happens as soon as one of the lighting components asks for it
const bool rtasUpdate =
dynamicLights.WantRTASUpdate(tr.rtRefdef) ||
sunlight.WantRTASUpdate(tr.rtRefdef) ||
volumetricLight.WantRTASUpdate(tr.rtRefdef);
raytracing.BeforeFrame(rtasUpdate); // uploads data
RHI::BeginFrame();
raytracing.BeginFrame(rtasUpdate); // does BLAS/TLAS builds
ui.BeginFrame();
nuklear.BeginFrame();
im3d.BeginFrame();
// transition the render target before clearing it
CmdBeginBarrier();
CmdTextureBarrier(renderTarget, ResourceStates::RenderTargetBit);
CmdEndBarrier();
const float clearColor[4] = { 0.0f, 0.5f, 0.0f, 0.0f };
CmdClearColorTarget(renderTarget, clearColor);
// new random value in [0, 1] for this frame
frameSeed = (float)rand() / (float)RAND_MAX;
// restart allocating from the beginning of this frame's dynamic geometry buffers
dynBuffers[GetFrameIndex()].Rewind();
}
// Ends the current frame.
// With a pending freeze frame request, the current image is copied into the frozen texture,
// the frame is ended and the freeze becomes active; nothing else is drawn in that case.
// Otherwise, the GUIs are drawn, tone mapping is applied and the result is
// blitted to the swap chain and to the readback texture.
void CRP::EndFrame()
{
if(freezeFrame == FreezeFrame::Pending)
{
// after the swap, the image drawn so far is the read render target
crp.SwapRenderTargets();
CmdBeginBarrier();
CmdTextureBarrier(GetReadRenderTarget(), ResourceStates::PixelShaderAccessBit);
CmdTextureBarrier(frozenTexture, ResourceStates::RenderTargetBit);
CmdEndBarrier();
Blit(frozenTexture, GetReadRenderTarget(), "Blit Frozen Frame", true, vec2_one, vec2_zero);
// leave the frozen texture readable by pixel shaders
CmdBeginBarrier();
CmdTextureBarrier(frozenTexture, ResourceStates::PixelShaderAccessBit);
CmdEndBarrier();
srp.EndFrame();
freezeFrame = FreezeFrame::Active;
return;
}
// GUI code of the components
srp.DrawGUI();
gbufferViz.DrawGUI();
magnifier.DrawGUI();
sunlightEditor.DrawGUI();
volumetricLight.DrawGUI();
vdbManager.DrawGUI();
im3d.DrawGUI();
toneMap.DrawToneMap();
imgui.Draw(renderTarget);
magnifier.Draw();
// the final image goes to both the swap chain and the readback texture
BlitRenderTarget(GetSwapChainTexture(), "Blit to Swap Chain");
BlitRenderTarget(readbackRenderTarget, "Blit to Readback Texture");
srp.EndFrame();
}
// Draws 'source' into 'destination' with a full-screen triangle.
// destination : texture to render into, transitioned to the render target state
// source      : texture to sample, transitioned to the pixel shader access state
// passName    : label of the debug scope
// hdr         : selects the HDR blit pipeline (R16G16B16A16_Float target)
//               instead of the LDR one (R8G8B8A8_UNorm target)
// tcScale     : 2D scale forwarded to the shader for the texture coordinates
// tcBias      : 2D bias forwarded to the shader for the texture coordinates
// The viewport and scissor rectangle are set to the full video resolution.
void CRP::Blit(HTexture destination, HTexture source, const char* passName, bool hdr, const vec2_t tcScale, const vec2_t tcBias)
{
	SCOPED_DEBUG_LABEL(passName, 0.125f, 0.125f, 0.5f);

	CmdBeginBarrier();
	CmdTextureBarrier(source, ResourceStates::PixelShaderAccessBit);
	CmdTextureBarrier(destination, ResourceStates::RenderTargetBit);
	CmdEndBarrier();

	// root constants of the blit shaders
#pragma pack(push, 4)
	struct BlitRC
	{
		uint32_t textureIndex;
		uint32_t samplerIndex;
		float tcScale[2];
		float tcBias[2];
	};
#pragma pack(pop)

	BlitRC rc;
	rc.textureIndex = GetTextureIndexSRV(source);
	rc.samplerIndex = GetSamplerIndex(TW_CLAMP_TO_EDGE, TextureFilter::Linear);
	rc.tcScale[0] = tcScale[0];
	rc.tcScale[1] = tcScale[1];
	rc.tcBias[0] = tcBias[0];
	rc.tcBias[1] = tcBias[1]; // was tcBias[0]: the vertical bias was being dropped

	CmdSetViewportAndScissor(0, 0, glConfig.vidWidth, glConfig.vidHeight);
	CmdBindRenderTargets(1, &destination, NULL);
	CmdBindPipeline(hdr ? blitPipelineHDR : blitPipelineLDR);
	CmdSetGraphicsRootConstants(0, sizeof(rc), &rc);
	CmdDraw(3, 0); // 3 vertices: a single triangle
}
// Blits the current render target into 'destination' using the LDR blit pipeline,
// with a scale of vec2_one and a bias of vec2_zero for the texture coordinates.
void CRP::BlitRenderTarget(HTexture destination, const char* passName)
{
Blit(destination, crp.renderTarget, passName, false, vec2_one, vec2_zero);
}
// Creates the GPU texture of 'image' and stores both the texture handle
// and its SRV index in the image.
// Textures of at least 2^20 pixels are created as committed resources.
void CRP::CreateTexture(image_t* image, int mipCount, int width, int height)
{
	const bool hasMipChain = mipCount > 1;

	TextureDesc textureDesc(image->name, width, height, mipCount);
	textureDesc.shortLifeTime = true;
	textureDesc.committedResource = width * height >= (1 << 20);
	if(hasMipChain)
	{
		// for mip-map generation
		textureDesc.allowedState |= ResourceStates::UnorderedAccessBit;
	}

	const HTexture handle = ::RHI::CreateTexture(textureDesc);
	image->texture = handle;
	image->textureIndex = GetTextureIndexSRV(handle);
}
// Uploads the pixel data in 'data' to the texture of 'image', one row at a time,
// then generates the mip-maps of that texture.
// Rows are copied individually because the source and destination row sizes can differ.
void CRP::UpoadTextureAndGenerateMipMaps(image_t* image, const byte* data)
{
	const HTexture target = image->texture;

	MappedTexture mapped;
	RHI::BeginTextureUpload(mapped, target);
	uint32_t row = 0;
	while(row < mapped.rowCount)
	{
		memcpy(
			mapped.mappedData + row * mapped.dstRowByteCount,
			data + row * mapped.srcRowByteCount,
			mapped.srcRowByteCount);
		++row;
	}
	RHI::EndTextureUpload();

	mipMapGen.GenerateMipMaps(image->texture);
}
// Begins an upload to the texture of 'image' by forwarding to the RHI,
// which fills 'mappedTexture'.
void CRP::BeginTextureUpload(MappedTexture& mappedTexture, image_t* image)
{
RHI::BeginTextureUpload(mappedTexture, image->texture);
}
// Ends the texture upload started with BeginTextureUpload by forwarding to the RHI.
void CRP::EndTextureUpload()
{
RHI::EndTextureUpload();
}
// Hands the world over to the components that process it:
// raytracing, the sunlight editor and volumetric lighting.
void CRP::ProcessWorld(world_t& world)
{
raytracing.ProcessWorld(world);
sunlightEditor.ProcessWorld(world);
volumetricLight.ProcessWorld(world);
}
// Intentionally empty: the CRP does no per-model processing here.
void CRP::ProcessModel(model_t&)
{
}
// Routes a shader to the passes that can draw it:
// opaque shaders go to the pre-pass and the opaque pass,
// all the other ones go to the transparent pass.
void CRP::ProcessShader(shader_t& shader)
{
	if(!shader.isOpaque)
	{
		transp.ProcessShader(shader);
		return;
	}

	prepass.ProcessShader(shader);
	opaque.ProcessShader(shader);
}
void CRP::ExecuteRenderCommands(const byte* data, bool /*readbackRequested*/)
{
// @NOTE: readbackRequested is unused because
// the CRP always blits the final result to the readback texture
if(freezeFrame == FreezeFrame::Active ||
freezeFrame == FreezeFrame::ActiveBeforeMB)
{
BeginFrame();
Blit(GetWriteRenderTarget(), frozenTexture, "Blit Frozen Frame", true, vec2_one, vec2_zero);
if(freezeFrame == FreezeFrame::ActiveBeforeMB)
{
motionBlur.Draw();
}
EndFrame();
return;
}
for(;;)
{
const int commandId = ((const renderCommandBase_t*)data)->commandId;
if(commandId < 0 || commandId >= RC_COUNT)
{
assert(!"Invalid render command type");
return;
}
if(commandId == RC_END_OF_LIST)
{
return;
}
switch(commandId)
{
case RC_UI_SET_COLOR:
ui.CmdSetColor(*(const uiSetColorCommand_t*)data);
break;
case RC_UI_DRAW_QUAD:
ui.CmdDrawQuad(*(const uiDrawQuadCommand_t*)data);
break;
case RC_UI_DRAW_TRIANGLE:
ui.CmdDrawTriangle(*(const uiDrawTriangleCommand_t*)data);
break;
case RC_DRAW_SCENE_VIEW:
DrawSceneView(*(const drawSceneViewCommand_t*)data);
break;
case RC_BEGIN_FRAME:
BeginFrame();
break;
case RC_SWAP_BUFFERS:
EndFrame();
break;
case RC_BEGIN_UI:
ui.Begin(renderTarget);
break;
case RC_END_UI:
ui.End();
break;
case RC_BEGIN_3D:
srp.renderMode = RenderMode::None;
break;
case RC_END_3D:
srp.renderMode = RenderMode::None;
break;
case RC_END_SCENE:
break;
case RC_BEGIN_NK:
nuklear.Begin(renderTarget);
break;
case RC_END_NK:
nuklear.End();
break;
case RC_NK_UPLOAD:
nuklear.Upload(*(const nuklearUploadCommand_t*)data);
break;
case RC_NK_DRAW:
nuklear.Draw(*(const nuklearDrawCommand_t*)data);
break;
default:
Q_assert(!"Unsupported render command type");
return;
}
data += renderCommandSizes[commandId];
}
}
// Handles a tessellation buffer overflow: forwards the event to the pass
// currently owning the tessellator (if any), then empties the tessellation buffers.
void CRP::TessellationOverflow()
{
	if(tess.tessellator == Tessellator::Prepass)
	{
		prepass.TessellationOverflow();
	}
	else if(tess.tessellator == Tessellator::Opaque)
	{
		opaque.TessellationOverflow();
	}
	else if(tess.tessellator == Tessellator::Transp)
	{
		transp.TessellationOverflow();
	}

	tess.numVertexes = 0;
	tess.numIndexes = 0;
}
// Draws all the 3D passes of a scene view, in order:
// pre-pass, depth pyramid, particles, dynamic/volumetric/sun lighting,
// opaque surfaces, transparent surfaces and their resolve, then the Im3D debug geometry.
void CRP::DrawSceneView3D(const drawSceneViewCommand_t& cmd)
{
const int lightCount = backEnd.refdef.num_dlights;
prepass.Draw(cmd);
BuildDepthPyramid();
particleSystem.Draw();
dynamicLights.DrawBegin();
if(volumetricLight.ShouldDraw())
{
volumetricLight.DrawBegin();
// with volumetric lighting enabled, the point lights are only drawn when raytracing is available
if(raytracing.CanRaytrace())
{
{
// each point light is drawn by both components within a single render pass
SCOPED_RENDER_PASS("VL/DL Point Lights", 1.0f, 1.0f, 1.0f);
for(int i = 0; i < lightCount; i++)
{
volumetricLight.DrawPointLight(backEnd.refdef.dlights[i]);
dynamicLights.DrawPointLight(backEnd.refdef.dlights[i]);
}
}
volumetricLight.DrawSunlight();
}
volumetricLight.DrawEnd();
}
else
{
// no volumetric lighting: only the dynamic lights component draws the point lights
SCOPED_RENDER_PASS("DL Point Lights", 1.0f, 1.0f, 1.0f);
for(int i = 0; i < lightCount; i++)
{
dynamicLights.DrawPointLight(backEnd.refdef.dlights[i]);
}
}
sunlight.Draw();
opaque.Draw(cmd);
volumetricLight.DrawDebug();
transp.Draw(cmd);
transpResolve.Draw(cmd);
vdbManager.DrawIm3d();
volumetricLight.DrawIm3d();
im3d.Draw(cmd, renderTarget, depthTexture); // draw our debug stuff in front of all transparencies
}
// Draws a scene view, including its depth of field and motion blur post-processing.
// With accumulation depth of field on a full-screen viewport, the scene is rendered
// sampleCount x sampleCount times and the results are accumulated.
// Otherwise, the scene is rendered once, followed by gather depth of field and motion blur.
void CRP::DrawSceneView(const drawSceneViewCommand_t& cmd)
{
const viewParms_t& vp = cmd.viewParms;
// the clear is limited to the viewport rectangle of the view
if(cmd.shouldClearColor)
{
const Rect rect(vp.viewportX, vp.viewportY, vp.viewportWidth, vp.viewportHeight);
CmdBeginBarrier();
CmdTextureBarrier(renderTarget, ResourceStates::RenderTargetBit);
CmdEndBarrier();
CmdClearColorTarget(renderTarget, cmd.clearColor, &rect);
}
// nothing to draw
if(cmd.numDrawSurfs <= 0 || !cmd.shouldDrawScene)
{
return;
}
if(crp_dof->integer == DOFMethod::Accumulation &&
IsViewportFullscreen(vp))
{
const Rect rect(0, 0, glConfig.vidWidth, glConfig.vidHeight);
accumDof.Begin(cmd);
const uint32_t sampleCount = accumDof.GetSampleCount();
for(uint32_t y = 0; y < sampleCount; y++)
{
for(uint32_t x = 0; x < sampleCount; x++)
{
// render pass queries are only enabled for the very first sample
srp.enableRenderPassQueries = x == 0 && y == 0;
// the command gets adjusted for the current sample
drawSceneViewCommand_t newCmd;
accumDof.FixCommand(newCmd, cmd, x, y);
backEnd.refdef = newCmd.refdef;
backEnd.viewParms = newCmd.viewParms;
UploadSceneViewData();
// every sample starts from a cleared full-screen render target
CmdBeginBarrier();
CmdTextureBarrier(renderTarget, ResourceStates::RenderTargetBit);
CmdEndBarrier();
CmdClearColorTarget(renderTarget, cmd.clearColor, &rect);
DrawSceneView3D(newCmd);
accumDof.Accumulate();
// geometry allocation is a linear allocation instead of a ring buffer
// we force a CPU-GPU sync point after every full scene render
// that way, we can keep the buffer sizes at least somewhat reasonable
SubmitAndContinue();
dynBuffers[GetFrameIndex()].Rewind();
}
}
CmdSetViewportAndScissor(backEnd.viewParms);
srp.enableRenderPassQueries = true;
accumDof.Normalize();
// restore the view data of the original command
backEnd.viewParms = cmd.viewParms;
backEnd.refdef = cmd.refdef;
accumDof.DrawDebug();
}
else
{
backEnd.refdef = cmd.refdef;
backEnd.viewParms = cmd.viewParms;
UploadSceneViewData();
DrawSceneView3D(cmd);
CmdSetViewportAndScissor(vp.viewportX, vp.viewportY, vp.viewportWidth, vp.viewportHeight);
gatherDof.Draw();
// "freeze before motion blur" request: capture the image before motion blur gets applied
if(freezeFrame == FreezeFrame::PendingBeforeMB &&
IsViewportFullscreen(backEnd.viewParms))
{
crp.SwapRenderTargets();
CmdBeginBarrier();
CmdTextureBarrier(GetReadRenderTarget(), ResourceStates::PixelShaderAccessBit);
CmdTextureBarrier(frozenTexture, ResourceStates::RenderTargetBit);
CmdEndBarrier();
Blit(frozenTexture, GetReadRenderTarget(), "Blit Frozen Frame", true, vec2_one, vec2_zero);
// leave the frozen texture readable by pixel shaders
CmdBeginBarrier();
CmdTextureBarrier(frozenTexture, ResourceStates::PixelShaderAccessBit);
CmdEndBarrier();
freezeFrame = FreezeFrame::ActiveBeforeMB;
}
motionBlur.Draw();
}
sunlightEditor.DrawOverlay();
}
void CRP::UploadSceneViewData()
{
Q_assert(sceneViewIndex < SceneViewConst::MaxViews);
if(sceneViewIndex >= SceneViewConst::MaxViews)
{
return;
}
SCOPED_DEBUG_LABEL("Scene View Upload", 1.0f, 1.0f, 1.0f);
const viewParms_t& vp = backEnd.viewParms;
const HBuffer uploadBuffer = sceneViewUploadBuffers[GetFrameIndex()];
const uint32_t uploadByteOffset = sceneViewIndex * SceneViewConst::StructBytes;
if(!vp.isPortal && IsViewportFullscreen(vp))
{
Q_assert(tr.currZFar == vp.zFar);
Q_assert(tr.currZNear == vp.zNear);
}
SceneView scene = {};
#if defined(_DEBUG)
scene.debug[0] = crp_debug0->value;
scene.debug[1] = crp_debug1->value;
scene.debug[2] = crp_debug2->value;
scene.debug[3] = crp_debug3->value;
#endif
scene.frameSeed = (float)rand() / (float)RAND_MAX;
// @NOTE: yes, world.modelMatrix is actually the view matrix
// it's the model-view matrix for the world entity, thus the view matrix
memcpy(scene.projectionMatrix, vp.projectionMatrix, sizeof(scene.projectionMatrix));
R_InvMatrix(scene.projectionMatrix, scene.invProjectionMatrix);
memcpy(scene.viewMatrix, vp.world.modelMatrix, sizeof(scene.viewMatrix));
R_InvMatrix(scene.viewMatrix, scene.invViewMatrix);
memcpy(scene.prevViewProjMatrix, tr.prevViewProjMatrix, sizeof(scene.prevViewProjMatrix));
memcpy(scene.prevViewMatrix, tr.prevViewMatrix, sizeof(scene.prevViewMatrix));
memcpy(scene.prevProjectionMatrix, tr.prevProjMatrix, sizeof(scene.prevProjectionMatrix));
// we want the first Z slice to be closest to the sun to simplify ray marching
SunToZMatrix(scene.sunToZMatrix);
R_InvMatrix3x3(scene.sunToZMatrix, scene.zToSunMatrix);
RB_CreateClipPlane(scene.clipPlane);
VectorCopy(vp.world.viewOrigin, scene.cameraPosition);
VectorCopy(tr.prevCameraPosition, scene.prevCameraPosition);
VectorCopy(vp.orient.axis[0], scene.cameraForward);
VectorCopy(vp.orient.axis[1], scene.cameraLeft);
VectorCopy(vp.orient.axis[2], scene.cameraUp);
scene.sceneViewIndex = sceneViewIndex;
scene.frameIndex = tr.frameCount;
scene.depthTextureIndex = GetTextureIndexSRV(depthTexture);
scene.depthMinMaxTextureIndex = GetTextureIndexSRV(depthMinMaxTexture);
scene.normalTextureIndex = GetTextureIndexSRV(normalTexture);
scene.shadingPositionTextureIndex = GetTextureIndexSRV(shadingPositionTexture);
scene.motionVectorTextureIndex = GetTextureIndexSRV(motionVectorTexture);
scene.motionVectorMBTextureIndex = GetTextureIndexSRV(motionVectorMBTexture);
scene.lightTextureIndex = GetTextureIndexSRV(lightTexture);
scene.sunlightTextureIndex = GetTextureIndexSRV(sunlightTexture);
scene.tlasBufferIndex = raytracing.GetTLASBufferIndex();
scene.tlasInstanceBufferIndex = raytracing.GetInstanceBufferIndex();
RB_LinearDepthConstants(scene.linearDepthConstants);
scene.zNear = vp.zNear;
scene.zFar = vp.zFar;
scene.prevZNear = tr.prevZNear;
scene.prevZFar = tr.prevZFar;
VectorCopy(sunlightData.direction, scene.sunDirection);
VectorCopy(sunlightData.color, scene.sunColor);
scene.sunIntensityVL = sunlightData.intensityVL;
scene.sunIntensityDL = sunlightData.intensityDL;
scene.pointLightIntensityVL = volumetricLight.pointLightIntensity;
VectorCopy(volumetricLight.ambientColor, scene.ambientColor);
scene.ambientIntensity = volumetricLight.ambientIntensity;
Vector4Copy(volumetricLight.extinctionVolumeScale, scene.extinctionWorldScale);
for(int c = 0; c < 4; c++)
{
scene.extinctionTextureIndices[c] = GetTextureIndexSRV(volumetricLight.extinctionTextures[c]);
}
Vector4Copy(volumetricLight.sunShadowVolumeScale, scene.sunVShadowWorldScale);
for(int c = 0; c < 4; c++)
{
scene.sunVShadowTextureIndices[c] = GetTextureIndexSRV(volumetricLight.sunShadowTextures[c]);
}
scene.linearClampSamplerIndex = GetSamplerIndex(TW_CLAMP_TO_EDGE, TextureFilter::Linear);
SceneView* const mappedScene = (SceneView*)(MapBuffer(uploadBuffer) + uploadByteOffset);
memcpy(mappedScene, &scene, sizeof(scene));
UnmapBuffer(uploadBuffer);
CmdBeginBarrier();
CmdBufferBarrier(uploadBuffer, ResourceStates::CopySourceBit);
CmdBufferBarrier(sceneViewBuffer, ResourceStates::CopyDestinationBit);
CmdEndBarrier();
CmdCopyBuffer(sceneViewBuffer, 0, uploadBuffer, uploadByteOffset, SceneViewConst::StructBytes);
CmdBeginBarrier();
CmdBufferBarrier(sceneViewBuffer, ResourceStates::ShaderAccessBits);
CmdEndBarrier();
sceneViewIndex++;
}
void CRP::BuildDepthPyramid()
{
SCOPED_RENDER_PASS("Depth Pyramid", 1.0f, 1.0f, 1.0f);
CmdBeginBarrier();
CmdTextureBarrier(depthTexture, ResourceStates::ComputeShaderAccessBit);
CmdTextureBarrier(depthMinMaxTexture, ResourceStates::UnorderedAccessBit);
CmdEndBarrier();
DepthPyramidRC rc = {};
for(uint32_t i = 0; i < ARRAY_LEN(rc.destTextureIndices); i++)
{
rc.destTextureIndices[i] = GetTextureIndexUAV(depthMinMaxTexture, i);
}
rc.depthTextureIndex = GetTextureIndexSRV(depthTexture);
rc.depthSamplerIndex = GetSamplerIndex(TW_CLAMP_TO_EDGE, TextureFilter::Point);
const int w = glConfig.vidWidth / 2;
const int h = glConfig.vidHeight / 2;
CmdBindPipeline(depthPyramidPipeline);
CmdSetComputeRootConstants(0, sizeof(rc), &rc);
CmdDispatch((w + 7) / 8, (h + 7) / 8, 1);
}
// Forwards to ReadTextureImage with readbackRenderTarget as the source texture.
// w, h, alignment and colorSpace are passed through unchanged;
// outPixels is the destination handed to ReadTextureImage.
void CRP::ReadPixels(int w, int h, int alignment, colorSpace_t colorSpace, void* outPixels)
{
	ReadTextureImage(
		outPixels,
		readbackRenderTarget,
		w, h,
		alignment,
		colorSpace);
}
// Looks up the entry of samplerIndices stored at baseIndex.
// baseIndex must be a valid index into samplerIndices (asserted).
uint32_t CRP::GetSamplerDescriptorIndexFromBaseIndex(uint32_t baseIndex)
{
	Q_assert(baseIndex < ARRAY_LEN(samplerIndices));

	const uint32_t descriptorIndex = samplerIndices[baseIndex];

	return descriptorIndex;
}
// Returns the render target that is NOT currently selected by renderTargetIndex,
// i.e. the entry of renderTargets at the index with its lowest bit flipped.
HTexture CRP::GetReadRenderTarget()
{
	const HTexture readTarget = renderTargets[renderTargetIndex ^ 1];

	return readTarget;
}
// Returns the render target currently selected by renderTargetIndex.
HTexture CRP::GetWriteRenderTarget()
{
	const HTexture writeTarget = renderTargets[renderTargetIndex];

	return writeTarget;
}
void CRP::SwapRenderTargets()
{
renderTargetIndex ^= 1;
renderTarget = GetWriteRenderTarget();
}