cnq3/code/renderer/grp_main.cpp
2024-11-04 00:23:09 +01:00

679 lines
20 KiB
C++

/*
===========================================================================
Copyright (C) 2022-2024 Gian 'myT' Schellenbaum
This file is part of Challenge Quake 3 (CNQ3).
Challenge Quake 3 is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.
Challenge Quake 3 is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
===========================================================================
*/
// Gameplay Rendering Pipeline - main interface
#include "grp_local.h"
#include "grp_uber_shaders.h"
#include "../client/cl_imgui.h"
#include "compshaders/grp/uber_shader.h"
#include "compshaders/grp/complete_uber_vs.h"
#include "compshaders/grp/complete_uber_ps.h"
#include "compshaders/grp/ui.h"
#include "compshaders/grp/imgui.h"
#include "compshaders/grp/nuklear.h"
#include "compshaders/grp/mip_1.h"
#include "compshaders/grp/mip_2.h"
#include "compshaders/grp/mip_3.h"
#include "compshaders/grp/im3d_points.h"
#include "compshaders/grp/im3d_lines.h"
#include "compshaders/grp/im3d_triangles.h"
// global instance of the gameplay rendering pipeline
GRP grp;
// the same instance, exposed through the generic render pipeline interface
IRenderPipeline* grpp = &grp;
// byte code of the uber vertex shaders, one entry per stage count
// CreatePSO picks entry [stageCount - 1], so entry 0 is used for single-stage pipelines
static const ShaderByteCode vertexShaderByteCodes[8] =
{
ShaderByteCode(g_vs_1),
ShaderByteCode(g_vs_2),
ShaderByteCode(g_vs_3),
ShaderByteCode(g_vs_4),
ShaderByteCode(g_vs_5),
ShaderByteCode(g_vs_6),
ShaderByteCode(g_vs_7),
ShaderByteCode(g_vs_8)
};
// The tables below are all expanded from UBER_SHADER_PS_LIST, so entry i of each one
// refers to the same pixel shader permutation.

// each list entry turned into a string; parsed into uberPixelShaderStates by GRP::Init
#define PS(Data) #Data,
static const char* uberPixelShaderStateStrings[] =
{
UBER_SHADER_PS_LIST(PS)
};
#undef PS
// byte code of each list entry, referenced through its g_ps_<entry> symbol
#define PS(Data) ShaderByteCode(g_ps_##Data),
static const ShaderByteCode uberPixelShaderByteCodes[] =
{
UBER_SHADER_PS_LIST(PS)
};
#undef PS
// number of list entries: every entry expands to "1 +" and the sum is closed by the trailing 0
#define PS(Data) 1 +
static const uint32_t uberPixelShaderCacheSize = UBER_SHADER_PS_LIST(PS) 0;
#undef PS
// parsed form of uberPixelShaderStateStrings, filled in by GRP::Init
static UberPixelShaderState uberPixelShaderStates[uberPixelShaderCacheSize];
// Tells whether the blend function stored in stateBits is one of the known
// order-independent ones, i.e. consecutive stages using it can be drawn in any order.
// Only the GLS_BLEND_BITS portion of stateBits is looked at.
static bool IsCommutativeBlendState(unsigned int stateBits)
{
	const unsigned int requested = stateBits & GLS_BLEND_BITS;

	const unsigned int commutativeModes[] =
	{
		GLS_SRCBLEND_ONE | GLS_DSTBLEND_ONE, // additive: src + dst
		GLS_SRCBLEND_DST_COLOR | GLS_DSTBLEND_ZERO, // modulate: src * dst
		GLS_SRCBLEND_ZERO | GLS_DSTBLEND_SRC_COLOR, // modulate: dst * src
		GLS_SRCBLEND_SRC_ALPHA | GLS_DSTBLEND_ONE // alpha-weighted additive: src * src.a + dst
	};

	bool commutative = false;
	for(unsigned int i = 0; i < ARRAY_LEN(commutativeModes) && !commutative; ++i)
	{
		commutative = commutativeModes[i] == requested;
	}

	return commutative;
}
// Initializes the RHI, then creates (or re-creates) the GPU resources and sub-systems of the pipeline.
// The block guarded by srp.firstInit (the value returned by RHI::Init) creates the root signatures,
// the descriptor table and the shader trace buffers and parses the uber pixel shader states.
// Everything after that block runs on every call.
void GRP::Init()
{
InitDesc initDesc;
initDesc.directDescriptorHeapIndexing = false;
srp.firstInit = RHI::Init(initDesc);
if(srp.firstInit)
{
// "main" root signature: samplers and generic descriptors are visible to the pixel stage only
RootSignatureDesc desc("main");
desc.usingVertexBuffers = true;
desc.samplerCount = ARRAY_LEN(samplers);
desc.samplerVisibility = ShaderStages::PixelBit;
desc.genericVisibility = ShaderStages::PixelBit;
// presumably (type, first slot, slot count): MAX_DRAWIMAGES * 2 texture slots starting at 0,
// followed by a single RW buffer slot - TODO confirm against AddRange's declaration
desc.AddRange(DescriptorType::Texture, 0, MAX_DRAWIMAGES * 2);
desc.AddRange(DescriptorType::RWBuffer, MAX_DRAWIMAGES * 2, 1);
// keep a copy: its address is handed to ui.Init, imgui.Init and nuklear.Init below
rootSignatureDesc = desc;
rootSignature = CreateRootSignature(desc);
descriptorTable = CreateDescriptorTable(DescriptorTableDesc("game textures", rootSignature));
// "world" root signature: same description, plus per-stage root constants
// and generic descriptors that are also visible to the vertex stage
desc.name = "world";
desc.usingVertexBuffers = true;
desc.constants[ShaderStage::Vertex].byteCount = sizeof(WorldVertexRC);
desc.constants[ShaderStage::Pixel].byteCount = sizeof(WorldPixelRC);
desc.samplerVisibility = ShaderStages::PixelBit;
desc.genericVisibility = ShaderStages::VertexBit | ShaderStages::PixelBit;
uberRootSignature = CreateRootSignature(desc);
// turn every uber pixel shader state string into its parsed form (same index in both arrays)
for(uint32_t i = 0; i < uberPixelShaderCacheSize; ++i)
{
if(!ParseUberPixelShaderState(uberPixelShaderStates[i], uberPixelShaderStateStrings[i]))
{
Q_assert(!"ParseUberPixelShaderState failed!");
}
}
srp.CreateShaderTraceBuffers();
// the trace buffer goes into the RW buffer slot declared right after the texture slots
DescriptorTableUpdate update;
update.SetRWBuffers(1, &srp.traceRenderBuffer, MAX_DRAWIMAGES * 2);
UpdateDescriptorTable(descriptorTable, update);
}
// we recreate the samplers on every vid_restart to create the right level
// of anisotropy based on the latched CVar
for(uint32_t w = 0; w < TW_COUNT; ++w)
{
for(uint32_t f = 0; f < TextureFilter::Count; ++f)
{
for(uint32_t m = 0; m < MaxTextureMips; ++m)
{
// one sampler per (wrap mode, filter, mip value) combination;
// m is passed to SamplerDesc as a float - its exact meaning is defined by SamplerDesc
const textureWrap_t wrap = (textureWrap_t)w;
const TextureFilter::Id filter = (TextureFilter::Id)f;
const uint32_t s = GetBaseSamplerIndex(wrap, filter, m);
SamplerDesc desc(wrap, filter, (float)m);
desc.shortLifeTime = true;
samplers[s] = CreateSampler(desc);
}
}
}
// update our descriptor table with the new sampler descriptors
{
DescriptorTableUpdate update;
update.SetSamplers(ARRAY_LEN(samplers), samplers);
UpdateDescriptorTable(descriptorTable, update);
}
// texture slots are handed out from 0 again (see RegisterTexture)
textureIndex = 0;
psoCount = 1; // we treat index 0 as invalid
// main render target: its format is selected by the r_rtColorFormat CVar
{
switch(r_rtColorFormat->integer)
{
case RTCF_R10G10B10A2:
renderTargetFormat = TextureFormat::R10G10B10A2_UNorm;
break;
case RTCF_R16G16B16A16:
renderTargetFormat = TextureFormat::R16G16B16A16_UNorm;
break;
case RTCF_R8G8B8A8:
default:
renderTargetFormat = TextureFormat::R8G8B8A8_UNorm;
break;
}
TextureDesc desc("render target", glConfig.vidWidth, glConfig.vidHeight);
desc.initialState = ResourceStates::RenderTargetBit;
desc.allowedState = ResourceStates::RenderTargetBit | ResourceStates::PixelShaderAccessBit;
Vector4Clear(desc.clearColor);
desc.usePreferredClearValue = true;
desc.committedResource = true;
desc.format = renderTargetFormat;
desc.shortLifeTime = true;
renderTarget = RHI::CreateTexture(desc);
}
// read-back render target: same settings as above but always R8G8B8A8 UNorm
{
TextureDesc desc("readback render target", glConfig.vidWidth, glConfig.vidHeight);
desc.initialState = ResourceStates::RenderTargetBit;
desc.allowedState = ResourceStates::RenderTargetBit | ResourceStates::PixelShaderAccessBit;
Vector4Clear(desc.clearColor);
desc.usePreferredClearValue = true;
desc.committedResource = true;
desc.format = TextureFormat::R8G8B8A8_UNorm;
desc.shortLifeTime = true;
readbackRenderTarget = RHI::CreateTexture(desc);
}
// shader byte code for the Im3D point, line and triangle renderers
ShaderByteCode im3dShaders[Im3D::Shader::Count];
im3dShaders[Im3D::Shader::PointVS] = ShaderByteCode(g_im3d_points_vs);
im3dShaders[Im3D::Shader::PointPS] = ShaderByteCode(g_im3d_points_ps);
im3dShaders[Im3D::Shader::LineVS] = ShaderByteCode(g_im3d_lines_vs);
im3dShaders[Im3D::Shader::LinePS] = ShaderByteCode(g_im3d_lines_ps);
im3dShaders[Im3D::Shader::TriangleVS] = ShaderByteCode(g_im3d_triangles_vs);
im3dShaders[Im3D::Shader::TrianglePS] = ShaderByteCode(g_im3d_triangles_ps);
// sub-system initialization: ui, im3d and nuklear are given renderTargetFormat
// while imgui is given R8G8B8A8 UNorm, the format of the read-back render target
ui.Init(false, ShaderByteCode(g_ui_vs), ShaderByteCode(g_ui_ps), renderTargetFormat, descriptorTable, &rootSignatureDesc);
world.Init();
mipMapGen.Init(false, ShaderByteCode(g_mip_1_cs), ShaderByteCode(g_mip_2_cs), ShaderByteCode(g_mip_3_cs));
const HTexture fontAtlas = imgui.Init(false, ShaderByteCode(g_imgui_vs), ShaderByteCode(g_imgui_ps), TextureFormat::R8G8B8A8_UNorm, descriptorTable, &rootSignatureDesc);
// the font atlas takes a regular texture slot so that ImGUI can reference it by index
const uint32_t fontAtlasSRV = RegisterTexture(fontAtlas);
imgui.RegisterFontAtlas(fontAtlasSRV);
im3d.Init(false, im3dShaders, renderTargetFormat);
nuklear.Init(false, ShaderByteCode(g_nuklear_vs), ShaderByteCode(g_nuklear_ps), renderTargetFormat, descriptorTable, &rootSignatureDesc);
post.Init();
post.SetToneMapInput(renderTarget);
smaa.Init(); // must be after post
blitter.Init();
blitter.SetInputTexture(readbackRenderTarget);
// initialization is done: the flag is cleared until the next call to RHI::Init sets it again
srp.firstInit = false;
}
// Shuts the pipeline down by forwarding the request, including the fullShutDown flag, to the RHI.
void GRP::ShutDown(bool fullShutDown)
{
RHI::ShutDown(fullShutDown);
}
// Starts a new frame: publishes the PSO statistics to the shared render pipeline state (srp),
// begins the frame of the RHI and of every sub-system, then transitions and clears the main render target.
void GRP::BeginFrame()
{
// PSO statistics from this pipeline, handed to srp before srp.BeginFrame runs
srp.psoCount = psoCount;
srp.psoChangeCount = world.psoChangeCount;
srp.psoStatsValid = true;
srp.BeginFrame();
smaa.Update();
// have it be first so we can use ImGUI in the other components too
// NOTE(review): this goes through the global grp instead of this->imgui;
// the two are the same object only as long as grp is the sole GRP instance
grp.imgui.BeginFrame();
RHI::BeginFrame();
ui.BeginFrame();
world.BeginFrame();
nuklear.BeginFrame();
im3d.BeginFrame();
// the render target must be in the render target state before it gets cleared
CmdBeginBarrier();
CmdTextureBarrier(renderTarget, ResourceStates::RenderTargetBit);
CmdEndBarrier();
const float clearColor[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
CmdClearColorTarget(renderTarget, clearColor);
// nothing is bound to the command list yet!
srp.renderMode = RenderMode::None;
// new pseudo-random value in [0, 1] for this frame
frameSeed = (float)rand() / (float)RAND_MAX;
}
// Finishes the frame: issues the debug GUI calls, runs post-processing, draws ImGUI,
// blits to the swap chain texture and ends the frame of the world renderer and of srp.
void GRP::EndFrame()
{
// issue ImGUI calls
srp.DrawGUI();
im3d.DrawGUI();
// now using a R8G8B8A8 UNorm render target
// (readbackRenderTarget is handed to the post-process pass, to ImGUI
// and, through blitter.SetInputTexture in Init, to the final blit)
post.Draw("Post-process", readbackRenderTarget);
imgui.Draw(readbackRenderTarget);
blitter.Blit(GetSwapChainTexture());
world.EndFrame();
srp.EndFrame();
}
// Creates the GPU texture backing an image and registers it in the descriptor table.
// The resulting handle and descriptor index are written to image->texture and image->textureIndex.
void GRP::CreateTexture(image_t* image, int mipCount, int width, int height)
{
	const bool hasMipChain = mipCount > 1;
	const bool isLarge = width * height >= (1 << 20); // 1 Mi texels or more

	TextureDesc textureDesc(image->name, width, height, mipCount);
	textureDesc.shortLifeTime = true;
	textureDesc.committedResource = isLarge;
	if(hasMipChain)
	{
		// the mip-map generator needs unordered access to the texture
		textureDesc.allowedState |= ResourceStates::UnorderedAccessBit;
	}

	image->texture = ::RHI::CreateTexture(textureDesc);
	image->textureIndex = RegisterTexture(image->texture);
}
// Copies the pixel data of an image into its GPU texture, then generates the texture's mip-maps.
// data is read as rowCount rows that are srcRowByteCount bytes apart, as reported by the mapping.
void GRP::UpoadTextureAndGenerateMipMaps(image_t* image, const byte* data)
{
	MappedTexture mapped;
	RHI::BeginTextureUpload(mapped, image->texture);

	// source and destination rows have their own byte strides, hence the row-by-row copy
	for(uint32_t row = 0; row < mapped.rowCount; ++row)
	{
		const byte* const srcRow = data + row * mapped.srcRowByteCount;
		void* const dstRow = mapped.mappedData + row * mapped.dstRowByteCount;
		memcpy(dstRow, srcRow, mapped.srcRowByteCount);
	}

	RHI::EndTextureUpload();

	mipMapGen.GenerateMipMaps(image->texture);
}
// Begins a texture upload for the image's GPU texture by forwarding to the RHI;
// mappedTexture is filled in by RHI::BeginTextureUpload.
void GRP::BeginTextureUpload(MappedTexture& mappedTexture, image_t* image)
{
RHI::BeginTextureUpload(mappedTexture, image->texture);
}
// Ends the texture upload started with BeginTextureUpload by forwarding to the RHI.
void GRP::EndTextureUpload()
{
RHI::EndTextureUpload();
}
// Hands the world data over to the world renderer.
// The parameter has a trailing underscore to avoid clashing with the 'world' member.
void GRP::ProcessWorld(world_t& world_)
{
world.ProcessWorld(world_);
}
// Not implemented yet: the model is currently left untouched.
void GRP::ProcessModel(model_t& model)
{
// @TODO: !!!
//__debugbreak();
}
// Builds the pipelines needed to draw a shader and stores them in shader.pipelines,
// with their count in shader.numPipelines (0 when the shader has no stage).
// - Opaque shaders get a single pipeline covering all of their stages.
// - Other shaders get one pipeline per run of consecutive stages that share the same state bits,
//   use a commutative blend mode and have neither alpha testing nor depth writes enabled.
// Every pipeline comes in 2 variants: regular and mirrored cull type.
void GRP::ProcessShader(shader_t& shader)
{
	shader.numPipelines = 0;
	if(shader.numStages < 1)
	{
		return;
	}

	// @TODO: GLS_POLYMODE_LINE
	const bool clampDepth = r_depthClamp->integer != 0 || shader.isSky;

	// settings shared by all the pipelines of this shader
	CachedPSO cache = {};
	cache.desc.polygonOffset = !!shader.polygonOffset;
	cache.desc.clampDepth = clampDepth;

	if(shader.isOpaque)
	{
		Q_assert(IsDepthFadeEnabled(shader) == false);

		// @TODO: fix up cache.stageStateBits[0] based on depth state from follow-up states
		cache.desc.depthFade = false;

		// the first stage keeps all its state bits (minus line mode),
		// the following ones only contribute their blend and alpha test bits
		cache.stageStateBits[0] = shader.stages[0]->stateBits & (~GLS_POLYMODE_LINE);
		for(int s = 1; s < shader.numStages; ++s)
		{
			cache.stageStateBits[s] = shader.stages[s]->stateBits & (GLS_BLEND_BITS | GLS_ATEST_BITS);
		}
		cache.stageCount = shader.numStages;

		pipeline_t& single = shader.pipelines[0];
		cache.desc.cullType = shader.cullType;
		single.pipeline = CreatePSO(cache, shader.name);
		cache.desc.cullType = GetMirrorredCullType(shader.cullType);
		single.mirrorPipeline = CreatePSO(cache, va("%s mirror", shader.name));
		single.firstStage = 0;
		single.numStages = shader.numStages;
		shader.numPipelines = 1;

		return;
	}

	cache.desc.depthFade = IsDepthFadeEnabled(shader);
	cache.stageCount = 0;

	unsigned int prevStateBits = 0xFFFFFFFF;
	int firstStage = 0;
	for(int s = 0; s < shader.numStages; ++s)
	{
		const unsigned int stateBits = shader.stages[s]->stateBits & (~GLS_POLYMODE_LINE);
		const bool groupOpen = cache.stageCount > 0;

		// can this stage be appended to the group of stages being accumulated?
		bool mergeable = false;
		if(groupOpen)
		{
			// we could combine AT/DW in some circumstances, but we don't care to for now
			const bool cantCombine = (shader.stages[s]->stateBits & (GLS_ATEST_BITS | GLS_DEPTHMASK_TRUE)) != 0;
			mergeable =
				stateBits == prevStateBits &&
				!cantCombine &&
				IsCommutativeBlendState(stateBits);
		}

		if(mergeable)
		{
			cache.stageStateBits[cache.stageCount++] = stateBits;
		}
		else
		{
			if(groupOpen)
			{
				// the current group is complete: turn it into a pipeline
				// (numPipelines is incremented first, so the names are 1-based)
				pipeline_t& p = shader.pipelines[shader.numPipelines++];
				cache.desc.cullType = shader.cullType;
				p.pipeline = CreatePSO(cache, va("%s #%d", shader.name, shader.numPipelines));
				cache.desc.cullType = GetMirrorredCullType(shader.cullType);
				p.mirrorPipeline = CreatePSO(cache, va("%s #%d mirror", shader.name, shader.numPipelines));
				p.firstStage = firstStage;
				p.numStages = cache.stageCount;
				firstStage = s;
			}

			// this stage opens a new group
			cache.stageStateBits[0] = stateBits;
			cache.stageCount = 1;
		}

		prevStateBits = stateBits;
	}

	// turn the last group into a pipeline too
	if(cache.stageCount > 0)
	{
		pipeline_t& p = shader.pipelines[shader.numPipelines++];
		cache.desc.cullType = shader.cullType;
		p.pipeline = CreatePSO(cache, va("%s #%d", shader.name, shader.numPipelines));
		cache.desc.cullType = GetMirrorredCullType(shader.cullType);
		p.mirrorPipeline = CreatePSO(cache, va("%s #%d mirror", shader.name, shader.numPipelines));
		p.firstStage = firstStage;
		p.numStages = cache.stageCount;
	}
}
// Binds a texture to the next free texture slot of the descriptor table.
// Returns the index of that slot.
// Slots are handed out sequentially starting from textureIndex, which GRP::Init resets to 0.
uint32_t GRP::RegisterTexture(HTexture htexture)
{
	// the "main" root signature only declares MAX_DRAWIMAGES * 2 texture slots
	// and the slot right after them is used for the shader trace RW buffer,
	// so running past the end must be caught
	Q_assert(textureIndex < (uint32_t)(MAX_DRAWIMAGES * 2));

	const uint32_t index = textureIndex++;

	DescriptorTableUpdate update;
	update.SetTextures(1, &htexture, index);
	UpdateDescriptorTable(descriptorTable, update);

	return index;
}
// Returns the index into psos of a pipeline matching the cache entry, creating it when needed.
// An existing entry is reused when its stage count, description and per-stage state bits all match.
// Otherwise a new graphics pipeline is created. Its pixel shader is the uber pixel shader from
// UBER_SHADER_PS_LIST with the same stage count, dither/depth fade flags and per-stage
// blend/alpha test bits, or a shader compiled on the spot from uber_shader_string when none matches.
// cache is modified: cache.pipeline (and cache.name in _DEBUG builds) is set before it is stored in psos.
// name is used as the pipeline's name.
uint32_t GRP::CreatePSO(CachedPSO& cache, const char* name)
{
Q_assert(cache.stageCount > 0);
// the only state bits the pixel shader code depends on
const uint32_t pixelShaderStateBits = GLS_BLEND_BITS | GLS_ATEST_BITS;
// look for an identical pipeline first (the search starts at 1 since index 0 is treated as invalid)
for(uint32_t p = 1; p < psoCount; ++p)
{
if(cache.stageCount == psos[p].stageCount &&
memcmp(&cache.desc, &psos[p].desc, sizeof(cache.desc)) == 0 &&
memcmp(&cache.stageStateBits, &psos[p].stageStateBits, cache.stageCount * sizeof(cache.stageStateBits[0])) == 0)
{
return p;
}
}
Q_assert(psoCount < ARRAY_LEN(psos));
#if defined(_DEBUG)
Q_strncpyz(cache.name, name, sizeof(cache.name));
#endif
// look for an uber pixel shader matching the requested state; -1 means none was found
int uberPixelShaderIndex = -1;
for(uint32_t i = 0; i < uberPixelShaderCacheSize; ++i)
{
const UberPixelShaderState& state = uberPixelShaderStates[i];
const int dither = (state.globalState & UBERPS_DITHER_BIT) != 0 ? 1 : 0;
const bool depthFade = (state.globalState & UBERPS_DEPTHFADE_BIT) != 0;
if(cache.stageCount != (uint32_t)state.stageCount ||
r_dither->integer != dither ||
cache.desc.depthFade != depthFade)
{
continue;
}
// global state matches: now compare the blend/alpha test bits of every stage
bool found = true;
for(uint32_t s = 0; s < cache.stageCount; ++s)
{
const uint32_t psoCacheState = cache.stageStateBits[s] & pixelShaderStateBits;
const uint32_t psCacheState = (uint32_t)state.stageStates[s] & pixelShaderStateBits;
if(psoCacheState != psCacheState)
{
found = false;
break;
}
}
if(found)
{
uberPixelShaderIndex = (int)i;
break;
}
}
HShader pixelShader = RHI_MAKE_NULL_HANDLE();
ShaderByteCode pixelShaderByteCode;
if(uberPixelShaderIndex < 0)
{
// no match: compile the uber shader source with macros describing the requested state
// NOTE(review): the macro names/values below are pointers returned by va() and must stay valid
// until CreateShader is called; that is 1 + 2 * stageCount va() calls (on top of the one that
// may have produced 'name') - confirm va() rotates through enough buffers
uint32_t macroCount = 0;
ShaderMacro macros[64];
macros[macroCount].name = "DISABLE_PRAGMA_ONCE";
macros[macroCount].value = "1";
macroCount++;
macros[macroCount].name = "STAGE_COUNT";
macros[macroCount].value = va("%d", cache.stageCount);
macroCount++;
if(r_dither->integer)
{
macros[macroCount].name = "DITHER";
macros[macroCount].value = "1";
macroCount++;
}
if(cache.desc.depthFade)
{
macros[macroCount].name = "DEPTH_FADE";
macros[macroCount].value = "1";
macroCount++;
}
for(int s = 0; s < cache.stageCount; ++s)
{
macros[macroCount].name = va("STAGE%d_BITS", s);
macros[macroCount].value = va("%d", (int)cache.stageStateBits[s] & pixelShaderStateBits);
macroCount++;
}
Q_assert(macroCount <= ARRAY_LEN(macros));
pixelShader = CreateShader(ShaderDesc(ShaderStage::Pixel, sizeof(uber_shader_string), uber_shader_string, "ps", macroCount, macros));
pixelShaderByteCode = GetShaderByteCode(pixelShader);
}
else
{
pixelShaderByteCode = uberPixelShaderByteCodes[uberPixelShaderIndex];
}
// important missing entries can be copy-pasted into UBER_SHADER_PS_LIST
#if 0
if(uberPixelShaderIndex < 0)
{
unsigned int flags = 0;
if(r_dither->integer)
{
flags |= UBERPS_DITHER_BIT;
}
if(cache.desc.depthFade)
{
flags |= UBERPS_DEPTHFADE_BIT;
}
Sys_DebugPrintf("\tshader: %s\n", name);
ri.Printf(PRINT_ALL, "^2 shader: %s\n", name);
Sys_DebugPrintf("\tPS(%d_%X", (int)cache.stageCount, flags);
ri.Printf(PRINT_ALL, " PS(%d_%X", (int)cache.stageCount, flags);
for(int s = 0; s < cache.stageCount; ++s)
{
Sys_DebugPrintf("_%X", (unsigned int)(cache.stageStateBits[s] & pixelShaderStateBits));
ri.Printf(PRINT_ALL, "_%X", (unsigned int)(cache.stageStateBits[s] & pixelShaderStateBits));
}
Sys_DebugPrintf(") \\\n");
ri.Printf(PRINT_ALL, ") \\\n");
}
#endif
// a is the running vertex attribute index
uint32_t a = 0;
GraphicsPipelineDesc desc(name, uberRootSignature);
desc.shortLifeTime = true; // the PSO cache is only valid for this map!
// NOTE(review): vertexShaderByteCodes has 8 entries and stageCount is not checked against that here
desc.vertexShader = vertexShaderByteCodes[cache.stageCount - 1];
desc.pixelShader = pixelShaderByteCode;
// vertex layout: position and normal, then texture coordinates and color for every stage
desc.vertexLayout.AddAttribute(a++, ShaderSemantic::Position, DataType::Float32, 3, 0);
desc.vertexLayout.AddAttribute(a++, ShaderSemantic::Normal, DataType::Float32, 3, 0);
for(int s = 0; s < cache.stageCount; ++s)
{
desc.vertexLayout.AddAttribute(a++, ShaderSemantic::TexCoord, DataType::Float32, 2, 0);
desc.vertexLayout.AddAttribute(a++, ShaderSemantic::Color, DataType::UNorm8, 4, 0);
}
if(cache.desc.depthFade)
{
desc.depthStencil.DisableDepth();
}
else
{
// the depth state comes from the state bits of the first stage only
desc.depthStencil.depthStencilFormat = TextureFormat::Depth32_Float;
desc.depthStencil.depthComparison =
(cache.stageStateBits[0] & GLS_DEPTHFUNC_EQUAL) != 0 ?
ComparisonFunction::Equal :
ComparisonFunction::GreaterEqual;
desc.depthStencil.enableDepthTest = (cache.stageStateBits[0] & GLS_DEPTHTEST_DISABLE) == 0;
desc.depthStencil.enableDepthWrites = (cache.stageStateBits[0] & GLS_DEPTHMASK_TRUE) != 0;
}
desc.rasterizer.cullMode = cache.desc.cullType;
desc.rasterizer.polygonOffset = cache.desc.polygonOffset;
desc.rasterizer.clampDepth = cache.desc.clampDepth;
// the render target blend state also comes from the first stage
desc.AddRenderTarget(cache.stageStateBits[0] & GLS_BLEND_BITS, renderTargetFormat);
cache.pipeline = CreateGraphicsPipeline(desc);
// a shader compiled on the spot is destroyed once the pipeline has been created
if(uberPixelShaderIndex < 0)
{
DestroyShader(pixelShader);
}
// store the new entry and return its index
const uint32_t index = psoCount++;
psos[index] = cache;
return index;
}
void GRP::ExecuteRenderCommands(const byte* data, bool /*readbackRequested*/)
{
for(;;)
{
const int commandId = ((const renderCommandBase_t*)data)->commandId;
if(commandId < 0 || commandId >= RC_COUNT)
{
assert(!"Invalid render command type");
return;
}
if(commandId == RC_END_OF_LIST)
{
return;
}
switch(commandId)
{
case RC_UI_SET_COLOR:
ui.CmdSetColor(*(const uiSetColorCommand_t*)data);
break;
case RC_UI_DRAW_QUAD:
ui.CmdDrawQuad(*(const uiDrawQuadCommand_t*)data);
break;
case RC_UI_DRAW_TRIANGLE:
ui.CmdDrawTriangle(*(const uiDrawTriangleCommand_t*)data);
break;
case RC_DRAW_SCENE_VIEW:
world.DrawSceneView(*(const drawSceneViewCommand_t*)data);
im3d.Draw(*(const drawSceneViewCommand_t*)data, renderTarget, world.depthTexture);
break;
case RC_BEGIN_FRAME:
BeginFrame();
break;
case RC_SWAP_BUFFERS:
EndFrame();
break;
case RC_BEGIN_UI:
ui.Begin(renderTarget);
break;
case RC_END_UI:
ui.End();
break;
case RC_BEGIN_3D:
world.Begin();
break;
case RC_END_3D:
world.End();
break;
case RC_END_SCENE:
smaa.Draw(((const endSceneCommand_t*)data)->viewParms);
break;
case RC_BEGIN_NK:
nuklear.Begin(renderTarget);
break;
case RC_END_NK:
nuklear.End();
break;
case RC_NK_UPLOAD:
nuklear.Upload(*(const nuklearUploadCommand_t*)data);
break;
case RC_NK_DRAW:
nuklear.Draw(*(const nuklearDrawCommand_t*)data);
break;
default:
Q_assert(!"Unsupported render command type");
return;
}
data += renderCommandSizes[commandId];
}
}
// Reads the content of the read-back render target into outPixels.
// w, h, alignment and colorSpace are passed on unchanged to ReadTextureImage.
void GRP::ReadPixels(int w, int h, int alignment, colorSpace_t colorSpace, void* outPixels)
{
ReadTextureImage(outPixels, readbackRenderTarget, w, h, alignment, colorSpace);
}