/* =========================================================================== Copyright (C) 2023-2024 Gian 'myT' Schellenbaum This file is part of Challenge Quake 3 (CNQ3). Challenge Quake 3 is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. Challenge Quake 3 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Challenge Quake 3. If not, see . =========================================================================== */ // Cinematic Rendering Pipeline - main interface #include "crp_local.h" #include "../client/cl_imgui.h" #include "shaders/crp/oit.h.hlsli" #include "shaders/crp/scene_view.h.hlsli" #include "compshaders/crp/fullscreen.h" #include "compshaders/crp/blit.h" #include "compshaders/crp/ui.h" #include "compshaders/crp/imgui.h" #include "compshaders/crp/nuklear.h" #include "compshaders/crp/mip_1.h" #include "compshaders/crp/mip_2.h" #include "compshaders/crp/mip_3.h" #include "compshaders/crp/depth_pyramid.h" struct SceneViewConst { enum Constants { MaxViews = 1024, LightBytes = sizeof(DynamicLight), MaxLights = SCENE_VIEW_MAX_LIGHTS, StructBytes = sizeof(SceneView), BufferBytes = MaxViews * StructBytes }; }; #pragma pack(push, 4) struct DepthPyramidRC { uint32_t destTextureIndices[7]; uint32_t depthTextureIndex; uint32_t depthSamplerIndex; }; #pragma pack(pop) CRP crp; IRenderPipeline* crpp = &crp; cvar_t* crp_dof; cvar_t* crp_dof_overlay; cvar_t* crp_dof_blades; cvar_t* crp_dof_angle; cvar_t* crp_gatherDof_focusNearDist; cvar_t* crp_gatherDof_focusNearRange; cvar_t* crp_gatherDof_focusFarDist; cvar_t* crp_gatherDof_focusFarRange; cvar_t* crp_gatherDof_brightness; cvar_t* crp_accumDof_focusDist; cvar_t* crp_accumDof_radius; cvar_t* crp_accumDof_samples; cvar_t* crp_accumDof_preview; cvar_t* crp_mblur; cvar_t* crp_mblur_exposure; cvar_t* crp_sunlight; cvar_t* crp_volLight; cvar_t* crp_drawNormals; cvar_t* crp_updateRTAS; cvar_t* crp_debug0; cvar_t* crp_debug1; cvar_t* crp_debug2; cvar_t* crp_debug3; static const cvarTableItem_t crp_cvars[] = { { &crp_dof, "crp_dof", "1", CVAR_ARCHIVE, CVART_INTEGER, "0", "2", "enables depth of field\n" S_COLOR_VAL " 0 " S_COLOR_HELP "= Disabled\n" S_COLOR_VAL " 1 " S_COLOR_HELP "= Gather (fast, more flexible, issues with transparency)\n" S_COLOR_VAL " 2 " S_COLOR_HELP "= Accumulation (slow, less flexible, great IQ)\n", "DoF mode", CVARCAT_GRAPHICS, "Depth of field mode", "", CVAR_GUI_VALUE("0", "Disabled", "") CVAR_GUI_VALUE("1", "Gather", "Fast, lower IQ") CVAR_GUI_VALUE("2", "Accumulation", "Very slow, great IQ") }, { &crp_dof_overlay, "crp_dof_overlay", "0", CVAR_ARCHIVE, CVART_INTEGER, "0", "2", "debug overlay mode\n" S_COLOR_VAL " 0 " S_COLOR_HELP "= Disabled\n" S_COLOR_VAL " 1 " S_COLOR_HELP "= Colorized Blur\n" S_COLOR_VAL " 2 " S_COLOR_HELP "= Focus Plane", "DoF overlay mode", CVARCAT_GRAPHICS, "Debug overlay mode", "", CVAR_GUI_VALUE("0", "Disabled", "") CVAR_GUI_VALUE("1", "Colorized Blur", "") CVAR_GUI_VALUE("2", "Focus Plane", "") }, { &crp_dof_blades, "crp_dof_blades", "6", CVAR_ARCHIVE, CVART_FLOAT, "0", "16", "aperture blade count\n" "Set to less than 3 for a disk shape.", "DoF blade count", CVARCAT_GRAPHICS, "Aperture blade count", "Set to less than 3 for a disk shape." }, { &crp_dof_angle, "crp_dof_angle", "20", CVAR_ARCHIVE, CVART_FLOAT, "0", "360", "aperture angle, in degrees", "DoF aperture angle", CVARCAT_GRAPHICS, "Aperture angle, in degrees", "" }, { &crp_accumDof_focusDist, "crp_accumDof_focusDist", "256", CVAR_ARCHIVE, CVART_FLOAT, "2", "2048", "focus distance", "Accum DoF focus distance", CVARCAT_GRAPHICS, "Focus distance", "" }, { &crp_accumDof_radius, "crp_accumDof_blurRadius", "0.1", CVAR_ARCHIVE, CVART_FLOAT, "0.001", "20", "aperture radius in world units", "Accum DoF aperture radius", CVARCAT_GRAPHICS, "Aperture radius in world units", "" }, { &crp_accumDof_samples, "crp_accumDof_samples", "2", CVAR_ARCHIVE, CVART_INTEGER, "1", "12", "per-axis sampling density\n" "Density N means (2N + 1)(2N + 1) scene renders in total.", "Accum DoF sample count", CVARCAT_GRAPHICS, "Per-axis sampling density", "Density N means (2N + 1)^2 scene renders in total." }, { &crp_accumDof_preview, "crp_accumDof_preview", "0", CVAR_ARCHIVE, CVART_INTEGER, "0", "2", "low-res preview mode\n" S_COLOR_VAL " 0 " S_COLOR_HELP "= Disabled\n" S_COLOR_VAL " 1 " S_COLOR_HELP "= 1/4 pixel count, 9 samples total\n" S_COLOR_VAL " 2 " S_COLOR_HELP "= 1/16 pixel count, 25 samples total", "Accum DoF preview mode", CVARCAT_GRAPHICS, "Low-resolution preview modes", "", CVAR_GUI_VALUE("0", "Disabled", "") CVAR_GUI_VALUE("1", "1/4 pixel count", "9 samples total") CVAR_GUI_VALUE("2", "1/16 pixel count", "25 samples total") }, { &crp_gatherDof_focusNearDist, "crp_gatherDof_focusNearDist", "192", CVAR_ARCHIVE, CVART_FLOAT, "1", "2048", "near focus distance", "Gather DoF near focus distance", CVARCAT_GRAPHICS, "Near focus distance", "" }, { &crp_gatherDof_focusNearRange, "crp_gatherDof_focusNearRange", "256", CVAR_ARCHIVE, CVART_FLOAT, "1", "2048", "near focus range", "Gather DoF near focus range", CVARCAT_GRAPHICS, "Near focus range", "" }, { &crp_gatherDof_focusFarDist, "crp_gatherDof_focusFarDist", "512", CVAR_ARCHIVE, CVART_FLOAT, "1", "2048", "far focus distance", "Gather DoF far focus distance", CVARCAT_GRAPHICS, "Far focus distance", "" }, { &crp_gatherDof_focusFarRange, "crp_gatherDof_focusFarRange", "384", CVAR_ARCHIVE, CVART_FLOAT, "1", "2048", "far focus range", "Gather DoF far focus range", CVARCAT_GRAPHICS, "Far focus range", "" }, { &crp_gatherDof_brightness, "crp_gatherDof_brightness", "2", CVAR_ARCHIVE, CVART_FLOAT, "0", "8", "blur brightness weight", "Gather DoF bokeh brightness", CVARCAT_GRAPHICS, "Blur brightness weight", "" }, { &crp_mblur, "crp_mblur", "0", CVAR_ARCHIVE, CVART_INTEGER, "0", "3", "motion blur mode\n" S_COLOR_VAL " 0 " S_COLOR_HELP "= Disabled\n" S_COLOR_VAL " 1 " S_COLOR_HELP "= Camera only\n" S_COLOR_VAL " 2 " S_COLOR_HELP "= Object only\n" S_COLOR_VAL " 3 " S_COLOR_HELP "= Camera + Object", "Motion blur mode", CVARCAT_GRAPHICS, "", "", CVAR_GUI_VALUE("0", "Disabled", "") CVAR_GUI_VALUE("1", "Camera only", "") CVAR_GUI_VALUE("2", "Object only", "") CVAR_GUI_VALUE("3", "Camera + Object", "") }, { &crp_mblur_exposure, "crp_mblur_exposure", "0.5", CVAR_ARCHIVE, CVART_FLOAT, "0", "1", "motion blur scale\n" "This is the exposure time in percentage of frame time.", "Motion blur exposure", CVARCAT_GRAPHICS, "Exposure time in percentage of frame time", "" }, { &crp_sunlight, "crp_sunlight", "1", CVAR_ARCHIVE, CVART_BOOL, NULL, NULL, "sunlight", "Sunlight", CVARCAT_GRAPHICS, "Sunlight on non-lightmapped surfaces", "" }, { &crp_volLight, "crp_volLight", "1", CVAR_ARCHIVE, CVART_BOOL, NULL, NULL, "volumetric light", "Volumetric light", CVARCAT_GRAPHICS, "Sunlight scattering through the air", "" }, { &crp_drawNormals, "crp_drawNormals", "0", CVAR_TEMP, CVART_BOOL, NULL, NULL, "draws vertex normals", "Draw vertex normals", CVARCAT_GRAPHICS | CVARCAT_DEBUGGING, "", "" }, { &crp_updateRTAS, "crp_updateRTAS", "1", CVAR_TEMP, CVART_BOOL, NULL, NULL, "enables RTAS builds every frame", "Enable RTAS builds", CVARCAT_GRAPHICS | CVARCAT_DEBUGGING, "Allows raytracing acceleration structure updates", "" } #if defined(_DEBUG) , { &crp_debug0, "crp_debug0", "0", CVAR_TEMP, CVART_FLOAT, "0", "1", "debug value 0", "Debug value 0", CVARCAT_GRAPHICS | CVARCAT_DEBUGGING, "", "" }, { &crp_debug1, "crp_debug1", "0", CVAR_TEMP, CVART_FLOAT, "0", "1", "debug value 1", "Debug value 1", CVARCAT_GRAPHICS | CVARCAT_DEBUGGING, "", "" }, { &crp_debug2, "crp_debug2", "0", CVAR_TEMP, CVART_FLOAT, "0", "1", "debug value 2", "Debug value 2", CVARCAT_GRAPHICS | CVARCAT_DEBUGGING, "", "" }, { &crp_debug3, "crp_debug3", "0", CVAR_TEMP, CVART_FLOAT, "0", "1", "debug value 3", "Debug value 3", CVARCAT_GRAPHICS | CVARCAT_DEBUGGING, "", "" } #endif }; static void FreezeFrame_f(); static void FreezeFrameMB_f(); static const cmdTableItem_t crp_cmds[] = { { "cin_freezeFrame", FreezeFrame_f, NULL, "toggles freeze frame mode" }, { "cin_freezeFrameMB", FreezeFrameMB_f, NULL, "toggles freeze frame mode for MB" } }; static void FreezeFrame_f() { if(crp.freezeFrame == FreezeFrame::Active || crp.freezeFrame == FreezeFrame::ActiveBeforeMB) { crp.freezeFrame = FreezeFrame::Inactive; } else { crp.freezeFrame = FreezeFrame::Pending; } } static void FreezeFrameMB_f() { if(crp.freezeFrame == FreezeFrame::Active || crp.freezeFrame == FreezeFrame::ActiveBeforeMB) { crp.freezeFrame = FreezeFrame::Inactive; } else { crp.freezeFrame = FreezeFrame::PendingBeforeMB; } } static HTexture LoadTexture(const char* name, int flags, textureWrap_t glWrapClampMode, const image_t* onFail = tr.defaultImage) { image_t* const image = R_FindImageFile(name, flags, glWrapClampMode); if(image == NULL) { return onFail->texture; } return image->texture; } static void SunToZMatrix(matrix3x3_t rot) { float sc[2]; DirectionToAzimuthInclination(sc, crp.sunlightData.direction); const float azi = -(sc[0] + M_PI / 2.0f); const float ele = M_PI - sc[1]; const float rol = 0.0f; rot[0] = cosf(rol) * cosf(azi) - sinf(rol) * cosf(ele) * sinf(azi); rot[3] = sinf(rol) * cosf(azi) + cosf(rol) * cosf(ele) * sinf(azi); rot[6] = sinf(ele) * sinf(azi); rot[1] = -cosf(rol) * sinf(azi) - sinf(rol) * cosf(ele) * cosf(azi); rot[4] = -sinf(rol) * sinf(azi) + cosf(rol) * cosf(ele) * cosf(azi); rot[7] = sinf(ele) * cosf(azi); rot[2] = sinf(rol) * sinf(ele); rot[5] = -cosf(rol) * sinf(ele); rot[8] = cosf(ele); } HPipeline CreateComputePipeline(const char* name, const ShaderByteCode& shader) { ComputePipelineDesc desc(name); desc.shortLifeTime = true; desc.shader = shader; return CreateComputePipeline(desc); } void MakeFullScreenPipeline(GraphicsPipelineDesc& desc, const ShaderByteCode& pixelShader) { desc.shortLifeTime = true; desc.vertexShader = ShaderByteCode(g_fullscreen_vs); desc.pixelShader = pixelShader; desc.SetPostProcessState(); } void PSOCache::Init(Entry* entries_, uint32_t maxEntryCount_) { entries = entries_; maxEntryCount = maxEntryCount_; entryCount = 1; // we treat index 0 as invalid } int PSOCache::AddPipeline(const GraphicsPipelineDesc& desc, const char* name) { // we treat index 0 as invalid, so start at 1 for(uint32_t i = 1; i < entryCount; ++i) { Entry& entry = entries[i]; if(memcmp(&entry.desc, &desc, sizeof(desc)) == 0) { return (int)i; } } ASSERT_OR_DIE(entryCount < maxEntryCount, "Not enough entries in the PSO cache"); GraphicsPipelineDesc namedDesc = desc; namedDesc.name = name; // @NOTE: we keep the original desc and its padding bytes for proper comparison results const uint32_t index = entryCount++; Entry& entry = entries[index]; memcpy(&entry.desc, &desc, sizeof(entry.desc)); entry.handle = CreateGraphicsPipeline(namedDesc); return (int)index; } void CRP::Init() { ri.Cvar_RegisterTable(crp_cvars, ARRAY_LEN(crp_cvars)); ri.Cmd_RegisterTable(crp_cmds, ARRAY_LEN(crp_cmds)); InitDesc initDesc; initDesc.directDescriptorHeapIndexing = true; srp.firstInit = RHI::Init(initDesc); srp.psoStatsValid = false; if(srp.firstInit) { srp.CreateShaderTraceBuffers(); for(uint32_t f = 0; f < FrameCount; ++f) { // the doubled index count is for the depth pre-pass const int MaxDynamicVertexCount = 16 << 20; const int MaxDynamicIndexCount = MaxDynamicVertexCount * 4; GeoBuffers& db = dynBuffers[f]; db.Create(va("world #%d", f + 1), MaxDynamicVertexCount, MaxDynamicIndexCount); } } // we recreate the samplers on every vid_restart to create the right level // of anisotropy based on the latched CVar for(uint32_t w = 0; w < TW_COUNT; ++w) { for(uint32_t f = 0; f < TextureFilter::Count; ++f) { for(uint32_t m = 0; m < MaxTextureMips; ++m) { const textureWrap_t wrap = (textureWrap_t)w; const TextureFilter::Id filter = (TextureFilter::Id)f; const uint32_t s = GetBaseSamplerIndex(wrap, filter, m); SamplerDesc desc(wrap, filter, (float)m); desc.shortLifeTime = true; samplers[s] = CreateSampler(desc); samplerIndices[s] = RHI::GetSamplerIndex(samplers[s]); } } } { renderTargetFormat = TextureFormat::R16G16B16A16_Float; TextureDesc desc("render target #1", glConfig.vidWidth, glConfig.vidHeight); desc.initialState = ResourceStates::RenderTargetBit; desc.allowedState = ResourceStates::RenderTargetBit | ResourceStates::PixelShaderAccessBit | ResourceStates::ComputeShaderAccessBit; desc.SetClearColor(vec4_zero); desc.committedResource = true; desc.format = renderTargetFormat; desc.shortLifeTime = true; renderTargets[0] = RHI::CreateTexture(desc); desc.name = "render target #2"; renderTargets[1] = RHI::CreateTexture(desc); renderTargetIndex = 0; renderTarget = renderTargets[0]; } { TextureDesc desc("frozen frame", glConfig.vidWidth, glConfig.vidHeight); desc.initialState = ResourceStates::RenderTargetBit; desc.allowedState = ResourceStates::RenderTargetBit | ResourceStates::PixelShaderAccessBit | ResourceStates::ComputeShaderAccessBit; desc.format = renderTargetFormat; desc.shortLifeTime = true; frozenTexture = RHI::CreateTexture(desc); freezeFrame = FreezeFrame::Inactive; } { TextureDesc desc("readback render target", glConfig.vidWidth, glConfig.vidHeight); desc.initialState = ResourceStates::RenderTargetBit; desc.allowedState = ResourceStates::RenderTargetBit | ResourceStates::PixelShaderAccessBit; Vector4Clear(desc.clearColor); desc.usePreferredClearValue = true; desc.committedResource = true; desc.format = TextureFormat::R8G8B8A8_UNorm; desc.shortLifeTime = true; readbackRenderTarget = RHI::CreateTexture(desc); } { TextureDesc desc("OIT index", glConfig.vidWidth, glConfig.vidHeight); desc.initialState = ResourceStates::UnorderedAccessBit; desc.allowedState = ResourceStates::UnorderedAccessBit | ResourceStates::PixelShaderAccessBit | ResourceStates::ComputeShaderAccessBit; desc.committedResource = true; desc.format = TextureFormat::R32_UInt; desc.shortLifeTime = true; oitIndexTexture = RHI::CreateTexture(desc); } uint32_t oitMaxFragmentCount = 0; { const int byteCountPerFragment = sizeof(OIT_Fragment); const int fragmentCount = glConfig.vidWidth * glConfig.vidHeight * OIT_AVG_FRAGMENTS_PER_PIXEL; const int byteCount = byteCountPerFragment * fragmentCount; oitMaxFragmentCount = fragmentCount; BufferDesc desc("OIT fragment", byteCount, ResourceStates::UnorderedAccessBit); desc.committedResource = true; desc.memoryUsage = MemoryUsage::GPU; desc.structureByteCount = byteCountPerFragment; desc.shortLifeTime = true; oitFragmentBuffer = CreateBuffer(desc); } { const int byteCount = sizeof(OIT_Counter); { BufferDesc desc("OIT counter", byteCount, ResourceStates::UnorderedAccessBit); desc.committedResource = true; desc.memoryUsage = MemoryUsage::GPU; desc.structureByteCount = byteCount; desc.shortLifeTime = true; oitCounterBuffer = CreateBuffer(desc); } { BufferDesc desc("OIT counter staging", byteCount, ResourceStates::Common); desc.committedResource = false; desc.memoryUsage = MemoryUsage::Upload; desc.structureByteCount = byteCount; desc.shortLifeTime = true; oitCounterStagingBuffer = CreateBuffer(desc); uint32_t* dst = (uint32_t*)MapBuffer(oitCounterStagingBuffer); dst[0] = 1; // fragment index 0 is the end-of-list value dst[1] = oitMaxFragmentCount; dst[2] = 0; UnmapBuffer(oitCounterStagingBuffer); } } { TextureDesc desc("depth buffer", glConfig.vidWidth, glConfig.vidHeight); desc.committedResource = true; desc.shortLifeTime = true; desc.initialState = ResourceStates::DepthWriteBit; desc.allowedState = ResourceStates::DepthAccessBits | ResourceStates::PixelShaderAccessBit | ResourceStates::ComputeShaderAccessBit; desc.format = TextureFormat::Depth32_Float; desc.SetClearDepthStencil(0.0f, 0); depthTexture = RHI::CreateTexture(desc); } { TextureDesc desc("depth pyramid", glConfig.vidWidth, glConfig.vidHeight, 7); desc.shortLifeTime = true; desc.initialState = ResourceStates::UnorderedAccessBit; desc.allowedState = ResourceStates::UnorderedAccessBit | ResourceStates::PixelShaderAccessBit | ResourceStates::ComputeShaderAccessBit; desc.format = TextureFormat::R32G32_Float; depthMinMaxTexture = RHI::CreateTexture(desc); } { TextureDesc desc("GBuffer normals", glConfig.vidWidth, glConfig.vidHeight); desc.committedResource = true; desc.shortLifeTime = true; desc.initialState = ResourceStates::RenderTargetBit; desc.allowedState = ResourceStates::RenderTargetBit | ResourceStates::PixelShaderAccessBit | ResourceStates::ComputeShaderAccessBit; desc.format = TextureFormat::R16G16_SNorm; desc.SetClearColor(vec4_zero); normalTexture = RHI::CreateTexture(desc); } { TextureDesc desc("GBuffer raw motion vectors", glConfig.vidWidth, glConfig.vidHeight); desc.committedResource = true; desc.shortLifeTime = true; desc.initialState = ResourceStates::RenderTargetBit; desc.allowedState = ResourceStates::RenderTargetBit | ResourceStates::PixelShaderAccessBit | ResourceStates::ComputeShaderAccessBit; desc.format = TextureFormat::R16G16_Float; desc.SetClearColor(vec4_zero); motionVectorTexture = RHI::CreateTexture(desc); } { TextureDesc desc("GBuffer MB motion vectors", glConfig.vidWidth, glConfig.vidHeight); desc.committedResource = true; desc.shortLifeTime = true; desc.initialState = ResourceStates::RenderTargetBit; desc.allowedState = ResourceStates::RenderTargetBit | ResourceStates::PixelShaderAccessBit | ResourceStates::ComputeShaderAccessBit; desc.format = TextureFormat::R16G16_Float; desc.SetClearColor(vec4_zero); motionVectorMBTexture = RHI::CreateTexture(desc); } { TextureDesc desc("GBuffer direct light", glConfig.vidWidth, glConfig.vidHeight); desc.committedResource = true; desc.shortLifeTime = true; desc.initialState = ResourceStates::RenderTargetBit; desc.allowedState = ResourceStates::RenderTargetBit | ResourceStates::PixelShaderAccessBit | ResourceStates::ComputeShaderAccessBit; desc.format = TextureFormat::R16G16B16A16_Float; desc.SetClearColor(colorBlack); lightTexture = RHI::CreateTexture(desc); desc.name = "GBuffer raw direct light"; sunlightTexture = RHI::CreateTexture(desc); } { TextureDesc desc("GBuffer shading position", glConfig.vidWidth, glConfig.vidHeight); desc.committedResource = true; desc.shortLifeTime = true; desc.initialState = ResourceStates::RenderTargetBit; desc.allowedState = ResourceStates::RenderTargetBit | ResourceStates::PixelShaderAccessBit | ResourceStates::ComputeShaderAccessBit; desc.format = TextureFormat::R32G32B32A32_Float; desc.SetClearColor(vec4_zero); shadingPositionTexture = RHI::CreateTexture(desc); } { GraphicsPipelineDesc desc("blit LDR"); MakeFullScreenPipeline(desc, ShaderByteCode(g_blit_ps)); desc.AddRenderTarget(0, TextureFormat::R8G8B8A8_UNorm); blitPipelineLDR = CreateGraphicsPipeline(desc); desc.name = "blit HDR"; desc.renderTargets[0].format = TextureFormat::R16G16B16A16_Float; blitPipelineHDR = CreateGraphicsPipeline(desc); } depthPyramidPipeline = CreateComputePipeline("Depth Pyramid", ShaderByteCode(g_depth_pyramid_cs)); { BufferDesc desc("scene view upload #1", SceneViewConst::BufferBytes, ResourceStates::ShaderAccessBits); desc.shortLifeTime = true; desc.memoryUsage = MemoryUsage::Upload; desc.structureByteCount = SceneViewConst::StructBytes; sceneViewUploadBuffers[0] = CreateBuffer(desc); desc.name = "scene view upload #2"; sceneViewUploadBuffers[1] = CreateBuffer(desc); } { BufferDesc desc("scene view", SceneViewConst::StructBytes, ResourceStates::ShaderAccessBits); desc.shortLifeTime = true; desc.structureByteCount = SceneViewConst::StructBytes; desc.useSrvIndex0 = true; // the one and only buffer allowed to be there sceneViewBuffer = CreateBuffer(desc); } raytracing.Init(); ui.Init(true, ShaderByteCode(g_ui_vs), ShaderByteCode(g_ui_ps), renderTargetFormat, RHI_MAKE_NULL_HANDLE(), NULL); imgui.Init(true, ShaderByteCode(g_imgui_vs), ShaderByteCode(g_imgui_ps), renderTargetFormat, RHI_MAKE_NULL_HANDLE(), NULL); nuklear.Init(true, ShaderByteCode(g_nuklear_vs), ShaderByteCode(g_nuklear_ps), renderTargetFormat, RHI_MAKE_NULL_HANDLE(), NULL); mipMapGen.Init(true, ShaderByteCode(g_mip_1_cs), ShaderByteCode(g_mip_2_cs), ShaderByteCode(g_mip_3_cs)); prepass.Init(); opaque.Init(); transp.Init(); transpResolve.Init(); toneMap.Init(); gatherDof.Init(); accumDof.Init(); motionBlur.Init(); magnifier.Init(); dynamicLights.Init(); sunlight.Init(); volumetricLight.Init(); gbufferViz.Init(); sunlightEditor.Init(); srp.firstInit = false; } void CRP::LoadResources() { const int flags = IMG_NOPICMIP | IMG_NOMIPMAP | IMG_NOIMANIP | IMG_NOAF; blueNoise2D = LoadTexture("textures/stbn_2d.tga", flags, TW_REPEAT); } void CRP::ShutDown(bool fullShutDown) { RHI::ShutDown(fullShutDown); } void CRP::BeginFrame() { renderTargetIndex = 0; renderTarget = renderTargets[0]; sceneViewIndex = 0; srp.BeginFrame(); // have it be first to we can use ImGUI in the other components too imgui.BeginFrame(); // must be run outside of the RHI::BeginFrame/RHI::EndFrame pair const bool rtasUpdate = dynamicLights.WantRTASUpdate(tr.rtRefdef) || sunlight.WantRTASUpdate(tr.rtRefdef) || volumetricLight.WantRTASUpdate(tr.rtRefdef); raytracing.BeginFrame(rtasUpdate); RHI::BeginFrame(); ui.BeginFrame(); nuklear.BeginFrame(); CmdBeginBarrier(); CmdTextureBarrier(renderTarget, ResourceStates::RenderTargetBit); CmdEndBarrier(); const float clearColor[4] = { 0.0f, 0.5f, 0.0f, 0.0f }; CmdClearColorTarget(renderTarget, clearColor); frameSeed = (float)rand() / (float)RAND_MAX; dynBuffers[GetFrameIndex()].Rewind(); } void CRP::EndFrame() { if(freezeFrame == FreezeFrame::Pending) { crp.SwapRenderTargets(); CmdBeginBarrier(); CmdTextureBarrier(GetReadRenderTarget(), ResourceStates::PixelShaderAccessBit); CmdTextureBarrier(frozenTexture, ResourceStates::RenderTargetBit); CmdEndBarrier(); Blit(frozenTexture, GetReadRenderTarget(), "Blit Frozen Frame", true, vec2_one, vec2_zero); CmdBeginBarrier(); CmdTextureBarrier(frozenTexture, ResourceStates::PixelShaderAccessBit); CmdEndBarrier(); srp.EndFrame(); freezeFrame = FreezeFrame::Active; return; } srp.DrawGUI(); gbufferViz.DrawGUI(); magnifier.DrawGUI(); sunlightEditor.DrawGUI(); volumetricLight.DrawGUI(); imgui.Draw(renderTarget); toneMap.DrawToneMap(); magnifier.Draw(); BlitRenderTarget(GetSwapChainTexture(), "Blit to Swap Chain"); BlitRenderTarget(readbackRenderTarget, "Blit to Readback Texture"); srp.EndFrame(); } void CRP::Blit(HTexture destination, HTexture source, const char* passName, bool hdr, const vec2_t tcScale, const vec2_t tcBias) { SCOPED_DEBUG_LABEL(passName, 0.125f, 0.125f, 0.5f); CmdBeginBarrier(); CmdTextureBarrier(source, ResourceStates::PixelShaderAccessBit); CmdTextureBarrier(destination, ResourceStates::RenderTargetBit); CmdEndBarrier(); #pragma pack(push, 4) struct BlitRC { uint32_t textureIndex; uint32_t samplerIndex; float tcScale[2]; float tcBias[2]; }; #pragma pack(pop) BlitRC rc; rc.textureIndex = GetTextureIndexSRV(source); rc.samplerIndex = GetSamplerIndex(TW_CLAMP_TO_EDGE, TextureFilter::Linear); rc.tcScale[0] = tcScale[0]; rc.tcScale[1] = tcScale[1]; rc.tcBias[0] = tcBias[0]; rc.tcBias[1] = tcBias[0]; CmdSetViewportAndScissor(0, 0, glConfig.vidWidth, glConfig.vidHeight); CmdBindRenderTargets(1, &destination, NULL); CmdBindPipeline(hdr ? blitPipelineHDR : blitPipelineLDR); CmdSetGraphicsRootConstants(0, sizeof(rc), &rc); CmdDraw(3, 0); } void CRP::BlitRenderTarget(HTexture destination, const char* passName) { Blit(destination, crp.renderTarget, passName, false, vec2_one, vec2_zero); } void CRP::CreateTexture(image_t* image, int mipCount, int width, int height) { TextureDesc desc(image->name, width, height, mipCount); desc.committedResource = width * height >= (1 << 20); desc.shortLifeTime = true; if(mipCount > 1) { desc.allowedState |= ResourceStates::UnorderedAccessBit; // for mip-map generation } image->texture = ::RHI::CreateTexture(desc); image->textureIndex = GetTextureIndexSRV(image->texture); } void CRP::UpoadTextureAndGenerateMipMaps(image_t* image, const byte* data) { MappedTexture texture; RHI::BeginTextureUpload(texture, image->texture); for(uint32_t r = 0; r < texture.rowCount; ++r) { memcpy(texture.mappedData + r * texture.dstRowByteCount, data + r * texture.srcRowByteCount, texture.srcRowByteCount); } RHI::EndTextureUpload(); mipMapGen.GenerateMipMaps(image->texture); } void CRP::BeginTextureUpload(MappedTexture& mappedTexture, image_t* image) { RHI::BeginTextureUpload(mappedTexture, image->texture); } void CRP::EndTextureUpload() { RHI::EndTextureUpload(); } void CRP::ProcessWorld(world_t& world) { raytracing.ProcessWorld(world); sunlightEditor.ProcessWorld(world); volumetricLight.ProcessWorld(world); } void CRP::ProcessModel(model_t&) { } void CRP::ProcessShader(shader_t& shader) { if(shader.isOpaque) { prepass.ProcessShader(shader); opaque.ProcessShader(shader); } else { transp.ProcessShader(shader); } } void CRP::ExecuteRenderCommands(const byte* data, bool /*readbackRequested*/) { // @NOTE: readbackRequested is unused because // the CRP always blits the final result to the readback texture if(freezeFrame == FreezeFrame::Active || freezeFrame == FreezeFrame::ActiveBeforeMB) { BeginFrame(); Blit(GetWriteRenderTarget(), frozenTexture, "Blit Frozen Frame", true, vec2_one, vec2_zero); if(freezeFrame == FreezeFrame::ActiveBeforeMB) { motionBlur.Draw(); } EndFrame(); return; } for(;;) { const int commandId = ((const renderCommandBase_t*)data)->commandId; if(commandId < 0 || commandId >= RC_COUNT) { assert(!"Invalid render command type"); return; } if(commandId == RC_END_OF_LIST) { return; } switch(commandId) { case RC_UI_SET_COLOR: ui.CmdSetColor(*(const uiSetColorCommand_t*)data); break; case RC_UI_DRAW_QUAD: ui.CmdDrawQuad(*(const uiDrawQuadCommand_t*)data); break; case RC_UI_DRAW_TRIANGLE: ui.CmdDrawTriangle(*(const uiDrawTriangleCommand_t*)data); break; case RC_DRAW_SCENE_VIEW: DrawSceneView(*(const drawSceneViewCommand_t*)data); break; case RC_BEGIN_FRAME: BeginFrame(); break; case RC_SWAP_BUFFERS: EndFrame(); break; case RC_BEGIN_UI: ui.Begin(renderTarget); break; case RC_END_UI: ui.End(); break; case RC_BEGIN_3D: // @TODO: srp.renderMode = RenderMode::None; break; case RC_END_3D: // @TODO: srp.renderMode = RenderMode::None; break; case RC_END_SCENE: // @TODO: post-processing break; case RC_BEGIN_NK: nuklear.Begin(renderTarget); break; case RC_END_NK: nuklear.End(); break; case RC_NK_UPLOAD: nuklear.Upload(*(const nuklearUploadCommand_t*)data); break; case RC_NK_DRAW: nuklear.Draw(*(const nuklearDrawCommand_t*)data); break; default: Q_assert(!"Unsupported render command type"); return; } data += renderCommandSizes[commandId]; } } void CRP::TessellationOverflow() { switch(tess.tessellator) { case Tessellator::Prepass: prepass.TessellationOverflow(); break; case Tessellator::Opaque: opaque.TessellationOverflow(); break; case Tessellator::Transp: transp.TessellationOverflow(); break; default: break; } tess.numIndexes = 0; tess.numVertexes = 0; } void CRP::DrawSceneView3D(const drawSceneViewCommand_t& cmd) { const int lightCount = backEnd.refdef.num_dlights; prepass.Draw(cmd); BuildDepthPyramid(); dynamicLights.DrawBegin(); if(volumetricLight.ShouldDraw()) { volumetricLight.DrawBegin(); if(raytracing.CanRaytrace()) { { SCOPED_RENDER_PASS("VL/DL Point Lights", 1.0f, 1.0f, 1.0f); for(int i = 0; i < lightCount; i++) { volumetricLight.DrawPointLight(backEnd.refdef.dlights[i]); dynamicLights.DrawPointLight(backEnd.refdef.dlights[i]); } } volumetricLight.DrawSunlight(); } volumetricLight.DrawEnd(); } else { SCOPED_RENDER_PASS("DL Point Lights", 1.0f, 1.0f, 1.0f); for(int i = 0; i < lightCount; i++) { dynamicLights.DrawPointLight(backEnd.refdef.dlights[i]); } } sunlight.Draw(); opaque.Draw(cmd); volumetricLight.DrawDebug(); transp.Draw(cmd); transpResolve.Draw(cmd); } void CRP::DrawSceneView(const drawSceneViewCommand_t& cmd) { const viewParms_t& vp = cmd.viewParms; if(cmd.shouldClearColor) { const Rect rect(vp.viewportX, vp.viewportY, vp.viewportWidth, vp.viewportHeight); CmdBeginBarrier(); CmdTextureBarrier(renderTarget, ResourceStates::RenderTargetBit); CmdEndBarrier(); CmdClearColorTarget(renderTarget, cmd.clearColor, &rect); } if(cmd.numDrawSurfs <= 0 || !cmd.shouldDrawScene) { return; } if(crp_dof->integer == DOFMethod::Accumulation && IsViewportFullscreen(vp)) { const Rect rect(0, 0, glConfig.vidWidth, glConfig.vidHeight); accumDof.Begin(cmd); const uint32_t sampleCount = accumDof.GetSampleCount(); for(uint32_t y = 0; y < sampleCount; y++) { for(uint32_t x = 0; x < sampleCount; x++) { srp.enableRenderPassQueries = x == 0 && y == 0; drawSceneViewCommand_t newCmd; accumDof.FixCommand(newCmd, cmd, x, y); backEnd.refdef = newCmd.refdef; backEnd.viewParms = newCmd.viewParms; UploadSceneViewData(); CmdBeginBarrier(); CmdTextureBarrier(renderTarget, ResourceStates::RenderTargetBit); CmdEndBarrier(); CmdClearColorTarget(renderTarget, cmd.clearColor, &rect); DrawSceneView3D(newCmd); accumDof.Accumulate(); // geometry allocation is a linear allocation instead of a ring buffer // we force a CPU-GPU sync point after every full scene render // that way, we can keep the buffer sizes at least somewhat reasonable SubmitAndContinue(); dynBuffers[GetFrameIndex()].Rewind(); } } CmdSetViewportAndScissor(backEnd.viewParms); srp.enableRenderPassQueries = true; accumDof.Normalize(); backEnd.viewParms = cmd.viewParms; backEnd.refdef = cmd.refdef; accumDof.DrawDebug(); } else { backEnd.refdef = cmd.refdef; backEnd.viewParms = cmd.viewParms; UploadSceneViewData(); DrawSceneView3D(cmd); CmdSetViewportAndScissor(vp.viewportX, vp.viewportY, vp.viewportWidth, vp.viewportHeight); gatherDof.Draw(); if(freezeFrame == FreezeFrame::PendingBeforeMB && IsViewportFullscreen(backEnd.viewParms)) { crp.SwapRenderTargets(); CmdBeginBarrier(); CmdTextureBarrier(GetReadRenderTarget(), ResourceStates::PixelShaderAccessBit); CmdTextureBarrier(frozenTexture, ResourceStates::RenderTargetBit); CmdEndBarrier(); Blit(frozenTexture, GetReadRenderTarget(), "Blit Frozen Frame", true, vec2_one, vec2_zero); CmdBeginBarrier(); CmdTextureBarrier(frozenTexture, ResourceStates::PixelShaderAccessBit); CmdEndBarrier(); freezeFrame = FreezeFrame::ActiveBeforeMB; } motionBlur.Draw(); } sunlightEditor.DrawOverlay(); } void CRP::UploadSceneViewData() { Q_assert(sceneViewIndex < SceneViewConst::MaxViews); if(sceneViewIndex >= SceneViewConst::MaxViews) { return; } SCOPED_DEBUG_LABEL("Scene View Upload", 1.0f, 1.0f, 1.0f); const viewParms_t& vp = backEnd.viewParms; const HBuffer uploadBuffer = sceneViewUploadBuffers[GetFrameIndex()]; const uint32_t uploadByteOffset = sceneViewIndex * SceneViewConst::StructBytes; if(!vp.isPortal && IsViewportFullscreen(vp)) { Q_assert(tr.currZFar == vp.zFar); Q_assert(tr.currZNear == vp.zNear); } SceneView scene = {}; #if defined(_DEBUG) scene.debug[0] = crp_debug0->value; scene.debug[1] = crp_debug1->value; scene.debug[2] = crp_debug2->value; scene.debug[3] = crp_debug3->value; #endif scene.frameSeed = (float)rand() / (float)RAND_MAX; // @NOTE: yes, world.modelMatrix is actually the view matrix // it's the model-view matrix for the world entity, thus the view matrix memcpy(scene.projectionMatrix, vp.projectionMatrix, sizeof(scene.projectionMatrix)); R_InvMatrix(scene.projectionMatrix, scene.invProjectionMatrix); memcpy(scene.viewMatrix, vp.world.modelMatrix, sizeof(scene.viewMatrix)); R_InvMatrix(scene.viewMatrix, scene.invViewMatrix); memcpy(scene.prevViewProjMatrix, tr.prevViewProjMatrix, sizeof(scene.prevViewProjMatrix)); memcpy(scene.prevViewMatrix, tr.prevViewMatrix, sizeof(scene.prevViewMatrix)); memcpy(scene.prevProjectionMatrix, tr.prevProjMatrix, sizeof(scene.prevProjectionMatrix)); // we want the first Z slice to be closest to the sun to simplify ray marching vec3_t zDown; VectorSet(zDown, 0, 0, -1); SunToZMatrix(scene.sunToZMatrix); R_InvMatrix3x3(scene.sunToZMatrix, scene.zToSunMatrix); RB_CreateClipPlane(scene.clipPlane); VectorCopy(vp.world.viewOrigin, scene.cameraPosition); VectorCopy(tr.prevCameraPosition, scene.prevCameraPosition); VectorCopy(vp.orient.axis[0], scene.cameraForward); VectorCopy(vp.orient.axis[1], scene.cameraLeft); VectorCopy(vp.orient.axis[2], scene.cameraUp); scene.sceneViewIndex = sceneViewIndex; scene.frameIndex = tr.frameCount; scene.depthTextureIndex = GetTextureIndexSRV(depthTexture); scene.depthMinMaxTextureIndex = GetTextureIndexSRV(depthMinMaxTexture); scene.normalTextureIndex = GetTextureIndexSRV(normalTexture); scene.shadingPositionTextureIndex = GetTextureIndexSRV(shadingPositionTexture); scene.motionVectorTextureIndex = GetTextureIndexSRV(motionVectorTexture); scene.motionVectorMBTextureIndex = GetTextureIndexSRV(motionVectorMBTexture); scene.lightTextureIndex = GetTextureIndexSRV(lightTexture); scene.sunlightTextureIndex = GetTextureIndexSRV(sunlightTexture); scene.tlasBufferIndex = raytracing.GetTLASBufferIndex(); scene.tlasInstanceBufferIndex = raytracing.GetInstanceBufferIndex(); RB_LinearDepthConstants(scene.linearDepthConstants); scene.zNear = vp.zNear; scene.zFar = vp.zFar; scene.prevZNear = tr.prevZNear; scene.prevZFar = tr.prevZFar; VectorCopy(sunlightData.direction, scene.sunDirection); VectorCopy(sunlightData.color, scene.sunColor); scene.sunIntensity = sunlightData.intensity; VectorCopy(volumetricLight.ambientColor, scene.ambientColor); scene.ambientIntensity = volumetricLight.ambientIntensity; Vector4Copy(volumetricLight.extinctionVolumeScale, scene.extinctionWorldScale); for(int c = 0; c < 4; c++) { scene.extinctionTextureIndices[c] = GetTextureIndexSRV(volumetricLight.extinctionTextures[c]); } Vector4Copy(volumetricLight.sunShadowVolumeScale, scene.sunVShadowWorldScale); for(int c = 0; c < 4; c++) { scene.sunVShadowTextureIndices[c] = GetTextureIndexSRV(volumetricLight.sunShadowTextures[c]); } scene.linearClampSamplerIndex = GetSamplerIndex(TW_CLAMP_TO_EDGE, TextureFilter::Linear); SceneView* const mappedScene = (SceneView*)(MapBuffer(uploadBuffer) + uploadByteOffset); memcpy(mappedScene, &scene, sizeof(scene)); UnmapBuffer(uploadBuffer); CmdBeginBarrier(); CmdBufferBarrier(uploadBuffer, ResourceStates::CopySourceBit); CmdBufferBarrier(sceneViewBuffer, ResourceStates::CopyDestinationBit); CmdEndBarrier(); CmdCopyBuffer(sceneViewBuffer, 0, uploadBuffer, uploadByteOffset, SceneViewConst::StructBytes); CmdBeginBarrier(); CmdBufferBarrier(sceneViewBuffer, ResourceStates::ShaderAccessBits); CmdEndBarrier(); sceneViewIndex++; } void CRP::BuildDepthPyramid() { SCOPED_RENDER_PASS("Depth Pyramid", 1.0f, 1.0f, 1.0f); CmdBeginBarrier(); CmdTextureBarrier(depthTexture, ResourceStates::ComputeShaderAccessBit); CmdTextureBarrier(depthMinMaxTexture, ResourceStates::UnorderedAccessBit); CmdEndBarrier(); DepthPyramidRC rc = {}; for(uint32_t i = 0; i < ARRAY_LEN(rc.destTextureIndices); i++) { rc.destTextureIndices[i] = GetTextureIndexUAV(depthMinMaxTexture, i); } rc.depthTextureIndex = GetTextureIndexSRV(depthTexture); rc.depthSamplerIndex = GetSamplerIndex(TW_CLAMP_TO_EDGE, TextureFilter::Point); const int w = glConfig.vidWidth / 2; const int h = glConfig.vidHeight / 2; CmdBindPipeline(depthPyramidPipeline); CmdSetComputeRootConstants(0, sizeof(rc), &rc); CmdDispatch((w + 7) / 8, (h + 7) / 8, 1); } void CRP::ReadPixels(int w, int h, int alignment, colorSpace_t colorSpace, void* outPixels) { ReadTextureImage(outPixels, readbackRenderTarget, w, h, alignment, colorSpace); } uint32_t CRP::GetSamplerDescriptorIndexFromBaseIndex(uint32_t baseIndex) { Q_assert(baseIndex < ARRAY_LEN(samplerIndices)); return samplerIndices[baseIndex]; } HTexture CRP::GetReadRenderTarget() { return renderTargets[renderTargetIndex ^ 1]; } HTexture CRP::GetWriteRenderTarget() { return renderTargets[renderTargetIndex]; } void CRP::SwapRenderTargets() { renderTargetIndex ^= 1; renderTarget = GetWriteRenderTarget(); }