diff --git a/changelog.txt b/changelog.txt index 9c77d20..eee67fa 100644 --- a/changelog.txt +++ b/changelog.txt @@ -71,6 +71,10 @@ add: r_shadingRate <0 to 6> (default: 0) sets the variable-rate shading (VRS) mo prefer horizontal subsampling as many maps have textures with thin horizontal lines which become an aliased mess when vertically subsampled +add: r_normalSmoothing <0|1> (default: 1) enables smooth vertex normal generation + +add: r_normalAreaWeight <0|1> (default: 1) enables area weighting for vertex normal generation + add: Cinematic Rendering Pipeline CVars depth of field: crp_dof <0|1|2> (default: 1) selects the depth of field mode @@ -99,6 +103,9 @@ add: Cinematic Rendering Pipeline CVars 0 - disabled 1 - 1/4 pixel count, 9 samples total 2 - 1/16 pixel count, 25 samples total + miscellaneous: + crp_drawNormals <0|1> (default: 0) draws vertex normals as colorized wireframe lines + crp_updateRTAS <0|1> (default: 1) enables raytracing acceleration structure builds every frame fix: allocating enough memory for 4K screenshots and video captures @@ -128,9 +135,10 @@ chg: reworked renderer with 2 new rendering pipelines chg: removed cl_drawMouseLag, r_backend, r_frameSleep, r_gpuMipGen, r_alphaToCoverage, r_alphaToCoverageMipBoost removed r_d3d11_syncOffsets, r_d3d11_presentMode, r_gl3_geoStream, r_ignoreGLErrors, r_finish, r_khr_debug - removed r_verbose, r_customaspect, r_speeds, r_msaa, r_showsky, r_showtris, r_shownormals + removed r_verbose, r_customaspect, r_speeds, r_msaa, r_showsky, r_showtris replaced r_swapInterval with r_vsync <0|1> (default: 0) to enable V-Sync replaced r_textureMode with r_lego <0|1> (default: 0) to enable nearest-neighbor texture filtering + replaced r_shownormals with crp_drawNormals <0|1> (default: 0) to draw vertex normals 30 Oct 23 - 1.53 diff --git a/code/client/cl_imgui.cpp b/code/client/cl_imgui.cpp index 623db02..bdcb38d 100644 --- a/code/client/cl_imgui.cpp +++ b/code/client/cl_imgui.cpp @@ -183,31 +183,31 @@ static 
void ImGUI_ApplyTheme() ImVec4* colors = ImGui::GetStyle().Colors; colors[ImGuiCol_Text] = ImVec4(1.00f, 1.00f, 1.00f, 1.00f); colors[ImGuiCol_TextDisabled] = ImVec4(0.50f, 0.50f, 0.50f, 1.00f); - colors[ImGuiCol_WindowBg] = ImVec4(0.10f, 0.10f, 0.10f, 1.00f); + colors[ImGuiCol_WindowBg] = ImVec4(0.20f, 0.20f, 0.20f, 1.00f); colors[ImGuiCol_ChildBg] = ImVec4(0.00f, 0.00f, 0.00f, 0.00f); colors[ImGuiCol_PopupBg] = ImVec4(0.19f, 0.19f, 0.19f, 0.92f); colors[ImGuiCol_Border] = ImVec4(0.19f, 0.19f, 0.19f, 0.29f); colors[ImGuiCol_BorderShadow] = ImVec4(0.00f, 0.00f, 0.00f, 0.24f); colors[ImGuiCol_FrameBg] = ImVec4(0.05f, 0.05f, 0.05f, 0.54f); colors[ImGuiCol_FrameBgHovered] = ImVec4(0.19f, 0.19f, 0.19f, 0.54f); - colors[ImGuiCol_FrameBgActive] = ImVec4(0.20f, 0.22f, 0.23f, 1.00f); + colors[ImGuiCol_FrameBgActive] = ImVec4(0.40f, 0.44f, 0.46f, 1.00f); colors[ImGuiCol_TitleBg] = ImVec4(0.00f, 0.00f, 0.00f, 1.00f); - colors[ImGuiCol_TitleBgActive] = ImVec4(0.06f, 0.06f, 0.06f, 1.00f); + colors[ImGuiCol_TitleBgActive] = ImVec4(0.12f, 0.12f, 0.12f, 1.00f); colors[ImGuiCol_TitleBgCollapsed] = ImVec4(0.00f, 0.00f, 0.00f, 1.00f); colors[ImGuiCol_MenuBarBg] = ImVec4(0.14f, 0.14f, 0.14f, 1.00f); colors[ImGuiCol_ScrollbarBg] = ImVec4(0.05f, 0.05f, 0.05f, 0.54f); colors[ImGuiCol_ScrollbarGrab] = ImVec4(0.34f, 0.34f, 0.34f, 0.54f); colors[ImGuiCol_ScrollbarGrabHovered] = ImVec4(0.40f, 0.40f, 0.40f, 0.54f); colors[ImGuiCol_ScrollbarGrabActive] = ImVec4(0.56f, 0.56f, 0.56f, 0.54f); - colors[ImGuiCol_CheckMark] = ImVec4(0.33f, 0.67f, 0.86f, 1.00f); + colors[ImGuiCol_CheckMark] = ImVec4(0.49f, 1.00f, 1.00f, 1.00f); colors[ImGuiCol_SliderGrab] = ImVec4(0.34f, 0.34f, 0.34f, 0.54f); colors[ImGuiCol_SliderGrabActive] = ImVec4(0.56f, 0.56f, 0.56f, 0.54f); colors[ImGuiCol_Button] = ImVec4(0.05f, 0.05f, 0.05f, 0.54f); colors[ImGuiCol_ButtonHovered] = ImVec4(0.19f, 0.19f, 0.19f, 0.54f); - colors[ImGuiCol_ButtonActive] = ImVec4(0.20f, 0.22f, 0.23f, 1.00f); + colors[ImGuiCol_ButtonActive] = 
ImVec4(0.40f, 0.44f, 0.46f, 1.00f); colors[ImGuiCol_Header] = ImVec4(0.00f, 0.00f, 0.00f, 0.52f); colors[ImGuiCol_HeaderHovered] = ImVec4(0.00f, 0.00f, 0.00f, 0.36f); - colors[ImGuiCol_HeaderActive] = ImVec4(0.20f, 0.22f, 0.23f, 0.33f); + colors[ImGuiCol_HeaderActive] = ImVec4(0.40f, 0.44f, 0.46f, 0.33f); colors[ImGuiCol_Separator] = ImVec4(0.28f, 0.28f, 0.28f, 0.29f); colors[ImGuiCol_SeparatorHovered] = ImVec4(0.44f, 0.44f, 0.44f, 0.29f); colors[ImGuiCol_SeparatorActive] = ImVec4(0.40f, 0.44f, 0.47f, 1.00f); @@ -215,10 +215,10 @@ static void ImGUI_ApplyTheme() colors[ImGuiCol_ResizeGripHovered] = ImVec4(0.44f, 0.44f, 0.44f, 0.29f); colors[ImGuiCol_ResizeGripActive] = ImVec4(0.40f, 0.44f, 0.47f, 1.00f); colors[ImGuiCol_Tab] = ImVec4(0.00f, 0.00f, 0.00f, 0.52f); - colors[ImGuiCol_TabHovered] = ImVec4(0.14f, 0.14f, 0.14f, 1.00f); - colors[ImGuiCol_TabActive] = ImVec4(0.20f, 0.20f, 0.20f, 0.36f); + colors[ImGuiCol_TabHovered] = ImVec4(0.28f, 0.28f, 0.28f, 1.00f); + colors[ImGuiCol_TabActive] = ImVec4(0.49f, 1.00f, 1.00f, 1.00f); colors[ImGuiCol_TabUnfocused] = ImVec4(0.00f, 0.00f, 0.00f, 0.52f); - colors[ImGuiCol_TabUnfocusedActive] = ImVec4(0.14f, 0.14f, 0.14f, 1.00f); + colors[ImGuiCol_TabUnfocusedActive] = ImVec4(0.56f, 0.56f, 0.56f, 1.00f); colors[ImGuiCol_PlotLines] = ImVec4(1.00f, 0.00f, 0.00f, 1.00f); colors[ImGuiCol_PlotLinesHovered] = ImVec4(1.00f, 0.00f, 0.00f, 1.00f); colors[ImGuiCol_PlotHistogram] = ImVec4(1.00f, 0.00f, 0.00f, 1.00f); @@ -235,8 +235,8 @@ static void ImGUI_ApplyTheme() colors[ImGuiCol_NavWindowingDimBg] = ImVec4(1.00f, 0.00f, 0.00f, 0.20f); colors[ImGuiCol_ModalWindowDimBg] = ImVec4(1.00f, 0.00f, 0.00f, 0.35f); - const ImVec4 hover(0.26f, 0.59f, 0.98f, 0.4f); - const ImVec4 active(0.2f, 0.41f, 0.68f, 0.5f); + const ImVec4 hover(0.49f, 0.75f, 0.75f, 0.35f); + const ImVec4 active(0.49f, 1.00f, 1.00f, 0.55f); colors[ImGuiCol_HeaderHovered] = hover; colors[ImGuiCol_HeaderActive] = active; colors[ImGuiCol_TabHovered] = hover; diff --git 
a/code/qcommon/q_shared.h b/code/qcommon/q_shared.h index 7c8e323..6269f61 100644 --- a/code/qcommon/q_shared.h +++ b/code/qcommon/q_shared.h @@ -266,6 +266,7 @@ typedef float vec_t; typedef vec_t vec2_t[2]; typedef vec_t vec3_t[3]; typedef vec_t vec4_t[4]; +typedef vec_t matrix4x4_t[16]; extern const vec3_t vec2_zero; extern const vec3_t vec2_one; extern const vec3_t vec3_origin; diff --git a/code/renderer/crp_dof_gather.cpp b/code/renderer/crp_dof_gather.cpp index 05de309..e152cd7 100644 --- a/code/renderer/crp_dof_gather.cpp +++ b/code/renderer/crp_dof_gather.cpp @@ -239,10 +239,7 @@ void GatherDepthOfField::Draw() return; } - if(backEnd.viewParms.viewportX != 0 || - backEnd.viewParms.viewportY != 0 || - backEnd.viewParms.viewportWidth != glConfig.vidWidth || - backEnd.viewParms.viewportHeight != glConfig.vidHeight) + if(!IsViewportFullscreen(backEnd.viewParms)) { return; } diff --git a/code/renderer/crp_dynamic_lights.cpp b/code/renderer/crp_dynamic_lights.cpp new file mode 100644 index 0000000..7ba4262 --- /dev/null +++ b/code/renderer/crp_dynamic_lights.cpp @@ -0,0 +1,130 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>. 
+=========================================================================== +*/ +// Cinematic Rendering Pipeline - direct lighting from dynamic lights + + +#include "crp_local.h" +#include "compshaders/crp/fullscreen.h" +#include "compshaders/crp/dl_draw.h" +#include "compshaders/crp/dl_denoising.h" + + +#pragma pack(push, 4) + +struct DynamicLightsRC +{ + uint32_t blueNoiseTextureIndex; +}; + + +struct DenoiseRC +{ + uint32_t textureIndex; +}; + +#pragma pack(pop) + + +void DynamicLights::Init() +{ + { + GraphicsPipelineDesc desc("Dynamic Lights"); + desc.shortLifeTime = true; + desc.vertexShader = ShaderByteCode(g_fullscreen_vs); + desc.pixelShader = ShaderByteCode(g_dl_draw_ps); + desc.depthStencil.DisableDepth(); + desc.rasterizer.cullMode = CT_TWO_SIDED; + desc.AddRenderTarget(0, crp.renderTargetFormat); + pipeline = CreateGraphicsPipeline(desc); + } + + { + GraphicsPipelineDesc desc("Dynamic Lights Denoising"); + desc.shortLifeTime = true; + desc.vertexShader = ShaderByteCode(g_fullscreen_vs); + desc.pixelShader = ShaderByteCode(g_dl_denoising_ps); + desc.depthStencil.DisableDepth(); + desc.rasterizer.cullMode = CT_TWO_SIDED; + desc.AddRenderTarget(0, crp.renderTargetFormat); + denoisingPipeline = CreateGraphicsPipeline(desc); + } +} + +void DynamicLights::Draw() +{ + if(r_dynamiclight->integer == 0 || + (backEnd.refdef.rdflags & RDF_NOWORLDMODEL) != 0 || + !IsViewportFullscreen(backEnd.viewParms)) + { + return; + } + + const HBuffer tlasBuffer = crp.raytracing.GetTLAS(); + if(IsNullHandle(tlasBuffer) || + backEnd.refdef.num_dlights <= 0) + { + CmdBeginBarrier(); + CmdTextureBarrier(crp.lightTexture, ResourceStates::RenderTargetBit); + CmdEndBarrier(); + CmdClearColorTarget(crp.lightTexture, colorBlack); + return; + } + + srp.renderMode = RenderMode::None; + + { + SCOPED_RENDER_PASS("Dynamic Lights", 1.0f, 1.0f, 1.0f); + + CmdSetViewportAndScissor(0, 0, glConfig.vidWidth, glConfig.vidHeight); + + CmdBeginBarrier(); + 
CmdTextureBarrier(crp.shadingPositionTexture, ResourceStates::PixelShaderAccessBit); + CmdTextureBarrier(crp.normalTexture, ResourceStates::PixelShaderAccessBit); + CmdTextureBarrier(crp.noisyLightTexture, ResourceStates::RenderTargetBit); + CmdEndBarrier(); + + DynamicLightsRC rc = {}; + rc.blueNoiseTextureIndex = GetTextureIndexSRV(crp.blueNoise2D); + CmdBindRenderTargets(1, &crp.noisyLightTexture, NULL); + CmdBindPipeline(pipeline); + CmdSetGraphicsRootConstants(0, sizeof(rc), &rc); + CmdDraw(3, 0); + } + + { + SCOPED_RENDER_PASS("DL Denoising", 1.0f, 1.0f, 1.0f); + + CmdSetViewportAndScissor(0, 0, glConfig.vidWidth, glConfig.vidHeight); + + CmdBeginBarrier(); + CmdTextureBarrier(crp.shadingPositionTexture, ResourceStates::PixelShaderAccessBit); + CmdTextureBarrier(crp.noisyLightTexture, ResourceStates::PixelShaderAccessBit); + CmdTextureBarrier(crp.lightTexture, ResourceStates::RenderTargetBit); + CmdEndBarrier(); + + DenoiseRC rc = {}; + rc.textureIndex = GetTextureIndexSRV(crp.noisyLightTexture); + CmdBindRenderTargets(1, &crp.lightTexture, NULL); + CmdBindPipeline(denoisingPipeline); + CmdSetGraphicsRootConstants(0, sizeof(rc), &rc); + CmdDraw(3, 0); + } +} diff --git a/code/renderer/crp_gbuffer_viz.cpp b/code/renderer/crp_gbuffer_viz.cpp new file mode 100644 index 0000000..a5c0a06 --- /dev/null +++ b/code/renderer/crp_gbuffer_viz.cpp @@ -0,0 +1,202 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>. +=========================================================================== +*/ +// Cinematic Rendering Pipeline - G-Buffer texture visualization + + +#include "crp_local.h" +#include "../client/cl_imgui.h" +#include "compshaders/crp/fullscreen.h" +#include "compshaders/crp/gbufferviz_depth.h" +#include "compshaders/crp/gbufferviz_normal.h" +#include "compshaders/crp/gbufferviz_position.h" + + +#pragma pack(push, 4) + +struct LinearizeDepthRC +{ + uint32_t depthTextureIndex; + float linearDepthA; + float linearDepthB; + float zFarInv; +}; + +struct DecodeNormalsRC +{ + uint32_t normalTextureIndex; +}; + +struct DecodePositionRC +{ + uint32_t textureIndex; + uint32_t coloredDelta; +}; + +#pragma pack(pop) + + +void GBufferViz::Init() +{ + { + GraphicsPipelineDesc desc("G-Buffer Depth"); + desc.shortLifeTime = true; + desc.vertexShader = ShaderByteCode(g_fullscreen_vs); + desc.pixelShader = ShaderByteCode(g_gbufferviz_depth_ps); + desc.depthStencil.DisableDepth(); + desc.rasterizer.cullMode = CT_TWO_SIDED; + desc.AddRenderTarget(0, crp.renderTargetFormat); + linearizeDepthPipeline = CreateGraphicsPipeline(desc); + } + + { + GraphicsPipelineDesc desc("G-Buffer Normal"); + desc.shortLifeTime = true; + desc.vertexShader = ShaderByteCode(g_fullscreen_vs); + desc.pixelShader = ShaderByteCode(g_gbufferviz_normal_ps); + desc.depthStencil.DisableDepth(); + desc.rasterizer.cullMode = CT_TWO_SIDED; + desc.AddRenderTarget(0, crp.renderTargetFormat); + decodeNormalsPipeline = CreateGraphicsPipeline(desc); + } + + { + GraphicsPipelineDesc desc("G-Buffer Position"); + desc.shortLifeTime = true; + desc.vertexShader = ShaderByteCode(g_fullscreen_vs); + desc.pixelShader = ShaderByteCode(g_gbufferviz_position_ps); + desc.depthStencil.DisableDepth(); + desc.rasterizer.cullMode = CT_TWO_SIDED; + desc.AddRenderTarget(0, 
crp.renderTargetFormat); + decodeShadingPositionPipeline = CreateGraphicsPipeline(desc); + } +} + +void GBufferViz::DrawGUI() +{ + GUI_AddMainMenuItem(GUI_MainMenu::Tools, "Show G-Buffer", "", &windowActive); + + if(!windowActive) + { + return; + } + + const HTexture renderTarget = crp.GetReadRenderTarget(); + + if(textureIndex == GBufferTexture::Depth) + { + srp.renderMode = RenderMode::None; + + SCOPED_RENDER_PASS("Debug Depth", 1.0f, 1.0f, 1.0f); + + CmdSetViewportAndScissor(0, 0, glConfig.vidWidth, glConfig.vidHeight); + + CmdBeginBarrier(); + CmdTextureBarrier(crp.depthTexture, ResourceStates::PixelShaderAccessBit); + CmdTextureBarrier(renderTarget, ResourceStates::RenderTargetBit); + CmdEndBarrier(); + + LinearizeDepthRC rc = {}; + rc.depthTextureIndex = GetTextureIndexSRV(crp.depthTexture); + RB_LinearDepthConstants(&rc.linearDepthA, &rc.linearDepthB); + rc.zFarInv = 1.0f / backEnd.viewParms.zFar; + + CmdBindRenderTargets(1, &renderTarget, NULL); + CmdBindPipeline(linearizeDepthPipeline); + CmdSetGraphicsRootConstants(0, sizeof(rc), &rc); + CmdDraw(3, 0); + } + else if(textureIndex == GBufferTexture::Normal) + { + srp.renderMode = RenderMode::None; + + SCOPED_RENDER_PASS("Debug Normal", 1.0f, 1.0f, 1.0f); + + CmdSetViewportAndScissor(0, 0, glConfig.vidWidth, glConfig.vidHeight); + + CmdBeginBarrier(); + CmdTextureBarrier(crp.normalTexture, ResourceStates::PixelShaderAccessBit); + CmdTextureBarrier(renderTarget, ResourceStates::RenderTargetBit); + CmdEndBarrier(); + + DecodeNormalsRC rc = {}; + rc.normalTextureIndex = GetTextureIndexSRV(crp.normalTexture); + + CmdBindRenderTargets(1, &renderTarget, NULL); + CmdBindPipeline(decodeNormalsPipeline); + CmdSetGraphicsRootConstants(0, sizeof(rc), &rc); + CmdDraw(3, 0); + } + else if(textureIndex == GBufferTexture::ShadingPositionDelta) + { + srp.renderMode = RenderMode::None; + + SCOPED_RENDER_PASS("Debug Position", 1.0f, 1.0f, 1.0f); + + CmdSetViewportAndScissor(0, 0, glConfig.vidWidth, glConfig.vidHeight); + + 
CmdBeginBarrier(); + CmdTextureBarrier(crp.shadingPositionTexture, ResourceStates::PixelShaderAccessBit); + CmdTextureBarrier(renderTarget, ResourceStates::RenderTargetBit); + CmdEndBarrier(); + + DecodePositionRC rc = {}; + rc.textureIndex = GetTextureIndexSRV(crp.shadingPositionTexture); + rc.coloredDelta = coloredPositionDelta ? 1 : 0; + + CmdBindRenderTargets(1, &renderTarget, NULL); + CmdBindPipeline(decodeShadingPositionPipeline); + CmdSetGraphicsRootConstants(0, sizeof(rc), &rc); + CmdDraw(3, 0); + } + + if(ImGui::Begin("G-Buffer", &windowActive, ImGuiWindowFlags_AlwaysAutoResize)) + { + ImGui::RadioButton("Depth", &textureIndex, GBufferTexture::Depth); + ImGui::SameLine(); + ImGui::RadioButton("Normal", &textureIndex, GBufferTexture::Normal); + ImGui::SameLine(); + ImGui::RadioButton("Light", &textureIndex, GBufferTexture::Light); + ImGui::SameLine(); + ImGui::RadioButton("Shading Position Delta", &textureIndex, GBufferTexture::ShadingPositionDelta); + if(textureIndex == GBufferTexture::ShadingPositionDelta) + { + ImGui::SameLine(); + ImGui::Checkbox("Per-axis delta", &coloredPositionDelta); + } + + const ImVec2 resolution = ImVec2(glConfig.vidWidth / 2, glConfig.vidHeight / 2); + HTexture texture; + switch(textureIndex) + { + case GBufferTexture::Depth: texture = renderTarget; break; + case GBufferTexture::Normal: texture = renderTarget; break; + case GBufferTexture::Light: texture = crp.lightTexture; break; + case GBufferTexture::ShadingPositionDelta: texture = renderTarget; break; + default: Q_assert(!"Invalid G-Buffer texture index"); texture = crp.lightTexture; break; + } + ImGui::Image((ImTextureID)GetTextureIndexSRV(texture), resolution); + + CmdBeginBarrier(); + CmdTextureBarrier(texture, ResourceStates::PixelShaderAccessBit); + CmdEndBarrier(); + } + ImGui::End(); +} diff --git a/code/renderer/crp_geometry.cpp b/code/renderer/crp_geometry.cpp index 966a1a8..29a0f88 100644 --- a/code/renderer/crp_geometry.cpp +++ b/code/renderer/crp_geometry.cpp @@ 
-153,3 +153,73 @@ void GeoBuffers::DrawStage(uint32_t vertexCount, uint32_t indexCount) stageVertexBuffers[StageBufferId::TexCoords].EndBatch(vertexCount); stageVertexBuffers[StageBufferId::Color].EndBatch(vertexCount); } + +void GeoBuffers::DrawPositionOnly(uint32_t vertexCount, uint32_t indexCount) +{ + const HBuffer buffer = vertexBuffers[BaseBufferId::Position]; + const uint32_t stride = vertexBufferStrides[BaseBufferId::Position]; + const uint32_t byteOffset = 0; + CmdBindVertexBuffers(1, &buffer, &stride, &byteOffset); + CmdDrawIndexed(indexCount, indexBuffer.batchFirst, baseVertexBuffers[0].batchFirst); +} + +void GeoBuffers::UploadAndDrawDebugNormals() +{ + if(!baseVertexBuffers[0].CanAdd(tess.numVertexes * 2) || + !indexBuffer.CanAdd(tess.numVertexes * 3)) + { + return; + } + + const uint32_t posBatchOffset = baseVertexBuffers[BaseBufferId::Position].batchFirst + baseVertexBuffers[BaseBufferId::Position].batchCount; + float* positions = (float*)baseVertexBuffers[BaseBufferId::Position].mapped + 3 * posBatchOffset; + for(uint32_t v = 0; v < tess.numVertexes; v++) + { + vec3_t newPoint; + VectorMA(tess.xyz[v], 4.0f, tess.normal[v], newPoint); + *positions++ = tess.xyz[v][0]; + *positions++ = tess.xyz[v][1]; + *positions++ = tess.xyz[v][2]; + *positions++ = newPoint[0]; + *positions++ = newPoint[1]; + *positions++ = newPoint[2]; + } + + const uint32_t colBatchOffset = stageVertexBuffers[StageBufferId::Color].batchFirst + stageVertexBuffers[StageBufferId::Color].batchCount; + uint8_t* const colBuffer = stageVertexBuffers[StageBufferId::Color].mapped; + uint32_t* col = (uint32_t*)colBuffer + colBatchOffset; + for(uint32_t v = 0; v < tess.numVertexes; v++) + { + *col++ = 0xFF0000FF; + *col++ = 0xFFFF7F00; + } + + uint32_t* indices = indexBuffer.GetCurrentAddress(); + for(uint32_t i = 0; i < tess.numVertexes; i++) + { + *indices++ = i * 2 + 0; + *indices++ = i * 2 + 0; + *indices++ = i * 2 + 1; + } + + const uint32_t vertexOffset = 
stageVertexBuffers[0].batchFirst - baseVertexBuffers[0].batchFirst; + const uint32_t byteOffsets[2] = { 0, vertexOffset * sizeof(color4ub_t) }; + HBuffer vb[2]; + vb[0] = vertexBuffers[BaseBufferId::Position]; + vb[1] = vertexBuffers[BaseBufferId::Count + StageBufferId::Color]; + uint32_t strides[2]; + strides[0] = vertexBufferStrides[BaseBufferId::Position]; + strides[1] = vertexBufferStrides[BaseBufferId::Count + StageBufferId::Color]; + CmdBindVertexBuffers(ARRAY_LEN(vb), vb, strides, byteOffsets); + CmdDrawIndexed(tess.numVertexes * 3, indexBuffer.batchFirst, baseVertexBuffers[0].batchFirst); + + for(int b = 0; b < ARRAY_LEN(baseVertexBuffers); b++) + { + baseVertexBuffers[b].EndBatch(tess.numVertexes * 2); + } + for(int b = 0; b < ARRAY_LEN(stageVertexBuffers); b++) + { + stageVertexBuffers[b].EndBatch(tess.numVertexes * 2); + } + indexBuffer.EndBatch(tess.numVertexes * 3); +} diff --git a/code/renderer/crp_local.h b/code/renderer/crp_local.h index f77a20b..39b7750 100644 --- a/code/renderer/crp_local.h +++ b/code/renderer/crp_local.h @@ -40,6 +40,12 @@ extern cvar_t* crp_accumDof_focusDist; extern cvar_t* crp_accumDof_radius; extern cvar_t* crp_accumDof_samples; extern cvar_t* crp_accumDof_preview; +extern cvar_t* crp_drawNormals; +extern cvar_t* crp_updateRTAS; +extern cvar_t* crp_debug0; +extern cvar_t* crp_debug1; +extern cvar_t* crp_debug2; +extern cvar_t* crp_debug3; struct DOFMethod { @@ -71,8 +77,6 @@ using namespace RHI; struct WorldVertexRC { float modelViewMatrix[16]; - float projectionMatrix[16]; - float clipPlane[4]; }; struct PSOCache @@ -105,7 +109,6 @@ private: PSOCache::Entry psoCacheEntries[128]; PSOCache psoCache; - float clipPlane[4]; bool batchOldDepthHack; bool batchDepthHack; }; @@ -127,9 +130,10 @@ private: PSOCache::Entry psoCacheEntries[128]; PSOCache psoCache; - float clipPlane[4]; bool batchOldDepthHack; bool batchDepthHack; + + HPipeline wireframeNormalsPipeline; }; struct WorldTransp @@ -146,7 +150,6 @@ private: PSOCache::Entry 
psoCacheEntries[32]; PSOCache psoCache; - float clipPlane[4]; bool batchOldDepthHack; bool batchDepthHack; }; @@ -246,6 +249,119 @@ struct Magnifier private: HPipeline pipeline; + bool magnifierActive = false; +}; + +struct GBufferViz +{ + void Init(); + void DrawGUI(); + +private: + struct GBufferTexture + { + enum Id + { + Depth, + Normal, + Light, + ShadingPositionDelta, + Count + }; + }; + + HPipeline linearizeDepthPipeline; + HPipeline decodeNormalsPipeline; + HPipeline decodeShadingPositionPipeline; + bool windowActive = false; + int textureIndex = 0; + bool coloredPositionDelta = false; +}; + +struct DynamicLights +{ + void Init(); + void Draw(); + +private: + HPipeline pipeline; + HPipeline denoisingPipeline; +}; + +struct Raytracing +{ + void Init(); + void ProcessWorld(world_t& world); + void BeginFrame(); + HBuffer GetTLAS() { return tlasBuffer; } + HBuffer GetInstanceBuffer() { return tlasInstanceBuffer; } + +private: + void TagMapSurfacesRecursively(mnode_t* node); + + struct BLASBucket + { + enum Constants + { + Count = CT_COUNT + }; + }; + + struct BLASBuildBuffers + { + HBuffer vertexBuffer; + HBuffer indexBuffer; + uint32_t vertexBufferByteCount; + uint32_t indexBufferByteCount; + }; + + struct BLASBuffers + { + HBuffer blasBuffer; + HBuffer vertexBuffer; + HBuffer indexBuffer; + HBuffer meshBuffer; + uint32_t vertexBufferByteCount; + uint32_t indexBufferByteCount; + uint32_t meshBufferByteCount; + }; + + struct Surface + { + const surfaceType_t* surface; + const shader_t* shader; + int entityNum; + }; + + struct ISurfaceList + { + virtual uint32_t GetSurfaceCount() = 0; + virtual bool GetSurface(Surface& surface, uint32_t index) = 0; // true when skipped + }; + + struct WorldSurfaceList : ISurfaceList + { + uint32_t GetSurfaceCount() override; + bool GetSurface(Surface& surface, uint32_t index) override; + }; + + struct DynamicSurfaceList : ISurfaceList + { + uint32_t GetSurfaceCount() override; + bool GetSurface(Surface& surface, uint32_t index) 
override; + }; + + void EnsureBuffersAreLargeEnough(BLASBuildBuffers& buffers, uint32_t maxVertexCount, uint32_t maxIndexCount); + void EnsureBuffersAreLargeEnough(BLASBuffers& buffers, uint32_t maxVertexCount, uint32_t maxIndexCount, uint32_t maxMeshCount); + void BuildBLASes(BLASBuffers* blasBuffers, struct BLASBuilder* blasBuilders, ISurfaceList* surfaceList); + + BLASBuildBuffers blasBuildBuffers[BLASBucket::Count] = {}; + BLASBuffers staticBLASBuffers[BLASBucket::Count] = {}; + BLASBuffers dynamicBLASBuffers[BLASBucket::Count] = {}; + StaticUnorderedArray tlasInstanceDescs; + uint32_t staticTLASInstanceCount = 0; + HBuffer tlasBuffer = RHI_MAKE_NULL_HANDLE(); + HBuffer tlasInstanceBuffer = RHI_MAKE_NULL_HANDLE(); }; struct BaseBufferId @@ -279,6 +395,8 @@ struct GeoBuffers void EndBaseBatch(uint32_t vertexCount); bool CanAdd(uint32_t vertexCount, uint32_t indexCount, uint32_t stageCount); void DrawStage(uint32_t vertexCount, uint32_t indexCount); + void DrawPositionOnly(uint32_t vertexCount, uint32_t indexCount); + void UploadAndDrawDebugNormals(); GeometryBuffer baseVertexBuffers[BaseBufferId::Count]; GeometryBuffer stageVertexBuffers[StageBufferId::Count]; @@ -290,6 +408,7 @@ struct GeoBuffers struct CRP : IRenderPipeline { void Init() override; + void LoadResources() override; void ShutDown(bool fullShutDown) override; void ProcessWorld(world_t& world) override; @@ -315,6 +434,7 @@ struct CRP : IRenderPipeline void Blit(HTexture destination, HTexture source, const char* passName, bool hdr, const vec2_t tcScale, const vec2_t tcBias); void BlitRenderTarget(HTexture destination, const char* passName); void DrawSceneView(const drawSceneViewCommand_t& cmd); + void UploadSceneViewData(); HTexture GetReadRenderTarget(); HTexture GetWriteRenderTarget(); @@ -326,12 +446,16 @@ struct CRP : IRenderPipeline HTexture depthTexture; HTexture normalTexture; HTexture motionVectorTexture; + HTexture noisyLightTexture; + HTexture lightTexture; + HTexture 
shadingPositionTexture; HTexture renderTarget; TextureFormat::Id renderTargetFormat; HTexture renderTargets[2]; uint32_t renderTargetIndex; // the one to write to HSampler samplers[BASE_SAMPLER_COUNT]; // all base samplers uint32_t samplerIndices[BASE_SAMPLER_COUNT]; // descriptor heap indices + HTexture blueNoise2D; // blit HPipeline blitPipelineLDR; @@ -340,6 +464,11 @@ struct CRP : IRenderPipeline // world geometry GeoBuffers dynBuffers[FrameCount]; // for rendering world surfaces + // scene view data + HBuffer sceneViewUploadBuffers[FrameCount]; + HBuffer sceneViewBuffer; // this is the buffer that lives at ResourceDescriptorHeap[0] + uint32_t sceneViewIndex; + // for rendering transparent world surfaces HTexture oitIndexTexture; HBuffer oitFragmentBuffer; @@ -359,6 +488,9 @@ struct CRP : IRenderPipeline AccumDepthOfField accumDof; Fog fog; Magnifier magnifier; + DynamicLights dynamicLights; + Raytracing raytracing; + GBufferViz gbufferViz; }; extern CRP crp; diff --git a/code/renderer/crp_magnifier.cpp b/code/renderer/crp_magnifier.cpp index 4ccf2a4..1bddb60 100644 --- a/code/renderer/crp_magnifier.cpp +++ b/code/renderer/crp_magnifier.cpp @@ -40,8 +40,6 @@ struct MagnifierRC }; #pragma pack(pop) -static bool s_magnifierActive = false; - void Magnifier::Init() { @@ -57,7 +55,7 @@ void Magnifier::Init() void Magnifier::Draw() { - if(r_debugUI->integer == 0 || !s_magnifierActive) + if(r_debugUI->integer == 0 || !magnifierActive) { return; } @@ -92,6 +90,6 @@ void Magnifier::Draw() void Magnifier::DrawGUI() { - ToggleBooleanWithShortcut(s_magnifierActive, ImGuiKey_M); - GUI_AddMainMenuItem(GUI_MainMenu::Tools, "Magnifier", "Ctrl+M", &s_magnifierActive); + ToggleBooleanWithShortcut(magnifierActive, ImGuiKey_M); + GUI_AddMainMenuItem(GUI_MainMenu::Tools, "Magnifier", "Ctrl+M", &magnifierActive); } diff --git a/code/renderer/crp_main.cpp b/code/renderer/crp_main.cpp index 8417d15..8ebadc4 100644 --- a/code/renderer/crp_main.cpp +++ b/code/renderer/crp_main.cpp @@ 
-24,6 +24,7 @@ along with Challenge Quake 3. If not, see . #include "crp_local.h" #include "../client/cl_imgui.h" #include "shaders/crp/oit.h.hlsli" +#include "shaders/crp/scene_view.h.hlsli" #include "compshaders/crp/fullscreen.h" #include "compshaders/crp/blit.h" #include "compshaders/crp/ui.h" @@ -34,6 +35,19 @@ along with Challenge Quake 3. If not, see . #include "compshaders/crp/mip_3.h" +struct SceneViewConst +{ + enum Constants + { + MaxViews = 1024, + LightBytes = sizeof(DynamicLight), + MaxLights = SCENE_VIEW_MAX_LIGHTS, + StructBytes = sizeof(SceneView), + BufferBytes = MaxViews * StructBytes + }; +}; + + CRP crp; IRenderPipeline* crpp = &crp; @@ -50,6 +64,12 @@ cvar_t* crp_accumDof_focusDist; cvar_t* crp_accumDof_radius; cvar_t* crp_accumDof_samples; cvar_t* crp_accumDof_preview; +cvar_t* crp_drawNormals; +cvar_t* crp_updateRTAS; +cvar_t* crp_debug0; +cvar_t* crp_debug1; +cvar_t* crp_debug2; +cvar_t* crp_debug3; static const cvarTableItem_t crp_cvars[] = { @@ -129,7 +149,34 @@ static const cvarTableItem_t crp_cvars[] = { &crp_gatherDof_brightness, "crp_gatherDof_brightness", "2", CVAR_ARCHIVE, CVART_FLOAT, "0", "8", "blur brightness weight", "Gather DoF bokeh brightness", CVARCAT_GRAPHICS, "Blur brightness weight", "" + }, + { + &crp_drawNormals, "crp_drawNormals", "0", CVAR_TEMP, CVART_BOOL, NULL, NULL, "draws vertex normals", + "Draw vertex normals", CVARCAT_GRAPHICS | CVARCAT_DEBUGGING, "", "" + }, + { + &crp_updateRTAS, "crp_updateRTAS", "1", CVAR_TEMP, CVART_BOOL, NULL, NULL, "enables RTAS builds every frame", + "Enable RTAS builds", CVARCAT_GRAPHICS | CVARCAT_DEBUGGING, "Allows raytracing acceleration structure updates", "" } +#if defined(_DEBUG) + , + { + &crp_debug0, "crp_debug0", "0", CVAR_TEMP, CVART_FLOAT, "0", "1", "debug value 0", + "Debug value 0", CVARCAT_GRAPHICS | CVARCAT_DEBUGGING, "", "" + }, + { + &crp_debug1, "crp_debug1", "0", CVAR_TEMP, CVART_FLOAT, "0", "1", "debug value 1", + "Debug value 1", CVARCAT_GRAPHICS | CVARCAT_DEBUGGING, 
"", "" + }, + { + &crp_debug2, "crp_debug2", "0", CVAR_TEMP, CVART_FLOAT, "0", "1", "debug value 2", + "Debug value 2", CVARCAT_GRAPHICS | CVARCAT_DEBUGGING, "", "" + }, + { + &crp_debug3, "crp_debug3", "0", CVAR_TEMP, CVART_FLOAT, "0", "1", "debug value 3", + "Debug value 3", CVARCAT_GRAPHICS | CVARCAT_DEBUGGING, "", "" + } +#endif }; @@ -177,6 +224,7 @@ void CRP::Init() InitDesc initDesc; initDesc.directDescriptorHeapIndexing = true; + initDesc.inlineRaytracing = true; srp.firstInit = RHI::Init(initDesc); srp.psoStatsValid = false; @@ -328,8 +376,33 @@ void CRP::Init() motionVectorTexture = RHI::CreateTexture(desc); } + { + TextureDesc desc("GBuffer direct light", glConfig.vidWidth, glConfig.vidHeight); + desc.committedResource = true; + desc.shortLifeTime = true; + desc.initialState = ResourceStates::RenderTargetBit; + desc.allowedState = ResourceStates::RenderTargetBit | ResourceStates::PixelShaderAccessBit | ResourceStates::ComputeShaderAccessBit; + desc.format = TextureFormat::RGBA64_Float; + desc.SetClearColor(colorBlack); + lightTexture = RHI::CreateTexture(desc); + desc.name = "GBuffer raw direct light"; + noisyLightTexture = RHI::CreateTexture(desc); + } + + { + TextureDesc desc("GBuffer shading position", glConfig.vidWidth, glConfig.vidHeight); + desc.committedResource = true; + desc.shortLifeTime = true; + desc.initialState = ResourceStates::RenderTargetBit; + desc.allowedState = ResourceStates::RenderTargetBit | ResourceStates::PixelShaderAccessBit | ResourceStates::ComputeShaderAccessBit; + desc.format = TextureFormat::RGBA128_Float; + desc.SetClearColor(vec4_zero); + shadingPositionTexture = RHI::CreateTexture(desc); + } + { GraphicsPipelineDesc desc("blit LDR"); + desc.shortLifeTime = true; desc.vertexShader = ShaderByteCode(g_fullscreen_vs); desc.pixelShader = ShaderByteCode(g_blit_ps); desc.depthStencil.DisableDepth(); @@ -341,6 +414,24 @@ void CRP::Init() blitPipelineHDR = CreateGraphicsPipeline(desc); } + { + BufferDesc desc("scene view upload 
#1", SceneViewConst::BufferBytes, ResourceStates::ShaderAccessBits); + desc.shortLifeTime = true; + desc.memoryUsage = MemoryUsage::Upload; + desc.structureByteCount = SceneViewConst::StructBytes; + sceneViewUploadBuffers[0] = CreateBuffer(desc); + desc.name = "scene view upload #2"; + sceneViewUploadBuffers[1] = CreateBuffer(desc); + } + + { + BufferDesc desc("scene view", SceneViewConst::StructBytes, ResourceStates::ShaderAccessBits); + desc.shortLifeTime = true; + desc.structureByteCount = SceneViewConst::StructBytes; + desc.useSrvIndex0 = true; // the one and only buffer allowed to be there + sceneViewBuffer = CreateBuffer(desc); + } + ui.Init(true, ShaderByteCode(g_ui_vs), ShaderByteCode(g_ui_ps), renderTargetFormat, RHI_MAKE_NULL_HANDLE(), NULL); imgui.Init(true, ShaderByteCode(g_imgui_vs), ShaderByteCode(g_imgui_ps), renderTargetFormat, RHI_MAKE_NULL_HANDLE(), NULL); nuklear.Init(true, ShaderByteCode(g_nuklear_vs), ShaderByteCode(g_nuklear_ps), renderTargetFormat, RHI_MAKE_NULL_HANDLE(), NULL); @@ -354,10 +445,19 @@ void CRP::Init() accumDof.Init(); fog.Init(); magnifier.Init(); + dynamicLights.Init(); + raytracing.Init(); + gbufferViz.Init(); srp.firstInit = false; } +void CRP::LoadResources() +{ + const int flags = IMG_NOPICMIP | IMG_NOMIPMAP | IMG_NOIMANIP | IMG_NOAF; + blueNoise2D = R_FindImageFile("textures/stbn_2d.tga", flags, TW_REPEAT)->texture; +} + void CRP::ShutDown(bool fullShutDown) { RHI::ShutDown(fullShutDown); @@ -367,12 +467,16 @@ void CRP::BeginFrame() { renderTargetIndex = 0; renderTarget = renderTargets[0]; + sceneViewIndex = 0; srp.BeginFrame(); // have it be first to we can use ImGUI in the other components too imgui.BeginFrame(); + // must be run outside of the RHI::BeginFrame/RHI::EndFrame pair + raytracing.BeginFrame(); + RHI::BeginFrame(); ui.BeginFrame(); nuklear.BeginFrame(); @@ -392,6 +496,7 @@ void CRP::BeginFrame() void CRP::EndFrame() { srp.DrawGUI(); + gbufferViz.DrawGUI(); magnifier.DrawGUI(); imgui.Draw(renderTarget); 
toneMap.DrawToneMap(); @@ -476,8 +581,9 @@ void CRP::EndTextureUpload() RHI::EndTextureUpload(); } -void CRP::ProcessWorld(world_t&) +void CRP::ProcessWorld(world_t& world) { + raytracing.ProcessWorld(world); } void CRP::ProcessModel(model_t&) @@ -605,10 +711,7 @@ void CRP::DrawSceneView(const drawSceneViewCommand_t& cmd) } if(crp_dof->integer == DOFMethod::Accumulation && - vp.viewportX == 0 && - vp.viewportY == 0 && - vp.viewportWidth == glConfig.vidWidth && - vp.viewportHeight == glConfig.vidHeight) + IsViewportFullscreen(vp)) { const Rect rect(0, 0, glConfig.vidWidth, glConfig.vidHeight); accumDof.Begin(cmd); @@ -620,11 +723,17 @@ void CRP::DrawSceneView(const drawSceneViewCommand_t& cmd) srp.enableRenderPassQueries = x == 0 && y == 0; drawSceneViewCommand_t newCmd; accumDof.FixCommand(newCmd, cmd, x, y); + + backEnd.refdef = newCmd.refdef; + backEnd.viewParms = newCmd.viewParms; + UploadSceneViewData(); + CmdBeginBarrier(); CmdTextureBarrier(renderTarget, ResourceStates::RenderTargetBit); CmdEndBarrier(); CmdClearColorTarget(renderTarget, cmd.clearColor, &rect); prepass.Draw(newCmd); + dynamicLights.Draw(); opaque.Draw(newCmd); fog.Draw(); transp.Draw(newCmd); @@ -647,7 +756,12 @@ void CRP::DrawSceneView(const drawSceneViewCommand_t& cmd) } else { + backEnd.refdef = cmd.refdef; + backEnd.viewParms = cmd.viewParms; + UploadSceneViewData(); + prepass.Draw(cmd); + dynamicLights.Draw(); opaque.Draw(cmd); fog.Draw(); transp.Draw(cmd); @@ -657,6 +771,78 @@ void CRP::DrawSceneView(const drawSceneViewCommand_t& cmd) } } +void CRP::UploadSceneViewData() +{ + Q_assert(sceneViewIndex < SceneViewConst::MaxViews); + if(sceneViewIndex >= SceneViewConst::MaxViews) + { + return; + } + + SCOPED_RENDER_PASS("Scene View Upload", 1.0f, 1.0f, 1.0f); + + const trRefdef_t& refdef = backEnd.refdef; + const viewParms_t& vp = backEnd.viewParms; + const HBuffer uploadBuffer = sceneViewUploadBuffers[GetFrameIndex()]; + const uint32_t uploadByteOffset = sceneViewIndex * 
SceneViewConst::StructBytes; + const HBuffer tlasBuffer = raytracing.GetTLAS(); + const HBuffer tlasInstanceBuffer = raytracing.GetInstanceBuffer(); + + SceneView& dest = *(SceneView*)(MapBuffer(uploadBuffer) + uploadByteOffset); + + // @NOTE: yes, world.modelMatrix is actually the view matrix + // it's the model-view matrix for the world entity, thus the view matrix + memcpy(dest.projectionMatrix, vp.projectionMatrix, sizeof(dest.projectionMatrix)); + R_InvMatrix(dest.projectionMatrix, dest.invProjectionMatrix); + memcpy(dest.viewMatrix, vp.world.modelMatrix, sizeof(dest.viewMatrix)); + R_InvMatrix(dest.viewMatrix, dest.invViewMatrix); + RB_CreateClipPlane(dest.clipPlane); +#if defined(_DEBUG) + dest.debug[0] = crp_debug0->value; + dest.debug[1] = crp_debug1->value; + dest.debug[2] = crp_debug2->value; + dest.debug[3] = crp_debug3->value; +#else + const uint32_t deadBeef = 0xDEADBEEF; + dest.debug[0] = *(const float*)&deadBeef; + dest.debug[1] = *(const float*)&deadBeef; + dest.debug[2] = *(const float*)&deadBeef; + dest.debug[3] = *(const float*)&deadBeef; +#endif + dest.sceneViewIndex = sceneViewIndex; + dest.frameIndex = tr.frameCount; + dest.depthTextureIndex = GetTextureIndexSRV(depthTexture); + dest.normalTextureIndex = GetTextureIndexSRV(normalTexture); + dest.shadingPositionTextureIndex = GetTextureIndexSRV(shadingPositionTexture); + dest.lightTextureIndex = GetTextureIndexSRV(lightTexture); + dest.tlasBufferIndex = IsNullHandle(tlasBuffer) ? 0 : GetBufferIndexSRV(tlasBuffer); + dest.tlasInstanceBufferIndex = IsNullHandle(tlasInstanceBuffer) ? 
0 : GetBufferIndexSRV(tlasInstanceBuffer); + // clamp to the capacity advertised by SceneViewConst::MaxLights so that an oversized refdef + // can never write past the end of dest.lights (assumes lights[] holds SCENE_VIEW_MAX_LIGHTS entries) + const int lightCount = refdef.num_dlights < (int)SceneViewConst::MaxLights ? refdef.num_dlights : (int)SceneViewConst::MaxLights; + dest.lightCount = lightCount; + + for(int i = 0; i < lightCount; i++) + { + const dlight_t& srcLight = refdef.dlights[i]; + DynamicLight& destLight = dest.lights[i]; + VectorCopy(srcLight.origin, destLight.position); + VectorCopy(srcLight.color, destLight.color); + destLight.radius = srcLight.radius; + destLight.padding = 0.0f; + } + + UnmapBuffer(uploadBuffer); + + CmdBeginBarrier(); + CmdBufferBarrier(uploadBuffer, ResourceStates::CopySourceBit); + CmdBufferBarrier(sceneViewBuffer, ResourceStates::CopyDestinationBit); + CmdEndBarrier(); + CmdCopyBuffer(sceneViewBuffer, 0, uploadBuffer, uploadByteOffset, SceneViewConst::StructBytes); + CmdBeginBarrier(); + CmdBufferBarrier(sceneViewBuffer, ResourceStates::ShaderAccessBits); + CmdEndBarrier(); + + sceneViewIndex++; +} + void CRP::ReadPixels(int w, int h, int alignment, colorSpace_t colorSpace, void* outPixels) { ReadTextureImage(outPixels, readbackRenderTarget, w, h, alignment, colorSpace); diff --git a/code/renderer/crp_opaque.cpp b/code/renderer/crp_opaque.cpp index 82c635a..7138223 100644 --- a/code/renderer/crp_opaque.cpp +++ b/code/renderer/crp_opaque.cpp @@ -23,6 +23,8 @@ along with Challenge Quake 3. If not, see .
#include "crp_local.h" #include "compshaders/crp/opaque.h" +#include "compshaders/crp/wireframe_normals.h" +#include "compshaders/crp/add_light.h" #pragma pack(push, 4) @@ -38,6 +40,8 @@ struct OpaquePixelRC uint32_t samplerIndex; uint32_t shaderIndexBufferIndex; uint32_t alphaTest; + uint32_t lightTextureIndex; + uint32_t lightmapPass; float greyscale; // shader trace @@ -46,12 +50,35 @@ struct OpaquePixelRC uint16_t centerPixelY; }; +struct AddLightVertexRC : WorldVertexRC +{ +}; + #pragma pack(pop) void WorldOpaque::Init() { psoCache.Init(psoCacheEntries, ARRAY_LEN(psoCacheEntries)); + + { + GraphicsPipelineDesc desc("Debug Normals"); + desc.shortLifeTime = true; + desc.vertexShader.Set(g_wireframe_normals_vs); + desc.pixelShader.Set(g_wireframe_normals_ps); + desc.vertexLayout.AddAttribute(0, ShaderSemantic::Position, DataType::Float32, 3, 0); + desc.vertexLayout.AddAttribute(1, ShaderSemantic::Color, DataType::UNorm8, 4, 0); + desc.depthStencil.depthStencilFormat = TextureFormat::Depth32_Float; + desc.depthStencil.depthComparison = ComparisonFunction::GreaterEqual; + desc.depthStencil.enableDepthTest = true; + desc.depthStencil.enableDepthWrites = false; + desc.rasterizer.cullMode = CT_TWO_SIDED; + desc.rasterizer.polygonOffset = false; + desc.rasterizer.clampDepth = false; + desc.rasterizer.wireFrame = true; + desc.AddRenderTarget(0, crp.renderTargetFormat); + wireframeNormalsPipeline = CreateGraphicsPipeline(desc); + } } void WorldOpaque::Draw(const drawSceneViewCommand_t& cmd) @@ -65,7 +92,6 @@ void WorldOpaque::Draw(const drawSceneViewCommand_t& cmd) backEnd.refdef = cmd.refdef; backEnd.viewParms = cmd.viewParms; - RB_CreateClipPlane(clipPlane); CmdSetViewportAndScissor(backEnd.viewParms); batchOldDepthHack = false; @@ -73,6 +99,7 @@ void WorldOpaque::Draw(const drawSceneViewCommand_t& cmd) CmdBeginBarrier(); CmdTextureBarrier(crp.depthTexture, ResourceStates::DepthReadBit); + CmdTextureBarrier(crp.lightTexture, ResourceStates::PixelShaderAccessBit); 
CmdBufferBarrier(srp.traceRenderBuffer, ResourceStates::UnorderedAccessBit); CmdEndBarrier(); @@ -196,6 +223,27 @@ void WorldOpaque::ProcessShader(shader_t& shader) p.mirrorPipeline = psoCache.AddPipeline(desc, va("opaque %d %d mirrored", psoCache.entryCount, s + 1)); } + if(!shader.hasLightmapStage) + { + static int counter = 0; + GraphicsPipelineDesc desc = {}; + desc.name = "Add Light"; + desc.rootSignature = RHI_MAKE_NULL_HANDLE(); + desc.shortLifeTime = true; + desc.vertexShader.Set(g_add_light_vs); + desc.pixelShader.Set(g_add_light_ps); + desc.vertexLayout.AddAttribute(0, ShaderSemantic::Position, DataType::Float32, 3, 0); + desc.depthStencil.depthStencilFormat = TextureFormat::Depth32_Float; + desc.depthStencil.depthComparison = shader.isSky ? ComparisonFunction::GreaterEqual : ComparisonFunction::Equal; + desc.depthStencil.enableDepthTest = true; + desc.depthStencil.enableDepthWrites = false; + desc.rasterizer.cullMode = shader.cullType; + desc.rasterizer.polygonOffset = shader.polygonOffset != 0; + desc.rasterizer.clampDepth = clampDepth; + desc.AddRenderTarget(GLS_SRCBLEND_DST_COLOR | GLS_DSTBLEND_ONE, crp.renderTargetFormat); + shader.addLightPipeline = psoCache.AddPipeline(desc, va("Add Light #%d", counter++ + 1)); + } + shader.numPipelines = shader.numStages; } @@ -254,8 +302,6 @@ void WorldOpaque::EndBatch() OpaqueVertexRC vertexRC = {}; memcpy(vertexRC.modelViewMatrix, backEnd.orient.modelMatrix, sizeof(vertexRC.modelViewMatrix)); - memcpy(vertexRC.projectionMatrix, backEnd.viewParms.projectionMatrix, sizeof(vertexRC.projectionMatrix)); - memcpy(vertexRC.clipPlane, clipPlane, sizeof(vertexRC.clipPlane)); CmdSetGraphicsRootConstants(0, sizeof(vertexRC), &vertexRC); for(int s = 0; s < shader->numStages; ++s) @@ -284,6 +330,8 @@ void WorldOpaque::EndBatch() pixelRC.samplerIndex = sampIdx; pixelRC.shaderIndexBufferIndex = bufferIndex; pixelRC.alphaTest = alphaTest; + pixelRC.lightTextureIndex = GetTextureIndexSRV(crp.lightTexture); + 
pixelRC.lightmapPass = stage->type == ST_LIGHTMAP ? 1 : 0; pixelRC.greyscale = tess.greyscale; pixelRC.shaderTrace = ((uint32_t)shader->index << 1) | enableShaderTrace; pixelRC.centerPixelX = glConfig.vidWidth / 2; @@ -293,8 +341,23 @@ void WorldOpaque::EndBatch() db.DrawStage(vertexCount, indexCount); } + if(!shader->hasLightmapStage) + { + AddLightVertexRC rc = {}; + memcpy(rc.modelViewMatrix, backEnd.orient.modelMatrix, sizeof(rc.modelViewMatrix)); + CmdSetGraphicsRootConstants(0, sizeof(rc), &rc); + CmdBindPipeline(psoCache.entries[shader->addLightPipeline].handle); + db.DrawPositionOnly(vertexCount, indexCount); + } + db.EndBaseBatch(vertexCount); + if(crp_drawNormals->integer) + { + CmdBindPipeline(wireframeNormalsPipeline); + db.UploadAndDrawDebugNormals(); + } + clean_up: tess.tessellator = Tessellator::None; tess.numVertexes = 0; diff --git a/code/renderer/crp_prepass.cpp b/code/renderer/crp_prepass.cpp index bbeb1cd..903393c 100644 --- a/code/renderer/crp_prepass.cpp +++ b/code/renderer/crp_prepass.cpp @@ -26,22 +26,15 @@ along with Challenge Quake 3. If not, see . 
#pragma pack(push, 4) - -struct PrepassVertexRC +struct PrepassRC { float modelViewMatrix[16]; - float projectionMatrix[16]; + float modelMatrix[16]; float normalMatrix[16]; - float clipPlane[4]; -}; - -struct PrepassPixelRC -{ uint32_t textureIndex; uint32_t samplerIndex; uint32_t alphaTest; }; - #pragma pack(pop) @@ -59,9 +52,10 @@ void Prepass::Draw(const drawSceneViewCommand_t& cmd) srp.renderMode = RenderMode::World; + SCOPED_RENDER_PASS("Pre-pass", 1.0f, 0.5f, 0.5f); + backEnd.refdef = cmd.refdef; backEnd.viewParms = cmd.viewParms; - RB_CreateClipPlane(clipPlane); CmdSetViewportAndScissor(backEnd.viewParms); batchOldDepthHack = false; @@ -71,18 +65,18 @@ void Prepass::Draw(const drawSceneViewCommand_t& cmd) CmdTextureBarrier(crp.depthTexture, ResourceStates::DepthWriteBit); CmdTextureBarrier(crp.normalTexture, ResourceStates::RenderTargetBit); CmdTextureBarrier(crp.motionVectorTexture, ResourceStates::RenderTargetBit); + CmdTextureBarrier(crp.shadingPositionTexture, ResourceStates::RenderTargetBit); CmdEndBarrier(); CmdClearDepthStencilTarget(crp.depthTexture, true, 0.0f); CmdClearColorTarget(crp.normalTexture, vec4_zero, NULL); CmdClearColorTarget(crp.motionVectorTexture, vec4_zero, NULL); + CmdClearColorTarget(crp.shadingPositionTexture, vec4_zero, NULL); GeoBuffers& db = crp.dynBuffers[GetFrameIndex()]; db.BeginUpload(); - SCOPED_RENDER_PASS("Pre-pass", 1.0f, 0.5f, 0.5f); - - const HTexture renderTargets[] = { crp.normalTexture, crp.motionVectorTexture }; + const HTexture renderTargets[] = { crp.normalTexture, crp.motionVectorTexture, crp.shadingPositionTexture }; CmdBindRenderTargets(ARRAY_LEN(renderTargets), renderTargets, &crp.depthTexture); CmdBindVertexBuffers(ARRAY_LEN(db.vertexBuffers), db.vertexBuffers, db.vertexBufferStrides, NULL); CmdBindIndexBuffer(db.indexBuffer.buffer, IndexType::UInt32, 0); @@ -189,6 +183,7 @@ void Prepass::ProcessShader(shader_t& shader) desc.rasterizer.clampDepth = clampDepth; desc.AddRenderTarget(0, 
TextureFormat::RG32_SNorm); desc.AddRenderTarget(0, TextureFormat::RG32_Float); + desc.AddRenderTarget(0, TextureFormat::RGBA128_Float); pipeline_t& p = shader.prepassPipeline; p.firstStage = 0; @@ -222,8 +217,7 @@ void Prepass::BeginBatch(const shader_t* shader) void Prepass::EndBatch() { - PrepassVertexRC vertexRC = {}; - PrepassPixelRC pixelRC = {}; + PrepassRC rc = {}; float tempMatrix[16]; const int vertexCount = tess.numVertexes; @@ -253,13 +247,6 @@ void Prepass::EndBatch() CmdSetViewport(vp.viewportX, vp.viewportY, vp.viewportWidth, vp.viewportHeight, batchDepthHack ? 0.7f : 0.0f, 1.0f); batchOldDepthHack = batchDepthHack; } - - memcpy(vertexRC.modelViewMatrix, backEnd.orient.modelMatrix, sizeof(vertexRC.modelViewMatrix)); - memcpy(vertexRC.projectionMatrix, backEnd.viewParms.projectionMatrix, sizeof(vertexRC.projectionMatrix)); - memcpy(vertexRC.clipPlane, clipPlane, sizeof(vertexRC.clipPlane)); - R_InvMatrix(backEnd.modelMatrix, tempMatrix); - R_TransposeMatrix(tempMatrix, vertexRC.normalMatrix); - CmdSetGraphicsRootConstants(0, sizeof(vertexRC), &vertexRC); const shaderStage_t* const stage = shader->stages[0]; @@ -278,10 +265,14 @@ void Prepass::EndBatch() const uint32_t alphaTest = AlphaTestShaderConstFromStateBits(stage->stateBits); Q_assert(sampIdx < ARRAY_LEN(crp.samplers)); - pixelRC.textureIndex = texIdx; - pixelRC.samplerIndex = sampIdx; - pixelRC.alphaTest = alphaTest; - CmdSetGraphicsRootConstants(sizeof(vertexRC), sizeof(pixelRC), &pixelRC); + memcpy(rc.modelViewMatrix, backEnd.orient.modelMatrix, sizeof(rc.modelViewMatrix)); + memcpy(rc.modelMatrix, backEnd.modelMatrix, sizeof(rc.modelMatrix)); + R_InvMatrix(backEnd.modelMatrix, tempMatrix); + R_TransposeMatrix(tempMatrix, rc.normalMatrix); + rc.textureIndex = texIdx; + rc.samplerIndex = sampIdx; + rc.alphaTest = alphaTest; + CmdSetGraphicsRootConstants(0, sizeof(rc), &rc); db.DrawStage(vertexCount, indexCount); diff --git a/code/renderer/crp_raytracing.cpp b/code/renderer/crp_raytracing.cpp 
new file mode 100644 index 0000000..36300b2 --- /dev/null +++ b/code/renderer/crp_raytracing.cpp @@ -0,0 +1,652 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . +=========================================================================== +*/ +// Cinematic Rendering Pipeline - raytracing acceleration structures management + + +#include "crp_local.h" +#include "shaders/crp/raytracing.h.hlsli" + + +struct BLASBuilder +{ + bool IsEmpty() + { + return totalVertexCount <= 0 || totalIndexCount <= 0; + } + + uint32_t totalVertexCount; + uint32_t totalIndexCount; + uint32_t meshCount; + float* buildVertices; + uint32_t* buildIndices; + BLASMeshDesc* buildMeshes; + uint32_t firstVertex; + uint32_t firstIndex; + uint32_t meshIndex; + + BLASVertex* traceVertices; + uint32_t* traceIndices; + BLASMesh* traceMeshes; +}; + +static world_t* s_world; + +static bool IsStaticBLASSurface(const msurface_t* surf) +{ + if(surf->shader->numStages == 0 || + surf->shader->isDynamic || + surf->shader->polygonOffset) + { + return false; + } + + return true; +} + +static uint32_t GetBLASBucketIndex(const shader_t* shader) +{ + const uint32_t index = (uint32_t)shader->cullType; + + return index; +} + +static const char* GetBLASBucketName(uint32_t index) +{ + switch(index) 
+ { + case CT_FRONT_SIDED: return "front-sided"; + case CT_BACK_SIDED: return "back-sided"; + case CT_TWO_SIDED: return "two-sided"; + default: Q_assert(!"Invalid bucket index"); return "???"; + } +} + +static void TransformPoint(const vec3_t original, const float* matrix4x4, vec3_t result) +{ + float x = original[0] * matrix4x4[0] + original[1] * matrix4x4[4] + original[2] * matrix4x4[ 8] + matrix4x4[12]; + float y = original[0] * matrix4x4[1] + original[1] * matrix4x4[5] + original[2] * matrix4x4[ 9] + matrix4x4[13]; + float z = original[0] * matrix4x4[2] + original[1] * matrix4x4[6] + original[2] * matrix4x4[10] + matrix4x4[14]; + float w = original[0] * matrix4x4[3] + original[1] * matrix4x4[7] + original[2] * matrix4x4[11] + matrix4x4[15]; + if(w != 1.0f && w != 0.0f) + { + x /= w; + y /= w; + z /= w; + } + result[0] = x; + result[1] = y; + result[2] = z; +} + +// true when the surface should be skipped +static bool Tessellate( + int& surfVertexCount, int& surfIndexCount, const surfaceType_t* surface, + const shader_t* shader, int entityNum, double originalTime) +{ + if(shader->numStages <= 0) + { + return true; + } + + bool depthHack; + tess.numVertexes = 0; + tess.numIndexes = 0; + tess.shader = shader; // needed by R_ComputeTexCoords etc. 
+ UpdateEntityData(depthHack, entityNum, originalTime); + R_TessellateSurface(surface); + surfVertexCount = tess.numVertexes; + surfIndexCount = tess.numIndexes; + if(surfVertexCount <= 0 || surfIndexCount <= 0) + { + return true; + } + + RB_DeformTessGeometry(0, surfVertexCount, 0, surfIndexCount); + const shaderStage_t& stage = *shader->stages[0]; + R_ComputeColors(&stage, tess.svars[0], 0, surfVertexCount); + R_ComputeTexCoords(&stage, tess.svars[0], 0, surfVertexCount, qfalse); + + return false; +} + +// true when the surface should be skipped +static bool EstimateTessellatedSize( + int& surfVertexCount, int& surfIndexCount, const surfaceType_t* surface, + const shader_t* shader, int entityNum, double originalTime) +{ + if(shader->numStages <= 0) + { + return true; + } + + bool depthHack; + tess.numVertexes = 0; + tess.numIndexes = 0; + tess.shader = shader; + UpdateEntityData(depthHack, entityNum, originalTime); + R_ComputeTessellatedSize(&surfVertexCount, &surfIndexCount, surface); + if(surfVertexCount <= 0 || surfIndexCount <= 0) + { + return true; + } + + return false; +} + +// replaces the buffer with a new one of desc.byteCount bytes when the request exceeds the tracked capacity +// curByteCount must always describe the real allocation, otherwise later requests can be skipped wrongly +static void CreateOrGrowBuffer(HBuffer& buffer, uint32_t& curByteCount, const BufferDesc& desc) +{ + if(desc.byteCount <= curByteCount) + { + return; + } + + // CreateBuffer allocates exactly desc.byteCount bytes, so that is the capacity to remember + // (recording max(curByteCount * 2, desc.byteCount) over-reported it and allowed buffer overruns) + curByteCount = desc.byteCount; + DestroyBufferDelayed(buffer); + buffer = CreateBuffer(desc); +} + + +void Raytracing::Init() +{ + const uint32_t structByteCount = sizeof(TLASInstance); + BufferDesc desc("BLAS support instance", 2 * BLASBucket::Count * structByteCount, ResourceStates::ShaderAccessBits); + desc.shortLifeTime = true; + desc.structureByteCount = structByteCount; + tlasInstanceBuffer = CreateBuffer(desc); +} + +void Raytracing::ProcessWorld(world_t& world) +{ + TagMapSurfacesRecursively(world.nodes); + + // make sure we're not trying to use deleted buffers after a video restart + for(uint32_t i = 0; i < BLASBucket::Count; i++) + { + blasBuildBuffers[i] = {}; + staticBLASBuffers[i] = {}; +
dynamicBLASBuffers[i] = {}; + } + tlasBuffer = RHI_MAKE_NULL_HANDLE(); + + BLASBuilder staticBLASes[BLASBucket::Count]; + WorldSurfaceList surfaceList; + s_world = &world; + BuildBLASes(staticBLASBuffers, staticBLASes, &surfaceList); + s_world = NULL; + + // create ASes + BeginTempCommandList(); + for(uint32_t i = 0; i < BLASBucket::Count; i++) + { + BLASBuilder& bucket = staticBLASes[i]; + if(bucket.IsEmpty()) + { + continue; + } + + BLASBuildBuffers& buffers = blasBuildBuffers[i]; + BLASDesc desc = {}; + desc.name = va("static BLAS %s", GetBLASBucketName(i)); + desc.vertexBuffer = buffers.vertexBuffer; + desc.indexBuffer = buffers.indexBuffer; + desc.meshes = bucket.buildMeshes; + desc.meshCount = bucket.meshCount; + CmdCreateBLAS(&staticBLASBuffers[i].blasBuffer, desc); + } + EndTempCommandList(); + for(uint32_t i = 0; i < BLASBucket::Count; i++) + { + BLASBuilder& bucket = staticBLASes[i]; + if(bucket.IsEmpty()) + { + continue; + } + + free(bucket.buildMeshes); + } + + staticTLASInstanceCount = 0; + tlasInstanceDescs.Clear(); + for(uint32_t i = 0; i < BLASBucket::Count; i++) + { + BLASBuilder& bucket = staticBLASes[i]; + if(bucket.IsEmpty()) + { + continue; + } + + TLASInstanceDesc inst; + inst = {}; + inst.blasBuffer = staticBLASBuffers[i].blasBuffer; + inst.cullMode = (cullType_t)i; + inst.instanceId = staticTLASInstanceCount++; + inst.instanceMask = 0xFF; + inst.transform[0] = 1.0f; + inst.transform[4] = 1.0f; + inst.transform[8] = 1.0f; + tlasInstanceDescs.Add(inst); + } +} + +void Raytracing::BeginFrame() +{ + if(tr.world == NULL || tr.sceneCounterRT == 0) + { + return; + } + + if(crp_updateRTAS->integer == 0 && !IsNullHandle(tlasBuffer)) + { + return; + } + + backEnd.refdef = tr.rtRefdef; + + BeginTempCommandList(); + const uint32_t renderPass = srp.BeginRenderPass("RTAS Build", 1.0f, 1.0f, 1.0f); + BLASBuilder dynamicBLASes[BLASBucket::Count]; + DynamicSurfaceList surfaceList; + BuildBLASes(dynamicBLASBuffers, dynamicBLASes, &surfaceList); + for(uint32_t 
i = 0; i < BLASBucket::Count; i++) + { + BLASBuilder& bucket = dynamicBLASes[i]; + if(bucket.IsEmpty()) + { + continue; + } + + BLASBuildBuffers& buffers = blasBuildBuffers[i]; + BLASDesc desc = {}; + desc.name = va("dynamic BLAS %s", GetBLASBucketName(i)); + desc.vertexBuffer = buffers.vertexBuffer; + desc.indexBuffer = buffers.indexBuffer; + desc.meshes = bucket.buildMeshes; + desc.meshCount = bucket.meshCount; + CmdCreateBLAS(&dynamicBLASBuffers[i].blasBuffer, desc); + } + { + uint32_t instanceId = staticTLASInstanceCount; + tlasInstanceDescs.count = staticTLASInstanceCount; + for(uint32_t i = 0; i < BLASBucket::Count; i++) + { + BLASBuilder& bucket = dynamicBLASes[i]; + if(bucket.IsEmpty()) + { + continue; + } + + TLASInstanceDesc inst = {}; + inst.blasBuffer = dynamicBLASBuffers[i].blasBuffer; + inst.cullMode = (cullType_t)i; + inst.instanceId = instanceId++; + inst.instanceMask = 0xFF; + inst.transform[0] = 1.0f; + inst.transform[4] = 1.0f; + inst.transform[8] = 1.0f; + tlasInstanceDescs.Add(inst); + } + + TLASDesc desc = {}; + desc.instanceCount = tlasInstanceDescs.count; + desc.instances = tlasInstanceDescs.items; + CmdCreateTLAS(&tlasBuffer, desc); + } + srp.EndRenderPass(renderPass); + EndTempCommandList(); + + // BuildBLASes malloc'ed one mesh descriptor array per non-empty bucket + // release them once the command list is done, exactly like ProcessWorld does, to avoid leaking every frame + for(uint32_t i = 0; i < BLASBucket::Count; i++) + { + BLASBuilder& bucket = dynamicBLASes[i]; + if(bucket.IsEmpty()) + { + continue; + } + + free(bucket.buildMeshes); + bucket.buildMeshes = NULL; + } + + TLASInstance* traceInstances = (TLASInstance*)BeginBufferUpload(tlasInstanceBuffer); + uint32_t instanceId = 0; + for(uint32_t i = 0; i < ARRAY_LEN(staticBLASBuffers); i++) + { + const BLASBuffers& buffers = staticBLASBuffers[i]; + if(IsNullHandle(buffers.blasBuffer)) + { + continue; + } + + Q_assert(instanceId == tlasInstanceDescs[instanceId].instanceId); + TLASInstance traceInst = {}; + traceInst.meshBufferIndex = GetBufferIndexSRV(buffers.meshBuffer); + traceInst.vertexBufferIndex = GetBufferIndexSRV(buffers.vertexBuffer); + traceInst.indexBufferIndex = GetBufferIndexSRV(buffers.indexBuffer); + traceInst.cullMode = (uint32_t)tlasInstanceDescs[instanceId++].cullMode; + *traceInstances++ = traceInst; + } + for(uint32_t i = 0; i <
ARRAY_LEN(dynamicBLASBuffers); i++) + { + const BLASBuffers& buffers = dynamicBLASBuffers[i]; + if(IsNullHandle(buffers.blasBuffer)) + { + continue; + } + + Q_assert(instanceId == tlasInstanceDescs[instanceId].instanceId); + TLASInstance traceInst = {}; + traceInst.meshBufferIndex = GetBufferIndexSRV(buffers.meshBuffer); + traceInst.vertexBufferIndex = GetBufferIndexSRV(buffers.vertexBuffer); + traceInst.indexBufferIndex = GetBufferIndexSRV(buffers.indexBuffer); + traceInst.cullMode = (uint32_t)tlasInstanceDescs[instanceId++].cullMode; + *traceInstances++ = traceInst; + } + EndBufferUpload(tlasInstanceBuffer); + +#if defined(_DEBUG) + for(uint32_t i = 0; i < tlasInstanceDescs.count; i++) + { + Q_assert(tlasInstanceDescs[i].instanceId == i); + } +#endif +} + +void Raytracing::TagMapSurfacesRecursively(mnode_t* node) +{ + do + { + if(node->contents != CONTENTS_NODE) + { + break; + } + + // recurse down the children, front side first + TagMapSurfacesRecursively(node->children[0]); + + // tail recurse + node = node->children[1]; + } + while(true); + + // add the individual surfaces + int c = node->nummarksurfaces; + msurface_t** mark = node->firstmarksurface; + while(c--) + { + msurface_t* const surf = *mark++; + if(IsStaticBLASSurface(surf)) + { + surf->rtSurfType = RTST_STATIC; + } + else + { + surf->rtSurfType = RTST_DYNAMIC; + } + } +} + +void Raytracing::EnsureBuffersAreLargeEnough(Raytracing::BLASBuildBuffers& buffers, uint32_t maxVertexCount, uint32_t maxIndexCount) +{ + { + BufferDesc desc("BLAS build vertex", 2 * maxVertexCount * sizeof(vec3_t), ResourceStates::Common); + desc.shortLifeTime = true; + CreateOrGrowBuffer(buffers.vertexBuffer, buffers.vertexBufferByteCount, desc); + } + { + BufferDesc desc("BLAS build index", 2 * maxIndexCount * sizeof(uint32_t), ResourceStates::Common); + desc.shortLifeTime = true; + CreateOrGrowBuffer(buffers.indexBuffer, buffers.indexBufferByteCount, desc); + } +} + +void 
Raytracing::EnsureBuffersAreLargeEnough(Raytracing::BLASBuffers& buffers, uint32_t maxVertexCount, uint32_t maxIndexCount, uint32_t maxMeshCount) +{ + { + const uint32_t structByteCount = sizeof(BLASVertex); + BufferDesc desc("BLAS support vertex", maxVertexCount * structByteCount, ResourceStates::ShaderAccessBits); + desc.shortLifeTime = true; + desc.structureByteCount = structByteCount; + CreateOrGrowBuffer(buffers.vertexBuffer, buffers.vertexBufferByteCount, desc); + } + { + const uint32_t structByteCount = sizeof(uint32_t); + BufferDesc desc("BLAS support index", maxIndexCount * structByteCount, ResourceStates::ShaderAccessBits); + desc.shortLifeTime = true; + desc.structureByteCount = structByteCount; + CreateOrGrowBuffer(buffers.indexBuffer, buffers.indexBufferByteCount, desc); + } + { + const uint32_t structByteCount = sizeof(BLASMesh); + BufferDesc desc("BLAS support mesh", maxMeshCount * structByteCount, ResourceStates::ShaderAccessBits); + desc.shortLifeTime = true; + desc.structureByteCount = structByteCount; + CreateOrGrowBuffer(buffers.meshBuffer, buffers.meshBufferByteCount, desc); + } +} + +void Raytracing::BuildBLASes(BLASBuffers* blasBuffers, BLASBuilder* blasBuilders, ISurfaceList* surfaceList) +{ + tess.tessellator = Tessellator::None; + tr.forceHighestLod = true; + + memset(blasBuilders, 0, sizeof(BLASBuilder) * BLASBucket::Count); + + const double originalTime = backEnd.refdef.floatTime; + + // gather stats on all surfaces we can bake + for(uint32_t i = 0, count = surfaceList->GetSurfaceCount(); i < count; i++) + { + Surface surface; + if(surfaceList->GetSurface(surface, i)) + { + continue; + } + + int surfVertexCount, surfIndexCount; + if(EstimateTessellatedSize(surfVertexCount, surfIndexCount, surface.surface, surface.shader, surface.entityNum, originalTime)) + { + continue; + } + + BLASBuilder& bucket = blasBuilders[GetBLASBucketIndex(surface.shader)]; + bucket.totalVertexCount += surfVertexCount; + bucket.totalIndexCount += surfIndexCount; 
+ bucket.meshCount++; + } + + // correct the vertex and index counts since the estimations might be a little off + for(uint32_t i = 0; i < BLASBucket::Count; i++) + { + BLASBuilder& bucket = blasBuilders[i]; + if(!bucket.IsEmpty()) + { + bucket.totalVertexCount = max(2 * bucket.totalVertexCount, 8192u); + bucket.totalIndexCount = max(2 * bucket.totalIndexCount, 32768u); + } + } + + // create buffers and map them + for(uint32_t i = 0; i < BLASBucket::Count; i++) + { + BLASBuilder& bucket = blasBuilders[i]; + if(bucket.IsEmpty()) + { + continue; + } + + EnsureBuffersAreLargeEnough(blasBuildBuffers[i], bucket.totalVertexCount, bucket.totalIndexCount); + bucket.buildMeshes = (BLASMeshDesc*)malloc(bucket.meshCount * sizeof(BLASMeshDesc)); + if(bucket.buildMeshes == NULL) + { + ri.Error(ERR_FATAL, "Failed to allocate %d BLASMeshDesc instances\n", (int)bucket.meshCount); + } + bucket.buildVertices = (float*)BeginBufferUpload(blasBuildBuffers[i].vertexBuffer); + bucket.buildIndices = (uint32_t*)BeginBufferUpload(blasBuildBuffers[i].indexBuffer); + + EnsureBuffersAreLargeEnough(blasBuffers[i], bucket.totalVertexCount, bucket.totalIndexCount, bucket.meshCount); + bucket.traceVertices = (BLASVertex*)BeginBufferUpload(blasBuffers[i].vertexBuffer); + bucket.traceIndices = (uint32_t*)BeginBufferUpload(blasBuffers[i].indexBuffer); + bucket.traceMeshes = (BLASMesh*)BeginBufferUpload(blasBuffers[i].meshBuffer); + } + + // upload vertex and index data + for(uint32_t i = 0, count = surfaceList->GetSurfaceCount(); i < count; i++) + { + Surface surface; + if(surfaceList->GetSurface(surface, i)) + { + continue; + } + + int surfVertexCount, surfIndexCount; + if(Tessellate(surfVertexCount, surfIndexCount, surface.surface, surface.shader, surface.entityNum, originalTime)) + { + continue; + } + + BLASBuilder& bucket = blasBuilders[GetBLASBucketIndex(surface.shader)]; + + // tess.xyz is an array of vec4_t + for(uint32_t v = 0; v < surfVertexCount; v++) + { + if(surface.entityNum == 
ENTITYNUM_WORLD) + { + bucket.buildVertices[0] = tess.xyz[v][0]; + bucket.buildVertices[1] = tess.xyz[v][1]; + bucket.buildVertices[2] = tess.xyz[v][2]; + } + else + { + const float original[3] = { tess.xyz[v][0], tess.xyz[v][1], tess.xyz[v][2] }; + float newPos[3]; + TransformPoint(original, backEnd.modelMatrix, newPos); + bucket.buildVertices[0] = newPos[0]; + bucket.buildVertices[1] = newPos[1]; + bucket.buildVertices[2] = newPos[2]; + } + bucket.buildVertices += 3; + + bucket.traceVertices->texCoords[0] = tess.svars[0].texcoords[v][0]; + bucket.traceVertices->texCoords[1] = tess.svars[0].texcoords[v][1]; + bucket.traceVertices->color[0] = tess.svars[0].colors[v][0]; + bucket.traceVertices->color[1] = tess.svars[0].colors[v][1]; + bucket.traceVertices->color[2] = tess.svars[0].colors[v][2]; + bucket.traceVertices->color[3] = tess.svars[0].colors[v][3]; + bucket.traceVertices++; + } + + memcpy(bucket.buildIndices, tess.indexes, surfIndexCount * sizeof(uint32_t)); + bucket.buildIndices += surfIndexCount; + + memcpy(bucket.traceIndices, tess.indexes, surfIndexCount * sizeof(uint32_t)); + bucket.traceIndices += surfIndexCount; + + const shaderStage_t& stage0 = *surface.shader->stages[0]; + const image_t& image0 = *stage0.bundle.image[0]; + bucket.buildMeshes[bucket.meshIndex].firstVertex = bucket.firstVertex; + bucket.buildMeshes[bucket.meshIndex].vertexCount = surfVertexCount; + bucket.buildMeshes[bucket.meshIndex].firstIndex = bucket.firstIndex; + bucket.buildMeshes[bucket.meshIndex].indexCount = surfIndexCount; + bucket.buildMeshes[bucket.meshIndex].isFullyOpaque = surface.shader->isOpaque && !surface.shader->isAlphaTestedOpaque; + bucket.traceMeshes[bucket.meshIndex].firstVertex = bucket.firstVertex; + bucket.traceMeshes[bucket.meshIndex].firstIndex = bucket.firstIndex; + bucket.traceMeshes[bucket.meshIndex].textureIndex = image0.textureIndex; + bucket.traceMeshes[bucket.meshIndex].samplerIndex = GetSamplerIndex(image0.wrapClampMode, TextureFilter::Linear); + 
bucket.traceMeshes[bucket.meshIndex].alphaTestMode = AlphaTestShaderConstFromStateBits(stage0.stateBits); + bucket.traceMeshes[bucket.meshIndex].blendBits = stage0.stateBits & GLS_BLEND_BITS; + bucket.meshIndex++; + + bucket.firstVertex += surfVertexCount; + bucket.firstIndex += surfIndexCount; + } + + // unmap buffers + for(uint32_t i = 0; i < BLASBucket::Count; i++) + { + BLASBuilder& bucket = blasBuilders[i]; + if(bucket.IsEmpty()) + { + continue; + } + + EndBufferUpload(blasBuildBuffers[i].vertexBuffer); + EndBufferUpload(blasBuildBuffers[i].indexBuffer); + bucket.buildVertices = NULL; + bucket.buildIndices = NULL; + + EndBufferUpload(blasBuffers[i].vertexBuffer); + EndBufferUpload(blasBuffers[i].indexBuffer); + EndBufferUpload(blasBuffers[i].meshBuffer); + bucket.traceVertices = NULL; + bucket.traceIndices = NULL; + bucket.traceMeshes = NULL; + } + + backEnd.refdef.floatTime = originalTime; + tr.forceHighestLod = false; +} + +uint32_t Raytracing::WorldSurfaceList::GetSurfaceCount() +{ + return s_world->numsurfaces; +} + +bool Raytracing::WorldSurfaceList::GetSurface(Surface& surface, uint32_t index) +{ + Q_assert(index < (uint32_t)s_world->numsurfaces); + + const msurface_t& surf = s_world->surfaces[index]; + if(surf.rtSurfType != RTST_STATIC || surf.shader->numStages <= 0) + { + return true; + } + + surface.surface = surf.data; + surface.shader = surf.shader; + surface.entityNum = ENTITYNUM_WORLD; + + return false; +} + +uint32_t Raytracing::DynamicSurfaceList::GetSurfaceCount() +{ + return tr.numRTSurfs + tr.world->numsurfaces; +} + +bool Raytracing::DynamicSurfaceList::GetSurface(Surface& surface, uint32_t index) +{ + Q_assert(index < (uint32_t)tr.numRTSurfs + (uint32_t)tr.world->numsurfaces); + + bool skip = false; + if(index < tr.numRTSurfs) + { + const rtSurf_t& surf = tr.rtSurfs[index]; + surface.surface = surf.surface; + surface.shader = surf.shader; + surface.entityNum = surf.entityNum; + } + else + { + index -= tr.numRTSurfs; + Q_assert(index < 
(uint32_t)tr.world->numsurfaces); + + const msurface_t& surf = tr.world->surfaces[index]; + if(surf.rtSurfType != RTST_DYNAMIC) + { + skip = true; + } + + surface.surface = surf.data; + surface.shader = surf.shader; + surface.entityNum = ENTITYNUM_WORLD; + } + + skip = skip || surface.shader->numStages <= 0; + + return skip; +} diff --git a/code/renderer/crp_transp_draw.cpp b/code/renderer/crp_transp_draw.cpp index b337509..34e499c 100644 --- a/code/renderer/crp_transp_draw.cpp +++ b/code/renderer/crp_transp_draw.cpp @@ -83,7 +83,6 @@ void WorldTransp::Draw(const drawSceneViewCommand_t& cmd) backEnd.refdef = cmd.refdef; backEnd.viewParms = cmd.viewParms; - RB_CreateClipPlane(clipPlane); SCOPED_RENDER_PASS("Transparent", 1.0f, 0.5f, 0.5f); @@ -270,8 +269,6 @@ void WorldTransp::EndBatch() TranspDrawVertexRC vertexRC = {}; memcpy(vertexRC.modelViewMatrix, backEnd.orient.modelMatrix, sizeof(vertexRC.modelViewMatrix)); - memcpy(vertexRC.projectionMatrix, backEnd.viewParms.projectionMatrix, sizeof(vertexRC.projectionMatrix)); - memcpy(vertexRC.clipPlane, clipPlane, sizeof(vertexRC.clipPlane)); CmdSetGraphicsRootConstants(0, sizeof(vertexRC), &vertexRC); for(int s = 0; s < shader->numStages; ++s) diff --git a/code/renderer/grp_local.h b/code/renderer/grp_local.h index 74f8239..b645d81 100644 --- a/code/renderer/grp_local.h +++ b/code/renderer/grp_local.h @@ -308,6 +308,7 @@ private: struct GRP : IRenderPipeline { void Init() override; + void LoadResources() override {} void ShutDown(bool fullShutDown) override; void CreateTexture(image_t* image, int mipCount, int width, int height) override; void UpoadTextureAndGenerateMipMaps(image_t* image, const byte* data) override; diff --git a/code/renderer/grp_main.cpp b/code/renderer/grp_main.cpp index ca26e31..c101ff8 100644 --- a/code/renderer/grp_main.cpp +++ b/code/renderer/grp_main.cpp @@ -98,6 +98,7 @@ void GRP::Init() { InitDesc initDesc; initDesc.directDescriptorHeapIndexing = false; + initDesc.inlineRaytracing = false; 
srp.firstInit = RHI::Init(initDesc); if(srp.firstInit) diff --git a/code/renderer/rhi_d3d12.cpp b/code/renderer/rhi_d3d12.cpp index 3f9659b..aaa07ff 100644 --- a/code/renderer/rhi_d3d12.cpp +++ b/code/renderer/rhi_d3d12.cpp @@ -584,12 +584,13 @@ namespace RHI struct DescriptorRange { void Init(D3D12_DESCRIPTOR_RANGE_TYPE type, uint32_t start, uint32_t count); - uint32_t Allocate(); + uint32_t Allocate(bool slotAtIndex0 = false); D3D12_DESCRIPTOR_RANGE_TYPE type; uint32_t start; uint32_t count; uint32_t index; + bool reservedSlotUsed; }; static const uint32_t MaxDescriptorsSRV = 65536; @@ -640,6 +641,12 @@ namespace RHI LUID uniqueId; }; + struct BufferToDelete + { + HBuffer buffer; + uint32_t beginFrameCounter; + }; + struct RHIPrivate { bool initialized; @@ -653,7 +660,7 @@ namespace RHI IDXGIFactory1* factory; #endif IDXGIAdapter1* adapter; - ID3D12Device* device; + ID3D12Device5* device; D3D12MA::Allocator* allocator; D3D12MA::Pool* umaPool; // only non-NULL when using a cache-coherent UMA adapter ID3D12CommandQueue* mainCommandQueue; @@ -734,6 +741,7 @@ namespace RHI ReadbackManager readback; StaticUnorderedArray texturesToTransition; StaticUnorderedArray buffersToTransition; + StaticUnorderedArray buffersToDelete; FrameQueries frameQueries[FrameCount]; ResolvedQueries resolvedQueries; PIX pix; @@ -741,6 +749,9 @@ namespace RHI int64_t beforeRenderingUS; GPU gpus[16]; uint32_t gpuCount; + HBuffer raytracingScratchBuffer; + HBuffer raytracingInstanceBuffer; + uint32_t beginFrameCounter; // immediate-mode barrier API TextureBarrier textureBarriers[64]; @@ -820,6 +831,30 @@ namespace RHI return false; } + static const char* GetUTF8String(const WCHAR* wideStr, const char* defaultUTF8Str) + { + static char utf8Str[256]; + const char* utf8StrPtr = defaultUTF8Str; + if(WideCharToMultiByte(CP_UTF8, 0, wideStr, -1, utf8Str, sizeof(utf8Str), NULL, NULL) > 0) + { + utf8StrPtr = utf8Str; + } + + return utf8StrPtr; + } + + static const WCHAR* GetWideString(const char* 
utf8Str, const WCHAR* defaultWideStr) + { + static WCHAR wideStr[256]; + const WCHAR* wideStrPtr = defaultWideStr; + if(MultiByteToWideChar(CP_UTF8, 0, utf8Str, -1, wideStr, ARRAY_LEN(wideStr)) > 0) + { + wideStrPtr = wideStr; + } + + return wideStrPtr; + } + static void SetDebugName(ID3D12DeviceChild* resource, const char* resourceName, D3DResourceType::Id resourceType) { if(resourceName == NULL || (uint32_t)resourceType >= D3DResourceType::Count) @@ -831,13 +866,17 @@ namespace RHI // ID3D12Object::SetName is a Unicode wrapper for // ID3D12Object::SetPrivateData with WKPDID_D3DDebugObjectNameW - resource->SetPrivateData(WKPDID_D3DDebugObjectName, strlen(name), name); + // it was good enough for RenderDoc and PIX, but not Nsight + //resource->SetPrivateData(WKPDID_D3DDebugObjectName, strlen(name), name); + resource->SetName(GetWideString(name, L"???")); } static uint32_t GetBytesPerPixel(TextureFormat::Id format) { switch(format) { + case TextureFormat::RGBA128_Float: + return 16; case TextureFormat::RGBA64_Float: return 8; case TextureFormat::RGBA32_UNorm: @@ -1160,12 +1199,10 @@ namespace RHI void UploadManager::EndOfBufferReached() { -#if defined(_DEBUG) - ri.Printf(PRINT_ALL, "Waiting for GPU upload: %s (%d T, %d B)\n", + ri.Printf(PRINT_DEVELOPER, "Waiting for GPU upload: %s (%d T, %d B)\n", Com_FormatBytes(bufferByteOffset), batchTextureCount, batchBufferCount); -#endif fence.WaitOnCPU(fenceValue); D3D(commandAllocator->Reset()); bufferByteOffset = 0; @@ -1385,13 +1422,6 @@ namespace RHI return index; } - uint32_t DynamicResources::DescriptorRange::Allocate() - { - ASSERT_OR_DIE(index + 1 < start + count, "Not enough descriptors"); - - return index++; - } - void DynamicResources::DescriptorRange::Init(D3D12_DESCRIPTOR_RANGE_TYPE type_, uint32_t start_, uint32_t count_) { Q_assert(count_ > 0); @@ -1399,7 +1429,23 @@ namespace RHI type = type_; start = start_; count = count_; - index = start_; + index = start_ + 1; + reservedSlotUsed = false; + } + + uint32_t 
DynamicResources::DescriptorRange::Allocate(bool slotAtIndex0) + { + if(slotAtIndex0) + { + ASSERT_OR_DIE(!reservedSlotUsed, "Can only use 1 reserved slot"); + reservedSlotUsed = true; + + return start; + } + + ASSERT_OR_DIE(index + 1 < start + count, "Not enough descriptors"); + + return index++; } static const char* GetDeviceRemovedReasonString(HRESULT reason) @@ -1426,18 +1472,6 @@ namespace RHI } } - static const char* GetUTF8String(const WCHAR* wideStr, const char* defaultUTF8Str) - { - static char utf8Str[256]; - const char* utf8StrPtr = defaultUTF8Str; - if(WideCharToMultiByte(CP_UTF8, 0, wideStr, -1, utf8Str, sizeof(utf8Str), NULL, NULL) > 0) - { - utf8StrPtr = utf8Str; - } - - return utf8StrPtr; - } - static bool IsSuitableAdapter(IDXGIAdapter1* adapter) { HRESULT hr = S_OK; @@ -1771,6 +1805,7 @@ namespace RHI case TextureFormat::RGBA32_UNorm: return DXGI_FORMAT_R8G8B8A8_UNORM; case TextureFormat::RGBA64_UNorm: return DXGI_FORMAT_R16G16B16A16_UNORM; case TextureFormat::RGBA64_Float: return DXGI_FORMAT_R16G16B16A16_FLOAT; + case TextureFormat::RGBA128_Float: return DXGI_FORMAT_R32G32B32A32_FLOAT; case TextureFormat::Depth32_Float: return DXGI_FORMAT_D32_FLOAT; case TextureFormat::Depth24_Stencil8: return DXGI_FORMAT_D24_UNORM_S8_UINT; case TextureFormat::RG16_UNorm: return DXGI_FORMAT_R8G8_UNORM; @@ -1853,6 +1888,7 @@ namespace RHI ADD_BITS(DepthWriteBit, D3D12_RESOURCE_STATE_DEPTH_WRITE); ADD_BITS(UnorderedAccessBit, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); ADD_BITS(PresentBit, D3D12_RESOURCE_STATE_PRESENT); + ADD_BITS(RaytracingASBit, D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE); return states; @@ -1952,6 +1988,7 @@ namespace RHI case D3D12_RESOURCE_STATE_COPY_SOURCE: return "copy source"; case D3D12_RESOURCE_STATE_GENERIC_READ: return "generic read"; case D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE: return "generic shader resource"; + case D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE: return "raytracing acceleration structure"; default: 
return "???"; } } @@ -2067,7 +2104,9 @@ namespace RHI ValidateResourceStateForBarrier(before); ValidateResourceStateForBarrier(after); - if(before & after & D3D12_RESOURCE_STATE_UNORDERED_ACCESS) + if((before & after & D3D12_RESOURCE_STATE_UNORDERED_ACCESS) != 0 || + ((before & D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE) != 0 && + (after & D3D12_RESOURCE_STATE_UNORDERED_ACCESS) != 0)) { // note that UAV barriers are unnecessary in a bunch of cases: // - before/after access is read-only @@ -2308,18 +2347,6 @@ namespace RHI return flags; } - static void WaitForTempCommandList() - { - rhi.tempFence.WaitOnCPU(rhi.tempFenceValue); - if(rhi.tempCommandListOpen) - { - rhi.tempCommandList->Close(); - } - D3D(rhi.tempCommandAllocator->Reset()); - D3D(rhi.tempCommandList->Reset(rhi.tempCommandAllocator, NULL)); - rhi.tempCommandListOpen = true; - } - static void WaitForSwapChain() { if(rhi.frameLatencyWaitableObject != NULL && rhi.frameLatencyWaitNeeded) @@ -2330,7 +2357,7 @@ namespace RHI } } - static uint32_t CreateSRV(ID3D12Resource* resource, D3D12_SHADER_RESOURCE_VIEW_DESC& desc) + static uint32_t CreateSRV(ID3D12Resource* resource, D3D12_SHADER_RESOURCE_VIEW_DESC& desc, bool slotAtIndex0) { Q_assert(resource); @@ -2344,8 +2371,13 @@ namespace RHI return rhi.descHeapGeneric.CreateSRV(resource, desc); } + if(desc.ViewDimension == D3D12_SRV_DIMENSION_RAYTRACING_ACCELERATION_STRUCTURE) + { + resource = NULL; + } + DynamicResources& dr = rhi.dynamicResources; - const uint32_t index = dr.srvIndex.Allocate(); + const uint32_t index = dr.srvIndex.Allocate(slotAtIndex0); D3D12_CPU_DESCRIPTOR_HANDLE handle = dr.genericDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); handle.ptr += index * rhi.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); rhi.device->CreateShaderResourceView(resource, &desc, handle); @@ -2435,7 +2467,16 @@ namespace RHI srv.Buffer.StructureByteStride = 0; srv.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; } - 
srvIndex = CreateSRV(resource, srv); + srvIndex = CreateSRV(resource, srv, rhiDesc.useSrvIndex0); + } + else if(rhiDesc.initialState & ResourceStates::RaytracingASBit) + { + D3D12_SHADER_RESOURCE_VIEW_DESC srv = {}; + srv.ViewDimension = D3D12_SRV_DIMENSION_RAYTRACING_ACCELERATION_STRUCTURE; + srv.Format = DXGI_FORMAT_UNKNOWN; + srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srv.RaytracingAccelerationStructure.Location = buffer.gpuAddress; + srvIndex = CreateSRV(resource, srv, false); } uint32_t cbvIndex = InvalidDescriptorIndex; @@ -2495,7 +2536,7 @@ namespace RHI { srv.Format = DXGI_FORMAT_R24_UNORM_X8_TYPELESS; // @TODO: } - texture.srvIndex = CreateSRV(resource, srv); + texture.srvIndex = CreateSRV(resource, srv, false); } else { @@ -2756,6 +2797,47 @@ namespace RHI } } + static void EnsureBufferIsThisLarge(HBuffer& hbuffer, const char* name, ResourceStates::Flags state, uint32_t byteCount) + { + uint32_t oldByteCount = 0; + if(!IsNullHandle(hbuffer)) + { + const Buffer& buffer = rhi.buffers.Get(hbuffer); + if(buffer.desc.byteCount >= byteCount) + { + return; + } + + oldByteCount = buffer.desc.byteCount; + } + + byteCount = max(byteCount, 2 * oldByteCount); + DestroyBufferDelayed(hbuffer); + BufferDesc desc(name, byteCount, state); + desc.shortLifeTime = true; + hbuffer = CreateBuffer(desc); + } + + static void UpdateGPUIndexRangeAndHelp() + { + Cvar_SetRange(r_gpuIndex->name, r_gpuIndex->type, "0", va("%d", rhi.gpuCount)); + + char values[256]; + StringList stringList; + stringList.Init(values, sizeof(values)); + stringList.Append("0"); + stringList.Append("Default GPU"); + stringList.Append(""); + for(uint32_t i = 0; i < rhi.gpuCount; ++i) + { + stringList.Append(va("%d", (int)i + 1)); + stringList.Append(rhi.gpus[i].name); + stringList.Append(""); + } + stringList.Terminate(); + Cvar_SetMenuData(r_gpuIndex->name, CVARCAT_DISPLAY | CVARCAT_PERFORMANCE, "GPU selection", "Choose the GPU to use", "", values); + } + static void 
DrawResourceUsage() { if(BeginTable("Handles", 3)) @@ -3057,6 +3139,9 @@ namespace RHI if(rhi.device != NULL) { + rhi.raytracingScratchBuffer = RHI_MAKE_NULL_HANDLE(); + rhi.raytracingInstanceBuffer = RHI_MAKE_NULL_HANDLE(); + DXGI_SWAP_CHAIN_DESC desc; D3D(rhi.swapChain->GetDesc(&desc)); @@ -3141,6 +3226,8 @@ namespace RHI UpdateDynamicResources(); + UpdateGPUIndexRangeAndHelp(); + return false; } @@ -3228,24 +3315,7 @@ namespace RHI ri.Printf(PRINT_ALL, "Selected graphics adapter: %s\n", adapterNamePtr); Q_strncpyz(rhi.adapterName, adapterNamePtr, sizeof(rhi.adapterName)); } - { - Cvar_SetRange(r_gpuIndex->name, r_gpuIndex->type, "0", va("%d", rhi.gpuCount)); - - char values[256]; - StringList stringList; - stringList.Init(values, sizeof(values)); - stringList.Append("0"); - stringList.Append("Default GPU"); - stringList.Append(""); - for(uint32_t i = 0; i < rhi.gpuCount; ++i) - { - stringList.Append(va("%d", (int)i + 1)); - stringList.Append(rhi.gpus[i].name); - stringList.Append(""); - } - stringList.Terminate(); - Cvar_SetMenuData(r_gpuIndex->name, CVARCAT_DISPLAY | CVARCAT_PERFORMANCE, "GPU selection", "Choose the GPU to use", "", values); - } + UpdateGPUIndexRangeAndHelp(); D3D(D3D12CreateDevice(rhi.adapter, FeatureLevel, IID_PPV_ARGS(&rhi.device))); @@ -3271,6 +3341,18 @@ namespace RHI } } + if(initDesc.inlineRaytracing) + { + D3D12_FEATURE_DATA_D3D12_OPTIONS5 options5 = {}; + if(SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS5, &options5, sizeof(options5)))) + { + if(options5.RaytracingTier < D3D12_RAYTRACING_TIER_1_1) + { + ri.Error(ERR_FATAL, "The CRP requires DXR 1.1 capable hardware\n"); + } + } + } + { D3D12MA::ALLOCATOR_DESC desc = {}; desc.pDevice = rhi.device; @@ -3668,6 +3750,8 @@ namespace RHI } rhi.frameBegun = true; + rhi.beginFrameCounter++; + rhi.beforeRenderingUS = Sys_Microseconds(); WaitForSwapChain(); @@ -3724,6 +3808,19 @@ namespace RHI rhi.texturesToTransition.Clear(); rhi.buffersToTransition.Clear(); + 
for(uint32_t b = 0; b < rhi.buffersToDelete.count; ) + { + if(rhi.beginFrameCounter >= rhi.buffersToDelete[b].beginFrameCounter) // destroy only once the scheduled frame has begun (see DestroyBufferDelayed) + { + DestroyBuffer(rhi.buffersToDelete[b].buffer); + rhi.buffersToDelete.Remove(b); + } + else + { + b++; + } + } + CmdInsertDebugLabel("RHI::BeginFrame", 0.8f, 0.8f, 0.8f); } @@ -3814,12 +3911,22 @@ namespace RHI { desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; } + if(rhiDesc.initialState & ResourceStates::RaytracingASBit) + { + // @NOTE: don't use D3D12_RESOURCE_FLAG_RAYTRACING_ACCELERATION_STRUCTURE + // it's reserved for future use and isn't the right one to use + desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + } bool transitionNeeded = false; D3D12_RESOURCE_STATES resourceState = D3D12_RESOURCE_STATE_COMMON; D3D12MA::ALLOCATION_DESC allocDesc = { 0 }; allocDesc.HeapType = D3D12_HEAP_TYPE_DEFAULT; - if(rhiDesc.memoryUsage == MemoryUsage::CPU || rhiDesc.memoryUsage == MemoryUsage::Upload) + if(rhiDesc.initialState == ResourceStates::RaytracingASBit) + { + resourceState = D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE; + } + else if(rhiDesc.memoryUsage == MemoryUsage::CPU || rhiDesc.memoryUsage == MemoryUsage::Upload) { allocDesc.HeapType = D3D12_HEAP_TYPE_UPLOAD; resourceState = D3D12_RESOURCE_STATE_GENERIC_READ; // mandated @@ -3897,6 +4004,19 @@ namespace RHI rhi.buffers.Remove(handle); } + void DestroyBufferDelayed(HBuffer buffer) + { + if(IsNullHandle(buffer)) + { + return; + } + + BufferToDelete b = {}; + b.beginFrameCounter = rhi.beginFrameCounter + 2; + b.buffer = buffer; + rhi.buffersToDelete.Add(b); + } + uint8_t* MapBuffer(HBuffer handle) { Buffer& buffer = rhi.buffers.Get(handle); @@ -4509,7 +4629,7 @@ namespace RHI desc.RasterizerState.DepthBias = rhiDesc.rasterizer.polygonOffset ? 1 : 0; desc.RasterizerState.DepthBiasClamp = 0.0f; desc.RasterizerState.SlopeScaledDepthBias = rhiDesc.rasterizer.polygonOffset ? 
1.0f : 0.0f; - desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; + desc.RasterizerState.FillMode = rhiDesc.rasterizer.wireFrame ? D3D12_FILL_MODE_WIREFRAME : D3D12_FILL_MODE_SOLID; desc.RasterizerState.ForcedSampleCount = 0; desc.RasterizerState.MultisampleEnable = FALSE; desc.RasterizerState.DepthClipEnable = rhiDesc.rasterizer.clampDepth ? FALSE : TRUE; @@ -5138,6 +5258,17 @@ namespace RHI rhi.commandList->CopyBufferRegion(dst.buffer, 0, src.buffer, 0, byteCount); } + void CmdCopyBuffer(HBuffer dest, uint32_t destOffset, HBuffer source, uint32_t sourceOffset, uint32_t byteCount) + { + Q_assert(CanWriteCommands()); + + const Buffer& dst = rhi.buffers.Get(dest); + const Buffer& src = rhi.buffers.Get(source); + Q_assert(destOffset + byteCount <= dst.desc.byteCount); + Q_assert(sourceOffset + byteCount <= src.desc.byteCount); + rhi.commandList->CopyBufferRegion(dst.buffer, destOffset, src.buffer, sourceOffset, byteCount); + } + void CmdSetShadingRate(ShadingRate::Id shadingRate) { Q_assert(CanWriteCommands()); @@ -5225,6 +5356,18 @@ namespace RHI rhi.tempCommandListOpen = false; } + void WaitForTempCommandList() + { + rhi.tempFence.WaitOnCPU(rhi.tempFenceValue); + if(rhi.tempCommandListOpen) + { + rhi.tempCommandList->Close(); + } + D3D(rhi.tempCommandAllocator->Reset()); + D3D(rhi.tempCommandList->Reset(rhi.tempCommandAllocator, NULL)); + rhi.tempCommandListOpen = true; + } + void BeginTextureReadback(MappedTexture& mappedTexture, HTexture htexture) { rhi.readback.BeginTextureReadback(mappedTexture, htexture); @@ -5363,6 +5506,160 @@ namespace RHI ri.Printf(PRINT_ALL, "%s%d^7. 
%s\n", S_COLOR_VAL, (int)i + 1, rhi.gpus[i].name); } } + + void CmdCreateBLAS(HBuffer* blasBuffer, const BLASDesc& rhiDesc) + { + ASSERT_DR_ENABLED(); + Q_assert(rhi.commandList == rhi.tempCommandList); + Q_assert(rhi.tempCommandListOpen); + Q_assert(blasBuffer); + Q_assert(!IsNullHandle(rhiDesc.vertexBuffer)); + Q_assert(!IsNullHandle(rhiDesc.indexBuffer)); + Q_assert(rhiDesc.meshCount > 0); + Q_assert(rhiDesc.meshes); + + const D3D12_GPU_VIRTUAL_ADDRESS baseVertexAddress = rhi.buffers.Get(rhiDesc.vertexBuffer).gpuAddress; + const D3D12_GPU_VIRTUAL_ADDRESS baseIndexAddress = rhi.buffers.Get(rhiDesc.indexBuffer).gpuAddress; + + D3D12_RAYTRACING_GEOMETRY_DESC* const geos = + (D3D12_RAYTRACING_GEOMETRY_DESC*)calloc(rhiDesc.meshCount, sizeof(D3D12_RAYTRACING_GEOMETRY_DESC)); + if(geos == NULL) + { + ri.Error(ERR_FATAL, "Failed to allocate %d D3D12_RAYTRACING_GEOMETRY_DESC instances\n", (int)rhiDesc.meshCount); + } + + for(uint32_t i = 0; i < rhiDesc.meshCount; ++i) + { + const BLASMeshDesc& mesh = rhiDesc.meshes[i]; + D3D12_RAYTRACING_GEOMETRY_DESC& geoDesc = geos[i]; + geoDesc.Type = D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES; + geoDesc.Flags = mesh.isFullyOpaque ? 
+ D3D12_RAYTRACING_GEOMETRY_FLAG_OPAQUE : + D3D12_RAYTRACING_GEOMETRY_FLAG_NONE; + geoDesc.Triangles.IndexFormat = DXGI_FORMAT_R32_UINT; + geoDesc.Triangles.IndexCount = mesh.indexCount; + geoDesc.Triangles.IndexBuffer = baseIndexAddress + mesh.firstIndex * sizeof(uint32_t); + geoDesc.Triangles.VertexFormat = DXGI_FORMAT_R32G32B32_FLOAT; + geoDesc.Triangles.VertexCount = mesh.vertexCount; + geoDesc.Triangles.VertexBuffer.StartAddress = baseVertexAddress + mesh.firstVertex * sizeof(vec3_t); + geoDesc.Triangles.VertexBuffer.StrideInBytes = sizeof(vec3_t); + geoDesc.Triangles.Transform3x4 = NULL; + } + + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS inputs = {}; + inputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL; + inputs.Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_NONE; + inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY; + inputs.NumDescs = rhiDesc.meshCount; + inputs.pGeometryDescs = geos; + + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO info = {}; + rhi.device->GetRaytracingAccelerationStructurePrebuildInfo(&inputs, &info); + if(info.ResultDataMaxSizeInBytes >= UINT64(4ull << 30ull) || + info.ScratchDataSizeInBytes >= UINT64(4ull << 30ull)) + { + ri.Error(ERR_FATAL, "Attempted to create a BLAS larger than 4 GB!\n"); + } + + EnsureBufferIsThisLarge(rhi.raytracingScratchBuffer, "RTAS scratch", + ResourceStates::UnorderedAccessBit, (uint32_t)info.ScratchDataSizeInBytes); + EnsureBufferIsThisLarge(*blasBuffer, rhiDesc.name, + ResourceStates::RaytracingASBit, (uint32_t)info.ResultDataMaxSizeInBytes); + + // dest + src: D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC rtasDesc = {}; + rtasDesc.SourceAccelerationStructureData = 0; + rtasDesc.DestAccelerationStructureData = rhi.buffers.Get(*blasBuffer).gpuAddress; + rtasDesc.ScratchAccelerationStructureData = rhi.buffers.Get(rhi.raytracingScratchBuffer).gpuAddress; + rtasDesc.Inputs = inputs; + 
rhi.commandList->BuildRaytracingAccelerationStructure(&rtasDesc, 0, NULL); + + free(geos); + + CmdBeginBarrier(); + CmdBufferBarrier(*blasBuffer, ResourceStates::UnorderedAccessBit); + CmdEndBarrier(); + } + + void CmdCreateTLAS(HBuffer* tlasBuffer, const TLASDesc& rhiDesc) + { + ASSERT_DR_ENABLED(); + Q_assert(rhi.commandList == rhi.tempCommandList); + Q_assert(rhi.tempCommandListOpen); + Q_assert(tlasBuffer != NULL); + Q_assert(rhiDesc.instances); + Q_assert(rhiDesc.instanceCount > 0); + + EnsureBufferIsThisLarge(rhi.raytracingInstanceBuffer, "RT TLAS instance", + ResourceStates::Common, rhiDesc.instanceCount * sizeof(D3D12_RAYTRACING_INSTANCE_DESC)); + + D3D12_RAYTRACING_INSTANCE_DESC* const instanceDescs = + (D3D12_RAYTRACING_INSTANCE_DESC*)BeginBufferUpload(rhi.raytracingInstanceBuffer); + for(uint32_t i = 0; i < rhiDesc.instanceCount; ++i) + { + const TLASInstanceDesc& rhiInstDesc = rhiDesc.instances[i]; + D3D12_RAYTRACING_INSTANCE_DESC instDesc = {}; + instDesc.AccelerationStructure = rhi.buffers.Get(rhiInstDesc.blasBuffer).gpuAddress; + switch(rhiInstDesc.cullMode) + { + case CT_FRONT_SIDED: instDesc.Flags = D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_FRONT_COUNTERCLOCKWISE; break; + case CT_BACK_SIDED: instDesc.Flags = D3D12_RAYTRACING_INSTANCE_FLAG_NONE; break; + default: instDesc.Flags = D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_CULL_DISABLE; break; + } + instDesc.InstanceContributionToHitGroupIndex = 0; // @TODO: do we care for this? 
+ instDesc.InstanceID = rhiInstDesc.instanceId; + instDesc.InstanceMask = rhiInstDesc.instanceMask; + instDesc.Transform[0][0] = rhiInstDesc.transform[0]; // @TODO: confirm order + instDesc.Transform[1][0] = rhiInstDesc.transform[1]; + instDesc.Transform[2][0] = rhiInstDesc.transform[2]; + instDesc.Transform[0][1] = rhiInstDesc.transform[3]; + instDesc.Transform[1][1] = rhiInstDesc.transform[4]; + instDesc.Transform[2][1] = rhiInstDesc.transform[5]; + instDesc.Transform[0][2] = rhiInstDesc.transform[6]; + instDesc.Transform[1][2] = rhiInstDesc.transform[7]; + instDesc.Transform[2][2] = rhiInstDesc.transform[8]; + instDesc.Transform[0][3] = rhiInstDesc.translation[0]; + instDesc.Transform[1][3] = rhiInstDesc.translation[1]; + instDesc.Transform[2][3] = rhiInstDesc.translation[2]; + memcpy(&instanceDescs[i], &instDesc, sizeof(D3D12_RAYTRACING_INSTANCE_DESC)); + } + EndBufferUpload(rhi.raytracingInstanceBuffer); + + // GPU wait for the copy queue to be done executing on the GPU + rhi.upload.WaitToStartDrawing(rhi.computeCommandQueue); + + // InstanceDescs: D3D12_RAYTRACING_INSTANCE_DESC_BYTE_ALIGNMENT + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS inputs = {}; + inputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL; + inputs.Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_NONE; + inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY; + inputs.NumDescs = rhiDesc.instanceCount; + inputs.InstanceDescs = rhi.buffers.Get(rhi.raytracingInstanceBuffer).gpuAddress; + + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO info = {}; + rhi.device->GetRaytracingAccelerationStructurePrebuildInfo(&inputs, &info); + if(info.ResultDataMaxSizeInBytes >= UINT64(4ull << 30ull) || + info.ScratchDataSizeInBytes >= UINT64(4ull << 30ull)) + { + ri.Error(ERR_FATAL, "Attempted to create a TLAS larger than 4 GB!\n"); + } + + EnsureBufferIsThisLarge(rhi.raytracingScratchBuffer, "RTAS scratch", + ResourceStates::UnorderedAccessBit, 
(uint32_t)info.ScratchDataSizeInBytes); + EnsureBufferIsThisLarge(*tlasBuffer, "RT TLAS", + ResourceStates::RaytracingASBit, (uint32_t)info.ResultDataMaxSizeInBytes); + + // dest + src: D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC rtasDesc = {}; + rtasDesc.DestAccelerationStructureData = rhi.buffers.Get(*tlasBuffer).gpuAddress; + rtasDesc.ScratchAccelerationStructureData = rhi.buffers.Get(rhi.raytracingScratchBuffer).gpuAddress; + rtasDesc.Inputs = inputs; + rhi.commandList->BuildRaytracingAccelerationStructure(&rtasDesc, 0, NULL); + + CmdBeginBarrier(); + CmdBufferBarrier(*tlasBuffer, ResourceStates::UnorderedAccessBit); + CmdEndBarrier(); + } } void R_WaitBeforeInputSampling() diff --git a/code/renderer/rhi_local.h b/code/renderer/rhi_local.h index c6e7bb3..639f562 100644 --- a/code/renderer/rhi_local.h +++ b/code/renderer/rhi_local.h @@ -91,6 +91,7 @@ namespace RHI DepthWriteBit = RHI_BIT(10), UnorderedAccessBit = RHI_BIT(11), PresentBit = RHI_BIT(12), + RaytracingASBit = RHI_BIT(13), ShaderAccessBits = VertexShaderAccessBit | PixelShaderAccessBit | ComputeShaderAccessBit, DepthAccessBits = DepthReadBit | DepthWriteBit }; @@ -164,6 +165,7 @@ namespace RHI RGBA32_UNorm, RGBA64_UNorm, RGBA64_Float, + RGBA128_Float, Depth32_Float, RG16_UNorm, R8_UNorm, @@ -422,6 +424,7 @@ namespace RHI cullType_t cullMode = CT_FRONT_SIDED; bool polygonOffset = false; bool clampDepth = false; + bool wireFrame = false; } rasterizer; struct RenderTarget @@ -467,10 +470,13 @@ namespace RHI BufferDesc(const char* name_, uint32_t byteCount_, ResourceStates::Flags initialState_) { name = name_; + shortLifeTime = false; byteCount = byteCount_; initialState = initialState_; memoryUsage = MemoryUsage::GPU; committedResource = false; + structureByteCount = 0; + useSrvIndex0 = false; } const char* name = NULL; @@ -480,6 +486,7 @@ namespace RHI MemoryUsage::Id memoryUsage = MemoryUsage::GPU; bool committedResource = false; uint32_t 
structureByteCount = 0; // > 0 means structured buffer, == 0 means byte address buffer + bool useSrvIndex0 = false; }; struct TextureDesc @@ -671,12 +678,49 @@ namespace RHI const ShaderMacro* macros = NULL; }; + struct BLASMeshDesc + { + uint32_t firstVertex; + uint32_t vertexCount; + uint32_t firstIndex; + uint32_t indexCount; + bool isFullyOpaque; // alpha testing the first stage doesn't count as opaque here + }; + + struct BLASDesc + { + const char* name; + HBuffer vertexBuffer; // GPU resident, 3x float32, no padding, released by the RHI + HBuffer indexBuffer; // GPU resident, 1x uint32, released by the RHI + const BLASMeshDesc* meshes; + uint32_t meshCount; + }; + + struct TLASInstanceDesc + { + HBuffer blasBuffer; + float transform[9]; // scale + rotation + float translation[3]; + cullType_t cullMode; + uint32_t instanceId; + uint8_t instanceMask; + }; + + struct TLASDesc + { + const TLASInstanceDesc* instances; + uint32_t instanceCount; + }; + struct InitDesc { // HLSL 6.6 Dynamic Resources // - all shader resources are exclusively used through ResourceDescriptorHeap and SamplerDescriptorHeap // - all root signature and descriptor table functions are disabled bool directDescriptorHeapIndexing = false; + + // shut down if DXR 1.1 isn't available + bool inlineRaytracing = false; }; bool Init(const InitDesc& desc); // true when a full init happened (the device was created) @@ -690,6 +734,7 @@ namespace RHI HBuffer CreateBuffer(const BufferDesc& desc); void DestroyBuffer(HBuffer buffer); + void DestroyBufferDelayed(HBuffer buffer); uint8_t* MapBuffer(HBuffer buffer); void UnmapBuffer(HBuffer buffer); @@ -742,7 +787,10 @@ namespace RHI void CmdEndDebugLabel(); void CmdSetStencilReference(uint8_t stencilRef); void CmdCopyBuffer(HBuffer dest, HBuffer source); + void CmdCopyBuffer(HBuffer dest, uint32_t destOffset, HBuffer source, uint32_t sourceOffset, uint32_t byteCount); void CmdSetShadingRate(ShadingRate::Id shadingRate); + void CmdCreateBLAS(HBuffer* 
blasBuffer, const BLASDesc& desc); + void CmdCreateTLAS(HBuffer* tlasBuffer, const TLASDesc& desc); // only available when dynamic resources are enabled uint32_t GetTextureIndexSRV(HTexture texture); @@ -766,6 +814,7 @@ namespace RHI // the temporary command list is guaranteed to be done executing before the next BeginFrame call ends void BeginTempCommandList(); void EndTempCommandList(); + void WaitForTempCommandList(); void BeginTextureReadback(MappedTexture& mappedTexture, HTexture texture); void EndTextureReadback(); diff --git a/code/renderer/shaders/crp/add_light.hlsl b/code/renderer/shaders/crp/add_light.hlsl new file mode 100644 index 0000000..86d09c0 --- /dev/null +++ b/code/renderer/shaders/crp/add_light.hlsl @@ -0,0 +1,67 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . 
+=========================================================================== +*/ +// fetches and returns direct light data for non-lightmapped surfaces + + +#include "common.hlsli" +#include "scene_view.h.hlsli" + + +cbuffer RootConstants +{ + matrix modelViewMatrix; +}; + +struct VIn +{ + float3 position : POSITION; +}; + +struct VOut +{ + float4 position : SV_Position; + float clipDist : SV_ClipDistance0; +}; + +VOut vs(VIn input) +{ + SceneView scene = GetSceneView(); + float4 positionVS = mul(modelViewMatrix, float4(input.position.xyz, 1)); + + VOut output; + output.position = mul(scene.projectionMatrix, positionVS); + output.clipDist = dot(positionVS, scene.clipPlane); + + return output; +} + +[earlydepthstencil] +float4 ps(VOut input) : SV_Target +{ + SceneView scene = GetSceneView(); + Texture2D lightTexture = ResourceDescriptorHeap[scene.lightTextureIndex]; + + uint3 tc = uint3(input.position.xy, 0); + float3 color = lightTexture.Load(tc).rgb; + float4 result = float4(color, 0); + + return result; +} diff --git a/code/renderer/shaders/crp/world.hlsli b/code/renderer/shaders/crp/alpha_test.h.hlsli similarity index 83% rename from code/renderer/shaders/crp/world.hlsli rename to code/renderer/shaders/crp/alpha_test.h.hlsli index c8934a2..c8f3dc4 100644 --- a/code/renderer/shaders/crp/world.hlsli +++ b/code/renderer/shaders/crp/alpha_test.h.hlsli @@ -1,6 +1,6 @@ /* =========================================================================== -Copyright (C) 2023 Gian 'myT' Schellenbaum +Copyright (C) 2023-2024 Gian 'myT' Schellenbaum This file is part of Challenge Quake 3 (CNQ3). @@ -18,12 +18,18 @@ You should have received a copy of the GNU General Public License along with Challenge Quake 3. If not, see . 
=========================================================================== */ -// shared world surface rendering functions +// shared alpha test symbols and functions #pragma once +#define ATEST_NONE 0u +#define ATEST_GT_0 1u +#define ATEST_LT_HALF 2u +#define ATEST_GE_HALF 3u + +#if !defined(__cplusplus) bool FailsAlphaTest(float alpha, uint alphaTest) { if(alphaTest == ATEST_GT_0) @@ -35,3 +41,4 @@ bool FailsAlphaTest(float alpha, uint alphaTest) else // ATEST_NONE return false; } +#endif diff --git a/code/renderer/shaders/crp/common.hlsli b/code/renderer/shaders/crp/common.hlsli index e035b7f..40a0a9c 100644 --- a/code/renderer/shaders/crp/common.hlsli +++ b/code/renderer/shaders/crp/common.hlsli @@ -149,7 +149,7 @@ float smoothstep01(float x) float2 OctWrap(float2 v) { - return (1.0 - abs(v.yx)) * (v.xy >= 0.0 ? 1.0 : -1.0); + return (1.0 - abs(v.yx)) * select(v.xy >= 0.0, 1.0, -1.0); } float2 OctEncode(float3 n) @@ -166,12 +166,12 @@ float3 OctDecode(float2 f) f = f * 2.0 - 1.0; float3 n = float3(f.x, f.y, 1.0 - abs(f.x) - abs(f.y)); float t = saturate(-n.z); - n.xy += n.xy >= 0.0 ? 
-t : t; + n.xy += select(n.xy >= 0.0, -t, t); return normalize(n); } -float3 GetPositionFromDepth(float2 tc01, float depthZW, float4x4 invMatrix) +float3 GetPositionFromDepth(float2 tc01, float depthZW, matrix invMatrix) { float x = tc01.x * 2.0 - 1.0; float y = (1.0 - tc01.y) * 2.0 - 1.0; @@ -180,3 +180,139 @@ float3 GetPositionFromDepth(float2 tc01, float depthZW, float4x4 invMatrix) return result; } + +float3 TransformNormal(float3 normal, matrix transform) +{ + return mul(transform, float4(normal, 0)).xyz; +} + +float3 TransformPoint(float3 position, matrix transform) +{ + float4 result = mul(transform, float4(position, 1)); + return result.xyz / result.w; +} + +float3 RandomColorFromUInt(uint id) +{ + float r = frac(0.420 + 1.337 * id); + float g = frac(0.69 + 1.666 * id); + float b = frac(0.13 + 1.777 * id); + + return float3(r, g, b); +} + +float3 BiasPosition(float3 position, float3 normal) +{ + float3 result = position + sign(normal) * abs(position * 0.0000002); + + return result; +} + +// from Mauricio Vives, https://gist.github.com/pixnblox/5e64b0724c186313bc7b6ce096b08820 +// Projects the specified position (point) onto the plane with the specified origin and normal. +float3 ProjectPointOnPlane(float3 position, float3 planeOrigin, float3 planeNormal) +{ + return position - dot(position - planeOrigin, planeNormal) * planeNormal; +} + +// from Mauricio Vives, https://gist.github.com/pixnblox/5e64b0724c186313bc7b6ce096b08820 +// Computes the shading position of the specified geometric position and vertex positions and +// normals. For a triangle with normals describing a convex surface, this point will be slightly +// above the surface. For a concave surface, the geometry position is used directly. +// NOTE: The difference between the shading position and geometry position is significant when +// casting shadow rays. 
If the geometric position is used, a triangle may fully shadow itself when +// it should be partly lit based on the shading normals; this is the "shadow terminator" problem. +float3 GetShadingPosition( + float3 geomPosition, float3 shadingNormal, + float3 positions[3], float3 normals[3], float3 barycentrics) +{ + // Project the geometric position (inside the triangle) to the planes defined by the vertex + // positions and normals. + float3 p0 = ProjectPointOnPlane(geomPosition, positions[0], normals[0]); + float3 p1 = ProjectPointOnPlane(geomPosition, positions[1], normals[1]); + float3 p2 = ProjectPointOnPlane(geomPosition, positions[2], normals[2]); + + // Interpolate the projected positions using the barycentric coordinates, which gives the + // shading position. + float3 shadingPosition = p0 * barycentrics.x + p1 * barycentrics.y + p2 * barycentrics.z; + + // Return the shading position for a convex triangle, where the shading point is above the + // triangle based on the shading normal. Otherwise use the geometric position. + bool convex = dot(shadingPosition - geomPosition, shadingNormal) > 0.0; + float3 result = convex ? 
shadingPosition : BiasPosition(geomPosition, shadingNormal); + + return result; +} + +// based on "Hacking the Shadow Terminator" by Johannes Hanika in "Ray Tracing Gems II" +float3 GetShadingPositionV2(float3 geomPosition, float3 positions[3], float3 normals[3], float3 barycentrics) +{ + float3 tmpu = geomPosition - positions[0]; + float3 tmpv = geomPosition - positions[1]; + float3 tmpw = geomPosition - positions[2]; + float dotu = min(0.0, dot(tmpu, normals[0])); + float dotv = min(0.0, dot(tmpv, normals[1])); + float dotw = min(0.0, dot(tmpw, normals[2])); + tmpu -= dotu * normals[0]; + tmpv -= dotv * normals[1]; + tmpw -= dotw * normals[2]; + float3 shadingPosition = geomPosition + 1.0 * (barycentrics.x * tmpu + barycentrics.y * tmpv + barycentrics.z * tmpw); + + return shadingPosition; +} + +template +T trilerp(T v0, T v1, T v2, float3 barycentrics) +{ + return + barycentrics.x * v0 + + barycentrics.y * v1 + + barycentrics.z * v2; +} + +template<> +float trilerp(float v0, float v1, float v2, float3 barycentrics) +{ + return dot(float3(v0, v1, v2), barycentrics); +} + +// Interleaved Gradient Noise by Jorge Jimenez +// from "Next Generation Post Processing in Call of Duty: Advanced Warfare" +float InterleavedGradientNoise(float2 uv) +{ + float3 magic = float3(0.06711056, 0.00583715, 52.9829189); + return frac(magic.z * frac(dot(uv, magic.xy))); +} + +template +bool IsValueInRange(T p, T min, T max) +{ + return all(p >= min) && all(p <= max); +} + +template +uint2 GetTextureSize(Texture2D texture0) +{ + uint2 size; + texture0.GetDimensions(size.x, size.y); + + return size; +} + +// by Sakib Saikia, https://sakibsaikia.github.io/graphics/2022/01/04/Nan-Checks-In-HLSL.html +bool IsNan(float x) +{ + return (asuint(x) & 0x7FFFFFFFu) > 0x7F800000u; +} + +bool isnan(float x) +{ + return IsNan(x); +} + +// from "Using Blue Noise For Raytraced Soft Shadows" by Alan Wolfe in "Ray Tracing Gems II" +// this turns the blue noise into a low discrepancy additive recurrence 
+float AnimateBlueNoise(float blueNoise, uint frameIndex) +{ + return frac(blueNoise + float(frameIndex % 32) * 0.61803399); +} diff --git a/code/renderer/shaders/crp/dl_denoising.hlsl b/code/renderer/shaders/crp/dl_denoising.hlsl new file mode 100644 index 0000000..c4b899a --- /dev/null +++ b/code/renderer/shaders/crp/dl_denoising.hlsl @@ -0,0 +1,93 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . 
+=========================================================================== +*/ +// denoising direct lighting from dynamic lights + + +#include "common.hlsli" +#include "fullscreen.hlsli" +#include "scene_view.h.hlsli" + + +cbuffer RootConstants +{ + uint textureIndex; +}; + +float4 ps(VOut input) : SV_Target +{ + SceneView scene = GetSceneView(); + Texture2D shadingPositionTexture = ResourceDescriptorHeap[scene.shadingPositionTextureIndex]; + Texture2D texture0 = ResourceDescriptorHeap[textureIndex]; + int2 textureMax = GetTextureSize(texture0) - int2(1, 1); + + int2 tcFrag = int2(input.position.xy); + float3 positionFrag = shadingPositionTexture.Load(uint3(tcFrag, 0)).xyz; + float distThreshold = 8.0; + + float maxError = 0.0; + for(int y = -4; y <= 4; y++) + { + for(int x = -4; x <= 4; x++) + { + int2 tc = tcFrag + int2(x, y); + if(!IsValueInRange(tc, int2(0, 0), textureMax)) // reject the sample coordinate, not the center pixel + { + continue; + } + + uint3 tc3 = uint3(tc, 0); + float errorFrag = texture0.Load(tc3).w; + maxError = max(maxError, errorFrag); + } + } + maxError = saturate(maxError); + + float3 accum = float3(0, 0, 0); + float weightSum = 0.0; + int blurRadius = 1 + int(maxError * 8.0); + for(int y = -blurRadius; y <= blurRadius; y++) + { + for(int x = -blurRadius; x <= blurRadius; x++) + { + int2 tc = tcFrag + int2(x, y); + if(!IsValueInRange(tc, int2(0, 0), textureMax)) // keep out-of-texture samples out of the weighted average + { + continue; + } + + uint3 tc3 = uint3(tc, 0); + float3 positionSample = shadingPositionTexture.Load(tc3).xyz; + float3 colorSample = texture0.Load(tc3).rgb; + float posWeight = 1.0 - saturate(distance(positionSample, positionFrag) / distThreshold); + float weight = posWeight; + accum += colorSample * posWeight; + weightSum += weight; + } + } + + if(weightSum > 0.0) + { + accum /= weightSum; + } + float4 result = float4(accum, 1); + + return result; +} diff --git a/code/renderer/shaders/crp/dl_draw.hlsl b/code/renderer/shaders/crp/dl_draw.hlsl new file mode 100644 index 0000000..24cb1d4 --- /dev/null +++ 
b/code/renderer/shaders/crp/dl_draw.hlsl @@ -0,0 +1,256 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . +=========================================================================== +*/ +// direct lighting from dynamic lights + + +#include "common.hlsli" +#include "fullscreen.hlsli" +#include "raytracing.h.hlsli" +#include "scene_view.h.hlsli" +#include "alpha_test.h.hlsli" + + +cbuffer RootConstants +{ + uint blueNoiseTextureIndex; +}; + +#define CLASS_OPAQUE 0u +#define CLASS_INVISIBLE 1u +#define CLASS_TRANSLUCENT 2u + +uint ClassifyNonOpaqueTriangle(inout float3 light, StructuredBuffer tlasInstanceBuffer, uint instanceId, uint meshId, uint triangleId, float2 bary2, bool frontFace) +{ + TLASInstance instance = tlasInstanceBuffer[instanceId]; +#if 0 + // @TODO: is this needed or not? 
+ // cull mode: 0 is front-sided, 1 is back-sided + if((frontFace && instance.cullMode == 1) || + (!frontFace && instance.cullMode == 0)) + { + return CLASS_INVISIBLE; + } +#endif + StructuredBuffer meshBuffer = ResourceDescriptorHeap[instance.meshBufferIndex]; + BLASMesh mesh = meshBuffer[meshId]; + float3 barycentrics = float3(1.0 - bary2.x - bary2.y, bary2.x, bary2.y); + StructuredBuffer vertexBuffer = ResourceDescriptorHeap[instance.vertexBufferIndex]; + StructuredBuffer indexBuffer = ResourceDescriptorHeap[instance.indexBufferIndex]; + uint firstIndex = mesh.firstIndex + triangleId * 3; + uint vtxIdx0 = mesh.firstVertex + indexBuffer[firstIndex + 0]; + uint vtxIdx1 = mesh.firstVertex + indexBuffer[firstIndex + 1]; + uint vtxIdx2 = mesh.firstVertex + indexBuffer[firstIndex + 2]; + BLASVertex v0 = vertexBuffer[vtxIdx0]; + BLASVertex v1 = vertexBuffer[vtxIdx1]; + BLASVertex v2 = vertexBuffer[vtxIdx2]; + float2 texCoords = trilerp(v0.texCoords, v1.texCoords, v2.texCoords, barycentrics); + float4 vertexColor = trilerp(UnpackColor(v0.color), UnpackColor(v1.color), UnpackColor(v2.color), barycentrics); + Texture2D texture0 = ResourceDescriptorHeap[mesh.textureIndex]; + SamplerState sampler0 = SamplerDescriptorHeap[mesh.samplerIndex]; + float4 textureColor = texture0.SampleLevel(sampler0, texCoords, 0); + float4 hitColor = vertexColor * textureColor; + if(mesh.alphaTestMode == ATEST_NONE) // no alpha test (see alpha_test.h.hlsli): the hit is blended into the light instead + { + float3 blended; + if(mesh.blendBits == (GLS_SRCBLEND_ONE | GLS_DSTBLEND_ONE)) + { + blended = lerp(light, hitColor.rgb, Brightness(hitColor.rgb)); + } + else + { + blended = Blend(hitColor, float4(light, 1), mesh.blendBits).rgb; + } + if(all(blended == light)) + { + return CLASS_INVISIBLE; + } + light = blended; + return CLASS_TRANSLUCENT; + } + if(FailsAlphaTest(hitColor.a, mesh.alphaTestMode)) + { + return CLASS_INVISIBLE; + } + return CLASS_OPAQUE; +} + +float2 MapSquareToDisk(float2 square01) +{ + float radius = sqrt(square01.x); + float angle = square01.y * 2.0 * 3.14159265359; + 
float2 sinCos; + sincos(angle, sinCos.x, sinCos.y); + float2 result = radius * sinCos; + + return result; +} + +float3 GetRayDirectionForSphereLight(float2 square01, float3 surfacePos, float3 lightPos, float worldRadius) +{ + float3 direction = normalize(lightPos - surfacePos); + float radius = worldRadius / length(lightPos - surfacePos); + float2 pointInDisk = MapSquareToDisk(square01) * radius; + float3 tangent = normalize(cross(direction, abs(direction.y) < 0.999 ? float3(0, 1, 0) : float3(1, 0, 0))); // use the X axis as helper when direction is (anti)parallel to Y, where the cross product vanishes + float3 bitangent = normalize(cross(tangent, direction)); + float3 result = normalize(direction + pointInDisk.x * tangent + pointInDisk.y * bitangent); + + return result; +} + +// true when fully in shadow +bool TraceShadowRay( + out float t, inout float3 light, + RaytracingAccelerationStructure rtas, StructuredBuffer instBuffer, + float3 position, float3 direction, float dist) +{ + RayDesc ray; + ray.Origin = position; + ray.Direction = direction; + ray.TMin = 0.0; + ray.TMax = dist; + + t = 0.0; + float translucentT = 0.0; + RayQuery q; + q.TraceRayInline(rtas, RAY_FLAG_NONE, 0xFF, ray); + while(q.Proceed()) + { + if(q.CandidateType() == CANDIDATE_NON_OPAQUE_TRIANGLE) + { + uint type = ClassifyNonOpaqueTriangle( + light, + instBuffer, + q.CandidateInstanceIndex(), + q.CandidateGeometryIndex(), + q.CandidatePrimitiveIndex(), + q.CandidateTriangleBarycentrics(), + q.CandidateTriangleFrontFace()); + if(type == CLASS_OPAQUE) + { + q.CommitNonOpaqueTriangleHit(); + } + else if(type == CLASS_TRANSLUCENT) + { + translucentT = q.CandidateTriangleRayT(); + t = translucentT; + } + } + } + + if(q.CommittedStatus() == COMMITTED_TRIANGLE_HIT) + { + t = q.CommittedRayT(); + } + + if(q.CommittedStatus() == COMMITTED_TRIANGLE_HIT && t > translucentT) + { + return true; + } + + return false; +} + +// true when fully in shadow +bool TraceShadowRayOpaqueOnly( + out float t, inout float3 light, + RaytracingAccelerationStructure rtas, StructuredBuffer instBuffer, + float3 position, float3 direction, float dist) +{ + RayDesc 
ray; + ray.Origin = position; + ray.Direction = direction; + ray.TMin = 0.0; + ray.TMax = dist; + + t = 0.0; + bool keepLight = false; + RayQuery q; + q.TraceRayInline(rtas, RAY_FLAG_NONE, 0xFF, ray); + q.Proceed(); + if(q.CommittedStatus() == COMMITTED_TRIANGLE_HIT) + { + t = q.CommittedRayT(); + return true; + } + + return false; +} + +float4 ps(VOut input) : SV_Target +{ + SceneView scene = GetSceneView(); + RaytracingAccelerationStructure rtas = ResourceDescriptorHeap[scene.tlasBufferIndex]; + Texture2D normalTexture = ResourceDescriptorHeap[scene.normalTextureIndex]; + Texture2D shadingPositionTexture = ResourceDescriptorHeap[scene.shadingPositionTextureIndex]; + StructuredBuffer tlasInstanceBuffer = ResourceDescriptorHeap[scene.tlasInstanceBufferIndex]; + Texture2D blueNoiseTexture = ResourceDescriptorHeap[blueNoiseTextureIndex]; + + uint2 blueNoiseTextureSize; + blueNoiseTexture.GetDimensions(blueNoiseTextureSize.x, blueNoiseTextureSize.y); + + uint3 tc = uint3(input.position.xy, 0); + float3 normalWS = normalize(OctDecode(normalTexture.Load(tc))); + float3 positionWS = shadingPositionTexture.Load(tc).xyz; + + float error = 0.0; + float3 pixelAccum = float3(0, 0, 0); + for(uint i = 0; i < scene.lightCount; i++) + { + float3 lightPosition = scene.lights[i].position; + float dist = distance(positionWS, lightPosition); + float radius = scene.lights[i].radius; + if(dist >= radius) + { + continue; + } + + float innerRadius = radius / 100.0; + float intensity = saturate(1.0 - dist / radius); + float3 lightDir = normalize(lightPosition - positionWS); + float3 lightRaw = scene.lights[i].color * intensity * max(dot(normalWS, lightDir), 0.0); + const uint SampleCount = 4; + + float3 lightAccum = float3(0, 0, 0); + for(uint r = 0; r < SampleCount; r++) + { + float3 light = lightRaw; + uint2 pos = uint2(input.position.xy) + uint2(r * 17, r * 13 + 7); + uint2 tc = pos % blueNoiseTextureSize; + float2 square01 = blueNoiseTexture.Load(uint3(tc, 0)).xy; + float3 dir = 
GetRayDirectionForSphereLight(square01, positionWS, lightPosition, innerRadius); + float t; + bool inShadow = TraceShadowRay(t, light, rtas, tlasInstanceBuffer, positionWS, dir, dist); + error = max(error, t / radius); + if(inShadow) + { + continue; + } + + lightAccum += light; + } + + pixelAccum += lightAccum / float(SampleCount); + } + + float4 result = float4(pixelAccum, saturate(error)); + + return result; +} diff --git a/code/renderer/shaders/crp/gbufferviz_depth.hlsl b/code/renderer/shaders/crp/gbufferviz_depth.hlsl new file mode 100644 index 0000000..7b7751f --- /dev/null +++ b/code/renderer/shaders/crp/gbufferviz_depth.hlsl @@ -0,0 +1,46 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . 
+=========================================================================== +*/ +// linearizes raw depth buffer values for visualization + + +#include "common.hlsli" +#include "fullscreen.hlsli" + + +cbuffer RootConstants +{ + uint depthTextureIndex; + float linearDepthA; + float linearDepthB; + float zFarInv; +}; + +float4 ps(VOut input) : SV_Target +{ + Texture2D depthTexture = ResourceDescriptorHeap[depthTextureIndex]; + uint3 tc = uint3(input.position.xy, 0); + float depthZW = depthTexture.Load(tc); + float depth = LinearDepth(depthZW, linearDepthA, linearDepthB); + float depth01 = depth * zFarInv; + float4 result = float4(depth01.xxx, 1); + + return result; +} diff --git a/code/renderer/shaders/crp/gbufferviz_normal.hlsl b/code/renderer/shaders/crp/gbufferviz_normal.hlsl new file mode 100644 index 0000000..97e7909 --- /dev/null +++ b/code/renderer/shaders/crp/gbufferviz_normal.hlsl @@ -0,0 +1,42 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . 
+=========================================================================== +*/ +// decodes octahedron-encoded normals for visualization + + +#include "common.hlsli" +#include "fullscreen.hlsli" + + +cbuffer RootConstants +{ + uint normalTextureIndex; +}; + +float4 ps(VOut input) : SV_Target +{ + Texture2D normalTexture = ResourceDescriptorHeap[normalTextureIndex]; + uint3 tc = uint3(input.position.xy, 0); + float3 normalWS = OctDecode(normalTexture.Load(tc)); + float3 normal = normalize(normalWS) * 0.5 + 0.5; + float4 result = float4(normal, 1); + + return result; +} diff --git a/code/renderer/shaders/crp/gbufferviz_position.hlsl b/code/renderer/shaders/crp/gbufferviz_position.hlsl new file mode 100644 index 0000000..41179c3 --- /dev/null +++ b/code/renderer/shaders/crp/gbufferviz_position.hlsl @@ -0,0 +1,44 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . 
+=========================================================================== +*/ +// decodes the per-axis differences between geometry and shading positions + + +#include "common.hlsli" +#include "fullscreen.hlsli" + + +cbuffer RootConstants +{ + uint textureIndex; + uint coloredDelta; +}; + +float4 ps(VOut input) : SV_Target +{ + Texture2D positionTexture = ResourceDescriptorHeap[textureIndex]; + uint3 tc = uint3(input.position.xy, 0); + float delta = positionTexture.Load(tc).w; + float4 unpacked = UnpackColor(asuint(delta)); + float3 color = coloredDelta > 0 ? unpacked.rgb : unpacked.aaa; + float4 result = float4(color, 1); + + return result; +} diff --git a/code/renderer/shaders/crp/imgui.hlsl b/code/renderer/shaders/crp/imgui.hlsl index bd1c65c..f5f8f33 100644 --- a/code/renderer/shaders/crp/imgui.hlsl +++ b/code/renderer/shaders/crp/imgui.hlsl @@ -1,6 +1,6 @@ /* =========================================================================== -Copyright (C) 2023 Gian 'myT' Schellenbaum +Copyright (C) 2023-2024 Gian 'myT' Schellenbaum This file is part of Challenge Quake 3 (CNQ3). 
@@ -27,6 +27,7 @@ cbuffer RootConstants : register(b0) uint textureIndex; uint samplerIndex; float mipIndex; + float colorScale; }; struct VIn @@ -57,7 +58,8 @@ float4 ps(VOut input) : SV_Target { Texture2D texture0 = ResourceDescriptorHeap[textureIndex]; SamplerState sampler0 = SamplerDescriptorHeap[samplerIndex]; - float4 result = input.col * texture0.SampleLevel(sampler0, input.uv, mipIndex); + float4 color = float4(colorScale.xxx, 1); + float4 result = input.col * color * texture0.SampleLevel(sampler0, input.uv, mipIndex); return result; } diff --git a/code/renderer/shaders/crp/magnifier.hlsl b/code/renderer/shaders/crp/magnifier.hlsl index 32f9e41..cddb663 100644 --- a/code/renderer/shaders/crp/magnifier.hlsl +++ b/code/renderer/shaders/crp/magnifier.hlsl @@ -50,7 +50,7 @@ float4 ps(VOut input) : SV_Target // we need to map diff values -N..-1 to -1 and 0..N-1 to 0 after division // hence the N-1 offset for negative diff values int2 negOffset = int2(magnifierScale - 1, magnifierScale - 1); - diff -= diff < int2(0, 0) ? negOffset : int2(0, 0); + diff -= select(diff < int2(0, 0), negOffset, int2(0, 0)); tc = tcCursor + diff / magnifierScale; } float3 color = colorTexture.Load(int3(tc.x, tc.y, 0)).rgb; diff --git a/code/renderer/shaders/crp/opaque.hlsl b/code/renderer/shaders/crp/opaque.hlsl index d9c7cb1..2d3738b 100644 --- a/code/renderer/shaders/crp/opaque.hlsl +++ b/code/renderer/shaders/crp/opaque.hlsl @@ -22,22 +22,22 @@ along with Challenge Quake 3. If not, see . 
#include "common.hlsli" -#include "world.h.hlsli" -#include "world.hlsli" +#include "alpha_test.h.hlsli" +#include "scene_view.h.hlsli" cbuffer RootConstants { // geometry matrix modelViewMatrix; - matrix projectionMatrix; - float4 clipPlane; // general uint textureIndex; uint samplerIndex; uint shaderIndexBufferIndex; uint alphaTest; + uint lightTextureIndex; + uint lightmapPass; float greyscale; // shader trace @@ -64,14 +64,15 @@ struct VOut VOut vs(VIn input) { + SceneView scene = GetSceneView(); float4 positionVS = mul(modelViewMatrix, float4(input.position.xyz, 1)); VOut output; - output.position = mul(projectionMatrix, positionVS); + output.position = mul(scene.projectionMatrix, positionVS); output.normal = input.normal; output.texCoords = input.texCoords; output.color = input.color; - output.clipDist = dot(positionVS, clipPlane); + output.clipDist = dot(positionVS, scene.clipPlane); return output; } @@ -88,6 +89,13 @@ float4 ps(VOut input) : SV_Target discard; } + if(lightmapPass) + { + Texture2D lightTexture = ResourceDescriptorHeap[lightTextureIndex]; + float3 directLight = lightTexture.Load(uint3(input.position.xy, 0)).rgb; + dst.rgb += directLight; + } + dst = MakeGreyscale(dst, greyscale); // @TODO: dithering (need to figure out the tone mapping function first) diff --git a/code/renderer/shaders/crp/prepass.hlsl b/code/renderer/shaders/crp/prepass.hlsl index b2673ab..e8a333c 100644 --- a/code/renderer/shaders/crp/prepass.hlsl +++ b/code/renderer/shaders/crp/prepass.hlsl @@ -22,19 +22,15 @@ along with Challenge Quake 3. If not, see . 
#include "common.hlsli" -#include "world.h.hlsli" -#include "world.hlsli" +#include "alpha_test.h.hlsli" +#include "scene_view.h.hlsli" cbuffer RootConstants { - // geometry matrix modelViewMatrix; - matrix projectionMatrix; + matrix modelMatrix; matrix normalMatrix; - float4 clipPlane; - - // general uint textureIndex; uint samplerIndex; uint alphaTest; @@ -51,7 +47,8 @@ struct VIn struct VOut { float4 position : SV_Position; - float3 normal : NORMAL; + nointerpolation float3 normalWS : NORMAL; + nointerpolation float3 positionWS : POSITION; float2 texCoords : TEXCOORD0; float4 color : COLOR0; float clipDist : SV_ClipDistance0; @@ -59,14 +56,17 @@ struct VOut VOut vs(VIn input) { + SceneView scene = GetSceneView(); + matrix projectionMatrix = scene.projectionMatrix; float4 positionVS = mul(modelViewMatrix, float4(input.position.xyz, 1)); VOut output; output.position = mul(projectionMatrix, positionVS); - output.normal = mul(normalMatrix, float4(input.normal, 0)).xyz; + output.normalWS = mul(normalMatrix, float4(input.normal, 0)).xyz; + output.positionWS = mul(modelMatrix, float4(input.position, 1)).xyz; output.texCoords = input.texCoords; output.color = input.color; - output.clipDist = dot(positionVS, clipPlane); + output.clipDist = dot(positionVS, scene.clipPlane); return output; } @@ -75,9 +75,25 @@ struct POut { float2 normal : SV_Target0; float2 motionVector : SV_Target1; + float4 shadingPosition : SV_Target2; }; -POut ps(VOut input) +float3 FixNormal(float3 vertexNormal, float3 faceNormal) +{ + if(length(vertexNormal) < 0.5) + { + return faceNormal; + } + + if(dot(vertexNormal, faceNormal) < 0.0) + { + return -vertexNormal; + } + + return vertexNormal; +} + +POut ps(VOut input, float3 barycentrics : SV_Barycentrics) { if(alphaTest != ATEST_NONE) { @@ -90,9 +106,29 @@ POut ps(VOut input) } } + float3 p0 = GetAttributeAtVertex(input.positionWS, 0); + float3 p1 = GetAttributeAtVertex(input.positionWS, 1); + float3 p2 = GetAttributeAtVertex(input.positionWS, 2); 
+ float3 position = barycentrics.x * p0 + barycentrics.y * p1 + barycentrics.z * p2; + + float3 n0 = GetAttributeAtVertex(input.normalWS, 0); + float3 n1 = GetAttributeAtVertex(input.normalWS, 1); + float3 n2 = GetAttributeAtVertex(input.normalWS, 2); + float3 normal = barycentrics.x * n0 + barycentrics.y * n1 + barycentrics.z * n2; + + float3 pos[3] = { p0, p1, p2 }; + float3 nor[3] = { n0, n1, n2 }; + float3 shadingPosition = GetShadingPosition(position, normal, pos, nor, barycentrics); + shadingPosition += 0.01 * normal; + + float3 dist3 = saturate(abs(shadingPosition - position)); + float dist = saturate(distance(shadingPosition, position)); + float positionDelta = asfloat(PackColor(float4(dist3, dist))); + POut output; - output.normal = OctEncode(normalize(input.normal)); + output.normal = OctEncode(normalize(normal)); output.motionVector = float2(0, 0); // @TODO: + output.shadingPosition = float4(shadingPosition, positionDelta); return output; } diff --git a/code/renderer/shaders/crp/raytracing.h.hlsli b/code/renderer/shaders/crp/raytracing.h.hlsli new file mode 100644 index 0000000..9c040fd --- /dev/null +++ b/code/renderer/shaders/crp/raytracing.h.hlsli @@ -0,0 +1,58 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . 
+=========================================================================== +*/ +// raytracing structures shared with C++ code + + +#pragma once + + +#include "typedefs.h.hlsli" +#if defined(__cplusplus) +# pragma pack(push, 4) +#endif + +struct BLASVertex +{ + float2 texCoords; + color4ub color; +}; + +struct BLASMesh +{ + uint firstVertex; + uint firstIndex; + uint textureIndex; + uint samplerIndex; + uint alphaTestMode; + uint blendBits; +}; + +struct TLASInstance +{ + uint vertexBufferIndex; + uint indexBufferIndex; + uint meshBufferIndex; + uint cullMode; +}; + +#if defined(__cplusplus) +# pragma pack(pop) +#endif diff --git a/code/renderer/shaders/crp/scene_view.h.hlsli b/code/renderer/shaders/crp/scene_view.h.hlsli new file mode 100644 index 0000000..862de44 --- /dev/null +++ b/code/renderer/shaders/crp/scene_view.h.hlsli @@ -0,0 +1,78 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . 
+=========================================================================== +*/ +// shared structure for a given scene view + + +#pragma once + + +#include "typedefs.h.hlsli" +#if defined(__cplusplus) +# pragma pack(push, 4) +#endif + +struct DynamicLight +{ + float3 position; + float radius; + float3 color; + float padding; +}; + +#define SCENE_VIEW_MAX_LIGHTS 32 + +struct SceneView +{ + matrix projectionMatrix; + matrix invProjectionMatrix; + matrix viewMatrix; + matrix invViewMatrix; + float4 clipPlane; + float4 debug; + uint sceneViewIndex; + uint frameIndex; + uint depthTextureIndex; + uint normalTextureIndex; + uint shadingPositionTextureIndex; + uint lightTextureIndex; + uint tlasBufferIndex; + uint tlasInstanceBufferIndex; + uint lightCount; + DynamicLight lights[SCENE_VIEW_MAX_LIGHTS]; +}; + +#if defined(__cplusplus) +# pragma pack(pop) +#endif + +#if defined(__cplusplus) + static_assert(sizeof(DynamicLight) == 32, "sizeof(DynamicLight) is wrong"); +#endif + +#if !defined(__cplusplus) + SceneView GetSceneView() + { + StructuredBuffer sceneViewBuffer = ResourceDescriptorHeap[0]; + SceneView sceneView = sceneViewBuffer[0]; + + return sceneView; + } +#endif diff --git a/code/renderer/shaders/crp/transp_draw.hlsl b/code/renderer/shaders/crp/transp_draw.hlsl index fb45ff9..17456ea 100644 --- a/code/renderer/shaders/crp/transp_draw.hlsl +++ b/code/renderer/shaders/crp/transp_draw.hlsl @@ -1,6 +1,6 @@ /* =========================================================================== -Copyright (C) 2023 Gian 'myT' Schellenbaum +Copyright (C) 2023-2024 Gian 'myT' Schellenbaum This file is part of Challenge Quake 3 (CNQ3). @@ -22,17 +22,17 @@ along with Challenge Quake 3. If not, see . 
#include "common.hlsli" -#include "world.h.hlsli" -#include "world.hlsli" +#include "alpha_test.h.hlsli" #include "oit.h.hlsli" +#include "scene_view.h.hlsli" cbuffer RootConstants { + // geometry matrix modelViewMatrix; - matrix projectionMatrix; - float4 clipPlane; + // general uint textureIndex; uint samplerIndex; uint alphaTest; @@ -67,14 +67,16 @@ struct VOut VOut vs(VIn input) { + SceneView scene = GetSceneView(); float4 positionVS = mul(modelViewMatrix, float4(input.position.xyz, 1)); + matrix projectionMatrix = scene.projectionMatrix; VOut output; output.position = mul(projectionMatrix, positionVS); output.normal = input.normal; output.texCoords = input.texCoords; output.color = input.color; - output.clipDist = dot(positionVS, clipPlane); + output.clipDist = dot(positionVS, scene.clipPlane); output.proj2232 = float2(-projectionMatrix[2][2], projectionMatrix[2][3]); output.depthVS = -positionVS.z; diff --git a/code/renderer/shaders/crp/world.h.hlsli b/code/renderer/shaders/crp/typedefs.h.hlsli similarity index 73% rename from code/renderer/shaders/crp/world.h.hlsli rename to code/renderer/shaders/crp/typedefs.h.hlsli index 336f92a..ca3b746 100644 --- a/code/renderer/shaders/crp/world.h.hlsli +++ b/code/renderer/shaders/crp/typedefs.h.hlsli @@ -1,6 +1,6 @@ /* =========================================================================== -Copyright (C) 2023 Gian 'myT' Schellenbaum +Copyright (C) 2024 Gian 'myT' Schellenbaum This file is part of Challenge Quake 3 (CNQ3). @@ -18,13 +18,19 @@ You should have received a copy of the GNU General Public License along with Challenge Quake 3. If not, see . 
=========================================================================== */ -// shared world rendering constants +// type definitions shared with C++ code #pragma once -#define ATEST_NONE 0 -#define ATEST_GT_0 1 -#define ATEST_LT_HALF 2 -#define ATEST_GE_HALF 3 +#if defined(__cplusplus) + typedef uint32_t uint; + typedef vec2_t float2; + typedef vec3_t float3; + typedef vec4_t float4; + typedef matrix4x4_t matrix; + typedef color4ub_t color4ub; +#else + typedef uint color4ub; +#endif diff --git a/code/renderer/shaders/crp/wireframe_normals.hlsl b/code/renderer/shaders/crp/wireframe_normals.hlsl new file mode 100644 index 0000000..4e5d554 --- /dev/null +++ b/code/renderer/shaders/crp/wireframe_normals.hlsl @@ -0,0 +1,65 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . 
+=========================================================================== +*/ +// wireframe normals for debugging + + +#include "common.hlsli" +#include "scene_view.h.hlsli" + + +cbuffer RootConstants +{ + matrix modelViewMatrix; +}; + +struct VIn +{ + float3 position : POSITION; + float4 color : COLOR0; +}; + +struct VOut +{ + float4 position : SV_Position; + float4 color : COLOR0; + float clipDist : SV_ClipDistance0; +}; + +VOut vs(VIn input) +{ + SceneView scene = GetSceneView(); + float4 positionVS = mul(modelViewMatrix, float4(input.position.xyz, 1)); + + VOut output; + output.position = mul(scene.projectionMatrix, positionVS); + output.clipDist = dot(positionVS, scene.clipPlane); + output.color = input.color; + + return output; +} + +[earlydepthstencil] +float4 ps(VOut input) : SV_Target +{ + float4 result = float4(input.color.rgb, 1); + + return result; +} diff --git a/code/renderer/srp_imgui.cpp b/code/renderer/srp_imgui.cpp index f962986..46903a8 100644 --- a/code/renderer/srp_imgui.cpp +++ b/code/renderer/srp_imgui.cpp @@ -30,6 +30,7 @@ along with Challenge Quake 3. If not, see . 
#pragma pack(push, 4) + struct VertexRC { float mvp[16]; @@ -40,7 +41,9 @@ struct PixelRC uint32_t texture; uint32_t sampler; float mip; + float colorScale; }; + #pragma pack(pop) @@ -256,6 +259,7 @@ void ImGUI::Draw(HTexture renderTarget) pixelRC.texture = (uint32_t)cmd->TextureId & 0xFFFF; pixelRC.sampler = GetSamplerIndex(TW_CLAMP_TO_EDGE, TextureFilter::Linear); pixelRC.mip = (float)(((uint32_t)cmd->TextureId >> 16) & 0xFFFF); + pixelRC.colorScale = tr.identityLight; if(ddhi) { CmdSetGraphicsRootConstants(sizeof(vertexRC), sizeof(pixelRC), &pixelRC); diff --git a/code/renderer/srp_local.h b/code/renderer/srp_local.h index de16472..beb334a 100644 --- a/code/renderer/srp_local.h +++ b/code/renderer/srp_local.h @@ -433,3 +433,12 @@ inline bool IsDepthFadeEnabled(const shader_t& shader) shader.dfType > DFT_NONE && shader.dfType < DFT_TBD; } + +inline bool IsViewportFullscreen(const viewParms_t& vp) +{ + return + vp.viewportX == 0 && + vp.viewportY == 0 && + vp.viewportWidth == glConfig.vidWidth && + vp.viewportHeight == glConfig.vidHeight; +} diff --git a/code/renderer/srp_main.cpp b/code/renderer/srp_main.cpp index 661dd1a..7123593 100644 --- a/code/renderer/srp_main.cpp +++ b/code/renderer/srp_main.cpp @@ -23,7 +23,7 @@ along with Challenge Quake 3. If not, see . 
#include "srp_local.h" #include "../client/cl_imgui.h" -#include "shaders/crp/world.h.hlsli" +#include "shaders/crp/alpha_test.h.hlsli" extern IRenderPipeline* grpp; diff --git a/code/renderer/tr_bsp.cpp b/code/renderer/tr_bsp.cpp index 0f789a6..d773783 100644 --- a/code/renderer/tr_bsp.cpp +++ b/code/renderer/tr_bsp.cpp @@ -405,6 +405,9 @@ static void ParseFace( const dsurface_t* ds, const drawVert_t* verts, msurface_t } VectorAdd(mins, maxs, cv->localOrigin); VectorScale(cv->localOrigin, 0.5f, cv->localOrigin); + + R_SmoothNormals( &cv->verts[0].normal[0], sizeof(cv->verts[0]), cv->indexes, + &cv->verts[0].xyz[0], sizeof(cv->verts[0]), cv->numVerts, cv->numIndexes ); } @@ -511,6 +514,9 @@ static void ParseTriSurf( const dsurface_t* ds, const drawVert_t* verts, msurfac ri.Error( ERR_DROP, "Bad index in triangle surface" ); } } + + R_SmoothNormals( &tri->verts[0].normal[0], sizeof(tri->verts[0]), tri->indexes, + &tri->verts[0].xyz[0], sizeof(tri->verts[0]), tri->numVerts, tri->numIndexes ); } @@ -1638,6 +1644,7 @@ void RE_LoadWorldMap( const char* name ) // clear tr.world so if the level fails to load, the next // try will not look at the partially loaded version tr.world = NULL; + tr.forceHighestLod = true; Com_Memset( &s_worldData, 0, sizeof( s_worldData ) ); Q_strncpyz( s_worldData.name, name, sizeof( s_worldData.name ) ); @@ -1689,6 +1696,7 @@ void RE_LoadWorldMap( const char* name ) } tr.worldMapLoaded = qtrue; + tr.forceHighestLod = false; } diff --git a/code/renderer/tr_cmds.cpp b/code/renderer/tr_cmds.cpp index 914b661..3e1a95d 100644 --- a/code/renderer/tr_cmds.cpp +++ b/code/renderer/tr_cmds.cpp @@ -307,6 +307,8 @@ void RE_BeginFrame( stereoFrame_t stereoFrame ) tr.frameCount++; tr.frameSceneNum = 0; tr.renderMode = RM_NONE; + tr.sceneCounterRT = 0; + tr.numRTSurfs = 0; // delayed screenshot if ( r_delayedScreenshotPending ) { diff --git a/code/renderer/tr_gui.cpp b/code/renderer/tr_gui.cpp index 282c92c..edb2efc 100644 --- a/code/renderer/tr_gui.cpp +++ 
b/code/renderer/tr_gui.cpp @@ -1,6 +1,6 @@ /* =========================================================================== -Copyright (C) 2023 Gian 'myT' Schellenbaum +Copyright (C) 2023-2024 Gian 'myT' Schellenbaum This file is part of Challenge Quake 3 (CNQ3). @@ -646,6 +646,11 @@ static void DrawImageWindow() TitleText(image->name); + if(ImGui::Button("Copy Name")) + { + ImGui::SetClipboardText(image->name); + } + char pakName[256]; if(FS_GetPakPath(pakName, sizeof(pakName), image->pakChecksum)) { @@ -818,6 +823,11 @@ static void DrawShaderWindow() shader_t* shader = window.shader; TitleText(shader->name); + if(ImGui::Button("Copy Name")) + { + ImGui::SetClipboardText(shader->name); + } + const char* const shaderPath = R_GetShaderPath(shader); if(shaderPath != NULL) { @@ -1459,7 +1469,10 @@ static void DrawCVarToolTip(cvar_t* cvar) static void DrawCVarNoValue(cvar_t* cvar) { Q_assert(cvar != NULL); - Q_assert(IsNonEmpty(cvar->gui.title)); + if((cvar->gui.categories & CVARCAT_DEBUGGING) == 0) + { + Q_assert(IsNonEmpty(cvar->gui.title)); + } ImGui::TableNextRow(); ImGui::TableSetColumnIndex(0); @@ -1714,10 +1727,12 @@ static bool DrawCVarTable(bool* restartNeeded, const char* title, int categoryMa for(cvar_t* var = Cvar_GetFirst(); var != NULL; var = var->next) { +#if !defined(_DEBUG) if(var->gui.categories & CVARCAT_DEBUGGING) { continue; } +#endif if(var->gui.categories & categoryMask) { @@ -1790,7 +1805,7 @@ static void DrawSettings() ImGui::BeginTabBar("Tabs#ClientSettings"); if(ImGui::BeginTabItem("All")) { - DrawCVarTable(&restartNeeded, "All settings", -1 & (~CVARCAT_DEBUGGING)); + DrawCVarTable(&restartNeeded, "All settings", -1); ImGui::EndTabItem(); } if(ImGui::BeginTabItem("General")) diff --git a/code/renderer/tr_init.cpp b/code/renderer/tr_init.cpp index ef1da8f..e34ac26 100644 --- a/code/renderer/tr_init.cpp +++ b/code/renderer/tr_init.cpp @@ -115,6 +115,9 @@ cvar_t *r_debugSort; cvar_t *r_debugUI; cvar_t *r_debugInput; +cvar_t *r_normalSmoothing; 
+cvar_t *r_normalAreaWeight; + // these limits apply to the sum of all scenes in a frame: // the main view, all the 3D icons, and even the console etc #define DEFAULT_MAX_POLYS 8192 @@ -477,6 +480,14 @@ static const cvarTableItem_t r_cvars[] = &r_singleShader, "r_singleShader", "0", CVAR_CHEAT | CVAR_LATCH, CVART_BOOL, NULL, NULL, "forces the default shader on all world surfaces except the sky", "Force default shader", CVARCAT_GRAPHICS | CVARCAT_DEBUGGING, "Forces it on all world surfaces except the sky", "" }, + { + &r_normalSmoothing, "r_normalSmoothing", "1", CVAR_TEMP | CVAR_LATCH, CVART_BOOL, NULL, NULL, "compute smooth vertex normals", + "Smooth vertex normals", CVARCAT_GRAPHICS, "Computes brand new vertex normals", "" + }, + { + &r_normalAreaWeight, "r_normalAreaWeight", "1", CVAR_TEMP | CVAR_LATCH, CVART_BOOL, NULL, NULL, "weights normals by area", + "Normal area weighting", CVARCAT_GRAPHICS, "Weights vertex normals by area", "" + }, // // archived variables that can change at any time @@ -778,6 +789,8 @@ void R_Init() R_ModelInit(); + renderPipeline->LoadResources(); + QSUBSYSTEM_INIT_DONE( "Renderer" ); } diff --git a/code/renderer/tr_local.h b/code/renderer/tr_local.h index bf4f067..f3188d4 100644 --- a/code/renderer/tr_local.h +++ b/code/renderer/tr_local.h @@ -427,7 +427,8 @@ struct shader_t { qbool isOpaque; // no alpha blending, alpha test is OK if opaque qbool isAlphaTestedOpaque; // no alpha blending, first stage is alpha tested - qbool isDynamic; // at least one vertex attribute must generated on the fly + qbool isDynamic; // at least one vertex attribute must be generated on the fly + qbool hasLightmapStage; pipeline_t pipelines[MAX_SHADER_STAGES]; int numPipelines; @@ -437,6 +438,8 @@ struct shader_t { pipeline_t prepassPipeline; + int addLightPipeline; + shader_t* next; }; @@ -538,6 +541,9 @@ void R_ComputeTessellatedSize( int* numVertexes, int* numIndexes, const surfaceT // R_ComputeTessellatedSize is unused for now but might be of use a bit 
later // we can use it to compute the required size of the static geometry buffers in the GRP +void R_SmoothNormals( float* normals, int normalStride, const int* indexes, + const float* positions, int positionStride, int numVertexes, int numIndexes ); + struct litSurf_t { unsigned sort; // bit combination for fast compares @@ -559,6 +565,14 @@ struct dlight_t { }; +struct rtSurf_t { + const surfaceType_t* surface; + const shader_t* shader; + int entityNum; + qhandle_t model; +}; + + #define MAX_FACE_POINTS 64 #define MAX_PATCH_SIZE 32 // max dimensions of a patch mesh in map file @@ -682,6 +696,13 @@ BRUSH MODELS // in memory representation // +enum raytracingSurfaceType_t { + RTST_NONE, + RTST_STATIC, + RTST_DYNAMIC, + RTST_COUNT +}; + #define SIDE_FRONT 0 #define SIDE_BACK 1 #define SIDE_ON 2 @@ -695,6 +716,7 @@ struct msurface_t { int staticGeoChunk; int zppFirstIndex; int zppIndexCount; + raytracingSurfaceType_t rtSurfType; const surfaceType_t* data; // any of srf*_t }; @@ -908,6 +930,8 @@ typedef struct { qbool worldMapLoaded; world_t* world; + qbool forceHighestLod; // for curves and MD3 models alike + const byte* externalVisData; // from RE_SetWorldVisData, shared with CM_Load image_t* defaultImage; @@ -982,7 +1006,12 @@ typedef struct { qbool shaderParseSaveState; qbool shaderParseFailed; int shaderParseNumWarnings; - + + rtSurf_t rtSurfs[MAX_DRAWSURFS]; + int numRTSurfs; + int sceneCounterRT; + trRefdef_t rtRefdef; + } trGlobals_t; extern backEndState_t backEnd; @@ -1099,6 +1128,9 @@ extern cvar_t *r_debugSort; extern cvar_t *r_debugUI; extern cvar_t *r_debugInput; +extern cvar_t *r_normalSmoothing; +extern cvar_t *r_normalAreaWeight; + void R_NoiseInit(); double R_NoiseGet4f( double x, double y, double z, double t ); @@ -1111,6 +1143,7 @@ void R_AddPolygonSurfaces(); void R_AddDrawSurf( const surfaceType_t* surface, const shader_t* shader, int staticGeoChunk = 0, int zppFirstIndex = 0, int zppIndexCount = 0, float radiusOverZ = 666.0f ); void 
R_AddLitSurf( const surfaceType_t* surface, const shader_t* shader, int staticGeoChunk ); +void R_AddRTSurf( const surfaceType_t* surface, const shader_t* shader ); uint64_t R_ComposeSort( int entityNum, const shader_t* shader, int staticGeoChunk ); void R_DecomposeSort( uint64_t sort, int* entityNum, const shader_t** shader ); uint32_t R_ComposeLitSort( int entityNum, const shader_t* shader, int staticGeoChunk ); @@ -1664,6 +1697,7 @@ extern int r_delayedScreenshotFrame; struct IRenderPipeline { virtual void Init() = 0; + virtual void LoadResources() = 0; virtual void ShutDown(bool fullShutDown) = 0; virtual void ProcessWorld(world_t& world) = 0; diff --git a/code/renderer/tr_main.cpp b/code/renderer/tr_main.cpp index 6c92cfe..9e277dd 100644 --- a/code/renderer/tr_main.cpp +++ b/code/renderer/tr_main.cpp @@ -1322,6 +1322,39 @@ void R_AddLitSurf( const surfaceType_t* surface, const shader_t* shader, int sta } +void R_AddRTSurf( const surfaceType_t* surface, const shader_t* shader ) +{ + if (tr.numRTSurfs >= ARRAY_LEN(tr.rtSurfs)) + return; + + if (tr.sceneCounterRT != 1) + return; + + if (shader->numStages <= 0 || shader->isSky) + return; + + // the mod uses this for its particles/sprites + if (*surface == SF_POLY) + return; + + if (*surface == SF_ENTITY) { + const refEntity_t* const ent = &backEnd.refdef.entities[tr.currentEntityNum].e; + if (ent->reType == RT_SPRITE || + ent->reType == RT_LIGHTNING || + ent->reType == RT_POLY || + ent->reType == RT_PORTALSURFACE) { + return; + } + } + + rtSurf_t* const surf = &tr.rtSurfs[tr.numRTSurfs++]; + surf->surface = surface; + surf->shader = shader; + surf->entityNum = tr.currentEntityNum; + surf->model = tr.currentModel != NULL ? 
tr.currentModel->index : 0; +} + + uint64_t R_ComposeSort( int entityNum, const shader_t* shader, int staticGeoChunk ) { return @@ -1612,6 +1645,7 @@ static void R_AddEntitySurfaces() } shader = R_GetShaderByHandle( ent->e.customShader ); R_AddDrawSurf( &entitySurface, shader ); + R_AddRTSurf( &entitySurface, shader ); // @TODO: billboards need to be procedural geometry break; case RT_MODEL: diff --git a/code/renderer/tr_mesh.cpp b/code/renderer/tr_mesh.cpp index 5fbfcec..32538da 100644 --- a/code/renderer/tr_mesh.cpp +++ b/code/renderer/tr_mesh.cpp @@ -161,6 +161,11 @@ static int R_ComputeLOD( const trRefEntity_t* ent ) float projectedRadius; int lod; + if ( tr.forceHighestLod ) + { + return 0; + } + if ( tr.currentModel->numLods < 2 ) { // model has only 1 LOD level, skip computations and bias @@ -239,8 +244,8 @@ void R_AddMD3Surfaces( trRefEntity_t* ent ) const md3Header_t* header = tr.currentModel->md3[lod]; // cull the entire model if merged bounding box of both frames is outside the view frustum - if (R_CullModel(header, ent) == CULL_OUT) - return; + const qbool culled = R_CullModel( header, ent ) == CULL_OUT; + // @TODO: early out here if no RT is used // set up lighting now that we know we aren't culled if (!personalModel) @@ -281,7 +286,9 @@ void R_AddMD3Surfaces( trRefEntity_t* ent ) // don't add third_person objects if not viewing through a portal if ( !personalModel ) { - R_AddDrawSurf( (const surfaceType_t*)surface, shader ); + if( !culled ) + R_AddDrawSurf( (const surfaceType_t*)surface, shader ); + R_AddRTSurf( (const surfaceType_t*)surface, shader ); } surface = (const md3Surface_t*)( (byte *)surface + surface->ofsEnd ); diff --git a/code/renderer/tr_model.cpp b/code/renderer/tr_model.cpp index 0f6458d..1c9c739 100644 --- a/code/renderer/tr_model.cpp +++ b/code/renderer/tr_model.cpp @@ -195,6 +195,11 @@ static qbool R_LoadMD3( model_t *mod, int lod, void *buffer, const char *mod_nam surf = (md3Surface_t *)( (byte *)surf + surf->ofsEnd ); } + // 
@TODO: fix normals of MD3 models + // decompress positions and normals into tess + // call R_SmoothNormals + // compress position and normals back into surf + return qtrue; } diff --git a/code/renderer/tr_scene.cpp b/code/renderer/tr_scene.cpp index ef336bf..bdcef01 100644 --- a/code/renderer/tr_scene.cpp +++ b/code/renderer/tr_scene.cpp @@ -84,6 +84,9 @@ void R_AddPolygonSurfaces() const srfPoly_t* poly = tr.refdef.polys; for (int i = 0; i < tr.refdef.numPolys; ++i, ++poly) { R_AddDrawSurf( (const surfaceType_t*)poly, R_GetShaderByHandle( poly->hShader ) ); + // @TODO: polygons are sometimes used to replace sprites (e.g. CPMA rocket smoke), + // they should be procedural geometry + R_AddRTSurf( (const surfaceType_t*)poly, R_GetShaderByHandle( poly->hShader ) ); } } @@ -280,6 +283,9 @@ void RE_RenderScene( const refdef_t* fd, int us ) // each scene / view. tr.frameSceneNum++; tr.sceneCount++; + if ((tr.refdef.rdflags & RDF_NOWORLDMODEL) == 0) { + tr.sceneCounterRT++; + } // setup view parms for the initial view // @@ -306,6 +312,11 @@ void RE_RenderScene( const refdef_t* fd, int us ) R_RenderScene( &parms ); + if ((tr.refdef.rdflags & RDF_NOWORLDMODEL) == 0 && + tr.sceneCounterRT == 1) { + tr.rtRefdef = tr.refdef; + } + // the next scene rendered in this frame will tack on after this one r_firstSceneDrawSurf = tr.refdef.numDrawSurfs; r_firstSceneLitSurf = tr.refdef.numLitSurfs; diff --git a/code/renderer/tr_shader.cpp b/code/renderer/tr_shader.cpp index a30edbe..b2b0e51 100644 --- a/code/renderer/tr_shader.cpp +++ b/code/renderer/tr_shader.cpp @@ -2290,7 +2290,6 @@ static qbool UsesExternalLightmap( const shaderStage_t* stage ) { return stage->active && - stage->type == ST_DIFFUSE && !stage->bundle.isVideoMap && stage->bundle.numImageAnimations <= 1 && stage->bundle.image[0] != NULL && @@ -2376,8 +2375,6 @@ static void BuildPerImageShaderList( shader_t* newShader ) static shader_t* FinishShader( shader_t* sh = NULL ) { - int stage; - // // set polygon offset // @@ 
-2385,9 +2382,18 @@ static shader_t* FinishShader( shader_t* sh = NULL ) shader.sort = SS_DECAL; } + // it's fine if there's polygonoffset, the effect on the depth buffer is acceptable + if ( r_pipeline->integer == 1 ) { + const int blendBits = stages[0].stateBits & GLS_BLEND_BITS; + if ( blendBits == 0 || blendBits == (GLS_SRCBLEND_ONE | GLS_DSTBLEND_ZERO) ) { + shader.sort = SS_OPAQUE; + } + } + // // set appropriate stage information // + int stage; for ( stage = 0; stage < MAX_SHADER_STAGES; stage++ ) { shaderStage_t *pStage = &stages[stage]; @@ -2544,6 +2550,23 @@ static shader_t* FinishShader( shader_t* sh = NULL ) BuildPerImageShaderList( newShader ); + // make sure external lightmap stages are correctly marked as lightmap stages + for ( int s = 0; s < newShader->numStages; s++ ) { + shaderStage_t* const stagePtr = newShader->stages[s]; + if ( stagePtr->type == ST_DIFFUSE && UsesExternalLightmap( stagePtr ) ) { + stagePtr->type = ST_LIGHTMAP; + } + } + + newShader->hasLightmapStage = qfalse; + for ( int s = 0; s < newShader->numStages; s++ ) { + shaderStage_t* const stagePtr = newShader->stages[s]; + if ( stagePtr->type == ST_LIGHTMAP ) { + newShader->hasLightmapStage = qtrue; + break; + } + } + return newShader; } diff --git a/code/renderer/tr_surface.cpp b/code/renderer/tr_surface.cpp index 5ab9561..5e0a849 100644 --- a/code/renderer/tr_surface.cpp +++ b/code/renderer/tr_surface.cpp @@ -595,8 +595,7 @@ static float LodErrorForVolume( vec3_t local, float radius ) { return 0; } - if ( !tr.worldMapLoaded ) { - // if we tessellate during map load, it's for static geometry pre-processing + if ( tr.forceHighestLod ) { // we want a high level of detail, so consider the distance d to be 1 return r_lodCurveError->value; } @@ -846,6 +845,25 @@ static void (*rb_surfaceTable[SF_NUM_SURFACE_TYPES])( const void* ) = { }; +static float Angle( const vec3_t a, const vec3_t b ) +{ + return acosf( DotProduct( a, b ) ); +} + + +// angle between (a - base) and (b - base) 
+static float Angle( const vec3_t a, const vec3_t b, const vec3_t base ) +{ + vec3_t u, v; + VectorSubtract( a, base, u ); + VectorSubtract( b, base, v ); + VectorNormalize( u ); + VectorNormalize( v ); + + return Angle( u, v ); +} + + void R_TessellateSurface( const surfaceType_t* surfType ) { rb_surfaceTable[ *surfType ]( surfType ); @@ -948,3 +966,69 @@ void R_ComputeTessellatedSize( int* numVertexes, int* numIndexes, const surfaceT { rb_surfaceSizeTable[ *surfType ]( numVertexes, numIndexes, surfType ); } + + +void R_SmoothNormals( float* normalsFlt, int normalStride, const int* indexes, + const float* positionsFlt, int positionStride, int numVertexes, int numIndexes ) +{ +#define PositionAt(v) ( (const float*)((const byte*)positionsFlt + v * positionStride) ) +#define NormalAt(v) ( (float*)((byte*)normalsFlt + v * normalStride) ) + + static vec3_t normalAccum[SHADER_MAX_VERTEXES]; + Q_assert( numVertexes <= SHADER_MAX_VERTEXES ); + numVertexes = min( numVertexes, SHADER_MAX_VERTEXES ); + + if ( r_normalSmoothing->integer == 0 || + numVertexes == 0 || + numIndexes == 0 ) { + return; + } + + Com_Memset( normalAccum, 0, numVertexes * sizeof( normalAccum[0] ) ); + + const qbool weightNormalByArea = r_normalAreaWeight->integer != 0; + for ( int i = 0; i < numIndexes; i += 3 ) { + const int v0 = indexes[i + 0]; + const int v1 = indexes[i + 1]; + const int v2 = indexes[i + 2]; + const float* const p0 = PositionAt(v0); + const float* const p1 = PositionAt(v1); + const float* const p2 = PositionAt(v2); + float* const n0 = normalAccum[v0]; + float* const n1 = normalAccum[v1]; + float* const n2 = normalAccum[v2]; + + // n = (p1 - p0) X (p2 - p0) + vec3_t p1p0, p2p0, n; + VectorSubtract( p1, p0, p1p0 ); + VectorSubtract( p2, p0, p2p0 ); + CrossProduct( p1p0, p2p0, n ); + + if ( DotProduct( n, NormalAt(v0) ) < 0.0f ) { + VectorNegate( n, n ); + } + + if ( !weightNormalByArea ) { + VectorNormalize( n ); + } + + const float a0 = Angle( p1, p2, p0 ); + const float a1 = Angle( 
p2, p0, p1 ); + const float a2 = Angle( p0, p1, p2 ); + + VectorMA( n0, a0, n, n0 ); + VectorMA( n1, a1, n, n1 ); + VectorMA( n2, a2, n, n2 ); + } + + for ( int v = 0; v < numVertexes; v++ ) { + float* const n = NormalAt(v); + float* const accum = normalAccum[v]; + + VectorNormalize( accum ); + VectorCopy( accum, n ); + } + +#undef PositionAt +#undef NormalAt +} diff --git a/code/renderer/tr_world.cpp b/code/renderer/tr_world.cpp index 5f3fd5a..dc4300d 100644 --- a/code/renderer/tr_world.cpp +++ b/code/renderer/tr_world.cpp @@ -442,6 +442,11 @@ void R_AddBrushModelSurfaces( const trRefEntity_t* re ) const model_t* model = R_GetModelByHandle( re->e.hModel ); const bmodel_t* bmodel = model->bmodel; + for ( int s = 0; s < bmodel->numSurfaces; ++s ) { + const msurface_t* const surf = bmodel->firstSurface + s; + R_AddRTSurf( surf->data, surf->shader ); + } + if ( R_CullLocalBox( bmodel->bounds ) == CULL_OUT ) return; diff --git a/code/shadercomp/shadercomp.cpp b/code/shadercomp/shadercomp.cpp index 1cf23bc..43a0cab 100644 --- a/code/shadercomp/shadercomp.cpp +++ b/code/shadercomp/shadercomp.cpp @@ -101,7 +101,7 @@ void CompileShader(const ShaderArgs& args, int extraCount = 0, const char** extr // -Gis: Force IEEE strictness // -Zi: Embed debug info // -Qembed_debug: Embed debug info in shader container - strcpy(temp, va("%s -Fh %s -E %s -T %s -WX -Ges -Gis -Zi -Qembed_debug", + strcpy(temp, va("%s -HV 2021 -Fh %s -E %s -T %s -WX -Ges -Gis -Zi -Qembed_debug", dxcPath, headerPath, args.entryPoint, args.targetProfile)); for(int i = 0; i < extraCount; ++i) @@ -407,6 +407,13 @@ void ProcessCRP() CompileGraphics("fog_inside.h", "fog_inside.hlsl", "inside"); CompileGraphics("fog_outside.h", "fog_outside.hlsl", "outside"); CompilePixelShader("magnifier.h", "magnifier.hlsl", "magnifier"); + CompilePixelShader("dl_draw.h", "dl_draw.hlsl", "dl_draw"); + CompilePixelShader("dl_denoising.h", "dl_denoising.hlsl", "dl_denoising"); + CompileGraphics("add_light.h", "add_light.hlsl", 
"add_light"); + CompilePixelShader("gbufferviz_depth.h", "gbufferviz_depth.hlsl", "gbufferviz_depth"); + CompilePixelShader("gbufferviz_normal.h", "gbufferviz_normal.hlsl", "gbufferviz_normal"); + CompilePixelShader("gbufferviz_position.h", "gbufferviz_position.hlsl", "gbufferviz_position"); + CompileGraphics("wireframe_normals.h", "wireframe_normals.hlsl", "wireframe_normals"); } int main(int /*argc*/, const char** argv) diff --git a/makefiles/windows_vs2019/renderer.vcxproj b/makefiles/windows_vs2019/renderer.vcxproj index 0c62fc3..1cec637 100644 --- a/makefiles/windows_vs2019/renderer.vcxproj +++ b/makefiles/windows_vs2019/renderer.vcxproj @@ -129,12 +129,15 @@ + + + @@ -183,9 +186,18 @@ true + + true + true + + true + + + true + true @@ -216,6 +228,15 @@ true + + true + + + true + + + true + true @@ -255,6 +276,9 @@ true + + true + true @@ -308,14 +332,16 @@ + - - + + + diff --git a/makefiles/windows_vs2019/renderer.vcxproj.filters b/makefiles/windows_vs2019/renderer.vcxproj.filters index c11dcc9..380aeee 100644 --- a/makefiles/windows_vs2019/renderer.vcxproj.filters +++ b/makefiles/windows_vs2019/renderer.vcxproj.filters @@ -33,12 +33,15 @@ + + + @@ -87,9 +90,18 @@ shaders\crp + + shaders\crp + shaders\crp + + shaders\crp + + + shaders\crp + shaders\crp @@ -120,6 +132,15 @@ shaders\crp + + shaders\crp + + + shaders\crp + + + shaders\crp + shaders\crp @@ -159,6 +180,9 @@ shaders\crp + + shaders\crp + shaders\grp @@ -218,6 +242,9 @@ shaders\common + + shaders\crp + shaders\crp @@ -236,10 +263,13 @@ shaders\crp - + shaders\crp - + + shaders\crp + + shaders\crp diff --git a/makefiles/windows_vs2022/renderer.vcxproj b/makefiles/windows_vs2022/renderer.vcxproj index d82fb8c..f386590 100644 --- a/makefiles/windows_vs2022/renderer.vcxproj +++ b/makefiles/windows_vs2022/renderer.vcxproj @@ -131,12 +131,15 @@ + + + @@ -185,9 +188,18 @@ true + + true + true + + true + + + true + true @@ -218,6 +230,15 @@ true + + true + + + true + + + true + true @@ -257,6 +278,9 @@ true 
+ + true + true @@ -310,14 +334,16 @@ + - - + + + diff --git a/makefiles/windows_vs2022/renderer.vcxproj.filters b/makefiles/windows_vs2022/renderer.vcxproj.filters index c11dcc9..380aeee 100644 --- a/makefiles/windows_vs2022/renderer.vcxproj.filters +++ b/makefiles/windows_vs2022/renderer.vcxproj.filters @@ -33,12 +33,15 @@ + + + @@ -87,9 +90,18 @@ shaders\crp + + shaders\crp + shaders\crp + + shaders\crp + + + shaders\crp + shaders\crp @@ -120,6 +132,15 @@ shaders\crp + + shaders\crp + + + shaders\crp + + + shaders\crp + shaders\crp @@ -159,6 +180,9 @@ shaders\crp + + shaders\crp + shaders\grp @@ -218,6 +242,9 @@ shaders\common + + shaders\crp + shaders\crp @@ -236,10 +263,13 @@ shaders\crp - + shaders\crp - + + shaders\crp + + shaders\crp