From ad3f942a8deca8beb0172570972891e738f2aa63 Mon Sep 17 00:00:00 2001 From: myT <> Date: Sat, 13 Jan 2024 22:40:13 +0100 Subject: [PATCH] added the Cinematic Rendering Pipeline --- .gitignore | 1 - changelog.txt | 46 +- code/client/cl_main.cpp | 29 + code/client/cl_ui.cpp | 7 - code/qcommon/q_math.c | 6 + code/qcommon/q_shared.h | 19 + code/renderer/compshaders/.gitignore | 2 + code/renderer/crp_dof_accum.cpp | 366 ++++++++ code/renderer/crp_dof_gather.cpp | 479 ++++++++++ code/renderer/crp_fog.cpp | 208 +++++ code/renderer/crp_geometry.cpp | 155 ++++ code/renderer/crp_local.h | 330 +++++++ code/renderer/crp_main.cpp | 677 +++++++++++++++ code/renderer/crp_opaque.cpp | 376 ++++++++ code/renderer/crp_tone_map.cpp | 139 +++ code/renderer/crp_transp_draw.cpp | 368 ++++++++ code/renderer/crp_transp_resolve.cpp | 111 +++ code/renderer/grp_geometry.cpp | 113 +++ code/renderer/grp_local.h | 451 +--------- code/renderer/grp_main.cpp | 460 ++-------- code/renderer/grp_post.cpp | 12 +- code/renderer/grp_smaa.cpp | 6 +- .../{uber_shaders.h => grp_uber_shaders.h} | 26 + code/renderer/grp_world.cpp | 98 +-- code/renderer/rhi_d3d12.cpp | 815 +++++++++++++++--- code/renderer/rhi_local.h | 38 +- code/renderer/{hlsl => shaders}/.gitignore | 0 code/renderer/shaders/common/blend.hlsli | 81 ++ code/renderer/shaders/common/mip_gen.hlsli | 96 +++ .../shaders/common/state_bits.h.hlsli | 65 ++ code/renderer/shaders/crp/accumdof_accum.hlsl | 65 ++ code/renderer/shaders/crp/accumdof_debug.hlsl | 99 +++ code/renderer/shaders/crp/accumdof_norm.hlsl | 65 ++ code/renderer/shaders/crp/blit.hlsl | 69 ++ code/renderer/shaders/crp/common.hlsli | 144 ++++ code/renderer/shaders/crp/dof.hlsli | 54 ++ code/renderer/shaders/crp/fog.hlsli | 58 ++ code/renderer/shaders/crp/fog_inside.hlsl | 43 + code/renderer/shaders/crp/fog_outside.hlsl | 47 + code/renderer/shaders/crp/gatherdof.hlsli | 61 ++ code/renderer/shaders/crp/gatherdof_blur.hlsl | 207 +++++ .../shaders/crp/gatherdof_coc_tile_gen.hlsl | 65 ++ 
.../shaders/crp/gatherdof_coc_tile_max.hlsl | 64 ++ .../shaders/crp/gatherdof_combine.hlsl | 84 ++ .../renderer/shaders/crp/gatherdof_debug.hlsl | 93 ++ code/renderer/shaders/crp/gatherdof_fill.hlsl | 76 ++ .../renderer/shaders/crp/gatherdof_split.hlsl | 78 ++ code/renderer/shaders/crp/imgui.hlsl | 73 ++ code/renderer/shaders/crp/mip_1.hlsl | 40 + code/renderer/shaders/crp/mip_2.hlsl | 46 + code/renderer/shaders/crp/mip_3.hlsl | 44 + code/renderer/shaders/crp/nuklear.hlsl | 72 ++ code/renderer/shaders/crp/oit.h.hlsli | 58 ++ code/renderer/shaders/crp/opaque.hlsl | 131 +++ code/renderer/shaders/crp/tone_map.hlsl | 74 ++ .../shaders/crp/tone_map_inverse.hlsl | 72 ++ code/renderer/shaders/crp/transp_draw.hlsl | 136 +++ code/renderer/shaders/crp/transp_resolve.hlsl | 216 +++++ code/renderer/shaders/crp/ui.hlsl | 73 ++ code/renderer/shaders/crp/world.h.hlsli | 30 + code/renderer/shaders/crp/world.hlsli | 37 + .../{hlsl => shaders/grp}/depth_pre_pass.hlsl | 0 .../{hlsl => shaders/grp}/dynamic_light.hlsl | 0 code/renderer/{hlsl => shaders/grp}/fog.hlsli | 0 .../{hlsl => shaders/grp}/fog_inside.hlsl | 0 .../{hlsl => shaders/grp}/fog_outside.hlsl | 0 .../renderer/{hlsl => shaders/grp}/imgui.hlsl | 0 .../renderer/{hlsl => shaders/grp}/mip_1.hlsl | 14 +- .../renderer/{hlsl => shaders/grp}/mip_2.hlsl | 36 +- .../renderer/{hlsl => shaders/grp}/mip_3.hlsl | 22 +- .../{hlsl => shaders/grp}/nuklear.hlsl | 0 .../{hlsl => shaders/grp}/post_gamma.hlsl | 0 .../grp}/post_inverse_gamma.hlsl | 2 +- .../{hlsl => shaders/grp}/shared.hlsli | 5 + .../renderer/{hlsl => shaders/grp}/smaa.hlsli | 0 .../{hlsl => shaders/grp}/smaa_1.hlsl | 0 .../{hlsl => shaders/grp}/smaa_2.hlsl | 0 .../{hlsl => shaders/grp}/smaa_3.hlsl | 0 .../{hlsl => shaders/grp}/uber_shader.hlsl | 82 +- code/renderer/{hlsl => shaders/grp}/ui.hlsl | 0 .../renderer/{grp_imgui.cpp => srp_imgui.cpp} | 89 +- code/renderer/srp_local.h | 435 ++++++++++ code/renderer/srp_main.cpp | 539 ++++++++++++ .../{grp_mip_gen.cpp => 
srp_mip_gen.cpp} | 178 ++-- .../{grp_nuklear.cpp => srp_nuklear.cpp} | 64 +- code/renderer/{grp_ui.cpp => srp_ui.cpp} | 79 +- code/renderer/tr_init.cpp | 13 +- code/renderer/tr_local.h | 69 +- code/renderer/tr_main.cpp | 117 ++- code/shadercomp/shadercomp.cpp | 179 +++- makefiles/compile_shader.cmd | 5 - makefiles/premake5.lua | 18 +- makefiles/windows_vs2019/cnq3.vcxproj | 7 +- makefiles/windows_vs2019/cnq3.vcxproj.filters | 15 +- makefiles/windows_vs2019/renderer.vcxproj | 143 ++- .../windows_vs2019/renderer.vcxproj.filters | 214 ++++- makefiles/windows_vs2019/shadercomp.vcxproj | 10 +- makefiles/windows_vs2022/cnq3.vcxproj | 7 +- makefiles/windows_vs2022/cnq3.vcxproj.filters | 15 +- makefiles/windows_vs2022/renderer.vcxproj | 143 ++- .../windows_vs2022/renderer.vcxproj.filters | 214 ++++- makefiles/windows_vs2022/shadercomp.vcxproj | 10 +- 102 files changed, 9129 insertions(+), 1649 deletions(-) create mode 100644 code/renderer/compshaders/.gitignore create mode 100644 code/renderer/crp_dof_accum.cpp create mode 100644 code/renderer/crp_dof_gather.cpp create mode 100644 code/renderer/crp_fog.cpp create mode 100644 code/renderer/crp_geometry.cpp create mode 100644 code/renderer/crp_local.h create mode 100644 code/renderer/crp_main.cpp create mode 100644 code/renderer/crp_opaque.cpp create mode 100644 code/renderer/crp_tone_map.cpp create mode 100644 code/renderer/crp_transp_draw.cpp create mode 100644 code/renderer/crp_transp_resolve.cpp create mode 100644 code/renderer/grp_geometry.cpp rename code/renderer/{uber_shaders.h => grp_uber_shaders.h} (81%) rename code/renderer/{hlsl => shaders}/.gitignore (100%) create mode 100644 code/renderer/shaders/common/blend.hlsli create mode 100644 code/renderer/shaders/common/mip_gen.hlsli create mode 100644 code/renderer/shaders/common/state_bits.h.hlsli create mode 100644 code/renderer/shaders/crp/accumdof_accum.hlsl create mode 100644 code/renderer/shaders/crp/accumdof_debug.hlsl create mode 100644 
code/renderer/shaders/crp/accumdof_norm.hlsl create mode 100644 code/renderer/shaders/crp/blit.hlsl create mode 100644 code/renderer/shaders/crp/common.hlsli create mode 100644 code/renderer/shaders/crp/dof.hlsli create mode 100644 code/renderer/shaders/crp/fog.hlsli create mode 100644 code/renderer/shaders/crp/fog_inside.hlsl create mode 100644 code/renderer/shaders/crp/fog_outside.hlsl create mode 100644 code/renderer/shaders/crp/gatherdof.hlsli create mode 100644 code/renderer/shaders/crp/gatherdof_blur.hlsl create mode 100644 code/renderer/shaders/crp/gatherdof_coc_tile_gen.hlsl create mode 100644 code/renderer/shaders/crp/gatherdof_coc_tile_max.hlsl create mode 100644 code/renderer/shaders/crp/gatherdof_combine.hlsl create mode 100644 code/renderer/shaders/crp/gatherdof_debug.hlsl create mode 100644 code/renderer/shaders/crp/gatherdof_fill.hlsl create mode 100644 code/renderer/shaders/crp/gatherdof_split.hlsl create mode 100644 code/renderer/shaders/crp/imgui.hlsl create mode 100644 code/renderer/shaders/crp/mip_1.hlsl create mode 100644 code/renderer/shaders/crp/mip_2.hlsl create mode 100644 code/renderer/shaders/crp/mip_3.hlsl create mode 100644 code/renderer/shaders/crp/nuklear.hlsl create mode 100644 code/renderer/shaders/crp/oit.h.hlsli create mode 100644 code/renderer/shaders/crp/opaque.hlsl create mode 100644 code/renderer/shaders/crp/tone_map.hlsl create mode 100644 code/renderer/shaders/crp/tone_map_inverse.hlsl create mode 100644 code/renderer/shaders/crp/transp_draw.hlsl create mode 100644 code/renderer/shaders/crp/transp_resolve.hlsl create mode 100644 code/renderer/shaders/crp/ui.hlsl create mode 100644 code/renderer/shaders/crp/world.h.hlsli create mode 100644 code/renderer/shaders/crp/world.hlsli rename code/renderer/{hlsl => shaders/grp}/depth_pre_pass.hlsl (100%) rename code/renderer/{hlsl => shaders/grp}/dynamic_light.hlsl (100%) rename code/renderer/{hlsl => shaders/grp}/fog.hlsli (100%) rename code/renderer/{hlsl => 
shaders/grp}/fog_inside.hlsl (100%) rename code/renderer/{hlsl => shaders/grp}/fog_outside.hlsl (100%) rename code/renderer/{hlsl => shaders/grp}/imgui.hlsl (100%) rename code/renderer/{hlsl => shaders/grp}/mip_1.hlsl (85%) rename code/renderer/{hlsl => shaders/grp}/mip_2.hlsl (62%) rename code/renderer/{hlsl => shaders/grp}/mip_3.hlsl (71%) rename code/renderer/{hlsl => shaders/grp}/nuklear.hlsl (100%) rename code/renderer/{hlsl => shaders/grp}/post_gamma.hlsl (100%) rename code/renderer/{hlsl => shaders/grp}/post_inverse_gamma.hlsl (98%) rename code/renderer/{hlsl => shaders/grp}/shared.hlsli (97%) rename code/renderer/{hlsl => shaders/grp}/smaa.hlsli (100%) rename code/renderer/{hlsl => shaders/grp}/smaa_1.hlsl (100%) rename code/renderer/{hlsl => shaders/grp}/smaa_2.hlsl (100%) rename code/renderer/{hlsl => shaders/grp}/smaa_3.hlsl (100%) rename code/renderer/{hlsl => shaders/grp}/uber_shader.hlsl (70%) rename code/renderer/{hlsl => shaders/grp}/ui.hlsl (100%) rename code/renderer/{grp_imgui.cpp => srp_imgui.cpp} (82%) create mode 100644 code/renderer/srp_local.h create mode 100644 code/renderer/srp_main.cpp rename code/renderer/{grp_mip_gen.cpp => srp_mip_gen.cpp} (64%) rename code/renderer/{grp_nuklear.cpp => srp_nuklear.cpp} (81%) rename code/renderer/{grp_ui.cpp => srp_ui.cpp} (80%) delete mode 100644 makefiles/compile_shader.cmd diff --git a/.gitignore b/.gitignore index 72046bd..1855963 100644 --- a/.gitignore +++ b/.gitignore @@ -8,7 +8,6 @@ *.idb code/qcommon/git.h code/win32/winquake.res -code/renderer/hlsl/*.h makefiles/windows_vs*/*sdf makefiles/*/obj .build diff --git a/changelog.txt b/changelog.txt index a0d1483..2da4fa8 100644 --- a/changelog.txt +++ b/changelog.txt @@ -65,21 +65,55 @@ add: r_shadingRate <0 to 6> (default: 0) sets the variable-rate shading (VRS) mo prefer horizontal subsampling as many maps have textures with thin horizontal lines which become an aliased mess when vertically subsampled +add: Cinematic Rendering Pipeline CVars + 
depth of field: + crp_dof <0|1|2> (default: 1) selects the depth of field mode + 0 - disabled + 1 - scatter-as-gather + 2 - accumulation + crp_dof_overlay <0|1|2> (default: 0) selects the debug overlay mode + 0 - disabled + 1 - colorized blur + 2 - focus plane + crp_dof_blades <0 to 16> (default: 6) is the aperture's blade count + set to less than 3 for a disk shape + crp_dof_angle <0 to 360> (default: 20) is the aperture's angle, in degrees + scatter-as-gather depth of field (fast, doesn't handle transparency properly): + crp_gatherDof_focusNearDist <1 to 2048> (default: 192) is the near-field's focus distance + crp_gatherDof_focusNearRange <1 to 2048> (default: 256) is the near-field's focus range + crp_gatherDof_focusFarDist <1 to 2048> (default: 512) is the far-field's focus distance + crp_gatherDof_focusFarRange <1 to 2048> (default: 384) is the far-field's focus range + crp_gatherDof_brightness <0 to 8> (default: 2) is the blur brightness' weight + accumulation depth of field (extremely slow, looks perfect with enough samples): + crp_accumDof_focusDist <2 to 2048> (default: 256) is the focus distance + crp_accumDof_radius <0.001 to 20> (default: 0.1) is the aperture radius in world units + crp_accumDof_samples <1 to 12> (default: 2) is the per-axis sampling density + density N means (2N + 1)^2 scene renders in total, so be careful or you'll trigger TDR + crp_accumDof_preview <0|1|2> (default: 0) selects the real-time preview mode + 0 - disabled + 1 - 1/4 pixel count, 9 samples total + 2 - 1/16 pixel count, 25 samples total + chg: dropped 32-bit support chg: dropped Linux/FreeBSD client chg: Windows support is limited to 10 and 11 -chg: much improved rendering: - - removed all Direct3D 11 and OpenGL code - - rendering with Direct3D 12 (improved performance and better worst case input latency) +chg: reworked renderer with 2 new rendering pipelines + - removed all the Direct3D 11 and OpenGL code, now using Direct3D 12 - much improved input latency when V-Sync is enabled 
- - improved frame-time consistency - - surfaces are sorted and rendered more efficiently + - improved frame-time consistency ("frame pacing") - fog handling has been completely overhauled (faster, simpler, decoupled from surfaces) - MSAA and alpha-to-coverage have been removed - - added SMAA for anti-aliasing (gamma-corrected and not applied to UI for best results) + - Gameplay Rendering Pipeline (GRP) + - improved performance and better worst case input latency + - added SMAA for anti-aliasing (gamma-corrected and not applied to UI for best results) + - added VRS (Variable Rate Shading) support + - Cinematic Rendering Pipeline (CRP) + - order-independent transparency + - depth of field (scatter-as-gather or accumulation) + - all corresponding CVars have the "crp_" prefix chg: removed cl_drawMouseLag, r_backend, r_frameSleep, r_gpuMipGen, r_alphaToCoverage, r_alphaToCoverageMipBoost removed r_d3d11_syncOffsets, r_d3d11_presentMode, r_gl3_geoStream, r_ignoreGLErrors, r_finish, r_khr_debug diff --git a/code/client/cl_main.cpp b/code/client/cl_main.cpp index 1fc2f05..4b9936e 100644 --- a/code/client/cl_main.cpp +++ b/code/client/cl_main.cpp @@ -1591,12 +1591,35 @@ static void CL_CheckUserinfo() } +static void CL_CheckCRP() +{ + // demo playback and listen servers are always OK + if ( Cvar_VariableIntegerValue( "r_pipeline" ) != 1 || + CL_DemoPlaying() || + Cvar_VariableIntegerValue( "sv_running" ) ) + return; + + switch ( cls.state ) { + case CA_CHALLENGING: + case CA_CONNECTING: + case CA_CONNECTED: + Cbuf_AddText( "r_pipeline 0\nvid_restart\nreconnect\n" ); + Com_Printf( "^3WARNING: switched to GRP (r_pipeline 0) for online play\n" ); + break; + default: + break; + } +} + + void CL_Frame( int msec ) { if ( !com_cl_running->integer ) { return; } + CL_CheckCRP(); + if ( cls.cddialog ) { // bring up the cd error dialog if needed cls.cddialog = qfalse; @@ -3014,3 +3037,9 @@ void CL_SetMenuData( qboolean typeOnly ) } } } + + +qbool CL_DemoPlaying() +{ + return 
clc.demoplaying; +} diff --git a/code/client/cl_ui.cpp b/code/client/cl_ui.cpp index 50afef5..ddb3e7e 100644 --- a/code/client/cl_ui.cpp +++ b/code/client/cl_ui.cpp @@ -1222,10 +1222,3 @@ qbool UI_GameCommand() { return (uivm && VM_Call( uivm, UI_CONSOLE_COMMAND, cls.realtime )); } - - -qbool CL_DemoPlaying() -{ - return clc.demoplaying; -} - diff --git a/code/qcommon/q_math.c b/code/qcommon/q_math.c index d864b4a..55a280e 100644 --- a/code/qcommon/q_math.c +++ b/code/qcommon/q_math.c @@ -25,8 +25,14 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include "q_shared.h" +const vec2_t vec2_zero = { 0, 0 }; +const vec2_t vec2_one = { 1, 1 }; const vec3_t vec3_origin = { 0, 0, 0 }; +const vec3_t vec3_zero = { 0, 0, 0 }; +const vec3_t vec3_one = { 1, 1, 1 }; const vec4_t vec4_zero = { 0, 0, 0, 0 }; +const vec4_t vec4_one = { 1, 1, 1, 1 }; + #if defined(Q3_VM) // lcc can't cope with "const vec3_t []" vec3_t axisDefault[3] = { { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 } }; #else diff --git a/code/qcommon/q_shared.h b/code/qcommon/q_shared.h index 946f39f..7c8e323 100644 --- a/code/qcommon/q_shared.h +++ b/code/qcommon/q_shared.h @@ -178,6 +178,13 @@ typedef int clipHandle_t; #define STRINGIZE_NE(x) #x // no expansion #define STRINGIZE(x) STRINGIZE_NE(x) // with expansion +// #define A 42 +// #define B 69 +// CONCAT_NE(A, B) -> AB +// CONCAT(A, B) -> 4269 +#define CONCAT_NE(x, y) x ## y // no expansion +#define CONCAT(x, y) CONCAT_NE(x, y) // with expansion + // angle indexes #define PITCH 0 // up / down #define YAW 1 // left / right @@ -259,13 +266,25 @@ typedef float vec_t; typedef vec_t vec2_t[2]; typedef vec_t vec3_t[3]; typedef vec_t vec4_t[4]; +extern const vec2_t vec2_zero; +extern const vec2_t vec2_one; extern const vec3_t vec3_origin; +extern const vec3_t vec3_zero; +extern const vec3_t vec3_one; extern const vec4_t vec4_zero; +extern const vec4_t vec4_one; #ifndef M_PI #define M_PI 3.14159265358979323846f // matches value in gcc v2 math.h 
#endif +#define M_PI_D2 (M_PI / 2.0f) +#define M_PI_D4 (M_PI / 4.0f) +#define M_PI_D8 (M_PI / 8.0f) +#define M_PI_M2 (M_PI * 2.0f) +#define M_PI_M4 (M_PI * 4.0f) +#define M_PI_M8 (M_PI * 8.0f) + // all drawing is done to a 640*480 virtual screen size // and will be automatically scaled to the real resolution diff --git a/code/renderer/compshaders/.gitignore b/code/renderer/compshaders/.gitignore new file mode 100644 index 0000000..a8ccfa6 --- /dev/null +++ b/code/renderer/compshaders/.gitignore @@ -0,0 +1,2 @@ +*.h +*.temp diff --git a/code/renderer/crp_dof_accum.cpp b/code/renderer/crp_dof_accum.cpp new file mode 100644 index 0000000..e10f8a3 --- /dev/null +++ b/code/renderer/crp_dof_accum.cpp @@ -0,0 +1,366 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>. 
+=========================================================================== +*/ +// Cinematic Rendering Pipeline - accumulation depth of field + + +#include "crp_local.h" +namespace dof_accum +{ +#include "compshaders/crp/accumdof_accum_vs.h" +#include "compshaders/crp/accumdof_accum_ps.h" +} +namespace dof_norm +{ +#include "compshaders/crp/accumdof_norm_vs.h" +#include "compshaders/crp/accumdof_norm_ps.h" +} +namespace dof_debug +{ +#include "compshaders/crp/accumdof_debug_vs.h" +#include "compshaders/crp/accumdof_debug_ps.h" +} + + +#pragma pack(push, 4) + +struct DOFAccumRC +{ + uint32_t textureIndex; +}; + +struct DOFNormRC +{ + uint32_t textureIndex; +}; + +struct DOFDebugRC +{ + float mvp[16]; // displaced view, to project to CS + float invMvp[16]; // main view, to unproject to WS + uint32_t colorTextureIndex; + uint32_t depthTextureIndex; + uint32_t debugMode; // 1: colorized coc, 2: constant intensity far field + uint32_t tcScale; + float focusDist; + float linearDepthA; // main view, to unproject to WS + float linearDepthB; + float maxNearCocCS; + float maxFarCocCS; +}; + +#pragma pack(pop) + + +// the input is in [0,1]^2, the output polygon is centered at the origin +static void MapUnitSquareToPolygon(const vec2_t square01, float apertureBladeCount, float apertureAngleRad, vec2_t polygon) +{ + // needed to avoid inf/nan propagation through theta for samples + // that are exactly in the middle of the quad on either axis + // (i.e. 
square.x|y == 0.5f gets remapped to 0.0f) + const float epsilon = 0.000001f; + + // morph into a square in [-1,1]^2 + vec2_t square; + square[0] = square01[0] * 2.0f - 1.0f; + square[1] = square01[1] * 2.0f - 1.0f; + + // morph the square into a disk + // "A Low Distortion Map Between Disk and Square" by Peter Shirley and Kenneth Chiu + float radius, angle; + vec2_t square2; + square2[0] = square[0] * square[0]; + square2[1] = square[1] * square[1]; + if(square2[0] > square2[1]) + { + // left and right quadrants + radius = square[0]; + angle = (square[1] * M_PI_D4) / (square[0] + epsilon); + } + else + { + // top and bottom quadrants + radius = square[1]; + angle = M_PI_D2 - (square[0] * M_PI_D4) / (square[1] + epsilon); + } + if(radius < 0.0f) + { + radius = -radius; + angle += M_PI; + } + + // morph the disk into a polygon + // "Graphics Gems from CryENGINE 3" by Tiago Sousa + const float edgeCount = apertureBladeCount; + if(edgeCount >= 3.0f) + { + const float num = cosf(M_PI / edgeCount); + const float den0 = M_PI_M2 / edgeCount; + const float den1 = (angle * edgeCount + M_PI) / M_PI_M2; + const float den = angle - (den0 * floorf(den1)); + radius *= num / cosf(den); + angle += apertureAngleRad; + } + + polygon[0] = cosf(angle) * radius; + polygon[1] = sinf(angle) * radius; +} + +static int GetResolutionScale() +{ + switch(crp_accumDof_preview->integer) + { + case 0: return 1; + case 1: return 2; + case 2: return 4; + default: Q_assert(0); return 1; + } +} + + +void AccumDepthOfField::Init() +{ + { + GraphicsPipelineDesc desc("DOF Accumulate"); + desc.shortLifeTime = true; + desc.vertexShader = ShaderByteCode(dof_accum::g_vs); + desc.pixelShader = ShaderByteCode(dof_accum::g_ps); + desc.depthStencil.DisableDepth(); + desc.rasterizer.cullMode = CT_TWO_SIDED; + desc.AddRenderTarget(GLS_SRCBLEND_ONE | GLS_DSTBLEND_ONE, crp.renderTargetFormat); + accumPipeline = CreateGraphicsPipeline(desc); + } + + { + GraphicsPipelineDesc desc("DOF Normalize"); + 
desc.shortLifeTime = true; + desc.vertexShader = ShaderByteCode(dof_norm::g_vs); + desc.pixelShader = ShaderByteCode(dof_norm::g_ps); + desc.depthStencil.DisableDepth(); + desc.rasterizer.cullMode = CT_TWO_SIDED; + desc.AddRenderTarget(0, crp.renderTargetFormat); + normPipeline = CreateGraphicsPipeline(desc); + } + + { + GraphicsPipelineDesc desc("DOF Debug"); + desc.shortLifeTime = true; + desc.vertexShader = ShaderByteCode(dof_debug::g_vs); + desc.pixelShader = ShaderByteCode(dof_debug::g_ps); + desc.depthStencil.DisableDepth(); + desc.rasterizer.cullMode = CT_TWO_SIDED; + desc.AddRenderTarget(0, crp.renderTargetFormat); + debugPipeline = CreateGraphicsPipeline(desc); + } + + { + TextureDesc desc("DOF accumulation", glConfig.vidWidth, glConfig.vidHeight); + desc.initialState = ResourceStates::RenderTargetBit; + desc.allowedState = ResourceStates::RenderTargetBit | ResourceStates::PixelShaderAccessBit; + Vector4Clear(desc.clearColor); + desc.usePreferredClearValue = true; + desc.committedResource = true; + desc.format = crp.renderTargetFormat; + desc.shortLifeTime = true; + accumTexture = CreateTexture(desc); + } +} + +void AccumDepthOfField::Begin(const drawSceneViewCommand_t& cmd) +{ + CmdSetViewportAndScissor(0, 0, glConfig.vidWidth, glConfig.vidHeight); + + const TextureBarrier texBarriers[] = + { + TextureBarrier(accumTexture, ResourceStates::RenderTargetBit) + }; + CmdBarrier(ARRAY_LEN(texBarriers), texBarriers); + + CmdClearColorTarget(accumTexture, vec4_zero); + + // project a point a few units in front of the main view onto a viewpoint with maximum displacement + // the distance to the clip space center (i.e. 
the 2D origin) is the maximum expected CoC + vec3_t testPoint; + VectorMA(cmd.viewParms.world.viewOrigin, 16.0f, cmd.viewParms.orient.axis[0], testPoint); + drawSceneViewCommand_t newCmd; + FixCommand(newCmd, cmd, 0, 0); + vec4_t eye, clip; + R_TransformModelToClip(testPoint, newCmd.viewParms.world.modelMatrix, newCmd.viewParms.projectionMatrix, eye, clip); + Vector4Scale(clip, 1.0f / clip[3], clip); + maxNearCocCS = sqrtf(clip[0] * clip[0] + clip[1] * clip[1]); + + // same thing for a point far away + VectorMA(cmd.viewParms.world.viewOrigin, 69420.0f, cmd.viewParms.orient.axis[0], testPoint); + FixCommand(newCmd, cmd, 0, 0); + R_TransformModelToClip(testPoint, newCmd.viewParms.world.modelMatrix, newCmd.viewParms.projectionMatrix, eye, clip); + Vector4Scale(clip, 1.0f / clip[3], clip); + maxFarCocCS = sqrtf(clip[0] * clip[0] + clip[1] * clip[1]); +} + +uint32_t AccumDepthOfField::GetSampleCount() +{ + switch(crp_accumDof_preview->integer) + { + case 1: return 3; + case 2: return 5; + default: break; + } + + return 2 * crp_accumDof_samples->integer + 1; +} + +void AccumDepthOfField::FixCommand(drawSceneViewCommand_t& newCmd, const drawSceneViewCommand_t& cmd, uint32_t x, uint32_t y) +{ + const float radius = crp_accumDof_radius->value; + const float tcScale = 1.0f / (float)(GetSampleCount() - 1); + + vec2_t square01; + square01[0] = x * tcScale; + square01[1] = y * tcScale; + vec2_t polygon; + MapUnitSquareToPolygon(square01, crp_dof_blades->value, DEG2RAD(crp_dof_angle->value), polygon); + + // 0=forward, 1=left, 2=up + vec3_t axis[3]; + VectorCopy(cmd.viewParms.orient.axis[0], axis[0]); + VectorCopy(cmd.viewParms.orient.axis[1], axis[1]); + VectorCopy(cmd.viewParms.orient.axis[2], axis[2]); + + vec3_t origin; + VectorMA(cmd.viewParms.world.viewOrigin, radius * polygon[0], axis[1], origin); + VectorMA(origin, radius * polygon[1], axis[2], origin); + + vec3_t focusPoint; + VectorMA(cmd.viewParms.world.viewOrigin, crp_accumDof_focusDist->value, axis[0], focusPoint); 
+ VectorSubtract(focusPoint, origin, axis[0]); // forward + VectorNormalize(axis[0]); + CrossProduct(axis[2], axis[0], axis[1]); // left + VectorNormalize(axis[1]); + CrossProduct(axis[0], axis[1], axis[2]); // up + VectorNormalize(axis[2]); + + newCmd = cmd; + VectorCopy(origin, newCmd.viewParms.orient.origin); + VectorCopy(origin, newCmd.viewParms.world.viewOrigin); + R_CreateWorldModelMatrix(origin, axis, newCmd.viewParms.world.modelMatrix); + newCmd.viewParms.viewportWidth /= GetResolutionScale(); + newCmd.viewParms.viewportHeight /= GetResolutionScale(); + + if(x == 0 && y == 0) + { + memcpy(modelViewMatrix, newCmd.viewParms.world.modelMatrix, sizeof(modelViewMatrix)); + memcpy(projMatrix, newCmd.viewParms.projectionMatrix, sizeof(projMatrix)); + } +} + +void AccumDepthOfField::Accumulate() +{ + srp.renderMode = RenderMode::None; + + SCOPED_RENDER_PASS("DOF Accum", 0.5f, 1.0f, 0.5f); + + const TextureBarrier texBarriers[] = + { + TextureBarrier(crp.renderTarget, ResourceStates::PixelShaderAccessBit), + TextureBarrier(accumTexture, ResourceStates::RenderTargetBit) + }; + CmdBarrier(ARRAY_LEN(texBarriers), texBarriers); + + DOFAccumRC rc = {}; + rc.textureIndex = GetTextureIndexSRV(crp.renderTarget); + + CmdBindRenderTargets(1, &accumTexture, NULL); + CmdBindPipeline(accumPipeline); + CmdSetGraphicsRootConstants(0, sizeof(rc), &rc); + CmdDraw(3, 0); + +} + +void AccumDepthOfField::Normalize() +{ + srp.renderMode = RenderMode::None; + + { + SCOPED_RENDER_PASS("DOF Norm", 0.5f, 1.0f, 0.5f); + + const TextureBarrier texBarriers[] = + { + TextureBarrier(accumTexture, ResourceStates::PixelShaderAccessBit), + TextureBarrier(crp.renderTarget, ResourceStates::RenderTargetBit) + }; + CmdBarrier(ARRAY_LEN(texBarriers), texBarriers); + + DOFNormRC rc = {}; + rc.textureIndex = GetTextureIndexSRV(accumTexture); + + CmdBindRenderTargets(1, &crp.renderTarget, NULL); + CmdBindPipeline(normPipeline); + CmdSetGraphicsRootConstants(0, sizeof(rc), &rc); + CmdDraw(3, 0); + } + + 
if(crp_accumDof_preview->integer) + { + crp.SwapRenderTargets(); + const float scale = 1.0f / (float)GetResolutionScale(); + const vec2_t tcScale = { scale, scale }; + crp.Blit(crp.GetWriteRenderTarget(), crp.GetReadRenderTarget(), "DOF Upscale", true, tcScale, vec2_zero); + } +} + +void AccumDepthOfField::DrawDebug() +{ + if(crp_dof_overlay->integer == 0) + { + return; + } + + srp.renderMode = RenderMode::None; + + SCOPED_RENDER_PASS("DOF Debug", 0.5f, 1.0f, 0.5f); + + crp.SwapRenderTargets(); + const TextureBarrier texBarriers[] = + { + TextureBarrier(crp.GetReadRenderTarget(), ResourceStates::PixelShaderAccessBit), + TextureBarrier(crp.depthTexture, ResourceStates::PixelShaderAccessBit), + TextureBarrier(crp.GetWriteRenderTarget(), ResourceStates::RenderTargetBit) + }; + CmdBarrier(ARRAY_LEN(texBarriers), texBarriers); + + float mvp[16]; + DOFDebugRC rc = {}; + rc.colorTextureIndex = GetTextureIndexSRV(crp.GetReadRenderTarget()); + rc.depthTextureIndex = GetTextureIndexSRV(crp.depthTexture); + rc.debugMode = crp_dof_overlay->integer; + rc.focusDist = crp_accumDof_focusDist->value; + rc.maxNearCocCS = maxNearCocCS; + rc.maxFarCocCS = maxFarCocCS; + rc.tcScale = GetResolutionScale(); + R_MultMatrix(modelViewMatrix, projMatrix, rc.mvp); + RB_LinearDepthConstants(&rc.linearDepthA, &rc.linearDepthB); + R_MultMatrix(backEnd.viewParms.world.modelMatrix, backEnd.viewParms.projectionMatrix, mvp); + R_InvMatrix(mvp, rc.invMvp); + + CmdBindRenderTargets(1, &crp.renderTarget, NULL); + CmdBindPipeline(debugPipeline); + CmdSetGraphicsRootConstants(0, sizeof(rc), &rc); + CmdDraw(3, 0); +} diff --git a/code/renderer/crp_dof_gather.cpp b/code/renderer/crp_dof_gather.cpp new file mode 100644 index 0000000..5edf6b6 --- /dev/null +++ b/code/renderer/crp_dof_gather.cpp @@ -0,0 +1,479 @@ +/* +=========================================================================== +Copyright (C) 2023-2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). 
+ +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>. +=========================================================================== +*/ +// Cinematic Rendering Pipeline - scatter-as-gather depth of field + + +#include "crp_local.h" +namespace debug +{ +#include "compshaders/crp/gatherdof_debug_vs.h" +#include "compshaders/crp/gatherdof_debug_ps.h" +} +namespace split +{ +#include "compshaders/crp/gatherdof_split.h" +} +namespace near_coc_tile_gen +{ +#include "compshaders/crp/gatherdof_coc_tile_gen.h" +} +namespace near_coc_tile_max +{ +#include "compshaders/crp/gatherdof_coc_tile_max.h" +} +namespace blur +{ +#include "compshaders/crp/gatherdof_blur.h" +} +namespace fill +{ +#include "compshaders/crp/gatherdof_fill.h" +} +namespace combine +{ +#include "compshaders/crp/gatherdof_combine_vs.h" +#include "compshaders/crp/gatherdof_combine_ps.h" +} + + +#pragma pack(push, 4) + +struct DOFDebugRC +{ + uint32_t colorTextureIndex; + uint32_t depthTextureIndex; + uint32_t debugMode; + float linearDepthA; + float linearDepthB; + float focusNearMin; + float focusNearMax; + float focusFarMin; + float focusFarMax; + float focusDist; +}; + +struct DOFSplitRC +{ + uint32_t depthTextureIndex; + uint32_t colorTextureIndex; + uint32_t nearColorTextureIndex; + uint32_t farColorTextureIndex; + uint32_t nearCocTextureIndex; + uint32_t farCocTextureIndex; + float linearDepthA; + float linearDepthB; + float focusNearMin; + float 
focusNearMax; + float focusFarMin; + float focusFarMax; + float brightnessScale; +}; + +struct DOFNearCocMaxRC +{ + uint32_t inputTextureIndex; + uint32_t outputTextureIndex; + uint32_t samplerIndex; + int32_t kernelRadius; + float kernelDirectionX; + float kernelDirectionY; +}; + +struct DOFNearCocBlurRC +{ + uint32_t inputTextureIndex; + uint32_t outputTextureIndex; + uint32_t samplerIndex; + int32_t kernelRadius; + float kernelDirectionX; + float kernelDirectionY; +}; + +struct DOFNearCocTileGenRC +{ + uint32_t inputTextureIndex; + uint32_t outputTextureIndex; +}; + +struct DOFNearCocTileMaxRC +{ + uint32_t inputTextureIndex; + uint32_t outputTextureIndex; + uint32_t samplerIndex; // point/clamp +}; + +struct DOFBlurRC +{ + uint32_t colorTextureIndex; + uint32_t nearColorTextureIndex; + uint32_t nearMaxCocTextureIndex; + uint32_t nearCocTextureIndex; // blurry + uint32_t nearOutputTextureIndex; + uint32_t farColorTextureIndex; + uint32_t farCocTextureIndex; // sharp + uint32_t farOutputTextureIndex; + uint32_t samplerIndex; // linear/clamp + float brightnessScale; + float bladeCount; + float bokehAngleRad; +}; + +struct DOFFillRC +{ + uint32_t nearInputTextureIndex; + uint32_t nearOutputTextureIndex; + uint32_t farInputTextureIndex; + uint32_t farOutputTextureIndex; + uint32_t samplerIndex; // point/clamp +}; + +struct DOFCombineRC +{ + uint32_t nearTextureIndex; + uint32_t farTextureIndex; + uint32_t nearCocTextureIndex; + uint32_t farCocTextureIndex; + uint32_t sharpTextureIndex; + uint32_t samplerIndex; // point/clamp +}; + +#pragma pack(pop) + + +void GatherDepthOfField::Init() +{ + const TextureFormat::Id renderTargetFormat = TextureFormat::RGBA64_Float; + + tileWidth = (uint32_t)(glConfig.vidWidth + 15) / 16; + tileHeight = (uint32_t)(glConfig.vidHeight + 15) / 16; + + { + ComputePipelineDesc desc("DOF split"); + desc.shortLifeTime = true; + desc.shader = ShaderByteCode(split::g_cs); + splitPipeline = CreateComputePipeline(desc); + } + + { + 
ComputePipelineDesc desc("DOF near CoC tile generation"); + desc.shortLifeTime = true; + desc.shader = ShaderByteCode(near_coc_tile_gen::g_cs); + nearCocTileGenPipeline = CreateComputePipeline(desc); + } + + { + ComputePipelineDesc desc("DOF near CoC tile dilation"); + desc.shortLifeTime = true; + desc.shader = ShaderByteCode(near_coc_tile_max::g_cs); + nearCocTileMaxPipeline = CreateComputePipeline(desc); + } + + { + ComputePipelineDesc desc("DOF blur"); + desc.shortLifeTime = true; + desc.shader = ShaderByteCode(blur::g_cs); + blurPipeline = CreateComputePipeline(desc); + } + + { + ComputePipelineDesc desc("DOF fill"); + desc.shortLifeTime = true; + desc.shader = ShaderByteCode(fill::g_cs); + fillPipeline = CreateComputePipeline(desc); + } + + { + GraphicsPipelineDesc desc("DOF combine"); + desc.shortLifeTime = true; + desc.vertexShader = ShaderByteCode(combine::g_vs); + desc.pixelShader = ShaderByteCode(combine::g_ps); + desc.depthStencil.DisableDepth(); + desc.rasterizer.cullMode = CT_TWO_SIDED; + desc.AddRenderTarget(0, renderTargetFormat); + combinePipeline = CreateGraphicsPipeline(desc); + } + + { + GraphicsPipelineDesc desc("DOF viz"); + desc.shortLifeTime = true; + desc.vertexShader = ShaderByteCode(debug::g_vs); + desc.pixelShader = ShaderByteCode(debug::g_ps); + desc.depthStencil.DisableDepth(); + desc.rasterizer.cullMode = CT_TWO_SIDED; + desc.AddRenderTarget(0, renderTargetFormat); + debugPipeline = CreateGraphicsPipeline(desc); + } + + { + TextureDesc desc("DOF far field color", glConfig.vidWidth, glConfig.vidHeight); + desc.shortLifeTime = true; + desc.committedResource = true; + desc.initialState = ResourceStates::UnorderedAccessBit; + desc.allowedState = ResourceStates::UnorderedAccessBit | ResourceStates::ComputeShaderAccessBit | ResourceStates::PixelShaderAccessBit; + desc.format = renderTargetFormat; + farColorTexture = CreateTexture(desc); + desc.name = "DOF near field color"; + nearColorTexture = CreateTexture(desc); + desc.name = "DOF near 
field blurred color"; + nearBlurTexture = CreateTexture(desc); + desc.name = "DOF far field blurred color"; + farBlurTexture = CreateTexture(desc); + + desc.format = TextureFormat::R8_UNorm; + desc.name = "DOF near field CoC #1"; + nearCocTexture = CreateTexture(desc); + desc.name = "DOF near field CoC #2"; + nearCocTexture2 = CreateTexture(desc); + desc.name = "DOF far field CoC"; + farCocTexture = CreateTexture(desc); + + desc.width = tileWidth; + desc.height = tileHeight; + desc.name = "DOF near field CoC tile #1"; + nearCocTileTexture = CreateTexture(desc); + desc.name = "DOF near field CoC tile #2"; + nearCocTileTexture2 = CreateTexture(desc); + } +} + +void GatherDepthOfField::Draw() +{ + if(crp_dof->integer != DOFMethod::Gather) + { + return; + } + + if(backEnd.viewParms.viewportX != 0 || + backEnd.viewParms.viewportY != 0 || + backEnd.viewParms.viewportWidth != glConfig.vidWidth || + backEnd.viewParms.viewportHeight != glConfig.vidHeight) + { + return; + } + + DrawSplit(); + DrawNearCocTileGen(); + DrawNearCocTileMax(); + DrawBlur(); + DrawFill(); + DrawCombine(); + DrawDebug(); +} + +void GatherDepthOfField::DrawDebug() +{ + if(crp_dof_overlay->integer == 0) + { + return; + } + + SCOPED_RENDER_PASS("DOF Debug", 0.125f, 0.125f, 0.25f); + + crp.SwapRenderTargets(); + const TextureBarrier barriers[] = + { + TextureBarrier(crp.GetReadRenderTarget(), ResourceStates::PixelShaderAccessBit), + TextureBarrier(crp.depthTexture, ResourceStates::PixelShaderAccessBit), + TextureBarrier(crp.GetWriteRenderTarget(), ResourceStates::RenderTargetBit) + }; + CmdBarrier(ARRAY_LEN(barriers), barriers); + + DOFDebugRC rc = {}; + rc.colorTextureIndex = GetTextureIndexSRV(crp.GetReadRenderTarget()); + rc.depthTextureIndex = GetTextureIndexSRV(crp.depthTexture); + rc.debugMode = crp_dof_overlay->integer; + RB_LinearDepthConstants(&rc.linearDepthA, &rc.linearDepthB); + rc.focusNearMin = crp_gatherDof_focusNearDist->value - 0.5f * crp_gatherDof_focusNearRange->value; + 
rc.focusNearMax = crp_gatherDof_focusNearDist->value + 0.5f * crp_gatherDof_focusNearRange->value; + rc.focusFarMin = crp_gatherDof_focusFarDist->value - 0.5f * crp_gatherDof_focusFarRange->value; + rc.focusFarMax = crp_gatherDof_focusFarDist->value + 0.5f * crp_gatherDof_focusFarRange->value; + rc.focusDist = 0.5f * (rc.focusNearMax + rc.focusFarMin); + + CmdBindRenderTargets(1, &crp.renderTarget, NULL); + CmdBindPipeline(debugPipeline); + CmdSetGraphicsRootConstants(0, sizeof(rc), &rc); + CmdDraw(3, 0); +} + +void GatherDepthOfField::DrawSplit() +{ + SCOPED_RENDER_PASS("DOF Split", 0.125f, 0.125f, 0.25f); + + const TextureBarrier barriers[] = + { + TextureBarrier(crp.depthTexture, ResourceStates::ComputeShaderAccessBit), + TextureBarrier(crp.renderTarget, ResourceStates::ComputeShaderAccessBit), + TextureBarrier(nearColorTexture, ResourceStates::UnorderedAccessBit), + TextureBarrier(farColorTexture, ResourceStates::UnorderedAccessBit), + TextureBarrier(nearCocTexture, ResourceStates::UnorderedAccessBit), + TextureBarrier(farCocTexture, ResourceStates::UnorderedAccessBit) + }; + CmdBarrier(ARRAY_LEN(barriers), barriers); + + DOFSplitRC rc = {}; + rc.depthTextureIndex = GetTextureIndexSRV(crp.depthTexture); + rc.colorTextureIndex = GetTextureIndexSRV(crp.renderTarget); + rc.nearColorTextureIndex = GetTextureIndexUAV(nearColorTexture, 0); + rc.farColorTextureIndex = GetTextureIndexUAV(farColorTexture, 0); + rc.nearCocTextureIndex = GetTextureIndexUAV(nearCocTexture, 0); + rc.farCocTextureIndex = GetTextureIndexUAV(farCocTexture, 0); + RB_LinearDepthConstants(&rc.linearDepthA, &rc.linearDepthB); + rc.focusNearMin = crp_gatherDof_focusNearDist->value - 0.5f * crp_gatherDof_focusNearRange->value; + rc.focusNearMax = crp_gatherDof_focusNearDist->value + 0.5f * crp_gatherDof_focusNearRange->value; + rc.focusFarMin = crp_gatherDof_focusFarDist->value - 0.5f * crp_gatherDof_focusFarRange->value; + rc.focusFarMax = crp_gatherDof_focusFarDist->value + 0.5f * 
crp_gatherDof_focusFarRange->value; + rc.brightnessScale = crp_gatherDof_brightness->value; + + CmdBindPipeline(splitPipeline); + CmdSetComputeRootConstants(0, sizeof(rc), &rc); + CmdDispatch((glConfig.vidWidth + 7) / 8, (glConfig.vidHeight + 7) / 8, 1); +} + +void GatherDepthOfField::DrawNearCocTileGen() +{ + SCOPED_RENDER_PASS("DOF Tile Gen", 0.125f, 0.125f, 0.25f); + + const TextureBarrier barriers[] = + { + TextureBarrier(nearCocTexture, ResourceStates::ComputeShaderAccessBit), + TextureBarrier(nearCocTileTexture, ResourceStates::UnorderedAccessBit) + }; + CmdBarrier(ARRAY_LEN(barriers), barriers); + + DOFNearCocTileGenRC rc = {}; + rc.inputTextureIndex = GetTextureIndexSRV(nearCocTexture); + rc.outputTextureIndex = GetTextureIndexUAV(nearCocTileTexture, 0); + + CmdBindPipeline(nearCocTileGenPipeline); + CmdSetComputeRootConstants(0, sizeof(rc), &rc); + CmdDispatch((tileWidth + 7) / 8, (tileHeight + 7) / 8, 1); +} + +void GatherDepthOfField::DrawNearCocTileMax() +{ + SCOPED_RENDER_PASS("DOF Tile Max", 0.125f, 0.125f, 0.25f); + + const TextureBarrier barriers[] = + { + TextureBarrier(nearCocTileTexture, ResourceStates::ComputeShaderAccessBit), + TextureBarrier(nearCocTileTexture2, ResourceStates::UnorderedAccessBit) + }; + CmdBarrier(ARRAY_LEN(barriers), barriers); + + DOFNearCocTileMaxRC rc = {}; + rc.inputTextureIndex = GetTextureIndexSRV(nearCocTileTexture); + rc.outputTextureIndex = GetTextureIndexUAV(nearCocTileTexture2, 0); + rc.samplerIndex = GetSamplerIndex(TW_CLAMP_TO_EDGE, TextureFilter::Point); + + CmdBindPipeline(nearCocTileMaxPipeline); + CmdSetComputeRootConstants(0, sizeof(rc), &rc); + CmdDispatch((tileWidth + 7) / 8, (tileHeight + 7) / 8, 1); +} + +void GatherDepthOfField::DrawBlur() +{ + SCOPED_RENDER_PASS("DOF Blur", 0.125f, 0.125f, 0.25f); + + const TextureBarrier barriers[] = + { + TextureBarrier(crp.renderTarget, ResourceStates::ComputeShaderAccessBit), + TextureBarrier(nearColorTexture, ResourceStates::ComputeShaderAccessBit), + 
TextureBarrier(farColorTexture, ResourceStates::ComputeShaderAccessBit), + TextureBarrier(nearCocTexture, ResourceStates::ComputeShaderAccessBit), + TextureBarrier(nearCocTileTexture2, ResourceStates::ComputeShaderAccessBit), + TextureBarrier(farCocTexture, ResourceStates::ComputeShaderAccessBit), + TextureBarrier(nearBlurTexture, ResourceStates::UnorderedAccessBit), + TextureBarrier(farBlurTexture, ResourceStates::UnorderedAccessBit) + }; + CmdBarrier(ARRAY_LEN(barriers), barriers); + + DOFBlurRC rc = {}; + rc.colorTextureIndex = GetTextureIndexSRV(crp.renderTarget); + rc.nearColorTextureIndex = GetTextureIndexSRV(nearColorTexture); + rc.nearMaxCocTextureIndex = GetTextureIndexSRV(nearCocTileTexture2); + rc.nearCocTextureIndex = GetTextureIndexSRV(nearCocTexture); + rc.nearOutputTextureIndex = GetTextureIndexUAV(nearBlurTexture, 0); + rc.farColorTextureIndex = GetTextureIndexSRV(farColorTexture); + rc.farCocTextureIndex = GetTextureIndexSRV(farCocTexture); + rc.farOutputTextureIndex = GetTextureIndexUAV(farBlurTexture, 0); + rc.samplerIndex = GetSamplerIndex(TW_CLAMP_TO_EDGE, TextureFilter::Linear); + rc.brightnessScale = crp_gatherDof_brightness->value; + rc.bladeCount = crp_dof_blades->value; + rc.bokehAngleRad = DEG2RAD(crp_dof_angle->value); + + CmdBindPipeline(blurPipeline); + CmdSetComputeRootConstants(0, sizeof(rc), &rc); + CmdDispatch((glConfig.vidWidth + 7) / 8, (glConfig.vidHeight + 7) / 8, 1); +} + +void GatherDepthOfField::DrawFill() +{ + SCOPED_RENDER_PASS("DOF Fill", 0.125f, 0.125f, 0.25f); + + const TextureBarrier barriers[] = + { + TextureBarrier(nearBlurTexture, ResourceStates::ComputeShaderAccessBit), + TextureBarrier(farBlurTexture, ResourceStates::ComputeShaderAccessBit), + TextureBarrier(nearColorTexture, ResourceStates::UnorderedAccessBit), + TextureBarrier(farColorTexture, ResourceStates::UnorderedAccessBit) + }; + CmdBarrier(ARRAY_LEN(barriers), barriers); + + DOFFillRC rc = {}; + rc.nearInputTextureIndex = 
GetTextureIndexSRV(nearBlurTexture);
+	rc.farInputTextureIndex = GetTextureIndexSRV(farBlurTexture);
+	rc.nearOutputTextureIndex = GetTextureIndexUAV(nearColorTexture, 0);
+	rc.farOutputTextureIndex = GetTextureIndexUAV(farColorTexture, 0);
+	rc.samplerIndex = GetSamplerIndex(TW_CLAMP_TO_EDGE, TextureFilter::Point);
+
+	CmdBindPipeline(fillPipeline);
+	CmdSetComputeRootConstants(0, sizeof(rc), &rc);
+	CmdDispatch((glConfig.vidWidth + 7) / 8, (glConfig.vidHeight + 7) / 8, 1);
+}
+
+void GatherDepthOfField::DrawCombine() // feeds the near/far layers, their CoC textures and the sharp scene color to the combine shader
+{
+	SCOPED_RENDER_PASS("DOF Combine", 0.125f, 0.125f, 0.25f);
+	crp.SwapRenderTargets(); // ping-pong first, like DrawDebug: the scene color becomes the read target (assumes the swap repoints crp.renderTarget at the write target)
+	const TextureBarrier barriers[] =
+	{
+		TextureBarrier(nearColorTexture, ResourceStates::PixelShaderAccessBit),
+		TextureBarrier(farColorTexture, ResourceStates::PixelShaderAccessBit),
+		TextureBarrier(nearCocTexture, ResourceStates::PixelShaderAccessBit),
+		TextureBarrier(farCocTexture, ResourceStates::PixelShaderAccessBit),
+		TextureBarrier(crp.GetReadRenderTarget(), ResourceStates::PixelShaderAccessBit),
+		TextureBarrier(crp.GetWriteRenderTarget(), ResourceStates::RenderTargetBit)
+	};
+	CmdBarrier(ARRAY_LEN(barriers), barriers);
+
+	DOFCombineRC rc = {};
+	rc.nearTextureIndex = GetTextureIndexSRV(nearColorTexture);
+	rc.farTextureIndex = GetTextureIndexSRV(farColorTexture);
+	rc.nearCocTextureIndex = GetTextureIndexSRV(nearCocTexture);
+	rc.farCocTextureIndex = GetTextureIndexSRV(farCocTexture);
+	rc.sharpTextureIndex = GetTextureIndexSRV(crp.GetReadRenderTarget()); // sample the read target: it must not alias the render target bound below
+	rc.samplerIndex = GetSamplerIndex(TW_CLAMP_TO_EDGE, TextureFilter::Point);
+
+	CmdBindRenderTargets(1, &crp.renderTarget, NULL); // no depth buffer; the pipeline was created with depth disabled
+	CmdBindPipeline(combinePipeline);
+	CmdSetGraphicsRootConstants(0, sizeof(rc), &rc);
+	CmdDraw(3, 0); // 3 vertices, no vertex buffer is bound by this pass
+}
diff --git a/code/renderer/crp_fog.cpp b/code/renderer/crp_fog.cpp
new file mode 100644
index 0000000..a067d1b
--- /dev/null
+++ b/code/renderer/crp_fog.cpp
@@ -0,0 +1,208 @@
+/*
+===========================================================================
+Copyright (C) 2024 Gian 'myT'
Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . +=========================================================================== +*/ +// Cinematic Rendering Pipeline - fog volumes + + +#include "crp_local.h" +namespace fog_outside +{ +#include "compshaders/crp/fog_outside_vs.h" +#include "compshaders/crp/fog_outside_ps.h" +} +namespace fog_inside +{ +#include "compshaders/crp/fog_inside_vs.h" +#include "compshaders/crp/fog_inside_ps.h" +} + + +#pragma pack(push, 4) +struct FogRC +{ + float modelViewMatrix[16]; + float projectionMatrix[16]; + float boxMin[4]; + float boxMax[4]; + float color[4]; + float depth; + float linearDepthA; + float linearDepthB; + uint32_t depthTextureIndex; +}; +#pragma pack(pop) + + +void Fog::Init() +{ + { + const uint32_t indices[] = + { + 0, 1, 2, 2, 1, 3, + 4, 0, 6, 6, 0, 2, + 7, 5, 6, 6, 5, 4, + 3, 1, 7, 7, 1, 5, + 4, 5, 0, 0, 5, 1, + 3, 7, 2, 2, 7, 6 + }; + + BufferDesc desc("box index", sizeof(indices), ResourceStates::IndexBufferBit); + desc.shortLifeTime = true; + boxIndexBuffer = CreateBuffer(desc); + + uint8_t* mapped = BeginBufferUpload(boxIndexBuffer); + memcpy(mapped, indices, sizeof(indices)); + EndBufferUpload(boxIndexBuffer); + } + { + const float vertices[] = + { + 0.0f, 1.0f, 0.0f, + 1.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, + 1.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, + 0.0f, 0.0f, 1.0f, + 1.0f, 0.0f, 
1.0f + }; + + BufferDesc desc("box vertex", sizeof(vertices), ResourceStates::VertexBufferBit); + desc.shortLifeTime = true; + boxVertexBuffer = CreateBuffer(desc); + + uint8_t* mapped = BeginBufferUpload(boxVertexBuffer); + memcpy(mapped, vertices, sizeof(vertices)); + EndBufferUpload(boxVertexBuffer); + } + { + GraphicsPipelineDesc desc("fog outside"); + desc.shortLifeTime = true; + desc.vertexShader = ShaderByteCode(fog_outside::g_vs); + desc.pixelShader = ShaderByteCode(fog_outside::g_ps); + desc.depthStencil.DisableDepth(); + desc.rasterizer.cullMode = CT_BACK_SIDED; + desc.rasterizer.polygonOffset = false; + desc.rasterizer.clampDepth = true; + desc.AddRenderTarget(GLS_SRCBLEND_SRC_ALPHA | GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA, crp.renderTargetFormat); + desc.vertexLayout.AddAttribute(0, ShaderSemantic::Position, DataType::Float32, 3, 0); + fogOutsidePipeline = CreateGraphicsPipeline(desc); + } + { + GraphicsPipelineDesc desc("fog inside"); + desc.shortLifeTime = true; + desc.vertexShader = ShaderByteCode(fog_inside::g_vs); + desc.pixelShader = ShaderByteCode(fog_inside::g_ps); + desc.depthStencil.DisableDepth(); + desc.rasterizer.cullMode = CT_FRONT_SIDED; + desc.rasterizer.polygonOffset = false; + desc.rasterizer.clampDepth = true; + desc.AddRenderTarget(GLS_SRCBLEND_SRC_ALPHA | GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA, crp.renderTargetFormat); + desc.vertexLayout.AddAttribute(0, ShaderSemantic::Position, DataType::Float32, 3, 0); + fogInsidePipeline = CreateGraphicsPipeline(desc); + } +} + +void Fog::Draw() +{ + // @NOTE: fog 0 is invalid, it must be skipped + if(tr.world == NULL || + tr.world->numfogs <= 1 || + (backEnd.refdef.rdflags & RDF_NOWORLDMODEL) != 0) + { + return; + } + + SCOPED_RENDER_PASS("Fog", 0.25f, 0.125f, 0.0f); + + srp.renderMode = RenderMode::World; + + const uint32_t stride = sizeof(vec3_t); + CmdBindVertexBuffers(1, &boxVertexBuffer, &stride, NULL); + CmdBindIndexBuffer(boxIndexBuffer, IndexType::UInt32, 0); + + const TextureBarrier barriers[] = + 
{ + TextureBarrier(crp.depthTexture, ResourceStates::PixelShaderAccessBit), + TextureBarrier(crp.renderTarget, ResourceStates::RenderTargetBit) + }; + CmdBarrier(ARRAY_LEN(barriers), barriers); + + CmdBindRenderTargets(1, &crp.renderTarget, NULL); + + int insideIndex = -1; + for(int f = 1; f < tr.world->numfogs; ++f) + { + const fog_t& fog = tr.world->fogs[f]; + + bool inside = true; + for(int a = 0; a < 3; ++a) + { + if(backEnd.viewParms.orient.origin[a] <= fog.bounds[0][a] || + backEnd.viewParms.orient.origin[a] >= fog.bounds[1][a]) + { + inside = false; + break; + } + } + + if(inside) + { + insideIndex = f; + break; + } + } + + FogRC rc = {}; + memcpy(rc.modelViewMatrix, backEnd.viewParms.world.modelMatrix, sizeof(rc.modelViewMatrix)); + memcpy(rc.projectionMatrix, backEnd.viewParms.projectionMatrix, sizeof(rc.projectionMatrix)); + RB_LinearDepthConstants(&rc.linearDepthA, &rc.linearDepthB); + rc.depthTextureIndex = GetTextureIndexSRV(crp.depthTexture); + + CmdBindPipeline(fogOutsidePipeline); + for(int f = 1; f < tr.world->numfogs; ++f) + { + if(f == insideIndex) + { + continue; + } + + const fog_t& fog = tr.world->fogs[f]; + VectorScale(fog.parms.color, tr.identityLight, rc.color); + rc.depth = fog.parms.depthForOpaque; + VectorCopy(fog.bounds[0], rc.boxMin); + VectorCopy(fog.bounds[1], rc.boxMax); + CmdSetGraphicsRootConstants(0, sizeof(rc), &rc); + CmdDrawIndexed(36, 0, 0); + } + + if(insideIndex > 0) + { + CmdBindPipeline(fogInsidePipeline); + + const fog_t& fog = tr.world->fogs[insideIndex]; + VectorScale(fog.parms.color, tr.identityLight, rc.color); + rc.depth = fog.parms.depthForOpaque; + VectorCopy(fog.bounds[0], rc.boxMin); + VectorCopy(fog.bounds[1], rc.boxMax); + CmdSetGraphicsRootConstants(0, sizeof(rc), &rc); + CmdDrawIndexed(36, 0, 0); + } +} diff --git a/code/renderer/crp_geometry.cpp b/code/renderer/crp_geometry.cpp new file mode 100644 index 0000000..966a1a8 --- /dev/null +++ b/code/renderer/crp_geometry.cpp @@ -0,0 +1,155 @@ +/* 
+=========================================================================== +Copyright (C) 2023-2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . +=========================================================================== +*/ +// Cinematic Rendering Pipeline - vertex and index buffer management + + +#include "crp_local.h" + + +void GeoBuffers::Create(const char* name, uint32_t vertexCount, uint32_t indexCount) +{ + baseVertexBuffers[BaseBufferId::Position].CreateVertexBuffer( + va("%s position", name), MemoryUsage::Upload, vertexCount, sizeof(vec3_t)); + baseVertexBuffers[BaseBufferId::Normal].CreateVertexBuffer( + va("%s normal", name), MemoryUsage::Upload, vertexCount, sizeof(vec3_t)); + stageVertexBuffers[StageBufferId::TexCoords].CreateVertexBuffer( + va("%s tc", name), MemoryUsage::Upload, vertexCount * MAX_SHADER_STAGES, sizeof(vec2_t)); + stageVertexBuffers[StageBufferId::Color].CreateVertexBuffer( + va("%s color", name), MemoryUsage::Upload, vertexCount * MAX_SHADER_STAGES, sizeof(color4ub_t)); + indexBuffer.Create(name, MemoryUsage::Upload, indexCount); + + vertexBuffers[0] = baseVertexBuffers[BaseBufferId::Position].buffer; + vertexBuffers[1] = baseVertexBuffers[BaseBufferId::Normal].buffer; + vertexBuffers[2] = stageVertexBuffers[StageBufferId::TexCoords].buffer; + vertexBuffers[3] = 
stageVertexBuffers[StageBufferId::Color].buffer; + vertexBufferStrides[0] = sizeof(vec3_t); + vertexBufferStrides[1] = sizeof(vec3_t); + vertexBufferStrides[2] = sizeof(vec2_t); + vertexBufferStrides[3] = sizeof(color4ub_t); +} + +void GeoBuffers::Rewind() +{ + for(uint32_t b = 0; b < ARRAY_LEN(baseVertexBuffers); ++b) + { + baseVertexBuffers[b].Rewind(); + } + for(uint32_t b = 0; b < ARRAY_LEN(stageVertexBuffers); ++b) + { + stageVertexBuffers[b].Rewind(); + } + indexBuffer.Rewind(); +} + +void GeoBuffers::BeginUpload() +{ + for(uint32_t b = 0; b < ARRAY_LEN(baseVertexBuffers); ++b) + { + baseVertexBuffers[b].BeginUpload(); + } + for(uint32_t b = 0; b < ARRAY_LEN(stageVertexBuffers); ++b) + { + stageVertexBuffers[b].BeginUpload(); + } + indexBuffer.BeginUpload(); +} + +void GeoBuffers::EndUpload() +{ + for(uint32_t b = 0; b < ARRAY_LEN(baseVertexBuffers); ++b) + { + baseVertexBuffers[b].EndUpload(); + } + for(uint32_t b = 0; b < ARRAY_LEN(stageVertexBuffers); ++b) + { + stageVertexBuffers[b].EndUpload(); + } + indexBuffer.EndUpload(); +} + +void GeoBuffers::UploadBase() +{ + indexBuffer.Upload(); + + const uint32_t batchOffset = baseVertexBuffers[0].batchFirst + baseVertexBuffers[0].batchCount; + + float* pos = (float*)baseVertexBuffers[BaseBufferId::Position].mapped + 3 * batchOffset; + for(int v = 0; v < tess.numVertexes; ++v) + { + pos[0] = tess.xyz[v][0]; + pos[1] = tess.xyz[v][1]; + pos[2] = tess.xyz[v][2]; + pos += 3; + } + + float* nor = (float*)baseVertexBuffers[BaseBufferId::Normal].mapped + 3 * batchOffset; + for(int v = 0; v < tess.numVertexes; ++v) + { + nor[0] = tess.normal[v][0]; + nor[1] = tess.normal[v][1]; + nor[2] = tess.normal[v][2]; + nor += 3; + } +} + +void GeoBuffers::UploadStage(uint32_t svarsIndex) +{ + const uint32_t batchOffset = stageVertexBuffers[0].batchFirst + stageVertexBuffers[0].batchCount; + const stageVars_t& sv = tess.svars[svarsIndex]; + + uint8_t* const tcBuffer = stageVertexBuffers[StageBufferId::TexCoords].mapped; + float* 
tc = (float*)tcBuffer + 2 * batchOffset; + memcpy(tc, &sv.texcoords[0], tess.numVertexes * sizeof(vec2_t)); + + uint8_t* const colBuffer = stageVertexBuffers[StageBufferId::Color].mapped; + uint32_t* col = (uint32_t*)colBuffer + batchOffset; + memcpy(col, &sv.colors[0], tess.numVertexes * sizeof(color4ub_t)); +} + +void GeoBuffers::EndBaseBatch(uint32_t vertexCount) +{ + baseVertexBuffers[BaseBufferId::Position].EndBatch(vertexCount); + baseVertexBuffers[BaseBufferId::Normal].EndBatch(vertexCount); + indexBuffer.EndBatch(tess.numIndexes); +} + +bool GeoBuffers::CanAdd(uint32_t vertexCount, uint32_t indexCount, uint32_t stageCount) +{ + return + baseVertexBuffers[0].CanAdd(vertexCount) && + stageVertexBuffers[0].CanAdd(vertexCount * stageCount) && + indexBuffer.CanAdd(indexCount); +} + +void GeoBuffers::DrawStage(uint32_t vertexCount, uint32_t indexCount) +{ + const uint32_t vertexOffset = stageVertexBuffers[0].batchFirst - baseVertexBuffers[0].batchFirst; + uint32_t byteOffsets[BaseBufferId::Count + StageBufferId::Count] = {}; + byteOffsets[BaseBufferId::Count + StageBufferId::TexCoords] = vertexOffset * sizeof(vec2_t); + byteOffsets[BaseBufferId::Count + StageBufferId::Color] = vertexOffset * sizeof(color4ub_t); + CmdBindVertexBuffers(ARRAY_LEN(vertexBuffers), vertexBuffers, vertexBufferStrides, byteOffsets); + + CmdDrawIndexed(indexCount, indexBuffer.batchFirst, baseVertexBuffers[0].batchFirst); + + // @NOTE: must happen after the final vertex buffer byte offsets have been computed + stageVertexBuffers[StageBufferId::TexCoords].EndBatch(vertexCount); + stageVertexBuffers[StageBufferId::Color].EndBatch(vertexCount); +} diff --git a/code/renderer/crp_local.h b/code/renderer/crp_local.h new file mode 100644 index 0000000..11ad82f --- /dev/null +++ b/code/renderer/crp_local.h @@ -0,0 +1,330 @@ +/* +=========================================================================== +Copyright (C) 2023 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). 
+ +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . +=========================================================================== +*/ +// Cinematic Rendering Pipeline - private declarations + + +#pragma once + + +#include "srp_local.h" + + +extern cvar_t* crp_dof; +extern cvar_t* crp_dof_overlay; +extern cvar_t* crp_dof_blades; +extern cvar_t* crp_dof_angle; +extern cvar_t* crp_gatherDof_focusNearDist; +extern cvar_t* crp_gatherDof_focusNearRange; +extern cvar_t* crp_gatherDof_focusFarDist; +extern cvar_t* crp_gatherDof_focusFarRange; +extern cvar_t* crp_gatherDof_brightness; +extern cvar_t* crp_accumDof_focusDist; +extern cvar_t* crp_accumDof_radius; +extern cvar_t* crp_accumDof_samples; +extern cvar_t* crp_accumDof_preview; + +struct DOFMethod +{ + enum Id + { + None, + Gather, + Accumulation, + Count + }; +}; + +struct Tessellator +{ + enum Id + { + None, + Opaque, + Transp, + Count + }; +}; + + +using namespace RHI; + + +struct WorldVertexRC +{ + float modelViewMatrix[16]; + float projectionMatrix[16]; + float clipPlane[4]; +}; + +struct PSOCache +{ + struct Entry + { + GraphicsPipelineDesc desc; + HPipeline handle; + }; + + void Init(Entry* entries, uint32_t maxEntryCount); + int AddPipeline(const GraphicsPipelineDesc& desc, const char* name); + + Entry* entries = NULL; + uint32_t maxEntryCount = 0; + uint32_t entryCount = 1; // we treat index 0 as invalid +}; + +struct WorldOpaque +{ + void Init(); + void 
Draw(const drawSceneViewCommand_t& cmd); + void ProcessShader(shader_t& shader); + void TessellationOverflow(); + void DrawSkyBox(); + void DrawClouds(); + +private: + void BeginBatch(const shader_t* shader); + void EndBatch(); + void EndSkyBatch(); + + PSOCache::Entry psoCacheEntries[128]; + PSOCache psoCache; + + float clipPlane[4]; + bool batchOldDepthHack; + bool batchDepthHack; +}; + +struct WorldTransp +{ + void Init(); + void Draw(const drawSceneViewCommand_t& cmd); + void ProcessShader(shader_t& shader); + void TessellationOverflow(); + +private: + void BeginBatch(const shader_t* shader); + void EndBatch(); + + PSOCache::Entry psoCacheEntries[32]; + PSOCache psoCache; + + float clipPlane[4]; + bool batchOldDepthHack; + bool batchDepthHack; +}; + +struct Fog +{ + void Init(); + void Draw(); + +private: + HBuffer boxIndexBuffer; + HBuffer boxVertexBuffer; + HPipeline fogInsidePipeline; + HPipeline fogOutsidePipeline; +}; + +struct TranspResolve +{ + void Init(); + void Draw(const drawSceneViewCommand_t& cmd); + +private: + HPipeline pipeline; +}; + +struct ToneMap +{ + void Init(); + void DrawToneMap(); + void DrawInverseToneMap(); + +private: + HPipeline pipeline; + HPipeline inversePipeline; +}; + +struct AccumDepthOfField +{ + void Init(); + void Begin(const drawSceneViewCommand_t& cmd); + uint32_t GetSampleCount(); + void FixCommand(drawSceneViewCommand_t& newCmd, const drawSceneViewCommand_t& cmd, uint32_t x, uint32_t y); + void Accumulate(); + void Normalize(); + void DrawDebug(); + +private: + HPipeline accumPipeline; + HPipeline normPipeline; + HPipeline debugPipeline; + HTexture accumTexture; + float maxNearCocCS; + float maxFarCocCS; + float modelViewMatrix[16]; + float projMatrix[16]; +}; + +struct GatherDepthOfField +{ + void Init(); + void Draw(); + +private: + void DrawDebug(); + void DrawSplit(); + void DrawNearCocTileGen(); + void DrawNearCocTileMax(); + void DrawBlur(); + void DrawFill(); + void DrawCombine(); + + HPipeline debugPipeline; + 
HPipeline splitPipeline; + HPipeline nearCocTileGenPipeline; + HPipeline nearCocTileMaxPipeline; + HPipeline blurPipeline; + HPipeline fillPipeline; + HPipeline combinePipeline; + HTexture nearColorTexture; + HTexture farColorTexture; + HTexture nearBlurTexture; + HTexture farBlurTexture; + HTexture nearCocTexture; + HTexture nearCocTexture2; + HTexture nearCocTileTexture; + HTexture nearCocTileTexture2; + HTexture farCocTexture; + uint32_t tileWidth; + uint32_t tileHeight; +}; + +struct BaseBufferId +{ + enum Id + { + Position, + Normal, + Count + }; +}; + +struct StageBufferId +{ + enum Id + { + TexCoords, + Color, + Count + }; +}; + +struct GeoBuffers +{ + void Create(const char* name, uint32_t vertexCount, uint32_t indexCount); + void Rewind(); + void BeginUpload(); + void EndUpload(); + void UploadBase(); + void UploadStage(uint32_t svarsIndex); + void EndBaseBatch(uint32_t vertexCount); + bool CanAdd(uint32_t vertexCount, uint32_t indexCount, uint32_t stageCount); + void DrawStage(uint32_t vertexCount, uint32_t indexCount); + + GeometryBuffer baseVertexBuffers[BaseBufferId::Count]; + GeometryBuffer stageVertexBuffers[StageBufferId::Count]; + IndexBuffer indexBuffer; + HBuffer vertexBuffers[BaseBufferId::Count + StageBufferId::Count]; + uint32_t vertexBufferStrides[BaseBufferId::Count + StageBufferId::Count]; +}; + +struct CRP : IRenderPipeline +{ + void Init() override; + void ShutDown(bool fullShutDown) override; + + void ProcessWorld(world_t& world) override; + void ProcessModel(model_t& model) override; + void ProcessShader(shader_t& shader) override; + + void CreateTexture(image_t* image, int mipCount, int width, int height) override; + void UpoadTextureAndGenerateMipMaps(image_t* image, const byte* data) override; + void BeginTextureUpload(MappedTexture& mappedTexture, image_t* image) override; + void EndTextureUpload() override; + + void ExecuteRenderCommands(const byte* data, bool readbackRequested) override; + void TessellationOverflow() override; + 
void DrawSkyBox() override { opaque.DrawSkyBox(); } + void DrawClouds() override { opaque.DrawClouds(); } + void ReadPixels(int w, int h, int alignment, colorSpace_t colorSpace, void* out) override; + + uint32_t GetSamplerDescriptorIndexFromBaseIndex(uint32_t baseIndex) override; + + void BeginFrame(); + void EndFrame(); + + void Blit(HTexture destination, HTexture source, const char* passName, bool hdr, const vec2_t tcScale, const vec2_t tcBias); + void BlitRenderTarget(HTexture destination, const char* passName); + void DrawSceneView(const drawSceneViewCommand_t& cmd); + + HTexture GetReadRenderTarget(); + HTexture GetWriteRenderTarget(); + void SwapRenderTargets(); + + // general + float frameSeed; + HTexture readbackRenderTarget; + HTexture depthTexture; + HTexture renderTarget; + TextureFormat::Id renderTargetFormat; + HTexture renderTargets[2]; + uint32_t renderTargetIndex; // the one to write to + HSampler samplers[BASE_SAMPLER_COUNT]; // all base samplers + uint32_t samplerIndices[BASE_SAMPLER_COUNT]; // descriptor heap indices + + // blit + HPipeline blitPipelineLDR; + HPipeline blitPipelineHDR; + + // world geometry + GeoBuffers dynBuffers[FrameCount]; // for rendering world surfaces + + // for rendering transparent world surfaces + HTexture oitIndexTexture; + HBuffer oitFragmentBuffer; + HBuffer oitCounterBuffer; + HBuffer oitCounterStagingBuffer; + + UI ui; + MipMapGenerator mipMapGen; + ImGUI imgui; + Nuklear nuklear; + WorldOpaque opaque; + WorldTransp transp; + TranspResolve transpResolve; + ToneMap toneMap; + GatherDepthOfField gatherDof; + AccumDepthOfField accumDof; + Fog fog; +}; + +extern CRP crp; diff --git a/code/renderer/crp_main.cpp b/code/renderer/crp_main.cpp new file mode 100644 index 0000000..1aaa241 --- /dev/null +++ b/code/renderer/crp_main.cpp @@ -0,0 +1,677 @@ +/* +=========================================================================== +Copyright (C) 2023-2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 
(CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>. +=========================================================================== +*/ +// Cinematic Rendering Pipeline - main interface + + +#include "crp_local.h" +#include "../client/cl_imgui.h" +#include "shaders/crp/oit.h.hlsli" +namespace blit +{ +#include "compshaders/crp/blit_vs.h" +#include "compshaders/crp/blit_ps.h" +} +namespace ui +{ +#include "compshaders/crp/ui_vs.h" +#include "compshaders/crp/ui_ps.h" +} +namespace imgui +{ +#include "compshaders/crp/imgui_vs.h" +#include "compshaders/crp/imgui_ps.h" +} +namespace nuklear +{ +#include "compshaders/crp/nuklear_vs.h" +#include "compshaders/crp/nuklear_ps.h" +} +namespace mip_1 +{ +#include "compshaders/crp/mip_1_cs.h" +} +namespace mip_2 +{ +#include "compshaders/crp/mip_2_cs.h" +} +namespace mip_3 +{ +#include "compshaders/crp/mip_3_cs.h" +} + + +CRP crp; +IRenderPipeline* crpp = &crp; + +cvar_t* crp_dof; +cvar_t* crp_dof_overlay; +cvar_t* crp_dof_blades; +cvar_t* crp_dof_angle; +cvar_t* crp_gatherDof_focusNearDist; +cvar_t* crp_gatherDof_focusNearRange; +cvar_t* crp_gatherDof_focusFarDist; +cvar_t* crp_gatherDof_focusFarRange; +cvar_t* crp_gatherDof_brightness; +cvar_t* crp_accumDof_focusDist; +cvar_t* crp_accumDof_radius; +cvar_t* crp_accumDof_samples; +cvar_t* crp_accumDof_preview; + +static const cvarTableItem_t crp_cvars[] = +{ + { + &crp_dof, "crp_dof", "1", CVAR_ARCHIVE, CVART_INTEGER, "0", 
"2", + "enables depth of field\n" + S_COLOR_VAL " 0 " S_COLOR_HELP "= Disabled\n" + S_COLOR_VAL " 1 " S_COLOR_HELP "= Gather (fast, more flexible, issues with transparency)\n" + S_COLOR_VAL " 2 " S_COLOR_HELP "= Accumulation (slow, less flexible, great IQ)\n", + "DoF mode", CVARCAT_GRAPHICS, "Depth of field mode", "", + CVAR_GUI_VALUE("0", "Disabled", "") + CVAR_GUI_VALUE("1", "Gather", "Fast, lower IQ") + CVAR_GUI_VALUE("2", "Accumulation", "Very slow, great IQ") + }, + { + &crp_dof_overlay, "crp_dof_overlay", "0", CVAR_ARCHIVE, CVART_INTEGER, "0", "2", + "debug overlay mode\n" + S_COLOR_VAL " 0 " S_COLOR_HELP "= Disabled\n" + S_COLOR_VAL " 1 " S_COLOR_HELP "= Colorized Blur\n" + S_COLOR_VAL " 2 " S_COLOR_HELP "= Focus Plane", + "DoF overlay mode", CVARCAT_GRAPHICS, "Debug overlay mode", "", + CVAR_GUI_VALUE("0", "Disabled", "") + CVAR_GUI_VALUE("1", "Colorized Blur", "") + CVAR_GUI_VALUE("2", "Focus Plane", "") + }, + { + &crp_dof_blades, "crp_dof_blades", "6", CVAR_ARCHIVE, CVART_FLOAT, "0", "16", + "aperture blade count\n" + "Set to less than 3 for a disk shape.", + "DoF blade count", CVARCAT_GRAPHICS, "Aperture blade count", "Set to less than 3 for a disk shape." 
+ }, + { + &crp_dof_angle, "crp_dof_angle", "20", CVAR_ARCHIVE, CVART_FLOAT, "0", "360", "aperture angle, in degrees", + "DoF aperture angle", CVARCAT_GRAPHICS, "Aperture angle, in degrees", "" + }, + { + &crp_accumDof_focusDist, "crp_accumDof_focusDist", "256", CVAR_ARCHIVE, CVART_FLOAT, "2", "2048", "focus distance", + "Accum DoF focus distance", CVARCAT_GRAPHICS, "Focus distance", "" + }, + { + &crp_accumDof_radius, "crp_accumDof_blurRadius", "0.1", CVAR_ARCHIVE, CVART_FLOAT, "0.001", "20", "aperture radius in world units", + "Accum DoF aperture radius", CVARCAT_GRAPHICS, "Aperture radius in world units", "" + }, + { + &crp_accumDof_samples, "crp_accumDof_samples", "2", CVAR_ARCHIVE, CVART_INTEGER, "1", "12", + "per-axis sampling density\n" + "Density N means (2N + 1)(2N + 1) scene renders in total.", + "Accum DoF sample count", CVARCAT_GRAPHICS, "Per-axis sampling density", "Density N means (2N + 1)^2 scene renders in total." + }, + { + &crp_accumDof_preview, "crp_accumDof_preview", "0", CVAR_ARCHIVE, CVART_INTEGER, "0", "2", + "low-res preview mode\n" + S_COLOR_VAL " 0 " S_COLOR_HELP "= Disabled\n" + S_COLOR_VAL " 1 " S_COLOR_HELP "= 1/4 pixel count, 9 samples total\n" + S_COLOR_VAL " 2 " S_COLOR_HELP "= 1/16 pixel count, 25 samples total", + "Accum DoF preview mode", CVARCAT_GRAPHICS, "Low-resolution preview modes", "", + CVAR_GUI_VALUE("0", "Disabled", "") + CVAR_GUI_VALUE("1", "1/4 pixel count", "9 samples total") + CVAR_GUI_VALUE("2", "1/16 pixel count", "25 samples total") + }, + { + &crp_gatherDof_focusNearDist, "crp_gatherDof_focusNearDist", "192", CVAR_ARCHIVE, CVART_FLOAT, "1", "2048", "near focus distance", + "Gather DoF near focus distance", CVARCAT_GRAPHICS, "Near focus distance", "" + }, + { + &crp_gatherDof_focusNearRange, "crp_gatherDof_focusNearRange", "256", CVAR_ARCHIVE, CVART_FLOAT, "1", "2048", "near focus range", + "Gather DoF near focus range", CVARCAT_GRAPHICS, "Near focus range", "" + }, + { + &crp_gatherDof_focusFarDist, 
"crp_gatherDof_focusFarDist", "512", CVAR_ARCHIVE, CVART_FLOAT, "1", "2048", "far focus distance", + "Gather DoF far focus distance", CVARCAT_GRAPHICS, "Far focus distance", "" + }, + { + &crp_gatherDof_focusFarRange, "crp_gatherDof_focusFarRange", "384", CVAR_ARCHIVE, CVART_FLOAT, "1", "2048", "far focus range", + "Gather DoF far focus range", CVARCAT_GRAPHICS, "Far focus range", "" + }, + { + &crp_gatherDof_brightness, "crp_gatherDof_brightness", "2", CVAR_ARCHIVE, CVART_FLOAT, "0", "8", "blur brightness weight", + "Gather DoF bokeh brightness", CVARCAT_GRAPHICS, "Blur brightness weight", "" + } +}; + + +void PSOCache::Init(Entry* entries_, uint32_t maxEntryCount_) +{ + entries = entries_; + maxEntryCount = maxEntryCount_; + entryCount = 1; // we treat index 0 as invalid +} + +int PSOCache::AddPipeline(const GraphicsPipelineDesc& desc, const char* name) +{ + // we treat index 0 as invalid, so start at 1 + for(uint32_t i = 1; i < entryCount; ++i) + { + Entry& entry = entries[i]; + if(memcmp(&entry.desc, &desc, sizeof(desc)) == 0) + { + return (int)i; + } + } + + ASSERT_OR_DIE(entryCount < maxEntryCount, "Not enough entries in the PSO cache"); + + GraphicsPipelineDesc namedDesc = desc; + namedDesc.name = name; + + const uint32_t index = entryCount++; + Entry& entry = entries[index]; + entry.desc = desc; // keep the original desc for proper comparison results + entry.handle = CreateGraphicsPipeline(namedDesc); + + return (int)index; +} + +void CRP::Init() +{ + static bool veryFirstInit = true; + if(veryFirstInit) + { + ri.Cvar_RegisterTable(crp_cvars, ARRAY_LEN(crp_cvars)); + veryFirstInit = false; + } + + InitDesc initDesc; + initDesc.directDescriptorHeapIndexing = true; + srp.firstInit = RHI::Init(initDesc); + srp.psoStatsValid = false; + + if(srp.firstInit) + { + srp.CreateShaderTraceBuffers(); + + for(uint32_t f = 0; f < FrameCount; ++f) + { + // the doubled index count is for the depth pre-pass + const int MaxDynamicVertexCount = 16 << 20; + const int 
MaxDynamicIndexCount = MaxDynamicVertexCount * 4; + GeoBuffers& db = dynBuffers[f]; + db.Create(va("world #%d", f + 1), MaxDynamicVertexCount, MaxDynamicIndexCount); + } + } + + // we recreate the samplers on every vid_restart to create the right level + // of anisotropy based on the latched CVar + for(uint32_t w = 0; w < TW_COUNT; ++w) + { + for(uint32_t f = 0; f < TextureFilter::Count; ++f) + { + for(uint32_t m = 0; m < MaxTextureMips; ++m) + { + const textureWrap_t wrap = (textureWrap_t)w; + const TextureFilter::Id filter = (TextureFilter::Id)f; + const uint32_t s = GetBaseSamplerIndex(wrap, filter, m); + SamplerDesc desc(wrap, filter, (float)m); + desc.shortLifeTime = true; + samplers[s] = CreateSampler(desc); + samplerIndices[s] = RHI::GetSamplerIndex(samplers[s]); + } + } + } + + { + renderTargetFormat = TextureFormat::RGBA64_Float; + + TextureDesc desc("render target #1", glConfig.vidWidth, glConfig.vidHeight); + desc.initialState = ResourceStates::RenderTargetBit; + desc.allowedState = ResourceStates::RenderTargetBit | ResourceStates::PixelShaderAccessBit; + Vector4Clear(desc.clearColor); + desc.usePreferredClearValue = true; + desc.committedResource = true; + desc.format = renderTargetFormat; + desc.shortLifeTime = true; + renderTargets[0] = RHI::CreateTexture(desc); + desc.name = "render target #2"; + renderTargets[1] = RHI::CreateTexture(desc); + renderTargetIndex = 0; + renderTarget = renderTargets[0]; + } + + { + TextureDesc desc("readback render target", glConfig.vidWidth, glConfig.vidHeight); + desc.initialState = ResourceStates::RenderTargetBit; + desc.allowedState = ResourceStates::RenderTargetBit | ResourceStates::PixelShaderAccessBit; + Vector4Clear(desc.clearColor); + desc.usePreferredClearValue = true; + desc.committedResource = true; + desc.format = TextureFormat::RGBA32_UNorm; + desc.shortLifeTime = true; + readbackRenderTarget = RHI::CreateTexture(desc); + } + + { + TextureDesc desc("OIT index", glConfig.vidWidth, glConfig.vidHeight); + 
desc.initialState = ResourceStates::UnorderedAccessBit; + desc.allowedState = ResourceStates::UnorderedAccessBit | ResourceStates::PixelShaderAccessBit | ResourceStates::ComputeShaderAccessBit; + desc.committedResource = true; + desc.format = TextureFormat::R32_UInt; + desc.shortLifeTime = true; + oitIndexTexture = RHI::CreateTexture(desc); + } + + uint32_t oitMaxFragmentCount = 0; + { + const int byteCountPerFragment = sizeof(OIT_Fragment); + const int fragmentCount = glConfig.vidWidth * glConfig.vidHeight * OIT_AVG_FRAGMENTS_PER_PIXEL; + const int byteCount = byteCountPerFragment * fragmentCount; + oitMaxFragmentCount = fragmentCount; + + BufferDesc desc("OIT fragment", byteCount, ResourceStates::UnorderedAccessBit); + desc.committedResource = true; + desc.memoryUsage = MemoryUsage::GPU; + desc.structureByteCount = byteCountPerFragment; + desc.shortLifeTime = true; + oitFragmentBuffer = CreateBuffer(desc); + } + + { + const int byteCount = sizeof(OIT_Counter); + + { + BufferDesc desc("OIT counter", byteCount, ResourceStates::UnorderedAccessBit); + desc.committedResource = true; + desc.memoryUsage = MemoryUsage::GPU; + desc.structureByteCount = byteCount; + desc.shortLifeTime = true; + oitCounterBuffer = CreateBuffer(desc); + } + { + BufferDesc desc("OIT counter staging", byteCount, ResourceStates::Common); + desc.committedResource = false; + desc.memoryUsage = MemoryUsage::Upload; + desc.structureByteCount = byteCount; + desc.shortLifeTime = true; + oitCounterStagingBuffer = CreateBuffer(desc); + + uint32_t* dst = (uint32_t*)MapBuffer(oitCounterStagingBuffer); + dst[0] = 1; // fragment index 0 is the end-of-list value + dst[1] = oitMaxFragmentCount; + dst[2] = 0; + UnmapBuffer(oitCounterStagingBuffer); + } + } + + { + TextureDesc desc("depth buffer", glConfig.vidWidth, glConfig.vidHeight); + desc.committedResource = true; + desc.shortLifeTime = true; + desc.initialState = ResourceStates::DepthWriteBit; + desc.allowedState = ResourceStates::DepthAccessBits | 
ResourceStates::PixelShaderAccessBit; + desc.format = TextureFormat::Depth32_Float; + desc.SetClearDepthStencil(0.0f, 0); + depthTexture = RHI::CreateTexture(desc); + } + + { + GraphicsPipelineDesc desc("blit LDR"); + desc.vertexShader = ShaderByteCode(blit::g_vs); + desc.pixelShader = ShaderByteCode(blit::g_ps); + desc.depthStencil.DisableDepth(); + desc.rasterizer.cullMode = CT_TWO_SIDED; + desc.AddRenderTarget(0, TextureFormat::RGBA32_UNorm); + blitPipelineLDR = CreateGraphicsPipeline(desc); + desc.name = "blit HDR"; + desc.renderTargets[0].format = TextureFormat::RGBA64_Float; + blitPipelineHDR = CreateGraphicsPipeline(desc); + } + + ui.Init(true, ShaderByteCode(ui::g_vs), ShaderByteCode(ui::g_ps), renderTargetFormat, RHI_MAKE_NULL_HANDLE(), NULL); + imgui.Init(true, ShaderByteCode(imgui::g_vs), ShaderByteCode(imgui::g_ps), renderTargetFormat, RHI_MAKE_NULL_HANDLE(), NULL); + nuklear.Init(true, ShaderByteCode(nuklear::g_vs), ShaderByteCode(nuklear::g_ps), renderTargetFormat, RHI_MAKE_NULL_HANDLE(), NULL); + mipMapGen.Init(true, ShaderByteCode(mip_1::g_cs), ShaderByteCode(mip_2::g_cs), ShaderByteCode(mip_3::g_cs)); + opaque.Init(); + transp.Init(); + transpResolve.Init(); + toneMap.Init(); + gatherDof.Init(); + accumDof.Init(); + fog.Init(); + + srp.firstInit = false; +} + +void CRP::ShutDown(bool fullShutDown) +{ + RHI::ShutDown(fullShutDown); +} + +void CRP::BeginFrame() +{ + renderTargetIndex = 0; + renderTarget = renderTargets[0]; + + srp.BeginFrame(); + + // have it be first so we can use ImGUI in the other components too + imgui.BeginFrame(); + + RHI::BeginFrame(); + ui.BeginFrame(); + nuklear.BeginFrame(); + + const float clearColor[4] = { 0.0f, 0.5f, 0.0f, 0.0f }; + const TextureBarrier barrier(renderTarget, ResourceStates::RenderTargetBit); + CmdBarrier(1, &barrier); + CmdClearColorTarget(renderTarget, clearColor); + + frameSeed = (float)rand() / (float)RAND_MAX; + + dynBuffers[GetFrameIndex()].Rewind(); +} + +void CRP::EndFrame() +{ + srp.DrawGUI(); +
imgui.Draw(renderTarget); + toneMap.DrawToneMap(); + BlitRenderTarget(GetSwapChainTexture(), "Blit to Swap Chain"); + BlitRenderTarget(readbackRenderTarget, "Blit to Readback Texture"); + srp.EndFrame(); +} + +void CRP::Blit(HTexture destination, HTexture source, const char* passName, bool hdr, const vec2_t tcScale, const vec2_t tcBias) +{ + SCOPED_RENDER_PASS(passName, 0.125f, 0.125f, 0.5f); + + const TextureBarrier barriers[2] = + { + TextureBarrier(source, ResourceStates::PixelShaderAccessBit), + TextureBarrier(destination, ResourceStates::RenderTargetBit) + }; + CmdBarrier(ARRAY_LEN(barriers), barriers); + +#pragma pack(push, 4) + struct BlitRC + { + uint32_t textureIndex; + uint32_t samplerIndex; + float tcScale[2]; + float tcBias[2]; + }; +#pragma pack(pop) + + BlitRC rc; + rc.textureIndex = GetTextureIndexSRV(source); + rc.samplerIndex = GetSamplerIndex(TW_CLAMP_TO_EDGE, TextureFilter::Linear); + rc.tcScale[0] = tcScale[0]; + rc.tcScale[1] = tcScale[1]; + rc.tcBias[0] = tcBias[0]; + rc.tcBias[1] = tcBias[1]; + CmdSetViewportAndScissor(0, 0, glConfig.vidWidth, glConfig.vidHeight); + CmdBindRenderTargets(1, &destination, NULL); + CmdBindPipeline(hdr ?
blitPipelineHDR : blitPipelineLDR); + CmdSetGraphicsRootConstants(0, sizeof(rc), &rc); + CmdDraw(3, 0); +} + +void CRP::BlitRenderTarget(HTexture destination, const char* passName) +{ + Blit(destination, crp.renderTarget, passName, false, vec2_one, vec2_zero); +} + +void CRP::CreateTexture(image_t* image, int mipCount, int width, int height) +{ + TextureDesc desc(image->name, width, height, mipCount); + desc.committedResource = width * height >= (1 << 20); + desc.shortLifeTime = true; + if(mipCount > 1) + { + desc.allowedState |= ResourceStates::UnorderedAccessBit; // for mip-map generation + } + + image->texture = ::RHI::CreateTexture(desc); + image->textureIndex = GetTextureIndexSRV(image->texture); +} + +void CRP::UpoadTextureAndGenerateMipMaps(image_t* image, const byte* data) +{ + MappedTexture texture; + RHI::BeginTextureUpload(texture, image->texture); + for(uint32_t r = 0; r < texture.rowCount; ++r) + { + memcpy(texture.mappedData + r * texture.dstRowByteCount, data + r * texture.srcRowByteCount, texture.srcRowByteCount); + } + RHI::EndTextureUpload(); + + mipMapGen.GenerateMipMaps(image->texture); +} + +void CRP::BeginTextureUpload(MappedTexture& mappedTexture, image_t* image) +{ + RHI::BeginTextureUpload(mappedTexture, image->texture); +} + +void CRP::EndTextureUpload() +{ + RHI::EndTextureUpload(); +} + +void CRP::ProcessWorld(world_t&) +{ +} + +void CRP::ProcessModel(model_t&) +{ +} + +void CRP::ProcessShader(shader_t& shader) +{ + if(shader.isOpaque) + { + opaque.ProcessShader(shader); + } + else + { + transp.ProcessShader(shader); + } +} + +void CRP::ExecuteRenderCommands(const byte* data, bool /*readbackRequested*/) +{ + // @NOTE: the CRP always blits the final result to the readback texture + + for(;;) + { + const int commandId = ((const renderCommandBase_t*)data)->commandId; + + if(commandId < 0 || commandId >= RC_COUNT) + { + assert(!"Invalid render command type"); + return; + } + + if(commandId == RC_END_OF_LIST) + { + return; + } + + 
switch(commandId) + { + case RC_UI_SET_COLOR: + ui.CmdSetColor(*(const uiSetColorCommand_t*)data); + break; + case RC_UI_DRAW_QUAD: + ui.CmdDrawQuad(*(const uiDrawQuadCommand_t*)data); + break; + case RC_UI_DRAW_TRIANGLE: + ui.CmdDrawTriangle(*(const uiDrawTriangleCommand_t*)data); + break; + case RC_DRAW_SCENE_VIEW: + DrawSceneView(*(const drawSceneViewCommand_t*)data); + break; + case RC_BEGIN_FRAME: + BeginFrame(); + break; + case RC_SWAP_BUFFERS: + EndFrame(); + break; + case RC_BEGIN_UI: + ui.Begin(renderTarget); + break; + case RC_END_UI: + ui.End(); + break; + case RC_BEGIN_3D: + // @TODO: + srp.renderMode = RenderMode::None; + break; + case RC_END_3D: + // @TODO: + srp.renderMode = RenderMode::None; + break; + case RC_END_SCENE: + // @TODO: post-processing + break; + case RC_BEGIN_NK: + nuklear.Begin(renderTarget); + break; + case RC_END_NK: + nuklear.End(); + break; + case RC_NK_UPLOAD: + nuklear.Upload(*(const nuklearUploadCommand_t*)data); + break; + case RC_NK_DRAW: + nuklear.Draw(*(const nuklearDrawCommand_t*)data); + break; + default: + Q_assert(!"Unsupported render command type"); + return; + } + + data += renderCommandSizes[commandId]; + } +} + +void CRP::TessellationOverflow() +{ + switch(tess.tessellator) + { + case Tessellator::Opaque: opaque.TessellationOverflow(); break; + case Tessellator::Transp: transp.TessellationOverflow(); break; + default: break; + } + tess.numIndexes = 0; + tess.numVertexes = 0; +} + +void CRP::DrawSceneView(const drawSceneViewCommand_t& cmd) +{ + const viewParms_t& vp = cmd.viewParms; + if(cmd.shouldClearColor) + { + const Rect rect(vp.viewportX, vp.viewportY, vp.viewportWidth, vp.viewportHeight); + const TextureBarrier tb(renderTarget, ResourceStates::RenderTargetBit); + CmdBarrier(1, &tb); + CmdClearColorTarget(renderTarget, cmd.clearColor, &rect); + } + + if(cmd.numDrawSurfs <= 0 || !cmd.shouldDrawScene) + { + return; + } + + if(crp_dof->integer == DOFMethod::Accumulation && + vp.viewportX == 0 && + vp.viewportY == 
0 && + vp.viewportWidth == glConfig.vidWidth && + vp.viewportHeight == glConfig.vidHeight) + { + const Rect rect(0, 0, glConfig.vidWidth, glConfig.vidHeight); + accumDof.Begin(cmd); + const uint32_t sampleCount = accumDof.GetSampleCount(); + for(uint32_t y = 0; y < sampleCount; y++) + { + for(uint32_t x = 0; x < sampleCount; x++) + { + srp.enableRenderPassQueries = x == 0 && y == 0; + drawSceneViewCommand_t newCmd; + accumDof.FixCommand(newCmd, cmd, x, y); + const TextureBarrier tb(renderTarget, ResourceStates::RenderTargetBit); + CmdBarrier(1, &tb); + CmdClearColorTarget(renderTarget, cmd.clearColor, &rect); + opaque.Draw(newCmd); + fog.Draw(); + transp.Draw(newCmd); + transpResolve.Draw(newCmd); + accumDof.Accumulate(); + + // geometry allocation is a linear allocation instead of a ring buffer + // we force a CPU-GPU sync point after every full scene render + // that way, we can keep the buffer sizes at least somewhat reasonable + SubmitAndContinue(); + dynBuffers[GetFrameIndex()].Rewind(); + } + } + CmdSetViewportAndScissor(backEnd.viewParms); + srp.enableRenderPassQueries = true; + accumDof.Normalize(); + backEnd.viewParms = cmd.viewParms; + backEnd.refdef = cmd.refdef; + accumDof.DrawDebug(); + } + else + { + opaque.Draw(cmd); + fog.Draw(); + transp.Draw(cmd); + transpResolve.Draw(cmd); + CmdSetViewportAndScissor(vp.viewportX, vp.viewportY, vp.viewportWidth, vp.viewportHeight); + gatherDof.Draw(); + } +} + +void CRP::ReadPixels(int w, int h, int alignment, colorSpace_t colorSpace, void* outPixels) +{ + ReadTextureImage(outPixels, readbackRenderTarget, w, h, alignment, colorSpace); +} + +uint32_t CRP::GetSamplerDescriptorIndexFromBaseIndex(uint32_t baseIndex) +{ + Q_assert(baseIndex < ARRAY_LEN(samplerIndices)); + + return samplerIndices[baseIndex]; +} + +HTexture CRP::GetReadRenderTarget() +{ + return renderTargets[renderTargetIndex ^ 1]; +} + +HTexture CRP::GetWriteRenderTarget() +{ + return renderTargets[renderTargetIndex]; +} + +void 
CRP::SwapRenderTargets() +{ + renderTargetIndex ^= 1; + renderTarget = GetWriteRenderTarget(); +} diff --git a/code/renderer/crp_opaque.cpp b/code/renderer/crp_opaque.cpp new file mode 100644 index 0000000..321980a --- /dev/null +++ b/code/renderer/crp_opaque.cpp @@ -0,0 +1,376 @@ +/* +=========================================================================== +Copyright (C) 2023 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
+=========================================================================== +*/ +// Cinematic Rendering Pipeline - opaque surfaces + + +#include "crp_local.h" +namespace opaque +{ +#include "compshaders/crp/opaque_vs.h" +#include "compshaders/crp/opaque_ps.h" +} + + +#pragma pack(push, 4) + +struct OpaqueVertexRC : WorldVertexRC +{ +}; + +struct OpaquePixelRC +{ + // general + uint32_t textureIndex; + uint32_t samplerIndex; + uint32_t shaderIndexBufferIndex; + uint32_t alphaTest; + float greyscale; + + // shader trace + uint32_t shaderTrace; // shader index: 14 - frame index: 2 - enable: 1 + uint16_t centerPixelX; + uint16_t centerPixelY; +}; + +#pragma pack(pop) + + +void WorldOpaque::Init() +{ + psoCache.Init(psoCacheEntries, ARRAY_LEN(psoCacheEntries)); +} + +void WorldOpaque::Draw(const drawSceneViewCommand_t& cmd) +{ + if(cmd.numDrawSurfs - cmd.numTranspSurfs <= 0) + { + return; + } + + srp.renderMode = RenderMode::World; + + backEnd.refdef = cmd.refdef; + backEnd.viewParms = cmd.viewParms; + + if(backEnd.viewParms.isPortal) + { + float plane[4]; + plane[0] = backEnd.viewParms.portalPlane.normal[0]; + plane[1] = backEnd.viewParms.portalPlane.normal[1]; + plane[2] = backEnd.viewParms.portalPlane.normal[2]; + plane[3] = backEnd.viewParms.portalPlane.dist; + + float plane2[4]; + plane2[0] = DotProduct(backEnd.viewParms.orient.axis[0], plane); + plane2[1] = DotProduct(backEnd.viewParms.orient.axis[1], plane); + plane2[2] = DotProduct(backEnd.viewParms.orient.axis[2], plane); + plane2[3] = DotProduct(plane, backEnd.viewParms.orient.origin) - plane[3]; + + float* o = plane; + const float* m = s_flipMatrix; + const float* v = plane2; + o[0] = m[0] * v[0] + m[4] * v[1] + m[8] * v[2] + m[12] * v[3]; + o[1] = m[1] * v[0] + m[5] * v[1] + m[9] * v[2] + m[13] * v[3]; + o[2] = m[2] * v[0] + m[6] * v[1] + m[10] * v[2] + m[14] * v[3]; + o[3] = m[3] * v[0] + m[7] * v[1] + m[11] * v[2] + m[15] * v[3]; + + memcpy(clipPlane, plane, sizeof(clipPlane)); + } + else + { + 
memset(clipPlane, 0, sizeof(clipPlane)); + } + + CmdSetViewportAndScissor(backEnd.viewParms); + batchOldDepthHack = false; + batchDepthHack = false; + + TextureBarrier tb(crp.depthTexture, ResourceStates::DepthWriteBit); + BufferBarrier bb(srp.traceRenderBuffer, ResourceStates::UnorderedAccessBit); + CmdBarrier(1, &tb, 1, &bb); + + CmdClearDepthStencilTarget(crp.depthTexture, true, 0.0f); + + GeoBuffers& db = crp.dynBuffers[GetFrameIndex()]; + db.BeginUpload(); + + SCOPED_RENDER_PASS("Opaque", 1.0f, 0.5f, 0.5f); + + CmdBindRenderTargets(1, &crp.renderTarget, &crp.depthTexture); + CmdBindVertexBuffers(ARRAY_LEN(db.vertexBuffers), db.vertexBuffers, db.vertexBufferStrides, NULL); + CmdBindIndexBuffer(db.indexBuffer.buffer, IndexType::UInt32, 0); + + const drawSurf_t* drawSurfs = cmd.drawSurfs; + const int surfCount = cmd.numDrawSurfs - cmd.numTranspSurfs; + const double originalTime = backEnd.refdef.floatTime; + + const shader_t* shader = NULL; + const shader_t* oldShader = NULL; + int oldEntityNum = -1; + backEnd.currentEntity = &tr.worldEntity; + + tess.numVertexes = 0; + tess.numIndexes = 0; + + int ds; + const drawSurf_t* drawSurf; + for(ds = 0, drawSurf = drawSurfs; ds < surfCount; ++ds, ++drawSurf) + { + int entityNum; + R_DecomposeSort(drawSurf->sort, &entityNum, &shader); + Q_assert(shader != NULL); + Q_assert(shader->isOpaque); + + // sky shaders can have no stages and be valid (box drawn with no clouds) + if(!shader->isSky) + { + if(shader->numPipelines == 0 || + shader->pipelines[0].pipeline <= 0 || + shader->pipelines[0].numStages <= 0) + { + continue; + } + } + + const bool shaderChanged = shader != oldShader; + const bool entityChanged = entityNum != oldEntityNum; + if(shaderChanged || entityChanged) + { + oldShader = shader; + oldEntityNum = entityNum; + EndSkyBatch(); + EndBatch(); + BeginBatch(shader); + tess.greyscale = drawSurf->greyscale; + } + + if(entityChanged) + { + UpdateEntityData(batchDepthHack, entityNum, originalTime); + } + + 
R_TessellateSurface(drawSurf->surface); + } + + backEnd.refdef.floatTime = originalTime; + + EndSkyBatch(); + EndBatch(); + + db.EndUpload(); + + // restores the potentially "hacked" depth range as well + CmdSetViewportAndScissor(backEnd.viewParms); + batchOldDepthHack = false; + batchDepthHack = false; +} + +void WorldOpaque::ProcessShader(shader_t& shader) +{ + Q_assert(shader.isOpaque || shader.isSky); + + if(shader.numStages < 1) + { + shader.numPipelines = 0; + return; + } + + const bool clampDepth = r_depthClamp->integer != 0 || shader.isSky; + + for(int s = 0; s < shader.numStages; ++s) + { + const shaderStage_t& stage = *shader.stages[s]; + const unsigned int stateBits = stage.stateBits & (~GLS_POLYMODE_LINE); + int a = 0; + + // @NOTE: we are not using any CTOR because we deliberately want to 0-init the struct + // this is necessary for padding bytes not to mess up comparisons in the PSO cache + GraphicsPipelineDesc desc = {}; + desc.name = "opaque"; + desc.rootSignature = RHI_MAKE_NULL_HANDLE(); + desc.shortLifeTime = true; // the PSO cache is only valid for this map! + desc.vertexShader = opaque::g_vs; + desc.pixelShader = opaque::g_ps; + desc.vertexLayout.AddAttribute(a++, ShaderSemantic::Position, DataType::Float32, 3, 0); + desc.vertexLayout.AddAttribute(a++, ShaderSemantic::Normal, DataType::Float32, 2, 0); + desc.vertexLayout.AddAttribute(a++, ShaderSemantic::TexCoord, DataType::Float32, 2, 0); + desc.vertexLayout.AddAttribute(a++, ShaderSemantic::Color, DataType::UNorm8, 4, 0); + desc.depthStencil.depthStencilFormat = TextureFormat::Depth32_Float; + desc.depthStencil.depthComparison = + (stateBits & GLS_DEPTHFUNC_EQUAL) != 0 ? 
+ ComparisonFunction::Equal : + ComparisonFunction::GreaterEqual; + desc.depthStencil.enableDepthTest = (stateBits & GLS_DEPTHTEST_DISABLE) == 0; + desc.depthStencil.enableDepthWrites = (stateBits & GLS_DEPTHMASK_TRUE) != 0; + desc.rasterizer.cullMode = shader.cullType; + desc.rasterizer.polygonOffset = shader.polygonOffset != 0; + desc.rasterizer.clampDepth = clampDepth; + desc.AddRenderTarget(stateBits & GLS_BLEND_BITS, crp.renderTargetFormat); + + pipeline_t& p = shader.pipelines[s]; + p.firstStage = s; + p.numStages = 1; + p.pipeline = psoCache.AddPipeline(desc, va("opaque %d %d", psoCache.entryCount, s + 1)); + desc.rasterizer.cullMode = GetMirrorredCullType(desc.rasterizer.cullMode); + p.mirrorPipeline = psoCache.AddPipeline(desc, va("opaque %d %d mirrored", psoCache.entryCount, s + 1)); + } + + shader.numPipelines = shader.numStages; +} + +void WorldOpaque::TessellationOverflow() /* flushes the batch in progress, then restarts one with the same shader */ +{ + EndBatch(); + BeginBatch(tess.shader); +} + +void WorldOpaque::BeginBatch(const shader_t* shader) /* resets the tess state for a new opaque batch using the given shader */ +{ + tess.tessellator = Tessellator::Opaque; + tess.numVertexes = 0; + tess.numIndexes = 0; + tess.depthFade = DFT_NONE; + tess.deformsPreApplied = qfalse; + tess.xstages = (const shaderStage_t**)shader->stages; + tess.shader = shader; + tess.shaderTime = backEnd.refdef.floatTime - tess.shader->timeOffset; /* shader-local time, capped at clampTime below when clampTime is non-zero */ + if(tess.shader->clampTime && tess.shaderTime >= tess.shader->clampTime) + { + tess.shaderTime = tess.shader->clampTime; + } +} + +void WorldOpaque::EndBatch() /* uploads the tessellated geometry and issues one draw per shader stage, then resets the batch */ +{ + const int vertexCount = tess.numVertexes; + const int indexCount = tess.numIndexes; + if(vertexCount <= 0 || + indexCount <= 0 || + tess.shader->numStages == 0 || + tess.shader->numPipelines <= 0) + { + goto clean_up; /* nothing to draw */ + } + + const shader_t* const shader = tess.shader; + + GeoBuffers& db = crp.dynBuffers[GetFrameIndex()]; + if(!db.CanAdd(vertexCount, indexCount, shader->numStages)) + { + Q_assert(!"World surface geometry buffer too small!"); + goto clean_up; /* the batch is dropped when the buffers can't hold it */ + } + + RB_DeformTessGeometry(0, vertexCount, 0, 
indexCount); + db.UploadBase(); + + if(batchDepthHack != batchOldDepthHack) + { + const viewParms_t& vp = backEnd.viewParms; + CmdSetViewport(vp.viewportX, vp.viewportY, vp.viewportWidth, vp.viewportHeight, batchDepthHack ? 0.7f : 0.0f, 1.0f); /* depth hack: min depth 0.7 instead of 0.0 */ + batchOldDepthHack = batchDepthHack; + } + + OpaqueVertexRC vertexRC = {}; + memcpy(vertexRC.modelViewMatrix, backEnd.orient.modelMatrix, sizeof(vertexRC.modelViewMatrix)); + memcpy(vertexRC.projectionMatrix, backEnd.viewParms.projectionMatrix, sizeof(vertexRC.projectionMatrix)); + memcpy(vertexRC.clipPlane, clipPlane, sizeof(vertexRC.clipPlane)); + CmdSetGraphicsRootConstants(0, sizeof(vertexRC), &vertexRC); /* vertex constants start at offset 0 */ + + for(int s = 0; s < shader->numStages; ++s) + { + const shaderStage_t* const stage = shader->stages[s]; + + R_ComputeColors(stage, tess.svars[0], 0, vertexCount); + R_ComputeTexCoords(stage, tess.svars[0], 0, vertexCount, qfalse); + db.UploadStage(0); + + const pipeline_t& pipeline = shader->pipelines[s]; + const int psoIndex = backEnd.viewParms.isMirror ? pipeline.mirrorPipeline : pipeline.pipeline; /* mirrored views use the PSO with the flipped cull mode */ + Q_assert(psoIndex > 0); + CmdBindPipeline(psoCache.entries[psoIndex].handle); + + const image_t* image = GetBundleImage(stage->bundle); + const uint32_t texIdx = image->textureIndex; + const uint32_t sampIdx = GetSamplerIndex(image); + const uint32_t alphaTest = AlphaTestShaderConstFromStateBits(stage->stateBits); + const uint32_t enableShaderTrace = tr.traceWorldShader && s == 0 ? 
1 : 0; + const uint32_t bufferIndex = GetBufferIndexUAV(srp.traceRenderBuffer); + Q_assert(sampIdx < ARRAY_LEN(crp.samplers)); + + OpaquePixelRC pixelRC = {}; + pixelRC.textureIndex = texIdx; + pixelRC.samplerIndex = sampIdx; + pixelRC.shaderIndexBufferIndex = bufferIndex; + pixelRC.alphaTest = alphaTest; + pixelRC.greyscale = tess.greyscale; + pixelRC.shaderTrace = ((uint32_t)shader->index << 3) | (RHI::GetFrameIndex() << 1) | enableShaderTrace; /* shader index from bit 3 up, frame index from bit 1, trace enable in bit 0 */ + pixelRC.centerPixelX = glConfig.vidWidth / 2; + pixelRC.centerPixelY = glConfig.vidHeight / 2; + CmdSetGraphicsRootConstants(sizeof(vertexRC), sizeof(pixelRC), &pixelRC); /* pixel constants are placed right after the vertex constants */ + + db.DrawStage(vertexCount, indexCount); + } + + db.EndBaseBatch(vertexCount); + +clean_up: + tess.tessellator = Tessellator::None; + tess.numVertexes = 0; + tess.numIndexes = 0; +} + +void WorldOpaque::EndSkyBatch() +{ + // this only exists as a separate function from EndBatch so that + // we don't have to deal with recursion (through the call to RB_DrawSky) + + if(tess.shader == NULL || + !tess.shader->isSky || + tess.numVertexes <= 0 || + tess.numIndexes <= 0) + { + return; + } + + SCOPED_RENDER_PASS("Sky", 0.0, 0.5f, 1.0f); + + const viewParms_t& vp = backEnd.viewParms; + CmdSetViewport(vp.viewportX, vp.viewportY, vp.viewportWidth, vp.viewportHeight, 0.0f, 0.0f); /* min and max depth are both 0.0 for the sky */ + RB_DrawSky(); + CmdSetViewport(vp.viewportX, vp.viewportY, vp.viewportWidth, vp.viewportHeight, 0.0f, 1.0f); /* back to the [0.0, 1.0] depth range */ + tess.numVertexes = 0; + tess.numIndexes = 0; +} + +void WorldOpaque::DrawSkyBox() +{ + // force creation of a PSO for the temp shader + ProcessShader((shader_t&)*tess.shader); + + tess.deformsPreApplied = qtrue; + EndBatch(); +} + +void WorldOpaque::DrawClouds() /* only flushes the batch in progress */ +{ + EndBatch(); +} diff --git a/code/renderer/crp_tone_map.cpp b/code/renderer/crp_tone_map.cpp new file mode 100644 index 0000000..619f4d3 --- /dev/null +++ b/code/renderer/crp_tone_map.cpp @@ -0,0 +1,139 @@ +/* +=========================================================================== +Copyright (C) 2023 Gian 'myT' 
Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . +=========================================================================== +*/ +// Cinematic Rendering Pipeline - tone mapping + + +#include "crp_local.h" +namespace tone_map +{ +#include "compshaders/crp/tone_map_vs.h" +#include "compshaders/crp/tone_map_ps.h" +} +namespace inverse_tone_map +{ +#include "compshaders/crp/tone_map_inverse_vs.h" +#include "compshaders/crp/tone_map_inverse_ps.h" +} + + +#pragma pack(push, 4) + +struct ToneMapRC +{ + uint32_t textureIndex; + uint32_t samplerIndex; + float invGamma; + float brightness; + float greyscale; +}; + +struct InverseToneMapRC +{ + uint32_t textureIndex; + uint32_t samplerIndex; + float gamma; + float invBrightness; +}; + +#pragma pack(pop) + + +void ToneMap::Init() /* creates the tone map pipeline and the inverse tone map pipeline */ +{ + { + GraphicsPipelineDesc desc("Tone Map"); + desc.shortLifeTime = true; + desc.vertexShader = ShaderByteCode(tone_map::g_vs); + desc.pixelShader = ShaderByteCode(tone_map::g_ps); + desc.depthStencil.DisableDepth(); + desc.rasterizer.cullMode = CT_TWO_SIDED; + desc.AddRenderTarget(0, crp.renderTargetFormat); + pipeline = CreateGraphicsPipeline(desc); + } + + { + GraphicsPipelineDesc desc("Inverse Tone Map"); + desc.shortLifeTime = true; + desc.vertexShader = ShaderByteCode(inverse_tone_map::g_vs); + desc.pixelShader = ShaderByteCode(inverse_tone_map::g_ps); /* the inverse pass uses its own compiled shaders, not the forward ones */ + desc.depthStencil.DisableDepth(); + 
desc.rasterizer.cullMode = CT_TWO_SIDED; + desc.AddRenderTarget(0, crp.renderTargetFormat); + inversePipeline = CreateGraphicsPipeline(desc); /* bound by DrawInverseToneMap; must not overwrite pipeline */ + } +} + +void ToneMap::DrawToneMap() /* swaps the render targets and draws with the tone map pipeline, feeding r_gamma, r_brightness and r_greyscale */ +{ + srp.renderMode = RenderMode::None; + + SCOPED_RENDER_PASS("Tone Map", 1.0f, 1.0f, 1.0f); + + CmdSetViewportAndScissor(0, 0, glConfig.vidWidth, glConfig.vidHeight); + + crp.SwapRenderTargets(); + const TextureBarrier texBarriers[] = + { + TextureBarrier(crp.GetReadRenderTarget(), ResourceStates::PixelShaderAccessBit), + TextureBarrier(crp.GetWriteRenderTarget(), ResourceStates::RenderTargetBit) + }; + CmdBarrier(ARRAY_LEN(texBarriers), texBarriers, 0, NULL); + + ToneMapRC rc = {}; + rc.textureIndex = GetTextureIndexSRV(crp.GetReadRenderTarget()); + rc.samplerIndex = GetSamplerIndex(TW_CLAMP_TO_EDGE, TextureFilter::Linear); + rc.invGamma = 1.0f / r_gamma->value; + rc.brightness = r_brightness->value; + rc.greyscale = r_greyscale->value; + + CmdBindRenderTargets(1, &crp.renderTarget, NULL); + CmdBindPipeline(pipeline); + CmdSetGraphicsRootConstants(0, sizeof(rc), &rc); + CmdDraw(3, 0); +} + +void ToneMap::DrawInverseToneMap() /* same pass structure as DrawToneMap, but binds inversePipeline and sends gamma and 1 / brightness */ +{ + srp.renderMode = RenderMode::None; + + SCOPED_RENDER_PASS("Inverse Tone Map", 1.0f, 1.0f, 1.0f); + + CmdSetViewportAndScissor(0, 0, glConfig.vidWidth, glConfig.vidHeight); + + crp.SwapRenderTargets(); + const TextureBarrier texBarriers[] = + { + TextureBarrier(crp.GetReadRenderTarget(), ResourceStates::PixelShaderAccessBit), + TextureBarrier(crp.GetWriteRenderTarget(), ResourceStates::RenderTargetBit) + }; + CmdBarrier(ARRAY_LEN(texBarriers), texBarriers, 0, NULL); + + InverseToneMapRC rc = {}; + rc.textureIndex = GetTextureIndexSRV(crp.GetReadRenderTarget()); + rc.samplerIndex = GetSamplerIndex(TW_CLAMP_TO_EDGE, TextureFilter::Linear); + rc.gamma = r_gamma->value; + rc.invBrightness = 1.0f / r_brightness->value; + + CmdBindRenderTargets(1, &crp.renderTarget, NULL); + CmdBindPipeline(inversePipeline); + CmdSetGraphicsRootConstants(0, sizeof(rc), &rc); + CmdDraw(3, 
0); +} diff --git a/code/renderer/crp_transp_draw.cpp b/code/renderer/crp_transp_draw.cpp new file mode 100644 index 0000000..f2cc839 --- /dev/null +++ b/code/renderer/crp_transp_draw.cpp @@ -0,0 +1,368 @@ +/* +=========================================================================== +Copyright (C) 2023 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . +=========================================================================== +*/ +// Cinematic Rendering Pipeline - OIT geometry pass + + +#include "crp_local.h" +namespace transp_draw +{ +#include "compshaders/crp/transp_draw_vs.h" +#include "compshaders/crp/transp_draw_ps.h" +} + + +#pragma pack(push, 4) + +struct TranspDrawVertexRC : WorldVertexRC +{ +}; + +struct TranspDrawPixelRC +{ + uint32_t textureIndex; + uint32_t samplerIndex; + uint32_t alphaTest; + uint32_t counterBuffer; + uint32_t indexTexture; + uint32_t fragmentBuffer; + float greyscale; + uint32_t stateBits; + uint32_t shaderTrace; + uint16_t hFadeDistance; + uint16_t hFadeOffset; + uint32_t depthFadeScaleBias; // color bias: 4 - color scale: 4 +}; + +#pragma pack(pop) + + +static uint32_t GetFixedStageBits(uint32_t stateBits, uint32_t stageIndex) /* returns stateBits with an explicit blend mode and the stage index packed in at GLS_STAGEINDEX_SHIFT */ +{ + // makes sure we're not overwriting anything useful + assert((stateBits & GLS_STAGEINDEX_BITS) == 0); + + // transform "no blend" into a "replace" blend mode + if((stateBits & 
(GLS_SRCBLEND_BITS | GLS_DSTBLEND_BITS)) == 0) + { + stateBits |= GLS_SRCBLEND_ONE | GLS_DSTBLEND_ZERO; + } + + stateBits |= stageIndex << GLS_STAGEINDEX_SHIFT; + + return stateBits; +} + + +void WorldTransp::Init() /* hands the fixed-size entry array to the PSO cache */ +{ + psoCache.Init(psoCacheEntries, ARRAY_LEN(psoCacheEntries)); +} + +void WorldTransp::Draw(const drawSceneViewCommand_t& cmd) /* batches the view's transparent surfaces by shader and entity and draws them with only the depth buffer bound */ +{ + if(cmd.numTranspSurfs <= 0) + { + return; + } + + srp.renderMode = RenderMode::World; + + backEnd.refdef = cmd.refdef; + backEnd.viewParms = cmd.viewParms; + + if(backEnd.viewParms.isPortal) /* portal views get a clip plane, all other views an all-zero plane */ + { + float plane[4]; + plane[0] = backEnd.viewParms.portalPlane.normal[0]; + plane[1] = backEnd.viewParms.portalPlane.normal[1]; + plane[2] = backEnd.viewParms.portalPlane.normal[2]; + plane[3] = backEnd.viewParms.portalPlane.dist; + + float plane2[4]; + plane2[0] = DotProduct(backEnd.viewParms.orient.axis[0], plane); + plane2[1] = DotProduct(backEnd.viewParms.orient.axis[1], plane); + plane2[2] = DotProduct(backEnd.viewParms.orient.axis[2], plane); + plane2[3] = DotProduct(plane, backEnd.viewParms.orient.origin) - plane[3]; + + float* o = plane; + const float* m = s_flipMatrix; + const float* v = plane2; + o[0] = m[0] * v[0] + m[4] * v[1] + m[8] * v[2] + m[12] * v[3]; + o[1] = m[1] * v[0] + m[5] * v[1] + m[9] * v[2] + m[13] * v[3]; + o[2] = m[2] * v[0] + m[6] * v[1] + m[10] * v[2] + m[14] * v[3]; + o[3] = m[3] * v[0] + m[7] * v[1] + m[11] * v[2] + m[15] * v[3]; + + memcpy(clipPlane, plane, sizeof(clipPlane)); + } + else + { + memset(clipPlane, 0, sizeof(clipPlane)); + } + + SCOPED_RENDER_PASS("Transparent", 1.0f, 0.5f, 0.5f); + + CmdSetViewportAndScissor(backEnd.viewParms); + batchOldDepthHack = false; + batchDepthHack = false; + + const TextureBarrier texBarriers[] = + { + TextureBarrier(crp.depthTexture, ResourceStates::DepthWriteBit), + TextureBarrier(crp.oitIndexTexture, ResourceStates::UnorderedAccessBit) + }; + const BufferBarrier bufBarriers[] = + { + BufferBarrier(crp.oitFragmentBuffer, ResourceStates::UnorderedAccessBit), + 
BufferBarrier(crp.oitCounterBuffer, ResourceStates::UnorderedAccessBit) + }; + CmdBarrier(ARRAY_LEN(texBarriers), texBarriers, ARRAY_LEN(bufBarriers), bufBarriers); + + GeoBuffers& db = crp.dynBuffers[GetFrameIndex()]; + db.BeginUpload(); + + CmdBindRenderTargets(0, NULL, &crp.depthTexture); /* no color target, depth only */ + CmdBindVertexBuffers(ARRAY_LEN(db.vertexBuffers), db.vertexBuffers, db.vertexBufferStrides, NULL); + CmdBindIndexBuffer(db.indexBuffer.buffer, IndexType::UInt32, 0); + + // reset the fragment counter + { + BufferBarrier b0(crp.oitCounterBuffer, ResourceStates::CopyDestinationBit); + CmdBarrier(0, NULL, 1, &b0); + + CmdCopyBuffer(crp.oitCounterBuffer, crp.oitCounterStagingBuffer); + + BufferBarrier b1(crp.oitCounterBuffer, ResourceStates::UnorderedAccessBit); + CmdBarrier(0, NULL, 1, &b1); + } + + // clear the index texture + { + const uint32_t zeroes[4] = {}; + CmdClearTextureUAV(crp.oitIndexTexture, 0, zeroes); + } + + // really should be just for the counter buffer and the index texture + CmdBarrier(ARRAY_LEN(texBarriers), texBarriers, ARRAY_LEN(bufBarriers), bufBarriers); + + const drawSurf_t* drawSurfs = cmd.drawSurfs; + const int opaqueSurfCount = cmd.numDrawSurfs - cmd.numTranspSurfs; + const int transpSurfCount = cmd.numTranspSurfs; + const double originalTime = backEnd.refdef.floatTime; + + const shader_t* shader = NULL; + const shader_t* oldShader = NULL; + int oldEntityNum = -1; + backEnd.currentEntity = &tr.worldEntity; + + tess.numVertexes = 0; + tess.numIndexes = 0; + + const drawSurf_t* drawSurf = drawSurfs + opaqueSurfCount; /* skip the first opaqueSurfCount surfaces */ + for(int ds = 0; ds < transpSurfCount; ++ds, ++drawSurf) + { + int entityNum; + R_DecomposeSort(drawSurf->sort, &entityNum, &shader); + Q_assert(shader != NULL); + Q_assert(!shader->isOpaque); + + const bool shaderChanged = shader != oldShader; + const bool entityChanged = entityNum != oldEntityNum; + if(shaderChanged || entityChanged) + { + oldShader = shader; + oldEntityNum = entityNum; + EndBatch(); + BeginBatch(shader); + 
tess.greyscale = drawSurf->greyscale; + } + + if(entityChanged) + { + UpdateEntityData(batchDepthHack, entityNum, originalTime); + } + + R_TessellateSurface(drawSurf->surface); + } + + backEnd.refdef.floatTime = originalTime; + + EndBatch(); + + db.EndUpload(); + + // restores the potentially "hacked" depth range as well + CmdSetViewportAndScissor(backEnd.viewParms); + batchOldDepthHack = false; + batchDepthHack = false; +} + +void WorldTransp::ProcessShader(shader_t& shader) /* adds a regular and a mirrored PSO to the cache for each stage of a non-opaque shader */ +{ + Q_assert(!shader.isOpaque); + + if(shader.numStages < 1) + { + shader.numTranspPipelines = 0; + return; + } + + const bool clampDepth = r_depthClamp->integer != 0 || shader.isSky; + + for(int s = 0; s < shader.numStages; ++s) + { + int a = 0; + + // @NOTE: we are not using any CTOR because we deliberately want to 0-init the struct + // this is necessary for padding bytes not to mess up comparisons in the PSO cache + GraphicsPipelineDesc desc = {}; + desc.name = "transp"; + desc.rootSignature = RHI_MAKE_NULL_HANDLE(); + desc.shortLifeTime = true; // the PSO cache is only valid for this map! 
+ desc.vertexShader = transp_draw::g_vs; + desc.pixelShader = transp_draw::g_ps; + desc.vertexLayout.AddAttribute(a++, ShaderSemantic::Position, DataType::Float32, 3, 0); + desc.vertexLayout.AddAttribute(a++, ShaderSemantic::Normal, DataType::Float32, 2, 0); /* NOTE(review): 2 components declared for the normal while the position uses 3 - confirm this is intended */ + desc.vertexLayout.AddAttribute(a++, ShaderSemantic::TexCoord, DataType::Float32, 2, 0); + desc.vertexLayout.AddAttribute(a++, ShaderSemantic::Color, DataType::UNorm8, 4, 0); + desc.depthStencil.depthStencilFormat = TextureFormat::Depth32_Float; + desc.depthStencil.depthComparison = ComparisonFunction::GreaterEqual; + desc.depthStencil.enableDepthTest = true; + desc.depthStencil.enableDepthWrites = false; /* depth is tested but never written by these pipelines */ + desc.rasterizer.cullMode = shader.cullType; + desc.rasterizer.polygonOffset = shader.polygonOffset != 0; + desc.rasterizer.clampDepth = clampDepth; + + pipeline_t& p = shader.transpPipelines[s]; + p.firstStage = s; + p.numStages = 1; + p.pipeline = psoCache.AddPipeline(desc, va("transp %d %d", psoCache.entryCount, s + 1)); + desc.rasterizer.cullMode = GetMirrorredCullType(desc.rasterizer.cullMode); + p.mirrorPipeline = psoCache.AddPipeline(desc, va("transp %d %d mirrored", psoCache.entryCount, s + 1)); + } + + shader.numTranspPipelines = shader.numStages; +} + +void WorldTransp::TessellationOverflow() /* flushes the batch in progress, then restarts one with the same shader */ +{ + EndBatch(); + BeginBatch(tess.shader); +} + +void WorldTransp::BeginBatch(const shader_t* shader) /* resets the tess state for a new transparent batch using the given shader */ +{ + tess.tessellator = Tessellator::Transp; + tess.numVertexes = 0; + tess.numIndexes = 0; + tess.depthFade = DFT_NONE; + tess.deformsPreApplied = qfalse; + tess.xstages = (const shaderStage_t**)shader->stages; + tess.shader = shader; + tess.shaderTime = backEnd.refdef.floatTime - tess.shader->timeOffset; /* shader-local time, capped at clampTime below when clampTime is non-zero */ + if(tess.shader->clampTime && tess.shaderTime >= tess.shader->clampTime) + { + tess.shaderTime = tess.shader->clampTime; + } +} + +void WorldTransp::EndBatch() /* uploads the tessellated geometry and issues one draw per shader stage, then resets the batch */ +{ + const int vertexCount = tess.numVertexes; + const int indexCount = tess.numIndexes; + if(vertexCount <= 0 || + indexCount <= 0 || + 
tess.shader->numStages == 0 || + tess.shader->numTranspPipelines <= 0) + { + goto clean_up; /* nothing to draw */ + } + + const shader_t* const shader = tess.shader; + + GeoBuffers& db = crp.dynBuffers[GetFrameIndex()]; + if(!db.CanAdd(vertexCount, indexCount, shader->numStages)) + { + Q_assert(!"World surface geometry buffer too small!"); + goto clean_up; /* the batch is dropped when the buffers can't hold it */ + } + + RB_DeformTessGeometry(0, vertexCount, 0, indexCount); + db.UploadBase(); + + if(batchDepthHack != batchOldDepthHack) + { + const viewParms_t& vp = backEnd.viewParms; + CmdSetViewport(vp.viewportX, vp.viewportY, vp.viewportWidth, vp.viewportHeight, batchDepthHack ? 0.7f : 0.0f, 1.0f); /* depth hack: min depth 0.7 instead of 0.0 */ + batchOldDepthHack = batchDepthHack; + } + + TranspDrawVertexRC vertexRC = {}; + memcpy(vertexRC.modelViewMatrix, backEnd.orient.modelMatrix, sizeof(vertexRC.modelViewMatrix)); + memcpy(vertexRC.projectionMatrix, backEnd.viewParms.projectionMatrix, sizeof(vertexRC.projectionMatrix)); + memcpy(vertexRC.clipPlane, clipPlane, sizeof(vertexRC.clipPlane)); + CmdSetGraphicsRootConstants(0, sizeof(vertexRC), &vertexRC); /* vertex constants start at offset 0 */ + + for(int s = 0; s < shader->numStages; ++s) + { + const shaderStage_t* const stage = shader->stages[s]; + + R_ComputeColors(stage, tess.svars[0], 0, vertexCount); + R_ComputeTexCoords(stage, tess.svars[0], 0, vertexCount, qfalse); + db.UploadStage(0); + + const pipeline_t& pipeline = shader->transpPipelines[s]; + const int psoIndex = backEnd.viewParms.isMirror ? pipeline.mirrorPipeline : pipeline.pipeline; /* mirrored views use the PSO with the flipped cull mode */ + Q_assert(psoIndex > 0); + CmdBindPipeline(psoCache.entries[psoIndex].handle); + + const image_t* image = GetBundleImage(stage->bundle); + const uint32_t texIdx = image->textureIndex; + const uint32_t sampIdx = GetSamplerIndex(image); + const uint32_t alphaTest = AlphaTestShaderConstFromStateBits(stage->stateBits); + const uint32_t enableShaderTrace = tr.traceWorldShader && s == 0 ? 1 : 0; + const uint32_t enableDepthFade = shader->dfType != DFT_NONE ? 
1 : 0; + Q_assert(sampIdx < ARRAY_LEN(crp.samplers)); + + TranspDrawPixelRC pixelRC = {}; + pixelRC.alphaTest = alphaTest; + pixelRC.counterBuffer = GetBufferIndexUAV(crp.oitCounterBuffer); + pixelRC.fragmentBuffer = GetBufferIndexUAV(crp.oitFragmentBuffer); + pixelRC.greyscale = tess.greyscale; + pixelRC.indexTexture = GetTextureIndexUAV(crp.oitIndexTexture, 0); + pixelRC.samplerIndex = sampIdx; + pixelRC.stateBits = GetFixedStageBits(stage->stateBits, s); /* state bits with the stage index packed in */ + pixelRC.textureIndex = texIdx; + pixelRC.shaderTrace = ((uint32_t)shader->index << 3) | (RHI::GetFrameIndex() << 1) | enableShaderTrace; /* shader index from bit 3 up, frame index from bit 1, trace enable in bit 0 */ + pixelRC.hFadeDistance = f32tof16(shader->dfInvDist); + pixelRC.hFadeOffset = f32tof16(shader->dfBias); + pixelRC.depthFadeScaleBias = (enableDepthFade << 8) | (uint32_t)r_depthFadeScaleAndBias[shader->dfType]; /* enable flag in bit 8, OR'ed with the table entry for dfType */ + CmdSetGraphicsRootConstants(sizeof(vertexRC), sizeof(pixelRC), &pixelRC); /* pixel constants are placed right after the vertex constants */ + + db.DrawStage(vertexCount, indexCount); + } + + db.EndBaseBatch(vertexCount); + +clean_up: + tess.tessellator = Tessellator::None; + tess.numVertexes = 0; + tess.numIndexes = 0; +} diff --git a/code/renderer/crp_transp_resolve.cpp b/code/renderer/crp_transp_resolve.cpp new file mode 100644 index 0000000..7a7b453 --- /dev/null +++ b/code/renderer/crp_transp_resolve.cpp @@ -0,0 +1,111 @@ +/* +=========================================================================== +Copyright (C) 2023 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . +=========================================================================== +*/ +// Cinematic Rendering Pipeline - OIT resolve pass + + +#include "crp_local.h" +namespace transp_resolve +{ +#include "compshaders/crp/transp_resolve_vs.h" +#include "compshaders/crp/transp_resolve_ps.h" +} + + +#pragma pack(push, 4) +struct TranspResolveRC +{ + uint32_t renderTargetTexture; + uint32_t shaderIndexBuffer; + uint32_t indexTexture; + uint32_t fragmentBuffer; + uint16_t centerPixelX; + uint16_t centerPixelY; + uint32_t depthTexture; + float proj22; + float proj32; + float scissorMinX; + float scissorMinY; + float scissorMaxX; + float scissorMaxY; +}; +#pragma pack(pop) + + +void TranspResolve::Init() /* creates the "OIT Resolve" pipeline: depth disabled, two-sided, one render target */ +{ + GraphicsPipelineDesc desc("OIT Resolve"); + desc.shortLifeTime = true; + desc.vertexShader = ShaderByteCode(transp_resolve::g_vs); + desc.pixelShader = ShaderByteCode(transp_resolve::g_ps); + desc.depthStencil.DisableDepth(); + desc.rasterizer.cullMode = CT_TWO_SIDED; + desc.AddRenderTarget(0, crp.renderTargetFormat); + pipeline = CreateGraphicsPipeline(desc); +} + +void TranspResolve::Draw(const drawSceneViewCommand_t& cmd) /* skipped when the view has no transparent surfaces; otherwise swaps the render targets and draws with the resolve pipeline */ +{ + if(cmd.numTranspSurfs <= 0) + { + return; + } + + srp.renderMode = RenderMode::World; + + SCOPED_RENDER_PASS("OIT Resolve", 1.0f, 0.5f, 0.5f); + + CmdSetViewportAndScissor(0, 0, glConfig.vidWidth, glConfig.vidHeight); + + crp.SwapRenderTargets(); + const TextureBarrier texBarriers[] = + { + TextureBarrier(crp.GetReadRenderTarget(), ResourceStates::PixelShaderAccessBit), + TextureBarrier(crp.GetWriteRenderTarget(), ResourceStates::RenderTargetBit), + TextureBarrier(crp.oitIndexTexture, ResourceStates::UnorderedAccessBit), + TextureBarrier(crp.depthTexture, ResourceStates::PixelShaderAccessBit) + }; + const BufferBarrier bufBarriers[] = + { + BufferBarrier(crp.oitFragmentBuffer, ResourceStates::UnorderedAccessBit), + 
BufferBarrier(srp.traceRenderBuffer, ResourceStates::UnorderedAccessBit) + }; + CmdBarrier(ARRAY_LEN(texBarriers), texBarriers, ARRAY_LEN(bufBarriers), bufBarriers); + + TranspResolveRC rc = {}; + rc.fragmentBuffer = GetBufferIndexUAV(crp.oitFragmentBuffer); + rc.indexTexture = GetTextureIndexUAV(crp.oitIndexTexture, 0); + rc.renderTargetTexture = GetTextureIndexSRV(crp.GetReadRenderTarget()); + rc.shaderIndexBuffer = GetBufferIndexUAV(srp.traceRenderBuffer); + rc.centerPixelX = glConfig.vidWidth / 2; + rc.centerPixelY = glConfig.vidHeight / 2; + rc.depthTexture = GetTextureIndexSRV(crp.depthTexture); + rc.proj22 = -backEnd.viewParms.projectionMatrix[2 * 4 + 2]; + rc.proj32 = backEnd.viewParms.projectionMatrix[3 * 4 + 2]; + rc.scissorMinX = backEnd.viewParms.viewportX; + rc.scissorMinY = backEnd.viewParms.viewportY; + rc.scissorMaxX = rc.scissorMinX + backEnd.viewParms.viewportWidth - 1; /* inclusive max: min + size - 1 */ + rc.scissorMaxY = rc.scissorMinY + backEnd.viewParms.viewportHeight - 1; + + CmdBindRenderTargets(1, &crp.renderTarget, NULL); + CmdBindPipeline(pipeline); + CmdSetGraphicsRootConstants(0, sizeof(rc), &rc); + CmdDraw(3, 0); +} diff --git a/code/renderer/grp_geometry.cpp b/code/renderer/grp_geometry.cpp new file mode 100644 index 0000000..05a56b4 --- /dev/null +++ b/code/renderer/grp_geometry.cpp @@ -0,0 +1,113 @@ +/* +=========================================================================== +Copyright (C) 2023-2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . +=========================================================================== +*/ +// Gameplay Rendering Pipeline - vertex and index buffer management + + +#include "grp_local.h" + + +void VertexBuffers::Create(const char* name, MemoryUsage::Id memoryUsage, uint32_t vertexCount) /* creates the position and normal buffers plus one tex coords and one color buffer per shader stage, each sized for vertexCount vertices */ +{ + totalCount = vertexCount; + + BufferDesc desc = {}; + desc.committedResource = true; + desc.initialState = ResourceStates::VertexBufferBit; + desc.memoryUsage = memoryUsage; + + desc.name = va("%s position vertex", name); + desc.byteCount = vertexCount * sizeof(vec3_t); + buffers[BasePosition] = CreateBuffer(desc); + strides[BasePosition] = sizeof(vec3_t); + + desc.name = va("%s normal vertex", name); + desc.byteCount = vertexCount * sizeof(vec3_t); + buffers[BaseNormal] = CreateBuffer(desc); + strides[BaseNormal] = sizeof(vec3_t); + + for(uint32_t s = 0; s < MAX_SHADER_STAGES; ++s) /* per-stage buffers are indexed as BaseCount + s * StageCount + stage-local id */ + { + desc.name = va("%s tex coords #%d vertex", name, (int)s + 1); + desc.byteCount = vertexCount * sizeof(vec2_t); + buffers[BaseCount + s * StageCount + StageTexCoords] = CreateBuffer(desc); + strides[BaseCount + s * StageCount + StageTexCoords] = sizeof(vec2_t); + + desc.name = va("%s color #%d vertex", name, (int)s + 1); + desc.byteCount = vertexCount * sizeof(color4ub_t); + buffers[BaseCount + s * StageCount + StageColors] = CreateBuffer(desc); + strides[BaseCount + s * StageCount + StageColors] = sizeof(color4ub_t); + } +} + +void VertexBuffers::BeginUpload() /* begins an upload on every buffer and keeps the returned pointers in mapped[] */ +{ + for(uint32_t b = 0; b < BufferCount; ++b) + { + mapped[b] = BeginBufferUpload(buffers[b]); + } +} + +void VertexBuffers::EndUpload() /* ends the upload on every buffer and clears mapped[] */ +{ + for(uint32_t b = 0; b < BufferCount; ++b) + { + EndBufferUpload(buffers[b]); + mapped[b] = NULL; + } +} + +void VertexBuffers::Upload(uint32_t firstStage, uint32_t stageCount) /* copies the tess positions, normals and per-stage tex coords/colors into the mapped buffers at vertex offset batchFirst + batchCount */ +{ + Q_assert(mapped[0] != NULL); /* mapped[] is only non-NULL between BeginUpload and EndUpload */ + + const uint32_t batchOffset = batchFirst + batchCount; + + float* pos = 
(float*)mapped[BasePosition] + 3 * batchOffset; + for(int v = 0; v < tess.numVertexes; ++v) + { + pos[0] = tess.xyz[v][0]; + pos[1] = tess.xyz[v][1]; + pos[2] = tess.xyz[v][2]; + pos += 3; + } + + float* nor = (float*)mapped[BaseNormal] + 3 * batchOffset; + for(int v = 0; v < tess.numVertexes; ++v) + { + nor[0] = tess.normal[v][0]; + nor[1] = tess.normal[v][1]; + nor[2] = tess.normal[v][2]; + nor += 3; + } + + for(uint32_t s = 0; s < stageCount; ++s) + { + const stageVars_t& sv = tess.svars[s + firstStage]; /* destination buffers are indexed by s, source stage vars by s + firstStage */ + + uint8_t* const tcBuffer = mapped[BaseCount + s * StageCount + StageTexCoords]; + float* tc = (float*)tcBuffer + 2 * batchOffset; + memcpy(tc, &sv.texcoords[0], tess.numVertexes * sizeof(vec2_t)); + + uint8_t* const colBuffer = mapped[BaseCount + s * StageCount + StageColors]; + uint32_t* col = (uint32_t*)colBuffer + batchOffset; + memcpy(col, &sv.colors[0], tess.numVertexes * sizeof(color4ub_t)); + } +} diff --git a/code/renderer/grp_local.h b/code/renderer/grp_local.h index 84140c4..bb2413b 100644 --- a/code/renderer/grp_local.h +++ b/code/renderer/grp_local.h @@ -1,6 +1,6 @@ /* =========================================================================== -Copyright (C) 2022-2023 Gian 'myT' Schellenbaum +Copyright (C) 2022-2024 Gian 'myT' Schellenbaum This file is part of Challenge Quake 3 (CNQ3). @@ -24,16 +24,7 @@ along with Challenge Quake 3. If not, see . 
#pragma once -#include "tr_local.h" -#include "rhi_local.h" - - -using namespace RHI; - - -// @TODO: move out -#define CONCAT_IMM(x, y) x ## y -#define CONCAT(x, y) CONCAT_IMM(x, y) +#include "srp_local.h" #pragma pack(push, 4) @@ -70,36 +61,6 @@ struct WorldPixelRC #pragma pack(pop) -struct BufferBase -{ - bool CanAdd(uint32_t count_) - { - return batchFirst + batchCount + count_ <= totalCount; - } - - void EndBatch() - { - batchFirst += batchCount; - batchCount = 0; - } - - void EndBatch(uint32_t size) - { - batchFirst += size; - batchCount = 0; - } - - void Rewind() - { - batchFirst = 0; - batchCount = 0; - } - - uint32_t totalCount = 0; - uint32_t batchFirst = 0; - uint32_t batchCount = 0; -}; - struct VertexBuffers : BufferBase { enum BaseId @@ -116,93 +77,10 @@ struct VertexBuffers : BufferBase StageCount }; - void Create(const char* name, MemoryUsage::Id memoryUsage, uint32_t vertexCount) - { - totalCount = vertexCount; - - BufferDesc desc = {}; - desc.committedResource = true; - desc.initialState = ResourceStates::VertexBufferBit; - desc.memoryUsage = memoryUsage; - - desc.name = va("%s position vertex", name); - desc.byteCount = vertexCount * sizeof(vec3_t); - buffers[BasePosition] = CreateBuffer(desc); - strides[BasePosition] = sizeof(vec3_t); - - desc.name = va("%s normal vertex", name); - desc.byteCount = vertexCount * sizeof(vec3_t); - buffers[BaseNormal] = CreateBuffer(desc); - strides[BaseNormal] = sizeof(vec3_t); - - for(uint32_t s = 0; s < MAX_SHADER_STAGES; ++s) - { - desc.name = va("%s tex coords #%d vertex", name, (int)s + 1); - desc.byteCount = vertexCount * sizeof(vec2_t); - buffers[BaseCount + s * StageCount + StageTexCoords] = CreateBuffer(desc); - strides[BaseCount + s * StageCount + StageTexCoords] = sizeof(vec2_t); - - desc.name = va("%s color #%d vertex", name, (int)s + 1); - desc.byteCount = vertexCount * sizeof(color4ub_t); - buffers[BaseCount + s * StageCount + StageColors] = CreateBuffer(desc); - strides[BaseCount + s * StageCount + 
StageColors] = sizeof(color4ub_t); - } - } - - void BeginUpload() - { - for(uint32_t b = 0; b < BufferCount; ++b) - { - mapped[b] = BeginBufferUpload(buffers[b]); - } - } - - void EndUpload() - { - for(uint32_t b = 0; b < BufferCount; ++b) - { - EndBufferUpload(buffers[b]); - mapped[b] = NULL; - } - } - - void Upload(uint32_t firstStage, uint32_t stageCount) - { - Q_assert(mapped[0] != NULL); - - const uint32_t batchOffset = batchFirst + batchCount; - - float* pos = (float*)mapped[BasePosition] + 3 * batchOffset; - for(int v = 0; v < tess.numVertexes; ++v) - { - pos[0] = tess.xyz[v][0]; - pos[1] = tess.xyz[v][1]; - pos[2] = tess.xyz[v][2]; - pos += 3; - } - - float* nor = (float*)mapped[BaseNormal] + 3 * batchOffset; - for(int v = 0; v < tess.numVertexes; ++v) - { - nor[0] = tess.normal[v][0]; - nor[1] = tess.normal[v][1]; - nor[2] = tess.normal[v][2]; - nor += 3; - } - - for(uint32_t s = 0; s < stageCount; ++s) - { - const stageVars_t& sv = tess.svars[s + firstStage]; - - uint8_t* const tcBuffer = mapped[BaseCount + s * StageCount + StageTexCoords]; - float* tc = (float*)tcBuffer + 2 * batchOffset; - memcpy(tc, &sv.texcoords[0], tess.numVertexes * sizeof(vec2_t)); - - uint8_t* const colBuffer = mapped[BaseCount + s * StageCount + StageColors]; - uint32_t* col = (uint32_t*)colBuffer + batchOffset; - memcpy(col, &sv.colors[0], tess.numVertexes * sizeof(color4ub_t)); - } - } + void Create(const char* name, MemoryUsage::Id memoryUsage, uint32_t vertexCount); + void BeginUpload(); + void EndUpload(); + void Upload(uint32_t firstStage, uint32_t stageCount); static const uint32_t BufferCount = BaseCount + StageCount * MAX_SHADER_STAGES; HBuffer buffers[BufferCount] = {}; @@ -210,66 +88,6 @@ struct VertexBuffers : BufferBase uint8_t* mapped[BufferCount] = {}; }; -struct IndexBuffer : BufferBase -{ - void Create(const char* name, MemoryUsage::Id memoryUsage, uint32_t indexCount) - { - totalCount = indexCount; - - BufferDesc desc = {}; - desc.committedResource = true; - 
desc.initialState = ResourceStates::IndexBufferBit; - desc.memoryUsage = memoryUsage; - desc.name = va("%s index", name); - desc.byteCount = indexCount * sizeof(uint32_t); - buffer = CreateBuffer(desc); - } - - void BeginUpload() - { - mapped = (uint32_t*)BeginBufferUpload(buffer); - } - - void EndUpload() - { - EndBufferUpload(buffer); - mapped = NULL; - } - - void Upload() - { - Q_assert(mapped != NULL); - - uint32_t* const idx = mapped + batchFirst + batchCount; - memcpy(idx, &tess.indexes[0], tess.numIndexes * sizeof(uint32_t)); - } - - uint32_t* GetCurrentAddress() - { - return mapped + batchFirst + batchCount; - } - - HBuffer buffer = RHI_MAKE_NULL_HANDLE(); - uint32_t* mapped = NULL; -}; - -struct GeometryBuffer : BufferBase -{ - void Init(uint32_t count_, uint32_t stride_) - { - buffer = RHI_MAKE_NULL_HANDLE(); - byteCount = count_ * stride_; - stride = stride_; - totalCount = count_; - batchFirst = 0; - batchCount = 0; - } - - HBuffer buffer = RHI_MAKE_NULL_HANDLE(); - uint32_t byteCount = 0; - uint32_t stride = 0; -}; - struct GeometryBuffers { void Rewind() @@ -291,20 +109,6 @@ struct StaticGeometryChunk uint32_t firstCPUIndex; }; -struct FrameStats -{ - enum { MaxFrames = 1024 }; - - void EndFrame(); - - float temp[MaxFrames]; - float p2pMS[MaxFrames]; - stats_t p2pStats; - int frameCount; - int frameIndex; - int skippedFrames; -}; - struct BatchType { enum Id @@ -396,10 +200,6 @@ struct World HBuffer boxVertexBuffer; HBuffer boxIndexBuffer; - // shader trace - HBuffer traceRenderBuffer; - HBuffer traceReadbackBuffer; - // dynamic lights HRootSignature dlRootSignature; HPipeline dlPipelines[CT_COUNT * 2 * 2]; // { cull type, polygon offset, depth test } @@ -412,176 +212,6 @@ struct World // we get "light holes" in opaque surfaces, which is not what we want }; -struct UI -{ - void Init(); - void BeginFrame(); - void Begin(); - void End(); - void DrawBatch(); - void UISetColor(const uiSetColorCommand_t& cmd); - void UIDrawQuad(const uiDrawQuadCommand_t& 
cmd); - void UIDrawTriangle(const uiDrawTriangleCommand_t& cmd); - - // 32-bit needed until the render logic is fixed! - typedef uint32_t Index; - const IndexType::Id indexType = IndexType::UInt32; - - uint32_t renderPassIndex; - -#pragma pack(push, 1) - struct Vertex - { - vec2_t position; - vec2_t texCoords; - uint32_t color; - }; -#pragma pack(pop) - int maxIndexCount; - int maxVertexCount; - int firstIndex; - int firstVertex; - int indexCount; - int vertexCount; - HRootSignature rootSignature; - HPipeline pipeline; - HBuffer indexBuffer; - HBuffer vertexBuffer; - Index* indices; - Vertex* vertices; - uint32_t color; - const shader_t* shader; -}; - -struct MipMapGenerator -{ - void Init(); - void GenerateMipMaps(HTexture texture); - - struct Stage - { - enum Id - { - Start, // gamma to linear - DownSample, // down sample on 1 axis - End, // linear to gamma - Count - }; - - HRootSignature rootSignature; - HDescriptorTable descriptorTable; - HPipeline pipeline; - }; - - struct MipSlice - { - enum Id - { - Float16_0, - Float16_1, - Count - }; - }; - - HTexture textures[MipSlice::Count]; - Stage stages[3]; -}; - -struct ImGUI -{ - void Init(); - void RegisterFontAtlas(); - void Draw(); - void SafeBeginFrame(); - void SafeEndFrame(); - - struct FrameResources - { - HBuffer indexBuffer; - HBuffer vertexBuffer; - }; - - HRootSignature rootSignature; - HPipeline pipeline; - HTexture fontAtlas; - FrameResources frameResources[FrameCount]; - bool frameStarted = false; -}; - -struct Nuklear -{ - void Init(); - void BeginFrame(); - void Begin(); - void End(); - void Upload(const nuklearUploadCommand_t& cmd); - void Draw(const nuklearDrawCommand_t& cmd); - - struct FrameResources - { - HBuffer indexBuffer; - HBuffer vertexBuffer; - }; - - HRootSignature rootSignature; - HPipeline pipeline; - FrameResources frameResources[FrameCount]; - uint32_t renderPassIndex; - int prevScissorRect[4]; - - // reset every frame - int firstVertex; - int firstIndex; - int numVertexes; // set 
in Upload - int numIndexes; // set in Upload -}; - -struct RenderMode -{ - enum Id - { - None, - UI, - World, - ImGui, - Nuklear, - Count - }; -}; - -struct RenderPassQueries -{ - char name[64]; - uint32_t gpuDurationUS; - uint32_t cpuDurationUS; - int64_t cpuStartUS; - uint32_t queryIndex; -}; - -enum -{ - MaxRenderPasses = 64, // cg_draw3dIcons forces tons of 2D/3D transitions... - MaxStatsFrameCount = 64 -}; - -struct RenderPassStats -{ - void EndFrame(uint32_t cpu, uint32_t gpu); - - uint32_t samplesCPU[MaxStatsFrameCount]; - uint32_t samplesGPU[MaxStatsFrameCount]; - stats_t statsCPU; - stats_t statsGPU; - uint32_t count; - uint32_t index; -}; - -struct RenderPassFrame -{ - RenderPassQueries passes[MaxRenderPasses]; - uint32_t count; -}; - #pragma pack(push, 1) struct PSODesc @@ -616,6 +246,7 @@ struct PostProcess void SetToneMapInput(HTexture toneMapInput); void SetInverseToneMapInput(HTexture inverseToneMapInput); +private: HPipeline toneMapPipeline; HRootSignature toneMapRootSignature; HDescriptorTable toneMapDescriptorTable; @@ -645,6 +276,7 @@ struct SMAA }; }; +private: // fixed HTexture areaTexture; HTexture searchTexture; @@ -686,27 +318,18 @@ struct GRP : IRenderPipeline void ProcessShader(shader_t& shader) override; void ExecuteRenderCommands(const byte* data, bool readbackRequested) override; - - void UISetColor(const uiSetColorCommand_t& cmd) override { ui.UISetColor(cmd); } - void UIDrawQuad(const uiDrawQuadCommand_t& cmd) override { ui.UIDrawQuad(cmd); } - void UIDrawTriangle(const uiDrawTriangleCommand_t& cmd) override { ui.UIDrawTriangle(cmd); } - void DrawSceneView(const drawSceneViewCommand_t& cmd) override { world.DrawSceneView(cmd); } void TessellationOverflow() override { world.RestartBatch(); } void DrawSkyBox() override { world.DrawSkyBox(); } void DrawClouds() override { world.DrawClouds(); } - void ReadPixels(int w, int h, int alignment, colorSpace_t colorSpace, void* out) override; + uint32_t 
GetSamplerDescriptorIndexFromBaseIndex(uint32_t baseIndex) override { return baseIndex; } + void BeginFrame(); void EndFrame(); uint32_t RegisterTexture(HTexture htexture); - uint32_t BeginRenderPass(const char* name, float r, float g, float b); - void EndRenderPass(uint32_t index); - - void DrawGUI(); - uint32_t CreatePSO(CachedPSO& cache, const char* name); void UpdateReadbackTexture(); @@ -718,14 +341,9 @@ struct GRP : IRenderPipeline PostProcess post; SMAA smaa; Nuklear nuklear; - bool firstInit = true; - RenderMode::Id renderMode; // necessary for sampler selection, useful for debugging float frameSeed; bool updateReadbackTexture; - // @TODO: what's up with rootSignature and uberRootSignature? - // probably need to nuke one of them... - HTexture renderTarget; TextureFormat::Id renderTargetFormat; HTexture readbackRenderTarget; @@ -733,14 +351,7 @@ struct GRP : IRenderPipeline HRootSignature rootSignature; HDescriptorTable descriptorTable; uint32_t textureIndex; - HSampler samplers[TW_COUNT * TextureFilter::Count * MaxTextureMips]; - - RenderPassFrame renderPasses[FrameCount]; - RenderPassFrame tempRenderPasses; - RenderPassStats renderPassStats[MaxRenderPasses]; - RenderPassStats wholeFrameStats; - - FrameStats frameStats; + HSampler samplers[BASE_SAMPLER_COUNT]; // all base samplers CachedPSO psos[1024]; uint32_t psoCount; @@ -748,43 +359,3 @@ struct GRP : IRenderPipeline }; extern GRP grp; - -struct ScopedRenderPass -{ - ScopedRenderPass(const char* name, float r, float g, float b) - { - index = grp.BeginRenderPass(name, r, g, b); - } - - ~ScopedRenderPass() - { - grp.EndRenderPass(index); - } - - uint32_t index; -}; - -#define SCOPED_RENDER_PASS(Name, R, G, B) ScopedRenderPass CONCAT(rp_, __LINE__)(Name, R, G, B) - -inline void CmdSetViewportAndScissor(uint32_t x, uint32_t y, uint32_t w, uint32_t h) -{ - CmdSetViewport(x, y, w, h); - CmdSetScissor(x, y, w, h); -} - -inline void CmdSetViewportAndScissor(const viewParms_t& vp) -{ - 
CmdSetViewportAndScissor(vp.viewportX, vp.viewportY, vp.viewportWidth, vp.viewportHeight); -} - -inline bool IsDepthFadeEnabled(const shader_t& shader) -{ - return - r_depthFade->integer != 0 && - shader.dfType > DFT_NONE && - shader.dfType < DFT_TBD; -} - -const image_t* GetBundleImage(const textureBundle_t& bundle); -uint32_t GetSamplerIndex(textureWrap_t wrap, TextureFilter::Id filter, uint32_t minLOD = 0); -uint32_t GetSamplerIndex(const image_t* image); diff --git a/code/renderer/grp_main.cpp b/code/renderer/grp_main.cpp index d18c7b9..b214da0 100644 --- a/code/renderer/grp_main.cpp +++ b/code/renderer/grp_main.cpp @@ -1,6 +1,6 @@ /* =========================================================================== -Copyright (C) 2022-2023 Gian 'myT' Schellenbaum +Copyright (C) 2022-2024 Gian 'myT' Schellenbaum This file is part of Challenge Quake 3 (CNQ3). @@ -22,14 +22,42 @@ along with Challenge Quake 3. If not, see . #include "grp_local.h" -#include "uber_shaders.h" -#include "hlsl/uber_shader.h" -#include "hlsl/complete_uber_vs.h" -#include "hlsl/complete_uber_ps.h" +#include "grp_uber_shaders.h" +#include "compshaders/grp/uber_shader.h" +#include "compshaders/grp/complete_uber_vs.h" +#include "compshaders/grp/complete_uber_ps.h" #include "../client/cl_imgui.h" +namespace ui +{ +#include "compshaders/grp/ui_vs.h" +#include "compshaders/grp/ui_ps.h" +} +namespace imgui +{ +#include "compshaders/grp/imgui_vs.h" +#include "compshaders/grp/imgui_ps.h" +} +namespace nuklear +{ +#include "compshaders/grp/nuklear_vs.h" +#include "compshaders/grp/nuklear_ps.h" +} +namespace mip_1 +{ +#include "compshaders/grp/mip_1_cs.h" +} +namespace mip_2 +{ +#include "compshaders/grp/mip_2_cs.h" +} +namespace mip_3 +{ +#include "compshaders/grp/mip_3_cs.h" +} GRP grp; +IRenderPipeline* grpp = &grp; static const ShaderByteCode vertexShaderByteCodes[8] = { @@ -64,79 +92,6 @@ static const uint32_t uberPixelShaderCacheSize = UBER_SHADER_PS_LIST(PS) 0; static UberPixelShaderState 
uberPixelShaderStates[uberPixelShaderCacheSize]; -static ImPlotPoint FrameTimeGetter(int index, void*) -{ - const FrameStats& fs = grp.frameStats; - const int realIndex = (fs.frameIndex + index) % fs.frameCount; - const float value = fs.p2pMS[realIndex]; - - ImPlotPoint p; - p.x = index; - p.y = value; - - return p; -} - -static void UpdateAnimatedImage(image_t* image, int w, int h, const byte* data, qbool dirty) -{ - if(w != image->width || h != image->height) - { - // @TODO: ? - /*image->width = w; - image->height = h; - CreateTexture(&d3d.textures[image->texnum], image, 1, w, h); - GAL_UpdateTexture(image, 0, 0, 0, w, h, data);*/ - } - else if(dirty) - { - // @TODO: ? - //GAL_UpdateTexture(image, 0, 0, 0, w, h, data); - } -} - -const image_t* GetBundleImage(const textureBundle_t& bundle) -{ - return R_UpdateAndGetBundleImage(&bundle, &UpdateAnimatedImage); -} - -uint32_t GetSamplerIndex(textureWrap_t wrap, TextureFilter::Id filter, uint32_t minLOD) -{ - Q_assert((uint32_t)wrap < TW_COUNT); - Q_assert((uint32_t)filter < TextureFilter::Count); - - const uint32_t index = - (uint32_t)filter + - (uint32_t)TextureFilter::Count * (uint32_t)wrap + - (uint32_t)TextureFilter::Count * (uint32_t)TW_COUNT * minLOD; - - return index; -} - -uint32_t GetSamplerIndex(const image_t* image) -{ - TextureFilter::Id filter = TextureFilter::Anisotropic; - if(r_lego->integer && - grp.renderMode == RenderMode::World && - (image->flags & (IMG_LMATLAS | IMG_EXTLMATLAS | IMG_NOPICMIP)) == 0) - { - filter = TextureFilter::Point; - } - else if((image->flags & IMG_NOAF) != 0 || - grp.renderMode != RenderMode::World) - { - filter = TextureFilter::Linear; - } - - int minLOD = 0; - if(grp.renderMode == RenderMode::World && - (image->flags & IMG_NOPICMIP) == 0) - { - minLOD = Com_ClampInt(0, MaxTextureMips - 1, r_picmip->integer); - } - - return GetSamplerIndex(image->wrapClampMode, filter, (uint32_t)minLOD); -} - static bool IsCommutativeBlendState(unsigned int stateBits) { const unsigned int 
blendStates[] = @@ -159,42 +114,14 @@ static bool IsCommutativeBlendState(unsigned int stateBits) return false; } -static cullType_t GetMirrorredCullType(cullType_t cullType) -{ - switch(cullType) - { - case CT_BACK_SIDED: return CT_FRONT_SIDED; - case CT_FRONT_SIDED: return CT_BACK_SIDED; - default: return CT_TWO_SIDED; - } -} - - -void FrameStats::EndFrame() -{ - frameCount = min(frameCount + 1, (int)MaxFrames); - frameIndex = (frameIndex + 1) % MaxFrames; - Com_StatsFromArray(p2pMS, frameCount, temp, &p2pStats); -} - - -void RenderPassStats::EndFrame(uint32_t cpu, uint32_t gpu) -{ - static uint32_t tempSamples[MaxStatsFrameCount]; - samplesCPU[index] = cpu; - samplesGPU[index] = gpu; - count = min(count + 1, (uint32_t)MaxStatsFrameCount); - index = (index + 1) % MaxStatsFrameCount; - Com_StatsFromArray((const int*)samplesCPU, count, (int*)tempSamples, &statsCPU); - Com_StatsFromArray((const int*)samplesGPU, count, (int*)tempSamples, &statsGPU); -} - void GRP::Init() { - firstInit = RHI::Init(); + InitDesc initDesc; + initDesc.directDescriptorHeapIndexing = false; + srp.firstInit = RHI::Init(initDesc); - if(firstInit) + if(srp.firstInit) { RootSignatureDesc desc("main"); desc.usingVertexBuffers = true; @@ -223,6 +150,11 @@ void GRP::Init() Q_assert(!"ParseUberPixelShaderState failed!"); } } + + srp.CreateShaderTraceBuffers(); + DescriptorTableUpdate update; + update.SetRWBuffers(1, &srp.traceRenderBuffer, MAX_DRAWIMAGES * 2); + UpdateDescriptorTable(descriptorTable, update); } // we recreate the samplers on every vid_restart to create the right level @@ -235,7 +167,7 @@ void GRP::Init() { const textureWrap_t wrap = (textureWrap_t)w; const TextureFilter::Id filter = (TextureFilter::Id)f; - const uint32_t s = GetSamplerIndex(wrap, filter, m); + const uint32_t s = GetBaseSamplerIndex(wrap, filter, m); SamplerDesc desc(wrap, filter, (float)m); desc.shortLifeTime = true; samplers[s] = CreateSampler(desc); @@ -291,16 +223,18 @@ void GRP::Init() readbackRenderTarget = 
RHI::CreateTexture(desc); } - ui.Init(); + ui.Init(false, ShaderByteCode(ui::g_vs), ShaderByteCode(ui::g_ps), renderTargetFormat, descriptorTable, &rootSignatureDesc); world.Init(); - mipMapGen.Init(); - imgui.Init(); - nuklear.Init(); + mipMapGen.Init(false, ShaderByteCode(mip_1::g_cs), ShaderByteCode(mip_2::g_cs), ShaderByteCode(mip_3::g_cs)); + const HTexture fontAtlas = imgui.Init(false, ShaderByteCode(imgui::g_vs), ShaderByteCode(imgui::g_ps), renderTargetFormat, descriptorTable, &rootSignatureDesc); + const uint32_t fontAtlasSRV = RegisterTexture(fontAtlas); + imgui.RegisterFontAtlas(fontAtlasSRV); + nuklear.Init(false, ShaderByteCode(nuklear::g_vs), ShaderByteCode(nuklear::g_ps), renderTargetFormat, descriptorTable, &rootSignatureDesc); post.Init(); post.SetToneMapInput(renderTarget); smaa.Init(); // must be after post - firstInit = false; + srp.firstInit = false; } void GRP::ShutDown(bool fullShutDown) @@ -310,14 +244,15 @@ void GRP::ShutDown(bool fullShutDown) void GRP::BeginFrame() { - renderPasses[tr.frameCount % FrameCount].count = 0; - - R_SetColorMappings(); + srp.psoCount = psoCount; + srp.psoChangeCount = world.psoChangeCount; + srp.psoStatsValid = true; + srp.BeginFrame(); smaa.Update(); // have it be first to we can use ImGUI in the other components too - grp.imgui.SafeBeginFrame(); + grp.imgui.BeginFrame(); RHI::BeginFrame(); ui.BeginFrame(); @@ -330,35 +265,19 @@ void GRP::BeginFrame() CmdClearColorTarget(renderTarget, clearColor); // nothing is bound to the command list yet! 
- renderMode = RenderMode::None; + srp.renderMode = RenderMode::None; frameSeed = (float)rand() / (float)RAND_MAX; } void GRP::EndFrame() { - DrawGUI(); - R_DrawGUI(); - imgui.Draw(); + srp.DrawGUI(); + imgui.Draw(renderTarget); post.Draw("Post-process", GetSwapChainTexture()); world.EndFrame(); UpdateReadbackTexture(); - RHI::EndFrame(); - - if(rhie.presentToPresentUS > 0) - { - frameStats.p2pMS[frameStats.frameIndex] = (float)rhie.presentToPresentUS / 1000.0f; - frameStats.EndFrame(); - } - else - { - frameStats.skippedFrames++; - } - - if(backEnd.renderFrame) - { - Sys_V_EndFrame(); - } + srp.EndFrame(); } void GRP::UpdateReadbackTexture() @@ -525,206 +444,6 @@ uint32_t GRP::RegisterTexture(HTexture htexture) return index; } -uint32_t GRP::BeginRenderPass(const char* name, float r, float g, float b) -{ - RenderPassFrame& f = renderPasses[tr.frameCount % FrameCount]; - if(f.count >= ARRAY_LEN(f.passes)) - { - Q_assert(0); - return UINT32_MAX; - } - - CmdBeginDebugLabel(name, r, g, b); - - const uint32_t index = f.count++; - RenderPassQueries& q = f.passes[index]; - Q_strncpyz(q.name, name, sizeof(q.name)); - q.cpuStartUS = Sys_Microseconds(); - q.queryIndex = CmdBeginDurationQuery(); - - return index; -} - -void GRP::EndRenderPass(uint32_t index) -{ - RenderPassFrame& f = renderPasses[tr.frameCount % FrameCount]; - if(index >= f.count) - { - Q_assert(0); - return; - } - - CmdEndDebugLabel(); - - RenderPassQueries& q = f.passes[index]; - q.cpuDurationUS = (uint32_t)(Sys_Microseconds() - q.cpuStartUS); - CmdEndDurationQuery(q.queryIndex); -} - -void GRP::DrawGUI() -{ - uint32_t durations[MaxDurationQueries]; - GetDurations(durations); - - wholeFrameStats.EndFrame(rhie.renderToPresentUS, durations[0]); - - const RenderPassFrame& currFrame = renderPasses[(tr.frameCount % FrameCount) ^ 1]; - RenderPassFrame& tempFrame = tempRenderPasses; - - // see if the render pass list is the same as the previous frame's - bool sameRenderPass = true; - if(currFrame.count == 
tempRenderPasses.count) - { - for(uint32_t p = 0; p < currFrame.count; ++p) - { - if(Q_stricmp(currFrame.passes[p].name, tempRenderPasses.passes[p].name) != 0) - { - sameRenderPass = false; - break; - } - } - } - else - { - sameRenderPass = false; - } - - // write out the displayed timings into the temp buffer - tempFrame.count = currFrame.count; - if(sameRenderPass) - { - for(uint32_t p = 0; p < currFrame.count; ++p) - { - const uint32_t index = currFrame.passes[p].queryIndex; - if(index < MaxDurationQueries) - { - renderPassStats[p].EndFrame(currFrame.passes[p].cpuDurationUS, durations[index]); - tempFrame.passes[p].gpuDurationUS = renderPassStats[p].statsGPU.median; - tempFrame.passes[p].cpuDurationUS = renderPassStats[p].statsCPU.median; - } - } - } - else - { - for(uint32_t p = 0; p < currFrame.count; ++p) - { - const uint32_t index = currFrame.passes[p].queryIndex; - if(index < MaxDurationQueries) - { - tempFrame.passes[p].gpuDurationUS = durations[index]; - tempFrame.passes[p].cpuDurationUS = currFrame.passes[p].cpuDurationUS; - } - } - } - - static bool breakdownActive = false; - ToggleBooleanWithShortcut(breakdownActive, ImGuiKey_F); - GUI_AddMainMenuItem(GUI_MainMenu::Perf, "Frame breakdown", "Ctrl+F", &breakdownActive); - if(breakdownActive) - { - if(ImGui::Begin("Frame breakdown", &breakdownActive, ImGuiWindowFlags_AlwaysAutoResize)) - { - if(BeginTable("Frame breakdown", 3)) - { - TableHeader(3, "Pass", "GPU [us]", "CPU [us]"); - - TableRow(3, "Whole frame", - va("%d", (int)wholeFrameStats.statsGPU.median), - va("%d", (int)wholeFrameStats.statsCPU.median)); - - for(uint32_t p = 0; p < currFrame.count; ++p) - { - const RenderPassQueries& rp = tempFrame.passes[p]; - if(rp.queryIndex < MaxDurationQueries) - { - TableRow(3, rp.name, - va("%d", (int)rp.gpuDurationUS), - va("%d", (int)rp.cpuDurationUS)); - } - } - - ImGui::EndTable(); - } - - ImGui::Text("PSO count: %d", (int)grp.psoCount); - ImGui::Text("PSO changes: %d", (int)grp.world.psoChangeCount); - } 
- ImGui::End(); - } - - // save the current render pass list in the temp buffer - memcpy(&tempFrame, &currFrame, sizeof(tempFrame)); - - static bool frameTimeActive = false; - GUI_AddMainMenuItem(GUI_MainMenu::Perf, "Frame stats", NULL, &frameTimeActive); - if(frameTimeActive) - { - if(ImGui::Begin("Frame stats", &frameTimeActive, ImGuiWindowFlags_AlwaysAutoResize)) - { - if(BeginTable("Frame stats", 2)) - { - const FrameStats& fs = frameStats; - const stats_t& s = fs.p2pStats; - TableRow2("Skipped frames", fs.skippedFrames); - TableRow2("Frame time target", rhie.targetFrameDurationMS); - TableRow2("Frame time average", s.average); - TableRow2("Frame time std dev.", s.stdDev); - TableRow2("Input to render", (float)rhie.inputToRenderUS / 1000.0f); - TableRow2("Input to present", (float)rhie.inputToPresentUS / 1000.0f); - - ImGui::EndTable(); - } - } - ImGui::End(); - } - - static bool graphsActive = false; - ToggleBooleanWithShortcut(graphsActive, ImGuiKey_G); - GUI_AddMainMenuItem(GUI_MainMenu::Perf, "Frame time graphs", "Ctrl+G", &graphsActive); - if(graphsActive) - { - const int windowFlags = - ImGuiWindowFlags_NoDecoration | - ImGuiWindowFlags_NoBackground | - ImGuiWindowFlags_NoMove; - ImGui::SetNextWindowSize(ImVec2(glConfig.vidWidth, glConfig.vidHeight / 2), ImGuiCond_Always); - ImGui::SetNextWindowPos(ImVec2(0, glConfig.vidHeight / 2), ImGuiCond_Always); - if(ImGui::Begin("Frame time graphs", &graphsActive, windowFlags)) - { - const FrameStats& fs = frameStats; - const double target = (double)rhie.targetFrameDurationMS; - - static bool autoFit = false; - ImGui::Checkbox("Auto-fit", &autoFit); - - if(ImPlot::BeginPlot("Frame Times", ImVec2(-1, -1), ImPlotFlags_NoInputs)) - { - const int axisFlags = 0; // ImPlotAxisFlags_NoTickLabels - const int axisFlagsY = axisFlags | (autoFit ? 
ImPlotAxisFlags_AutoFit : 0); - ImPlot::SetupAxes(NULL, NULL, axisFlags, axisFlagsY); - ImPlot::SetupAxisLimits(ImAxis_X1, 0, FrameStats::MaxFrames, ImGuiCond_Always); - if(!autoFit) - { - ImPlot::SetupAxisLimits(ImAxis_Y1, max(target - 2.0, 0.0), target + 2.0, ImGuiCond_Always); - } - - ImPlot::SetNextFillStyle(IMPLOT_AUTO_COL, 1.0f); - ImPlot::SetNextLineStyle(IMPLOT_AUTO_COL, 1.0f); - ImPlot::PlotInfLines("Target", &target, 1, ImPlotInfLinesFlags_Horizontal); - - ImPlot::SetNextFillStyle(IMPLOT_AUTO_COL, 1.0f); - ImPlot::SetNextLineStyle(IMPLOT_AUTO_COL, 1.0f); - ImPlot::PlotLineG("Frame Time", &FrameTimeGetter, NULL, fs.frameCount, ImPlotLineFlags_None); - - ImPlot::EndPlot(); - } - } - ImGui::End(); - } - - GUI_DrawMainMenu(); -} - uint32_t GRP::CreatePSO(CachedPSO& cache, const char* name) { Q_assert(cache.stageCount > 0); @@ -785,6 +504,9 @@ uint32_t GRP::CreatePSO(CachedPSO& cache, const char* name) { uint32_t macroCount = 0; ShaderMacro macros[64]; + macros[macroCount].name = "DISABLE_PRAGMA_ONCE"; + macros[macroCount].value = "1"; + macroCount++; macros[macroCount].name = "STAGE_COUNT"; macros[macroCount].value = va("%d", cache.stageCount); macroCount++; @@ -908,16 +630,16 @@ void GRP::ExecuteRenderCommands(const byte* data, bool readbackRequested) switch(commandId) { case RC_UI_SET_COLOR: - UISetColor(*(const uiSetColorCommand_t*)data); + ui.CmdSetColor(*(const uiSetColorCommand_t*)data); break; case RC_UI_DRAW_QUAD: - UIDrawQuad(*(const uiDrawQuadCommand_t*)data); + ui.CmdDrawQuad(*(const uiDrawQuadCommand_t*)data); break; case RC_UI_DRAW_TRIANGLE: - UIDrawTriangle(*(const uiDrawTriangleCommand_t*)data); + ui.CmdDrawTriangle(*(const uiDrawTriangleCommand_t*)data); break; case RC_DRAW_SCENE_VIEW: - DrawSceneView(*(const drawSceneViewCommand_t*)data); + world.DrawSceneView(*(const drawSceneViewCommand_t*)data); break; case RC_BEGIN_FRAME: BeginFrame(); @@ -926,7 +648,7 @@ void GRP::ExecuteRenderCommands(const byte* data, bool readbackRequested) 
EndFrame(); break; case RC_BEGIN_UI: - ui.Begin(); + ui.Begin(renderTarget); break; case RC_END_UI: ui.End(); @@ -941,7 +663,7 @@ void GRP::ExecuteRenderCommands(const byte* data, bool readbackRequested) smaa.Draw(((const endSceneCommand_t*)data)->viewParms); break; case RC_BEGIN_NK: - nuklear.Begin(); + nuklear.Begin(renderTarget); break; case RC_END_NK: nuklear.End(); @@ -963,49 +685,5 @@ void GRP::ExecuteRenderCommands(const byte* data, bool readbackRequested) void GRP::ReadPixels(int w, int h, int alignment, colorSpace_t colorSpace, void* outPixels) { - MappedTexture mapped; - BeginTextureReadback(mapped, grp.readbackRenderTarget); - - byte* const out0 = (byte*)outPixels; - const byte* const in0 = mapped.mappedData; - - if(colorSpace == CS_RGBA) - { - const int dstRowSizeNoPadding = w * 4; - mapped.dstRowByteCount = AlignUp(dstRowSizeNoPadding, alignment); - - for(int y = 0; y < mapped.rowCount; ++y) - { - byte* out = out0 + (mapped.rowCount - 1 - y) * mapped.dstRowByteCount; - const byte* in = in0 + y * mapped.srcRowByteCount; - memcpy(out, in, dstRowSizeNoPadding); - } - } - else if(colorSpace == CS_BGR) - { - mapped.dstRowByteCount = AlignUp(w * 3, alignment); - - for(int y = 0; y < mapped.rowCount; ++y) - { - byte* out = out0 + (mapped.rowCount - 1 - y) * mapped.dstRowByteCount; - const byte* in = in0 + y * mapped.srcRowByteCount; - for(int x = 0; x < mapped.columnCount; ++x) - { - out[2] = in[0]; - out[1] = in[1]; - out[0] = in[2]; - out += 3; - in += 4; - } - } - } - else - { - Q_assert(!"Unsupported color space"); - } - - EndTextureReadback(); + ReadTextureImage(outPixels, readbackRenderTarget, w, h, alignment, colorSpace); } - -// @TODO: move out once the cinematic render pipeline is added -IRenderPipeline* renderPipeline = &grp; diff --git a/code/renderer/grp_post.cpp b/code/renderer/grp_post.cpp index bce3bcc..d0f13c7 100644 --- a/code/renderer/grp_post.cpp +++ b/code/renderer/grp_post.cpp @@ -1,6 +1,6 @@ /* 
=========================================================================== -Copyright (C) 2023 Gian 'myT' Schellenbaum +Copyright (C) 2023-2024 Gian 'myT' Schellenbaum This file is part of Challenge Quake 3 (CNQ3). @@ -24,13 +24,13 @@ along with Challenge Quake 3. If not, see . #include "grp_local.h" namespace tone_map { -#include "hlsl/post_gamma_vs.h" -#include "hlsl/post_gamma_ps.h" +#include "compshaders/grp/post_gamma_vs.h" +#include "compshaders/grp/post_gamma_ps.h" } namespace inverse_tone_map { -#include "hlsl/post_inverse_gamma_vs.h" -#include "hlsl/post_inverse_gamma_ps.h" +#include "compshaders/grp/post_inverse_gamma_vs.h" +#include "compshaders/grp/post_inverse_gamma_ps.h" } @@ -60,7 +60,7 @@ struct InverseGammaPixelRC void PostProcess::Init() { - if(!grp.firstInit) + if(!srp.firstInit) { return; } diff --git a/code/renderer/grp_smaa.cpp b/code/renderer/grp_smaa.cpp index 94516bf..31124b9 100644 --- a/code/renderer/grp_smaa.cpp +++ b/code/renderer/grp_smaa.cpp @@ -1,6 +1,6 @@ /* =========================================================================== -Copyright (C) 2022-2023 Gian 'myT' Schellenbaum +Copyright (C) 2022-2024 Gian 'myT' Schellenbaum This file is part of Challenge Quake 3 (CNQ3). @@ -24,7 +24,7 @@ along with Challenge Quake 3. If not, see . 
#include "grp_local.h" #include "smaa_area_texture.h" #include "smaa_search_texture.h" -#include "hlsl/complete_smaa.h" +#include "compshaders/grp/complete_smaa.h" #define SMAA_PRESET_LIST(P) \ @@ -96,7 +96,7 @@ void SMAA::Update() bool createPresetDep = justEnabled || (alwaysEnabled && presetChanged); bool destroyPresetDep = justDisabled || (alwaysEnabled && presetChanged); - if(grp.firstInit) + if(srp.firstInit) { // first init or device change: we have nothing to destroy const bool enableSMAA = newMode != Mode::Disabled; diff --git a/code/renderer/uber_shaders.h b/code/renderer/grp_uber_shaders.h similarity index 81% rename from code/renderer/uber_shaders.h rename to code/renderer/grp_uber_shaders.h index d2c11ff..6441366 100644 --- a/code/renderer/uber_shaders.h +++ b/code/renderer/grp_uber_shaders.h @@ -1,3 +1,29 @@ +/* +=========================================================================== +Copyright (C) 2023 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . 
+=========================================================================== +*/ +// Gameplay Rendering Pipeline - defines all pre-compiled uber pixel shader combinations + + +#pragma once + + // format: stage_count global_state (stage_state_in_hex)+ #define UBER_SHADER_PS_LIST(PS) \ PS(1_0_0) \ diff --git a/code/renderer/grp_world.cpp b/code/renderer/grp_world.cpp index cac1c85..81b4487 100644 --- a/code/renderer/grp_world.cpp +++ b/code/renderer/grp_world.cpp @@ -1,6 +1,6 @@ /* =========================================================================== -Copyright (C) 2022-2023 Gian 'myT' Schellenbaum +Copyright (C) 2022-2024 Gian 'myT' Schellenbaum This file is part of Challenge Quake 3 (CNQ3). @@ -28,25 +28,25 @@ along with Challenge Quake 3. If not, see . #include "../client/cl_imgui.h" namespace zpp { -#include "hlsl/depth_pre_pass_vs.h" -#include "hlsl/depth_pre_pass_ps.h" +#include "compshaders/grp/depth_pre_pass_vs.h" +#include "compshaders/grp/depth_pre_pass_ps.h" } namespace fog { -#include "hlsl/fog_vs.h" +#include "compshaders/grp/fog_vs.h" } namespace fog_inside { -#include "hlsl/fog_inside_ps.h" +#include "compshaders/grp/fog_inside_ps.h" } namespace fog_outside { -#include "hlsl/fog_outside_ps.h" +#include "compshaders/grp/fog_outside_ps.h" } namespace dyn_light { -#include "hlsl/dynamic_light_vs.h" -#include "hlsl/dynamic_light_ps.h" +#include "compshaders/grp/dynamic_light_vs.h" +#include "compshaders/grp/dynamic_light_ps.h" } @@ -126,40 +126,6 @@ static bool HasStaticGeo(int staticGeoChunk, const shader_t* shader) staticGeoChunk < ARRAY_LEN(grp.world.statChunks); } -static void UpdateEntityData(bool& depthHack, int entityNum, double originalTime) -{ - depthHack = false; - - if(entityNum != ENTITYNUM_WORLD) - { - backEnd.currentEntity = &backEnd.refdef.entities[entityNum]; - if(backEnd.currentEntity->intShaderTime) - backEnd.refdef.floatTime = originalTime - (double)backEnd.currentEntity->e.shaderTime.iShaderTime / 1000.0; - else - 
backEnd.refdef.floatTime = originalTime - backEnd.currentEntity->e.shaderTime.fShaderTime; - // we have to reset the shaderTime as well otherwise image animations start - // from the wrong frame - tess.shaderTime = backEnd.refdef.floatTime - tess.shader->timeOffset; - - // set up the transformation matrix - R_RotateForEntity(backEnd.currentEntity, &backEnd.viewParms, &backEnd.orient); - - if(backEnd.currentEntity->e.renderfx & RF_DEPTHHACK) - { - depthHack = true; - } - } - else - { - backEnd.currentEntity = &tr.worldEntity; - backEnd.refdef.floatTime = originalTime; - backEnd.orient = backEnd.viewParms.world; - // we have to reset the shaderTime as well otherwise image animations on - // the world (like water) continue with the wrong frame - tess.shaderTime = backEnd.refdef.floatTime - tess.shader->timeOffset; - } -} - static int GetDynamicLightPipelineIndex(cullType_t cullType, qbool polygonOffset, qbool depthTestEquals) { return (int)cullType + CT_COUNT * (int)polygonOffset + CT_COUNT * 2 * (int)depthTestEquals; @@ -168,7 +134,7 @@ static int GetDynamicLightPipelineIndex(cullType_t cullType, qbool polygonOffset void World::Init() { - if(grp.firstInit) + if(srp.firstInit) { fogDescriptorTable = RHI_MAKE_NULL_HANDLE(); } @@ -192,7 +158,7 @@ void World::Init() } } - if(grp.firstInit) + if(srp.firstInit) { // // depth pre-pass @@ -313,23 +279,6 @@ void World::Init() EndBufferUpload(boxVertexBuffer); } - // - // shader trace - // - { - BufferDesc desc("shader trace opaque", 2 * sizeof(uint32_t), ResourceStates::UnorderedAccessBit); - traceRenderBuffer = CreateBuffer(desc); - - DescriptorTableUpdate update; - update.SetRWBuffers(1, &traceRenderBuffer, MAX_DRAWIMAGES * 2); - UpdateDescriptorTable(grp.descriptorTable, update); - } - { - BufferDesc desc("shader trace opaque readback", 2 * sizeof(uint32_t), ResourceStates::Common); - desc.memoryUsage = MemoryUsage::Readback; - traceReadbackBuffer = CreateBuffer(desc); - } - // // dynamic lights // @@ -425,34 +374,11 @@ 
void World::BeginFrame() void World::EndFrame() { - tr.tracedWorldShaderIndex = -1; - if(tr.traceWorldShader && tr.world != NULL) - { - // schedule a GPU -> CPU transfer - { - BufferBarrier barrier(traceRenderBuffer, ResourceStates::CopySourceBit); - CmdBarrier(0, NULL, 1, &barrier); - } - CmdCopyBuffer(traceReadbackBuffer, traceRenderBuffer); - { - BufferBarrier barrier(traceRenderBuffer, ResourceStates::UnorderedAccessBit); - CmdBarrier(0, NULL, 1, &barrier); - } - - // grab last frame's result - uint32_t* shaderIndices = (uint32_t*)MapBuffer(traceReadbackBuffer); - const uint32_t shaderIndex = shaderIndices[RHI::GetFrameIndex() ^ 1]; - UnmapBuffer(traceReadbackBuffer); - if(shaderIndex < (uint32_t)tr.numShaders) - { - tr.tracedWorldShaderIndex = (int)shaderIndex; - } - } } void World::Begin() { - grp.renderMode = RenderMode::World; + srp.renderMode = RenderMode::World; if(backEnd.viewParms.isPortal) { @@ -495,7 +421,7 @@ void World::Begin() void World::End() { - grp.renderMode = RenderMode::None; + srp.renderMode = RenderMode::None; } void World::DrawPrePass(const drawSceneViewCommand_t& cmd) diff --git a/code/renderer/rhi_d3d12.cpp b/code/renderer/rhi_d3d12.cpp index 42ba5b3..0813483 100644 --- a/code/renderer/rhi_d3d12.cpp +++ b/code/renderer/rhi_d3d12.cpp @@ -1,6 +1,6 @@ /* =========================================================================== -Copyright (C) 2022-2023 Gian 'myT' Schellenbaum +Copyright (C) 2022-2024 Gian 'myT' Schellenbaum This file is part of Challenge Quake 3 (CNQ3). 
@@ -577,6 +577,35 @@ namespace RHI bool canBeginAndEnd; }; + struct DynamicResources + { + struct DescriptorRange + { + void Init(D3D12_DESCRIPTOR_RANGE_TYPE type, uint32_t start, uint32_t count); + uint32_t Allocate(); + + D3D12_DESCRIPTOR_RANGE_TYPE type; + uint32_t start; + uint32_t count; + uint32_t index; + }; + + static const uint32_t MaxDescriptorsSRV = 65536; + static const uint32_t MaxDescriptorsUAV = 65536; + static const uint32_t MaxDescriptorsCBV = 65536; + static const uint32_t MaxDescriptorsSamplers = 1024; + static const uint32_t MaxDescriptorsGeneric = MaxDescriptorsSRV + MaxDescriptorsUAV + MaxDescriptorsCBV; + + ID3D12RootSignature* rootSignature; + ID3D12DescriptorHeap* genericDescriptorHeap; // CPU write, GPU read + ID3D12DescriptorHeap* samplerDescriptorHeap; // CPU write, GPU read + ID3D12DescriptorHeap* genericCPUDescriptorHeap; // CPU read/write, for UAV clears and the like + DescriptorRange srvIndex; + DescriptorRange uavIndex; + DescriptorRange cbvIndex; + DescriptorRange samplerIndex; + }; + struct RHIPrivate { bool initialized; @@ -622,6 +651,8 @@ namespace RHI bool frameBegun; bool baseVRSSupport; bool extendedVRSSupport; + bool useDynamicResources; + DynamicResources dynamicResources; HMODULE dxcModule; HMODULE dxilModule; @@ -695,6 +726,9 @@ namespace RHI # define D3DDDIERR_DEVICEREMOVED ((HRESULT)0x88760870L) #endif +#define ASSERT_DR_ENABLED() ASSERT_OR_DIE(rhi.useDynamicResources, "RHI API requires DR on") +#define ASSERT_DR_DISABLED() ASSERT_OR_DIE(!rhi.useDynamicResources, "RHI API requires DR off") + static const char* GetSystemErrorString(HRESULT hr) { // FormatMessage might not always give us the string we want but that's ok, @@ -1240,11 +1274,6 @@ namespace RHI Q_assert(resource); Q_assert(type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - if(desc.Format == DXGI_FORMAT_D32_FLOAT) - { - desc.Format = DXGI_FORMAT_R32_FLOAT; - } - const uint32_t index = freeList.Allocate(); rhi.device->CreateShaderResourceView(resource, &desc, 
GetCPUHandle(index)); @@ -1304,6 +1333,23 @@ namespace RHI return index; } + uint32_t DynamicResources::DescriptorRange::Allocate() + { + ASSERT_OR_DIE(index < start + count, "Not enough descriptors"); // valid slots are [start, start + count) + + return index++; + } + + void DynamicResources::DescriptorRange::Init(D3D12_DESCRIPTOR_RANGE_TYPE type_, uint32_t start_, uint32_t count_) + { + Q_assert(count_ > 0); + + type = type_; + start = start_; + count = count_; + index = start_; + } + static const char* GetDeviceRemovedReasonString(HRESULT reason) { switch(reason) @@ -1632,6 +1678,7 @@ namespace RHI case TextureFormat::RG16_UNorm: return DXGI_FORMAT_R8G8_UNORM; case TextureFormat::R8_UNorm: return DXGI_FORMAT_R8_UNORM; case TextureFormat::R10G10B10A2_UNorm: return DXGI_FORMAT_R10G10B10A2_UNORM; + case TextureFormat::R32_UInt: return DXGI_FORMAT_R32_UINT; default: Q_assert(!"Unsupported texture format"); return DXGI_FORMAT_R8G8B8A8_UNORM; } } @@ -2183,6 +2230,400 @@ namespace RHI } } + static uint32_t CreateSRV(ID3D12Resource* resource, D3D12_SHADER_RESOURCE_VIEW_DESC& desc) + { + Q_assert(resource); + + if(desc.Format == DXGI_FORMAT_D32_FLOAT) + { + desc.Format = DXGI_FORMAT_R32_FLOAT; + } + + if(!rhi.useDynamicResources) + { + return rhi.descHeapGeneric.CreateSRV(resource, desc); + } + + DynamicResources& dr = rhi.dynamicResources; + const uint32_t index = dr.srvIndex.Allocate(); + D3D12_CPU_DESCRIPTOR_HANDLE handle = dr.genericDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + handle.ptr += index * rhi.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + rhi.device->CreateShaderResourceView(resource, &desc, handle); + + return index; + } + + static uint32_t CreateUAV(ID3D12Resource* resource, D3D12_UNORDERED_ACCESS_VIEW_DESC& desc) + { + Q_assert(resource); + + if(!rhi.useDynamicResources) + { + return rhi.descHeapGeneric.CreateUAV(resource, desc); + } + + DynamicResources& dr = rhi.dynamicResources; + const uint32_t index = dr.uavIndex.Allocate(); +
D3D12_CPU_DESCRIPTOR_HANDLE handleGPU = dr.genericDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + handleGPU.ptr += index * rhi.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + rhi.device->CreateUnorderedAccessView(resource, NULL, &desc, handleGPU); + + // make a CPU-visible copy at the same offset in another heap to enable UAV clears + D3D12_CPU_DESCRIPTOR_HANDLE handleCPU = dr.genericCPUDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + handleCPU.ptr += index * rhi.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + rhi.device->CreateUnorderedAccessView(resource, NULL, &desc, handleCPU); + + return index; + } + + static uint32_t CreateCBV(D3D12_CONSTANT_BUFFER_VIEW_DESC& desc) + { + if(!rhi.useDynamicResources) + { + return rhi.descHeapGeneric.CreateCBV(desc); + } + + DynamicResources& dr = rhi.dynamicResources; + const uint32_t index = dr.cbvIndex.Allocate(); + D3D12_CPU_DESCRIPTOR_HANDLE handle = dr.genericDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + handle.ptr += index * rhi.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + rhi.device->CreateConstantBufferView(&desc, handle); + + return index; + } + + static uint32_t CreateSampler(D3D12_SAMPLER_DESC& desc) + { + if(!rhi.useDynamicResources) + { + return rhi.descHeapSamplers.CreateSampler(desc); + } + + DynamicResources& dr = rhi.dynamicResources; + const uint32_t index = dr.samplerIndex.Allocate(); + D3D12_CPU_DESCRIPTOR_HANDLE handle = dr.samplerDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + handle.ptr += index * rhi.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + rhi.device->CreateSampler(&desc, handle); + + return index; + } + + static void CreateBufferDescriptors(Buffer& buffer) + { + const BufferDesc& rhiDesc = buffer.desc; + ID3D12Resource* const resource = buffer.buffer; + + uint32_t srvIndex = InvalidDescriptorIndex; + 
if(rhiDesc.initialState & ResourceStates::ShaderAccessBits) + { + D3D12_SHADER_RESOURCE_VIEW_DESC srv = {}; + srv.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + srv.Buffer.FirstElement = 0; + if(rhiDesc.structureByteCount > 0) + { + srv.Format = DXGI_FORMAT_UNKNOWN; + srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srv.Buffer.NumElements = rhiDesc.byteCount / rhiDesc.structureByteCount; + srv.Buffer.StructureByteStride = rhiDesc.structureByteCount; + srv.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; + } + else + { + srv.Format = DXGI_FORMAT_R32_TYPELESS; + srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srv.Buffer.NumElements = rhiDesc.byteCount / 4; + srv.Buffer.StructureByteStride = 0; + srv.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; + } + srvIndex = CreateSRV(resource, srv); + } + + uint32_t cbvIndex = InvalidDescriptorIndex; + if(rhiDesc.initialState & ResourceStates::ConstantBufferBit) + { + D3D12_CONSTANT_BUFFER_VIEW_DESC cbv = {}; + cbv.BufferLocation = resource->GetGPUVirtualAddress(); + cbv.SizeInBytes = rhiDesc.byteCount; + cbvIndex = CreateCBV(cbv); + } + + uint32_t uavIndex = InvalidDescriptorIndex; + if(rhiDesc.initialState & ResourceStates::UnorderedAccessBit) + { + D3D12_UNORDERED_ACCESS_VIEW_DESC uav = {}; + uav.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uav.Buffer.CounterOffsetInBytes = 0; + uav.Buffer.FirstElement = 0; + if(rhiDesc.structureByteCount > 0) + { + uav.Format = DXGI_FORMAT_UNKNOWN; + uav.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; + uav.Buffer.NumElements = rhiDesc.byteCount / rhiDesc.structureByteCount; + uav.Buffer.StructureByteStride = rhiDesc.structureByteCount; + } + else + { + uav.Format = DXGI_FORMAT_R32_TYPELESS; + uav.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; + uav.Buffer.NumElements = rhiDesc.byteCount / 4; + uav.Buffer.StructureByteStride = 0; + } + uavIndex = CreateUAV(resource, uav); + } + + buffer.cbvIndex = cbvIndex; + buffer.uavIndex = uavIndex; + buffer.srvIndex = 
srvIndex; + } + + static void CreateTextureDescriptors(Texture& texture) + { + const TextureDesc& rhiDesc = texture.desc; + ID3D12Resource* const resource = texture.texture; + + if(rhiDesc.allowedState & ResourceStates::ShaderAccessBits) + { + D3D12_SHADER_RESOURCE_VIEW_DESC srv = {}; + srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srv.Format = GetD3DFormat(rhiDesc.format); + srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srv.Texture2D.MipLevels = rhiDesc.mipCount; + srv.Texture2D.MostDetailedMip = 0; + srv.Texture2D.PlaneSlice = 0; + srv.Texture2D.ResourceMinLODClamp = 0.0f; + if(rhiDesc.format == TextureFormat::Depth24_Stencil8) + { + srv.Format = DXGI_FORMAT_R24_UNORM_X8_TYPELESS; // @TODO: + } + texture.srvIndex = CreateSRV(resource, srv); + } + else + { + texture.srvIndex = InvalidDescriptorIndex; + } + + if(rhiDesc.allowedState & ResourceStates::UnorderedAccessBit) + { + for(uint32_t m = 0; m < rhiDesc.mipCount; ++m) + { + D3D12_UNORDERED_ACCESS_VIEW_DESC uav = {}; + uav.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + uav.Format = GetD3DFormat(rhiDesc.format); + uav.Texture2D.MipSlice = m; + uav.Texture2D.PlaneSlice = 0; + texture.mips[m].uavIndex = CreateUAV(resource, uav); + } + } + else + { + for(uint32_t m = 0; m < rhiDesc.mipCount; ++m) + { + texture.mips[m].uavIndex = InvalidDescriptorIndex; + } + } + } + + static void InitDynamicResourceDescriptorRanges() + { + const uint32_t uavStart = DynamicResources::MaxDescriptorsSRV; + const uint32_t cbvStart = uavStart + DynamicResources::MaxDescriptorsUAV; + DynamicResources& dr = rhi.dynamicResources; + dr.srvIndex.Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, DynamicResources::MaxDescriptorsSRV); + dr.uavIndex.Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, uavStart, DynamicResources::MaxDescriptorsUAV); + dr.cbvIndex.Init(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, cbvStart, DynamicResources::MaxDescriptorsCBV); + dr.samplerIndex.Init(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, DynamicResources::MaxDescriptorsSamplers); // same constant that sizes the sampler heap + } + + static
void CreateDynamicResources() + { + if(!rhi.useDynamicResources) + { + return; + } + + const uint32_t genericDescriptorCount = DynamicResources::MaxDescriptorsGeneric; + const uint32_t samplerDescriptorCount = DynamicResources::MaxDescriptorsSamplers; + DynamicResources& dr = rhi.dynamicResources; + InitDynamicResourceDescriptorRanges(); + + { + // keep 1.0 behavior for max. flexibility, + // effectively disabling optimizations + const D3D12_DESCRIPTOR_RANGE_FLAGS genericRangeFlags = + D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE | + D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE; + const D3D12_DESCRIPTOR_RANGE_FLAGS samplerRangeFlags = + D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE; + + const DynamicResources::DescriptorRange generic[3] = + { + dr.srvIndex, + dr.uavIndex, + dr.cbvIndex + }; + D3D12_DESCRIPTOR_RANGE1 mainRanges[3] = {}; + for(uint32_t i = 0; i < ARRAY_LEN(generic); ++i) + { + mainRanges[i].RangeType = generic[i].type; + mainRanges[i].BaseShaderRegister = generic[i].start; + mainRanges[i].NumDescriptors = generic[i].count; + mainRanges[i].RegisterSpace = 0; + mainRanges[i].OffsetInDescriptorsFromTableStart = generic[i].start; + mainRanges[i].Flags = genericRangeFlags; + } + D3D12_DESCRIPTOR_RANGE1 samplerRange = {}; + samplerRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + samplerRange.BaseShaderRegister = 0; + samplerRange.NumDescriptors = samplerDescriptorCount; + samplerRange.RegisterSpace = 0; + samplerRange.OffsetInDescriptorsFromTableStart = 0; + samplerRange.Flags = samplerRangeFlags; + + D3D12_ROOT_PARAMETER1 rootParameters[3] = {}; + rootParameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParameters[0].DescriptorTable.NumDescriptorRanges = ARRAY_LEN(mainRanges); + rootParameters[0].DescriptorTable.pDescriptorRanges = mainRanges; + rootParameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + 
rootParameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParameters[1].DescriptorTable.NumDescriptorRanges = 1; + rootParameters[1].DescriptorTable.pDescriptorRanges = &samplerRange; + rootParameters[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + rootParameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParameters[2].Constants.Num32BitValues = 64 - 2; // all the available space minus the 2 tables + rootParameters[2].Constants.ShaderRegister = 0; // access the RC at register b0 + + D3D12_VERSIONED_ROOT_SIGNATURE_DESC desc; + desc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1; + desc.Desc_1_1.Flags = + D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT | + D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED | + D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED | + D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_AMPLIFICATION_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_MESH_SHADER_ROOT_ACCESS; + desc.Desc_1_1.NumParameters = ARRAY_LEN(rootParameters); + desc.Desc_1_1.pParameters = rootParameters; + desc.Desc_1_1.NumStaticSamplers = 0; + desc.Desc_1_1.pStaticSamplers = NULL; + + ID3DBlob* blob = NULL; + ID3DBlob* errorBlob = NULL; // stays NULL when the serializer produces no message + if(FAILED(D3D12SerializeVersionedRootSignature(&desc, &blob, &errorBlob))) + { + ri.Error(ERR_FATAL, "Root signature creation failed!\n%s\n", errorBlob != NULL ? (const char*)errorBlob->GetBufferPointer() : "(no error message)"); + } + COM_RELEASE(errorBlob); + + ID3D12RootSignature* signature; + D3D(rhi.device->CreateRootSignature(0, blob->GetBufferPointer(), blob->GetBufferSize(), IID_PPV_ARGS(&signature))); + COM_RELEASE(blob); + SetDebugName(signature, "Uber", D3DResourceType::RootSignature); + dr.rootSignature = signature; + } + + { + ID3D12DescriptorHeap* heap; + D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; + heapDesc.NumDescriptors =
genericDescriptorCount; + heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + D3D(rhi.device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&heap))); + SetDebugName(heap, "Uber GPU generic", D3DResourceType::DescriptorHeap); + dr.genericDescriptorHeap = heap; + + heapDesc.NumDescriptors = samplerDescriptorCount; + heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; + D3D(rhi.device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&heap))); + SetDebugName(heap, "Uber GPU sampler", D3DResourceType::DescriptorHeap); + dr.samplerDescriptorHeap = heap; + + heapDesc.NumDescriptors = genericDescriptorCount; + heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + D3D(rhi.device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&heap))); + SetDebugName(heap, "Uber CPU generic", D3DResourceType::DescriptorHeap); + dr.genericCPUDescriptorHeap = heap; + } + } + + static void DestroyDynamicResources() + { + DynamicResources& dr = rhi.dynamicResources; + COM_RELEASE(dr.rootSignature); + COM_RELEASE(dr.genericDescriptorHeap); + COM_RELEASE(dr.samplerDescriptorHeap); + COM_RELEASE(dr.genericCPUDescriptorHeap); + } + + static void BindDynamicResources() + { + if(!rhi.useDynamicResources) + { + return; + } + + // @NOTE: Set*RootSignature must be called after SetDescriptorHeaps + DynamicResources& dr = rhi.dynamicResources; + ID3D12DescriptorHeap* heaps[] = { dr.genericDescriptorHeap, dr.samplerDescriptorHeap }; + rhi.commandList->SetDescriptorHeaps(ARRAY_LEN(heaps), heaps); + if(rhi.commandList->GetType() == D3D12_COMMAND_LIST_TYPE_DIRECT) + { + rhi.commandList->SetGraphicsRootSignature(dr.rootSignature); + rhi.commandList->SetGraphicsRootDescriptorTable(0, dr.genericDescriptorHeap->GetGPUDescriptorHandleForHeapStart()); + rhi.commandList->SetGraphicsRootDescriptorTable(1, dr.samplerDescriptorHeap->GetGPUDescriptorHandleForHeapStart()); + } + 
rhi.commandList->SetComputeRootSignature(dr.rootSignature); + rhi.commandList->SetComputeRootDescriptorTable(0, dr.genericDescriptorHeap->GetGPUDescriptorHandleForHeapStart()); + rhi.commandList->SetComputeRootDescriptorTable(1, dr.samplerDescriptorHeap->GetGPUDescriptorHandleForHeapStart()); + } + + static void UpdateDynamicResources() + { + if(!rhi.useDynamicResources) + { + return; + } + + InitDynamicResourceDescriptorRanges(); + + Texture* texture; + HTexture htexture; + for(int i = 0; rhi.textures.FindNext(&texture, &htexture, &i);) + { + CreateTextureDescriptors(*texture); + } + + Buffer* buffer; + HBuffer hbuffer; + for(int i = 0; rhi.buffers.FindNext(&buffer, &hbuffer, &i);) + { + CreateBufferDescriptors(*buffer); + } + } + + static void SetRootConstants(uint32_t byteOffset, uint32_t byteCount, const void* constants, bool graphics) + { + Q_assert(CanWriteCommands()); + Q_assert(byteCount > 0); + Q_assert(byteCount % 4 == 0); + Q_assert(byteOffset % 4 == 0); + Q_assert(byteOffset + byteCount <= 256 - 2 * 4); // 8 bytes are used for the 2 descriptor tables + Q_assert(constants); + + // parameter index is 2 because the 2 descriptor tables come first + if(graphics) + { + rhi.commandList->SetGraphicsRoot32BitConstants(2, (UINT)byteCount / 4, constants, (UINT)byteOffset / 4); + } + else + { + rhi.commandList->SetComputeRoot32BitConstants(2, (UINT)byteCount / 4, constants, (UINT)byteOffset / 4); + } + } + static void DrawResourceUsage() { if(BeginTable("Handles", 3)) @@ -2211,10 +2652,23 @@ namespace RHI TableHeader(3, "Type", "Count", "Max"); #define ITEM(Name, Variable) TableRow(3, Name, va("%d", (int)Variable.allocatedItemCount), va("%d", (int)Variable.size)) - ITEM("CBV/SRV/UAV", rhi.descHeapGeneric.freeList); - ITEM("Samplers", rhi.descHeapSamplers.freeList); - ITEM("RTV", rhi.descHeapRTVs.freeList); - ITEM("DSV", rhi.descHeapDSVs.freeList); +#define DRITEM(Name, Index) TableRow(3, Name, va("%d", (int)(Index.index - Index.start)), va("%d", (int)Index.count)); 
+ if(rhi.useDynamicResources) + { + const DynamicResources& dr = rhi.dynamicResources; + DRITEM("GPU SRV", dr.srvIndex); + DRITEM("GPU UAV", dr.uavIndex); + DRITEM("GPU CBV", dr.cbvIndex); + DRITEM("GPU Samplers", dr.samplerIndex); + } + else + { + ITEM("CPU CBV/SRV/UAV", rhi.descHeapGeneric.freeList); + ITEM("CPU Samplers", rhi.descHeapSamplers.freeList); + } + ITEM("CPU RTV", rhi.descHeapRTVs.freeList); + ITEM("CPU DSV", rhi.descHeapDSVs.freeList); +#undef DRITEM #undef ITEM ImGui::EndTable(); @@ -2465,7 +2919,7 @@ namespace RHI } } - bool Init() + bool Init(const InitDesc& initDesc) { Sys_V_Init(); @@ -2553,11 +3007,14 @@ namespace RHI rhi.vsync = vsync; + UpdateDynamicResources(); + return false; } // @NOTE: we can't use memset because of the StaticPool members new (&rhi) RHIPrivate(); + rhi.useDynamicResources = initDesc.directDescriptorHeapIndexing; // check for the presence of our 3 DLLs ASAP { @@ -2634,6 +3091,28 @@ namespace RHI D3D(D3D12CreateDevice(rhi.adapter, FeatureLevel, IID_PPV_ARGS(&rhi.device))); + if(rhi.useDynamicResources) + { + D3D12_FEATURE_DATA_D3D12_OPTIONS options0 = {}; + if(SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options0, sizeof(options0)))) + { + if(options0.ResourceBindingTier < D3D12_RESOURCE_BINDING_TIER_3) + { + ri.Error(ERR_FATAL, "The CRP requires Resource Binding Tier 3 capable hardware\n"); + } + } + + D3D12_FEATURE_DATA_SHADER_MODEL shaderModel = {}; + shaderModel.HighestShaderModel = D3D_SHADER_MODEL_6_6; + if(SUCCEEDED(rhi.device->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &shaderModel, sizeof(shaderModel)))) + { + if(shaderModel.HighestShaderModel < D3D_SHADER_MODEL_6_6) + { + ri.Error(ERR_FATAL, "The CRP requires Shader Model 6.6 capable hardware\n"); + } + } + } + { D3D12MA::ALLOCATOR_DESC desc = {}; desc.pDevice = rhi.device; @@ -2825,6 +3304,7 @@ namespace RHI rhi.timeStampBuffers[f] = CreateBuffer(desc); } + CreateDynamicResources(); CreateNullResources(); // queue some actual 
work... @@ -2950,6 +3430,11 @@ namespace RHI WaitUntilDeviceIsIdle(); + if(destroyWindow) + { + DestroyDynamicResources(); + } + if(rhi.frameLatencyWaitableObject != NULL) { CloseHandle(rhi.frameLatencyWaitableObject); @@ -3047,6 +3532,7 @@ namespace RHI // reclaim used memory and start recording D3D(rhi.mainCommandAllocators[rhi.frameIndex]->Reset()); D3D(rhi.commandList->Reset(rhi.mainCommandAllocators[rhi.frameIndex], NULL)); + BindDynamicResources(); rhi.frameDurationQueryIndex = CmdBeginDurationQuery(); @@ -3200,64 +3686,6 @@ namespace RHI AllocateAndFixName(rhiDesc); SetDebugName(resource, rhiDesc.name, D3DResourceType::Buffer); - uint32_t srvIndex = InvalidDescriptorIndex; - if(rhiDesc.initialState & ResourceStates::ShaderAccessBits) - { - D3D12_SHADER_RESOURCE_VIEW_DESC srv = {}; - srv.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; - srv.Buffer.FirstElement = 0; - if(rhiDesc.structureByteCount > 0) - { - srv.Format = DXGI_FORMAT_UNKNOWN; - srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - srv.Buffer.NumElements = rhiDesc.byteCount / rhiDesc.structureByteCount; - srv.Buffer.StructureByteStride = rhiDesc.structureByteCount; - srv.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; - } - else - { - srv.Format = DXGI_FORMAT_R32_TYPELESS; - srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - srv.Buffer.NumElements = rhiDesc.byteCount / 4; - srv.Buffer.StructureByteStride = 0; - srv.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; - } - srvIndex = rhi.descHeapGeneric.CreateSRV(resource, srv); - } - - uint32_t cbvIndex = InvalidDescriptorIndex; - if(rhiDesc.initialState & ResourceStates::ConstantBufferBit) - { - D3D12_CONSTANT_BUFFER_VIEW_DESC cbv = { 0 }; - cbv.BufferLocation = resource->GetGPUVirtualAddress(); - cbv.SizeInBytes = rhiDesc.byteCount; - cbvIndex = rhi.descHeapGeneric.CreateCBV(cbv); - } - - uint32_t uavIndex = InvalidDescriptorIndex; - if(rhiDesc.initialState & ResourceStates::UnorderedAccessBit) - { - 
D3D12_UNORDERED_ACCESS_VIEW_DESC uav = { 0 }; - uav.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; - uav.Buffer.CounterOffsetInBytes = 0; - uav.Buffer.FirstElement = 0; - if(rhiDesc.structureByteCount > 0) - { - uav.Format = DXGI_FORMAT_UNKNOWN; - uav.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; - uav.Buffer.NumElements = rhiDesc.byteCount / rhiDesc.structureByteCount; - uav.Buffer.StructureByteStride = rhiDesc.structureByteCount; - } - else - { - uav.Format = DXGI_FORMAT_R32_TYPELESS; - uav.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; - uav.Buffer.NumElements = rhiDesc.byteCount / 4; - uav.Buffer.StructureByteStride = 0; - } - uavIndex = rhi.descHeapGeneric.CreateUAV(resource, uav); - } - Buffer buffer = {}; buffer.desc = rhiDesc; buffer.allocation = allocation; @@ -3265,8 +3693,7 @@ namespace RHI buffer.gpuAddress = resource->GetGPUVirtualAddress(); buffer.currentState = resourceState; buffer.shortLifeTime = rhiDesc.shortLifeTime; - buffer.cbvIndex = cbvIndex; - buffer.uavIndex = uavIndex; + CreateBufferDescriptors(buffer); const HBuffer hbuffer = rhi.buffers.Add(buffer); if(transitionNeeded) @@ -3284,6 +3711,21 @@ namespace RHI { UnmapBuffer(handle); } + if(!rhi.useDynamicResources) + { + if(buffer.srvIndex != InvalidDescriptorIndex) + { + rhi.descHeapGeneric.Free(buffer.srvIndex); + } + if(buffer.uavIndex != InvalidDescriptorIndex) + { + rhi.descHeapGeneric.Free(buffer.uavIndex); + } + if(buffer.cbvIndex != InvalidDescriptorIndex) + { + rhi.descHeapGeneric.Free(buffer.cbvIndex); + } + } COM_RELEASE(buffer.buffer); COM_RELEASE(buffer.allocation); rhi.buffers.Remove(handle); @@ -3402,24 +3844,6 @@ namespace RHI AllocateAndFixName(rhiDesc); SetDebugName(resource, rhiDesc.name, D3DResourceType::Texture); - uint32_t srvIndex = InvalidDescriptorIndex; - if(rhiDesc.allowedState & ResourceStates::ShaderAccessBits) - { - D3D12_SHADER_RESOURCE_VIEW_DESC srv = { 0 }; - srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - srv.Format = desc.Format; - srv.Shader4ComponentMapping = 
D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - srv.Texture2D.MipLevels = desc.MipLevels; - srv.Texture2D.MostDetailedMip = 0; - srv.Texture2D.PlaneSlice = 0; - srv.Texture2D.ResourceMinLODClamp = 0.0f; - if(rhiDesc.format == TextureFormat::Depth24_Stencil8) - { - srv.Format = DXGI_FORMAT_R24_UNORM_X8_TYPELESS; // @TODO: - } - srvIndex = rhi.descHeapGeneric.CreateSRV(resource, srv); - } - uint32_t rtvIndex = InvalidDescriptorIndex; if(rhiDesc.allowedState & ResourceStates::RenderTargetBit) { @@ -3450,30 +3874,11 @@ namespace RHI texture.desc = rhiDesc; texture.allocation = allocation; texture.texture = resource; - texture.srvIndex = srvIndex; texture.rtvIndex = rtvIndex; texture.dsvIndex = dsvIndex; texture.currentState = D3D12_RESOURCE_STATE_COPY_DEST; texture.shortLifeTime = rhiDesc.shortLifeTime; - if(rhiDesc.allowedState & ResourceStates::UnorderedAccessBit) - { - for(uint32_t m = 0; m < rhiDesc.mipCount; ++m) - { - D3D12_UNORDERED_ACCESS_VIEW_DESC uav = { 0 }; - uav.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; - uav.Format = desc.Format; - uav.Texture2D.MipSlice = m; - uav.Texture2D.PlaneSlice = 0; - texture.mips[m].uavIndex = rhi.descHeapGeneric.CreateUAV(resource, uav); - } - } - else - { - for(uint32_t m = 0; m < rhiDesc.mipCount; ++m) - { - texture.mips[m].uavIndex = InvalidDescriptorIndex; - } - } + CreateTextureDescriptors(texture); const HTexture handle = rhi.textures.Add(texture); if(rhiDesc.nativeResource == NULL) @@ -3487,10 +3892,6 @@ namespace RHI void DestroyTexture(HTexture handle) { Texture& texture = rhi.textures.Get(handle); - if(texture.srvIndex != InvalidDescriptorIndex) - { - rhi.descHeapGeneric.Free(texture.srvIndex); - } if(texture.rtvIndex != InvalidDescriptorIndex) { rhi.descHeapRTVs.Free(texture.rtvIndex); @@ -3499,12 +3900,20 @@ namespace RHI { rhi.descHeapDSVs.Free(texture.dsvIndex); } - for(uint32_t m = 0; m < texture.desc.mipCount; ++m) + if(!rhi.useDynamicResources) { - const uint32_t uavIndex = texture.mips[m].uavIndex; - 
if(uavIndex != InvalidDescriptorIndex) + if(texture.srvIndex != InvalidDescriptorIndex) { - rhi.descHeapGeneric.Free(uavIndex); + rhi.descHeapGeneric.Free(texture.srvIndex); + } + + for(uint32_t m = 0; m < texture.desc.mipCount; ++m) + { + const uint32_t uavIndex = texture.mips[m].uavIndex; + if(uavIndex != InvalidDescriptorIndex) + { + rhi.descHeapGeneric.Free(uavIndex); + } } } COM_RELEASE(texture.texture); @@ -3537,7 +3946,7 @@ namespace RHI desc.MinLOD = rhiDesc.minLOD; desc.MipLODBias = rhiDesc.mipLODBias; desc.Filter = filter; - const uint32_t index = rhi.descHeapSamplers.CreateSampler(desc); + const uint32_t index = CreateSampler(desc); Sampler sampler; sampler.desc = rhiDesc; @@ -3551,7 +3960,10 @@ namespace RHI void DestroySampler(HSampler hsampler) { const Sampler& sampler = rhi.samplers.Get(hsampler); - rhi.descHeapSamplers.Free(sampler.heapIndex); + if(!rhi.useDynamicResources) + { + rhi.descHeapSamplers.Free(sampler.heapIndex); + } rhi.samplers.Remove(hsampler); } @@ -3567,6 +3979,8 @@ namespace RHI HRootSignature CreateRootSignature(const RootSignatureDesc& rhiDesc) { + ASSERT_DR_DISABLED(); + RootSignature rhiSignature = { 0 }; rhiSignature.genericTableIndex = UINT32_MAX; rhiSignature.samplerTableIndex = UINT32_MAX; @@ -3709,12 +4123,16 @@ namespace RHI void DestroyRootSignature(HRootSignature signature) { + ASSERT_DR_DISABLED(); + COM_RELEASE(rhi.rootSignatures.Get(signature).signature); rhi.rootSignatures.Remove(signature); } HDescriptorTable CreateDescriptorTable(const DescriptorTableDesc& desc) { + ASSERT_DR_DISABLED(); + const RootSignature& sig = rhi.rootSignatures.Get(desc.rootSignature); const char* srvName = AllocateName(va("%s GPU-visible CBV SRV UAV", desc.name), desc.shortLifeTime); @@ -3764,6 +4182,8 @@ namespace RHI void UpdateDescriptorTable(HDescriptorTable htable, const DescriptorTableUpdate& update) { + ASSERT_DR_DISABLED(); + Q_assert(update.textures != NULL); DescriptorTable& table = rhi.descriptorTables.Get(htable); @@ -3831,6 
+4251,8 @@ namespace RHI void DestroyDescriptorTable(HDescriptorTable handle) { + ASSERT_DR_DISABLED(); + DescriptorTable& table = rhi.descriptorTables.Get(handle); COM_RELEASE(table.genericHeap); COM_RELEASE(table.samplerHeap); @@ -3840,11 +4262,17 @@ namespace RHI HPipeline CreateGraphicsPipeline(const GraphicsPipelineDesc& rhiDesc) { - Q_assert(rhi.rootSignatures.Get(rhiDesc.rootSignature).desc.pipelineType == PipelineType::Graphics); + if(!rhi.useDynamicResources) + { + Q_assert(!IsNullHandle(rhiDesc.rootSignature)); + Q_assert(rhi.rootSignatures.Get(rhiDesc.rootSignature).desc.pipelineType == PipelineType::Graphics); + } D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = { 0 }; desc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; // none available so far - desc.pRootSignature = rhi.rootSignatures.Get(rhiDesc.rootSignature).signature; + desc.pRootSignature = rhi.useDynamicResources ? + rhi.dynamicResources.rootSignature : + rhi.rootSignatures.Get(rhiDesc.rootSignature).signature; desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; desc.SampleDesc.Count = 1; desc.SampleMask = UINT_MAX; @@ -3936,11 +4364,17 @@ namespace RHI HPipeline CreateComputePipeline(const ComputePipelineDesc& rhiDesc) { - Q_assert(rhi.rootSignatures.Get(rhiDesc.rootSignature).desc.pipelineType == PipelineType::Compute); + if(!rhi.useDynamicResources) + { + Q_assert(!IsNullHandle(rhiDesc.rootSignature)); + Q_assert(rhi.rootSignatures.Get(rhiDesc.rootSignature).desc.pipelineType == PipelineType::Compute); + } D3D12_COMPUTE_PIPELINE_STATE_DESC desc = { 0 }; desc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; // none available so far - desc.pRootSignature = rhi.rootSignatures.Get(rhiDesc.rootSignature).signature; + desc.pRootSignature = rhi.useDynamicResources ? 
+ rhi.dynamicResources.rootSignature : + rhi.rootSignatures.Get(rhiDesc.rootSignature).signature; desc.CS.pShaderBytecode = rhiDesc.shader.data; desc.CS.BytecodeLength = rhiDesc.shader.byteCount; @@ -4229,6 +4663,8 @@ namespace RHI void CmdSetRootConstants(HRootSignature rootSignature, ShaderStage::Id shaderType, const void* constants) { + ASSERT_DR_DISABLED(); + Q_assert(CanWriteCommands()); Q_assert(constants); @@ -4248,6 +4684,22 @@ namespace RHI } } + void CmdSetGraphicsRootConstants(uint32_t byteOffset, uint32_t byteCount, const void* constants) + { + ASSERT_DR_ENABLED(); + Q_assert(CanWriteCommands()); + + SetRootConstants(byteOffset, byteCount, constants, true); + } + + void CmdSetComputeRootConstants(uint32_t byteOffset, uint32_t byteCount, const void* constants) + { + ASSERT_DR_ENABLED(); + Q_assert(CanWriteCommands()); + + SetRootConstants(byteOffset, byteCount, constants, false); + } + void CmdDraw(uint32_t vertexCount, uint32_t firstVertex) { Q_assert(CanWriteCommands()); @@ -4405,8 +4857,9 @@ namespace RHI rhi.commandList->ClearDepthStencilView(dsvHandle, flags, depth, stencil, rectCount, d3dRectPtr); } - void CmdClearTextureUAV(HTexture htexture, HDescriptorTable hdescTable, uint32_t tableIndex, uint32_t mipIndex, const uint32_t* values) + void CmdClearTextureUAV(HTexture htexture, uint32_t mipIndex, const uint32_t* values) { + ASSERT_DR_ENABLED(); Q_assert(CanWriteCommands()); Q_assert(values); static_assert(sizeof(UINT) == 4, "sizeof(UINT) isn't 4 as expected"); @@ -4414,12 +4867,12 @@ namespace RHI const Texture& texture = rhi.textures.Get(htexture); Q_assert(mipIndex < texture.desc.mipCount); - const uint32_t cpuDescIndex = texture.mips[mipIndex].uavIndex; - const D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = rhi.descHeapGeneric.GetCPUHandle(cpuDescIndex); - const DescriptorTable& descTable = rhi.descriptorTables.Get(hdescTable); - D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = descTable.genericHeap->GetGPUDescriptorHandleForHeapStart(); - const uint32_t 
gpuDescIndex = tableIndex; - gpuHandle.ptr += gpuDescIndex * rhi.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + const uint32_t descIndex = texture.mips[mipIndex].uavIndex; + const UINT descSize = rhi.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = rhi.dynamicResources.genericCPUDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = rhi.dynamicResources.genericDescriptorHeap->GetGPUDescriptorHandleForHeapStart(); + cpuHandle.ptr += descIndex * descSize; + gpuHandle.ptr += descIndex * descSize; rhi.commandList->ClearUnorderedAccessViewUint(gpuHandle, cpuHandle, texture.texture, values, 0, NULL); } @@ -4549,6 +5002,8 @@ namespace RHI // GPU wait for the copy queue to be done executing on the GPU rhi.upload.WaitToStartDrawing(rhi.computeCommandQueue); + + BindDynamicResources(); } void EndTempCommandList() @@ -4594,6 +5049,106 @@ namespace RHI // upload queue rhi.upload.fence.WaitOnCPU(rhi.upload.fenceValue); } + + void SubmitAndContinue() // closes and executes the main command list, waits on the CPU for its fence, then resets the allocator and list so recording can continue + { + ASSERT_DR_ENABLED(); + Q_assert(rhi.commandList == rhi.mainCommandList); + + CmdInsertDebugLabel("RHI::SubmitAndContinue", 0.8f, 0.8f, 0.8f); + + rhi.frameBegun = false; + D3D(rhi.commandList->Close()); + ID3D12CommandList* commandListArray[] = { rhi.commandList }; + rhi.mainCommandQueue->ExecuteCommandLists(ARRAY_LEN(commandListArray), commandListArray); + const UINT64 currentFenceValue = rhi.mainFenceValues[rhi.frameIndex]; +#if RHI_DEBUG_FENCE + Sys_DebugPrintf("Signal: %d (SubmitAndContinue)\n", (int)currentFenceValue); + Sys_DebugPrintf("Wait: %d (SubmitAndContinue)\n", (int)currentFenceValue); +#endif + rhi.mainFence.Signal(rhi.mainCommandQueue, currentFenceValue); + rhi.mainFence.WaitOnCPU(currentFenceValue); + rhi.mainFenceValues[rhi.frameIndex] = currentFenceValue + 1; + D3D(rhi.mainCommandAllocators[rhi.frameIndex]->Reset()); +
D3D(rhi.commandList->Reset(rhi.mainCommandAllocators[rhi.frameIndex], NULL)); + BindDynamicResources(); + rhi.commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + rhi.currentRootSignature = RHI_MAKE_NULL_HANDLE(); + rhi.frameBegun = true; + } + + uint32_t GetTextureIndexSRV(HTexture htexture) + { + ASSERT_DR_ENABLED(); + Q_assert(!IsNullHandle(htexture)); + + Texture& texture = rhi.textures.Get(htexture); + + return texture.srvIndex; + } + + uint32_t GetTextureIndexUAV(HTexture htexture, uint32_t mipIndex) + { + ASSERT_DR_ENABLED(); + Q_assert(!IsNullHandle(htexture)); + + Texture& texture = rhi.textures.Get(htexture); + Q_assert(mipIndex < texture.desc.mipCount); + + return texture.mips[mipIndex].uavIndex; + } + + uint32_t GetBufferIndexSRV(HBuffer hbuffer) + { + ASSERT_DR_ENABLED(); + Q_assert(!IsNullHandle(hbuffer)); + + Buffer& buffer = rhi.buffers.Get(hbuffer); + + return buffer.srvIndex; + } + + uint32_t GetBufferIndexUAV(HBuffer hbuffer) + { + ASSERT_DR_ENABLED(); + Q_assert(!IsNullHandle(hbuffer)); + + Buffer& buffer = rhi.buffers.Get(hbuffer); + + return buffer.uavIndex; + } + + uint32_t GetBufferIndexCBV(HBuffer hbuffer) + { + ASSERT_DR_ENABLED(); + Q_assert(!IsNullHandle(hbuffer)); + + Buffer& buffer = rhi.buffers.Get(hbuffer); + + return buffer.cbvIndex; + } + + uint32_t GetSamplerIndex(HSampler hsampler) + { + ASSERT_DR_ENABLED(); + Q_assert(!IsNullHandle(hsampler)); + + const Sampler& sampler = rhi.samplers.Get(hsampler); + + return sampler.heapIndex; + } + + void CmdBarrierUAV() + { + ASSERT_DR_ENABLED(); + Q_assert(CanWriteCommands()); + + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barrier.UAV.pResource = NULL; + rhi.commandList->ResourceBarrier(1, &barrier); + } } void R_WaitBeforeInputSampling() diff --git a/code/renderer/rhi_local.h b/code/renderer/rhi_local.h index 9013637..36f207f 100644 --- a/code/renderer/rhi_local.h +++ 
b/code/renderer/rhi_local.h @@ -1,6 +1,6 @@ /* =========================================================================== -Copyright (C) 2022-2023 Gian 'myT' Schellenbaum +Copyright (C) 2022-2024 Gian 'myT' Schellenbaum This file is part of Challenge Quake 3 (CNQ3). @@ -169,6 +169,7 @@ namespace RHI R8_UNorm, Depth24_Stencil8, R10G10B10A2_UNorm, + R32_UInt, Count }; }; @@ -353,6 +354,11 @@ namespace RHI name = name_; rootSignature = rootSignature_; } + explicit GraphicsPipelineDesc(const char* name_) + { + name = name_; + rootSignature = RHI_MAKE_NULL_HANDLE(); + } const char* name = NULL; bool shortLifeTime = false; @@ -434,6 +440,11 @@ namespace RHI name = name_; rootSignature = rootSignature_; } + explicit ComputePipelineDesc(const char* name_) + { + name = name_; + rootSignature = RHI_MAKE_NULL_HANDLE(); + } const char* name = NULL; bool shortLifeTime = false; @@ -677,7 +688,15 @@ namespace RHI const ShaderMacro* macros = NULL; }; - bool Init(); // true when a full init happened (the device was created) + struct InitDesc + { + // HLSL 6.6 Dynamic Resources + // - all shader resources are exclusively used through ResourceDescriptorHeap and SamplerDescriptorHeap + // - all root signature and descriptor table functions are disabled + bool directDescriptorHeapIndexing = false; + }; + + bool Init(const InitDesc& desc); // true when a full init happened (the device was created) void ShutDown(bool destroyWindow); void BeginFrame(); @@ -721,6 +740,8 @@ namespace RHI void CmdSetViewport(uint32_t x, uint32_t y, uint32_t w, uint32_t h, float minDepth = 0.0f, float maxDepth = 1.0f); void CmdSetScissor(uint32_t x, uint32_t y, uint32_t w, uint32_t h); void CmdSetRootConstants(HRootSignature rootSignature, ShaderStage::Id shaderType, const void* constants); + void CmdSetGraphicsRootConstants(uint32_t byteOffset, uint32_t byteCount, const void* constants); + void CmdSetComputeRootConstants(uint32_t byteOffset, uint32_t byteCount, const void* constants); void CmdDraw(uint32_t 
vertexCount, uint32_t firstVertex); void CmdDrawIndexed(uint32_t indexCount, uint32_t firstIndex, uint32_t firstVertex); void CmdDispatch(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ); @@ -729,7 +750,7 @@ namespace RHI void CmdBarrier(uint32_t texCount, const TextureBarrier* textures, uint32_t buffCount = 0, const BufferBarrier* buffers = NULL); void CmdClearColorTarget(HTexture texture, const vec4_t clearColor, const Rect* rect = NULL); void CmdClearDepthStencilTarget(HTexture texture, bool clearDepth, float depth, bool clearStencil = false, uint8_t stencil = 0, const Rect* rect = NULL); - void CmdClearTextureUAV(HTexture texture, HDescriptorTable descTable, uint32_t tableIndex, uint32_t mipIndex, const uint32_t* values); + void CmdClearTextureUAV(HTexture texture, uint32_t mipIndex, const uint32_t* values); void CmdInsertDebugLabel(const char* name, float r = 1.0f, float g = 1.0f, float b = 1.0f); void CmdBeginDebugLabel(const char* name, float r = 1.0f, float g = 1.0f, float b = 1.0f); void CmdEndDebugLabel(); @@ -737,6 +758,15 @@ namespace RHI void CmdCopyBuffer(HBuffer dest, HBuffer source); void CmdSetShadingRate(ShadingRate::Id shadingRate); + // only available when dynamic resources are enabled + uint32_t GetTextureIndexSRV(HTexture texture); + uint32_t GetTextureIndexUAV(HTexture texture, uint32_t mipIndex); + uint32_t GetBufferIndexSRV(HBuffer buffer); + uint32_t GetBufferIndexUAV(HBuffer buffer); + uint32_t GetBufferIndexCBV(HBuffer buffer); + uint32_t GetSamplerIndex(HSampler sampler); + void CmdBarrierUAV(); + // the duration at index 0 is for the entire frame uint32_t GetDurationCount(); void GetDurations(uint32_t* gpuMicroSeconds); @@ -756,6 +786,8 @@ namespace RHI void WaitUntilDeviceIsIdle(); + void SubmitAndContinue(); + const Handle HandleIndexBitCount = 16; const Handle HandleIndexBitOffset = 0; const Handle HandleGenBitCount = 10; diff --git a/code/renderer/hlsl/.gitignore b/code/renderer/shaders/.gitignore similarity index 
100% rename from code/renderer/hlsl/.gitignore rename to code/renderer/shaders/.gitignore diff --git a/code/renderer/shaders/common/blend.hlsli b/code/renderer/shaders/common/blend.hlsli new file mode 100644 index 0000000..298d4f5 --- /dev/null +++ b/code/renderer/shaders/common/blend.hlsli @@ -0,0 +1,81 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
+=========================================================================== +*/ +// Quake 3 blend equations + + +#if !defined(DISABLE_PRAGMA_ONCE) +#pragma once +#endif + + +float4 BlendSource(float4 src, float4 dst, uint stateBits) // returns src * source blend factor; stateBits is compared by equality, so it must hold only the GLS_SRCBLEND_* value +{ + if(stateBits == GLS_SRCBLEND_ZERO) + return float4(0.0, 0.0, 0.0, 0.0); + else if(stateBits == GLS_SRCBLEND_ONE) + return src; + else if(stateBits == GLS_SRCBLEND_DST_COLOR) + return src * dst; + else if(stateBits == GLS_SRCBLEND_ONE_MINUS_DST_COLOR) + return src * (float4(1.0, 1.0, 1.0, 1.0) - dst); + else if(stateBits == GLS_SRCBLEND_SRC_ALPHA) + return src * float4(src.a, src.a, src.a, 1.0); + else if(stateBits == GLS_SRCBLEND_ONE_MINUS_SRC_ALPHA) + return src * float4(1.0 - src.a, 1.0 - src.a, 1.0 - src.a, 1.0); + else if(stateBits == GLS_SRCBLEND_DST_ALPHA) + return src * float4(dst.a, dst.a, dst.a, 1.0); + else if(stateBits == GLS_SRCBLEND_ONE_MINUS_DST_ALPHA) + return src * float4(1.0 - dst.a, 1.0 - dst.a, 1.0 - dst.a, 1.0); + else if(stateBits == GLS_SRCBLEND_ALPHA_SATURATE) + return src * float4(min(src.a, 1.0 - dst.a).xxx, 1.0); // saturate factor: min(As, 1 - Ad) for RGB, 1 for alpha + else + return src; +} + +float4 BlendDest(float4 src, float4 dst, uint stateBits) // returns dst * destination blend factor; stateBits is compared by equality, so it must hold only the GLS_DSTBLEND_* value +{ + if(stateBits == GLS_DSTBLEND_ZERO) + return float4(0.0, 0.0, 0.0, 0.0); + else if(stateBits == GLS_DSTBLEND_ONE) + return dst; + else if(stateBits == GLS_DSTBLEND_SRC_COLOR) + return dst * src; + else if(stateBits == GLS_DSTBLEND_ONE_MINUS_SRC_COLOR) + return dst * float4(1.0 - src.r, 1.0 - src.g, 1.0 - src.b, 1.0 - src.a); + else if(stateBits == GLS_DSTBLEND_SRC_ALPHA) + return dst * float4(src.a, src.a, src.a, 1.0); + else if(stateBits == GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA) + return dst * float4(1.0 - src.a, 1.0 - src.a, 1.0 - src.a, 0.0); + else if(stateBits == GLS_DSTBLEND_DST_ALPHA) + return dst * float4(dst.a, dst.a, dst.a, 1.0); + else if(stateBits == GLS_DSTBLEND_ONE_MINUS_DST_ALPHA) + return dst * float4(1.0 - dst.a, 1.0 - dst.a, 1.0 - dst.a, 1.0); + else + return float4(0.0, 0.0, 0.0, 0.0); +} + +float4
Blend(float4 src, float4 dst, uint stateBits) +{ + float4 srcOut = BlendSource(src, dst, stateBits & GLS_SRCBLEND_BITS); + float4 dstOut = BlendDest(src, dst, stateBits & GLS_DSTBLEND_BITS); + + return srcOut + dstOut; +} diff --git a/code/renderer/shaders/common/mip_gen.hlsli b/code/renderer/shaders/common/mip_gen.hlsli new file mode 100644 index 0000000..12f6951 --- /dev/null +++ b/code/renderer/shaders/common/mip_gen.hlsli @@ -0,0 +1,96 @@ +/* +=========================================================================== +Copyright (C) 2023 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
+=========================================================================== +*/ +// mip-map generation + + +#pragma once + + +uint2 MipGen_FixCoords(int2 tc, int2 maxSize, uint clampCoords) // maxSize is both the clamp upper bound and the repeat bit mask - presumably (size - 1) of a power-of-two texture, TODO confirm against callers +{ + if(clampCoords > 0) + { + // clamp + return uint2(clamp(tc, int2(0, 0), maxSize)); + } + + // repeat + return uint2(tc & maxSize); +} + +void MipGen_GammaToLinear(RWTexture2D<float4> dst, RWTexture2D<float4> src, uint3 dtid, float gamma) // dst = pow(src.rgb, gamma), alpha copied; threads outside dst are skipped +{ + uint w, h; + dst.GetDimensions(w, h); + if(any(dtid.xy >= uint2(w, h))) + { + return; + } + + float4 v = src[dtid.xy]; + dst[dtid.xy] = float4(pow(v.xyz, gamma), v.a); +} + +void MipGen_LinearToGamma(RWTexture2D<float4> dst, RWTexture2D<float4> src, uint3 dtid, float4 blendColor, float intensity, float invGamma) // blends rgb towards blendColor, applies pow(invGamma) then intensity, saturates the result +{ + uint w, h; + dst.GetDimensions(w, h); + if(any(dtid.xy >= uint2(w, h))) + { + return; + } + + // yes, intensity *should* be done in light-linear space + // but we keep the old behavior for consistency... + float4 in0 = src[dtid.xy]; + float3 in1 = 0.5 * (in0.rgb + blendColor.rgb); + float3 inV = lerp(in0.rgb, in1.rgb, blendColor.a); + float3 out0 = pow(max(inV, 0.0), invGamma); + float3 out1 = out0 * intensity; + float4 outV = saturate(float4(out1, in0.a)); + dst[dtid.xy] = outV; +} + +void MipGen_DownSample(RWTexture2D<float4> dst, RWTexture2D<float4> src, uint3 dtid, int2 maxSize, uint clampCoords, int2 scale, int2 offset, float4 weights) // 8-tap symmetric weighted sum along 'offset', sampled around (dtid.xy * scale) +{ + uint w, h; + dst.GetDimensions(w, h); + if(any(dtid.xy >= uint2(w, h))) + { + return; + } + +#define FixCoords(tc) MipGen_FixCoords(tc, maxSize, clampCoords) + + int2 base = int2(dtid.xy) * scale; + float4 r = float4(0, 0, 0, 0); + r += src[FixCoords(base - offset * 3)] * weights.x; + r += src[FixCoords(base - offset * 2)] * weights.y; + r += src[FixCoords(base - offset * 1)] * weights.z; + r += src[base] * weights.w; + r += src[base + offset] * weights.w; + r += src[FixCoords(base + offset * 2)] * weights.z; + r += src[FixCoords(base + offset * 3)] * weights.y; + r += src[FixCoords(base + offset * 4)] * weights.x; +
dst[dtid.xy] = r; + +#undef FixCoords +} diff --git a/code/renderer/shaders/common/state_bits.h.hlsli b/code/renderer/shaders/common/state_bits.h.hlsli new file mode 100644 index 0000000..0cd7295 --- /dev/null +++ b/code/renderer/shaders/common/state_bits.h.hlsli @@ -0,0 +1,65 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
+=========================================================================== +*/ +// shader stage state constants + + +#if !defined(DISABLE_PRAGMA_ONCE) +#pragma once +#endif + + +#define GLS_SRCBLEND_ZERO 0x00000001u +#define GLS_SRCBLEND_ONE 0x00000002u +#define GLS_SRCBLEND_DST_COLOR 0x00000003u +#define GLS_SRCBLEND_ONE_MINUS_DST_COLOR 0x00000004u +#define GLS_SRCBLEND_SRC_ALPHA 0x00000005u +#define GLS_SRCBLEND_ONE_MINUS_SRC_ALPHA 0x00000006u +#define GLS_SRCBLEND_DST_ALPHA 0x00000007u +#define GLS_SRCBLEND_ONE_MINUS_DST_ALPHA 0x00000008u +#define GLS_SRCBLEND_ALPHA_SATURATE 0x00000009u +#define GLS_SRCBLEND_BITS 0x0000000fu + +#define GLS_DSTBLEND_ZERO 0x00000010u +#define GLS_DSTBLEND_ONE 0x00000020u +#define GLS_DSTBLEND_SRC_COLOR 0x00000030u +#define GLS_DSTBLEND_ONE_MINUS_SRC_COLOR 0x00000040u +#define GLS_DSTBLEND_SRC_ALPHA 0x00000050u +#define GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA 0x00000060u +#define GLS_DSTBLEND_DST_ALPHA 0x00000070u +#define GLS_DSTBLEND_ONE_MINUS_DST_ALPHA 0x00000080u +#define GLS_DSTBLEND_BITS 0x000000f0u + +#define GLS_BLEND_BITS 0x000000ffu + +#define GLS_DEPTHMASK_TRUE 0x00000100u // enable depth writes + +#define GLS_POLYMODE_LINE 0x00001000u // wireframe polygon filling, not line rendering + +#define GLS_DEPTHTEST_DISABLE 0x00010000u // disable depth tests +#define GLS_DEPTHFUNC_EQUAL 0x00020000u + +#define GLS_STAGEINDEX_BITS 0x00700000u +#define GLS_STAGEINDEX_SHIFT 20u + +#define GLS_ATEST_GT_0 0x10000000u +#define GLS_ATEST_LT_80 0x20000000u +#define GLS_ATEST_GE_80 0x40000000u +#define GLS_ATEST_BITS 0x70000000u diff --git a/code/renderer/shaders/crp/accumdof_accum.hlsl b/code/renderer/shaders/crp/accumdof_accum.hlsl new file mode 100644 index 0000000..12ddc4d --- /dev/null +++ b/code/renderer/shaders/crp/accumdof_accum.hlsl @@ -0,0 +1,65 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). 
+ +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>. +=========================================================================== +*/ +// accumulation depth of field: accumulation pass + + +#include "common.hlsli" + + +cbuffer RootConstants +{ + uint textureIndex; +}; + +struct VOut +{ + float4 position : SV_Position; +}; + + +#if VERTEX_SHADER + +VOut vs(uint id : SV_VertexID) +{ + VOut output; + output.position = FSTrianglePosFromVertexId(id); + + return output; +} + +#endif + + +#if PIXEL_SHADER + +float4 ps(VOut input) : SV_Target +{ + Texture2D texture0 = ResourceDescriptorHeap[textureIndex]; + + int2 tc = int2(input.position.xy); + float3 color = texture0.Load(int3(tc.x, tc.y, 0)).rgb; + float weight = 1.0 + Brightness(color); + float4 result = float4(color * weight, weight); + + return result; +} + +#endif diff --git a/code/renderer/shaders/crp/accumdof_debug.hlsl b/code/renderer/shaders/crp/accumdof_debug.hlsl new file mode 100644 index 0000000..c31d9aa --- /dev/null +++ b/code/renderer/shaders/crp/accumdof_debug.hlsl @@ -0,0 +1,99 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3).
+ +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>. +=========================================================================== +*/ +// accumulation depth of field: debug overlay + + +#include "common.hlsli" +#include "dof.hlsli" + + +cbuffer RootConstants +{ + matrix mvp; // displaced view, to project to CS + matrix invMvp; // main view, to unproject to WS + uint colorTextureIndex; + uint depthTextureIndex; + uint debugMode; // 1: colorized coc, 2: constant intensity far field + int tcScale; + float focusDist; + float linearDepthA; // main view, to unproject to WS + float linearDepthB; + float maxNearCocCS; + float maxFarCocCS; +}; + +struct VOut +{ + float4 position : SV_Position; + float2 texCoords : TEXCOORD0; +}; + + +#if VERTEX_SHADER + +VOut vs(uint id : SV_VertexID) +{ + VOut output; + output.position = FSTrianglePosFromVertexId(id); + output.texCoords = FSTriangleTCFromVertexId(id); + + return output; +} + +#endif + + +#if PIXEL_SHADER + +float4 ps(VOut input) : SV_Target +{ + Texture2D colorTexture = ResourceDescriptorHeap[colorTextureIndex]; + Texture2D depthTexture = ResourceDescriptorHeap[depthTextureIndex]; + + int3 tcColor = int3(input.position.xy, 0); + int3 tcDepth = int3(input.position.xy / tcScale, 0); + float3 color = colorTexture.Load(tcColor).rgb; + float depthZW = depthTexture.Load(tcDepth); + float depth = LinearDepth(depthZW, linearDepthA, linearDepthB); + bool nearField = depth < focusDist; +
float4 result; + if(debugMode == 1) + { + float quadPosXCS = input.texCoords.x * 2.0 - 1.0; + float quadPosYCS = (1.0 - input.texCoords.y) * 2.0 - 1.0; + float4 positionWS = mul(invMvp, float4(quadPosXCS, quadPosYCS, depthZW, 1)); + float4 positionCS = mul(mvp, float4(positionWS.xyz / positionWS.w, 1)); + float coc = distance(positionCS.xy / positionCS.w, float2(quadPosXCS, quadPosYCS)); + result = DOF_DebugCoc(color, nearField, saturate(coc / maxNearCocCS), saturate(coc / maxFarCocCS)); + } + else if(debugMode == 2) + { + result = DOF_DebugFocusPlane(color, nearField); + } + else + { + result = float4(color, 1); + } + + return result; +} + +#endif diff --git a/code/renderer/shaders/crp/accumdof_norm.hlsl b/code/renderer/shaders/crp/accumdof_norm.hlsl new file mode 100644 index 0000000..c928626 --- /dev/null +++ b/code/renderer/shaders/crp/accumdof_norm.hlsl @@ -0,0 +1,65 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
+=========================================================================== +*/ +// accumulation depth of field: normalization pass + + +#include "common.hlsli" + + +cbuffer RootConstants +{ + uint textureIndex; +}; + +struct VOut +{ + float4 position : SV_Position; +}; + + +#if VERTEX_SHADER + +VOut vs(uint id : SV_VertexID) +{ + VOut output; + output.position = FSTrianglePosFromVertexId(id); + + return output; +} + +#endif + + +#if PIXEL_SHADER + +float4 ps(VOut input) : SV_Target +{ + Texture2D texture0 = ResourceDescriptorHeap[textureIndex]; + + int2 tc = int2(input.position.xy); + float4 sum = texture0.Load(int3(tc.x, tc.y, 0)); + float3 color = saturate(sum.rgb / sum.a); + float4 result = float4(color, 1); + + return result; +} + +#endif diff --git a/code/renderer/shaders/crp/blit.hlsl b/code/renderer/shaders/crp/blit.hlsl new file mode 100644 index 0000000..c7ab20f --- /dev/null +++ b/code/renderer/shaders/crp/blit.hlsl @@ -0,0 +1,69 @@ +/* +=========================================================================== +Copyright (C) 2023 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
+=========================================================================== +*/ +// blit shader - unlike texture copies, it doesn't care about the specific formats used + + +#include "common.hlsli" + + +cbuffer RootConstants : register(b0) +{ + uint textureIndex; + uint samplerIndex; + float2 tcScale; + float2 tcBias; +}; + +struct VOut +{ + float4 position : SV_Position; + float2 texCoords : TEXCOORD0; +}; + + +#if VERTEX_SHADER + +VOut vs(uint id : SV_VertexID) +{ + VOut output; + output.position = FSTrianglePosFromVertexId(id); + output.texCoords = FSTriangleTCFromVertexId(id); + + return output; +} + +#endif + + +#if PIXEL_SHADER + +float4 ps(VOut input) : SV_Target +{ + Texture2D texture0 = ResourceDescriptorHeap[textureIndex]; + SamplerState sampler0 = SamplerDescriptorHeap[samplerIndex]; + float2 tc = input.texCoords * tcScale + tcBias; + float3 base = texture0.Sample(sampler0, tc).rgb; + float4 result = float4(base, 1.0); + + return result; +} + +#endif diff --git a/code/renderer/shaders/crp/common.hlsli b/code/renderer/shaders/crp/common.hlsli new file mode 100644 index 0000000..dd7a53c --- /dev/null +++ b/code/renderer/shaders/crp/common.hlsli @@ -0,0 +1,144 @@ +/* +=========================================================================== +Copyright (C) 2023 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
+===========================================================================
+*/
+// shared utilities
+
+
+#pragma once
+
+
+#include "../common/state_bits.h.hlsli"
+#include "../common/blend.hlsli"
+
+
+#define PI 3.1415926535897932384626433832795
+#define PI_D2 (PI / 2.0)
+#define PI_D4 (PI / 4.0)
+#define PI_M2 (PI * 2.0)
+
+
+float DegToRad(float deg) // degrees -> radians
+{
+	return PI * (deg / 180.0);
+}
+
+float RadToDeg(float rad) // radians -> degrees
+{
+	return 180.0 * (rad / PI);
+}
+
+float Brightness(float3 color) // weighted RGB sum (0.299, 0.587, 0.114)
+{
+	float brightness = dot(color, float3(0.299, 0.587, 0.114));
+
+	return brightness;
+}
+
+float4 MakeGreyscale(float4 input, float amount) // amount 0 = unchanged, 1 = fully grey; alpha is preserved
+{
+	float grey = Brightness(input.rgb); // same weights as Brightness, kept in one place
+	float4 result = lerp(input, float4(grey, grey, grey, input.a), amount);
+
+	return result;
+}
+
+/*
+f = far clip plane distance
+n = near clip plane distance
+exp = exponential depth value (as stored in the Z-buffer)
+
+                     2 * f * n              B
+linear(exp) = ----------------------- = -------
+              (f + n) - exp * (f - n)   exp - A
+
+         f + n         -2 * f * n
+with A = ----- and B = ----------
+         f - n           f - n
+*/
+float LinearDepth(float zwDepth, float A, float B) // A and B are precomputed by the caller per the formula above
+{
+	return B / (zwDepth - A);
+}
+
+float4 FSTrianglePosFromVertexId(uint id) // ids 0, 1, 2 -> (-1,-1), (-1,3), (3,-1): one triangle covering the [-1,1]^2 viewport
+{
+	return float4(
+		(float)(id / 2) * 4.0 - 1.0,
+		(float)(id % 2) * 4.0 - 1.0,
+		0.0,
+		1.0);
+}
+
+float2 FSTriangleTCFromVertexId(uint id) // ids 0, 1, 2 -> (0,1), (0,-1), (2,1): matches the positions above with V flipped
+{
+	return float2(
+		(float)(id / 2) * 2.0,
+		1.0 - (float)(id % 2) * 2.0);
+}
+
+uint PackColor(float4 c) // packs a [0,1] RGBA color into 8 bits per channel, R in the lowest byte
+{
+	uint4 u = uint4(saturate(c) * 255.0 + 0.5); // round to nearest instead of truncating, so UnpackColor -> PackColor round-trips
+	uint r = u.r | (u.g << 8) | (u.b << 16) | (u.a << 24);
+
+	return r;
+}
+
+float4 UnpackColor(uint c) // inverse of PackColor: 8 bits per channel -> [0,1] RGBA
+{
+	uint4 u = uint4(c & 0xFFu, (c >> 8) & 0xFFu, (c >> 16) & 0xFFu, (c >> 24) & 0xFFu);
+	float4 r = float4(u) / 255.0;
+
+	return r;
+}
+
+float EaseInCubic(float x) // x^3
+{
+	return x * x * x;
+}
+
+float EaseOutCubic(float x) // 1 - (1 - x)^3
+{
+	float y = 1.0 - x;
+
+	return 1.0 - y * y * y;
+}
+
+float EaseInOutCubic(float x) // 4x^3 below 0.5, 1 - (2 - 2x)^3 / 2 from 0.5 up
+{
+	if(x < 0.5)
+	{
+		return 4 * x * x * x;
+	}
+
+	float y = -2 * x + 2;
+
+	return 1 - 0.5 * y * y * y;
+}
+
+float EaseInQuad(float x) // x^2
+{
+	return x * x;
+}
+
+float SmoothStep(float x) // smoothstep over the fixed [0,1] range
+{
+	return smoothstep(0.0, 1.0, x);
+}
diff --git a/code/renderer/shaders/crp/dof.hlsli b/code/renderer/shaders/crp/dof.hlsli
new file mode 100644
index 0000000..b4c0f97
--- /dev/null
+++ b/code/renderer/shaders/crp/dof.hlsli
@@ -0,0 +1,54 @@
+/*
+===========================================================================
+Copyright (C) 2024 Gian 'myT' Schellenbaum
+
+This file is part of Challenge Quake 3 (CNQ3).
+
+Challenge Quake 3 is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the License,
+or (at your option) any later version.
+
+Challenge Quake 3 is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
+===========================================================================
+*/
+// depth of field: debug overlay support functions
+
+
+#pragma once
+
+
+float4 DOF_DebugCoc(float3 color, bool nearField, float nearCoc, float farCoc) // tints towards green (near field) or blue (far field)
+{
+	float blendFactor;
+	float3 target;
+	if(nearField)
+	{
+		blendFactor = 0.5 * nearCoc; // at most a 50% tint when the CoC is 1
+		target = float3(0, 1, 0);
+	}
+	else
+	{
+		blendFactor = 0.5 * farCoc;
+		target = float3(0, 0, 1);
+	}
+	float4 result = float4(lerp(color, target, blendFactor), 1);
+
+	return result;
+}
+
+float4 DOF_DebugFocusPlane(float3 color, bool nearField) // near field is left untouched, everything else gets a 25% purple tint
+{
+	float farFieldFactor = nearField ? 0.0 : 0.25;
+	float3 farFieldColor = float3(0.5, 0, 0.5);
+	float3 mixed = lerp(color, farFieldColor, farFieldFactor);
+	float4 result = float4(mixed, 1);
+
+	return result;
+}
diff --git a/code/renderer/shaders/crp/fog.hlsli b/code/renderer/shaders/crp/fog.hlsli
new file mode 100644
index 0000000..6d08bb9
--- /dev/null
+++ b/code/renderer/shaders/crp/fog.hlsli
@@ -0,0 +1,58 @@
+/*
+===========================================================================
+Copyright (C) 2024 Gian 'myT' Schellenbaum
+
+This file is part of Challenge Quake 3 (CNQ3).
+
+Challenge Quake 3 is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the License,
+or (at your option) any later version.
+
+Challenge Quake 3 is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
+===========================================================================
+*/
+// fog volume (AABB) rendering - shared code
+
+
+struct VOut
+{
+	float4 position : SV_Position;
+	float depthVS : DEPTHVS; // view-space depth of the fog box fragment (negated view-space Z)
+};
+
+cbuffer RootConstants
+{
+	matrix modelViewMatrix;
+	matrix projectionMatrix;
+	float4 boxMin; // only .xyz is read
+	float4 boxMax; // only .xyz is read
+	float4 color; // the pixel shaders output .rgb and compute their own alpha
+	float depth; // distance the pixel shaders divide by to get the fog opacity
+	float linearDepthA; // A term of LinearDepth
+	float linearDepthB; // B term of LinearDepth
+	uint depthTextureIndex; // descriptor heap index of the depth texture
+};
+
+
+#if VERTEX_SHADER
+
+VOut vs(float3 positionOS : POSITION) // positionOS is presumably a unit cube corner in [0,1]^3 - confirm against the vertex buffer
+{
+	float3 positionWS = boxMin.xyz + positionOS * (boxMax.xyz - boxMin.xyz); // remap onto the AABB
+	float4 positionVS = mul(modelViewMatrix, float4(positionWS, 1));
+
+	VOut output;
+	output.position = mul(projectionMatrix, positionVS);
+	output.depthVS = -positionVS.z;
+
+	return output;
+}
+
+#endif
diff --git a/code/renderer/shaders/crp/fog_inside.hlsl b/code/renderer/shaders/crp/fog_inside.hlsl
new file mode 100644
index 0000000..1cbbe63
--- /dev/null
+++ b/code/renderer/shaders/crp/fog_inside.hlsl
@@ -0,0 +1,43 @@
+/*
+===========================================================================
+Copyright (C) 2024 Gian 'myT' Schellenbaum
+
+This file is part of Challenge Quake 3 (CNQ3).
+
+Challenge Quake 3 is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the License,
+or (at your option) any later version.
+
+Challenge Quake 3 is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
+===========================================================================
+*/
+// fog volume (AABB) seen from inside
+
+
+#include "common.hlsli"
+#include "fog.hlsli"
+
+
+#if PIXEL_SHADER
+
+float4 ps(VOut input) : SV_Target // returns the fog color with an opacity that grows with distance
+{
+	Texture2D<float> depthTexture = ResourceDescriptorHeap[depthTextureIndex]; // read as a single channel
+	float depthZW = depthTexture.Load(int3(input.position.xy, 0));
+	float depthBuff = LinearDepth(depthZW, linearDepthA, linearDepthB); // linearized depth buffer value
+	float depthFrag = input.depthVS; // depth of the fog box fragment
+	float depthMin = min(depthBuff, depthFrag); // whichever of the two is closer
+	float fogOpacity = saturate(depthMin / depth);
+	float4 result = float4(color.rgb, fogOpacity);
+
+	return result;
+}
+
+#endif
diff --git a/code/renderer/shaders/crp/fog_outside.hlsl b/code/renderer/shaders/crp/fog_outside.hlsl
new file mode 100644
index 0000000..e02ad90
--- /dev/null
+++ b/code/renderer/shaders/crp/fog_outside.hlsl
@@ -0,0 +1,47 @@
+/*
+===========================================================================
+Copyright (C) 2024 Gian 'myT' Schellenbaum
+
+This file is part of Challenge Quake 3 (CNQ3).
+
+Challenge Quake 3 is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the License,
+or (at your option) any later version.
+
+Challenge Quake 3 is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
+===========================================================================
+*/
+// fog volume (AABB) seen from outside
+
+
+#include "common.hlsli"
+#include "fog.hlsli"
+
+
+#if PIXEL_SHADER
+
+float4 ps(VOut input) : SV_Target // returns the fog color with an opacity based on the distance behind the fog box fragment
+{
+	Texture2D<float> depthTexture = ResourceDescriptorHeap[depthTextureIndex]; // read as a single channel
+	float depthZW = depthTexture.Load(int3(input.position.xy, 0));
+	float depthBuff = LinearDepth(depthZW, linearDepthA, linearDepthB); // linearized depth buffer value
+	float depthFrag = input.depthVS; // depth of the fog box fragment
+	if(depthFrag > depthBuff)
+	{
+		discard; // the fog box fragment is behind what the depth buffer already holds
+	}
+
+	float fogOpacity = saturate((depthBuff - depthFrag) / depth);
+	float4 result = float4(color.rgb, fogOpacity);
+
+	return result;
+}
+
+#endif
diff --git a/code/renderer/shaders/crp/gatherdof.hlsli b/code/renderer/shaders/crp/gatherdof.hlsli
new file mode 100644
index 0000000..de9a62c
--- /dev/null
+++ b/code/renderer/shaders/crp/gatherdof.hlsli
@@ -0,0 +1,61 @@
+/*
+===========================================================================
+Copyright (C) 2023-2024 Gian 'myT' Schellenbaum
+
+This file is part of Challenge Quake 3 (CNQ3).
+
+Challenge Quake 3 is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the License,
+or (at your option) any later version.
+
+Challenge Quake 3 is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
+===========================================================================
+*/
+// gather depth of field: support functions
+
+
+#pragma once
+
+
+#include "common.hlsli"
+
+
+#define MAX_BLUR_DIAMETER 32.0
+#define MAX_COC 16.0
+
+// signed circle of confusion in [-1,1]: negative = near field, positive = far field, 0 = in focus
+float CircleOfConfusion(float depth, float focusNearMin, float focusNearMax, float focusFarMin, float focusFarMax)
+{
+	if(depth <= focusNearMin)
+	{
+		return -1.0; // at or in front of the near ramp: full near-field blur
+	}
+
+	if(focusNearMin < depth && depth < focusNearMax)
+	{
+		float nearRamp = 1.0 - (depth - focusNearMin) / (focusNearMax - focusNearMin);
+
+		return -nearRamp; // goes from -1 to 0 as depth moves from focusNearMin to focusNearMax
+	}
+
+	if(focusFarMin < depth && depth < focusFarMax)
+	{
+		float farRamp = (depth - focusFarMin) / (focusFarMax - focusFarMin);
+
+		return farRamp; // goes from 0 to 1 as depth moves from focusFarMin to focusFarMax
+	}
+
+	if(depth >= focusFarMax)
+	{
+		return 1.0; // at or beyond the far ramp: full far-field blur
+	}
+
+	return 0.0; // none of the ranges above matched
+}
diff --git a/code/renderer/shaders/crp/gatherdof_blur.hlsl b/code/renderer/shaders/crp/gatherdof_blur.hlsl
new file mode 100644
index 0000000..c97ea68
--- /dev/null
+++ b/code/renderer/shaders/crp/gatherdof_blur.hlsl
@@ -0,0 +1,207 @@
+/*
+===========================================================================
+Copyright (C) 2023-2024 Gian 'myT' Schellenbaum
+
+This file is part of Challenge Quake 3 (CNQ3).
+
+Challenge Quake 3 is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the License,
+or (at your option) any later version.
+
+Challenge Quake 3 is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
+===========================================================================
+*/
+// gather depth of field: near-field and far-field blur
+
+
+#include "common.hlsli"
+#include "gatherdof.hlsli"
+
+
+cbuffer RootConstants : register(b0)
+{
+	uint colorTextureIndex;
+	uint nearColorTextureIndex;
+	uint nearMaxCocTextureIndex; // tile
+	uint nearCocTextureIndex; // blurry
+	uint nearOutputTextureIndex;
+	uint farColorTextureIndex;
+	uint farCocTextureIndex; // sharp
+	uint farOutputTextureIndex;
+	uint samplerIndex; // linear/clamp
+	float brightnessScale;
+	float bladeCount;
+	float bokehAngleRad;
+};
+
+// the input is in [0,1]^2, the output polygon is centered at the origin
+float2 MapUnitSquareToPolygon(float2 square, float apertureBladeCount, float apertureAngleRad)
+{
+	// needed to avoid inf/nan propagation through theta for samples
+	// that are exactly in the middle of the quad on either axis
+	// (i.e. square.x|y == 0.5 gets remapped to 0.0)
+	const float epsilon = 0.000001;
+
+	// morph into a square in [-1,1]^2
+	square = square * 2.0 - 1.0;
+
+	// morph the square into a disk
+	// "A Low Distortion Map Between Disk and Square" by Peter Shirley and Kenneth Chiu
+	float radius, angle;
+	float2 square2 = square * square;
+	if(square2.x > square2.y)
+	{
+		// left and right quadrants
+		radius = square.x;
+		angle = (square.y * PI_D4) / (square.x + epsilon);
+	}
+	else
+	{
+		// top and bottom quadrants
+		radius = square.y;
+		angle = PI_D2 - (square.x * PI_D4) / (square.y + epsilon);
+	}
+	if(radius < 0.0)
+	{
+		radius = -radius; // keep the radius positive and point the angle the other way
+		angle += PI;
+	}
+
+	// morph the disk into a polygon
+	// "Graphics Gems from CryENGINE 3" by Tiago Sousa
+	float edgeCount = apertureBladeCount;
+	if(edgeCount >= 3.0) // fewer than 3 blades keeps the circular shape
+	{
+		float num = cos(PI / edgeCount);
+		float den0 = PI_M2 / edgeCount;
+		float den1 = (angle * edgeCount + PI) / PI_M2;
+		float den = angle - (den0 * floor(den1));
+		radius *= num / cos(den);
+		angle += apertureAngleRad; // the rotation is only applied to the polygonal shape
+	}
+
+	float2 disk;
+	sincos(angle, disk.y, disk.x);
+	disk *= radius;
+
+	return disk;
+}
+
+float4 BlurFarField(Texture2D inTexture, SamplerState samplerState, float coc, float2 tc01, float2 pixelSize) // gathers 16x16 taps plus the center, then divides by the summed alpha
+{
+	const int TAP_COUNT_BLUR = 16;
+	float2 tcScale = MAX_COC * coc * pixelSize; // MAX_COC comes from gatherdof.hlsli and has the same value as the previous literal 16.0
+
+	float4 result = inTexture.SampleLevel(samplerState, tc01, 0);
+	for(int y = 0; y < TAP_COUNT_BLUR; ++y)
+	{
+		for(int x = 0; x < TAP_COUNT_BLUR; ++x)
+		{
+			float2 tcQuad = float2(x, y) / float(TAP_COUNT_BLUR - 1);
+			float2 tcOffset = MapUnitSquareToPolygon(tcQuad, bladeCount, bokehAngleRad) * tcScale;
+			float4 sampleValue = inTexture.SampleLevel(samplerState, tc01 + tcOffset, 0);
+			result += sampleValue;
+		}
+	}
+
+	result /= result.a; // normalizes RGB by the summed alpha; the returned alpha becomes 1
+
+	return result;
+}
+
+float4 BlurNearField(Texture2D inTexture, SamplerState samplerState, float tileMaxCoc, float2 tc01, float2 pixelSize) // gathers 15x15 taps, ignoring samples whose alpha is 0
+{
+	const int TAP_COUNT_BLUR = 15; // must be odd so we generate 1 sample at 0.5, 0.5 in the quad
+	float2 tcScale = MAX_COC * tileMaxCoc * pixelSize; // MAX_COC comes from gatherdof.hlsli and has the same value as the previous literal 16.0
+	float insideCount = 0.0;
+	float totalCount = 1.0 + float(TAP_COUNT_BLUR * TAP_COUNT_BLUR);
+	float4 result = float4(0, 0, 0, 0);
+	float weightSum = 0.0;
+
+	for(int y = 0; y < TAP_COUNT_BLUR; ++y)
+	{
+		for(int x = 0; x < TAP_COUNT_BLUR; ++x)
+		{
+			float2 tcQuad = float2(x, y) / float(TAP_COUNT_BLUR - 1);
+			float2 tcOffset = MapUnitSquareToPolygon(tcQuad, bladeCount, bokehAngleRad) * tcScale;
+			float4 sampleValue = inTexture.SampleLevel(samplerState, tc01 + tcOffset, 0);
+			float inside = sampleValue.a > 0.0 ? 1.0 : 0.0;
+			float brightnessWeight = 1.0 + brightnessScale * Brightness(sampleValue.rgb);
+			float colorWeight = (sampleValue.a / tileMaxCoc) * brightnessWeight;
+			insideCount += inside;
+			weightSum += inside * colorWeight;
+			result += inside * float4(colorWeight.xxx, 1) * sampleValue;
+		}
+	}
+
+	if(insideCount >= 1.0)
+	{
+		result.rgb /= weightSum;
+		result.a /= insideCount; // average alpha of the samples that counted
+		result.a *= EaseInOutCubic(saturate(2.0 * (insideCount / totalCount))); // fades out when less than half of the taps counted
+	}
+	else
+	{
+		result = float4(1, 1, 0, 0); // no sample counted: alpha 0
+	}
+
+	return result;
+}
+
+[numthreads(8, 8, 1)]
+void cs(uint3 dtid : SV_DispatchThreadID)
+{
+	uint2 tc = dtid.xy;
+	RWTexture2D<float4> nearOutputTexture = ResourceDescriptorHeap[nearOutputTextureIndex];
+	RWTexture2D<float4> farOutputTexture = ResourceDescriptorHeap[farOutputTextureIndex];
+	uint width, height;
+	farOutputTexture.GetDimensions(width, height);
+	if(any(dtid.xy >= uint2(width, height)))
+	{
+		return; // thread is outside the output texture
+	}
+
+	SamplerState samplerState = SamplerDescriptorHeap[samplerIndex];
+	Texture2D colorTexture = ResourceDescriptorHeap[colorTextureIndex];
+	Texture2D nearColorTexture = ResourceDescriptorHeap[nearColorTextureIndex];
+	Texture2D farColorTexture = ResourceDescriptorHeap[farColorTextureIndex];
+	Texture2D<float> nearCocTexture = ResourceDescriptorHeap[nearCocTextureIndex];
+	Texture2D<float> nearMaxCocTexture = ResourceDescriptorHeap[nearMaxCocTextureIndex];
+	Texture2D<float> farCocTexture = ResourceDescriptorHeap[farCocTextureIndex];
+	RWTexture2D<float4> nearOutTexture = ResourceDescriptorHeap[nearOutputTextureIndex];
+	RWTexture2D<float4> farOutTexture = ResourceDescriptorHeap[farOutputTextureIndex];
+	float2 tc01 = (float2(dtid.xy) + float2(0.5, 0.5)) / float2(width, height);
+	float2 pixelSize = float2(1, 1) / float2(width, height);
+	float nearCoc = nearCocTexture.SampleLevel(samplerState, tc01, 0); // not used below
+	float nearMaxCoc = nearMaxCocTexture.SampleLevel(samplerState, tc01, 0);
+	float farCoc = farCocTexture.SampleLevel(samplerState, tc01, 0);
+	float4 color = colorTexture.SampleLevel(samplerState, tc01, 0);
+
+	if(nearMaxCoc > 0.0)
+	{
+		nearOutTexture[tc] = BlurNearField(nearColorTexture, samplerState, nearMaxCoc, tc01, pixelSize);
+	}
+	else
+	{
+		// A must be 0 to disable the near field from being blended
+		nearOutTexture[tc] = float4(color.rgb, 0);
+	}
+
+	if(farCoc > 0.0)
+	{
+		farOutTexture[tc] = BlurFarField(farColorTexture, samplerState, farCoc, tc01, pixelSize);
+	}
+	else
+	{
+		// RGB must be 0 to not mess up the fill pass of neighbor pixels that are inside the near field
+		// A must be 0 to disable the far field from being blended
+		farOutTexture[tc] = float4(0, 0, 0, 0);
+	}
+}
diff --git a/code/renderer/shaders/crp/gatherdof_coc_tile_gen.hlsl b/code/renderer/shaders/crp/gatherdof_coc_tile_gen.hlsl
new file mode 100644
index 0000000..a2b65bb
--- /dev/null
+++ b/code/renderer/shaders/crp/gatherdof_coc_tile_gen.hlsl
@@ -0,0 +1,65 @@
+/*
+===========================================================================
+Copyright (C) 2023-2024 Gian 'myT' Schellenbaum
+
+This file is part of Challenge Quake 3 (CNQ3).
+
+Challenge Quake 3 is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the License,
+or (at your option) any later version.
+
+Challenge Quake 3 is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
+===========================================================================
+*/
+// gather depth of field: near-field circle of confusion tile generation
+
+
+#include "common.hlsli"
+#include "gatherdof.hlsli"
+
+
+cbuffer RootConstants : register(b0)
+{
+	uint inputTextureIndex; // full-resolution CoC texture
+	uint outputTextureIndex; // tile texture, 1 pixel per 16x16 input pixels
+};
+
+[numthreads(8, 8, 1)]
+void cs(uint3 dtid : SV_DispatchThreadID, uint3 gid : SV_GroupID, uint3 gtid : SV_GroupThreadID) // gid and gtid are not used
+{
+	uint2 tcOut = dtid.xy;
+	RWTexture2D<float> outputTexture = ResourceDescriptorHeap[outputTextureIndex];
+	uint width, height;
+	outputTexture.GetDimensions(width, height);
+	if(any(dtid.xy >= uint2(width, height)))
+	{
+		return; // thread is outside the output texture
+	}
+
+	Texture2D<float> inputTexture = ResourceDescriptorHeap[inputTextureIndex];
+
+	// This loop can read out of bounds in the inputTexture.
+	// Each full-res pixel has a corresponding tile pixel, but the reverse isn't always true.
+	// Texture.Load is specced to return 0 on OOB accesses.
+	// Since we max() the values, zeroes have no effect on the final result.
+	uint2 tcInCorner = tcOut * uint2(16, 16);
+	float maxCoc = 0.0;
+	for(uint y = 0; y < 16; y++)
+	{
+		for(uint x = 0; x < 16; x++)
+		{
+			uint2 tcIn = tcInCorner + uint2(x, y);
+			float coc = inputTexture.Load(uint3(tcIn.x, tcIn.y, 0));
+			maxCoc = max(maxCoc, coc);
+		}
+	}
+
+	outputTexture[tcOut] = maxCoc; // largest CoC found in the 16x16 block
+}
diff --git a/code/renderer/shaders/crp/gatherdof_coc_tile_max.hlsl b/code/renderer/shaders/crp/gatherdof_coc_tile_max.hlsl
new file mode 100644
index 0000000..8d9c96f
--- /dev/null
+++ b/code/renderer/shaders/crp/gatherdof_coc_tile_max.hlsl
@@ -0,0 +1,64 @@
+/*
+===========================================================================
+Copyright (C) 2023-2024 Gian 'myT' Schellenbaum
+
+This file is part of Challenge Quake 3 (CNQ3).
+
+Challenge Quake 3 is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the License,
+or (at your option) any later version.
+
+Challenge Quake 3 is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
+===========================================================================
+*/
+// gather depth of field: near-field circle of confusion tile dilation
+
+
+#include "common.hlsli"
+#include "gatherdof.hlsli"
+
+
+cbuffer RootConstants : register(b0)
+{
+	uint inputTextureIndex;
+	uint outputTextureIndex;
+	uint samplerIndex; // point/clamp
+};
+
+[numthreads(8, 8, 1)]
+void cs(uint3 dtid : SV_DispatchThreadID)
+{
+	uint2 tc = dtid.xy;
+	RWTexture2D<float> outputTexture = ResourceDescriptorHeap[outputTextureIndex];
+	uint width, height;
+	outputTexture.GetDimensions(width, height);
+	if(any(dtid.xy >= uint2(width, height)))
+	{
+		return; // thread is outside the output texture
+	}
+
+	Texture2D<float> inputTexture = ResourceDescriptorHeap[inputTextureIndex];
+	SamplerState samplerState = SamplerDescriptorHeap[samplerIndex];
+
+	float2 tcShifted = float2(tc) + float2(0.5, 0.5); // pixel center
+	float2 pixelSize = float2(1, 1) / float2(width, height); // uses the output size for the input too - presumably both have the same dimensions
+	float maxCoc = 0.0;
+	for(int y = -1; y <= 1; y++)
+	{
+		for(int x = -1; x <= 1; x++)
+		{
+			float2 tc01 = (tcShifted + float2(x, y)) * pixelSize;
+			float coc = inputTexture.SampleLevel(samplerState, tc01, 0);
+			maxCoc = max(maxCoc, coc);
+		}
+	}
+
+	outputTexture[tc] = maxCoc; // largest value in the 3x3 neighborhood
+}
diff --git a/code/renderer/shaders/crp/gatherdof_combine.hlsl b/code/renderer/shaders/crp/gatherdof_combine.hlsl
new file mode 100644
index 0000000..7c67a23
--- /dev/null
+++ 
b/code/renderer/shaders/crp/gatherdof_combine.hlsl
@@ -0,0 +1,84 @@
+/*
+===========================================================================
+Copyright (C) 2023-2024 Gian 'myT' Schellenbaum
+
+This file is part of Challenge Quake 3 (CNQ3).
+
+Challenge Quake 3 is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the License,
+or (at your option) any later version.
+
+Challenge Quake 3 is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
+===========================================================================
+*/
+// gather depth of field: final blend pass
+
+
+#include "common.hlsli"
+#include "gatherdof.hlsli"
+
+
+cbuffer RootConstants : register(b0)
+{
+	uint nearTextureIndex;
+	uint farTextureIndex;
+	uint nearCocTextureIndex;
+	uint farCocTextureIndex;
+	uint sharpTextureIndex;
+	uint samplerIndex; // point/clamp
+};
+
+struct VOut
+{
+	float4 position : SV_Position;
+	float2 texCoords : TEXCOORD0;
+};
+
+
+#if VERTEX_SHADER
+
+VOut vs(uint id : SV_VertexID) // full-screen triangle, no vertex buffer input
+{
+	VOut output;
+	output.position = FSTrianglePosFromVertexId(id);
+	output.texCoords = FSTriangleTCFromVertexId(id);
+
+	return output;
+}
+
+#endif
+
+
+#if PIXEL_SHADER
+
+float4 ps(VOut input) : SV_Target // blends sharp -> far field -> near field and outputs an opaque color
+{
+	SamplerState samplerState = SamplerDescriptorHeap[samplerIndex];
+	Texture2D nearColorTexture = ResourceDescriptorHeap[nearTextureIndex];
+	Texture2D farColorTexture = ResourceDescriptorHeap[farTextureIndex];
+	Texture2D<float> nearCocTexture = ResourceDescriptorHeap[nearCocTextureIndex]; // only referenced by the disabled line below
+	Texture2D<float> farCocTexture = ResourceDescriptorHeap[farCocTextureIndex];
+	Texture2D sharpTexture = ResourceDescriptorHeap[sharpTextureIndex];
+
+	float4 nearColor = nearColorTexture.Sample(samplerState, input.texCoords);
+	float4 farColor = farColorTexture.Sample(samplerState, input.texCoords);
+	//float nearCoc = nearCocTexture.Sample(samplerState, input.texCoords);
+	float nearCoc = saturate(nearColor.a); // the near-field blend factor is taken from the near color's alpha
+	float farCoc = farCocTexture.Sample(samplerState, input.texCoords);
+	float4 sharp = sharpTexture.Sample(samplerState, input.texCoords);
+
+	float3 color = lerp(sharp.rgb, farColor.rgb, farCoc); // far field first
+	color = lerp(color, nearColor.rgb, nearCoc); // near field on top
+	float4 result = float4(color, 1.0);
+
+	return result;
+}
+
+#endif
diff --git a/code/renderer/shaders/crp/gatherdof_debug.hlsl b/code/renderer/shaders/crp/gatherdof_debug.hlsl
new file mode 100644
index 0000000..54f09bd
--- /dev/null
+++ b/code/renderer/shaders/crp/gatherdof_debug.hlsl
@@ -0,0 +1,93 @@
+/*
+===========================================================================
+Copyright (C) 2023-2024 Gian 'myT' Schellenbaum
+
+This file is part of Challenge Quake 3 (CNQ3).
+
+Challenge Quake 3 is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the License,
+or (at your option) any later version.
+
+Challenge Quake 3 is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
+===========================================================================
+*/
+// gather depth of field: debug overlay
+
+
+#include "common.hlsli"
+#include "gatherdof.hlsli"
+#include "dof.hlsli"
+
+
+cbuffer RootConstants : register(b0)
+{
+	uint colorTextureIndex;
+	uint depthTextureIndex;
+	uint debugMode; // 1 = CoC tint, 2 = focus plane tint, anything else = unmodified color
+	float linearDepthA;
+	float linearDepthB;
+	float focusNearMin;
+	float focusNearMax;
+	float focusFarMin;
+	float focusFarMax;
+	float focusDist; // not read by this shader
+};
+
+struct VOut
+{
+	float4 position : SV_Position;
+	float2 texCoords : TEXCOORD0;
+};
+
+
+#if VERTEX_SHADER
+
+VOut vs(uint id : SV_VertexID) // full-screen triangle, no vertex buffer input
+{
+	VOut output;
+	output.position = FSTrianglePosFromVertexId(id);
+	output.texCoords = FSTriangleTCFromVertexId(id);
+
+	return output;
+}
+
+#endif
+
+
+#if PIXEL_SHADER
+
+float4 ps(VOut input) : SV_Target
+{
+	Texture2D colorTexture = ResourceDescriptorHeap[colorTextureIndex];
+	Texture2D<float> depthTexture = ResourceDescriptorHeap[depthTextureIndex]; // read as a single channel
+	uint3 tc = uint3(input.position.x, input.position.y, 0);
+	float3 color = colorTexture.Load(tc).rgb;
+	float depthZW = depthTexture.Load(tc);
+	float depth = LinearDepth(depthZW, linearDepthA, linearDepthB);
+	float coc = CircleOfConfusion(depth, focusNearMin, focusNearMax, focusFarMin, focusFarMax);
+	bool nearField = coc < 0.0; // bool to match the nearField parameters of the DOF_Debug* helpers
+	float4 result;
+	if(debugMode == 1)
+	{
+		result = DOF_DebugCoc(color, nearField, saturate(-coc), saturate(coc));
+	}
+	else if(debugMode == 2)
+	{
+		result = DOF_DebugFocusPlane(color, nearField);
+	}
+	else
+	{
+		result = float4(color, 1);
+	}
+
+	return result;
+}
+
+#endif
diff --git a/code/renderer/shaders/crp/gatherdof_fill.hlsl b/code/renderer/shaders/crp/gatherdof_fill.hlsl
new file mode 100644
index 0000000..545b76e
--- /dev/null
+++ b/code/renderer/shaders/crp/gatherdof_fill.hlsl
@@ -0,0 +1,76 @@
+/*
+===========================================================================
+Copyright (C) 2023-2024 Gian 'myT' Schellenbaum
+
+This file is part of Challenge Quake 3 (CNQ3).
+
+Challenge Quake 3 is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the License,
+or (at your option) any later version.
+
+Challenge Quake 3 is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
+===========================================================================
+*/
+// gather depth of field: max blur post-filter to combat undersampling
+
+
+#include "common.hlsli"
+#include "gatherdof.hlsli"
+
+
+cbuffer RootConstants : register(b0)
+{
+	uint nearInputTextureIndex;
+	uint nearOutputTextureIndex;
+	uint farInputTextureIndex;
+	uint farOutputTextureIndex;
+	uint samplerIndex; // point/clamp
+};
+
+[numthreads(8, 8, 1)]
+void cs(uint3 dtid : SV_DispatchThreadID)
+{
+	uint2 tc = dtid.xy;
+	RWTexture2D<float4> nearOutputTexture = ResourceDescriptorHeap[nearOutputTextureIndex];
+	RWTexture2D<float4> farOutputTexture = ResourceDescriptorHeap[farOutputTextureIndex];
+	uint width, height;
+	nearOutputTexture.GetDimensions(width, height);
+	if(any(dtid.xy >= uint2(width, height)))
+	{
+		return; // thread is outside the output texture
+	}
+
+	SamplerState samplerState = SamplerDescriptorHeap[samplerIndex];
+	Texture2D nearInputTexture = ResourceDescriptorHeap[nearInputTextureIndex];
+	Texture2D farInputTexture = ResourceDescriptorHeap[farInputTextureIndex];
+
+	float2 tc01 = (float2(tc) + float2(0.5, 0.5)) / float2(width, height);
+	float2 pixelSize = float2(1, 1) / float2(width, height);
+	float4 nearFilled = float4(0, 0, 0, 0);
+	float4 farFilled = float4(0, 0, 0, 0);
+	for(int y = -1; y <= 1; y++)
+	{
+		for(int x = -1; x <= 1; x++)
+		{
+			float2 tcSample01 = tc01 + float2(x, y) * pixelSize;
+			float4 nearSample = nearInputTexture.SampleLevel(samplerState, tcSample01, 0);
+			float4 farSample = farInputTexture.SampleLevel(samplerState, tcSample01, 0);
+			nearFilled = max(nearFilled, nearSample); // per-channel max over the 3x3 neighborhood
+			farFilled = max(farFilled, farSample);
+		}
+	}
+
+	// make sure to keep the original blend factors
+	nearFilled.a = nearInputTexture.Load(uint3(tc.x, tc.y, 0)).a;
+	farFilled.a = farInputTexture.Load(uint3(tc.x, tc.y, 0)).a;
+
+	nearOutputTexture[tc] = nearFilled;
+	farOutputTexture[tc] = farFilled;
+}
diff --git a/code/renderer/shaders/crp/gatherdof_split.hlsl b/code/renderer/shaders/crp/gatherdof_split.hlsl
new file mode 100644
index 0000000..cc99ef0
--- /dev/null
+++ b/code/renderer/shaders/crp/gatherdof_split.hlsl
@@ -0,0 +1,78 @@
+/*
+===========================================================================
+Copyright (C) 2023-2024 Gian 'myT' Schellenbaum
+
+This file is part of Challenge Quake 3 (CNQ3).
+
+Challenge Quake 3 is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the License,
+or (at your option) any later version.
+
+Challenge Quake 3 is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
+===========================================================================
+*/
+// gather depth of field: field split and CoC generation
+
+
+#include "common.hlsli"
+#include "gatherdof.hlsli"
+
+
+cbuffer RootConstants : register(b0)
+{
+	uint depthTextureIndex;
+	uint colorTextureIndex;
+	uint nearColorTextureIndex;
+	uint farColorTextureIndex;
+	uint nearCocTextureIndex;
+	uint farCocTextureIndex;
+	float linearDepthA;
+	float linearDepthB;
+	float focusNearMin;
+	float focusNearMax;
+	float focusFarMin;
+	float focusFarMax;
+	float brightnessScale;
+};
+
+[numthreads(8, 8, 1)]
+void cs(uint3 dtid : SV_DispatchThreadID)
+{
+	uint2 tc = dtid.xy;
+	Texture2D colorTexture = ResourceDescriptorHeap[colorTextureIndex];
+	uint width, height, levels;
+	colorTexture.GetDimensions(0, width, height, levels); // levels is required by this overload but not used
+	if(any(dtid.xy >= uint2(width, height)))
+	{
+		return; // thread is outside the input texture
+	}
+
+	Texture2D<float> depthTexture = ResourceDescriptorHeap[depthTextureIndex];
+	RWTexture2D<float4> nearColorTexture = ResourceDescriptorHeap[nearColorTextureIndex];
+	RWTexture2D<float4> farColorTexture = ResourceDescriptorHeap[farColorTextureIndex];
+	RWTexture2D<float> nearCocTexture = ResourceDescriptorHeap[nearCocTextureIndex];
+	RWTexture2D<float> farCocTexture = ResourceDescriptorHeap[farCocTextureIndex];
+
+	float4 color = colorTexture[tc];
+	float depthZW = depthTexture[tc];
+	float depth = LinearDepth(depthZW, linearDepthA, linearDepthB);
+	float coc = CircleOfConfusion(depth, focusNearMin, focusNearMax, focusFarMin, focusFarMax);
+	float nearCoc = max(-coc, 0.0); // negative CoC = near field
+	float farCoc = max(coc, 0.0); // positive CoC = far field
+	float brightnessWeight = 1.0 + brightnessScale * Brightness(color.rgb);
+	float farWeight = farCoc * brightnessWeight;
+	float4 nearColor = float4(color.rgb, nearCoc); // near field: plain color, CoC in alpha
+	float4 farColor = float4(color.rgb * farWeight, farWeight); // far field: color pre-multiplied by its weight, weight in alpha
+
+	nearColorTexture[tc] = nearColor;
+	farColorTexture[tc] = farColor;
+	nearCocTexture[tc] = nearCoc;
+	farCocTexture[tc] = farCoc;
+}
diff --git a/code/renderer/shaders/crp/imgui.hlsl
// Dear ImGui integration


struct VOut
{
	float4 pos : SV_POSITION;
	float4 col : COLOR0;
	float2 uv : TEXCOORD0;
};

cbuffer RootConstants : register(b0)
{
	float4x4 projectionMatrix;
	uint textureIndex;
	uint samplerIndex;
	float mipIndex;
};


#if VERTEX_SHADER

struct VIn
{
	float2 pos : POSITION;
	float4 col : COLOR0;
	float2 uv : TEXCOORD0;
};

// projects the 2D vertex position and forwards color and texture coordinates
VOut vs(VIn input)
{
	const float4 localPos = float4(input.pos, 0.0, 1.0);

	VOut vertex;
	vertex.uv = input.uv;
	vertex.col = input.col;
	vertex.pos = mul(projectionMatrix, localPos);

	return vertex;
}

#endif


#if PIXEL_SHADER

// modulates the vertex color by the texture sampled at the requested mip level
float4 ps(VOut input) : SV_Target
{
	SamplerState texSampler = SamplerDescriptorHeap[samplerIndex];
	Texture2D tex = ResourceDescriptorHeap[textureIndex];
	const float4 texel = tex.SampleLevel(texSampler, input.uv, mipIndex);

	return input.col * texel;
}

#endif
// mip-map generation: gamma-space to linear-space transform


#include "../common/mip_gen.hlsli"


cbuffer RootConstants
{
	float gamma;
	uint srcTexture; // descriptor heap index of the source texture
	uint dstTexture; // descriptor heap index of the destination texture
}

// Fetches both textures from the descriptor heap and hands the work
// over to the shared mip generation code.
[numthreads(8, 8, 1)]
void cs(uint3 id : SV_DispatchThreadID)
{
	// RW textures have no default element type, so it must be spelled out
	// NOTE(review): float4 is presumed to match MipGen_GammaToLinear's parameters - confirm in mip_gen.hlsli
	RWTexture2D<float4> src = ResourceDescriptorHeap[srcTexture];
	RWTexture2D<float4> dst = ResourceDescriptorHeap[dstTexture];
	MipGen_GammaToLinear(dst, src, id, gamma);
}
// mip-map generation: 8-tap 1D filter


#include "../common/mip_gen.hlsli"


cbuffer RootConstants
{
	float4 weights;
	int2 maxSize;
	int2 scale;
	int2 offset;
	uint clampMode; // 0 = repeat
	uint srcMip;    // not read by this shader
	uint dstMip;    // not read by this shader
	uint srcTexture; // descriptor heap index of the source texture
	uint dstTexture; // descriptor heap index of the destination texture
}

// Fetches both textures from the descriptor heap and hands the work
// over to the shared mip generation code.
[numthreads(8, 8, 1)]
void cs(uint3 id : SV_DispatchThreadID)
{
	// RW textures have no default element type, so it must be spelled out
	// NOTE(review): float4 is presumed to match MipGen_DownSample's parameters - confirm in mip_gen.hlsli
	RWTexture2D<float4> src = ResourceDescriptorHeap[srcTexture];
	RWTexture2D<float4> dst = ResourceDescriptorHeap[dstTexture];
	MipGen_DownSample(dst, src, id, maxSize, clampMode, scale, offset, weights);
}
// mip-map generation: linear-space to gamma-space transform


#include "../common/mip_gen.hlsli"


cbuffer RootConstants
{
	float4 blendColor;
	float intensity;
	float invGamma; // 1.0 / gamma
	uint srcMip;    // not read by this shader
	uint dstMip;    // not read by this shader
	uint srcTexture; // descriptor heap index of the source texture
	uint dstTexture; // descriptor heap index of the destination texture
}

// Fetches both textures from the descriptor heap and hands the work
// over to the shared mip generation code.
[numthreads(8, 8, 1)]
void cs(uint3 id : SV_DispatchThreadID)
{
	// RW textures have no default element type, so it must be spelled out
	// NOTE(review): float4 is presumed to match MipGen_LinearToGamma's parameters - confirm in mip_gen.hlsli
	RWTexture2D<float4> src = ResourceDescriptorHeap[srcTexture];
	RWTexture2D<float4> dst = ResourceDescriptorHeap[dstTexture];
	MipGen_LinearToGamma(dst, src, id, blendColor, intensity, invGamma);
}
// Nuklear integration


struct VOut
{
	float4 pos : SV_POSITION;
	float4 col : COLOR0;
	float2 uv : TEXCOORD0;
};

cbuffer RootConstants
{
	float4x4 projectionMatrix;
	uint textureIndex;
	uint samplerIndex;
};


#if VERTEX_SHADER

struct VIn
{
	float2 pos : POSITION;
	float4 col : COLOR0;
	float2 uv : TEXCOORD0;
};

// projects the 2D vertex position and forwards color and texture coordinates
VOut vs(VIn input)
{
	const float4 localPos = float4(input.pos, 0.0, 1.0);

	VOut vertex;
	vertex.uv = input.uv;
	vertex.col = input.col;
	vertex.pos = mul(projectionMatrix, localPos);

	return vertex;
}

#endif


#if PIXEL_SHADER

// modulates the vertex color by the sampled texture color
float4 ps(VOut input) : SV_Target
{
	SamplerState texSampler = SamplerDescriptorHeap[samplerIndex];
	Texture2D tex = ResourceDescriptorHeap[textureIndex];
	const float4 texel = tex.Sample(texSampler, input.uv);

	return input.col * texel;
}

#endif
// shared structures and constants used to implement order-independent transparency
// this header is compiled both as HLSL and as C++ (see the __cplusplus sections)


#pragma once


#if defined(__cplusplus)
	// 4-byte packing so the C++ layout has no padding (checked by the static_asserts below)
	#pragma pack(push, 4)
	typedef uint32_t uint;
#endif

// size of the per-pixel fragment array sorted by the resolve shader
#define OIT_MAX_FRAGMENTS_PER_PIXEL 32
// NOTE(review): presumably used to size the fragment buffer - confirm in the C++ code
#define OIT_AVG_FRAGMENTS_PER_PIXEL 16

// global allocation state of the fragment buffer
struct OIT_Counter
{
	uint fragmentCount;    // incremented atomically to allocate a fragment slot
	uint maxFragmentCount; // allocations at or past this index are rejected
	uint overflowCount;    // incremented for every rejected allocation
};

// a single node of a per-pixel linked list of transparent fragments
struct OIT_Fragment
{
	uint color; // packed with PackColor, unpacked with UnpackColor
	float depth; // higher is further away from the camera
	uint stateBits; // GLS_* stage bits + stage index
	uint next; // index of the next fragment in this pixel's list, 0 ends the list
	uint shaderTrace; // shader index: 14 - frame index: 2 - enable: 1
	                  // the enable bit is bit 0, the frame index starts at bit 1, the shader index at bit 3
	uint depthFadeDistOffset; // offset: fp16 - distance: fp16
	                          // the low 16 bits are read as the distance factor, the high 16 bits as the offset
	uint depthFadeScaleBias; // enable: 1 - color bias: 4 - color scale: 4
	                         // bits 0-3 are the color scale, bits 4-7 the color bias, bit 8 the enable flag
	// @TODO: move the 9 bits from depthFadeScaleBias into shaderTrace
};

#if defined(__cplusplus)
	#pragma pack(pop)
	// the GPU reads these structures as-is: the sizes must match the HLSL layout
	static_assert(sizeof(OIT_Counter) == 12, "sizeof(OIT_Counter) is wrong");
	static_assert(sizeof(OIT_Fragment) == 28, "sizeof(OIT_Fragment) is wrong");
#endif
// generic shader for opaque surfaces


#include "common.hlsli"
#include "world.h.hlsli"
#include "world.hlsli"


cbuffer RootConstants
{
	// geometry
	matrix modelViewMatrix;
	matrix projectionMatrix;
	float4 clipPlane;

	// general
	uint textureIndex;
	uint samplerIndex;
	uint shaderIndexBufferIndex;
	uint alphaTest; // one of the ATEST_* constants
	float greyscale;

	// shader trace
	uint shaderTrace; // shader index: 14 - frame index: 2 - enable: 1
	uint centerPixel; // y: 16 - x: 16

	// @TODO: dither
	// @TODO: Voronoi tiling
};


#if VERTEX_SHADER

struct VIn
{
	float3 position : POSITION;
	float3 normal : NORMAL;
	float2 texCoords : TEXCOORD0;
	float4 color : COLOR0;
};

#endif

struct VOut
{
	float4 position : SV_Position;
	float3 normal : NORMAL;
	float2 texCoords : TEXCOORD0;
	float4 color : COLOR0;
	float clipDist : SV_ClipDistance0;
	float2 proj2232 : PROJ;
	float depthVS : DEPTHVS;
};


#if VERTEX_SHADER

// transforms the vertex to clip space and forwards the surface attributes
VOut vs(VIn input)
{
	float4 positionVS = mul(modelViewMatrix, float4(input.position.xyz, 1));

	VOut output;
	output.position = mul(projectionMatrix, positionVS);
	output.normal = input.normal;
	output.texCoords = input.texCoords;
	output.color = input.color;
	// signed distance to the user clip plane, evaluated in view space
	output.clipDist = dot(positionVS, clipPlane);
	output.proj2232 = float2(-projectionMatrix[2][2], projectionMatrix[2][3]);
	// negated so that higher values are further away from the camera
	output.depthVS = -positionVS.z;

	return output;
}

#endif


#if PIXEL_SHADER

// shades the surface: texture * vertex color, alpha test, greyscale
// also records the shader index of the center pixel when shader tracing is enabled
float4 ps(VOut input) : SV_Target
{
	// @TODO: Voronoi tiling
	Texture2D texture0 = ResourceDescriptorHeap[textureIndex];
	// samplers live in the sampler heap, not in the CBV/SRV/UAV heap
	SamplerState sampler0 = SamplerDescriptorHeap[samplerIndex];
	float4 dst = texture0.Sample(sampler0, input.texCoords) * input.color;
	if(FailsAlphaTest(dst.a, alphaTest))
	{
		discard;
	}

	dst = MakeGreyscale(dst, greyscale);

	// @TODO: dithering (need to figure out the tone mapping function first)

	if(shaderTrace & 1)
	{
		// we only store the shader index of 1 pixel
		uint2 fragmentCoords = uint2(input.position.xy);
		uint2 centerCoords = uint2(centerPixel & 0xFFFF, centerPixel >> 16);
		if(all(fragmentCoords == centerCoords))
		{
			RWByteAddressBuffer shaderIndexBuffer = ResourceDescriptorHeap[shaderIndexBufferIndex];
			// one 4-byte slot per frame index
			uint frameIndex = (shaderTrace >> 1) & 3;
			uint shaderIndex = shaderTrace >> 3;
			shaderIndexBuffer.Store(frameIndex * 4, shaderIndex);
		}
	}

	return dst;
}

#endif
// post-processing: moves from linear to gamma space
// applies r_gamma, r_brightness, r_greyscale


#include "common.hlsli"


struct VOut
{
	float4 position : SV_Position;
	float2 texCoords : TEXCOORD0;
};

cbuffer RootConstants
{
	uint textureIndex;
	uint samplerIndex;
	float invGamma;
	float brightness;
	float greyscale;
};


#if VERTEX_SHADER

// emits a full-screen triangle from the vertex index alone
VOut vs(uint id : SV_VertexID)
{
	VOut vertex;
	vertex.texCoords = FSTriangleTCFromVertexId(id);
	vertex.position = FSTrianglePosFromVertexId(id);

	return vertex;
}

#endif


#if PIXEL_SHADER

// X3571: pow(f, e) won't work if f is negative
#pragma warning(disable : 3571)

// gamma-corrects the input color, scales its brightness and applies the greyscale factor
float4 ps(VOut input) : SV_Target
{
	SamplerState colorSampler = SamplerDescriptorHeap[samplerIndex];
	Texture2D colorTexture = ResourceDescriptorHeap[textureIndex];
	const float3 linearColor = colorTexture.Sample(colorSampler, input.texCoords).rgb;
	const float3 gammaColor = pow(linearColor, invGamma) * brightness;

	return MakeGreyscale(float4(gammaColor.rgb, 1.0), greyscale);
}

#endif
// post-processing: moves from gamma to linear space


#include "common.hlsli"


struct VOut
{
	float4 position : SV_Position;
	float2 texCoords : TEXCOORD0;
};

cbuffer RootConstants
{
	uint textureIndex;
	uint samplerIndex;
	float gamma;
	float invBrightness;
};


#if VERTEX_SHADER

// emits a full-screen triangle from the vertex index alone
VOut vs(uint id : SV_VertexID)
{
	VOut vertex;
	vertex.texCoords = FSTriangleTCFromVertexId(id);
	vertex.position = FSTrianglePosFromVertexId(id);

	return vertex;
}

#endif


#if PIXEL_SHADER

// X3571: pow(f, e) won't work if f is negative
#pragma warning(disable : 3571)

// undoes the brightness scale first, then the gamma curve
float4 ps(VOut input) : SV_Target
{
	SamplerState colorSampler = SamplerDescriptorHeap[samplerIndex];
	Texture2D colorTexture = ResourceDescriptorHeap[textureIndex];
	const float3 gammaColor = colorTexture.Sample(colorSampler, input.texCoords).rgb;
	const float3 linearColor = pow(gammaColor * invBrightness, gamma);

	return float4(linearColor, 1.0);
}

#endif
// add fragments of transparent surfaces to per-pixel linked lists


#include "common.hlsli"
#include "world.h.hlsli"
#include "world.hlsli"
#include "oit.h.hlsli"


cbuffer RootConstants
{
	matrix modelViewMatrix;
	matrix projectionMatrix;
	float4 clipPlane;

	uint textureIndex;
	uint samplerIndex;
	uint alphaTest; // one of the ATEST_* constants
	uint counterBuffer;  // descriptor heap index of the OIT_Counter buffer
	uint indexTexture;   // descriptor heap index of the per-pixel list head texture
	uint fragmentBuffer; // descriptor heap index of the OIT_Fragment buffer
	float greyscale;
	uint stateBits;
	uint shaderTrace;
	uint depthFadeDistOffset; // offset: fp16 - distance: fp16
	uint depthFadeScaleBias; // enable: 1 - color bias: 4 - color scale: 4
};

#if VERTEX_SHADER

struct VIn
{
	float3 position : POSITION;
	float3 normal : NORMAL;
	float2 texCoords : TEXCOORD0;
	float4 color : COLOR0;
};

#endif

struct VOut
{
	float4 position : SV_Position;
	float3 normal : NORMAL;
	float2 texCoords : TEXCOORD0;
	float4 color : COLOR0;
	float clipDist : SV_ClipDistance0;
	float2 proj2232 : PROJ;
	float depthVS : DEPTHVS;
};


#if VERTEX_SHADER

// transforms the vertex to clip space and forwards the surface attributes
VOut vs(VIn input)
{
	float4 positionVS = mul(modelViewMatrix, float4(input.position.xyz, 1));

	VOut output;
	output.position = mul(projectionMatrix, positionVS);
	output.normal = input.normal;
	output.texCoords = input.texCoords;
	output.color = input.color;
	// signed distance to the user clip plane, evaluated in view space
	output.clipDist = dot(positionVS, clipPlane);
	output.proj2232 = float2(-projectionMatrix[2][2], projectionMatrix[2][3]);
	// negated so that higher values are further away from the camera
	output.depthVS = -positionVS.z;

	return output;
}

#endif


#if PIXEL_SHADER

// Shades the fragment and pushes it at the head of its pixel's linked list.
// Nothing is written to a render target: the lists are consumed by the resolve pass.
[earlydepthstencil]
void ps(VOut input)
{
	Texture2D texture0 = ResourceDescriptorHeap[textureIndex];
	SamplerState sampler0 = SamplerDescriptorHeap[samplerIndex];
	float4 dst = texture0.Sample(sampler0, input.texCoords) * input.color;
	if(FailsAlphaTest(dst.a, alphaTest))
	{
		return;
	}

	dst = MakeGreyscale(dst, greyscale);

	// structured buffers and RW textures have no default element type, so it must be spelled out
	RWStructuredBuffer<OIT_Counter> counter = ResourceDescriptorHeap[counterBuffer];
	uint fragmentIndex;
	// allocate a slot in the fragment buffer
	InterlockedAdd(counter[0].fragmentCount, 1, fragmentIndex);
	if(fragmentIndex < counter[0].maxFragmentCount)
	{
		RWTexture2D<uint> indexTex = ResourceDescriptorHeap[indexTexture];
		RWStructuredBuffer<OIT_Fragment> fragments = ResourceDescriptorHeap[fragmentBuffer];
		// make this fragment the new list head and link it to the previous head
		uint prevFragmentIndex;
		InterlockedExchange(indexTex[int2(input.position.xy)], fragmentIndex, prevFragmentIndex);
		OIT_Fragment fragment;
		fragment.color = PackColor(dst);
		fragment.depth = input.depthVS;
		fragment.stateBits = stateBits;
		fragment.next = prevFragmentIndex;
		fragment.shaderTrace = shaderTrace;
		fragment.depthFadeDistOffset = depthFadeDistOffset;
		fragment.depthFadeScaleBias = depthFadeScaleBias;
		fragments[fragmentIndex] = fragment;
	}
	else
	{
		// out of space: count the overflow and give the slot back
		uint garbage;
		InterlockedAdd(counter[0].overflowCount, 1, garbage);
		InterlockedAdd(counter[0].fragmentCount, -1, garbage);
	}
}

#endif
// reads per-pixel fragment linked lists into arrays, sorts them and composites them


#include "common.hlsli"
#include "oit.h.hlsli"
#include "../common/state_bits.h.hlsli"


cbuffer RootConstants
{
	uint renderTargetTexture;
	uint shaderIndexBuffer;
	uint indexTexture;
	uint fragmentBuffer;
	uint centerPixel; // y: 16 - x: 16
	uint depthTexture;
	float proj22;
	float proj32;
	float2 scissorRectMin;
	float2 scissorRectMax;
};

struct VOut
{
	float4 position : SV_Position;
	float2 texCoords : TEXCOORD0;
};


#if VERTEX_SHADER

// emits a full-screen triangle from the vertex index alone
VOut vs(uint id : SV_VertexID)
{
	VOut output;
	output.position = FSTrianglePosFromVertexId(id);
	output.texCoords = FSTriangleTCFromVertexId(id);

	return output;
}

#endif


#if PIXEL_SHADER

// extracts the shader stage index from a fragment's state bits
uint GetShaderStage(uint stateBits)
{
	return (stateBits & GLS_STAGEINDEX_BITS) >> GLS_STAGEINDEX_SHIFT;
}

// true when fragment A must be composited before fragment B:
// A is further away, or at the same depth with a lower stage index
bool IsBehind(float depthA, float depthB, uint stageA, uint stageB)
{
	if(depthA > depthB)
	{
		return true;
	}

	if(depthA == depthB && stageA < stageB)
	{
		return true;
	}

	return false;
}

// from NVIDIA's 2007 "Soft Particles" whitepaper by Tristan Lorach
float Contrast(float d, float power)
{
	bool aboveHalf = d > 0.5;
	float base = saturate(2.0 * (aboveHalf ? (1.0 - d) : d));
	float r = 0.5 * pow(base, power);

	return aboveHalf ? (1.0 - r) : r;
}

// returns 1.0 when the given bit is set, 0.0 otherwise
float GetBitAsFloat(uint bits, uint bitIndex)
{
	return (bits & (1u << bitIndex)) ? 1.0 : 0.0;
}

// unpacks 2 half-precision floats: x from the low 16 bits, y from the high 16 bits
float2 UnpackHalf2(uint data)
{
	return float2(f16tof32(data), f16tof32(data >> 16));
}

// applies the fragment's depth fade against the stored depth
// the color is returned unchanged when the fragment's depth fade bit (bit 8) is clear
float4 DepthFadeFragmentColor(float4 color, OIT_Fragment fragment, float storedDepthZW)
{
	if(((fragment.depthFadeScaleBias >> 8) & 1) == 0)
	{
		return color;
	}

#define BIT(Index) GetBitAsFloat(fragment.depthFadeScaleBias, Index)
	float4 dst = color;
	float2 distOffset = UnpackHalf2(fragment.depthFadeDistOffset);
	float4 fadeColorScale = float4(BIT(0), BIT(1), BIT(2), BIT(3));
	float4 fadeColorBias = float4(BIT(4), BIT(5), BIT(6), BIT(7));
	float zwDepth = storedDepthZW; // stored depth, z/w
	float depthS = LinearDepth(zwDepth, proj22, proj32); // stored depth, linear
	float depthP = fragment.depth - distOffset.y; // fragment depth, linear
	float fadeScale = Contrast((depthS - depthP) * distOffset.x, 2.0);
	dst = lerp(dst * fadeColorScale + fadeColorBias, dst, fadeScale);
#undef BIT

	return dst;
}

// Composites this pixel's transparent fragments on top of the render target color.
// Pixels outside the scissor rectangle return the render target color unchanged.
float4 ps(VOut input) : SV_Target
{
	Texture2D renderTarget = ResourceDescriptorHeap[renderTargetTexture];
	int2 tc = int2(input.position.x, input.position.y);
	float4 color = renderTarget.Load(int3(tc.x, tc.y, 0));
	if(any(input.position.xy < scissorRectMin) ||
		any(input.position.xy > scissorRectMax))
	{
		return color;
	}

	// structured buffers and RW textures have no default element type, so it must be spelled out
	RWTexture2D<uint> index = ResourceDescriptorHeap[indexTexture];
	RWStructuredBuffer<OIT_Fragment> fragments = ResourceDescriptorHeap[fragmentBuffer];
	Texture2D depthTex = ResourceDescriptorHeap[depthTexture];
	uint fragmentIndex = index[tc];
	uint i;
	OIT_Fragment sorted[OIT_MAX_FRAGMENTS_PER_PIXEL];
	uint fragmentCount = 0;

	// grab this pixel's fragments
	// NOTE(review): index 0 ends the list, so fragment slot 0 can never be read back here
	// presumably the fragment counter starts at 1 - confirm on the CPU side
	while(fragmentIndex != 0 && fragmentCount < OIT_MAX_FRAGMENTS_PER_PIXEL)
	{
		sorted[fragmentCount] = fragments[fragmentIndex];
		fragmentIndex = sorted[fragmentCount].next;
		++fragmentCount;
	}

	// sort the fragments using an insertion sort
	// the result is ordered back to front: the furthest fragment comes first
	for(i = 1; i < fragmentCount; ++i)
	{
		OIT_Fragment insert = sorted[i];
		uint stage = GetShaderStage(insert.stateBits);
		uint j = i;
		while(j > 0 && IsBehind(insert.depth, sorted[j - 1].depth, stage, GetShaderStage(sorted[j - 1].stateBits)))
		{
			sorted[j] = sorted[j - 1];
			--j;
		}
		sorted[j] = insert;
	}

	// blend the results
	float storedDepthZW = depthTex.Load(int3(input.position.xy, 0)).x; // stored depth, z/w
	// NOTE(review): fragment depths are linear view-space depths, yet this starts at 1.0
	// confirm that this is the intended initial value for the emulated depth buffer
	float dstDepth = 1.0;
	for(i = 0; i < fragmentCount; ++i)
	{
		OIT_Fragment frag = sorted[i];
		uint stateBits = frag.stateBits;
		float fragDepth = frag.depth;
		// emulate the "equal" depth function when depth testing is enabled
		if((stateBits & (GLS_DEPTHFUNC_EQUAL | GLS_DEPTHTEST_DISABLE)) == GLS_DEPTHFUNC_EQUAL &&
			fragDepth != dstDepth)
		{
			continue;
		}

		float4 fragColor = UnpackColor(frag.color);
		fragColor = DepthFadeFragmentColor(fragColor, frag, storedDepthZW);
		color = Blend(fragColor, color, frag.stateBits);
		// emulate depth writes
		if((stateBits & GLS_DEPTHMASK_TRUE) != 0u &&
			fragDepth < dstDepth)
		{
			dstDepth = fragDepth;
		}
	}

	// write out the fragment shader ID of the closest fragment of the center pixel
	if(fragmentCount > 0)
	{
		// back to front order: the closest fragment is the last one
		uint lastFragmentIndex = fragmentCount - 1;
		OIT_Fragment closest = sorted[lastFragmentIndex];
		uint shaderTrace = closest.shaderTrace;
		if(shaderTrace & 1)
		{
			uint2 fragmentCoords = uint2(input.position.xy);
			uint2 centerCoords = uint2(centerPixel & 0xFFFF, centerPixel >> 16);
			if(all(fragmentCoords == centerCoords))
			{
				RWByteAddressBuffer shaderIdBuf = ResourceDescriptorHeap[shaderIndexBuffer];
				// one 4-byte slot per frame index
				uint frameIndex = (shaderTrace >> 1) & 3;
				uint shaderId = shaderTrace >> 3;
				shaderIdBuf.Store(frameIndex * 4, shaderId);
			}
		}
	}

	return color;
}

#endif
// UI rendering


struct VOut
{
	float4 position : SV_Position;
	float2 texCoords : TEXCOORD0;
	float4 color : COLOR0;
};

cbuffer RootConstants : register(b0)
{
	float2 scale;
	uint textureIndex;
	uint samplerIndex;
};


#if VERTEX_SHADER

struct VIn
{
	float2 position : POSITION;
	float2 texCoords : TEXCOORD0;
	float4 color : COLOR0;
};

// scales the 2D input position, shifts it by (-1, +1) with Y flipped,
// and forwards the texture coordinates and color
VOut vs(VIn input)
{
	const float2 scaled = input.position * scale;
	const float2 clipXY = float2(scaled.x - 1.0, 1.0 - scaled.y);

	VOut vertex;
	vertex.color = input.color;
	vertex.texCoords = input.texCoords;
	vertex.position = float4(clipXY.x, clipXY.y, 0.0, 1.0);

	return vertex;
}

#endif


#if PIXEL_SHADER

// modulates the vertex color by the sampled texture color
float4 ps(VOut input) : SV_Target
{
	SamplerState texSampler = SamplerDescriptorHeap[samplerIndex];
	Texture2D tex = ResourceDescriptorHeap[textureIndex];
	const float4 texel = tex.Sample(texSampler, input.texCoords);

	return input.color * texel;
}

#endif
// shared world rendering constants


#pragma once


// alpha test modes, as interpreted by FailsAlphaTest
#define ATEST_NONE 0 // never fails
#define ATEST_GT_0 1 // fails when alpha is exactly 0
#define ATEST_LT_HALF 2 // fails when alpha is 0.5 or higher
#define ATEST_GE_HALF 3 // fails when alpha is lower than 0.5
+=========================================================================== +*/ +// shared world surface rendering functions + + +#pragma once + + +bool FailsAlphaTest(float alpha, uint alphaTest) /* returns true when a fragment with this alpha must be rejected; alphaTest is one of the ATEST_* constants, which are not defined in this file and must already be in scope where it is included */ +{ + if(alphaTest == ATEST_GT_0) + return alpha == 0.0; /* only an alpha of exactly zero fails */ + else if(alphaTest == ATEST_LT_HALF) + return alpha >= 0.5; + else if(alphaTest == ATEST_GE_HALF) + return alpha < 0.5; + else // ATEST_NONE + return false; /* any value other than the three tests above never rejects */ +} diff --git a/code/renderer/hlsl/depth_pre_pass.hlsl b/code/renderer/shaders/grp/depth_pre_pass.hlsl similarity index 100% rename from code/renderer/hlsl/depth_pre_pass.hlsl rename to code/renderer/shaders/grp/depth_pre_pass.hlsl diff --git a/code/renderer/hlsl/dynamic_light.hlsl b/code/renderer/shaders/grp/dynamic_light.hlsl similarity index 100% rename from code/renderer/hlsl/dynamic_light.hlsl rename to code/renderer/shaders/grp/dynamic_light.hlsl diff --git a/code/renderer/hlsl/fog.hlsli b/code/renderer/shaders/grp/fog.hlsli similarity index 100% rename from code/renderer/hlsl/fog.hlsli rename to code/renderer/shaders/grp/fog.hlsli diff --git a/code/renderer/hlsl/fog_inside.hlsl b/code/renderer/shaders/grp/fog_inside.hlsl similarity index 100% rename from code/renderer/hlsl/fog_inside.hlsl rename to code/renderer/shaders/grp/fog_inside.hlsl diff --git a/code/renderer/hlsl/fog_outside.hlsl b/code/renderer/shaders/grp/fog_outside.hlsl similarity index 100% rename from code/renderer/hlsl/fog_outside.hlsl rename to code/renderer/shaders/grp/fog_outside.hlsl diff --git a/code/renderer/hlsl/imgui.hlsl b/code/renderer/shaders/grp/imgui.hlsl similarity index 100% rename from code/renderer/hlsl/imgui.hlsl rename to code/renderer/shaders/grp/imgui.hlsl diff --git a/code/renderer/hlsl/mip_1.hlsl b/code/renderer/shaders/grp/mip_1.hlsl similarity index 85% rename from code/renderer/hlsl/mip_1.hlsl rename to code/renderer/shaders/grp/mip_1.hlsl index 553d9f0..ca363eb 100644 --- a/code/renderer/hlsl/mip_1.hlsl +++ b/code/renderer/shaders/grp/mip_1.hlsl @@ -21,6 +21,9 @@
along with Challenge Quake 3. If not, see . // mip-map generation: gamma-space to linear-space transform +#include "../common/mip_gen.hlsli" + + cbuffer RootConstants { float gamma; @@ -32,14 +35,5 @@ RWTexture2D dst : register(u0); [numthreads(8, 8, 1)] void cs(uint3 id : SV_DispatchThreadID) { - // @TODO: is this actually required? - uint w, h; - dst.GetDimensions(w, h); - if(any(id.xy >= uint2(w, h))) - { - return; - } - - float4 v = src[id.xy]; - dst[id.xy] = float4(pow(v.xyz, gamma), v.a); + MipGen_GammaToLinear(dst, src, id, gamma); } diff --git a/code/renderer/hlsl/mip_2.hlsl b/code/renderer/shaders/grp/mip_2.hlsl similarity index 62% rename from code/renderer/hlsl/mip_2.hlsl rename to code/renderer/shaders/grp/mip_2.hlsl index ee23924..35e75dd 100644 --- a/code/renderer/hlsl/mip_2.hlsl +++ b/code/renderer/shaders/grp/mip_2.hlsl @@ -21,6 +21,9 @@ along with Challenge Quake 3. If not, see . // mip-map generation: 8-tap 1D filter +#include "../common/mip_gen.hlsli" + + cbuffer RootConstants { float4 weights; @@ -34,41 +37,10 @@ cbuffer RootConstants RWTexture2D mips[2] : register(u0); -uint2 FixCoords(int2 c) -{ - if(clampMode > 0) - { - // clamp - return uint2(clamp(c, int2(0, 0), maxSize)); - } - - // repeat - return uint2(c & maxSize); -} - [numthreads(8, 8, 1)] void cs(uint3 id : SV_DispatchThreadID) { RWTexture2D src = mips[srcMip]; RWTexture2D dst = mips[dstMip]; - - // @TODO: is this actually required? 
- uint w, h; - dst.GetDimensions(w, h); - if(any(id.xy >= uint2(w, h))) - { - return; - } - - int2 base = int2(id.xy) * scale; - float4 r = float4(0, 0, 0, 0); - r += src[FixCoords(base - offset * 3)] * weights.x; - r += src[FixCoords(base - offset * 2)] * weights.y; - r += src[FixCoords(base - offset * 1)] * weights.z; - r += src[base] * weights.w; - r += src[base + offset] * weights.w; - r += src[FixCoords(base + offset * 2)] * weights.z; - r += src[FixCoords(base + offset * 3)] * weights.y; - r += src[FixCoords(base + offset * 4)] * weights.x; - dst[id.xy] = r; + MipGen_DownSample(dst, src, id, maxSize, clampMode, scale, offset, weights); } diff --git a/code/renderer/hlsl/mip_3.hlsl b/code/renderer/shaders/grp/mip_3.hlsl similarity index 71% rename from code/renderer/hlsl/mip_3.hlsl rename to code/renderer/shaders/grp/mip_3.hlsl index a54994f..c878b23 100644 --- a/code/renderer/hlsl/mip_3.hlsl +++ b/code/renderer/shaders/grp/mip_3.hlsl @@ -21,6 +21,9 @@ along with Challenge Quake 3. If not, see . // mip-map generation: linear-space to gamma-space transform +#include "../common/mip_gen.hlsli" + + cbuffer RootConstants { float4 blendColor; @@ -37,22 +40,5 @@ void cs(uint3 id : SV_DispatchThreadID) { RWTexture2D src = mips[srcMip]; RWTexture2D dst = mips[dstMip]; - - // @TODO: is this actually required? - uint w, h; - dst.GetDimensions(w, h); - if(any(id.xy >= uint2(w, h))) - { - return; - } - - // yes, intensity *should* be done in light-linear space - // but we keep the old behavior for consistency... 
- float4 in0 = src[id.xy]; - float3 in1 = 0.5 * (in0.rgb + blendColor.rgb); - float3 inV = lerp(in0.rgb, in1.rgb, blendColor.a); - float3 out0 = pow(max(inV, 0.0), invGamma); - float3 out1 = out0 * intensity; - float4 outV = saturate(float4(out1, in0.a)); - dst[id.xy] = outV; + MipGen_LinearToGamma(dst, src, id, blendColor, intensity, invGamma); } diff --git a/code/renderer/hlsl/nuklear.hlsl b/code/renderer/shaders/grp/nuklear.hlsl similarity index 100% rename from code/renderer/hlsl/nuklear.hlsl rename to code/renderer/shaders/grp/nuklear.hlsl diff --git a/code/renderer/hlsl/post_gamma.hlsl b/code/renderer/shaders/grp/post_gamma.hlsl similarity index 100% rename from code/renderer/hlsl/post_gamma.hlsl rename to code/renderer/shaders/grp/post_gamma.hlsl diff --git a/code/renderer/hlsl/post_inverse_gamma.hlsl b/code/renderer/shaders/grp/post_inverse_gamma.hlsl similarity index 98% rename from code/renderer/hlsl/post_inverse_gamma.hlsl rename to code/renderer/shaders/grp/post_inverse_gamma.hlsl index 9ab4d4a..0d4fd4d 100644 --- a/code/renderer/hlsl/post_inverse_gamma.hlsl +++ b/code/renderer/shaders/grp/post_inverse_gamma.hlsl @@ -65,7 +65,7 @@ float4 ps(VOut input) : SV_Target float3 base = texture0.Sample(sampler0, input.texCoords).rgb; float3 linearSpace = pow(base * invBrightness, gamma); - return float4(linearSpace, 1.0f); + return float4(linearSpace, 1.0); } #endif diff --git a/code/renderer/hlsl/shared.hlsli b/code/renderer/shaders/grp/shared.hlsli similarity index 97% rename from code/renderer/hlsl/shared.hlsli rename to code/renderer/shaders/grp/shared.hlsli index 4fb1b6d..6dcf712 100644 --- a/code/renderer/hlsl/shared.hlsli +++ b/code/renderer/shaders/grp/shared.hlsli @@ -21,6 +21,11 @@ along with Challenge Quake 3. If not, see . 
// helper functions used by multiple shader files +#if !defined(DISABLE_PRAGMA_ONCE) +#pragma once +#endif + + float4 MakeGreyscale(float4 input, float amount) { float grey = dot(input.rgb, float3(0.299, 0.587, 0.114)); diff --git a/code/renderer/hlsl/smaa.hlsli b/code/renderer/shaders/grp/smaa.hlsli similarity index 100% rename from code/renderer/hlsl/smaa.hlsli rename to code/renderer/shaders/grp/smaa.hlsli diff --git a/code/renderer/hlsl/smaa_1.hlsl b/code/renderer/shaders/grp/smaa_1.hlsl similarity index 100% rename from code/renderer/hlsl/smaa_1.hlsl rename to code/renderer/shaders/grp/smaa_1.hlsl diff --git a/code/renderer/hlsl/smaa_2.hlsl b/code/renderer/shaders/grp/smaa_2.hlsl similarity index 100% rename from code/renderer/hlsl/smaa_2.hlsl rename to code/renderer/shaders/grp/smaa_2.hlsl diff --git a/code/renderer/hlsl/smaa_3.hlsl b/code/renderer/shaders/grp/smaa_3.hlsl similarity index 100% rename from code/renderer/hlsl/smaa_3.hlsl rename to code/renderer/shaders/grp/smaa_3.hlsl diff --git a/code/renderer/hlsl/uber_shader.hlsl b/code/renderer/shaders/grp/uber_shader.hlsl similarity index 70% rename from code/renderer/hlsl/uber_shader.hlsl rename to code/renderer/shaders/grp/uber_shader.hlsl index 4242191..fb85994 100644 --- a/code/renderer/hlsl/uber_shader.hlsl +++ b/code/renderer/shaders/grp/uber_shader.hlsl @@ -165,6 +165,8 @@ VOut vs(VIn input) #if USE_INCLUDES #include "shared.hlsli" +#include "../common/state_bits.h.hlsli" +#include "../common/blend.hlsli" #endif cbuffer RootConstants @@ -191,86 +193,6 @@ Texture2D textures2D[4096] : register(t0); SamplerState samplers[96] : register(s0); RWByteAddressBuffer shaderIndexBuffer : register(u0); -#define GLS_SRCBLEND_ZERO 0x00000001 -#define GLS_SRCBLEND_ONE 0x00000002 -#define GLS_SRCBLEND_DST_COLOR 0x00000003 -#define GLS_SRCBLEND_ONE_MINUS_DST_COLOR 0x00000004 -#define GLS_SRCBLEND_SRC_ALPHA 0x00000005 -#define GLS_SRCBLEND_ONE_MINUS_SRC_ALPHA 0x00000006 -#define GLS_SRCBLEND_DST_ALPHA 0x00000007 
-#define GLS_SRCBLEND_ONE_MINUS_DST_ALPHA 0x00000008 -#define GLS_SRCBLEND_ALPHA_SATURATE 0x00000009 -#define GLS_SRCBLEND_BITS 0x0000000f - -#define GLS_DSTBLEND_ZERO 0x00000010 -#define GLS_DSTBLEND_ONE 0x00000020 -#define GLS_DSTBLEND_SRC_COLOR 0x00000030 -#define GLS_DSTBLEND_ONE_MINUS_SRC_COLOR 0x00000040 -#define GLS_DSTBLEND_SRC_ALPHA 0x00000050 -#define GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA 0x00000060 -#define GLS_DSTBLEND_DST_ALPHA 0x00000070 -#define GLS_DSTBLEND_ONE_MINUS_DST_ALPHA 0x00000080 -#define GLS_DSTBLEND_BITS 0x000000f0 - -#define GLS_ATEST_GT_0 0x10000000 -#define GLS_ATEST_LT_80 0x20000000 -#define GLS_ATEST_GE_80 0x40000000 -#define GLS_ATEST_BITS 0x70000000 - -float4 BlendSource(float4 src, float4 dst, uint stateBits) -{ - if(stateBits == GLS_SRCBLEND_ZERO) - return float4(0.0, 0.0, 0.0, 0.0); - else if(stateBits == GLS_SRCBLEND_ONE) - return src; - else if(stateBits == GLS_SRCBLEND_DST_COLOR) - return src * dst; - else if(stateBits == GLS_SRCBLEND_ONE_MINUS_DST_COLOR) - return src * (float4(1.0, 1.0, 1.0, 1.0) - dst); - else if(stateBits == GLS_SRCBLEND_SRC_ALPHA) - return src * float4(src.a, src.a, src.a, 1.0); - else if(stateBits == GLS_SRCBLEND_ONE_MINUS_SRC_ALPHA) - return src * float4(1.0 - src.a, 1.0 - src.a, 1.0 - src.a, 1.0); - else if(stateBits == GLS_SRCBLEND_DST_ALPHA) - return src * float4(dst.a, dst.a, dst.a, 1.0); - else if(stateBits == GLS_SRCBLEND_ONE_MINUS_DST_ALPHA) - return src * float4(1.0 - dst.a, 1.0 - dst.a, 1.0 - dst.a, 1.0); - else if(stateBits == GLS_SRCBLEND_ALPHA_SATURATE) - return src * float4(src.a, src.a, src.a, 1.0); // ????????? 
- else - return src; -} - -float4 BlendDest(float4 src, float4 dst, uint stateBits) -{ - if(stateBits == GLS_DSTBLEND_ZERO) - return float4(0.0, 0.0, 0.0, 0.0); - else if(stateBits == GLS_DSTBLEND_ONE) - return dst; - else if(stateBits == GLS_DSTBLEND_SRC_COLOR) - return dst * src; - else if(stateBits == GLS_DSTBLEND_ONE_MINUS_SRC_COLOR) - return dst * float4(1.0 - src.r, 1.0 - src.g, 1.0 - src.b, 1.0 - src.a); - else if(stateBits == GLS_DSTBLEND_SRC_ALPHA) - return dst * float4(src.a, src.a, src.a, 1.0); - else if(stateBits == GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA) - return dst * float4(1.0 - src.a, 1.0 - src.a, 1.0 - src.a, 0.0); - else if(stateBits == GLS_DSTBLEND_DST_ALPHA) - return dst * float4(dst.a, dst.a, dst.a, 1.0); - else if(stateBits == GLS_DSTBLEND_ONE_MINUS_DST_ALPHA) - return dst * float4(1.0 - dst.a, 1.0 - dst.a, 1.0 - dst.a, 1.0); - else - return float4(0.0, 0.0, 0.0, 0.0); -} - -float4 Blend(float4 src, float4 dst, uint stateBits) -{ - float4 srcOut = BlendSource(src, dst, stateBits & GLS_SRCBLEND_BITS); - float4 dstOut = BlendDest(src, dst, stateBits & GLS_DSTBLEND_BITS); - - return srcOut + dstOut; -} - bool FailsAlphaTest(float alpha, uint stateBits) { if(stateBits == GLS_ATEST_GT_0) diff --git a/code/renderer/hlsl/ui.hlsl b/code/renderer/shaders/grp/ui.hlsl similarity index 100% rename from code/renderer/hlsl/ui.hlsl rename to code/renderer/shaders/grp/ui.hlsl diff --git a/code/renderer/grp_imgui.cpp b/code/renderer/srp_imgui.cpp similarity index 82% rename from code/renderer/grp_imgui.cpp rename to code/renderer/srp_imgui.cpp index c9ada9c..f962986 100644 --- a/code/renderer/grp_imgui.cpp +++ b/code/renderer/srp_imgui.cpp @@ -1,6 +1,6 @@ /* =========================================================================== -Copyright (C) 2022-2023 Gian 'myT' Schellenbaum +Copyright (C) 2022-2024 Gian 'myT' Schellenbaum This file is part of Challenge Quake 3 (CNQ3). 
@@ -18,13 +18,11 @@ You should have received a copy of the GNU General Public License along with Challenge Quake 3. If not, see . =========================================================================== */ -// Gameplay Rendering Pipeline - Dear ImGUI integration +// Shared Rendering Pipeline - Dear ImGUI integration -#include "grp_local.h" +#include "srp_local.h" #include "../imgui/imgui.h" -#include "hlsl/imgui_vs.h" -#include "hlsl/imgui_ps.h" #define MAX_VERTEX_COUNT (64 << 10) @@ -46,13 +44,16 @@ struct PixelRC #pragma pack(pop) -void ImGUI::Init() +HTexture ImGUI::Init(bool ddhi_, const ShaderByteCode& vs, const ShaderByteCode& ps, TextureFormat::Id rtFormat, HDescriptorTable descTable, RootSignatureDesc* rootSigDesc) { + ddhi = ddhi_; + descriptorTable = descTable; + ImGuiIO& io = ImGui::GetIO(); io.DisplaySize.x = glConfig.vidWidth; io.DisplaySize.y = glConfig.vidHeight; - if(grp.firstInit) + if(srp.firstInit) { io.BackendRendererUserData = this; io.BackendRendererName = "CNQ3 Direct3D 12"; @@ -71,8 +72,13 @@ void ImGUI::Init() fr->vertexBuffer = CreateBuffer(idx); } + if(ddhi) { - RootSignatureDesc desc = grp.rootSignatureDesc; + rootSignature = RHI_MAKE_NULL_HANDLE(); + } + else + { + RootSignatureDesc desc = *rootSigDesc; desc.name = "Dear ImGUI"; desc.constants[ShaderStage::Vertex].byteCount = sizeof(VertexRC); desc.constants[ShaderStage::Pixel].byteCount = sizeof(PixelRC); @@ -100,8 +106,8 @@ void ImGUI::Init() { GraphicsPipelineDesc desc("Dear ImGUI", rootSignature); desc.shortLifeTime = true; - desc.vertexShader = ShaderByteCode(g_vs); - desc.pixelShader = ShaderByteCode(g_ps); + desc.vertexShader = vs; + desc.pixelShader = ps; desc.vertexLayout.bindingStrides[0] = sizeof(ImDrawVert); desc.vertexLayout.AddAttribute(0, ShaderSemantic::Position, DataType::Float32, 2, offsetof(ImDrawVert, pos)); @@ -114,36 +120,39 @@ void ImGUI::Init() desc.depthStencil.enableDepthTest = false; desc.depthStencil.enableDepthWrites = false; desc.rasterizer.cullMode = 
CT_TWO_SIDED; - desc.AddRenderTarget(GLS_SRCBLEND_SRC_ALPHA | GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA, grp.renderTargetFormat); + desc.AddRenderTarget(GLS_SRCBLEND_SRC_ALPHA | GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA, rtFormat); pipeline = CreateGraphicsPipeline(desc); } - RegisterFontAtlas(); + if(ddhi) + { + const uint32_t fontIndex = GetTextureIndexSRV(fontAtlas); + io.Fonts->SetTexID((ImTextureID)fontIndex); + } + + return fontAtlas; } -void ImGUI::RegisterFontAtlas() +void ImGUI::RegisterFontAtlas(uint32_t fontIndex) { - ImGuiIO& io = ImGui::GetIO(); - - const uint32_t fontIndex = grp.RegisterTexture(fontAtlas); - io.Fonts->SetTexID((ImTextureID)fontIndex); + ImGui::GetIO().Fonts->SetTexID((ImTextureID)fontIndex); } -void ImGUI::Draw() +void ImGUI::Draw(HTexture renderTarget) { if(r_debugUI->integer == 0) { - SafeEndFrame(); + EndFrame(); return; } - grp.renderMode = RenderMode::ImGui; + srp.renderMode = RenderMode::ImGui; ImGuiIO& io = ImGui::GetIO(); io.DisplaySize.x = glConfig.vidWidth; io.DisplaySize.y = glConfig.vidHeight; - SafeEndFrame(); + EndFrame(); ImGui::Render(); const ImDrawData* drawData = ImGui::GetDrawData(); @@ -151,7 +160,7 @@ void ImGUI::Draw() // avoid rendering when minimized if(drawData->DisplaySize.x <= 0.0f || drawData->DisplaySize.y <= 0.0f) { - grp.renderMode = RenderMode::None; + srp.renderMode = RenderMode::None; return; } @@ -201,14 +210,27 @@ void ImGUI::Draw() const uint32_t vertexStride = sizeof(ImDrawVert); static_assert(sizeof(ImDrawIdx) == 4, "uint32 indices expected!"); - CmdBindRenderTargets(1, &grp.renderTarget, NULL); - CmdBindRootSignature(rootSignature); + CmdBindRenderTargets(1, &renderTarget, NULL); + if(!ddhi) + { + CmdBindRootSignature(rootSignature); + } CmdBindPipeline(pipeline); - CmdBindDescriptorTable(rootSignature, grp.descriptorTable); + if(!ddhi) + { + CmdBindDescriptorTable(rootSignature, descriptorTable); + } CmdBindVertexBuffers(1, &fr->vertexBuffer, &vertexStride, NULL); CmdBindIndexBuffer(fr->indexBuffer, 
IndexType::UInt32, 0); CmdSetViewport(0, 0, drawData->DisplaySize.x, drawData->DisplaySize.y); - CmdSetRootConstants(rootSignature, ShaderStage::Vertex, &vertexRC); + if(ddhi) + { + CmdSetGraphicsRootConstants(0, sizeof(vertexRC), &vertexRC); + } + else + { + CmdSetRootConstants(rootSignature, ShaderStage::Vertex, &vertexRC); + } // Render command lists // (Because we merged all buffers into a single one, we maintain our own offset into them) @@ -234,7 +256,14 @@ void ImGUI::Draw() pixelRC.texture = (uint32_t)cmd->TextureId & 0xFFFF; pixelRC.sampler = GetSamplerIndex(TW_CLAMP_TO_EDGE, TextureFilter::Linear); pixelRC.mip = (float)(((uint32_t)cmd->TextureId >> 16) & 0xFFFF); - CmdSetRootConstants(rootSignature, ShaderStage::Pixel, &pixelRC); + if(ddhi) + { + CmdSetGraphicsRootConstants(sizeof(vertexRC), sizeof(pixelRC), &pixelRC); + } + else + { + CmdSetRootConstants(rootSignature, ShaderStage::Pixel, &pixelRC); + } // Apply Scissor/clipping rectangle, Draw CmdSetScissor(clip_min.x, clip_min.y, clip_max.x - clip_min.x, clip_max.y - clip_min.y); @@ -245,10 +274,10 @@ void ImGUI::Draw() globalVtxOffset += cmdList->VtxBuffer.Size; } - grp.renderMode = RenderMode::None; + srp.renderMode = RenderMode::None; } -void ImGUI::SafeBeginFrame() +void ImGUI::BeginFrame() { if(!frameStarted) { @@ -257,7 +286,7 @@ void ImGUI::SafeBeginFrame() } } -void ImGUI::SafeEndFrame() +void ImGUI::EndFrame() { if(frameStarted) { diff --git a/code/renderer/srp_local.h b/code/renderer/srp_local.h new file mode 100644 index 0000000..de16472 --- /dev/null +++ b/code/renderer/srp_local.h @@ -0,0 +1,435 @@ +/* +=========================================================================== +Copyright (C) 2023-2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). 
+ +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . +=========================================================================== +*/ +// Shared Rendering Pipeline - private declarations + + +#pragma once + + +#include "tr_local.h" +#include "rhi_local.h" + + +using namespace RHI; + + +struct BufferBase /* element-count bookkeeping for a buffer that is filled in consecutive batches */ +{ + bool CanAdd(uint32_t count_) /* true when count_ more elements still fit after the current batch */ + { + return batchFirst + batchCount + count_ <= totalCount; + } + + void EndBatch() /* closes the current batch; the next one starts right after it */ + { + batchFirst += batchCount; + batchCount = 0; + } + + void EndBatch(uint32_t size) /* same as EndBatch() but advances by an explicit element count instead of batchCount */ + { + batchFirst += size; + batchCount = 0; + } + + void Rewind() /* restarts batching at the beginning of the buffer */ + { + batchFirst = 0; + batchCount = 0; + } + + uint32_t totalCount = 0; /* capacity, in elements */ + uint32_t batchFirst = 0; /* index of the first element of the current batch */ + uint32_t batchCount = 0; /* number of elements in the current batch */ +}; + +struct IndexBuffer : BufferBase /* batched buffer of 32-bit indices */ +{ + void Create(const char* name, MemoryUsage::Id memoryUsage, uint32_t indexCount) /* creates a committed buffer holding indexCount uint32_t indices, named "<name> index" */ + { + totalCount = indexCount; + + BufferDesc desc = {}; + desc.committedResource = true; + desc.initialState = ResourceStates::IndexBufferBit; + desc.memoryUsage = memoryUsage; + desc.name = va("%s index", name); + desc.byteCount = indexCount * sizeof(uint32_t); + buffer = CreateBuffer(desc); + } + + void BeginUpload() /* maps the buffer; 'mapped' is valid until EndUpload */ + { + mapped = (uint32_t*)BeginBufferUpload(buffer); + } + + void EndUpload() + { + EndBufferUpload(buffer); + mapped = NULL; + } + + void Upload() /* copies tess.indexes to the end of the current batch; it neither updates batchCount nor checks the capacity */ + { + Q_assert(mapped != NULL); + + uint32_t* const idx = mapped + batchFirst + batchCount; + memcpy(idx, &tess.indexes[0], tess.numIndexes *
sizeof(uint32_t)); + } + + uint32_t* GetCurrentAddress() /* write position: the end of the current batch inside the mapped memory */ + { + return mapped + batchFirst + batchCount; + } + + HBuffer buffer = RHI_MAKE_NULL_HANDLE(); + uint32_t* mapped = NULL; /* non-NULL only between BeginUpload and EndUpload */ +}; + +struct GeometryBuffer : BufferBase /* batched buffer of fixed-stride elements */ +{ + void Init(uint32_t count_, uint32_t stride_) /* records the sizes and resets the batch without creating a GPU buffer (the handle is reset to null) */ + { + buffer = RHI_MAKE_NULL_HANDLE(); + byteCount = count_ * stride_; + stride = stride_; + totalCount = count_; + batchFirst = 0; + batchCount = 0; + } + + void CreateVertexBuffer(const char* name, MemoryUsage::Id memoryUsage, uint32_t count, uint32_t stride_) /* creates a committed buffer of count * stride_ bytes in the vertex buffer state and resets the batch */ + { + BufferDesc desc = {}; + desc.committedResource = true; + desc.initialState = ResourceStates::VertexBufferBit; + desc.memoryUsage = memoryUsage; + desc.name = name; + desc.byteCount = count * stride_; + buffer = CreateBuffer(desc); + byteCount = count * stride_; + stride = stride_; + totalCount = count; + batchFirst = 0; + batchCount = 0; + } + + void BeginUpload() /* asserts that the buffer is not already mapped */ + { + Q_assert(mapped == NULL); + mapped = BeginBufferUpload(buffer); + } + + void EndUpload() + { + Q_assert(mapped != NULL); + EndBufferUpload(buffer); + mapped = NULL; + } + + HBuffer buffer = RHI_MAKE_NULL_HANDLE(); + uint32_t byteCount = 0; /* totalCount * stride */ + uint32_t stride = 0; /* size of one element, in bytes */ + uint8_t* mapped = NULL; /* non-NULL only between BeginUpload and EndUpload */ +}; + +struct RenderMode /* what is currently being drawn; GetSamplerIndex(const image_t*) reads srp.renderMode to choose the filter and the minimum LOD */ +{ + enum Id + { + None, + UI, + World, + ImGui, + Nuklear, + Count + }; +}; + +struct RenderPassQueries /* timing record of one render pass; name, cpuStartUS, cpuDurationUS and queryIndex are written by SRP::BeginRenderPass / SRP::EndRenderPass */ +{ + char name[64]; + uint32_t gpuDurationUS; + uint32_t cpuDurationUS; + int64_t cpuStartUS; + uint32_t queryIndex; +}; + +enum +{ + MaxRenderPasses = 64, // cg_draw3dIcons forces tons of 2D/3D transitions...
+ MaxStatsFrameCount = 64 +}; + +struct RenderPassStats /* ring buffers of the most recent CPU and GPU durations of one render pass, plus the statistics computed from them */ +{ + void EndFrame(uint32_t cpu, uint32_t gpu); /* stores one CPU and one GPU sample and refreshes statsCPU / statsGPU */ + + uint32_t samplesCPU[MaxStatsFrameCount]; + uint32_t samplesGPU[MaxStatsFrameCount]; + stats_t statsCPU; + stats_t statsGPU; + uint32_t count; /* number of valid samples, saturates at MaxStatsFrameCount */ + uint32_t index; /* next write position, wraps at MaxStatsFrameCount */ +}; + +struct RenderPassFrame /* the render passes recorded during one frame */ +{ + RenderPassQueries passes[MaxRenderPasses]; + uint32_t count; /* number of used entries in passes */ +}; + +struct FrameStats /* ring buffer of p2pMS samples plus the statistics computed from them */ +{ + enum { MaxFrames = 1024 }; + + void EndFrame(); /* advances the ring buffer and recomputes p2pStats */ + + float temp[MaxFrames]; /* scratch array handed to Com_StatsFromArray */ + float p2pMS[MaxFrames]; + stats_t p2pStats; + int frameCount; /* number of valid samples, saturates at MaxFrames */ + int frameIndex; /* ring buffer position, wraps at MaxFrames */ + int skippedFrames; +}; + +struct MipMapGenerator /* generates the mip-maps of a texture in three stages, see Stage::Id */ +{ + void Init(bool ddhi, const ShaderByteCode& g2l, const ShaderByteCode& down, const ShaderByteCode& l2g); + void GenerateMipMaps(HTexture texture); + +private: + struct Stage + { + enum Id + { + Start, // gamma to linear + DownSample, // down sample on 1 axis + End, // linear to gamma + Count + }; + + HRootSignature rootSignature; + HDescriptorTable descriptorTable; + HPipeline pipeline; + }; + + struct MipSlice + { + enum Id + { + Float16_0, + Float16_1, + Count + }; + }; + + HTexture textures[MipSlice::Count]; + Stage stages[Stage::Count]; /* one entry per Stage::Id; sized by the enum like textures[] instead of a literal 3 */ + bool ddhi = false; +}; + +struct UI +{ + void Init(bool ddhi_, const ShaderByteCode& vs, const ShaderByteCode& ps, TextureFormat::Id rtFormat, + HDescriptorTable descTable, RootSignatureDesc* rootSigDesc); + void BeginFrame(); + void Begin(HTexture renderTarget); + void End(); + void CmdSetColor(const uiSetColorCommand_t& cmd); + void CmdDrawQuad(const uiDrawQuadCommand_t& cmd); + void CmdDrawTriangle(const uiDrawTriangleCommand_t& cmd); + +private: + void DrawBatch(); + + // 32-bit needed until the render logic is fixed!
+ typedef uint32_t Index; + const IndexType::Id indexType = IndexType::UInt32; /* must stay in sync with the Index typedef */ + + uint32_t renderPassIndex; + +#pragma pack(push, 1) + struct Vertex /* packed with 1-byte alignment */ + { + vec2_t position; + vec2_t texCoords; + uint32_t color; + }; +#pragma pack(pop) + int maxIndexCount; + int maxVertexCount; + int firstIndex; + int firstVertex; + int indexCount; + int vertexCount; + HRootSignature rootSignature; + HDescriptorTable descriptorTable; + HPipeline pipeline; + HBuffer indexBuffer; + HBuffer vertexBuffer; + Index* indices; + Vertex* vertices; + uint32_t color; + const shader_t* shader; + bool ddhi; // direct descriptor heap indexing +}; + +struct ImGUI /* Dear ImGUI renderer shared by the render pipelines */ +{ + HTexture Init(bool ddhi, const ShaderByteCode& vs, const ShaderByteCode& ps, TextureFormat::Id rtFormat, HDescriptorTable descTable, RootSignatureDesc* rootSigDesc); /* returns the font atlas texture; with ddhi set the root signature handle is null and the font atlas is registered by its SRV index, otherwise a root signature is built from *rootSigDesc */ + void RegisterFontAtlas(uint32_t fontIndex); /* stores fontIndex as the texture ID of the ImGui font atlas */ + void Draw(HTexture renderTarget); /* renders the ImGui draw data into renderTarget; when r_debugUI is 0 it only ends the frame */ + void BeginFrame(); + void EndFrame(); + +private: + struct FrameResources + { + HBuffer indexBuffer; + HBuffer vertexBuffer; + }; + + HRootSignature rootSignature; + HDescriptorTable descriptorTable; + HPipeline pipeline; + HTexture fontAtlas; + FrameResources frameResources[FrameCount]; + bool frameStarted = false; /* checked by BeginFrame / EndFrame */ + bool ddhi = false; /* direct descriptor heap indexing, same meaning as UI::ddhi */ +}; + +struct Nuklear +{ + void Init(bool ddhi, const ShaderByteCode& vs, const ShaderByteCode& ps, TextureFormat::Id rtFormat, HDescriptorTable descTable, RootSignatureDesc* rootSigDesc); + void BeginFrame(); + void Begin(HTexture renderTarget); + void End(); + void Upload(const nuklearUploadCommand_t& cmd); + void Draw(const nuklearDrawCommand_t& cmd); + +private: + struct FrameResources + { + HBuffer indexBuffer; + HBuffer vertexBuffer; + }; + + HRootSignature rootSignature; + HDescriptorTable descriptorTable; + HPipeline pipeline; + FrameResources frameResources[FrameCount]; + uint32_t renderPassIndex; + int prevScissorRect[4]; + + // reset every frame + int firstVertex; + int firstIndex; + int numVertexes; // set in Upload + int numIndexes; // set in
Upload + bool ddhi = false; +}; + +struct SRP /* state and helpers shared by the render pipelines (srp_main.cpp selects between grpp and crpp) */ +{ + uint32_t BeginRenderPass(const char* name, float r, float g, float b); /* opens a debug label and, when enableRenderPassQueries is set, starts CPU and GPU timing; returns the value to hand to EndRenderPass */ + void EndRenderPass(uint32_t index); /* index must be the value returned by the matching BeginRenderPass */ + + // @NOTE: SRP::BeginFrame doesn't call RHI::BeginFrame + // @NOTE: SRP::EndFrame calls RHI::EndFrame and Sys_V_EndFrame + void BeginFrame(); // call at the start of IRenderPipeline::BeginFrame + void EndFrame(); // call at the end of IRenderPipeline::EndFrame + + void DrawGUI(); + + // call this in Init but only on srp.firstInit + // you need to register them in your own local descriptor table(s) + void CreateShaderTraceBuffers(); + + bool firstInit = true; // first RP init after a RHI init? + RenderMode::Id renderMode; // necessary for sampler selection, useful for debugging + + // shader trace + HBuffer traceRenderBuffer; + HBuffer traceReadbackBuffer; + + // data for frame breakdown and frame graph + RenderPassFrame renderPasses[FrameCount]; /* indexed with tr.frameCount % FrameCount */ + RenderPassFrame tempRenderPasses; + RenderPassStats renderPassStats[MaxRenderPasses]; + RenderPassStats wholeFrameStats; + FrameStats frameStats; + bool enableRenderPassQueries = true; /* when false, BeginRenderPass / EndRenderPass only emit debug labels */ + + // PSO stats + bool psoStatsValid = false; + int psoCount = 0; + int psoChangeCount = 0; +}; + +extern SRP srp; + +struct ScopedRenderPass /* begins a render pass in the constructor and ends it in the destructor */ +{ + ScopedRenderPass(const char* name, float r, float g, float b) + { + index = srp.BeginRenderPass(name, r, g, b); + } + + ~ScopedRenderPass() + { + srp.EndRenderPass(index); + } + + uint32_t index; +}; + +#define SCOPED_RENDER_PASS(Name, R, G, B) ScopedRenderPass CONCAT(rp_, __LINE__)(Name, R, G, B) /* declares a ScopedRenderPass local; NOTE(review): a unique name per line requires CONCAT to expand __LINE__ before pasting - confirm */ + +#define BASE_SAMPLER_COUNT ((int)(TW_COUNT * TextureFilter::Count * MaxTextureMips)) /* wrap modes * filters * minimum LODs, the three factors combined by GetBaseSamplerIndex */ + +const image_t* GetBundleImage(const textureBundle_t& bundle); +uint32_t GetBaseSamplerIndex(textureWrap_t wrap, TextureFilter::Id filter, uint32_t minLOD); +uint32_t GetSamplerIndex(textureWrap_t wrap, TextureFilter::Id filter, uint32_t minLOD = 0); +uint32_t GetSamplerIndex(const image_t* image); +void ReadTextureImage(void* outPixels, HTexture
hreadback, int w, int h, int alignment, colorSpace_t colorSpace); +void UpdateEntityData(bool& depthHack, int entityNum, double originalTime); +cullType_t GetMirrorredCullType(cullType_t cullType); +uint32_t AlphaTestShaderConstFromStateBits(unsigned int stateBits); + +inline void CmdSetViewportAndScissor(uint32_t x, uint32_t y, uint32_t w, uint32_t h) /* sets the viewport and the scissor rectangle to the same rectangle */ +{ + CmdSetViewport(x, y, w, h); + CmdSetScissor(x, y, w, h); +} + +inline void CmdSetViewportAndScissor(const viewParms_t& vp) /* same, using the viewport rectangle stored in vp */ +{ + CmdSetViewportAndScissor(vp.viewportX, vp.viewportY, vp.viewportWidth, vp.viewportHeight); +} + +inline bool IsDepthFadeEnabled(const shader_t& shader) /* true when r_depthFade is non-zero and shader.dfType lies strictly between DFT_NONE and DFT_TBD */ +{ + return + r_depthFade->integer != 0 && + shader.dfType > DFT_NONE && + shader.dfType < DFT_TBD; +} diff --git a/code/renderer/srp_main.cpp b/code/renderer/srp_main.cpp new file mode 100644 index 0000000..0ea4def --- /dev/null +++ b/code/renderer/srp_main.cpp @@ -0,0 +1,539 @@ +/* +=========================================================================== +Copyright (C) 2023-2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see .
+=========================================================================== +*/ +// Shared Rendering Pipeline - core functionality + + +#include "srp_local.h" +#include "../client/cl_imgui.h" +#include "shaders/crp/world.h.hlsli" + + +extern IRenderPipeline* grpp; +extern IRenderPipeline* crpp; + +SRP srp; +IRenderPipeline* renderPipeline; /* set by R_SelectRenderPipeline */ + + +static ImPlotPoint FrameTimeGetter(int index, void*) /* returns the point (index, p2pMS sample), where index 0 maps to ring buffer position frameIndex; it divides by fs.frameCount, so frameCount must be non-zero when this is called */ +{ + const FrameStats& fs = srp.frameStats; + const int realIndex = (fs.frameIndex + index) % fs.frameCount; + const float value = fs.p2pMS[realIndex]; + + ImPlotPoint p; + p.x = index; + p.y = value; + + return p; +} + +static void UpdateAnimatedImage(image_t* image, int w, int h, const byte* data, qbool dirty) /* currently does nothing: both branches only hold @TODO placeholders */ +{ + if(w != image->width || h != image->height) + { + // @TODO: ? + /*image->width = w; + image->height = h; + CreateTexture(&d3d.textures[image->texnum], image, 1, w, h); + GAL_UpdateTexture(image, 0, 0, 0, w, h, data);*/ + } + else if(dirty) + { + // @TODO: ? + //GAL_UpdateTexture(image, 0, 0, 0, w, h, data); + } +} + + +const image_t* GetBundleImage(const textureBundle_t& bundle) /* forwards to R_UpdateAndGetBundleImage with UpdateAnimatedImage as the update callback */ +{ + return R_UpdateAndGetBundleImage(&bundle, &UpdateAnimatedImage); +} + +uint32_t GetBaseSamplerIndex(textureWrap_t wrap, TextureFilter::Id filter, uint32_t minLOD) /* flattens (filter, wrap, minLOD) into a single index: filter varies fastest, minLOD slowest; minLOD itself is not range-checked here */ +{ + Q_assert((uint32_t)wrap < TW_COUNT); + Q_assert((uint32_t)filter < TextureFilter::Count); + + const uint32_t baseIndex = + (uint32_t)filter + + (uint32_t)TextureFilter::Count * (uint32_t)wrap + + (uint32_t)TextureFilter::Count * (uint32_t)TW_COUNT * minLOD; + + return baseIndex; +} + +uint32_t GetSamplerIndex(textureWrap_t wrap, TextureFilter::Id filter, uint32_t minLOD) /* translates the base index into the sampler descriptor index of the active render pipeline */ +{ + const uint32_t baseIndex = GetBaseSamplerIndex(wrap, filter, minLOD); + const uint32_t descIndex = renderPipeline->GetSamplerDescriptorIndexFromBaseIndex(baseIndex); + + return descIndex; +} + +uint32_t GetSamplerIndex(const image_t* image) +{ + TextureFilter::Id filter = TextureFilter::Anisotropic; + if(r_lego->integer && + srp.renderMode ==
RenderMode::World && + (image->flags & (IMG_LMATLAS | IMG_EXTLMATLAS | IMG_NOPICMIP)) == 0) + { + filter = TextureFilter::Point; /* r_lego: point filtering for world images that carry none of the three flags tested above */ + } + else if((image->flags & IMG_NOAF) != 0 || + srp.renderMode != RenderMode::World) + { + filter = TextureFilter::Linear; /* no anisotropic filtering outside of world rendering or for IMG_NOAF images */ + } + + int minLOD = 0; + if(srp.renderMode == RenderMode::World && + (image->flags & IMG_NOPICMIP) == 0) + { + minLOD = Com_ClampInt(0, MaxTextureMips - 1, r_picmip->integer); /* r_picmip only applies to world images without IMG_NOPICMIP */ + } + + return GetSamplerIndex(image->wrapClampMode, filter, (uint32_t)minLOD); +} + +void ReadTextureImage(void* outPixels, HTexture hreadback, int w, int h, int alignment, colorSpace_t colorSpace) /* copies the mapped readback texture into outPixels with the row order reversed (vertical flip); destination rows are padded to a multiple of alignment bytes; h is not read, the row count comes from the mapping */ +{ + MappedTexture mapped; + BeginTextureReadback(mapped, hreadback); + + byte* const out0 = (byte*)outPixels; + const byte* const in0 = mapped.mappedData; + + if(colorSpace == CS_RGBA) + { + const int dstRowSizeNoPadding = w * 4; + mapped.dstRowByteCount = AlignUp(dstRowSizeNoPadding, alignment); + + for(int y = 0; y < mapped.rowCount; ++y) + { + byte* out = out0 + (mapped.rowCount - 1 - y) * mapped.dstRowByteCount; + const byte* in = in0 + y * mapped.srcRowByteCount; + memcpy(out, in, dstRowSizeNoPadding); /* 4 bytes per pixel, copied unchanged */ + } + } + else if(colorSpace == CS_BGR) + { + mapped.dstRowByteCount = AlignUp(w * 3, alignment); + + for(int y = 0; y < mapped.rowCount; ++y) + { + byte* out = out0 + (mapped.rowCount - 1 - y) * mapped.dstRowByteCount; + const byte* in = in0 + y * mapped.srcRowByteCount; + for(int x = 0; x < mapped.columnCount; ++x) + { + out[2] = in[0]; /* 4 source bytes become 3 destination bytes: the first and third channels are swapped, the fourth is dropped */ + out[1] = in[1]; + out[0] = in[2]; + out += 3; + in += 4; + } + } + } + else + { + Q_assert(!"Unsupported color space"); + } + + EndTextureReadback(); +} + +void UpdateEntityData(bool& depthHack, int entityNum, double originalTime) /* makes entityNum the current back-end entity: updates backEnd.currentEntity, backEnd.refdef.floatTime, tess.shaderTime and backEnd.orient; depthHack is set to true only for a non-world entity with RF_DEPTHHACK */ +{ + depthHack = false; + + if(entityNum != ENTITYNUM_WORLD) + { + backEnd.currentEntity = &backEnd.refdef.entities[entityNum]; + if(backEnd.currentEntity->intShaderTime) + { + backEnd.refdef.floatTime = originalTime - (double)backEnd.currentEntity->e.shaderTime.iShaderTime /
1000.0; + } + else + { + backEnd.refdef.floatTime = originalTime - backEnd.currentEntity->e.shaderTime.fShaderTime; + } + // we have to reset the shaderTime as well otherwise image animations start + // from the wrong frame + tess.shaderTime = backEnd.refdef.floatTime - tess.shader->timeOffset; + + // set up the transformation matrix + R_RotateForEntity(backEnd.currentEntity, &backEnd.viewParms, &backEnd.orient); + + if(backEnd.currentEntity->e.renderfx & RF_DEPTHHACK) + { + depthHack = true; + } + } + else + { + backEnd.currentEntity = &tr.worldEntity; + backEnd.refdef.floatTime = originalTime; + backEnd.orient = backEnd.viewParms.world; + // we have to reset the shaderTime as well otherwise image animations on + // the world (like water) continue with the wrong frame + tess.shaderTime = backEnd.refdef.floatTime - tess.shader->timeOffset; + } +} + +cullType_t GetMirrorredCullType(cullType_t cullType) +{ + switch(cullType) + { + case CT_BACK_SIDED: return CT_FRONT_SIDED; + case CT_FRONT_SIDED: return CT_BACK_SIDED; + default: return CT_TWO_SIDED; + } +} + +uint32_t AlphaTestShaderConstFromStateBits(unsigned int stateBits) +{ + stateBits &= GLS_ATEST_BITS; + switch(stateBits) + { + case GLS_ATEST_GE_80: return ATEST_GE_HALF; + case GLS_ATEST_GT_0: return ATEST_GT_0; + case GLS_ATEST_LT_80: return ATEST_LT_HALF; + default: return ATEST_NONE; + } +} + +void R_SelectRenderPipeline() +{ + if(r_pipeline->integer == 0) + { + renderPipeline = grpp; + } + else + { + renderPipeline = crpp; + } +} + + +void FrameStats::EndFrame() +{ + frameCount = min(frameCount + 1, (int)MaxFrames); + frameIndex = (frameIndex + 1) % MaxFrames; + Com_StatsFromArray(p2pMS, frameCount, temp, &p2pStats); +} + + +void RenderPassStats::EndFrame(uint32_t cpu, uint32_t gpu) +{ + static uint32_t tempSamples[MaxStatsFrameCount]; + samplesCPU[index] = cpu; + samplesGPU[index] = gpu; + count = min(count + 1, (uint32_t)MaxStatsFrameCount); + index = (index + 1) % MaxStatsFrameCount; + 
Com_StatsFromArray((const int*)samplesCPU, count, (int*)tempSamples, &statsCPU); + Com_StatsFromArray((const int*)samplesGPU, count, (int*)tempSamples, &statsGPU); +} + + +uint32_t SRP::BeginRenderPass(const char* name, float r, float g, float b) +{ + if(!enableRenderPassQueries) + { + CmdBeginDebugLabel(name, r, g, b); + return 0xDEADBEEF; + } + + RenderPassFrame& f = renderPasses[tr.frameCount % FrameCount]; + if(f.count >= ARRAY_LEN(f.passes)) + { + Q_assert(0); + return UINT32_MAX; + } + + CmdBeginDebugLabel(name, r, g, b); + + const uint32_t index = f.count++; + RenderPassQueries& q = f.passes[index]; + Q_strncpyz(q.name, name, sizeof(q.name)); + q.cpuStartUS = Sys_Microseconds(); + q.queryIndex = CmdBeginDurationQuery(); + + return index; +} + +void SRP::EndRenderPass(uint32_t index) +{ + if(!enableRenderPassQueries) + { + CmdEndDebugLabel(); + return; + } + + Q_assert(index != 0xDEADBEEF); // enableRenderPassQueries should still be false! + RenderPassFrame& f = renderPasses[tr.frameCount % FrameCount]; + if(index >= f.count) + { + Q_assert(0); + return; + } + + CmdEndDebugLabel(); + + RenderPassQueries& q = f.passes[index]; + q.cpuDurationUS = (uint32_t)(Sys_Microseconds() - q.cpuStartUS); + CmdEndDurationQuery(q.queryIndex); +} + +void SRP::DrawGUI() +{ + uint32_t durations[MaxDurationQueries]; + GetDurations(durations); + + wholeFrameStats.EndFrame(rhie.renderToPresentUS, durations[0]); + + const RenderPassFrame& currFrame = renderPasses[(tr.frameCount % FrameCount) ^ 1]; + RenderPassFrame& tempFrame = tempRenderPasses; + + // see if the render pass list is the same as the previous frame's + bool sameRenderPass = true; + if(currFrame.count == tempRenderPasses.count) + { + for(uint32_t p = 0; p < currFrame.count; ++p) + { + if(Q_stricmp(currFrame.passes[p].name, tempRenderPasses.passes[p].name) != 0) + { + sameRenderPass = false; + break; + } + } + } + else + { + sameRenderPass = false; + } + + // write out the displayed timings into the temp buffer + 
tempFrame.count = currFrame.count; + if(sameRenderPass) + { + for(uint32_t p = 0; p < currFrame.count; ++p) + { + const uint32_t index = currFrame.passes[p].queryIndex; + if(index < MaxDurationQueries) + { + renderPassStats[p].EndFrame(currFrame.passes[p].cpuDurationUS, durations[index]); + tempFrame.passes[p].gpuDurationUS = renderPassStats[p].statsGPU.median; + tempFrame.passes[p].cpuDurationUS = renderPassStats[p].statsCPU.median; + } + } + } + else + { + for(uint32_t p = 0; p < currFrame.count; ++p) + { + const uint32_t index = currFrame.passes[p].queryIndex; + if(index < MaxDurationQueries) + { + tempFrame.passes[p].gpuDurationUS = durations[index]; + tempFrame.passes[p].cpuDurationUS = currFrame.passes[p].cpuDurationUS; + } + } + } + + static bool breakdownActive = false; + ToggleBooleanWithShortcut(breakdownActive, ImGuiKey_F); + GUI_AddMainMenuItem(GUI_MainMenu::Perf, "Frame breakdown", "Ctrl+F", &breakdownActive); + if(breakdownActive) + { + if(ImGui::Begin("Frame breakdown", &breakdownActive, ImGuiWindowFlags_AlwaysAutoResize)) + { + if(BeginTable("Frame breakdown", 3)) + { + TableHeader(3, "Pass", "GPU [us]", "CPU [us]"); + + TableRow(3, "Whole frame", + va("%d", (int)wholeFrameStats.statsGPU.median), + va("%d", (int)wholeFrameStats.statsCPU.median)); + + for(uint32_t p = 0; p < currFrame.count; ++p) + { + const RenderPassQueries& rp = tempFrame.passes[p]; + if(rp.queryIndex < MaxDurationQueries) + { + TableRow(3, rp.name, + va("%d", (int)rp.gpuDurationUS), + va("%d", (int)rp.cpuDurationUS)); + } + } + + ImGui::EndTable(); + } + + if(psoStatsValid) + { + ImGui::Text("PSO count: %d", (int)psoCount); + ImGui::Text("PSO changes: %d", (int)psoChangeCount); + } + } + ImGui::End(); + } + + // save the current render pass list in the temp buffer + memcpy(&tempFrame, &currFrame, sizeof(tempFrame)); + + static bool frameTimeActive = false; + GUI_AddMainMenuItem(GUI_MainMenu::Perf, "Frame stats", NULL, &frameTimeActive); + if(frameTimeActive) + { + 
if(ImGui::Begin("Frame stats", &frameTimeActive, ImGuiWindowFlags_AlwaysAutoResize)) + { + if(BeginTable("Frame stats", 2)) + { + const FrameStats& fs = frameStats; + const stats_t& s = fs.p2pStats; + TableRow2("Skipped frames", fs.skippedFrames); + TableRow2("Frame time target", rhie.targetFrameDurationMS); + TableRow2("Frame time average", s.average); + TableRow2("Frame time std dev.", s.stdDev); + TableRow2("Input to render", (float)rhie.inputToRenderUS / 1000.0f); + TableRow2("Input to present", (float)rhie.inputToPresentUS / 1000.0f); + + ImGui::EndTable(); + } + } + ImGui::End(); + } + + static bool graphsActive = false; + ToggleBooleanWithShortcut(graphsActive, ImGuiKey_G); + GUI_AddMainMenuItem(GUI_MainMenu::Perf, "Frame time graphs", "Ctrl+G", &graphsActive); + if(graphsActive) + { + const int windowFlags = + ImGuiWindowFlags_NoDecoration | + ImGuiWindowFlags_NoBackground | + ImGuiWindowFlags_NoMove; + ImGui::SetNextWindowSize(ImVec2(glConfig.vidWidth, glConfig.vidHeight / 2), ImGuiCond_Always); + ImGui::SetNextWindowPos(ImVec2(0, glConfig.vidHeight / 2), ImGuiCond_Always); + if(ImGui::Begin("Frame time graphs", &graphsActive, windowFlags)) + { + const FrameStats& fs = frameStats; + const double target = (double)rhie.targetFrameDurationMS; + + static bool autoFit = false; + ImGui::Checkbox("Auto-fit", &autoFit); + + if(ImPlot::BeginPlot("Frame Times", ImVec2(-1, -1), ImPlotFlags_NoInputs)) + { + const int axisFlags = 0; // ImPlotAxisFlags_NoTickLabels + const int axisFlagsY = axisFlags | (autoFit ? 
ImPlotAxisFlags_AutoFit : 0); + ImPlot::SetupAxes(NULL, NULL, axisFlags, axisFlagsY); + ImPlot::SetupAxisLimits(ImAxis_X1, 0, FrameStats::MaxFrames, ImGuiCond_Always); + if(!autoFit) + { + ImPlot::SetupAxisLimits(ImAxis_Y1, max(target - 2.0, 0.0), target + 2.0, ImGuiCond_Always); + } + + ImPlot::SetNextFillStyle(IMPLOT_AUTO_COL, 1.0f); + ImPlot::SetNextLineStyle(IMPLOT_AUTO_COL, 1.0f); + ImPlot::PlotInfLines("Target", &target, 1, ImPlotInfLinesFlags_Horizontal); + + ImPlot::SetNextFillStyle(IMPLOT_AUTO_COL, 1.0f); + ImPlot::SetNextLineStyle(IMPLOT_AUTO_COL, 1.0f); + ImPlot::PlotLineG("Frame Time", &FrameTimeGetter, NULL, fs.frameCount, ImPlotLineFlags_None); + + ImPlot::EndPlot(); + } + } + ImGui::End(); + } + + GUI_DrawMainMenu(); + R_DrawGUI(); +} + +void SRP::BeginFrame() +{ + srp.renderPasses[tr.frameCount % FrameCount].count = 0; + R_SetColorMappings(); + + // nothing is bound to the command list yet! + srp.renderMode = RenderMode::None; +} + +void SRP::EndFrame() +{ + tr.tracedWorldShaderIndex = -1; + if(tr.traceWorldShader && tr.world != NULL) + { + // schedule a GPU -> CPU transfer + { + BufferBarrier barrier(traceRenderBuffer, ResourceStates::CopySourceBit); + CmdBarrier(0, NULL, 1, &barrier); + } + CmdCopyBuffer(traceReadbackBuffer, traceRenderBuffer); + { + BufferBarrier barrier(traceRenderBuffer, ResourceStates::UnorderedAccessBit); + CmdBarrier(0, NULL, 1, &barrier); + } + + // grab last frame's result + uint32_t* shaderIndices = (uint32_t*)MapBuffer(traceReadbackBuffer); + const uint32_t shaderIndex = shaderIndices[RHI::GetFrameIndex() ^ 1]; + UnmapBuffer(traceReadbackBuffer); + if(shaderIndex < (uint32_t)tr.numShaders) + { + tr.tracedWorldShaderIndex = (int)shaderIndex; + } + } + + RHI::EndFrame(); + + if(rhie.presentToPresentUS > 0) + { + frameStats.p2pMS[frameStats.frameIndex] = (float)rhie.presentToPresentUS / 1000.0f; + frameStats.EndFrame(); + } + else + { + frameStats.skippedFrames++; + } + + if(backEnd.renderFrame) + { + Sys_V_EndFrame(); + } 
+} + +void SRP::CreateShaderTraceBuffers() +{ + { + BufferDesc desc("shader trace opaque", 2 * sizeof(uint32_t), ResourceStates::UnorderedAccessBit); + traceRenderBuffer = CreateBuffer(desc); + } + + { + BufferDesc desc("shader trace opaque readback", 2 * sizeof(uint32_t), ResourceStates::Common); + desc.memoryUsage = MemoryUsage::Readback; + traceReadbackBuffer = CreateBuffer(desc); + } +} diff --git a/code/renderer/grp_mip_gen.cpp b/code/renderer/srp_mip_gen.cpp similarity index 64% rename from code/renderer/grp_mip_gen.cpp rename to code/renderer/srp_mip_gen.cpp index 011025e..d80debb 100644 --- a/code/renderer/grp_mip_gen.cpp +++ b/code/renderer/srp_mip_gen.cpp @@ -1,6 +1,6 @@ /* =========================================================================== -Copyright (C) 2023 Gian 'myT' Schellenbaum +Copyright (C) 2023-2024 Gian 'myT' Schellenbaum This file is part of Challenge Quake 3 (CNQ3). @@ -18,26 +18,10 @@ You should have received a copy of the GNU General Public License along with Challenge Quake 3. If not, see . =========================================================================== */ -// Gameplay Rendering Pipeline - texture mip-map generation +// Shared Rendering Pipeline - texture mip-map generation -// @TODO: test for OOB accesses in the shaders and return -// (also, is it needed with feature level 12.0 and HLSL 5.1/6.0 ?) 
- - -#include "grp_local.h" -namespace start -{ -#include "hlsl/mip_1_cs.h" -} -namespace down -{ -#include "hlsl/mip_2_cs.h" -} -namespace end -{ -#include "hlsl/mip_3_cs.h" -} +#include "srp_local.h" #pragma pack(push, 4) @@ -45,6 +29,8 @@ namespace end struct StartConstants { float gamma; + uint32_t srcTexture; + uint32_t dstTexture; }; struct DownConstants @@ -56,6 +42,8 @@ struct DownConstants uint32_t clampMode; // 0 = repeat uint32_t srcMip; uint32_t dstMip; + uint32_t srcTexture; + uint32_t dstTexture; }; struct EndConstants @@ -65,18 +53,22 @@ struct EndConstants float invGamma; // 1.0 / gamma uint32_t srcMip; uint32_t dstMip; + uint32_t srcTexture; + uint32_t dstTexture; }; #pragma pack(pop) -void MipMapGenerator::Init() +void MipMapGenerator::Init(bool ddhi_, const ShaderByteCode& g2l, const ShaderByteCode& down, const ShaderByteCode& l2g) { - if(!grp.firstInit) + if(!srp.firstInit) { return; } + ddhi = ddhi_; + for(int t = 0; t < 2; ++t) { TextureDesc desc(va("mip-map generation #%d", t + 1), MAX_TEXTURE_SIZE, MAX_TEXTURE_SIZE); @@ -88,26 +80,33 @@ void MipMapGenerator::Init() const char* const stageNames[] = { "start", "down", "end" }; const uint32_t stageRCByteCount[] = { sizeof(StartConstants), sizeof(DownConstants), sizeof(EndConstants) }; - const ShaderByteCode stageShaders[] = { ShaderByteCode(start::g_cs), ShaderByteCode(down::g_cs), ShaderByteCode(end::g_cs) }; const uint32_t stageExtraMips[] = { 1, 0, MaxTextureMips }; + const ShaderByteCode stageShaders[] = { g2l, down, l2g }; for(int s = 0; s < 3; ++s) { Stage& stage = stages[s]; + if(ddhi) { - RootSignatureDesc desc(va("mip-map %s", stageNames[s])); - desc.pipelineType = PipelineType::Compute; - desc.constants[ShaderStage::Compute].byteCount = stageRCByteCount[s]; - desc.AddRange(DescriptorType::RWTexture, 0, MipSlice::Count + stageExtraMips[s]); - stage.rootSignature = CreateRootSignature(desc); + stage.rootSignature = RHI_MAKE_NULL_HANDLE(); } + else { - const DescriptorTableDesc 
desc(DescriptorTableDesc(va("mip-map %s", stageNames[s]), stage.rootSignature)); - stage.descriptorTable = CreateDescriptorTable(desc); + { + RootSignatureDesc desc(va("mip-map %s", stageNames[s])); + desc.pipelineType = PipelineType::Compute; + desc.constants[ShaderStage::Compute].byteCount = stageRCByteCount[s]; + desc.AddRange(DescriptorType::RWTexture, 0, MipSlice::Count + stageExtraMips[s]); + stage.rootSignature = CreateRootSignature(desc); + } + { + const DescriptorTableDesc desc(DescriptorTableDesc(va("mip-map %s", stageNames[s]), stage.rootSignature)); + stage.descriptorTable = CreateDescriptorTable(desc); - DescriptorTableUpdate update; - update.SetRWTexturesSlice(ARRAY_LEN(textures), textures, 0, 0); - UpdateDescriptorTable(stage.descriptorTable, update); + DescriptorTableUpdate update; + update.SetRWTexturesSlice(ARRAY_LEN(textures), textures, 0, 0); + UpdateDescriptorTable(stage.descriptorTable, update); + } } { ComputePipelineDesc desc(va("mip-map %s", stageNames[s]), stage.rootSignature); @@ -119,7 +118,7 @@ void MipMapGenerator::Init() void MipMapGenerator::GenerateMipMaps(HTexture texture) { - // @FIXME: + // @TODO: better look-up image_t* image = NULL; for(int i = 0; i < tr.numImages; ++i) { @@ -153,19 +152,22 @@ void MipMapGenerator::GenerateMipMaps(HTexture texture) CmdBarrier(ARRAY_LEN(allBarriers), allBarriers); } - // this must happen after the BeginTempCommandList call because - // it has a CPU wait that guarantees it's safe to update the descriptor tables + if(!ddhi) { - Stage& stage = stages[Stage::Start]; - DescriptorTableUpdate update; - update.SetRWTexturesSlice(1, &texture, MipSlice::Count, 0); - UpdateDescriptorTable(stage.descriptorTable, update); - } - { - Stage& stage = stages[Stage::End]; - DescriptorTableUpdate update; - update.SetRWTexturesChain(1, &texture, MipSlice::Count); - UpdateDescriptorTable(stage.descriptorTable, update); + // this must happen after the BeginTempCommandList call because + // it has a CPU wait that 
guarantees it's safe to update the descriptor tables + { + Stage& stage = stages[Stage::Start]; + DescriptorTableUpdate update; + update.SetRWTexturesSlice(1, &texture, MipSlice::Count, 0); + UpdateDescriptorTable(stage.descriptorTable, update); + } + { + Stage& stage = stages[Stage::End]; + DescriptorTableUpdate update; + update.SetRWTexturesChain(1, &texture, MipSlice::Count); + UpdateDescriptorTable(stage.descriptorTable, update); + } } int w = image->width; @@ -180,10 +182,22 @@ void MipMapGenerator::GenerateMipMaps(HTexture texture) StartConstants rc = {}; rc.gamma = r_mipGenGamma->value; - CmdBindRootSignature(stage.rootSignature); + if(!ddhi) + { + CmdBindRootSignature(stage.rootSignature); + } CmdBindPipeline(stage.pipeline); - CmdBindDescriptorTable(stage.rootSignature, stage.descriptorTable); - CmdSetRootConstants(stage.rootSignature, ShaderStage::Compute, &rc); + if(ddhi) + { + rc.srcTexture = GetTextureIndexUAV(texture, 0); + rc.dstTexture = GetTextureIndexUAV(textures[MipSlice::Float16_0], 0); + CmdSetComputeRootConstants(0, sizeof(rc), &rc); + } + else + { + CmdBindDescriptorTable(stage.rootSignature, stage.descriptorTable); + CmdSetRootConstants(stage.rootSignature, ShaderStage::Compute, &rc); + } Dispatch(w, h); } @@ -207,10 +221,22 @@ void MipMapGenerator::GenerateMipMaps(HTexture texture) rc.srcMip = MipSlice::Float16_0; rc.dstMip = MipSlice::Count + destMip; - CmdBindRootSignature(stage.rootSignature); + if(!ddhi) + { + CmdBindRootSignature(stage.rootSignature); + } CmdBindPipeline(stage.pipeline); - CmdBindDescriptorTable(stage.rootSignature, stage.descriptorTable); - CmdSetRootConstants(stage.rootSignature, ShaderStage::Compute, &rc); + if(ddhi) + { + rc.srcTexture = GetTextureIndexUAV(textures[MipSlice::Float16_0], 0); + rc.dstTexture = GetTextureIndexUAV(texture, destMip); + CmdSetComputeRootConstants(0, sizeof(rc), &rc); + } + else + { + CmdBindDescriptorTable(stage.rootSignature, stage.descriptorTable); + 
CmdSetRootConstants(stage.rootSignature, ShaderStage::Compute, &rc); + } CmdBarrier(ARRAY_LEN(tempBarriers), tempBarriers); Dispatch(w, h); } @@ -229,9 +255,15 @@ void MipMapGenerator::GenerateMipMaps(HTexture texture) rc.clampMode = image->wrapClampMode == TW_REPEAT ? 0 : 1; memcpy(rc.weights, tr.mipFilter, sizeof(rc.weights)); - CmdBindRootSignature(stage.rootSignature); + if(!ddhi) + { + CmdBindRootSignature(stage.rootSignature); + } CmdBindPipeline(stage.pipeline); - CmdBindDescriptorTable(stage.rootSignature, stage.descriptorTable); + if(!ddhi) + { + CmdBindDescriptorTable(stage.rootSignature, stage.descriptorTable); + } // down-sample on the X-axis rc.srcMip = MipSlice::Float16_0; @@ -242,7 +274,16 @@ void MipMapGenerator::GenerateMipMaps(HTexture texture) rc.maxSize[1] = hs - 1; rc.offset[0] = 1; rc.offset[1] = 0; - CmdSetRootConstants(stage.rootSignature, ShaderStage::Compute, &rc); + if(ddhi) + { + rc.srcTexture = GetTextureIndexUAV(textures[MipSlice::Float16_0], 0); + rc.dstTexture = GetTextureIndexUAV(textures[MipSlice::Float16_1], 0); + CmdSetComputeRootConstants(0, sizeof(rc), &rc); + } + else + { + CmdSetRootConstants(stage.rootSignature, ShaderStage::Compute, &rc); + } CmdBarrier(ARRAY_LEN(tempBarriers), tempBarriers); Dispatch(w, hs); @@ -255,7 +296,16 @@ void MipMapGenerator::GenerateMipMaps(HTexture texture) rc.maxSize[1] = hs - 1; rc.offset[0] = 0; rc.offset[1] = 1; - CmdSetRootConstants(stage.rootSignature, ShaderStage::Compute, &rc); + if(ddhi) + { + rc.srcTexture = GetTextureIndexUAV(textures[MipSlice::Float16_1], 0); + rc.dstTexture = GetTextureIndexUAV(textures[MipSlice::Float16_0], 0); + CmdSetComputeRootConstants(0, sizeof(rc), &rc); + } + else + { + CmdSetRootConstants(stage.rootSignature, ShaderStage::Compute, &rc); + } CmdBarrier(ARRAY_LEN(tempBarriers), tempBarriers); Dispatch(w, h); } @@ -272,10 +322,22 @@ void MipMapGenerator::GenerateMipMaps(HTexture texture) rc.srcMip = MipSlice::Float16_0; rc.dstMip = MipSlice::Count + destMip; - 
CmdBindRootSignature(stage.rootSignature); + if(!ddhi) + { + CmdBindRootSignature(stage.rootSignature); + } CmdBindPipeline(stage.pipeline); - CmdBindDescriptorTable(stage.rootSignature, stage.descriptorTable); - CmdSetRootConstants(stage.rootSignature, ShaderStage::Compute, &rc); + if(ddhi) + { + rc.srcTexture = GetTextureIndexUAV(textures[MipSlice::Float16_0], 0); + rc.dstTexture = GetTextureIndexUAV(texture, destMip); + CmdSetComputeRootConstants(0, sizeof(rc), &rc); + } + else + { + CmdBindDescriptorTable(stage.rootSignature, stage.descriptorTable); + CmdSetRootConstants(stage.rootSignature, ShaderStage::Compute, &rc); + } CmdBarrier(ARRAY_LEN(tempBarriers), tempBarriers); Dispatch(w, h); } diff --git a/code/renderer/grp_nuklear.cpp b/code/renderer/srp_nuklear.cpp similarity index 81% rename from code/renderer/grp_nuklear.cpp rename to code/renderer/srp_nuklear.cpp index 722606d..0b8555f 100644 --- a/code/renderer/grp_nuklear.cpp +++ b/code/renderer/srp_nuklear.cpp @@ -1,6 +1,6 @@ /* =========================================================================== -Copyright (C) 2023 Gian 'myT' Schellenbaum +Copyright (C) 2023-2024 Gian 'myT' Schellenbaum This file is part of Challenge Quake 3 (CNQ3). @@ -21,9 +21,7 @@ along with Challenge Quake 3. If not, see . 
// Gameplay Rendering Pipeline - Nuklear integration -#include "grp_local.h" -#include "hlsl/nuklear_vs.h" -#include "hlsl/nuklear_ps.h" +#include "srp_local.h" #define MAX_NUKLEAR_VERTEX_COUNT (1024 * 1024) @@ -53,9 +51,12 @@ struct NuklearVertex #pragma pack(pop) -void Nuklear::Init() +void Nuklear::Init(bool ddhi_, const ShaderByteCode& vs, const ShaderByteCode& ps, TextureFormat::Id rtFormat, HDescriptorTable descTable, RootSignatureDesc* rootSigDesc) { - if(grp.firstInit) + ddhi = ddhi_; + descriptorTable = descTable; + + if(srp.firstInit) { for(int i = 0; i < FrameCount; i++) { @@ -70,8 +71,9 @@ void Nuklear::Init() fr->vertexBuffer = CreateBuffer(idx); } + if(!ddhi) { - RootSignatureDesc desc = grp.rootSignatureDesc; + RootSignatureDesc desc = *rootSigDesc; desc.name = "Nuklear"; desc.constants[ShaderStage::Vertex].byteCount = sizeof(VertexRC); desc.constants[ShaderStage::Pixel].byteCount = sizeof(PixelRC); @@ -82,8 +84,8 @@ void Nuklear::Init() { GraphicsPipelineDesc desc("Nuklear", rootSignature); desc.shortLifeTime = true; - desc.vertexShader = ShaderByteCode(g_vs); - desc.pixelShader = ShaderByteCode(g_ps); + desc.vertexShader = vs; + desc.pixelShader = ps; desc.vertexLayout.bindingStrides[0] = sizeof(NuklearVertex); desc.vertexLayout.AddAttribute(0, ShaderSemantic::Position, DataType::Float32, 2, offsetof(NuklearVertex, position)); @@ -96,7 +98,7 @@ void Nuklear::Init() desc.depthStencil.enableDepthTest = false; desc.depthStencil.enableDepthWrites = false; desc.rasterizer.cullMode = CT_TWO_SIDED; - desc.AddRenderTarget(GLS_SRCBLEND_SRC_ALPHA | GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA, grp.renderTargetFormat); + desc.AddRenderTarget(GLS_SRCBLEND_SRC_ALPHA | GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA, rtFormat); pipeline = CreateGraphicsPipeline(desc); } } @@ -109,24 +111,30 @@ void Nuklear::BeginFrame() numIndexes = 0; } -void Nuklear::Begin() +void Nuklear::Begin(HTexture renderTarget) { - if(grp.renderMode == RenderMode::Nuklear) + if(srp.renderMode == 
RenderMode::Nuklear) { return; } - grp.renderMode = RenderMode::Nuklear; + srp.renderMode = RenderMode::Nuklear; - renderPassIndex = grp.BeginRenderPass("Nuklear", 0.75f, 0.75f, 1.0f); + renderPassIndex = srp.BeginRenderPass("Nuklear", 0.75f, 0.75f, 1.0f); FrameResources* const fr = &frameResources[GetFrameIndex()]; const uint32_t vertexStride = sizeof(NuklearVertex); - CmdBindRenderTargets(1, &grp.renderTarget, NULL); - CmdBindRootSignature(rootSignature); + CmdBindRenderTargets(1, &renderTarget, NULL); + if(!ddhi) + { + CmdBindRootSignature(rootSignature); + } CmdBindPipeline(pipeline); - CmdBindDescriptorTable(rootSignature, grp.descriptorTable); + if(!ddhi) + { + CmdBindDescriptorTable(rootSignature, descriptorTable); + } CmdBindVertexBuffers(1, &fr->vertexBuffer, &vertexStride, NULL); CmdBindIndexBuffer(fr->indexBuffer, IndexType::UInt32, 0); CmdSetViewport(0, 0, glConfig.vidWidth, glConfig.vidHeight); @@ -142,7 +150,14 @@ void Nuklear::Begin() 0.0f, 0.0f, 0.5f, 0.0f, (R + L) / (L - R), (T + B) / (B - T), 0.5f, 1.0f }; - CmdSetRootConstants(rootSignature, ShaderStage::Vertex, &vertexRC); + if(ddhi) + { + CmdSetGraphicsRootConstants(0, sizeof(vertexRC), &vertexRC); + } + else + { + CmdSetRootConstants(rootSignature, ShaderStage::Vertex, &vertexRC); + } for(int i = 0; i < 4; ++i) { @@ -152,9 +167,9 @@ void Nuklear::Begin() void Nuklear::End() { - grp.EndRenderPass(renderPassIndex); + srp.EndRenderPass(renderPassIndex); - grp.renderMode = RenderMode::None; + srp.renderMode = RenderMode::None; } void Nuklear::Upload(const nuklearUploadCommand_t& cmd) @@ -196,7 +211,14 @@ void Nuklear::Draw(const nuklearDrawCommand_t& cmd) PixelRC pixelRC = {}; pixelRC.texture = (uint32_t)image->textureIndex; pixelRC.sampler = GetSamplerIndex(image->wrapClampMode, TextureFilter::Linear); - CmdSetRootConstants(rootSignature, ShaderStage::Pixel, &pixelRC); + if(ddhi) + { + CmdSetGraphicsRootConstants(sizeof(VertexRC), sizeof(pixelRC), &pixelRC); + } + else + { + 
CmdSetRootConstants(rootSignature, ShaderStage::Pixel, &pixelRC); + } if(memcmp(cmd.scissorRect, prevScissorRect, sizeof(prevScissorRect)) != 0) { diff --git a/code/renderer/grp_ui.cpp b/code/renderer/srp_ui.cpp similarity index 80% rename from code/renderer/grp_ui.cpp rename to code/renderer/srp_ui.cpp index 58ab003..dc55be1 100644 --- a/code/renderer/grp_ui.cpp +++ b/code/renderer/srp_ui.cpp @@ -1,6 +1,6 @@ /* =========================================================================== -Copyright (C) 2022-2023 Gian 'myT' Schellenbaum +Copyright (C) 2022-2024 Gian 'myT' Schellenbaum This file is part of Challenge Quake 3 (CNQ3). @@ -18,16 +18,13 @@ You should have received a copy of the GNU General Public License along with Challenge Quake 3. If not, see . =========================================================================== */ -// Gameplay Rendering Pipeline - UI/2D rendering +// Shared Rendering Pipeline - UI/2D rendering -#include "grp_local.h" -#include "hlsl/ui_vs.h" -#include "hlsl/ui_ps.h" +#include "srp_local.h" #pragma pack(push, 4) - struct VertexRC { float scale[2]; @@ -38,16 +35,23 @@ struct PixelRC uint32_t texture; uint32_t sampler; }; - #pragma pack(pop) -void UI::Init() +void UI::Init(bool ddhi_, const ShaderByteCode& vs, const ShaderByteCode& ps, TextureFormat::Id rtFormat, HDescriptorTable descTable, RootSignatureDesc* rootSigDesc) { - if(grp.firstInit) + ddhi = ddhi_; + descriptorTable = descTable; + + if(srp.firstInit) { + if(ddhi) { - RootSignatureDesc desc = grp.rootSignatureDesc; + rootSignature = RHI_MAKE_NULL_HANDLE(); + } + else + { + RootSignatureDesc desc = *rootSigDesc; desc.name = "UI"; desc.constants[ShaderStage::Vertex].byteCount = 8; desc.constants[ShaderStage::Pixel].byteCount = 8; @@ -71,8 +75,8 @@ void UI::Init() { GraphicsPipelineDesc desc("UI", rootSignature); desc.shortLifeTime = true; - desc.vertexShader = ShaderByteCode(g_vs); - desc.pixelShader = ShaderByteCode(g_ps); + desc.vertexShader = vs; + desc.pixelShader = ps; 
desc.vertexLayout.bindingStrides[0] = sizeof(UI::Vertex); desc.vertexLayout.AddAttribute(0, ShaderSemantic::Position, DataType::Float32, 2, offsetof(UI::Vertex, position)); @@ -85,7 +89,7 @@ void UI::Init() desc.depthStencil.enableDepthTest = false; desc.depthStencil.enableDepthWrites = false; desc.rasterizer.cullMode = CT_TWO_SIDED; - desc.AddRenderTarget(GLS_SRCBLEND_SRC_ALPHA | GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA, grp.renderTargetFormat); + desc.AddRenderTarget(GLS_SRCBLEND_SRC_ALPHA | GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA, rtFormat); pipeline = CreateGraphicsPipeline(desc); } } @@ -99,20 +103,29 @@ void UI::BeginFrame() renderPassIndex = UINT32_MAX; } -void UI::Begin() +void UI::Begin(HTexture renderTarget) { - grp.renderMode = RenderMode::UI; + srp.renderMode = RenderMode::UI; - renderPassIndex = grp.BeginRenderPass("UI", 0.0f, 0.85f, 1.0f); + renderPassIndex = srp.BeginRenderPass("UI", 0.0f, 0.85f, 1.0f); - CmdBindRenderTargets(1, &grp.renderTarget, NULL); + const TextureBarrier tb(renderTarget, ResourceStates::RenderTargetBit); + CmdBarrier(1, &tb); + + CmdBindRenderTargets(1, &renderTarget, NULL); // UI always uses the entire render surface CmdSetViewportAndScissor(0, 0, glConfig.vidWidth, glConfig.vidHeight); - CmdBindRootSignature(rootSignature); + if(!ddhi) + { + CmdBindRootSignature(rootSignature); + } CmdBindPipeline(pipeline); - CmdBindDescriptorTable(rootSignature, grp.descriptorTable); + if(!ddhi) + { + CmdBindDescriptorTable(rootSignature, descriptorTable); + } const uint32_t stride = sizeof(UI::Vertex); CmdBindVertexBuffers(1, &vertexBuffer, &stride, NULL); CmdBindIndexBuffer(indexBuffer, indexType, 0); @@ -120,16 +133,23 @@ void UI::Begin() VertexRC vertexRC = {}; vertexRC.scale[0] = 2.0f / glConfig.vidWidth; vertexRC.scale[1] = 2.0f / glConfig.vidHeight; - CmdSetRootConstants(rootSignature, ShaderStage::Vertex, &vertexRC); + if(ddhi) + { + CmdSetGraphicsRootConstants(0, sizeof(vertexRC), &vertexRC); + } + else + { + CmdSetRootConstants(rootSignature, 
ShaderStage::Vertex, &vertexRC); + } } void UI::End() { DrawBatch(); - grp.EndRenderPass(renderPassIndex); + srp.EndRenderPass(renderPassIndex); - grp.renderMode = RenderMode::None; + srp.renderMode = RenderMode::None; } void UI::DrawBatch() @@ -146,7 +166,14 @@ void UI::DrawBatch() PixelRC pixelRC = {}; pixelRC.texture = GetBundleImage(bundle)->textureIndex; pixelRC.sampler = GetSamplerIndex(wrapMode, TextureFilter::Linear); - CmdSetRootConstants(rootSignature, ShaderStage::Pixel, &pixelRC); + if(ddhi) + { + CmdSetGraphicsRootConstants(sizeof(VertexRC), sizeof(PixelRC), &pixelRC); + } + else + { + CmdSetRootConstants(rootSignature, ShaderStage::Pixel, &pixelRC); + } CmdDrawIndexed(indexCount, firstIndex, 0); firstIndex += indexCount; @@ -155,7 +182,7 @@ void UI::DrawBatch() vertexCount = 0; } -void UI::UISetColor(const uiSetColorCommand_t& cmd) +void UI::CmdSetColor(const uiSetColorCommand_t& cmd) { const float rgbScale = tr.identityLight * 255.0f; byte* const colors = (byte*)&color; @@ -165,7 +192,7 @@ void UI::UISetColor(const uiSetColorCommand_t& cmd) colors[3] = (byte)(cmd.color[3] * 255.0f); } -void UI::UIDrawQuad(const uiDrawQuadCommand_t& cmd) +void UI::CmdDrawQuad(const uiDrawQuadCommand_t& cmd) { if(vertexCount + 4 > maxVertexCount || indexCount + 6 > maxIndexCount) @@ -217,7 +244,7 @@ void UI::UIDrawQuad(const uiDrawQuadCommand_t& cmd) vertices[v + 3].color = color; } -void UI::UIDrawTriangle(const uiDrawTriangleCommand_t& cmd) +void UI::CmdDrawTriangle(const uiDrawTriangleCommand_t& cmd) { if(vertexCount + 3 > maxVertexCount || indexCount + 3 > maxIndexCount) diff --git a/code/renderer/tr_init.cpp b/code/renderer/tr_init.cpp index e63eb0e..502cbe2 100644 --- a/code/renderer/tr_init.cpp +++ b/code/renderer/tr_init.cpp @@ -90,6 +90,7 @@ cvar_t *r_picmip; cvar_t *r_clear; cvar_t *r_vsync; cvar_t *r_lego; +cvar_t *r_pipeline; cvar_t *r_lockpvs; cvar_t *r_noportals; cvar_t *r_portalOnly; @@ -445,10 +446,16 @@ static const cvarTableItem_t r_cvars[] = }, { 
&r_vsync, "r_vsync", "0", CVAR_ARCHIVE | CVAR_LATCH, CVART_BOOL, NULL, NULL, "enables v-sync", - "V-Sync", CVARCAT_DISPLAY | CVARCAT_PERFORMANCE, "Enabling locks the framerate to the monitor's refresh rate", "" + "V-Sync", CVARCAT_DISPLAY | CVARCAT_PERFORMANCE, "Enabling locks the framerate to the monitor's refresh rate", "", CVAR_GUI_VALUE("0", "Frame cap", "The framerate is capped by CNQ3's own limiter") CVAR_GUI_VALUE("1", "V-Sync", "The framerate matches the monitor's refresh rate") }, + { + &r_pipeline, "r_pipeline", "0", CVAR_ARCHIVE | CVAR_LATCH, CVART_BOOL, NULL, NULL, "rendering pipeline", + "Rendering pipeline", CVARCAT_GRAPHICS, "", "", + CVAR_GUI_VALUE("0", "Gameplay", "Use to play the game") + CVAR_GUI_VALUE("1", "Cinematic", "Use for screenshots and movies") + }, // // latched variables that can only change over a restart @@ -751,6 +758,7 @@ void R_Init() R_InitMipFilter(); + R_SelectRenderPipeline(); renderPipeline->Init(); R_InitImages(); @@ -773,6 +781,9 @@ static void RE_Shutdown( qbool destroyWindow ) if ( tr.registered ) { ri.Cmd_UnregisterModule(); + if ( !destroyWindow && r_pipeline->latchedString != NULL ) { + destroyWindow = qtrue; + } renderPipeline->ShutDown( destroyWindow ); } diff --git a/code/renderer/tr_local.h b/code/renderer/tr_local.h index dfd680c..d866050 100644 --- a/code/renderer/tr_local.h +++ b/code/renderer/tr_local.h @@ -28,6 +28,11 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include "../qcommon/qfiles.h" #include "../qcommon/qcommon.h" #include "tr_public.h" +#include "shaders/common/state_bits.h.hlsli" // contains all the shared GLS_* macros + + +#define GLS_DEFAULT GLS_DEPTHMASK_TRUE +#define GLS_DEFAULT_2D (GLS_DEPTHTEST_DISABLE | GLS_SRCBLEND_SRC_ALPHA | GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA) extern const float s_flipMatrix[16]; @@ -427,6 +432,9 @@ struct shader_t { pipeline_t pipelines[MAX_SHADER_STAGES]; int numPipelines; + pipeline_t transpPipelines[MAX_SHADER_STAGES]; + int 
numTranspPipelines; + shader_t* next; }; @@ -473,6 +481,7 @@ typedef struct { float projectionMatrix[16]; cplane_t frustum[4]; vec3_t visBounds[2]; + float zNear; float zFar; } viewParms_t; @@ -1061,6 +1070,7 @@ extern cvar_t *r_picmip; // controls picmip values extern cvar_t *r_vsync; extern cvar_t *r_lego; +extern cvar_t *r_pipeline; extern cvar_t *r_vertexLight; // vertex lighting mode for better performance extern cvar_t *r_uiFullScreen; // ui is running fullscreen @@ -1112,49 +1122,11 @@ int R_CullPointAndRadius( const vec3_t origin, float radius ); int R_CullLocalPointAndRadius( const vec3_t origin, float radius ); void R_RotateForEntity( const trRefEntity_t* ent, const viewParms_t* viewParms, orientationr_t* orient ); +void R_CreateWorldModelMatrix( const vec3_t origin, const vec3_t axis[3], float* viewMatrix ); typedef void (*updateAnimatedImage_t)( image_t* image, int w, int h, const byte* data, qbool dirty ); const image_t* R_UpdateAndGetBundleImage( const textureBundle_t* bundle, updateAnimatedImage_t updateImage ); - -#define GLS_SRCBLEND_ZERO 0x00000001 -#define GLS_SRCBLEND_ONE 0x00000002 -#define GLS_SRCBLEND_DST_COLOR 0x00000003 -#define GLS_SRCBLEND_ONE_MINUS_DST_COLOR 0x00000004 -#define GLS_SRCBLEND_SRC_ALPHA 0x00000005 -#define GLS_SRCBLEND_ONE_MINUS_SRC_ALPHA 0x00000006 -#define GLS_SRCBLEND_DST_ALPHA 0x00000007 -#define GLS_SRCBLEND_ONE_MINUS_DST_ALPHA 0x00000008 -#define GLS_SRCBLEND_ALPHA_SATURATE 0x00000009 -#define GLS_SRCBLEND_BITS 0x0000000f - -#define GLS_DSTBLEND_ZERO 0x00000010 -#define GLS_DSTBLEND_ONE 0x00000020 -#define GLS_DSTBLEND_SRC_COLOR 0x00000030 -#define GLS_DSTBLEND_ONE_MINUS_SRC_COLOR 0x00000040 -#define GLS_DSTBLEND_SRC_ALPHA 0x00000050 -#define GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA 0x00000060 -#define GLS_DSTBLEND_DST_ALPHA 0x00000070 -#define GLS_DSTBLEND_ONE_MINUS_DST_ALPHA 0x00000080 -#define GLS_DSTBLEND_BITS 0x000000f0 - -#define GLS_BLEND_BITS 0x000000ff - -#define GLS_DEPTHMASK_TRUE 0x00000100 // enable depth writes - 
-#define GLS_POLYMODE_LINE 0x00001000 // wireframe polygon filling, not line rendering - -#define GLS_DEPTHTEST_DISABLE 0x00010000 // disable depth tests -#define GLS_DEPTHFUNC_EQUAL 0x00020000 - -#define GLS_ATEST_GT_0 0x10000000 -#define GLS_ATEST_LT_80 0x20000000 -#define GLS_ATEST_GE_80 0x40000000 -#define GLS_ATEST_BITS 0x70000000 - -#define GLS_DEFAULT GLS_DEPTHMASK_TRUE -#define GLS_DEFAULT_2D (GLS_DEPTHTEST_DISABLE | GLS_SRCBLEND_SRC_ALPHA | GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA) - void RE_LoadWorldMap( const char *mapname ); void RE_SetWorldVisData( const byte *vis ); qhandle_t RE_RegisterModel( const char *name ); @@ -1309,6 +1281,9 @@ struct shaderCommands_t // how to process the colors of the current batch float greyscale; + + // identifier of the code currently tessellating geometry + int tessellator; }; extern shaderCommands_t tess; @@ -1490,7 +1465,6 @@ struct drawSceneViewCommand_t : renderCommandBase_t { struct endSceneCommand_t : renderCommandBase_t { viewParms_t viewParms; - uint32_t padding2; }; struct nuklearUploadCommand_t : renderCommandBase_t { @@ -1631,6 +1605,11 @@ void R_CameraAxisVectorsFromMatrix( const float* modelView, vec3_t axisX, vec3_t void R_MakeIdentityMatrix( float* m ); void R_MakeOrthoProjectionMatrix( float* m, float w, float h ); +// LinearDepth(depthZW, A, B) -> B / (depthZW - A) +void R_LinearDepthConstantsFromProjectionMatrix( const float* projMatrix, float* A, float* B ); +void R_LinearDepthConstantsFromClipPlanes( float zNear, float zFar, float* A, float* B ); +void RB_LinearDepthConstants( float* A, float* B ); + /////////////////////////////////////////////////////////////// @@ -1693,20 +1672,18 @@ struct IRenderPipeline virtual void EndTextureUpload() = 0; virtual void ExecuteRenderCommands(const byte* data, bool readbackRequested) = 0; - - virtual void UISetColor(const uiSetColorCommand_t& cmd) = 0; - virtual void UIDrawQuad(const uiDrawQuadCommand_t& cmd) = 0; - virtual void UIDrawTriangle(const 
uiDrawTriangleCommand_t& cmd) = 0; - virtual void DrawSceneView(const drawSceneViewCommand_t& cmd) = 0; virtual void TessellationOverflow() = 0; virtual void DrawSkyBox() = 0; virtual void DrawClouds() = 0; - virtual void ReadPixels(int w, int h, int alignment, colorSpace_t colorSpace, void* out) = 0; + + virtual uint32_t GetSamplerDescriptorIndexFromBaseIndex(uint32_t baseIndex) = 0; }; extern IRenderPipeline* renderPipeline; +void R_SelectRenderPipeline(); + struct RHIExport { uint32_t renderToPresentUS; diff --git a/code/renderer/tr_main.cpp b/code/renderer/tr_main.cpp index 5c2a126..5419397 100644 --- a/code/renderer/tr_main.cpp +++ b/code/renderer/tr_main.cpp @@ -464,6 +464,26 @@ void R_MakeOrthoProjectionMatrix( float* m, float w, float h ) } +void R_LinearDepthConstantsFromProjectionMatrix( const float* projMatrix, float* A, float* B ) +{ + *A = -projMatrix[2 * 4 + 2]; + *B = projMatrix[3 * 4 + 2]; +} + + +void R_LinearDepthConstantsFromClipPlanes( float n, float f, float* A, float* B ) +{ + *A = -n / (f - n); + *B = f * (n / (f - n)); +} + + +void RB_LinearDepthConstants( float* A, float* B ) +{ + R_LinearDepthConstantsFromProjectionMatrix( backEnd.viewParms.projectionMatrix, A, B ); +} + + /* ================= R_RotateForEntity @@ -534,13 +554,40 @@ void R_RotateForEntity( const trRefEntity_t* ent, const viewParms_t* viewParms, } +void R_CreateWorldModelMatrix( const vec3_t origin, const vec3_t axis[3], float* viewMatrix ) +{ + float viewerMatrix[16]; + + viewerMatrix[0] = axis[0][0]; + viewerMatrix[4] = axis[0][1]; + viewerMatrix[8] = axis[0][2]; + viewerMatrix[12] = -origin[0] * viewerMatrix[0] + -origin[1] * viewerMatrix[4] + -origin[2] * viewerMatrix[8]; + + viewerMatrix[1] = axis[1][0]; + viewerMatrix[5] = axis[1][1]; + viewerMatrix[9] = axis[1][2]; + viewerMatrix[13] = -origin[0] * viewerMatrix[1] + -origin[1] * viewerMatrix[5] + -origin[2] * viewerMatrix[9]; + + viewerMatrix[2] = axis[2][0]; + viewerMatrix[6] = axis[2][1]; + viewerMatrix[10] = 
axis[2][2]; + viewerMatrix[14] = -origin[0] * viewerMatrix[2] + -origin[1] * viewerMatrix[6] + -origin[2] * viewerMatrix[10]; + + viewerMatrix[3] = 0.0f; + viewerMatrix[7] = 0.0f; + viewerMatrix[11] = 0.0f; + viewerMatrix[15] = 1.0f; + + // convert from our coordinate system (looking down X) + // to the back-end's coordinate system (looking down -Z) + R_MultMatrix( viewerMatrix, s_flipMatrix, viewMatrix ); +} + + // sets up the modelview matrix for a given viewParm static void R_RotateForViewer() { - float viewerMatrix[16]; - vec3_t origin; - Com_Memset( &tr.orient, 0, sizeof(tr.orient) ); tr.orient.axis[0][0] = 1; tr.orient.axis[1][1] = 1; @@ -548,42 +595,20 @@ static void R_RotateForViewer() VectorCopy( tr.viewParms.orient.origin, tr.orient.viewOrigin ); // transform by the camera placement - VectorCopy( tr.viewParms.orient.origin, origin ); - - viewerMatrix[0] = tr.viewParms.orient.axis[0][0]; - viewerMatrix[4] = tr.viewParms.orient.axis[0][1]; - viewerMatrix[8] = tr.viewParms.orient.axis[0][2]; - viewerMatrix[12] = -origin[0] * viewerMatrix[0] + -origin[1] * viewerMatrix[4] + -origin[2] * viewerMatrix[8]; - - viewerMatrix[1] = tr.viewParms.orient.axis[1][0]; - viewerMatrix[5] = tr.viewParms.orient.axis[1][1]; - viewerMatrix[9] = tr.viewParms.orient.axis[1][2]; - viewerMatrix[13] = -origin[0] * viewerMatrix[1] + -origin[1] * viewerMatrix[5] + -origin[2] * viewerMatrix[9]; - - viewerMatrix[2] = tr.viewParms.orient.axis[2][0]; - viewerMatrix[6] = tr.viewParms.orient.axis[2][1]; - viewerMatrix[10] = tr.viewParms.orient.axis[2][2]; - viewerMatrix[14] = -origin[0] * viewerMatrix[2] + -origin[1] * viewerMatrix[6] + -origin[2] * viewerMatrix[10]; - - viewerMatrix[3] = 0; - viewerMatrix[7] = 0; - viewerMatrix[11] = 0; - viewerMatrix[15] = 1; - - // convert from our coordinate system (looking down X) - // to the back-end's coordinate system (looking down -Z) - R_MultMatrix( viewerMatrix, s_flipMatrix, tr.orient.modelMatrix ); + R_CreateWorldModelMatrix( 
tr.viewParms.orient.origin, tr.viewParms.orient.axis, tr.orient.modelMatrix ); tr.viewParms.world = tr.orient; } -static void SetFarClip() +static void SetClipPlanes() { + tr.viewParms.zNear = 1.0f; + // if not rendering the world (icons, menus, etc) // set a 2k far clip plane if ( tr.refdef.rdflags & RDF_NOWORLDMODEL ) { - tr.viewParms.zFar = 2048; + tr.viewParms.zFar = 2048.0f; return; } @@ -614,37 +639,37 @@ static void R_SetupProjection() float zNear, zFar; // dynamically compute far clip plane distance - SetFarClip(); + SetClipPlanes(); // // set up projection matrix // - zNear = 1.0f; + zNear = tr.viewParms.zNear; zFar = tr.viewParms.zFar; height = 2.0f * zNear * tan( tr.refdef.fov_y * M_PI / 360.0f ); width = 2.0f * zNear * tan( tr.refdef.fov_x * M_PI / 360.0f ); depth = zFar - zNear; - tr.viewParms.projectionMatrix[0] = 2 * zNear / width; - tr.viewParms.projectionMatrix[4] = 0; - tr.viewParms.projectionMatrix[8] = 0; - tr.viewParms.projectionMatrix[12] = 0; + tr.viewParms.projectionMatrix[ 0] = 2.0f * zNear / width; + tr.viewParms.projectionMatrix[ 4] = 0.0f; + tr.viewParms.projectionMatrix[ 8] = 0.0f; + tr.viewParms.projectionMatrix[12] = 0.0f; - tr.viewParms.projectionMatrix[1] = 0; - tr.viewParms.projectionMatrix[5] = 2 * zNear / height; - tr.viewParms.projectionMatrix[9] = 0; - tr.viewParms.projectionMatrix[13] = 0; + tr.viewParms.projectionMatrix[ 1] = 0.0f; + tr.viewParms.projectionMatrix[ 5] = 2.0f * zNear / height; + tr.viewParms.projectionMatrix[ 9] = 0.0f; + tr.viewParms.projectionMatrix[13] = 0.0f; - tr.viewParms.projectionMatrix[2] = 0; - tr.viewParms.projectionMatrix[6] = 0; + tr.viewParms.projectionMatrix[ 2] = 0.0f; + tr.viewParms.projectionMatrix[ 6] = 0.0f; tr.viewParms.projectionMatrix[10] = zNear / depth; tr.viewParms.projectionMatrix[14] = zFar * zNear / depth; - tr.viewParms.projectionMatrix[3] = 0; - tr.viewParms.projectionMatrix[7] = 0; - tr.viewParms.projectionMatrix[11] = -1; - tr.viewParms.projectionMatrix[15] = 0; + 
tr.viewParms.projectionMatrix[ 3] = 0.0f; + tr.viewParms.projectionMatrix[ 7] = 0.0f; + tr.viewParms.projectionMatrix[11] = -1.0f; + tr.viewParms.projectionMatrix[15] = 0.0f; } diff --git a/code/shadercomp/shadercomp.cpp b/code/shadercomp/shadercomp.cpp index a7b921e..69e5ee9 100644 --- a/code/shadercomp/shadercomp.cpp +++ b/code/shadercomp/shadercomp.cpp @@ -1,9 +1,44 @@ +/* +=========================================================================== +Copyright (C) 2022-2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . +=========================================================================== +*/ +// compiles core shaders as byte code to be embedded into the CNQ3 client + + #include #include #include #include +#include +#include -#include "../renderer/uber_shaders.h" +#include "../renderer/grp_uber_shaders.h" + + +char repoPath[MAX_PATH]; +char outputPath[MAX_PATH]; +char bin2headerPath[MAX_PATH]; +char dxcPath[MAX_PATH]; + +const char* targetVS = "vs_6_0"; +const char* targetPS = "ps_6_0"; +const char* targetCS = "cs_6_0"; #define PS(Data) #Data, @@ -22,7 +57,7 @@ const char* uberShaderPixelStates[] = // -Wno-warning disables the warning -const char* va(const char* format, ...) +const char* va(_Printf_format_string_ const char* format, ...) 
{ static char string[64][32000]; static int index = 0; @@ -49,12 +84,14 @@ void CompileShader(const ShaderArgs& args, int extraCount = 0, const char** extr { static char temp[4096]; + const char* headerPath = va("%s\\%s", outputPath, args.headerPath); + // -Ges: Enable strict mode // -Gis: Force IEEE strictness // -Zi: Embed debug info // -Qembed_debug: Embed debug info in shader container - strcpy(temp, va("dxc.exe -Fh %s -E %s -T %s -WX -Ges -Gis -Zi -Qembed_debug", - args.headerPath, args.entryPoint, args.targetProfile)); + strcpy(temp, va("%s -Fh %s -E %s -T %s -WX -Ges -Gis -Zi -Qembed_debug", + dxcPath, headerPath, args.entryPoint, args.targetProfile)); for(int i = 0; i < extraCount; ++i) { @@ -93,7 +130,7 @@ void CompileSMAAShader(const SMAAArgs& smaaArgs) args.entryPoint = smaaArgs.vertexShader ? "vs" : "ps"; args.headerPath = smaaArgs.headerPath; args.shaderPath = smaaArgs.shaderPath; - args.targetProfile = smaaArgs.vertexShader ? "vs_6_0" : "ps_6_0"; + args.targetProfile = smaaArgs.vertexShader ? 
targetVS : targetPS; CompileShader(args, _countof(extras), extras); } @@ -130,7 +167,7 @@ void CompileVS(const char* headerPath, const char* shaderPath) args.entryPoint = "vs"; args.headerPath = headerPath; args.shaderPath = shaderPath; - args.targetProfile = "vs_6_0"; + args.targetProfile = targetVS; CompileShader(args, _countof(extras), extras); } @@ -142,7 +179,7 @@ void CompilePS(const char* headerPath, const char* shaderPath) args.entryPoint = "ps"; args.headerPath = headerPath; args.shaderPath = shaderPath; - args.targetProfile = "ps_6_0"; + args.targetProfile = targetPS; CompileShader(args, _countof(extras), extras); } @@ -154,7 +191,7 @@ void CompileCS(const char* headerPath, const char* shaderPath) args.entryPoint = "cs"; args.headerPath = headerPath; args.shaderPath = shaderPath; - args.targetProfile = "cs_6_0"; + args.targetProfile = targetCS; CompileShader(args, _countof(extras), extras); } @@ -177,7 +214,7 @@ void CompileUberVS(const char* headerPath, const char* shaderPath, int stageCoun args.entryPoint = "vs"; args.headerPath = headerPath; args.shaderPath = shaderPath; - args.targetProfile = "vs_6_0"; + args.targetProfile = targetVS; CompileShader(args, _countof(extras), extras); } @@ -213,26 +250,38 @@ void CompileUberPS(const char* stateString) args.entryPoint = "ps"; args.headerPath = va("uber_shader_ps_%s.h", stateString); args.shaderPath = "uber_shader.hlsl"; - args.targetProfile = "ps_6_0"; + args.targetProfile = targetPS; CompileShader(args, extraCount, extras); } -int main(int /*argc*/, const char** argv) +const char* Canonicalize(const char* path) { - char dirPath[MAX_PATH]; - strcpy(dirPath, argv[0]); - int l = strlen(dirPath); - while(l-- > 0) - { - if(dirPath[l] == '/' || dirPath[l] == '\\') - { - dirPath[l] = '\0'; - break; - } - } - SetCurrentDirectoryA(dirPath); + static char canonPath[MAX_PATH]; - system("del *.h"); + PathCanonicalizeA(canonPath, path); + + return canonPath; +} + +void InitDirectory(const char* dirName) +{ + const 
char* rendererPath = va("%s\\code\\renderer", repoPath); + const char* cd = Canonicalize(va("%s\\shaders\\%s", rendererPath, dirName)); + SetCurrentDirectoryA(cd); + const char* out = Canonicalize(va("%s\\compshaders\\%s", rendererPath, dirName)); + strcpy(outputPath, out); + CreateDirectoryA(outputPath, NULL); + + system(va("del %s\\*.h", outputPath)); + system(va("del %s\\*.temp", outputPath)); +} + +void ProcessGRP() +{ + InitDirectory("grp"); + targetVS = "vs_6_0"; + targetPS = "ps_6_0"; + targetCS = "cs_6_0"; CompileVSAndPS("post_gamma", "post_gamma.hlsl"); CompileVSAndPS("post_inverse_gamma", "post_inverse_gamma.hlsl"); @@ -249,25 +298,91 @@ int main(int /*argc*/, const char** argv) CompileCS("mip_3_cs.h", "mip_3.hlsl"); CompileSMAAShaders(); - system("type smaa*.h > complete_smaa.h"); + system(va("type %s\\smaa*.h > %s\\complete_smaa.h", outputPath, outputPath)); - system("type shared.hlsli uber_shader.hlsl > uber_shader.temp"); // combines both files into one - system("..\\..\\..\\tools\\bin2header.exe --output uber_shader.h --hname uber_shader_string uber_shader.temp"); - system("del uber_shader.temp"); + // type combines all files into one + system(va("type ..\\common\\state_bits.h.hlsli ..\\common\\blend.hlsli shared.hlsli uber_shader.hlsl > %s\\uber_shader.temp", outputPath)); + system(va("%s --output %s\\uber_shader.h --hname uber_shader_string %s\\uber_shader.temp", bin2headerPath, outputPath, outputPath)); + system(va("del %s\\uber_shader.temp", outputPath)); for(int i = 0; i < 8; ++i) { CompileUberVS(va("uber_shader_vs_%i.h", i + 1), "uber_shader.hlsl", i + 1); } - system("type uber_shader_vs_*.h > complete_uber_vs.h"); - system("del uber_shader_vs_*.h"); + system(va("type %s\\uber_shader_vs_*.h > %s\\complete_uber_vs.h", outputPath, outputPath)); + system(va("del %s\\uber_shader_vs_*.h", outputPath)); for(int i = 0; i < _countof(uberShaderPixelStates); ++i) { CompileUberPS(uberShaderPixelStates[i]); } - system("type uber_shader_ps_*.h > 
complete_uber_ps.h"); - system("del uber_shader_ps_*.h"); + system(va("type %s\\uber_shader_ps_*.h > %s\\complete_uber_ps.h", outputPath, outputPath)); + system(va("del %s\\uber_shader_ps_*.h", outputPath)); +} + +void ProcessCRP() +{ + InitDirectory("crp"); + targetVS = "vs_6_6"; + targetPS = "ps_6_6"; + targetCS = "cs_6_6"; + + CompileVSAndPS("blit", "blit.hlsl"); + CompileVSAndPS("ui", "ui.hlsl"); + CompileVSAndPS("imgui", "imgui.hlsl"); + CompileVSAndPS("nuklear", "nuklear.hlsl"); + CompileCS("mip_1_cs.h", "mip_1.hlsl"); + CompileCS("mip_2_cs.h", "mip_2.hlsl"); + CompileCS("mip_3_cs.h", "mip_3.hlsl"); + + CompileVSAndPS("opaque", "opaque.hlsl"); + CompileVSAndPS("transp_draw", "transp_draw.hlsl"); + CompileVSAndPS("transp_resolve", "transp_resolve.hlsl"); + CompileVSAndPS("tone_map", "tone_map.hlsl"); + CompileVSAndPS("tone_map_inverse", "tone_map_inverse.hlsl"); + CompileVSAndPS("accumdof_accum", "accumdof_accum.hlsl"); + CompileVSAndPS("accumdof_norm", "accumdof_norm.hlsl"); + CompileVSAndPS("accumdof_debug", "accumdof_debug.hlsl"); + CompileCS("gatherdof_split.h", "gatherdof_split.hlsl"); + CompileCS("gatherdof_coc_tile_gen.h", "gatherdof_coc_tile_gen.hlsl"); + CompileCS("gatherdof_coc_tile_max.h", "gatherdof_coc_tile_max.hlsl"); + CompileCS("gatherdof_blur.h", "gatherdof_blur.hlsl"); + CompileCS("gatherdof_fill.h", "gatherdof_fill.hlsl"); + CompileVSAndPS("gatherdof_combine", "gatherdof_combine.hlsl"); + CompileVSAndPS("gatherdof_debug", "gatherdof_debug.hlsl"); + CompileVSAndPS("fog_inside", "fog_inside.hlsl"); + CompileVSAndPS("fog_outside", "fog_outside.hlsl"); +} + +int main(int /*argc*/, const char** argv) +{ + char dirPath[MAX_PATH]; + strcpy(dirPath, argv[0]); + int l = strlen(dirPath); + while(l-- > 0) + { + if(dirPath[l] == '/' || dirPath[l] == '\\') + { + dirPath[l] = '\0'; + break; + } + } + strcpy(repoPath, Canonicalize(va("%s\\..\\..", dirPath))); + strcpy(bin2headerPath, Canonicalize(va("%s\\tools\\bin2header.exe", repoPath))); + + char* path 
= getenv("DXCPATH"); + if(path != NULL) + { + strcpy(dxcPath, path); + } + else + { + strcpy(dxcPath, "dxc.exe"); + } + system(va("%s --version", dxcPath)); + + ProcessGRP(); + ProcessCRP(); return 0; } diff --git a/makefiles/compile_shader.cmd b/makefiles/compile_shader.cmd deleted file mode 100644 index 8374958..0000000 --- a/makefiles/compile_shader.cmd +++ /dev/null @@ -1,5 +0,0 @@ -@echo off -set fxc="%FXCPATH%" -if "%FXCPATH%"=="" if not "%DXSDK_DIR%"=="" set fxc="%DXSDK_DIR%Utilities\\bin\\x86\\fxc.exe" -if "%FXCPATH%"=="" if "%DXSDK_DIR%"=="" set fxc="C:\\Program Files (x86)\\Windows Kits\\10\\bin\\10.0.20348.0\\x64\\fxc.exe" -%fxc% %* diff --git a/makefiles/premake5.lua b/makefiles/premake5.lua index 6416604..7c553f7 100644 --- a/makefiles/premake5.lua +++ b/makefiles/premake5.lua @@ -640,17 +640,9 @@ solution "cnq3" kind "ConsoleApp" language "C++" AddSourcesAndHeaders("shadercomp") - postbuildcommands { string.format("{copyfile} \"%%{cfg.buildtarget.directory}/%%{cfg.buildtarget.basename}.exe\" \"%s/renderer/hlsl\"", make_path_src) } - postbuildcommands { string.format("{copyfile} \"%%{cfg.buildtarget.directory}/%%{cfg.buildtarget.basename}.pdb\" \"%s/renderer/hlsl\"", make_path_src) } - postbuildcommands { string.format("\"%s/renderer/hlsl/%%{cfg.buildtarget.name}\"", make_path_src) } + postbuildcommands { "$(TargetPath)" } ApplyProjectSettings(true) - --[[ - VC++ STILL requires absolute paths for these... 
maybe it will be fixed a few decades after I'm in the grave - local debug_path_dir = string.format("%s/renderer/hlsl", make_path_src) - local debug_path_exe = string.format("%s/%%{cfg.buildtarget.name}", debug_path_dir) - debugdir(debug_path_dir) - debugcommand(debug_path_exe) - --]] + links { "Shlwapi" } project "renderer" @@ -662,8 +654,8 @@ solution "cnq3" includedirs { "/usr/local/include" } end if os.istarget("windows") then - files { string.format("%s/renderer/hlsl/*.hlsl", path_src) } - files { string.format("%s/renderer/hlsl/*.hlsli", path_src) } + files { string.format("%s/renderer/shaders/**.hlsl", path_src) } + files { string.format("%s/renderer/shaders/**.hlsli", path_src) } filter "files:**.hlsl" flags { "ExcludeFromBuild" } filter { } @@ -686,6 +678,8 @@ solution "cnq3" if os.istarget("windows") then includedirs { path_src.."/imgui" } libdirs { path_src.."/nvapi" } + files { string.format("premake5.lua", path_make) } + vpaths { ["*"] = "../code/" } -- don't allow "code" to be the parent filter end if os.istarget("bsd") then includedirs { "/usr/local/include" } diff --git a/makefiles/windows_vs2019/cnq3.vcxproj b/makefiles/windows_vs2019/cnq3.vcxproj index 4200258..6cee8d0 100644 --- a/makefiles/windows_vs2019/cnq3.vcxproj +++ b/makefiles/windows_vs2019/cnq3.vcxproj @@ -199,16 +199,18 @@ copy "..\..\.bin\release\cnq3.pdb" "$(QUAKE3DIR)" + + + - @@ -288,6 +290,9 @@ copy "..\..\.bin\release\cnq3.pdb" "$(QUAKE3DIR)" + + + {A1A792F4-8D49-BDCA-7604-D11E6245441B} diff --git a/makefiles/windows_vs2019/cnq3.vcxproj.filters b/makefiles/windows_vs2019/cnq3.vcxproj.filters index 77648ff..1ecebab 100644 --- a/makefiles/windows_vs2019/cnq3.vcxproj.filters +++ b/makefiles/windows_vs2019/cnq3.vcxproj.filters @@ -243,9 +243,15 @@ renderer + + renderer + renderer + + renderer + renderer @@ -258,6 +264,9 @@ renderer + + renderer + renderer @@ -270,9 +279,6 @@ renderer - - renderer - server @@ -504,4 +510,7 @@ win32 + + + \ No newline at end of file diff --git 
a/makefiles/windows_vs2019/renderer.vcxproj b/makefiles/windows_vs2019/renderer.vcxproj index 5d3a665..bef5e36 100644 --- a/makefiles/windows_vs2019/renderer.vcxproj +++ b/makefiles/windows_vs2019/renderer.vcxproj @@ -112,28 +112,41 @@ + + + - - + + + + + + + + + + - - - + + + + + @@ -159,59 +172,141 @@ - + true - + true - + true - + true - + true - + true - + true - + true - + true - + true - + true - + true - + true - + true - + true - + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + true - - - + + + + + + + + + + + + + diff --git a/makefiles/windows_vs2019/renderer.vcxproj.filters b/makefiles/windows_vs2019/renderer.vcxproj.filters index 806f1dd..c045ac2 100644 --- a/makefiles/windows_vs2019/renderer.vcxproj.filters +++ b/makefiles/windows_vs2019/renderer.vcxproj.filters @@ -1,34 +1,56 @@ - - {98F1977C-8428-990D-2D15-7F10192B150F} + + {0F45D591-7B24-542D-843C-DF03F09ABA8E} + + + {873F6737-730D-5B3D-5CA1-CB63480E37A2} + + + {A3AE1A56-0F64-934B-9858-8D22040D8A4F} + + + {A7BF1A56-1375-934B-9C69-8D22081E8A4F} + + + - - + + + + + + + + + + - - - + + + + + @@ -54,64 +76,166 @@ - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\grp + + + shaders\grp + + + shaders\grp + + + shaders\grp + + + shaders\grp + + + shaders\grp + + + shaders\grp + + + shaders\grp + + 
+ shaders\grp + + + shaders\grp + + + shaders\grp + + + shaders\grp + + + shaders\grp + + + shaders\grp + + + shaders\grp + + + shaders\grp - - hlsl + + shaders\common - - hlsl + + shaders\common - - hlsl + + shaders\common + + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\grp + + + shaders\grp + + + shaders\grp \ No newline at end of file diff --git a/makefiles/windows_vs2019/shadercomp.vcxproj b/makefiles/windows_vs2019/shadercomp.vcxproj index 0a7ed85..6428efa 100644 --- a/makefiles/windows_vs2019/shadercomp.vcxproj +++ b/makefiles/windows_vs2019/shadercomp.vcxproj @@ -72,13 +72,12 @@ Console true + Shlwapi.lib;%(AdditionalDependencies) ..\..\.build\debug;%(AdditionalLibraryDirectories) %(AdditionalOptions) - copy /B /Y "$(TargetDir)\$(TargetName).exe" "..\..\code\renderer\hlsl" -copy /B /Y "$(TargetDir)\$(TargetName).pdb" "..\..\code\renderer\hlsl" -"../../code/renderer/hlsl/$(TargetFileName)" + $(TargetPath) @@ -107,13 +106,12 @@ copy /B /Y "$(TargetDir)\$(TargetName).pdb" "..\..\code\renderer\hlsl" true true true + Shlwapi.lib;%(AdditionalDependencies) ..\..\.build\release;%(AdditionalLibraryDirectories) %(AdditionalOptions) - copy /B /Y "$(TargetDir)\$(TargetName).exe" "..\..\code\renderer\hlsl" -copy /B /Y "$(TargetDir)\$(TargetName).pdb" "..\..\code\renderer\hlsl" -"../../code/renderer/hlsl/$(TargetFileName)" + $(TargetPath) diff --git a/makefiles/windows_vs2022/cnq3.vcxproj b/makefiles/windows_vs2022/cnq3.vcxproj index b601734..7d324d8 100644 --- a/makefiles/windows_vs2022/cnq3.vcxproj +++ b/makefiles/windows_vs2022/cnq3.vcxproj @@ -201,16 +201,18 @@ copy "..\..\.bin\release\cnq3.pdb" "$(QUAKE3DIR)" + + + - @@ -290,6 +292,9 @@ copy "..\..\.bin\release\cnq3.pdb" "$(QUAKE3DIR)" + + + {A1A792F4-8D49-BDCA-7604-D11E6245441B} diff --git a/makefiles/windows_vs2022/cnq3.vcxproj.filters b/makefiles/windows_vs2022/cnq3.vcxproj.filters index 77648ff..1ecebab 100644 --- 
a/makefiles/windows_vs2022/cnq3.vcxproj.filters +++ b/makefiles/windows_vs2022/cnq3.vcxproj.filters @@ -243,9 +243,15 @@ renderer + + renderer + renderer + + renderer + renderer @@ -258,6 +264,9 @@ renderer + + renderer + renderer @@ -270,9 +279,6 @@ renderer - - renderer - server @@ -504,4 +510,7 @@ win32 + + + \ No newline at end of file diff --git a/makefiles/windows_vs2022/renderer.vcxproj b/makefiles/windows_vs2022/renderer.vcxproj index fd4e8a6..2f93908 100644 --- a/makefiles/windows_vs2022/renderer.vcxproj +++ b/makefiles/windows_vs2022/renderer.vcxproj @@ -114,28 +114,41 @@ + + + - - + + + + + + + + + + - - - + + + + + @@ -161,59 +174,141 @@ - + true - + true - + true - + true - + true - + true - + true - + true - + true - + true - + true - + true - + true - + true - + true - + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + true - - - + + + + + + + + + + + + + diff --git a/makefiles/windows_vs2022/renderer.vcxproj.filters b/makefiles/windows_vs2022/renderer.vcxproj.filters index 806f1dd..c045ac2 100644 --- a/makefiles/windows_vs2022/renderer.vcxproj.filters +++ b/makefiles/windows_vs2022/renderer.vcxproj.filters @@ -1,34 +1,56 @@ - - {98F1977C-8428-990D-2D15-7F10192B150F} + + {0F45D591-7B24-542D-843C-DF03F09ABA8E} + + + {873F6737-730D-5B3D-5CA1-CB63480E37A2} + + + {A3AE1A56-0F64-934B-9858-8D22040D8A4F} + + + {A7BF1A56-1375-934B-9C69-8D22081E8A4F} + + + - - + + + + + + + + + + - - - + + + + + @@ -54,64 +76,166 @@ - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp - - hlsl + + 
shaders\crp - - hlsl + + shaders\crp - - hlsl + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\grp + + + shaders\grp + + + shaders\grp + + + shaders\grp + + + shaders\grp + + + shaders\grp + + + shaders\grp + + + shaders\grp + + + shaders\grp + + + shaders\grp + + + shaders\grp + + + shaders\grp + + + shaders\grp + + + shaders\grp + + + shaders\grp + + + shaders\grp - - hlsl + + shaders\common - - hlsl + + shaders\common - - hlsl + + shaders\common + + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\grp + + + shaders\grp + + + shaders\grp \ No newline at end of file diff --git a/makefiles/windows_vs2022/shadercomp.vcxproj b/makefiles/windows_vs2022/shadercomp.vcxproj index e0388b0..3753b7c 100644 --- a/makefiles/windows_vs2022/shadercomp.vcxproj +++ b/makefiles/windows_vs2022/shadercomp.vcxproj @@ -73,13 +73,12 @@ Console true + Shlwapi.lib;%(AdditionalDependencies) ..\..\.build\debug;%(AdditionalLibraryDirectories) %(AdditionalOptions) - copy /B /Y "$(TargetDir)\$(TargetName).exe" "..\..\code\renderer\hlsl" -copy /B /Y "$(TargetDir)\$(TargetName).pdb" "..\..\code\renderer\hlsl" -"../../code/renderer/hlsl/$(TargetFileName)" + $(TargetPath) @@ -109,13 +108,12 @@ copy /B /Y "$(TargetDir)\$(TargetName).pdb" "..\..\code\renderer\hlsl" true true true + Shlwapi.lib;%(AdditionalDependencies) ..\..\.build\release;%(AdditionalLibraryDirectories) %(AdditionalOptions) - copy /B /Y "$(TargetDir)\$(TargetName).exe" "..\..\code\renderer\hlsl" -copy /B /Y "$(TargetDir)\$(TargetName).pdb" "..\..\code\renderer\hlsl" -"../../code/renderer/hlsl/$(TargetFileName)" + $(TargetPath)