/*
===========================================================================
Copyright (C) 2023 Gian 'myT' Schellenbaum

This file is part of Challenge Quake 3 (CNQ3).

Challenge Quake 3 is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.

Challenge Quake 3 is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
===========================================================================
*/
// reads per-pixel fragment linked lists into arrays, sorts them and composites them


#include "common.hlsli"
#include "oit.h.hlsli"
#include "fullscreen.hlsli"
#include "../common/state_bits.h.hlsli"


// The *Texture / *Buffer members are indices into ResourceDescriptorHeap.
cbuffer RootConstants
{
	uint renderTargetTexture; // color target read as the compositing background
	uint shaderIndexBuffer;   // byte address buffer receiving the traced shader ID
	uint indexTexture;        // per-pixel index of the first fragment in the list
	uint fragmentBuffer;      // structured buffer holding all fragment list nodes
	uint centerPixel;         // y: 16 - x: 16 (x in the low 16 bits, y in the high 16 bits)
	uint depthTexture;        // stored depth (z/w), used for depth fading
	float linearDepthA;       // forwarded to LinearDepth
	float linearDepthB;       // forwarded to LinearDepth
	float2 scissorRectMin;    // pixels outside [min, max] are returned unmodified
	float2 scissorRectMax;
};

// Extracts the shader stage index field from a fragment's state bits.
uint GetShaderStage(uint stateBits)
{
	return (stateBits & GLS_STAGEINDEX_BITS) >> GLS_STAGEINDEX_SHIFT;
}

// Sort predicate: returns true when fragment A must be placed before fragment B,
// i.e. when A has the greater depth or, at equal depth, the lower stage index.
bool IsBehind(float depthA, float depthB, uint stageA, uint stageB)
{
	if(depthA > depthB)
	{
		return true;
	}

	// ties are broken by stage index so that equal-depth fragments keep a fixed order
	if(depthA == depthB && stageA < stageB)
	{
		return true;
	}

	return false;
}

// from NVIDIA's 2007 "Soft Particles" whitepaper by Tristan Lorach
// Remaps d with a curve that is symmetric around 0.5:
// each half is mirrored, raised to the given power and mirrored back.
float Contrast(float d, float power)
{
	bool aboveHalf = d > 0.5;
	// saturate keeps the base of pow within [0, 1]
	float base = saturate(2.0 * (aboveHalf ? (1.0 - d) : d));
	float r = 0.5 * pow(base, power);

	return aboveHalf ? (1.0 - r) : r;
}

// Returns 1.0 when bit number bitIndex of bits is set and 0.0 otherwise.
float GetBitAsFloat(uint bits, uint bitIndex)
{
	return (bits & (1u << bitIndex)) != 0u ? 1.0 : 0.0;
}

// Unpacks two 16-bit floats: x comes from the low 16 bits, y from the high 16 bits.
float2 UnpackHalf2(uint data)
{
	return float2(f16tof32(data), f16tof32(data >> 16));
}

// Applies depth fading to a fragment's color.
// color:         the fragment's unpacked color
// fragment:      the fragment, whose depthFadeScaleBias and depthFadeDistOffset are decoded here
// storedDepthZW: the depth buffer value (z/w) at this pixel
// Returns color unchanged when bit 8 of depthFadeScaleBias is clear.
float4 DepthFadeFragmentColor(float4 color, OIT_Fragment fragment, float storedDepthZW)
{
	// bit 8 is the enable flag
	if(((fragment.depthFadeScaleBias >> 8) & 1) == 0)
	{
		return color;
	}

#define BIT(Index) GetBitAsFloat(fragment.depthFadeScaleBias, Index)

	float4 dst = color;
	// x multiplies the depth difference, y is subtracted from the fragment's depth
	float2 distOffset = UnpackHalf2(fragment.depthFadeDistOffset);
	// bits 0-3: per-channel scale, bits 4-7: per-channel bias
	float4 fadeColorScale = float4(BIT(0), BIT(1), BIT(2), BIT(3));
	float4 fadeColorBias = float4(BIT(4), BIT(5), BIT(6), BIT(7));
	float zwDepth = storedDepthZW; // stored depth, z/w
	float depthS = LinearDepth(zwDepth, linearDepthA, linearDepthB); // stored depth, linear
	float depthP = fragment.depth - distOffset.y; // fragment depth, linear
	float fadeScale = Contrast((depthS - depthP) * distOffset.x, 2.0);
	// fadeScale 1 keeps the color as is, fadeScale 0 yields color * scale + bias
	dst = lerp(dst * fadeColorScale + fadeColorBias, dst, fadeScale);

#undef BIT

	return dst;
}

// Resolves this pixel's fragment list:
// gathers up to OIT_MAX_FRAGMENTS_PER_PIXEL fragments, sorts them from
// greatest to smallest depth and blends them in that order over the render target's color.
// Also writes the traced shader ID of the center pixel's last sorted fragment.
float4 ps(VOut input) : SV_Target
{
	Texture2D renderTarget = ResourceDescriptorHeap[renderTargetTexture];
	int2 tc = int2(input.position.x, input.position.y);
	float4 color = renderTarget.Load(int3(tc.x, tc.y, 0));
	if(any(input.position.xy < scissorRectMin) ||
		any(input.position.xy > scissorRectMax))
	{
		return color;
	}

	// the element types are required: the index texture yields a uint fragment index
	// and the fragment buffer yields OIT_Fragment nodes
	RWTexture2D<uint> index = ResourceDescriptorHeap[indexTexture];
	RWStructuredBuffer<OIT_Fragment> fragments = ResourceDescriptorHeap[fragmentBuffer];
	Texture2D depthTex = ResourceDescriptorHeap[depthTexture];
	uint fragmentIndex = index[tc];
	uint i;
	OIT_Fragment sorted[OIT_MAX_FRAGMENTS_PER_PIXEL];
	uint fragmentCount = 0;

	// grab this pixel's fragments
	// index 0 terminates the list; fragments past the array's capacity are ignored
	while(fragmentIndex != 0 && fragmentCount < OIT_MAX_FRAGMENTS_PER_PIXEL)
	{
		sorted[fragmentCount] = fragments[fragmentIndex];
		fragmentIndex = sorted[fragmentCount].next;
		++fragmentCount;
	}

	// sort the fragments using an insertion sort
	for(i = 1; i < fragmentCount; ++i)
	{
		OIT_Fragment insert = sorted[i];
		uint stage = GetShaderStage(insert.stateBits);
		uint j = i;
		while(j > 0 && IsBehind(insert.depth, sorted[j - 1].depth, stage, GetShaderStage(sorted[j - 1].stateBits)))
		{
			sorted[j] = sorted[j - 1];
			--j;
		}
		sorted[j] = insert;
	}

	// blend the results
	float storedDepthZW = depthTex.Load(int3(tc, 0)).x; // stored depth, z/w
	float dstDepth = 1.0; // smallest depth written so far by the depth-writing fragments
	for(i = 0; i < fragmentCount; ++i)
	{
		OIT_Fragment frag = sorted[i];
		uint stateBits = frag.stateBits;
		float fragDepth = frag.depth;

		// emulate the "equal" depth test when depth testing isn't disabled
		if((stateBits & (GLS_DEPTHFUNC_EQUAL | GLS_DEPTHTEST_DISABLE)) == GLS_DEPTHFUNC_EQUAL &&
			fragDepth != dstDepth)
		{
			continue;
		}

		float4 fragColor = UnpackColor(frag.color);
		fragColor = DepthFadeFragmentColor(fragColor, frag, storedDepthZW);
		color = Blend(fragColor, color, stateBits);

		// emulate depth writes
		if((stateBits & GLS_DEPTHMASK_TRUE) != 0u && fragDepth < dstDepth)
		{
			dstDepth = fragDepth;
		}
	}

	// write out the fragment shader ID of the closest fragment of the center pixel
	if(fragmentCount > 0)
	{
		// the sort puts greater depths first, so the last entry is the closest one
		uint lastFragmentIndex = fragmentCount - 1;
		OIT_Fragment closest = sorted[lastFragmentIndex];
		uint shaderTrace = closest.shaderTrace;

		// bit 0: tracing enabled, bits 1-2: frame index, remaining bits: shader ID
		if((shaderTrace & 1u) != 0u)
		{
			uint2 fragmentCoords = uint2(input.position.xy);
			uint2 centerCoords = uint2(centerPixel & 0xFFFF, centerPixel >> 16);
			if(all(fragmentCoords == centerCoords))
			{
				RWByteAddressBuffer shaderIdBuf = ResourceDescriptorHeap[shaderIndexBuffer];
				uint frameIndex = (shaderTrace >> 1) & 3;
				uint shaderId = shaderTrace >> 3;
				shaderIdBuf.Store(frameIndex * 4, shaderId); // one 4-byte slot per frame index
			}
		}
	}

	return color;
}