cnq3/code/renderer/shaders/crp/depth_pyramid.hlsl

101 lines
3.4 KiB
HLSL

/*
===========================================================================
Copyright (C) 2024 Gian 'myT' Schellenbaum
This file is part of Challenge Quake 3 (CNQ3).
Challenge Quake 3 is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.
Challenge Quake 3 is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
===========================================================================
*/
// outputs min/max depth values for 7 mip levels (0 to 6) in one go
#include "common.hlsli"
// Root constants: descriptor heap indices of every resource this shader uses.
// The member order defines the constant layout, so it must not be changed
// without updating whatever fills these constants (not visible in this file).
cbuffer RootConstants
{
// RWTexture2D<float2> outputs (x = min depth, y = max depth), finest first.
// Index 0 is written at the depth texture's resolution (2x2 texels per thread).
uint destTextureIndex0;
// Index 1 is written once per thread (half resolution).
uint destTextureIndex1;
// Indices 2 to 6 are written by ProcessMip with 16, 8, 4, 2 and 1 texel(s)
// per thread group along each axis, respectively.
uint destTextureIndex2;
uint destTextureIndex3;
uint destTextureIndex4;
uint destTextureIndex5;
uint destTextureIndex6;
// Texture2D<float> source depth, fetched from ResourceDescriptorHeap.
uint depthTextureIndex;
// Sampler used for the GatherRed fetch, fetched from SamplerDescriptorHeap.
uint depthSamplerIndex;
}
// Per-group scratch storage for the reduction: x = min depth, y = max depth.
// Indexed as [gtid.x][gtid.y]; each ProcessMip level overwrites the first entry
// of every 2x2 footprint it reduces so the next level can read it back.
groupshared float2 s_depth[32][32];
// Produces one mip level from the min/max values currently held in s_depth.
//
// threadCount      side length of the square of threads that take part;
//                  also the number of texels each group writes per axis
// scale            distance in s_depth between the entries being combined
// destTextureIndex descriptor heap index of the RWTexture2D<float2> to write
// gtid             SV_GroupThreadID of the calling thread
// gid              SV_GroupID of the calling thread
// writeToSM        when true, the result is stored back into s_depth
//                  so that a following level can consume it
//
// The function contains no barrier: the caller synchronizes the group
// between successive levels.
void ProcessMip(uint threadCount, uint scale, uint destTextureIndex, uint3 gtid, uint3 gid, bool writeToSM)
{
	const bool inRange = all(gtid.xy < uint2(threadCount, threadCount));
	if(inRange)
	{
		// first and second row/column of this thread's 2x2 footprint
		const uint2 first = gtid.xy * scale * 2;
		const uint2 second = first + uint2(scale, scale);

		// sXY: X/Y select first (0) or second (1) along s_depth's 1st/2nd index
		const float2 s00 = s_depth[first.x][first.y];
		const float2 s01 = s_depth[first.x][second.y];
		const float2 s10 = s_depth[second.x][first.y];
		const float2 s11 = s_depth[second.x][second.y];

		const float2 result = float2(
			min4(s00.x, s01.x, s10.x, s11.x),
			max4(s00.y, s01.y, s10.y, s11.y));

		if(writeToSM)
		{
			// only this thread touches this footprint, so the in-place update is safe
			s_depth[first.x][first.y] = result;
		}

		RWTexture2D<float2> output = ResourceDescriptorHeap[destTextureIndex];
		const uint2 texel = gid.xy * threadCount + gtid.xy;
		output[texel] = result;
	}
}
// Compute entry point. Each 32x32 thread group reduces a 64x64 tile of the
// depth texture:
// - every thread gathers a 2x2 quad of depth texels and copies them to dst0
//   as (depth, depth) pairs,
// - the quad's min/max goes to dst1 and into s_depth,
// - five ProcessMip passes then halve the tile each time and write
//   destTextureIndex2 to destTextureIndex6.
// Barriers are issued here, outside of any thread-dependent branch, so that
// every thread of the group reaches them.
[numthreads(32, 32, 1)]
void cs(uint3 dtid : SV_DispatchThreadID, uint3 gtid : SV_GroupThreadID, uint3 gid : SV_GroupID)
{
	Texture2D<float> depthTexture = ResourceDescriptorHeap[depthTextureIndex];
	SamplerState depthSampler = SamplerDescriptorHeap[depthSamplerIndex];
	RWTexture2D<float2> dst0 = ResourceDescriptorHeap[destTextureIndex0];
	RWTexture2D<float2> dst1 = ResourceDescriptorHeap[destTextureIndex1];

	float2 depthTextureSize = float2(GetTextureSize(depthTexture));

	// GatherRed order: x=(0, 1), y=(1, 1), z=(1, 0), w=(0, 0)
	// Sample at the corner shared by the 4 texels of the quad rather than at the
	// center of texel (0, 0): a texel center sits exactly on the boundary of the
	// gather footprint, so coordinate rounding could select the previous
	// row/column. The shared corner selects the same 4 texels with a
	// half-texel safety margin on each side.
	float2 tc = (float2(dtid.xy * 2) + float2(1.0, 1.0)) / depthTextureSize;
	float4 mip0 = depthTexture.GatherRed(depthSampler, tc);

	float minZ = min4(mip0.x, mip0.y, mip0.z, mip0.w);
	float maxZ = max4(mip0.x, mip0.y, mip0.z, mip0.w);
	float2 minMaxZ = float2(minZ, maxZ);
	s_depth[gtid.x][gtid.y] = minMaxZ;

	// finest level: min and max are both the texel's own depth
	dst0[dtid.xy * 2 + int2(0, 0)] = mip0.ww;
	dst0[dtid.xy * 2 + int2(0, 1)] = mip0.xx;
	dst0[dtid.xy * 2 + int2(1, 0)] = mip0.zz;
	dst0[dtid.xy * 2 + int2(1, 1)] = mip0.yy;
	dst1[dtid.xy] = minMaxZ;

	// each pass reads what the previous one stored in s_depth,
	// hence the group-wide barrier before every pass
	GroupMemoryBarrierWithGroupSync();
	ProcessMip(16, 1, destTextureIndex2, gtid, gid, true);
	GroupMemoryBarrierWithGroupSync();
	ProcessMip(8, 2, destTextureIndex3, gtid, gid, true);
	GroupMemoryBarrierWithGroupSync();
	ProcessMip(4, 4, destTextureIndex4, gtid, gid, true);
	GroupMemoryBarrierWithGroupSync();
	ProcessMip(2, 8, destTextureIndex5, gtid, gid, true);
	GroupMemoryBarrierWithGroupSync();
	// last level: nothing reads s_depth afterwards, so skip the write-back
	ProcessMip(1, 16, destTextureIndex6, gtid, gid, false);
}