cnq3/code/renderer/shaders/crp/mblur_blur.hlsl
myT 838c9a6485 added motion blur and freeze frame commands
- fixed crash due to missing blue noise texture
- added motion vector viz
- renamed TextureFormat entries
2024-02-18 16:26:05 +01:00

336 lines
11 KiB
HLSL

/*
===========================================================================
Copyright (C) 2024 Gian 'myT' Schellenbaum
This file is part of Challenge Quake 3 (CNQ3).
Challenge Quake 3 is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.
Challenge Quake 3 is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
===========================================================================
*/
// motion blur reconstruction filter
#include "common.hlsli"
#include "fullscreen.hlsli"
#include "scene_view.h.hlsli"
// Root constants for the motion blur reconstruction pass.
// The texture indices are used to fetch resources from ResourceDescriptorHeap
// and the sampler indices to fetch samplers from SamplerDescriptorHeap.
cbuffer RootConstants : register(b0)
{
uint colorTextureIndex; // color texture that gets blurred
uint tileTextureIndex; // float2 texture of per-tile neighborhood velocities
uint packedTextureIndex; // uint2 texture: x = velocity packed as 2 halves, y = float bits of the depth (scaled by zFar when read)
uint blueNoiseTextureIndex; // blue noise texture used for jittering
uint pointSamplerIndex; // clamp
uint linearSamplerIndex; // clamp
};
#if 1
/*
Summary:
- sampling N times in a line along fragment's direction
- sampling N times in a line along neighborhood's strongest direction
- bilinearly interpolating neighborhood tiles in polar coordinates with clamped velocity
- jittering the lines' center points with blue noise
- binary classification of samples based on depth and distance/velocity
- weighting samples based on direction similarity
- background reconstruction using mirrored samples
References:
- "A Reconstruction Filter for Plausible Motion Blur" by McGuire et al.
- "Next-Generation Post-Processing in Call of Duty Advanced Warfare" by Jorge Jimenez
*/
// Classifies a single tap as foreground, background or "center" and
// updates the accumulators accordingly.
//
// fgAccum/bgAccum: rgb = summed colors, w = summed weights
// alpha:           number of taps classified as foreground
// center:          number of taps that fit neither category
// Zs/Zc:           depth of the tap / of the center fragment
// Vsl/Vcl:         velocity length of the tap / of the center fragment
// distTC:          distance between the tap and the center fragment
// C0s:             color of the tap
// C1s:             color of the mirrored tap on the other side of the center
// W0d:             direction similarity weight of the tap
void ProcessSample(
	inout float4 fgAccum, inout float4 bgAccum, inout float alpha, inout float center,
	float Zs, float Zc, float Vsl, float Vcl, float distTC, float3 C0s, float3 C1s, float W0d)
{
	const float DepthThreshold = 0.28; // 10mm assuming 56u == 2m

	// the tap is not further away than the center (within the threshold)
	const bool tapNotBehind = Zs - DepthThreshold < Zc;
	// the tap's own velocity is long enough to reach the center
	const bool tapReachesCenter = Vsl >= distTC;
	// the tap is at the same depth as the center or further away
	const bool tapNotInFront = Zs >= Zc;
	// the center's velocity is long enough to reach the tap
	const bool centerReachesTap = Vcl >= distTC;

	if(tapNotBehind && tapReachesCenter)
	{
		// foreground: the tap blurs over the center
		// the mirrored tap is used to reconstruct the background behind it
		fgAccum += float4(C0s, 1) * W0d;
		bgAccum += float4(C1s, 1);
		alpha += 1.0;
		return;
	}

	if(tapNotInFront && centerReachesTap)
	{
		// background: the center blurs over the tap
		bgAccum += float4(C0s, 1);
		return;
	}

	// neither: the tap only counts towards the center color's share
	center += 1.0;
}
// Pixel shader of the motion blur reconstruction filter.
//
// Velocities read from the tile and packed textures are multiplied by the
// full-resolution size to get lengths in pixels and are added to texture
// coordinates unscaled, i.e. they are handled as texture-space offsets.
//
// Steps:
// 1. blend the 4 neighboring tile velocities in polar coordinates -> Vn
// 2. read the fragment's own velocity (Vc) and depth (Zc)
// 3. walk 2 interleaved lines (along Vn and along Vc), taking a pair of
//    mirrored taps at each step and classifying them with ProcessSample
// 4. blend foreground, background and center color
//
// Returns the unmodified color when the neighborhood velocity is below
// half a pixel or when no usable tap was gathered.
float4 ps(VOut input) : SV_Target
{
	SceneView scene = GetSceneView();
	SamplerState pointSampler = SamplerDescriptorHeap[pointSamplerIndex];
	Texture2D colorTexture = ResourceDescriptorHeap[colorTextureIndex];
	Texture2D<float2> tileTexture = ResourceDescriptorHeap[tileTextureIndex];
	Texture2D<uint2> packedTexture = ResourceDescriptorHeap[packedTextureIndex];
	Texture2D blueNoiseTexture = ResourceDescriptorHeap[blueNoiseTextureIndex];
	float2 fullResSize = float2(GetTextureSize(colorTexture));
	float2 tileSize = float2(GetTextureSize(tileTexture));
	uint2 blueNoiseSize = GetTextureSize(blueNoiseTexture);

	// neighborhood velocity: manual bilinear blend of the 4 closest tiles,
	// done on polar coordinates instead of cartesian ones
	float2 tileTC = input.texCoords;
	float2 Vnraw = tileTexture.SampleLevel(pointSampler, tileTC, 0);
	float2 tileWeights = frac(tileTC * tileSize + 0.5);
	float4 Vnx = tileTexture.GatherRed(pointSampler, tileTC);
	float4 Vny = tileTexture.GatherGreen(pointSampler, tileTC);
	float2 Vnpa[4];
	float Vnw[4];
	for(int t = 0; t < 4; t++)
	{
		float2 Vni = float2(Vnx[t], Vny[t]);
		if(length(Vni * fullResSize) < 2.0)
		{
			Vni = Vnraw; // ignore tiles with small vectors
		}
		Vnpa[t] = CartesianToPolar(Vni);
	}
	Vnw[0] = (1.0 - tileWeights.x) * tileWeights.y; // (-,+)
	Vnw[1] = tileWeights.x * tileWeights.y; // (+,+)
	Vnw[2] = tileWeights.x * (1.0 - tileWeights.y); // (+,-)
	Vnw[3] = (1.0 - tileWeights.x) * (1.0 - tileWeights.y); // (-,-)
	float2 Vnp = Vnw[0] * Vnpa[0] + Vnw[1] * Vnpa[1] + Vnw[2] * Vnpa[2] + Vnw[3] * Vnpa[3];
	Vnp.x = max(Vnp.x, length(Vnraw)); // make sure the lerp won't yield a smaller vector
	float2 Vn = PolarToCartesian(Vnp);
	float2 Vnn = normalize(Vn);

	// center fragment: color, velocity and depth
	float3 color = colorTexture.SampleLevel(pointSampler, input.texCoords, 0).rgb;
	uint2 packedFrag = packedTexture.Load(uint3(input.texCoords * fullResSize, 0));
	float2 Vc = UnpackHalf2(packedFrag.x);
	float Vcl = length(Vc);
	float2 Vcn = Vc / Vcl; // not finite when Vcl is 0, replaced by Vnn below in that case
	float Zc = asfloat(packedFrag.y) * scene.zFar;

	// nothing to blur when the neighborhood barely moves
	float lengthPx = length(Vn * fullResSize);
	if(lengthPx < 0.5)
	{
		return float4(color, 1);
	}

	// the fragment's own direction is replaced by the neighborhood's
	// when the fragment is slow (Vcl is left untouched)
	if(length(Vc * fullResSize) <= 4.0)
	{
		Vc = Vn;
		Vcn = Vnn;
	}

	// per-pixel jitter of the lines' center points
	uint2 blueNoisePx = (input.texCoords * fullResSize) % blueNoiseSize;
	float2 blueNoise = blueNoiseTexture.Load(uint3(blueNoisePx, 0)).xy;
	float tcJitter = blueNoise.x;

	// lengthPx >= 0.5 at this point, so sampleCount >= 2 and the divisors are >= 1
	uint sampleCount = min(2 * uint(ceil(lengthPx)), 64);
	float2 tcStepN = 0.5 * Vn / float2(sampleCount - 1, sampleCount - 1);
	float2 tcStepC = 0.5 * Vc / float2(sampleCount - 1, sampleCount - 1);

	float4 fgAccum = float4(0, 0, 0, 0);
	float4 bgAccum = float4(0, 0, 0, 0);
	float alpha = 0.0; // foreground (1) / background (0) lerp
	float center = 0.0; // center (1) / background (0) lerp
	uint realSampleCount = 0;

	// the center tap always counts as background
	// and also as foreground when it moves by at least a pixel
	if(length(Vc * fullResSize) >= 1.0)
	{
		fgAccum += float4(color, 1);
		alpha += 1.0;
	}
	bgAccum += float4(color, 1);

	uint totalSampleCount = sampleCount * 2;
	for(uint i = 1; i < totalSampleCount; i++)
	{
		// even iterations walk the neighborhood line, odd ones the fragment line
		// each iteration takes 2 taps mirrored around the jittered center point
		float lineStep = float(i / 2);
		float i0 = tcJitter + lineStep;
		float i1 = tcJitter - lineStep;
		float2 tcStep, dirn;
		[flatten]
		if(i % 2 == 0)
		{
			tcStep = tcStepN;
			dirn = Vnn;
		}
		else
		{
			tcStep = tcStepC;
			dirn = Vcn;
		}
		float2 TC0 = input.texCoords + i0 * tcStep;
		float2 TC1 = input.texCoords + i1 * tcStep;

		// the pair is dropped as a whole when either tap fails the IsValue01 test
		[branch]
		if(!IsValue01(TC0) || !IsValue01(TC1))
		{
			continue;
		}

		uint2 packedSample0 = packedTexture.Load(uint3(TC0 * fullResSize, 0));
		float2 V0s = UnpackHalf2(packedSample0.x);
		float V0sl = length(V0s);
		float2 V0sn = V0s / V0sl;
		float Z0s = asfloat(packedSample0.y) * scene.zFar;
		float3 C0s = colorTexture.SampleLevel(pointSampler, TC0, 0).rgb;
		float distTC0 = distance(TC0, input.texCoords);
		float W0d = max(dot(V0sn, dirn), 0.0);

		uint2 packedSample1 = packedTexture.Load(uint3(TC1 * fullResSize, 0));
		float2 V1s = UnpackHalf2(packedSample1.x);
		float V1sl = length(V1s);
		float2 V1sn = V1s / V1sl;
		float Z1s = asfloat(packedSample1.y) * scene.zFar;
		float3 C1s = colorTexture.SampleLevel(pointSampler, TC1, 0).rgb;
		float distTC1 = distance(TC1, input.texCoords);
		float W1d = max(dot(V1sn, dirn), 0.0);

		// each tap passes the other one as its mirrored sample
		ProcessSample(fgAccum, bgAccum, alpha, center, Z0s, Zc, V0sl, Vcl, distTC0, C0s, C1s, W0d);
		ProcessSample(fgAccum, bgAccum, alpha, center, Z1s, Zc, V1sl, Vcl, distTC1, C1s, C0s, W1d);
		realSampleCount += 2;
	}

	// realSampleCount is 0 when every tap pair was rejected by the IsValue01 test:
	// alpha and center would then be divided by 0 and yield Inf/NaN colors
	if(realSampleCount == 0 || fgAccum.w <= 0.0 || bgAccum.w <= 0.0)
	{
		return float4(color, 1.0);
	}

	fgAccum.rgb /= fgAccum.w;
	bgAccum.rgb /= bgAccum.w;
	// NOTE(review): alpha can include the center tap while realSampleCount never does,
	// so alpha can end up slightly above 1 - confirm that this is intended
	alpha /= float(realSampleCount);
	center /= float(realSampleCount);
	float3 bg = center * color + (1.0 - center) * bgAccum.rgb;
	float3 blended = alpha * fgAccum.rgb + (1.0 - alpha) * bg;
	return float4(blended, 1.0);
}
#else
/*
Summary:
- sampling N times in a line along fragment's direction
- sampling N times in a line along neighborhood's strongest direction
- jittering neighborhood tiles to trade banding for blue noise
- jittering the lines' center points with blue noise
- weighting samples based on depth, distance and direction similarity
References:
- "A Reconstruction Filter for Plausible Motion Blur" by McGuire et al.
- "A Fast and Stable Feature-Aware Motion Blur Filter" by Guertin et al.
*/
// Linear falloff: 1 when distSamples is 0, 0 when distSamples >= velocityLength.
float Cone(float distSamples, float velocityLength)
{
	return saturate(1.0 - distSamples / velocityLength);
}
// Smoothed step: 1 when distSamples is below 95% of velocityLength,
// 0 above 105% of it, with a smoothstep transition in between.
float Cylinder(float distSamples, float velocityLength)
{
	float fadeStart = 0.95 * velocityLength;
	float fadeEnd = 1.05 * velocityLength;
	return 1.0 - smoothstep(fadeStart, fadeEnd, distSamples);
}
// Soft depth comparison: 1 when zB <= zA,
// decreasing with the difference (zB - zA) relative to the larger of the 2 depths.
float SoftDepthFalloff(float zA, float zB)
{
	float relativeDiff = (zB - zA) / max(zB, zA);
	return saturate(1.0 - relativeDiff);
}
// Pixel shader of the alternative (weighted) motion blur reconstruction filter.
//
// Taps are taken along 2 interleaved lines, one along the neighborhood
// velocity (Vn) and one along the fragment's own velocity (Vc).
// Each tap is weighted using depth, distance and direction similarity.
//
// Returns the unmodified color when the neighborhood velocity is below half a pixel.
float4 ps(VOut input) : SV_Target
{
	SceneView scene = GetSceneView();
	SamplerState pointSampler = SamplerDescriptorHeap[pointSamplerIndex];
	Texture2D colorTexture = ResourceDescriptorHeap[colorTextureIndex];
	Texture2D<float2> tileTexture = ResourceDescriptorHeap[tileTextureIndex];
	Texture2D<uint2> packedTexture = ResourceDescriptorHeap[packedTextureIndex];
	Texture2D blueNoiseTexture = ResourceDescriptorHeap[blueNoiseTextureIndex];
	float2 fullResSize = float2(GetTextureSize(colorTexture));
	float2 tileSize = float2(GetTextureSize(tileTexture));
	uint2 blueNoiseSize = GetTextureSize(blueNoiseTexture);

	// jitter the tile lookup to trade banding for noise
	// (at most a quarter of a tile, assuming the noise values are in [0;1] - TODO confirm)
	uint2 blueNoisePx = (input.texCoords * fullResSize) % blueNoiseSize;
	float2 blueNoise = blueNoiseTexture.Load(uint3(blueNoisePx, 0)).xy;
	float2 tileJitterTC = 0.25 * (blueNoise * 2.0 - 1.0) / tileSize;
	float2 tileTC = input.texCoords + tileJitterTC;
	float2 Vn = tileTexture.SampleLevel(pointSampler, tileTC, 0);
	float2 Vnn = normalize(Vn);

	// center fragment: color, velocity and depth
	float3 color = colorTexture.SampleLevel(pointSampler, input.texCoords, 0).rgb;
	uint2 packedFrag = packedTexture.Load(uint3(input.texCoords * fullResSize, 0));
	float2 Vc = UnpackHalf2(packedFrag.x);
	float Vcl = length(Vc);
	float2 Vcn = Vc / Vcl;
	float Zc = asfloat(packedFrag.y) * scene.zFar;

	// nothing to blur when the neighborhood barely moves
	float lengthPx = length(Vn * fullResSize);
	if(lengthPx < 0.5)
	{
		return float4(color, 1);
	}

	// total tap count across both lines: each line gets every other iteration
	// lengthPx >= 0.5 at this point, so sampleCount >= 4 and the divisors are >= 3
	uint sampleCount = min(4 * uint(ceil(lengthPx)), 64);
	float2 tcStepN = 2.0 * Vn / float2(sampleCount - 1, sampleCount - 1);
	float2 tcStepC = 2.0 * Vc / float2(sampleCount - 1, sampleCount - 1);

	// the center tap has a weight of 1
	float3 colorAccum = color;
	float weightAccum = 1.0;
	for(uint i = 0; i < sampleCount; i++)
	{
		float lineStep = float(i / 2);
		float2 tc;
		float2 dirn;
		// alternate between the 2 lines on every iteration
		// (this used to test sampleCount, which never changes inside the loop,
		// so the fragment line was never sampled)
		[flatten]
		if(i % 2 == 0)
		{
			tc = input.texCoords - 0.5 * Vn + (blueNoise.x + lineStep) * tcStepN;
			dirn = Vnn;
		}
		else
		{
			tc = input.texCoords - 0.5 * Vc + (blueNoise.y + lineStep) * tcStepC;
			dirn = Vcn;
		}

		// skip taps that fail the IsValue01 test
		if(!IsValue01(tc))
		{
			continue;
		}

		uint2 packedSample = packedTexture.Load(uint3(tc * fullResSize, 0));
		float2 Vs = UnpackHalf2(packedSample.x);
		float Vsl = length(Vs);
		float2 Vsn = Vs / Vsl;
		float Zs = asfloat(packedSample.y) * scene.zFar;
		float3 colorSample = colorTexture.SampleLevel(pointSampler, tc, 0).rgb;

		float fg = SoftDepthFalloff(Zc, Zs);
		float bg = SoftDepthFalloff(Zs, Zc);
		float distTC = distance(tc, input.texCoords);
		// tap in front: weighted by its own velocity
		float fgWeight = fg * Cone(distTC, Vsl) * max(0, dot(dirn, Vsn));
		// tap behind: weighted by the center's velocity
		float bgWeight = bg * Cone(distTC, Vcl) * max(0, dot(dirn, Vcn));
		// both velocities cover the distance between the tap and the center
		float overlapWeight = 2.0 * Cylinder(distTC, Vsl) * Cylinder(distTC, Vcl);
		float weight = fgWeight + bgWeight + overlapWeight;
		colorAccum += colorSample * weight;
		weightAccum += weight;
	}

	// weightAccum starts at 1 and only has non-negative weights added to it
	float4 result = float4(colorAccum / weightAccum, 1.0);
	return result;
}
#endif