cnq3/code/renderer/shaders/crp/vl_frustum_injection_partic...

281 lines
10 KiB
HLSL

/*
===========================================================================
Copyright (C) 2024 Gian 'myT' Schellenbaum
This file is part of Challenge Quake 3 (CNQ3).
Challenge Quake 3 is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.
Challenge Quake 3 is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
===========================================================================
*/
// volumetric lighting: inject particles into the frustum material textures
// 0 -> particle is a point
// 1 -> particle is a sphere, no super-sampling
// 2 -> particle is a sphere, 2x super-sampling
// Compile-time quality selector for this pass (see the 0/1/2 table above).
#define QUALITY 1
#include "common.hlsli"
#include "scene_view.h.hlsli"
// The super-sampling macros are defined before vl_common.h.hlsli is included.
// NOTE(review): presumably they select the VoxelSamples / SphereSamples tables
// (and their *SampleCount constants) used by the compute shader below;
// confirm in vl_common.h.hlsli.
#if QUALITY >= 2
#define VOXEL_SUPERSAMPLING_2X
#define SPHERE_SUPERSAMPLING_2X
#else
#define VOXEL_SUPERSAMPLING_1X
#define SPHERE_SUPERSAMPLING_1X
#endif
#include "vl_common.h.hlsli"
// Root constants of the particle injection pass.
// Every *Index member is used by the shader as an index into ResourceDescriptorHeap.
cbuffer RootConstants
{
// froxel dimensions of a single tile: froxel index = tile corner * tileScale + offset within the tile
uint3 tileScale;
// not referenced in this file; presumably padding after the uint3 (confirm against the CPU-side layout)
uint pad0;
// dimensions used to unflatten a flat tile index into 3D tile coordinates
uint3 tileResolution;
// RWStructuredBuffer<Particle>: the particles themselves
uint particleBufferIndex;
// RWTexture3D<float4>: receives scattering RGB + absorption
uint materialTextureAIndex;
// RWTexture3D<float4>: receives emissive RGB + anisotropy
uint materialTextureBIndex;
// RWTexture3D<float>: receives coverage
uint materialTextureCIndex;
// RWStructuredBuffer<Tile>: per-tile firstParticle / particleCount
uint tileBufferIndex;
// RWStructuredBuffer<uint>: indexed by dtid.x / THREAD_COUNT to get the tile index to process
uint tileIndexBufferIndex;
// RWStructuredBuffer<uint>: particle indices, addressed as firstParticle + i for each tile
uint particleIndexBufferIndex;
// only referenced inside the disabled (#if 0) bounds check of the compute shader
uint counterBufferIndex;
// only referenced in a commented-out line of that same disabled bounds check
uint tileCount;
}
// Number of slots in each group-shared accumulator array below.
#define VOXEL_COUNT 512
// Thread group size of the compute shader (used in its numthreads attribute).
#define THREAD_COUNT 512
// Group-shared accumulators, one slot per tile-local froxel.
// They hold fixed-point values (float * scale, converted to uint) so that
// contributions can be summed with InterlockedAdd.
groupshared uint s_scatterR[VOXEL_COUNT];
groupshared uint s_scatterG[VOXEL_COUNT];
groupshared uint s_scatterB[VOXEL_COUNT];
groupshared uint s_absorption[VOXEL_COUNT];
groupshared uint s_emissiveR[VOXEL_COUNT];
groupshared uint s_emissiveG[VOXEL_COUNT];
groupshared uint s_emissiveB[VOXEL_COUNT];
groupshared uint s_anisotropy[VOXEL_COUNT];
groupshared uint s_coverage[VOXEL_COUNT];
// Fixed-point scales: values are multiplied by these before accumulation
// and divided by them again when the results are written to the textures.
// scattering, absorption and emissive
static const float g_materialScale = 131072.0;
// anisotropy
static const float g_anisotropyScale = 1024.0;
// coverage
static const float g_coverageScale = 1024.0;
// Returns the smallest of the three world-space distances between froxel 'id'
// and its immediate neighbors along +X, +Y and +Z.
// 'textureSize' is forwarded unchanged to SceneView::FroxelIndexToWorldSpace.
float FroxelMinSize(SceneView scene, uint3 id, float3 textureSize)
{
	const float3 origin = scene.FroxelIndexToWorldSpace(id, textureSize);
	const float3 nextX = scene.FroxelIndexToWorldSpace(id + uint3(1, 0, 0), textureSize);
	const float3 nextY = scene.FroxelIndexToWorldSpace(id + uint3(0, 1, 0), textureSize);
	const float3 nextZ = scene.FroxelIndexToWorldSpace(id + uint3(0, 0, 1), textureSize);

	return min3(distance(origin, nextX), distance(origin, nextY), distance(origin, nextZ));
}
// Adds one particle's contribution to a tile-local froxel's group-shared accumulators.
// froxelFlatIndex:  flat index of the froxel within the tile (slot in the s_* arrays)
// particleCoverage: weight applied to every material value of the particle
// scattering/emissive/absorption/anisotropy: the particle's material values
// The weighted values are converted to fixed-point uints (see the g_*Scale constants)
// so that they can be summed across threads with InterlockedAdd.
void AccumulateParticleInFroxel(uint froxelFlatIndex, float particleCoverage, float3 scattering, float3 emissive, float absorption, float anisotropy)
{
	uint4 scatterAbs = g_materialScale * particleCoverage * float4(scattering, absorption);
	uint4 emissiveAniso = float4(g_materialScale.xxx, g_anisotropyScale) * particleCoverage * float4(emissive, anisotropy);
	uint coverage = g_coverageScale * particleCoverage;

	InterlockedAdd(s_scatterR[froxelFlatIndex], scatterAbs.r);
	InterlockedAdd(s_scatterG[froxelFlatIndex], scatterAbs.g);
	InterlockedAdd(s_scatterB[froxelFlatIndex], scatterAbs.b);
	InterlockedAdd(s_absorption[froxelFlatIndex], scatterAbs.w);
	InterlockedAdd(s_emissiveR[froxelFlatIndex], emissiveAniso.r);
	InterlockedAdd(s_emissiveG[froxelFlatIndex], emissiveAniso.g);
	InterlockedAdd(s_emissiveB[froxelFlatIndex], emissiveAniso.b);
	InterlockedAdd(s_anisotropy[froxelFlatIndex], emissiveAniso.w);
	InterlockedAdd(s_coverage[froxelFlatIndex], coverage);
}

// Injects the particles of one tile into the frustum material textures.
// Each thread group handles the tile found at tileIndexBuffer[dtid.x / THREAD_COUNT]:
// 1. every thread clears its slot of the group-shared accumulators
// 2. the threads stride over the tile's particle list and accumulate each
//    particle's weighted material values into the froxels it touches
// 3. every thread adds the accumulated values of its own froxel to the
//    material textures, provided something was accumulated there
[numthreads(THREAD_COUNT, 1, 1)]
void cs(uint3 dtid : SV_DispatchThreadID, uint gtidx : SV_GroupIndex)
{
	uint tileIndexIndex = dtid.x / THREAD_COUNT;
	// disabled bounds check, kept for reference
#if 0
	RWStructuredBuffer<Counters> counterBuffer = ResourceDescriptorHeap[counterBufferIndex];
	Counters counters = counterBuffer[0];
	//if(tileIndexIndex >= tileCount)
	if(tileIndexIndex >= counters.tileCount)
	{
		return; // should never happen
	}
#endif

	RWStructuredBuffer<uint> tileIndexBuffer = ResourceDescriptorHeap[tileIndexBufferIndex];
	RWTexture3D<float4> materialTextureA = ResourceDescriptorHeap[materialTextureAIndex];
	uint3 textureSize = GetTextureSize(materialTextureA);
	uint tileIndex = tileIndexBuffer[tileIndexIndex];
	uint3 tileCornerIndex = UnflattenIndex(tileIndex, tileResolution);
	uint3 tileThreadIndex = UnflattenIndex(gtidx, tileScale);
	// index of the tile's first froxel in the material textures
	uint3 tileOrigin = tileCornerIndex * tileScale;
	uint3 id = tileOrigin + tileThreadIndex;
	int3 froxelIndexMin = int3(tileOrigin);
	int3 froxelIndexMax = int3(tileOrigin) + int3(tileScale) - int3(1, 1, 1);
	uint smIndex = FlattenIndex(id - uint3(froxelIndexMin), tileScale);

	// clear this thread's accumulator slot before any thread starts adding to it
	if(smIndex < VOXEL_COUNT)
	{
		s_scatterR[smIndex] = 0;
		s_scatterG[smIndex] = 0;
		s_scatterB[smIndex] = 0;
		s_absorption[smIndex] = 0;
		s_emissiveR[smIndex] = 0;
		s_emissiveG[smIndex] = 0;
		s_emissiveB[smIndex] = 0;
		s_anisotropy[smIndex] = 0;
		s_coverage[smIndex] = 0;
	}
	GroupMemoryBarrierWithGroupSync();

	RWTexture3D<float4> materialTextureB = ResourceDescriptorHeap[materialTextureBIndex];
	RWTexture3D<float> materialTextureC = ResourceDescriptorHeap[materialTextureCIndex];
	RWStructuredBuffer<Particle> particleBuffer = ResourceDescriptorHeap[particleBufferIndex];
	RWStructuredBuffer<Tile> tileBuffer = ResourceDescriptorHeap[tileBufferIndex];
	RWStructuredBuffer<uint> particleIndexBuffer = ResourceDescriptorHeap[particleIndexBufferIndex];
	SceneView scene = GetSceneView();
	Tile tile = tileBuffer[tileIndex];
	float3 textureSizeF = float3(textureSize);
#if QUALITY > 0
	// reference froxel size for the whole tile, measured near the tile's middle
	float froxelMinSize = FroxelMinSize(scene, tileOrigin + (tileScale / 2) - uint3(1, 1, 1), textureSizeF);
#endif

	uint particleCount = tile.particleCount;
	uint firstParticle = tile.firstParticle;
	// each thread processes every THREAD_COUNT-th particle of the tile
	for(uint i = smIndex; i < particleCount; i += THREAD_COUNT)
	{
		uint particleIndex = particleIndexBuffer[firstParticle + i];
		Particle particle = particleBuffer[particleIndex];

		// particle.scattering holds the emissive color when the particle is emissive
		float3 scattering;
		float3 emissive;
		[flatten]
		if(particle.isEmissive != 0)
		{
			scattering = float3(0, 0, 0);
			emissive = particle.scattering;
		}
		else
		{
			scattering = particle.scattering;
			emissive = float3(0, 0, 0);
		}

#if QUALITY > 0
		bool isBigParticle = particle.radius >= froxelMinSize;
		bool isMediumParticle = particle.radius >= 0.125 * froxelMinSize;

		// clip the particle's froxel box to this tile, in tile-local coordinates
		int3 boxMin = particle.froxelMin - froxelIndexMin;
		int3 boxMax = particle.froxelMax - froxelIndexMin;
		boxMin = max(boxMin, int3(0, 0, 0));
		boxMax = min(boxMax, int3(tileScale) - int3(1, 1, 1));
		// an empty range on any single axis means the box covers no froxel of this tile,
		// so the loops below would not visit a single froxel
		if(any(boxMax < boxMin))
		{
			continue;
		}

		for(int z = boxMin.z; z <= boxMax.z; z++)
		{
			for(int y = boxMin.y; y <= boxMax.y; y++)
			{
				for(int x = boxMin.x; x <= boxMax.x; x++)
				{
					uint3 froxelGroupThreadId = uint3(x, y, z);
					uint froxelFlatIndex = FlattenIndex(froxelGroupThreadId, tileScale);
					uint3 froxelThreadId = tileOrigin + froxelGroupThreadId;
					float particleCoverage = 0.0;
					if(isBigParticle)
					{
						// sample positions inside the froxel and average a noisy radial falloff
						float3 tcBase = (float3(froxelThreadId) + float3(0.5, 0.5, 0.5)) / textureSizeF;
						for(uint s = 0; s < VoxelSampleCount; s++)
						{
							float3 tcSample = tcBase + VoxelSamples[s] / textureSizeF;
							float3 position = scene.FroxelTCToWorldSpace(tcSample, textureSizeF);
							float dist = distance(position, particle.position);
							float coverage = sqrt(saturate(1.0 - dist / particle.radius));
							coverage *= 0.25 + 0.75 * SimplexNoise3D(0.25 * (position - particle.position));
							particleCoverage += coverage;
						}
						particleCoverage /= float(VoxelSampleCount);
					}
					else if(isMediumParticle)
					{
						float3 basePosition = scene.FroxelIndexToWorldSpace(froxelThreadId, textureSizeF);
						for(uint s = 0; s < SphereSampleCount; s++)
						{
							float3 position = basePosition + particle.radius * SphereSamples[s];
							int3 sampleVoxelIdx = scene.FroxelWorldSpaceToIndex(position, textureSizeF);
							bool isInVoxel = all(froxelThreadId == uint3(sampleVoxelIdx));
							// NOTE(review): samples outside of the froxel get dist = 0, i.e. a coverage of 1;
							// confirm that this is intended and not meant to contribute 0 instead
							float dist = isInVoxel ? distance(position, particle.position) : 0.0;
							float coverage = sqrt(saturate(1.0 - dist / particle.radius));
							particleCoverage += coverage;
						}
						particleCoverage /= float(SphereSampleCount);
						// scale by the sphere-to-froxel volume ratio, capped at 1
						particleCoverage *= min(SphereVolume(particle.radius) / scene.FroxelVolume(froxelThreadId, textureSizeF), 1.0);
					}
					else
					{
						// assumes the sphere's density is not 1 but 1/distance
						float density = 2.0 * PI * particle.radius * particle.radius;
						particleCoverage = min(density / scene.FroxelVolume(froxelThreadId, textureSizeF), 1.0);
					}

					// nothing to add for this froxel
					if(particleCoverage == 0.0)
					{
						continue;
					}

					AccumulateParticleInFroxel(froxelFlatIndex, particleCoverage, scattering, emissive, particle.absorption, particle.anisotropy);
				}
			}
		}
#else
		// point particle: contributes fully to the single froxel containing its position
		int3 froxelIndex = scene.FroxelWorldSpaceToIndex(particle.position, textureSizeF);
		if(!IsInRange(froxelIndex, froxelIndexMin, froxelIndexMax))
		{
			continue;
		}

		uint froxelFlatIndex = FlattenIndex(uint3(froxelIndex) - uint3(froxelIndexMin), tileScale);
		float particleCoverage = 1.0;
		AccumulateParticleInFroxel(froxelFlatIndex, particleCoverage, scattering, emissive, particle.absorption, particle.anisotropy);
#endif
	}

	// all contributions must be in shared memory before they are read back
	GroupMemoryBarrierWithGroupSync();

	// NOTE(review): the texture updates below are non-atomic read-modify-writes;
	// presumably each froxel is only ever written by a single thread of a single group - confirm
	if(smIndex < VOXEL_COUNT &&
		s_coverage[smIndex] > 0 &&
		all(id < textureSize))
	{
		// convert the fixed-point sums back to floating-point
		float4 accumScatterAbs = float4(s_scatterR[smIndex], s_scatterG[smIndex], s_scatterB[smIndex], s_absorption[smIndex]) / g_materialScale;
		float4 accumEmissiveAniso = float4(s_emissiveR[smIndex], s_emissiveG[smIndex], s_emissiveB[smIndex], s_anisotropy[smIndex]) / float4(g_materialScale.xxx, g_anisotropyScale);
		float accumCoverage = s_coverage[smIndex] / g_coverageScale;
		materialTextureA[id] += accumScatterAbs;
		materialTextureB[id] += accumEmissiveAniso;
		materialTextureC[id] += accumCoverage;
	}
}