cnq3/code/renderer/crp_raytracing.cpp
myT a76dba5cfb raytracing soft shadows, normal smoothing, G-buffer viz
- brightness-corrected ImGUI drawing
- upgraded shader code to HLSL 2021
- vertex normals drawing
2024-02-06 23:15:31 +01:00

652 lines
19 KiB
C++

/*
===========================================================================
Copyright (C) 2024 Gian 'myT' Schellenbaum
This file is part of Challenge Quake 3 (CNQ3).
Challenge Quake 3 is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.
Challenge Quake 3 is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
===========================================================================
*/
// Cinematic Rendering Pipeline - raytracing acceleration structures management
#include "crp_local.h"
#include "shaders/crp/raytracing.h.hlsli"
// Per-bucket bookkeeping used while surfaces are baked into a BLAS.
// Raytracing::BuildBLASes zeroes instances of this struct with memset,
// so it must remain a plain aggregate (no constructors, no member initializers).
struct BLASBuilder
{
	// true when no geometry was counted for this bucket
	bool IsEmpty() const
	{
		return totalVertexCount == 0 || totalIndexCount == 0;
	}

	uint32_t totalVertexCount; // vertex capacity reserved for the bucket (padded estimate)
	uint32_t totalIndexCount; // index capacity reserved for the bucket (padded estimate)
	uint32_t meshCount; // number of surfaces counted for the bucket
	float* buildVertices; // write cursor into the mapped BLAS build vertex buffer (3 floats per vertex)
	uint32_t* buildIndices; // write cursor into the mapped BLAS build index buffer
	BLASMeshDesc* buildMeshes; // malloc'd array of mesh descriptions handed to CmdCreateBLAS
	uint32_t firstVertex; // vertex offset of the next mesh to be written
	uint32_t firstIndex; // index offset of the next mesh to be written
	uint32_t meshIndex; // slot of the next mesh to be written
	BLASVertex* traceVertices; // write cursor into the mapped shader-visible vertex buffer
	uint32_t* traceIndices; // write cursor into the mapped shader-visible index buffer
	BLASMesh* traceMeshes; // base of the mapped shader-visible mesh buffer, indexed with meshIndex
};
// World whose surfaces are enumerated by Raytracing::WorldSurfaceList.
// Only set for the duration of the BuildBLASes call in Raytracing::ProcessWorld
// and reset to NULL right after it.
static world_t* s_world;
static bool IsStaticBLASSurface(const msurface_t* surf)
{
if(surf->shader->numStages == 0 ||
surf->shader->isDynamic ||
surf->shader->polygonOffset)
{
return false;
}
return true;
}
// Maps a shader to its BLAS bucket: the bucket index is the shader's cull type.
static uint32_t GetBLASBucketIndex(const shader_t* shader)
{
	return static_cast<uint32_t>(shader->cullType);
}
// Returns a human-readable name for a BLAS bucket index (a cull type value).
// Asserts and returns "???" for any other value.
static const char* GetBLASBucketName(uint32_t index)
{
	const char* name = "???";
	switch(index)
	{
		case CT_FRONT_SIDED:
			name = "front-sided";
			break;
		case CT_BACK_SIDED:
			name = "back-sided";
			break;
		case CT_TWO_SIDED:
			name = "two-sided";
			break;
		default:
			Q_assert(!"Invalid bucket index");
			break;
	}

	return name;
}
// Transforms a 3D point (implicit w = 1) by a 16-float matrix.
// Output component c is the dot product of (x, y, z, 1) with
// matrix elements c, c + 4, c + 8 and c + 12.
// The result is divided by the resulting w unless w is exactly 0 or 1.
static void TransformPoint(const vec3_t original, const float* matrix4x4, vec3_t result)
{
	float transformed[4];
	for(int c = 0; c < 4; c++)
	{
		transformed[c] =
			original[0] * matrix4x4[c + 0] +
			original[1] * matrix4x4[c + 4] +
			original[2] * matrix4x4[c + 8] +
			matrix4x4[c + 12];
	}

	// perspective divide, skipped when it would be a no-op or a division by zero
	const float w = transformed[3];
	if(w != 1.0f && w != 0.0f)
	{
		transformed[0] /= w;
		transformed[1] /= w;
		transformed[2] /= w;
	}

	// written last so that 'result' is only touched once all inputs were read
	result[0] = transformed[0];
	result[1] = transformed[1];
	result[2] = transformed[2];
}
// true when the surface should be skipped
static bool Tessellate(
int& surfVertexCount, int& surfIndexCount, const surfaceType_t* surface,
const shader_t* shader, int entityNum, double originalTime)
{
if(shader->numStages <= 0)
{
return true;
}
bool depthHack;
tess.numVertexes = 0;
tess.numIndexes = 0;
tess.shader = shader; // needed by R_ComputeTexCoords etc.
UpdateEntityData(depthHack, entityNum, originalTime);
R_TessellateSurface(surface);
surfVertexCount = tess.numVertexes;
surfIndexCount = tess.numIndexes;
if(surfVertexCount <= 0 || surfIndexCount <= 0)
{
return true;
}
RB_DeformTessGeometry(0, surfVertexCount, 0, surfIndexCount);
const shaderStage_t& stage = *shader->stages[0];
R_ComputeColors(&stage, tess.svars[0], 0, surfVertexCount);
R_ComputeTexCoords(&stage, tess.svars[0], 0, surfVertexCount, qfalse);
return false;
}
// Computes the vertex and index counts a surface is expected to produce
// once tessellated, without tessellating it.
// true when the surface should be skipped
static bool EstimateTessellatedSize(
	int& surfVertexCount, int& surfIndexCount, const surfaceType_t* surface,
	const shader_t* shader, int entityNum, double originalTime)
{
	if(shader->numStages <= 0)
	{
		return true;
	}

	// same batch and entity setup as for a real tessellation
	tess.numVertexes = 0;
	tess.numIndexes = 0;
	tess.shader = shader;

	bool depthHack; // handed to UpdateEntityData, the value is not used here
	UpdateEntityData(depthHack, entityNum, originalTime);
	R_ComputeTessellatedSize(&surfVertexCount, &surfIndexCount, surface);

	// surfaces without any geometry are skipped
	return surfVertexCount <= 0 || surfIndexCount <= 0;
}
// Makes sure 'buffer' can hold at least desc.byteCount bytes.
// buffer: handle that gets replaced when the buffer has to grow
// curByteCount: capacity currently tracked for 'buffer', updated on growth
// desc: description of the buffer, desc.byteCount being the required size
// When growing, the new capacity is at least twice the previous one so that
// steadily increasing requests don't re-create the buffer every time.
static void CreateOrGrowBuffer(HBuffer& buffer, uint32_t& curByteCount, const BufferDesc& desc)
{
	if(desc.byteCount <= curByteCount)
	{
		return;
	}

	// the buffer must be created with the capacity that gets recorded:
	// recording more than what was allocated would let later, larger requests
	// pass the check above and write past the end of the real buffer
	BufferDesc grownDesc = desc;
	grownDesc.byteCount = max(curByteCount * 2, desc.byteCount);

	DestroyBufferDelayed(buffer);
	buffer = CreateBuffer(grownDesc);
	curByteCount = grownDesc.byteCount;
}
void Raytracing::Init()
{
const uint32_t structByteCount = sizeof(TLASInstance);
BufferDesc desc("BLAS support instance", 2 * BLASBucket::Count * structByteCount, ResourceStates::ShaderAccessBits);
desc.shortLifeTime = true;
desc.structureByteCount = structByteCount;
tlasInstanceBuffer = CreateBuffer(desc);
}
// Called with a newly loaded world: tags every map surface as static or dynamic,
// bakes the static surfaces into one BLAS per non-empty bucket and
// registers one TLAS instance description per static BLAS.
void Raytracing::ProcessWorld(world_t& world)
{
	TagMapSurfacesRecursively(world.nodes);

	// make sure we're not trying to use deleted buffers after a video restart
	for(uint32_t b = 0; b < BLASBucket::Count; b++)
	{
		blasBuildBuffers[b] = {};
		staticBLASBuffers[b] = {};
		dynamicBLASBuffers[b] = {};
	}
	tlasBuffer = RHI_MAKE_NULL_HANDLE();

	// s_world is what WorldSurfaceList enumerates, it's only set during the bake
	BLASBuilder builders[BLASBucket::Count];
	WorldSurfaceList worldSurfaces;
	s_world = &world;
	BuildBLASes(staticBLASBuffers, builders, &worldSurfaces);
	s_world = NULL;

	// create ASes
	BeginTempCommandList();
	for(uint32_t b = 0; b < BLASBucket::Count; b++)
	{
		const BLASBuilder& builder = builders[b];
		if(!builder.IsEmpty())
		{
			const BLASBuildBuffers& buildBuffers = blasBuildBuffers[b];

			BLASDesc blasDesc = {};
			blasDesc.name = va("static BLAS %s", GetBLASBucketName(b));
			blasDesc.vertexBuffer = buildBuffers.vertexBuffer;
			blasDesc.indexBuffer = buildBuffers.indexBuffer;
			blasDesc.meshes = builder.buildMeshes;
			blasDesc.meshCount = builder.meshCount;
			CmdCreateBLAS(&staticBLASBuffers[b].blasBuffer, blasDesc);
		}
	}
	EndTempCommandList();

	// release the mesh descriptions and register 1 TLAS instance per static BLAS
	staticTLASInstanceCount = 0;
	tlasInstanceDescs.Clear();
	for(uint32_t b = 0; b < BLASBucket::Count; b++)
	{
		BLASBuilder& builder = builders[b];
		if(builder.IsEmpty())
		{
			continue;
		}

		// allocated by BuildBLASes, no longer needed once the command list was ended
		free(builder.buildMeshes);

		TLASInstanceDesc instance = {};
		instance.blasBuffer = staticBLASBuffers[b].blasBuffer;
		instance.cullMode = (cullType_t)b; // the bucket index is the cull type
		instance.instanceId = staticTLASInstanceCount++;
		instance.instanceMask = 0xFF;
		// only elements 0, 4 and 8 are non-zero (presumably a 3x3 identity - confirm against TLASInstanceDesc)
		instance.transform[0] = 1.0f;
		instance.transform[4] = 1.0f;
		instance.transform[8] = 1.0f;
		tlasInstanceDescs.Add(instance);
	}
}
// Rebuilds the dynamic BLASes and the TLAS, then uploads the table of TLASInstance
// entries (buffer indices and cull mode) in instance ID order:
// static instances first, dynamic instances after.
// Does nothing when there is no world, when tr.sceneCounterRT is 0 or
// when crp_updateRTAS is 0 and a TLAS already exists.
void Raytracing::BeginFrame()
{
	if(tr.world == NULL || tr.sceneCounterRT == 0)
	{
		return;
	}

	// updates are disabled and there is already a TLAS to use
	if(crp_updateRTAS->integer == 0 && !IsNullHandle(tlasBuffer))
	{
		return;
	}

	backEnd.refdef = tr.rtRefdef;

	BeginTempCommandList();
	const uint32_t renderPass = srp.BeginRenderPass("RTAS Build", 1.0f, 1.0f, 1.0f);

	BLASBuilder dynamicBLASes[BLASBucket::Count];
	DynamicSurfaceList surfaceList;
	BuildBLASes(dynamicBLASBuffers, dynamicBLASes, &surfaceList);

	for(uint32_t i = 0; i < BLASBucket::Count; i++)
	{
		BLASBuilder& bucket = dynamicBLASes[i];
		if(bucket.IsEmpty())
		{
			continue;
		}

		BLASBuildBuffers& buffers = blasBuildBuffers[i];
		BLASDesc desc = {};
		desc.name = va("dynamic BLAS %s", GetBLASBucketName(i));
		desc.vertexBuffer = buffers.vertexBuffer;
		desc.indexBuffer = buffers.indexBuffer;
		desc.meshes = bucket.buildMeshes;
		desc.meshCount = bucket.meshCount;
		CmdCreateBLAS(&dynamicBLASBuffers[i].blasBuffer, desc);
	}

	{
		// keep the static instances and replace everything that follows them
		uint32_t instanceId = staticTLASInstanceCount;
		tlasInstanceDescs.count = staticTLASInstanceCount;
		for(uint32_t i = 0; i < BLASBucket::Count; i++)
		{
			BLASBuilder& bucket = dynamicBLASes[i];
			if(bucket.IsEmpty())
			{
				continue;
			}

			TLASInstanceDesc inst = {};
			inst.blasBuffer = dynamicBLASBuffers[i].blasBuffer;
			inst.cullMode = (cullType_t)i;
			inst.instanceId = instanceId++;
			inst.instanceMask = 0xFF;
			inst.transform[0] = 1.0f;
			inst.transform[4] = 1.0f;
			inst.transform[8] = 1.0f;
			tlasInstanceDescs.Add(inst);
		}

		TLASDesc desc = {};
		desc.instanceCount = tlasInstanceDescs.count;
		desc.instances = tlasInstanceDescs.items;
		CmdCreateTLAS(&tlasBuffer, desc);
	}

	srp.EndRenderPass(renderPass);
	EndTempCommandList();

	// the mesh descriptions are allocated by BuildBLASes on every call:
	// release them here, like ProcessWorld does, or they leak on every update
	for(uint32_t i = 0; i < BLASBucket::Count; i++)
	{
		BLASBuilder& bucket = dynamicBLASes[i];
		if(bucket.IsEmpty())
		{
			continue;
		}

		free(bucket.buildMeshes);
		bucket.buildMeshes = NULL;
	}

	TLASInstance* traceInstances = (TLASInstance*)BeginBufferUpload(tlasInstanceBuffer);
	uint32_t instanceId = 0;

	// static BLAS handles are reset by ProcessWorld,
	// so a valid handle means the bucket has a static instance
	for(uint32_t i = 0; i < ARRAY_LEN(staticBLASBuffers); i++)
	{
		const BLASBuffers& buffers = staticBLASBuffers[i];
		if(IsNullHandle(buffers.blasBuffer))
		{
			continue;
		}

		Q_assert(instanceId == tlasInstanceDescs[instanceId].instanceId);
		TLASInstance traceInst = {};
		traceInst.meshBufferIndex = GetBufferIndexSRV(buffers.meshBuffer);
		traceInst.vertexBufferIndex = GetBufferIndexSRV(buffers.vertexBuffer);
		traceInst.indexBufferIndex = GetBufferIndexSRV(buffers.indexBuffer);
		traceInst.cullMode = (uint32_t)tlasInstanceDescs[instanceId++].cullMode;
		*traceInstances++ = traceInst;
	}

	// dynamic BLAS handles survive from one update to the next:
	// a bucket that is empty now can still hold the handle of an earlier build.
	// select buckets exactly like the TLAS instance loop above did so that
	// entry N of this table always describes TLAS instance N
	for(uint32_t i = 0; i < BLASBucket::Count; i++)
	{
		if(dynamicBLASes[i].IsEmpty())
		{
			continue;
		}

		const BLASBuffers& buffers = dynamicBLASBuffers[i];
		Q_assert(instanceId == tlasInstanceDescs[instanceId].instanceId);
		TLASInstance traceInst = {};
		traceInst.meshBufferIndex = GetBufferIndexSRV(buffers.meshBuffer);
		traceInst.vertexBufferIndex = GetBufferIndexSRV(buffers.vertexBuffer);
		traceInst.indexBufferIndex = GetBufferIndexSRV(buffers.indexBuffer);
		traceInst.cullMode = (uint32_t)tlasInstanceDescs[instanceId++].cullMode;
		*traceInstances++ = traceInst;
	}

	EndBufferUpload(tlasInstanceBuffer);

#if defined(_DEBUG)
	for(uint32_t i = 0; i < tlasInstanceDescs.count; i++)
	{
		Q_assert(tlasInstanceDescs[i].instanceId == i);
	}
#endif
}
// Walks the map's node tree starting at 'node' and tags every surface
// marked by a leaf as RTST_STATIC or RTST_DYNAMIC.
void Raytracing::TagMapSurfacesRecursively(mnode_t* node)
{
	// interior nodes: recurse into the first child (front side),
	// then keep going with the second one instead of recursing again
	while(node->contents == CONTENTS_NODE)
	{
		TagMapSurfacesRecursively(node->children[0]);
		node = node->children[1];
	}

	// 'node' is no longer an interior node: classify the surfaces it marks
	msurface_t** mark = node->firstmarksurface;
	for(int remaining = node->nummarksurfaces; remaining != 0; remaining--)
	{
		msurface_t* const surf = *mark++;
		surf->rtSurfType = IsStaticBLASSurface(surf) ? RTST_STATIC : RTST_DYNAMIC;
	}
}
// Creates or grows the vertex and index buffers used as input for BLAS builds.
// Twice the space needed for maxVertexCount positions (1 vec3_t each)
// and for maxIndexCount 32-bit indices is requested.
void Raytracing::EnsureBuffersAreLargeEnough(Raytracing::BLASBuildBuffers& buffers, uint32_t maxVertexCount, uint32_t maxIndexCount)
{
	BufferDesc vertexDesc("BLAS build vertex", 2 * maxVertexCount * sizeof(vec3_t), ResourceStates::Common);
	vertexDesc.shortLifeTime = true;
	CreateOrGrowBuffer(buffers.vertexBuffer, buffers.vertexBufferByteCount, vertexDesc);

	BufferDesc indexDesc("BLAS build index", 2 * maxIndexCount * sizeof(uint32_t), ResourceStates::Common);
	indexDesc.shortLifeTime = true;
	CreateOrGrowBuffer(buffers.indexBuffer, buffers.indexBufferByteCount, indexDesc);
}
// Creates or grows the structured buffers that shaders read during tracing:
// 1 BLASVertex per vertex, 1 uint32_t per index and 1 BLASMesh per mesh.
void Raytracing::EnsureBuffersAreLargeEnough(Raytracing::BLASBuffers& buffers, uint32_t maxVertexCount, uint32_t maxIndexCount, uint32_t maxMeshCount)
{
	// the strides are kept as 32-bit values so that sizes are computed in 32 bits
	const uint32_t vertexStride = sizeof(BLASVertex);
	const uint32_t indexStride = sizeof(uint32_t);
	const uint32_t meshStride = sizeof(BLASMesh);

	BufferDesc vertexDesc("BLAS support vertex", maxVertexCount * vertexStride, ResourceStates::ShaderAccessBits);
	vertexDesc.structureByteCount = vertexStride;
	vertexDesc.shortLifeTime = true;
	CreateOrGrowBuffer(buffers.vertexBuffer, buffers.vertexBufferByteCount, vertexDesc);

	BufferDesc indexDesc("BLAS support index", maxIndexCount * indexStride, ResourceStates::ShaderAccessBits);
	indexDesc.structureByteCount = indexStride;
	indexDesc.shortLifeTime = true;
	CreateOrGrowBuffer(buffers.indexBuffer, buffers.indexBufferByteCount, indexDesc);

	BufferDesc meshDesc("BLAS support mesh", maxMeshCount * meshStride, ResourceStates::ShaderAccessBits);
	meshDesc.structureByteCount = meshStride;
	meshDesc.shortLifeTime = true;
	CreateOrGrowBuffer(buffers.meshBuffer, buffers.meshBufferByteCount, meshDesc);
}
// Bakes all the surfaces of 'surfaceList' into per-bucket geometry buffers.
// blasBuffers: BLASBucket::Count sets of shader-visible buffers to fill (static or dynamic)
// blasBuilders: BLASBucket::Count builders, reset here and filled with the results
// surfaceList: source of the surfaces to bake
// Works in 2 passes: sizes are estimated first to reserve buffer space,
// then every surface is tessellated and written into the mapped buffers.
// On return, each non-empty builder owns a malloc'd buildMeshes array of
// meshCount valid entries that the caller has to free.
void Raytracing::BuildBLASes(BLASBuffers* blasBuffers, BLASBuilder* blasBuilders, ISurfaceList* surfaceList)
{
	tess.tessellator = Tessellator::None;
	tr.forceHighestLod = true;

	memset(blasBuilders, 0, sizeof(BLASBuilder) * BLASBucket::Count);

	// restored at the end of this function
	const double originalTime = backEnd.refdef.floatTime;

	// gather stats on all surfaces we can bake
	for(uint32_t i = 0, count = surfaceList->GetSurfaceCount(); i < count; i++)
	{
		Surface surface;
		if(surfaceList->GetSurface(surface, i))
		{
			continue;
		}

		int surfVertexCount, surfIndexCount;
		if(EstimateTessellatedSize(surfVertexCount, surfIndexCount, surface.surface, surface.shader, surface.entityNum, originalTime))
		{
			continue;
		}

		BLASBuilder& bucket = blasBuilders[GetBLASBucketIndex(surface.shader)];
		bucket.totalVertexCount += surfVertexCount;
		bucket.totalIndexCount += surfIndexCount;
		bucket.meshCount++;
	}

	// correct the vertex and index counts since the estimations might be a little off
	for(uint32_t i = 0; i < BLASBucket::Count; i++)
	{
		BLASBuilder& bucket = blasBuilders[i];
		if(!bucket.IsEmpty())
		{
			bucket.totalVertexCount = max(2 * bucket.totalVertexCount, 8192u);
			bucket.totalIndexCount = max(2 * bucket.totalIndexCount, 32768u);
		}
	}

	// create buffers and map them
	for(uint32_t i = 0; i < BLASBucket::Count; i++)
	{
		BLASBuilder& bucket = blasBuilders[i];
		if(bucket.IsEmpty())
		{
			continue;
		}

		EnsureBuffersAreLargeEnough(blasBuildBuffers[i], bucket.totalVertexCount, bucket.totalIndexCount);
		bucket.buildMeshes = (BLASMeshDesc*)malloc(bucket.meshCount * sizeof(BLASMeshDesc));
		if(bucket.buildMeshes == NULL)
		{
			ri.Error(ERR_FATAL, "Failed to allocate %d BLASMeshDesc instances\n", (int)bucket.meshCount);
		}
		bucket.buildVertices = (float*)BeginBufferUpload(blasBuildBuffers[i].vertexBuffer);
		bucket.buildIndices = (uint32_t*)BeginBufferUpload(blasBuildBuffers[i].indexBuffer);

		EnsureBuffersAreLargeEnough(blasBuffers[i], bucket.totalVertexCount, bucket.totalIndexCount, bucket.meshCount);
		bucket.traceVertices = (BLASVertex*)BeginBufferUpload(blasBuffers[i].vertexBuffer);
		bucket.traceIndices = (uint32_t*)BeginBufferUpload(blasBuffers[i].indexBuffer);
		bucket.traceMeshes = (BLASMesh*)BeginBufferUpload(blasBuffers[i].meshBuffer);
	}

	// upload vertex and index data
	for(uint32_t i = 0, count = surfaceList->GetSurfaceCount(); i < count; i++)
	{
		Surface surface;
		if(surfaceList->GetSurface(surface, i))
		{
			continue;
		}

		int surfVertexCount, surfIndexCount;
		if(Tessellate(surfVertexCount, surfIndexCount, surface.surface, surface.shader, surface.entityNum, originalTime))
		{
			continue;
		}

		BLASBuilder& bucket = blasBuilders[GetBLASBucketIndex(surface.shader)];

		// the space was reserved from estimations only:
		// never write into a bucket that wasn't mapped (its cursors are NULL)
		// nor past the mesh, vertex and index capacities that were reserved
		if(bucket.IsEmpty() ||
			bucket.meshIndex >= bucket.meshCount ||
			bucket.firstVertex + (uint32_t)surfVertexCount > bucket.totalVertexCount ||
			bucket.firstIndex + (uint32_t)surfIndexCount > bucket.totalIndexCount)
		{
			continue;
		}

		// tess.xyz is an array of vec4_t
		for(int v = 0; v < surfVertexCount; v++)
		{
			if(surface.entityNum == ENTITYNUM_WORLD)
			{
				bucket.buildVertices[0] = tess.xyz[v][0];
				bucket.buildVertices[1] = tess.xyz[v][1];
				bucket.buildVertices[2] = tess.xyz[v][2];
			}
			else
			{
				// non-world geometry is transformed by the current model matrix
				const float original[3] = { tess.xyz[v][0], tess.xyz[v][1], tess.xyz[v][2] };
				float newPos[3];
				TransformPoint(original, backEnd.modelMatrix, newPos);
				bucket.buildVertices[0] = newPos[0];
				bucket.buildVertices[1] = newPos[1];
				bucket.buildVertices[2] = newPos[2];
			}
			bucket.buildVertices += 3;

			bucket.traceVertices->texCoords[0] = tess.svars[0].texcoords[v][0];
			bucket.traceVertices->texCoords[1] = tess.svars[0].texcoords[v][1];
			bucket.traceVertices->color[0] = tess.svars[0].colors[v][0];
			bucket.traceVertices->color[1] = tess.svars[0].colors[v][1];
			bucket.traceVertices->color[2] = tess.svars[0].colors[v][2];
			bucket.traceVertices->color[3] = tess.svars[0].colors[v][3];
			bucket.traceVertices++;
		}

		// indices are copied as is, the mesh's firstVertex locates its vertices
		memcpy(bucket.buildIndices, tess.indexes, surfIndexCount * sizeof(uint32_t));
		bucket.buildIndices += surfIndexCount;
		memcpy(bucket.traceIndices, tess.indexes, surfIndexCount * sizeof(uint32_t));
		bucket.traceIndices += surfIndexCount;

		const shaderStage_t& stage0 = *surface.shader->stages[0];
		const image_t& image0 = *stage0.bundle.image[0];

		BLASMeshDesc& buildMesh = bucket.buildMeshes[bucket.meshIndex];
		buildMesh.firstVertex = bucket.firstVertex;
		buildMesh.vertexCount = surfVertexCount;
		buildMesh.firstIndex = bucket.firstIndex;
		buildMesh.indexCount = surfIndexCount;
		buildMesh.isFullyOpaque = surface.shader->isOpaque && !surface.shader->isAlphaTestedOpaque;

		BLASMesh& traceMesh = bucket.traceMeshes[bucket.meshIndex];
		traceMesh.firstVertex = bucket.firstVertex;
		traceMesh.firstIndex = bucket.firstIndex;
		traceMesh.textureIndex = image0.textureIndex;
		traceMesh.samplerIndex = GetSamplerIndex(image0.wrapClampMode, TextureFilter::Linear);
		traceMesh.alphaTestMode = AlphaTestShaderConstFromStateBits(stage0.stateBits);
		traceMesh.blendBits = stage0.stateBits & GLS_BLEND_BITS;

		bucket.meshIndex++;
		bucket.firstVertex += surfVertexCount;
		bucket.firstIndex += surfIndexCount;
	}

	// unmap buffers
	for(uint32_t i = 0; i < BLASBucket::Count; i++)
	{
		BLASBuilder& bucket = blasBuilders[i];
		if(bucket.IsEmpty())
		{
			continue;
		}

		EndBufferUpload(blasBuildBuffers[i].vertexBuffer);
		EndBufferUpload(blasBuildBuffers[i].indexBuffer);
		bucket.buildVertices = NULL;
		bucket.buildIndices = NULL;

		EndBufferUpload(blasBuffers[i].vertexBuffer);
		EndBufferUpload(blasBuffers[i].indexBuffer);
		EndBufferUpload(blasBuffers[i].meshBuffer);
		bucket.traceVertices = NULL;
		bucket.traceIndices = NULL;
		bucket.traceMeshes = NULL;

		// surfaces counted by the estimation pass can be skipped by the tessellation pass:
		// only report the mesh descriptions that were actually written
		// so that callers never hand uninitialized entries to CmdCreateBLAS
		bucket.meshCount = bucket.meshIndex;
		if(bucket.meshCount == 0)
		{
			// nothing was written: release the array and report the bucket as empty
			free(bucket.buildMeshes);
			bucket.buildMeshes = NULL;
			bucket.totalVertexCount = 0;
			bucket.totalIndexCount = 0;
		}
	}

	backEnd.refdef.floatTime = originalTime;
	tr.forceHighestLod = false;
}
// Number of surfaces in the world currently referenced by s_world.
uint32_t Raytracing::WorldSurfaceList::GetSurfaceCount()
{
	const uint32_t surfaceCount = s_world->numsurfaces;

	return surfaceCount;
}
// Fetches the world surface at 'index' from s_world.
// Returns true when the surface should be skipped, i.e. when it isn't tagged
// RTST_STATIC or when its shader has no stages; 'surface' is left untouched then.
// Returns false after filling 'surface' otherwise.
bool Raytracing::WorldSurfaceList::GetSurface(Surface& surface, uint32_t index)
{
	Q_assert(index < (uint32_t)s_world->numsurfaces);

	const msurface_t& worldSurf = s_world->surfaces[index];
	const bool usable = worldSurf.rtSurfType == RTST_STATIC && worldSurf.shader->numStages > 0;
	if(!usable)
	{
		return true;
	}

	surface.entityNum = ENTITYNUM_WORLD;
	surface.shader = worldSurf.shader;
	surface.surface = worldSurf.data;

	return false;
}
// The list is made of the tr.rtSurfs entries followed by all the surfaces of tr.world.
uint32_t Raytracing::DynamicSurfaceList::GetSurfaceCount()
{
	const uint32_t surfaceCount = tr.numRTSurfs + tr.world->numsurfaces;

	return surfaceCount;
}
// Fetches the surface at 'index': indices below tr.numRTSurfs map to tr.rtSurfs,
// the ones after that map to the surfaces of tr.world.
// 'surface' is always filled.
// Returns true when the surface should be skipped, i.e. when it is a world surface
// that isn't tagged RTST_DYNAMIC or when its shader has no stages.
bool Raytracing::DynamicSurfaceList::GetSurface(Surface& surface, uint32_t index)
{
	Q_assert(index < (uint32_t)tr.numRTSurfs + (uint32_t)tr.world->numsurfaces);

	bool wrongSurfaceType = false;
	if(index >= tr.numRTSurfs)
	{
		// world surface
		const uint32_t worldIndex = index - tr.numRTSurfs;
		Q_assert(worldIndex < (uint32_t)tr.world->numsurfaces);

		const msurface_t& worldSurf = tr.world->surfaces[worldIndex];
		wrongSurfaceType = worldSurf.rtSurfType != RTST_DYNAMIC;
		surface.surface = worldSurf.data;
		surface.shader = worldSurf.shader;
		surface.entityNum = ENTITYNUM_WORLD;
	}
	else
	{
		// surface from the tr.rtSurfs array
		const rtSurf_t& rtSurf = tr.rtSurfs[index];
		surface.surface = rtSurf.surface;
		surface.shader = rtSurf.shader;
		surface.entityNum = rtSurf.entityNum;
	}

	return wrongSurfaceType || surface.shader->numStages <= 0;
}