/* =========================================================================== Copyright (C) 2024 Gian 'myT' Schellenbaum This file is part of Challenge Quake 3 (CNQ3). Challenge Quake 3 is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. Challenge Quake 3 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Challenge Quake 3. If not, see . =========================================================================== */ // Cinematic Rendering Pipeline - raytracing acceleration structures management #include "crp_local.h" #include "shaders/crp/raytracing.h.hlsli" struct BLASBuilder { bool IsEmpty() { return totalVertexCount <= 0 || totalIndexCount <= 0; } uint32_t totalVertexCount; uint32_t totalIndexCount; uint32_t meshCount; float* buildVertices; uint32_t* buildIndices; BLASMeshDesc* buildMeshes; uint32_t firstVertex; uint32_t firstIndex; uint32_t meshIndex; BLASVertex* traceVertices; uint32_t* traceIndices; BLASMesh* traceMeshes; }; static world_t* s_world; static bool IsStaticBLASSurface(const msurface_t* surf) { if(surf->shader->numStages == 0 || surf->shader->isDynamic || surf->shader->polygonOffset) { return false; } return true; } static uint32_t GetBLASBucketIndex(const shader_t* shader) { const uint32_t index = (uint32_t)shader->cullType; return index; } static const char* GetBLASBucketName(uint32_t index) { switch(index) { case CT_FRONT_SIDED: return "front-sided"; case CT_BACK_SIDED: return "back-sided"; case CT_TWO_SIDED: return "two-sided"; default: Q_assert(!"Invalid bucket index"); return "???"; } } static void TransformPoint(const vec3_t original, const float* matrix4x4, vec3_t result) { float x = original[0] * matrix4x4[0] + original[1] * matrix4x4[4] + original[2] * matrix4x4[ 8] + matrix4x4[12]; float y = original[0] * matrix4x4[1] + original[1] * matrix4x4[5] + original[2] * matrix4x4[ 9] + matrix4x4[13]; float z = original[0] * matrix4x4[2] + original[1] * matrix4x4[6] + original[2] * matrix4x4[10] + matrix4x4[14]; float w = original[0] * matrix4x4[3] + original[1] * matrix4x4[7] + original[2] * matrix4x4[11] + matrix4x4[15]; if(w != 1.0f && w != 0.0f) { x /= w; y /= w; z /= w; } result[0] = x; result[1] = y; result[2] = z; } // true when the surface should be skipped static bool Tessellate( int& surfVertexCount, int& surfIndexCount, const surfaceType_t* surface, const shader_t* shader, int entityNum, double originalTime) { if(shader->numStages <= 0) { return true; } bool depthHack; tess.numVertexes = 0; tess.numIndexes = 0; tess.shader = shader; // needed by R_ComputeTexCoords etc. UpdateEntityData(depthHack, entityNum, originalTime); R_TessellateSurface(surface); surfVertexCount = tess.numVertexes; surfIndexCount = tess.numIndexes; if(surfVertexCount <= 0 || surfIndexCount <= 0) { return true; } RB_DeformTessGeometry(0, surfVertexCount, 0, surfIndexCount); const shaderStage_t& stage = *shader->stages[0]; R_ComputeColors(&stage, tess.svars[0], 0, surfVertexCount); R_ComputeTexCoords(&stage, tess.svars[0], 0, surfVertexCount, qfalse); return false; } // true when the surface should be skipped static bool EstimateTessellatedSize( int& surfVertexCount, int& surfIndexCount, const surfaceType_t* surface, const shader_t* shader, int entityNum, double originalTime) { if(shader->numStages <= 0) { return true; } bool depthHack; tess.numVertexes = 0; tess.numIndexes = 0; tess.shader = shader; UpdateEntityData(depthHack, entityNum, originalTime); R_ComputeTessellatedSize(&surfVertexCount, &surfIndexCount, surface); if(surfVertexCount <= 0 || surfIndexCount <= 0) { return true; } return false; } static void CreateOrGrowBuffer(HBuffer& buffer, uint32_t& curByteCount, const BufferDesc& desc) { if(desc.byteCount <= curByteCount) { return; } curByteCount = max(curByteCount * 2, desc.byteCount); DestroyBufferDelayed(buffer); buffer = CreateBuffer(desc); } void Raytracing::Init() { // make sure we're not trying to use deleted buffers after a video restart for(uint32_t f = 0; f < RTFrameCount; f++) { FrameData& fd = frameData[f]; for(uint32_t i = 0; i < BLASBucket::Count; i++) { fd.blasBuildBuffers[i] = {}; fd.dynamicBLASBuffers[i] = {}; } fd.tlasBuffer = RHI_MAKE_NULL_HANDLE(); } for(uint32_t i = 0; i < BLASBucket::Count; i++) { staticBLASBuffers[i] = {}; } if(!rhiInfo.hasInlineRaytracing) { for(uint32_t f = 0; f < RTFrameCount; f++) { FrameData& fd = frameData[f]; fd.tlasInstanceBuffer = RHI_MAKE_NULL_HANDLE(); } return; } for(uint32_t f = 0; f < RTFrameCount; f++) { FrameData& fd = frameData[f]; const uint32_t structByteCount = sizeof(TLASInstance); BufferDesc desc("BLAS support instance", 2 * BLASBucket::Count * structByteCount, ResourceStates::ShaderAccessBits); desc.shortLifeTime = true; desc.structureByteCount = structByteCount; fd.tlasInstanceBuffer = CreateBuffer(desc); } } void Raytracing::ProcessWorld(world_t& world) { if(!rhiInfo.hasInlineRaytracing) { return; } TagMapSurfacesRecursively(world.nodes); BLASBuilder staticBLASes[BLASBucket::Count]; WorldSurfaceList surfaceList; s_world = &world; BuildBLASes(staticBLASBuffers, staticBLASes, &surfaceList); s_world = NULL; FrameData& fd = frameData[GetRTFrameIndex()]; // create ASes BeginTempCommandList(); for(uint32_t i = 0; i < BLASBucket::Count; i++) { BLASBuilder& bucket = staticBLASes[i]; if(bucket.IsEmpty()) { continue; } BLASBuildBuffers& buffers = fd.blasBuildBuffers[i]; BLASDesc desc = {}; desc.name = va("static BLAS %s", GetBLASBucketName(i)); desc.vertexBuffer = buffers.vertexBuffer; desc.indexBuffer = buffers.indexBuffer; desc.meshes = bucket.buildMeshes; desc.meshCount = bucket.meshCount; CmdCreateBLAS(&staticBLASBuffers[i].blasBuffer, desc); } EndTempCommandList(); for(uint32_t i = 0; i < BLASBucket::Count; i++) { BLASBuilder& bucket = staticBLASes[i]; if(bucket.IsEmpty()) { continue; } free(bucket.buildMeshes); } staticTLASInstanceCount = 0; tlasInstanceDescs.Clear(); for(uint32_t i = 0; i < BLASBucket::Count; i++) { BLASBuilder& bucket = staticBLASes[i]; if(bucket.IsEmpty()) { continue; } TLASInstanceDesc inst; inst = {}; inst.blasBuffer = staticBLASBuffers[i].blasBuffer; inst.cullMode = (cullType_t)i; inst.instanceId = staticTLASInstanceCount++; inst.instanceMask = 0xFF; inst.transform[0] = 1.0f; inst.transform[4] = 1.0f; inst.transform[8] = 1.0f; tlasInstanceDescs.Add(inst); } } void Raytracing::BeginFrame(bool wantUpdate) { if(!rhiInfo.hasInlineRaytracing) { return; } if(tr.world == NULL || tr.sceneCounterRT == 0) { return; } FrameData& fd = frameData[GetRTFrameIndex()]; if((crp_updateRTAS->integer == 0 || !wantUpdate) && !IsNullHandle(fd.tlasBuffer)) { return; } backEnd.refdef = tr.rtRefdef; BeginTempCommandList(); const uint32_t renderPass = srp.BeginRenderPass("RTAS Build", 1.0f, 1.0f, 1.0f); BLASBuilder dynamicBLASes[BLASBucket::Count]; DynamicSurfaceList surfaceList; BuildBLASes(fd.dynamicBLASBuffers, dynamicBLASes, &surfaceList); for(uint32_t i = 0; i < BLASBucket::Count; i++) { BLASBuilder& bucket = dynamicBLASes[i]; if(bucket.IsEmpty()) { continue; } BLASBuildBuffers& buffers = fd.blasBuildBuffers[i]; BLASDesc desc = {}; desc.name = va("dynamic BLAS %s", GetBLASBucketName(i)); desc.vertexBuffer = buffers.vertexBuffer; desc.indexBuffer = buffers.indexBuffer; desc.meshes = bucket.buildMeshes; desc.meshCount = bucket.meshCount; CmdCreateBLAS(&fd.dynamicBLASBuffers[i].blasBuffer, desc); } { uint32_t instanceId = staticTLASInstanceCount; tlasInstanceDescs.count = staticTLASInstanceCount; for(uint32_t i = 0; i < BLASBucket::Count; i++) { BLASBuilder& bucket = dynamicBLASes[i]; if(bucket.IsEmpty()) { continue; } TLASInstanceDesc inst = {}; inst.blasBuffer = fd.dynamicBLASBuffers[i].blasBuffer; inst.cullMode = (cullType_t)i; inst.instanceId = instanceId++; inst.instanceMask = 0xFF; inst.transform[0] = 1.0f; inst.transform[4] = 1.0f; inst.transform[8] = 1.0f; tlasInstanceDescs.Add(inst); } TLASDesc desc = {}; desc.instanceCount = tlasInstanceDescs.count; desc.instances = tlasInstanceDescs.items; CmdCreateTLAS(&fd.tlasBuffer, desc); } srp.EndRenderPass(renderPass); EndTempCommandList(); TLASInstance* traceInstances = (TLASInstance*)BeginBufferUpload(fd.tlasInstanceBuffer); uint32_t instanceId = 0; for(uint32_t i = 0; i < ARRAY_LEN(staticBLASBuffers); i++) { const BLASBuffers& buffers = staticBLASBuffers[i]; if(IsNullHandle(buffers.blasBuffer)) { continue; } Q_assert(instanceId == tlasInstanceDescs[instanceId].instanceId); TLASInstance traceInst = {}; traceInst.meshBufferIndex = GetBufferIndexSRV(buffers.meshBuffer); traceInst.vertexBufferIndex = GetBufferIndexSRV(buffers.vertexBuffer); traceInst.indexBufferIndex = GetBufferIndexSRV(buffers.indexBuffer); traceInst.cullMode = (uint32_t)tlasInstanceDescs[instanceId++].cullMode; *traceInstances++ = traceInst; } for(uint32_t i = 0; i < ARRAY_LEN(fd.dynamicBLASBuffers); i++) { const BLASBuffers& buffers = fd.dynamicBLASBuffers[i]; if(IsNullHandle(buffers.blasBuffer)) { continue; } Q_assert(instanceId == tlasInstanceDescs[instanceId].instanceId); TLASInstance traceInst = {}; traceInst.meshBufferIndex = GetBufferIndexSRV(buffers.meshBuffer); traceInst.vertexBufferIndex = GetBufferIndexSRV(buffers.vertexBuffer); traceInst.indexBufferIndex = GetBufferIndexSRV(buffers.indexBuffer); traceInst.cullMode = (uint32_t)tlasInstanceDescs[instanceId++].cullMode; *traceInstances++ = traceInst; } EndBufferUpload(fd.tlasInstanceBuffer); #if defined(_DEBUG) for(uint32_t i = 0; i < tlasInstanceDescs.count; i++) { Q_assert(tlasInstanceDescs[i].instanceId == i); } #endif } void Raytracing::TagMapSurfacesRecursively(mnode_t* node) { do { if(node->contents != CONTENTS_NODE) { break; } // recurse down the children, front side first TagMapSurfacesRecursively(node->children[0]); // tail recurse node = node->children[1]; } while(true); // add the individual surfaces int c = node->nummarksurfaces; msurface_t** mark = node->firstmarksurface; while(c--) { msurface_t* const surf = *mark++; if(surf->shader->isSky) { continue; } if(IsStaticBLASSurface(surf)) { surf->rtSurfType = RTST_STATIC; } else { surf->rtSurfType = RTST_DYNAMIC; } } } void Raytracing::EnsureBuffersAreLargeEnough(Raytracing::BLASBuildBuffers& buffers, uint32_t maxVertexCount, uint32_t maxIndexCount) { { BufferDesc desc("BLAS build vertex", 2 * maxVertexCount * sizeof(vec3_t), ResourceStates::Common); desc.shortLifeTime = true; CreateOrGrowBuffer(buffers.vertexBuffer, buffers.vertexBufferByteCount, desc); } { BufferDesc desc("BLAS build index", 2 * maxIndexCount * sizeof(uint32_t), ResourceStates::Common); desc.shortLifeTime = true; CreateOrGrowBuffer(buffers.indexBuffer, buffers.indexBufferByteCount, desc); } } void Raytracing::EnsureBuffersAreLargeEnough(Raytracing::BLASBuffers& buffers, uint32_t maxVertexCount, uint32_t maxIndexCount, uint32_t maxMeshCount) { { const uint32_t structByteCount = sizeof(BLASVertex); BufferDesc desc("BLAS support vertex", maxVertexCount * structByteCount, ResourceStates::ShaderAccessBits); desc.shortLifeTime = true; desc.structureByteCount = structByteCount; CreateOrGrowBuffer(buffers.vertexBuffer, buffers.vertexBufferByteCount, desc); } { const uint32_t structByteCount = sizeof(uint32_t); BufferDesc desc("BLAS support index", maxIndexCount * structByteCount, ResourceStates::ShaderAccessBits); desc.shortLifeTime = true; desc.structureByteCount = structByteCount; CreateOrGrowBuffer(buffers.indexBuffer, buffers.indexBufferByteCount, desc); } { const uint32_t structByteCount = sizeof(BLASMesh); BufferDesc desc("BLAS support mesh", maxMeshCount * structByteCount, ResourceStates::ShaderAccessBits); desc.shortLifeTime = true; desc.structureByteCount = structByteCount; CreateOrGrowBuffer(buffers.meshBuffer, buffers.meshBufferByteCount, desc); } } void Raytracing::BuildBLASes(BLASBuffers* blasBuffers, BLASBuilder* blasBuilders, ISurfaceList* surfaceList) { tess.tessellator = Tessellator::None; tr.forceHighestLod = true; memset(blasBuilders, 0, sizeof(BLASBuilder) * BLASBucket::Count); const double originalTime = backEnd.refdef.floatTime; FrameData& fd = frameData[GetRTFrameIndex()]; // gather stats on all surfaces we can bake for(uint32_t i = 0, count = surfaceList->GetSurfaceCount(); i < count; i++) { Surface surface; if(surfaceList->GetSurface(surface, i)) { continue; } int surfVertexCount, surfIndexCount; if(EstimateTessellatedSize(surfVertexCount, surfIndexCount, surface.surface, surface.shader, surface.entityNum, originalTime)) { continue; } BLASBuilder& bucket = blasBuilders[GetBLASBucketIndex(surface.shader)]; bucket.totalVertexCount += surfVertexCount; bucket.totalIndexCount += surfIndexCount; bucket.meshCount++; } // correct the vertex and index counts since the estimations might be a little off for(uint32_t i = 0; i < BLASBucket::Count; i++) { BLASBuilder& bucket = blasBuilders[i]; if(!bucket.IsEmpty()) { bucket.totalVertexCount = max(2 * bucket.totalVertexCount, 8192u); bucket.totalIndexCount = max(2 * bucket.totalIndexCount, 32768u); } } // create buffers and map them for(uint32_t i = 0; i < BLASBucket::Count; i++) { BLASBuilder& bucket = blasBuilders[i]; if(bucket.IsEmpty()) { continue; } EnsureBuffersAreLargeEnough(fd.blasBuildBuffers[i], bucket.totalVertexCount, bucket.totalIndexCount); bucket.buildMeshes = (BLASMeshDesc*)malloc(bucket.meshCount * sizeof(BLASMeshDesc)); if(bucket.buildMeshes == NULL) { ri.Error(ERR_FATAL, "Failed to allocate %d BLASMeshDesc instances\n", (int)bucket.meshCount); } bucket.buildVertices = (float*)BeginBufferUpload(fd.blasBuildBuffers[i].vertexBuffer); bucket.buildIndices = (uint32_t*)BeginBufferUpload(fd.blasBuildBuffers[i].indexBuffer); EnsureBuffersAreLargeEnough(blasBuffers[i], bucket.totalVertexCount, bucket.totalIndexCount, bucket.meshCount); bucket.traceVertices = (BLASVertex*)BeginBufferUpload(blasBuffers[i].vertexBuffer); bucket.traceIndices = (uint32_t*)BeginBufferUpload(blasBuffers[i].indexBuffer); bucket.traceMeshes = (BLASMesh*)BeginBufferUpload(blasBuffers[i].meshBuffer); } // upload vertex and index data for(uint32_t i = 0, count = surfaceList->GetSurfaceCount(); i < count; i++) { Surface surface; if(surfaceList->GetSurface(surface, i)) { continue; } int surfVertexCount, surfIndexCount; if(Tessellate(surfVertexCount, surfIndexCount, surface.surface, surface.shader, surface.entityNum, originalTime)) { continue; } BLASBuilder& bucket = blasBuilders[GetBLASBucketIndex(surface.shader)]; // tess.xyz is an array of vec4_t for(uint32_t v = 0; v < surfVertexCount; v++) { if(surface.entityNum == ENTITYNUM_WORLD) { bucket.buildVertices[0] = tess.xyz[v][0]; bucket.buildVertices[1] = tess.xyz[v][1]; bucket.buildVertices[2] = tess.xyz[v][2]; } else { const float original[3] = { tess.xyz[v][0], tess.xyz[v][1], tess.xyz[v][2] }; float newPos[3]; TransformPoint(original, backEnd.modelMatrix, newPos); bucket.buildVertices[0] = newPos[0]; bucket.buildVertices[1] = newPos[1]; bucket.buildVertices[2] = newPos[2]; } bucket.buildVertices += 3; bucket.traceVertices->texCoords[0] = tess.svars[0].texcoords[v][0]; bucket.traceVertices->texCoords[1] = tess.svars[0].texcoords[v][1]; bucket.traceVertices->color[0] = tess.svars[0].colors[v][0]; bucket.traceVertices->color[1] = tess.svars[0].colors[v][1]; bucket.traceVertices->color[2] = tess.svars[0].colors[v][2]; bucket.traceVertices->color[3] = tess.svars[0].colors[v][3]; bucket.traceVertices++; } memcpy(bucket.buildIndices, tess.indexes, surfIndexCount * sizeof(uint32_t)); bucket.buildIndices += surfIndexCount; memcpy(bucket.traceIndices, tess.indexes, surfIndexCount * sizeof(uint32_t)); bucket.traceIndices += surfIndexCount; const shaderStage_t& stage0 = *surface.shader->stages[0]; const image_t& image0 = *stage0.bundle.image[0]; bucket.buildMeshes[bucket.meshIndex].firstVertex = bucket.firstVertex; bucket.buildMeshes[bucket.meshIndex].vertexCount = surfVertexCount; bucket.buildMeshes[bucket.meshIndex].firstIndex = bucket.firstIndex; bucket.buildMeshes[bucket.meshIndex].indexCount = surfIndexCount; bucket.buildMeshes[bucket.meshIndex].isFullyOpaque = surface.shader->isOpaque && !surface.shader->isAlphaTestedOpaque; bucket.traceMeshes[bucket.meshIndex].firstVertex = bucket.firstVertex; bucket.traceMeshes[bucket.meshIndex].firstIndex = bucket.firstIndex; bucket.traceMeshes[bucket.meshIndex].textureIndex = image0.textureIndex; bucket.traceMeshes[bucket.meshIndex].samplerIndex = GetSamplerIndex(image0.wrapClampMode, TextureFilter::Linear); bucket.traceMeshes[bucket.meshIndex].alphaTestMode = AlphaTestShaderConstFromStateBits(stage0.stateBits); bucket.traceMeshes[bucket.meshIndex].blendBits = stage0.stateBits & GLS_BLEND_BITS; bucket.meshIndex++; bucket.firstVertex += surfVertexCount; bucket.firstIndex += surfIndexCount; } // unmap buffers for(uint32_t i = 0; i < BLASBucket::Count; i++) { BLASBuilder& bucket = blasBuilders[i]; if(bucket.IsEmpty()) { continue; } EndBufferUpload(fd.blasBuildBuffers[i].vertexBuffer); EndBufferUpload(fd.blasBuildBuffers[i].indexBuffer); bucket.buildVertices = NULL; bucket.buildIndices = NULL; EndBufferUpload(blasBuffers[i].vertexBuffer); EndBufferUpload(blasBuffers[i].indexBuffer); EndBufferUpload(blasBuffers[i].meshBuffer); bucket.traceVertices = NULL; bucket.traceIndices = NULL; bucket.traceMeshes = NULL; } backEnd.refdef.floatTime = originalTime; tr.forceHighestLod = false; } uint32_t Raytracing::WorldSurfaceList::GetSurfaceCount() { return s_world->numsurfaces; } bool Raytracing::WorldSurfaceList::GetSurface(Surface& surface, uint32_t index) { Q_assert(index < (uint32_t)s_world->numsurfaces); const msurface_t& surf = s_world->surfaces[index]; if(surf.rtSurfType != RTST_STATIC || surf.shader->numStages <= 0) { return true; } surface.surface = surf.data; surface.shader = surf.shader; surface.entityNum = ENTITYNUM_WORLD; return false; } uint32_t Raytracing::DynamicSurfaceList::GetSurfaceCount() { return tr.numRTSurfs + tr.world->numsurfaces; } bool Raytracing::DynamicSurfaceList::GetSurface(Surface& surface, uint32_t index) { Q_assert(index < (uint32_t)tr.numRTSurfs + (uint32_t)tr.world->numsurfaces); bool skip = false; if(index < tr.numRTSurfs) { const rtSurf_t& surf = tr.rtSurfs[index]; surface.surface = surf.surface; surface.shader = surf.shader; surface.entityNum = surf.entityNum; } else { index -= tr.numRTSurfs; Q_assert(index < (uint32_t)tr.world->numsurfaces); const msurface_t& surf = tr.world->surfaces[index]; if(surf.rtSurfType != RTST_DYNAMIC) { skip = true; } surface.surface = surf.data; surface.shader = surf.shader; surface.entityNum = ENTITYNUM_WORLD; } skip = skip || surface.shader->numStages <= 0; return skip; } uint32_t Raytracing::GetTLASBufferIndex() { const HBuffer tlasBuffer = frameData[GetRTFrameIndex()].tlasBuffer; if(IsNullHandle(tlasBuffer)) { return 0; } return GetBufferIndexSRV(tlasBuffer); } uint32_t Raytracing::GetInstanceBufferIndex() { const HBuffer tlasInstanceBuffer = frameData[GetRTFrameIndex()].tlasInstanceBuffer; if(IsNullHandle(tlasInstanceBuffer)) { return 0; } return GetBufferIndexSRV(tlasInstanceBuffer); } bool Raytracing::CanRaytrace() { return rhiInfo.hasInlineRaytracing && GetTLASBufferIndex() != 0 && GetInstanceBufferIndex() != 0; }