diff --git a/changelog.txt b/changelog.txt index 4e344b2..665f1ee 100644 --- a/changelog.txt +++ b/changelog.txt @@ -146,7 +146,7 @@ chg: reworked renderer with 2 new rendering pipelines - order-independent transparency - depth of field (scatter-as-gather or accumulation) - shadowed point lights and sunlight - - volumetric lighting + - volumetric lighting: fog and NanoVDB animations lit by ambient light, point lights and sunlight - all corresponding CVars have the "crp_" prefix chg: removed cl_drawMouseLag, r_backend, r_frameSleep, r_gpuMipGen, r_alphaToCoverage, r_alphaToCoverageMipBoost diff --git a/code/qcommon/q_math.c b/code/qcommon/q_math.c index 55a280e..98a02ba 100644 --- a/code/qcommon/q_math.c +++ b/code/qcommon/q_math.c @@ -30,6 +30,7 @@ const vec3_t vec2_one = { 1, 1 }; const vec3_t vec3_origin = { 0, 0, 0 }; const vec3_t vec3_zero = { 0, 0, 0 }; const vec3_t vec3_one = { 1, 1, 1 }; +const vec3_t vec3_axis[3] = { { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 } }; const vec4_t vec4_zero = { 0, 0, 0, 0 }; const vec4_t vec4_one = { 1, 1, 1, 1 }; diff --git a/code/qcommon/q_shared.h b/code/qcommon/q_shared.h index 36d254d..6cd302c 100644 --- a/code/qcommon/q_shared.h +++ b/code/qcommon/q_shared.h @@ -65,8 +65,10 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #if defined(__cplusplus) && !defined(min) - template __inline T min( T a, T b ) { return (a < b) ? a : b; } - template __inline T max( T a, T b ) { return (a > b) ? a : b; } + template T min( T a, T b ) { return (a < b) ? a : b; } + template T max( T a, T b ) { return (a > b) ? a : b; } + template T min3(T a, T b, T c) { return min(a, min(b, c)); } + template T max3(T a, T b, T c) { return max(a, max(b, c)); } #elif defined(Q3_VM) // #elif !defined(min) doesn't work here, because the VC headers are shit #define min( a, b ) ((a) < (b) ? (a) : (b)) #define max( a, b ) ((a) > (b) ? 
(a) : (b)) @@ -279,6 +281,7 @@ extern const vec3_t vec2_one; extern const vec3_t vec3_origin; extern const vec3_t vec3_zero; extern const vec3_t vec3_one; +extern const vec3_t vec3_axis[3]; extern const vec4_t vec4_zero; extern const vec4_t vec4_one; @@ -393,6 +396,7 @@ void ByteToDir( int b, vec3_t dir ); #define DotProduct(x,y) ((x)[0]*(y)[0]+(x)[1]*(y)[1]+(x)[2]*(y)[2]) #define VectorSubtract(a,b,c) ((c)[0]=(a)[0]-(b)[0],(c)[1]=(a)[1]-(b)[1],(c)[2]=(a)[2]-(b)[2]) #define VectorAdd(a,b,c) ((c)[0]=(a)[0]+(b)[0],(c)[1]=(a)[1]+(b)[1],(c)[2]=(a)[2]+(b)[2]) +#define VectorMultiply(a,b,c) ((c)[0]=(a)[0]*(b)[0],(c)[1]=(a)[1]*(b)[1],(c)[2]=(a)[2]*(b)[2]) #define VectorCopy(a,b) ((b)[0]=(a)[0],(b)[1]=(a)[1],(b)[2]=(a)[2]) #define VectorScale(v, s, o) ((o)[0]=(v)[0]*(s),(o)[1]=(v)[1]*(s),(o)[2]=(v)[2]*(s)) #define VectorMA(v, s, b, o) ((o)[0]=(v)[0]+(b)[0]*(s),(o)[1]=(v)[1]+(b)[1]*(s),(o)[2]=(v)[2]+(b)[2]*(s)) diff --git a/code/renderer/crp_local.h b/code/renderer/crp_local.h index b4f20e8..a2dd22f 100644 --- a/code/renderer/crp_local.h +++ b/code/renderer/crp_local.h @@ -438,6 +438,142 @@ struct Sunlight HTexture penumbraTexture; }; +struct VDBSequenceDesc +{ + const char* folderPath = NULL; + const char* smokeGridName = "density"; + const char* fireGridName = "flames"; + vec3_t originOffset = {}; // index space + vec3_t position = { 700 }; // world space + vec3_t anglesRad = {}; // in radians + vec3_t scale = { 1.0f, 1.0f, 1.0f }; + float smokeExtinctionScale = 1.0f; + float smokeAlbedo = 0.9f; // real smoke: 0.9 to 0.97 + float fireEmissionScale = 0.1f; + float fireTemperatureScale = 1000.0f; + float frameRate = 60.0f; + int startTimeMS = 0; + int startTimeUS = 0; + bool loop = false; + bool useSequenceOffset = true; + bool gpuResident = false; +}; + +struct NanoVDBManager +{ + struct Instance; + struct DrawInstance; + struct CPUFrame; + + void Init(); + void DrawGUI(); + void BeforeFrame(); + bool AddSequence(const VDBSequenceDesc& desc); + void 
MakeWorldToIndexMatrix(matrix3x3_t matrix, const Instance& instance); + void Purge(); + int FindStreamedFrameIndex(uint32_t sequenceIndex, uint32_t frameIndex); + + struct Sequence + { + char folderPath[64]; + vec3_t originOffset; + vec3_t scale; + HBuffer buffer; + uint32_t bufferByteCount; + uint32_t frameCount; + uint32_t firstFrameIndex; + }; + + struct Instance + { + char smokeGridName[64]; + char fireGridName[64]; + vec3_t originOffset; // index space + vec3_t position; // world space + vec3_t anglesRad; // in radians + vec3_t scale; + float smokeExtinctionScale; + float smokeAlbedo; + float fireEmissionScale; + float fireTemperatureScale; + float frameRate; + int startTimeMS; + int startTimeUS; + uint32_t sequenceIndex; + bool loop; + }; + + struct DrawInstance + { + HBuffer buffer; + uint32_t smokeByteOffset; + uint32_t fireByteOffset; + uint32_t smokeByteOffset2; + uint32_t fireByteOffset2; + float t; + }; + + struct GPUFrame + { + uint32_t smokeByteOffset; + uint32_t fireByteOffset; + }; + + struct CPUFrame + { + char filePath[64]; + uint32_t smokeByteOffset; + uint32_t smokeByteCount; + uint32_t fireByteOffset; + uint32_t fireByteCount; + }; + + struct StreamedFrame + { + uint32_t sequenceIndex; + uint32_t frameIndex; + uint32_t smokeByteOffset; + uint32_t flamesByteOffset; + }; + + StaticArray sequences; + StaticArray instances; + StaticArray drawInstances; // for the current frame + StaticArray streamedFrames; // for the current frame + StaticArray gpuFrames; + StaticArray cpuFrames; + HBuffer streamBuffers[FrameCount + 1]; + uint32_t streamBufferByteCount; + uint32_t streamBufferIndex; + bool windowActive = false; + bool linearInterpolation = false; + bool accurateOverlapTest = false; + bool supersampling = false; + int ambientRaymarchLOD = 8; + bool ambientIncreasedCoverage = true; + bool previewMode = false; + float emissiveScatterScale = 0.5f; +}; + +struct ParticleSystem +{ + void Init(); + void Draw(); + +//private: + HPipeline clearPipeline; + 
HPipeline setupPipeline; + HPipeline emitPipeline; + HPipeline simulatePipeline; + HBuffer particleBuffer; + HBuffer liveBuffers[2]; // indices before and after simulation + HBuffer deadBuffer; // indices + HBuffer emitterBuffer; + HBuffer indirectBuffer; // 0: emit dispatch, 1: simulate dispatch + uint32_t liveBufferReadIndex; + bool needsClearing; +}; + struct VolumetricLight { void Init(); @@ -453,6 +589,7 @@ struct VolumetricLight bool ShouldDrawDebug(); bool LoadFogFile(const char* filePath); void SaveFogFile(const char* filePath); + void SetLightGridRootConstants(struct LightGridRC& rc); // GUI/user-friendly data layout struct Fog @@ -462,9 +599,9 @@ struct VolumetricLight vec3_t boxMin; vec3_t boxMax; float extinction; - float albedo; // scatter / extinction + float albedo; // thin fog: 0.3 to 0.5, thick fog: 0.6 to 0.9 float emissive; - float anisotropy; + float anisotropy; // thin fog: 0.9, thick fog: 0.9 with strong backscatter float noiseStrength; float noiseSpatialPeriod; float noiseTimePeriod; @@ -472,30 +609,30 @@ struct VolumetricLight bool isHeightFog; }; -#if defined(VL_CPU_PARTICLES) - HPipeline particleDispatchPipeline; - HPipeline particlePreProcessExtinctionPipeline; - HPipeline particlePreProcessFrustumPipeline; - HPipeline extinctionParticlePipeline; - HPipeline frustumParticlePipeline; - HBuffer particleBuffer; // should be double buffered... - HBuffer particleHitBuffer; // for each tile, is there at least 1 particle? 
- HBuffer particleTileBuffer; // array of tiles, each tile has a uint3 index - HBuffer particleDispatchBuffer; // indirect dispatch buffer - HBuffer particleDispatchClearBuffer; // indirect dispatch buffer with values (0, 1, 1) - uint32_t particleCount; -#endif - Fog fogs[64]; uint32_t fogCount = 0; HPipeline extinctionFogPipeline; + HPipeline extinctionVDBPipeline; HPipeline frustumAmbientPipeline; HPipeline frustumAnisotropyPipeline; HPipeline frustumFogPipeline; + HPipeline frustumLightPropNXPipeline; + HPipeline frustumLightPropNYPipeline; + HPipeline frustumLightPropPXPipeline; + HPipeline frustumLightPropPYPipeline; + HPipeline frustumParticlePipeline; HPipeline frustumPointLightScatterPipeline; HPipeline frustumRaymarchPipeline; HPipeline frustumSunlightVisPipeline; - HPipeline frustumTemporalPipeline; + HPipeline frustumTemporalFloatPipeline; + HPipeline frustumTemporalFloat4Pipeline; + HPipeline frustumVDBPipeline; + HPipeline frustumVDBLQPipeline; + HPipeline frustumDepthTestPipeline; + HPipeline particleClearPipeline; + HPipeline particleHitPipeline; + HPipeline particleListPipeline; + HPipeline particleTilesPipeline; HPipeline pointLightShadowPipeline; HPipeline sunlightScatterPipeline; HPipeline sunlightShadowPipeline; @@ -514,12 +651,19 @@ struct VolumetricLight HTexture sunShadowTextures[4]; // cube, R = transmittance HTexture ambientLightTextureA; // box, can be NULL, RGB = ambient.rgb, A = directional.r HTexture ambientLightTextureB; // box, can be NULL, RG = directional.gb, B = longitude, A = latitude + HTexture frustumVisTexture; // screen tiles, R = Z index of furthest visible froxel tile + HBuffer particleTileBuffer; // voxel tiles: StructuredBuffer + HBuffer particleCounterBuffer; // global counters: StructuredBuffer + HBuffer particleTileIndexBuffer; // flattened voxel tile indices: StructuredBuffer + HBuffer particleIndexBuffer; // particle indices: StructuredBuffer + HBuffer particleDispatchBuffer; // indirect dispatch buffer uvec3_t 
frustumSize; // frustum volume pixel counts uvec3_t frustumTileScale; // by how much do we divide uvec3_t frustumTileSize; // frustum volume tile pixel counts uvec3_t extinctionSize; // extinction volume pixel counts uvec3_t extinctionTileScale; // by how much do we divide uvec3_t extinctionTileSize; // extinction volume tile pixel counts + uint32_t maxParticleIndexCount; // uint count in particleIndexBuffer uint32_t shadowPixelCount; // @TODO: transform into uvec3_t as well uint32_t jitterCounter; uint32_t depthMip; // has to match the X/Y scale of frustumSize @@ -527,8 +671,10 @@ struct VolumetricLight float pointShadowVolumeScale; // how many world units per pixel vec4_t sunShadowVolumeScale; // how many world units per pixel uvec3_t sunShadowSize; // sunlight shadow volume pixel counts - vec3_t ambientColor; + vec3_t ambientColorGUI; + vec3_t ambientColor; // normalized to 0.5 brightness float ambientIntensity; + float pointLightIntensity = 20.0f; vec3_t debugCameraPosition; float debugBoxScale = 1.0f; float debugExtinctionScale = 50.0f; @@ -540,10 +686,17 @@ struct VolumetricLight bool lockCameraPosition = false; bool firstFrame = true; bool windowActive = false; + bool drawSunlight = true; + bool enableLightGrid = true; vec3_t mapBoxMin; vec3_t mapBoxMax; vec3_t lightGridCenter; float debugSphereScale = 0.5f; + int xySubsampling = 2; + int zResolution = 256; + int extinctionResolution = 128; + int sunShadowResolution = 128; + int pointShadowResolution = 64; }; #pragma pack(push, 1) @@ -551,7 +704,8 @@ struct SunlightData { vec3_t direction; vec3_t color; - float intensity; + float intensityVL = 40.0f; + float intensityDL = 2.0f; }; #pragma pack(pop) @@ -657,6 +811,7 @@ struct CRP : IRenderPipeline HTexture lightTexture; HTexture shadingPositionTexture; HTexture renderTarget; + HTexture blackbodyTexture; TextureFormat::Id renderTargetFormat; HTexture renderTargets[2]; uint32_t renderTargetIndex; // the one to write to @@ -705,6 +860,8 @@ struct CRP : 
IRenderPipeline GBufferViz gbufferViz; SunlightEditor sunlightEditor; SunlightData sunlightData; + ParticleSystem particleSystem; + NanoVDBManager vdbManager; }; HPipeline CreateComputePipeline(const char* name, const ShaderByteCode& shader); diff --git a/code/renderer/crp_main.cpp b/code/renderer/crp_main.cpp index 6577ed0..849898a 100644 --- a/code/renderer/crp_main.cpp +++ b/code/renderer/crp_main.cpp @@ -42,7 +42,6 @@ struct SceneViewConst { MaxViews = 1024, LightBytes = sizeof(DynamicLight), - MaxLights = SCENE_VIEW_MAX_LIGHTS, StructBytes = sizeof(SceneView), BufferBytes = MaxViews * StructBytes }; @@ -595,6 +594,8 @@ void CRP::Init() volumetricLight.Init(); gbufferViz.Init(); sunlightEditor.Init(); + particleSystem.Init(); + vdbManager.Init(); srp.firstInit = false; } @@ -603,6 +604,7 @@ void CRP::LoadResources() { const int flags = IMG_NOPICMIP | IMG_NOMIPMAP | IMG_NOIMANIP | IMG_NOAF; blueNoise2D = LoadTexture("textures/stbn_2d.tga", flags, TW_REPEAT); + blackbodyTexture = LoadTexture("textures/blackbody.tga", flags, TW_CLAMP_TO_EDGE); } void CRP::ShutDown(bool fullShutDown) @@ -621,6 +623,8 @@ void CRP::BeginFrame() // have it be first to we can use ImGUI in the other components too imgui.BeginFrame(); + vdbManager.BeforeFrame(); + // must be run outside of the RHI::BeginFrame/RHI::EndFrame pair const bool rtasUpdate = dynamicLights.WantRTASUpdate(tr.rtRefdef) || @@ -667,6 +671,7 @@ void CRP::EndFrame() magnifier.DrawGUI(); sunlightEditor.DrawGUI(); volumetricLight.DrawGUI(); + vdbManager.DrawGUI(); imgui.Draw(renderTarget); toneMap.DrawToneMap(); magnifier.Draw(); @@ -834,15 +839,12 @@ void CRP::ExecuteRenderCommands(const byte* data, bool /*readbackRequested*/) ui.End(); break; case RC_BEGIN_3D: - // @TODO: srp.renderMode = RenderMode::None; break; case RC_END_3D: - // @TODO: srp.renderMode = RenderMode::None; break; case RC_END_SCENE: - // @TODO: post-processing break; case RC_BEGIN_NK: nuklear.Begin(renderTarget); @@ -884,6 +886,7 @@ void 
CRP::DrawSceneView3D(const drawSceneViewCommand_t& cmd) prepass.Draw(cmd); BuildDepthPyramid(); + particleSystem.Draw(); dynamicLights.DrawBegin(); if(volumetricLight.ShouldDraw()) { @@ -1045,8 +1048,6 @@ void CRP::UploadSceneViewData() memcpy(scene.prevProjectionMatrix, tr.prevProjMatrix, sizeof(scene.prevProjectionMatrix)); // we want the first Z slice to be closest to the sun to simplify ray marching - vec3_t zDown; - VectorSet(zDown, 0, 0, -1); SunToZMatrix(scene.sunToZMatrix); R_InvMatrix3x3(scene.sunToZMatrix, scene.zToSunMatrix); @@ -1076,7 +1077,9 @@ void CRP::UploadSceneViewData() VectorCopy(sunlightData.direction, scene.sunDirection); VectorCopy(sunlightData.color, scene.sunColor); - scene.sunIntensity = sunlightData.intensity; + scene.sunIntensityVL = sunlightData.intensityVL; + scene.sunIntensityDL = sunlightData.intensityDL; + scene.pointLightIntensityVL = volumetricLight.pointLightIntensity; VectorCopy(volumetricLight.ambientColor, scene.ambientColor); scene.ambientIntensity = volumetricLight.ambientIntensity; diff --git a/code/renderer/crp_nano_vdb.cpp b/code/renderer/crp_nano_vdb.cpp new file mode 100644 index 0000000..21db737 --- /dev/null +++ b/code/renderer/crp_nano_vdb.cpp @@ -0,0 +1,941 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. 
If not, see . +=========================================================================== +*/ +// Cinematic Rendering Pipeline - NanoVDB support + + +#include "crp_local.h" +#include "../client/cl_imgui.h" + +#define NANOVDB_MAGIC_NUMBER 0x304244566F6E614Eul // "NanoVDB0" +#define NANOVDB_MAGIC_GRID 0x314244566F6E614Eul // "NanoVDB1" +#define NANOVDB_MAGIC_FILE 0x324244566F6E614Eul // "NanoVDB2" + +#define NANOVDB_GRID_BUFFER_ALIGNMENT 32 + + +/* +File structure: +FileHeader [GridHeader GridName]+ [GridData]+ +*/ + +#pragma pack(push, 1) + +struct FileHeader +{ + uint64_t magic; + uint32_t version; + uint16_t gridCount; + uint16_t codec; + + bool IsValid() + { + return magic == NANOVDB_MAGIC_NUMBER || magic == NANOVDB_MAGIC_FILE; + } +}; + +static_assert(sizeof(FileHeader) == 16, "Invalid FileHeader size"); + +struct FileGridHeader +{ + uint64_t memoryByteCount; + uint64_t fileByteCount; + uint64_t gridNameHashKey; + uint64_t activeVoxelCount; + uint32_t gridType; + uint32_t gridClass; + double worldBBox[6]; // AABB in world space + int32_t indexBBox[6]; // AABB in index space + double voxelSize[3]; // in world units + uint32_t gridNameLength; // it includes the NULL terminator + uint32_t nodeCount[4]; // # nodes per level + uint32_t tileCount[3]; // # of active tiles per level + uint16_t codec; + uint16_t padding; + uint32_t versionNumber; +}; + +static_assert(sizeof(FileGridHeader) == 176, "Invalid FileHeader size"); + +#pragma pack(pop) + +struct FileGrid +{ + uint32_t byteOffset; + uint32_t byteCount; + + bool IsValid() const + { + return byteOffset > 0 && byteCount > 0; + } + + enum Id + { + Smoke, + Fire, + Count + }; +}; + + +static void ScaleMatrix(matrix3x3_t m, const vec3_t scale) +{ + m[0] = scale[0]; + m[1] = 0.0f; + m[2] = 0.0f; + m[3] = 0.0f; + m[4] = scale[1]; + m[5] = 0.0f; + m[6] = 0.0f; + m[7] = 0.0f; + m[8] = scale[2]; +} + +static void RotationMatrixX(matrix3x3_t m, float angleRad) +{ + const float c = cosf(angleRad); + const float s = 
sinf(angleRad); + m[0] = 1.0f; + m[1] = 0.0f; + m[2] = 0.0f; + m[3] = 0.0f; + m[4] = c; + m[5] = -s; + m[6] = 0.0f; + m[7] = s; + m[8] = c; +} + +static void RotationMatrixY(matrix3x3_t m, float angleRad) +{ + const float c = cosf(angleRad); + const float s = sinf(angleRad); + m[0] = c; + m[1] = 0.0f; + m[2] = s; + m[3] = 0.0f; + m[4] = 1.0f; + m[5] = 0.0f; + m[6] = -s; + m[7] = 0.0f; + m[8] = c; +} + +static void RotationMatrixZ(matrix3x3_t m, float angleRad) +{ + const float c = cosf(angleRad); + const float s = sinf(angleRad); + m[0] = c; + m[1] = -s; + m[2] = 0.0f; + m[3] = s; + m[4] = c; + m[5] = 0.0f; + m[6] = 0.0f; + m[7] = 0.0f; + m[8] = 1.0f; +} + +static void MultMatrix(matrix3x3_t m, const matrix3x3_t a, const matrix3x3_t b) +{ + m[0] = a[0] * b[0] + a[1] * b[3] + a[2] * b[6]; + m[1] = a[0] * b[1] + a[1] * b[4] + a[2] * b[7]; + m[2] = a[0] * b[2] + a[1] * b[5] + a[2] * b[8]; + m[3] = a[3] * b[0] + a[4] * b[3] + a[5] * b[6]; + m[4] = a[3] * b[1] + a[4] * b[4] + a[5] * b[7]; + m[5] = a[3] * b[2] + a[4] * b[5] + a[5] * b[8]; + m[6] = a[6] * b[0] + a[7] * b[3] + a[8] * b[6]; + m[7] = a[6] * b[1] + a[7] * b[4] + a[8] * b[7]; + m[8] = a[6] * b[2] + a[7] * b[5] + a[8] * b[8]; +} + +static void FindGrids(FileGrid* grids, fileHandle_t fh, int byteCount, const VDBSequenceDesc& desc) +{ + FileHeader fileHeader; + FS_Read(&fileHeader, sizeof(fileHeader), fh); + if(!fileHeader.IsValid()) + { + return; + } + + // for all grids + uint32_t gridByteCounts[16] = {}; + Q_assert(fileHeader.gridCount <= ARRAY_LEN(gridByteCounts)); + + // for grids of interest + int fileToCNQ3[FileGrid::Count]; + for(int g = 0; g < FileGrid::Count; g++) + { + fileToCNQ3[g] = -1; + grids[g].byteOffset = 0; + grids[g].byteCount = 0; + } + + const uint32_t fileGridCount = (uint32_t)fileHeader.gridCount; + for(uint32_t g = 0; g < fileGridCount; g++) + { + FileGridHeader gridHeader; + FS_Read(&gridHeader, sizeof(gridHeader), fh); + + char gridName[64]; + Q_assert(gridHeader.gridNameLength <= 
ARRAY_LEN(gridName)); + FS_Read(gridName, (int)gridHeader.gridNameLength, fh); + + // vdb_lod.exe auto-renames "density" to "density_level_2" for mip level 2 + if(Q_stristr(gridName, desc.smokeGridName) != NULL) + { + fileToCNQ3[g] = (int)FileGrid::Smoke; + } + else if(Q_stristr(gridName, desc.fireGridName) != NULL) + { + fileToCNQ3[g] = (int)FileGrid::Fire; + } + gridByteCounts[g] = gridHeader.fileByteCount; + + if(fileToCNQ3[g] >= 0 && fileToCNQ3[g] < FileGrid::Count) + { + grids[fileToCNQ3[g]].byteOffset = 0; + grids[fileToCNQ3[g]].byteCount = gridHeader.fileByteCount; + } + } + + for(uint32_t g = 0; g < fileGridCount; g++) + { + uint64_t magic; + FS_Read(&magic, sizeof(magic), fh); + Q_assert(magic == NANOVDB_MAGIC_NUMBER || magic == NANOVDB_MAGIC_GRID); + if(fileToCNQ3[g] >= 0 && fileToCNQ3[g] < FileGrid::Count) + { + grids[fileToCNQ3[g]].byteOffset = (uint32_t)FS_FTell(fh) - 8; + } + FS_Seek(fh, gridByteCounts[g] - 8, FS_SEEK_CUR); + } + + Q_assert(grids[FileGrid::Smoke].IsValid() || grids[FileGrid::Fire].IsValid()); +} + +static void ReadTransform(vec3_t originOffset, vec3_t scale, fileHandle_t fh, int byteOffset) +{ + FS_Seek(fh, byteOffset + 296, FS_SEEK_SET); + FS_Read(&scale[0], 4, fh); + FS_Seek(fh, byteOffset + 312, FS_SEEK_SET); + FS_Read(&scale[1], 4, fh); + FS_Seek(fh, byteOffset + 328, FS_SEEK_SET); + FS_Read(&scale[2], 4, fh); + FS_Seek(fh, byteOffset + 368, FS_SEEK_SET); + FS_Read(originOffset, 12, fh); +} + +static void ReadTransform(vec3_t originOffset, vec3_t scale, fileHandle_t fh, const FileGrid* grids) +{ + if(grids[FileGrid::Smoke].IsValid()) + { + ReadTransform(originOffset, scale, fh, grids[FileGrid::Smoke].byteOffset); + } + else if(grids[FileGrid::Fire].IsValid()) + { + ReadTransform(originOffset, scale, fh, grids[FileGrid::Fire].byteOffset); + } +} + +static void VectorScaleGUI(vec3_t vector, const char* id) +{ + ImGui::Text(" "); + ImGui::SameLine(); + if(ImGui::Button(va("x2##%s", id))) + { + VectorScale(vector, 2.0f, vector); + } + 
ImGui::SameLine();
+	if(ImGui::Button(va("/2##%s", id))) // "/2" button: halves every component of 'vector'
+	{
+		VectorScale(vector, 0.5f, vector);
+	}
+}
+
+static void UploadFrame( // reads one frame's smoke/fire grids from its file into 'buffer', starting at 'gpuBufferOffset'
+	uint32_t& smokeByteOffset, uint32_t& fireByteOffset, uint32_t& gpuBufferOffset, // only written for the grids that actually get uploaded
+	HBuffer buffer, const NanoVDBManager::CPUFrame& frame)
+{
+	if(frame.fireByteCount > 0 || frame.smokeByteCount > 0) // a frame without grid data never opens its file
+	{
+		fileHandle_t fh;
+		const int fileByteCount = FS_FOpenFileRead(frame.filePath, &fh, qfalse);
+		if(fileByteCount > 0)
+		{
+			if(frame.smokeByteCount > 0)
+			{
+				smokeByteOffset = gpuBufferOffset;
+				FS_Seek(fh, frame.smokeByteOffset, FS_SEEK_SET);
+				const uint32_t gridByteCount = AlignUp(frame.smokeByteCount, NANOVDB_GRID_BUFFER_ALIGNMENT); // the upload range is padded to the grid alignment
+				uint8_t* const mapped = BeginBufferUpload(buffer, gpuBufferOffset, gridByteCount);
+				FS_Read(mapped, frame.smokeByteCount, fh); // only the unpadded byte count is read from the file
+				EndBufferUpload(buffer);
+				gpuBufferOffset += gridByteCount;
+			}
+			if(frame.fireByteCount > 0)
+			{
+				fireByteOffset = gpuBufferOffset;
+				FS_Seek(fh, frame.fireByteOffset, FS_SEEK_SET);
+				const uint32_t gridByteCount = AlignUp(frame.fireByteCount, NANOVDB_GRID_BUFFER_ALIGNMENT);
+				uint8_t* const mapped = BeginBufferUpload(buffer, gpuBufferOffset, gridByteCount);
+				FS_Read(mapped, frame.fireByteCount, fh);
+				EndBufferUpload(buffer);
+				gpuBufferOffset += gridByteCount;
+			}
+		}
+		if(fileByteCount >= 0) // the handle is closed even when the file turned out to be empty
+		{
+			FS_FCloseFile(fh);
+		}
+	}
+}
+
+static int64_t GetTimeStampUS(int ms, int us) // combines a millisecond count and a microsecond count into 64-bit microseconds
+{
+	return int64_t(1000) * (int64_t)ms + (int64_t)us;
+}
+
+
+void NanoVDBManager::Init() // empties the sequence/instance/frame arrays and creates the stream buffers
+{
+	sequences.Clear();
+	instances.Clear();
+	drawInstances.Clear();
+	cpuFrames.Clear();
+	gpuFrames.Clear();
+	streamBufferIndex = 0;
+
+	{
+		streamBufferByteCount = 256 << 20; // @TODO: CVar
+		BufferDesc desc("", streamBufferByteCount, ResourceStates::ComputeShaderAccessBit);
+		desc.shortLifeTime = true;
+		desc.structureByteCount = 4;
+		for(int i = 0; i < ARRAY_LEN(streamBuffers); i++) // every stream buffer has the same size and a 1-based name
+		{
+			desc.name = va("NanoVDB stream #%d", i + 1);
+			streamBuffers[i] = CreateBuffer(desc);
+		}
+	}
+}
+
+void
NanoVDBManager::BeforeFrame()
+{
+	// Per-frame update: drops finished non-looping instances, then builds one DrawInstance
+	// per remaining instance, streaming the 2 frames it needs from disk when the
+	// sequence has no dedicated GPU buffer.
+	drawInstances.Clear();
+	streamedFrames.Clear();
+	if(!tr.hasWorldRender)
+	{
+		return;
+	}
+
+	// rotate through the stream buffers so this frame writes to a different one than the last
+	streamBufferIndex = (streamBufferIndex + 1) % ARRAY_LEN(streamBuffers);
+	const HBuffer streamBuffer = streamBuffers[streamBufferIndex];
+
+	// byte offset 0 is never handed out: an offset of 0 means "no grid"
+	uint32_t gpuBufferOffset = NANOVDB_GRID_BUFFER_ALIGNMENT;
+
+	// iterate backwards because RemoveUnordered changes the entries at and after 'i'
+	const int64_t renderTimeUS = GetTimeStampUS(tr.worldRenderTimeMS, tr.worldRenderTimeUS);
+	for(int i = (int)instances.count - 1; i >= 0; i--)
+	{
+		const Instance& inst = instances[i];
+		if(inst.loop)
+		{
+			continue;
+		}
+
+		const Sequence& seq = sequences[inst.sequenceIndex];
+		const float durationSec = (float)seq.frameCount / inst.frameRate;
+		const int64_t durationUS = (int64_t)ceilf(durationSec * 1000000.0f);
+		const int64_t endTimeUS = GetTimeStampUS(inst.startTimeMS, inst.startTimeUS) + durationUS;
+		if(renderTimeUS >= endTimeUS)
+		{
+			instances.RemoveUnordered((uint32_t)i);
+		}
+	}
+
+	for(uint32_t i = 0; i < instances.count; i++)
+	{
+		const Instance& inst = instances[i];
+		const Sequence& seq = sequences[inst.sequenceIndex];
+		const int64_t startTimeUS = GetTimeStampUS(inst.startTimeMS, inst.startTimeUS);
+		const int64_t usPerFrame = (int64_t)(1000000.0f / instances[i].frameRate);
+		const uint32_t frameIndex = (uint32_t)((renderTimeUS - startTimeUS) / usPerFrame) % seq.frameCount;
+		const uint32_t remainder = (uint32_t)((renderTimeUS - startTimeUS) % usPerFrame);
+		const uint32_t frameIndex2 = min(frameIndex + 1, seq.frameCount - 1);
+		const float t = (float)remainder / (float)usPerFrame; // lerp(frame, frame2, t)
+
+		DrawInstance drawInst = {};
+		if(IsNullHandle(seq.buffer))
+		{
+			// streamed sequence: reuse frames already uploaded this frame, upload the others
+			const CPUFrame& frame = cpuFrames[seq.firstFrameIndex + frameIndex];
+			const CPUFrame& frame2 = cpuFrames[seq.firstFrameIndex + frameIndex2];
+			const int sf1 = FindStreamedFrameIndex(inst.sequenceIndex, frameIndex);
+			const int sf2 = FindStreamedFrameIndex(inst.sequenceIndex, frameIndex2);
+
+			// the budget must match what UploadFrame consumes: the aligned size of each grid
+			uint32_t requestedByteCount = 0;
+			if(sf1 >= 0)
+			{
+				drawInst.smokeByteOffset = streamedFrames[sf1].smokeByteOffset;
+				drawInst.fireByteOffset = streamedFrames[sf1].flamesByteOffset;
+			}
+			else
+			{
+				requestedByteCount +=
+					AlignUp(frame.smokeByteCount, NANOVDB_GRID_BUFFER_ALIGNMENT) +
+					AlignUp(frame.fireByteCount, NANOVDB_GRID_BUFFER_ALIGNMENT);
+			}
+			if(sf2 >= 0)
+			{
+				drawInst.smokeByteOffset2 = streamedFrames[sf2].smokeByteOffset;
+				drawInst.fireByteOffset2 = streamedFrames[sf2].flamesByteOffset;
+			}
+			else
+			{
+				// byte counts, not file offsets, are what gets uploaded
+				requestedByteCount +=
+					AlignUp(frame2.smokeByteCount, NANOVDB_GRID_BUFFER_ALIGNMENT) +
+					AlignUp(frame2.fireByteCount, NANOVDB_GRID_BUFFER_ALIGNMENT);
+			}
+
+			drawInst.buffer = streamBuffer;
+			if(requestedByteCount > 0 &&
+				gpuBufferOffset + requestedByteCount <= streamBufferByteCount)
+			{
+				StreamedFrame sf = {};
+				sf.sequenceIndex = inst.sequenceIndex;
+
+				// only upload (and register) the frames that were budgeted above:
+				// uploading an already streamed frame again would consume bytes that were never accounted for
+				if(sf1 < 0)
+				{
+					UploadFrame(drawInst.smokeByteOffset, drawInst.fireByteOffset, gpuBufferOffset, streamBuffer, frame);
+					if(drawInst.smokeByteOffset > 0 || drawInst.fireByteOffset > 0)
+					{
+						sf.frameIndex = frameIndex;
+						sf.smokeByteOffset = drawInst.smokeByteOffset;
+						sf.flamesByteOffset = drawInst.fireByteOffset;
+						streamedFrames.Add(sf);
+					}
+				}
+				if(sf2 < 0)
+				{
+					UploadFrame(drawInst.smokeByteOffset2, drawInst.fireByteOffset2, gpuBufferOffset, streamBuffer, frame2);
+					if(drawInst.smokeByteOffset2 > 0 || drawInst.fireByteOffset2 > 0)
+					{
+						sf.frameIndex = frameIndex2;
+						sf.smokeByteOffset = drawInst.smokeByteOffset2;
+						sf.flamesByteOffset = drawInst.fireByteOffset2;
+						streamedFrames.Add(sf);
+					}
+				}
+			}
+		}
+		else
+		{
+			// GPU-resident sequence: the offsets were computed once when the sequence was added
+			const NanoVDBManager::GPUFrame& frame = gpuFrames[seq.firstFrameIndex + frameIndex];
+			const NanoVDBManager::GPUFrame& frame2 = gpuFrames[seq.firstFrameIndex + frameIndex2];
+			drawInst.buffer = seq.buffer;
+			drawInst.fireByteOffset = frame.fireByteOffset;
+			drawInst.fireByteOffset2 = frame2.fireByteOffset;
+			drawInst.smokeByteOffset = frame.smokeByteOffset;
+			drawInst.smokeByteOffset2 = frame2.smokeByteOffset;
+		}
+		drawInst.t = t;
+		drawInstances.Add(drawInst);
+	}
+}
+
+bool NanoVDBManager::AddSequence(const VDBSequenceDesc& desc)
+{
+	if(!tr.worldMapLoaded)
+	{
+		return false;
+	}
+
+	if(instances.IsFull())
+	{
+		
ri.Printf(PRINT_WARNING, "^3WARNING: NanoVDB instance limit reached\n"); + return false; + } + + vec3_t originOffset = {}; + vec3_t scale; + VectorSet(scale, 1, 1, 1); + + int sequenceIndex = -1; + for(uint32_t i = 0; i < sequences.count; i++) + { + if(Q_stricmp(sequences[i].folderPath, desc.folderPath) == 0) + { + sequenceIndex = (int)i; + break; + } + } + if(sequenceIndex < 0 && !sequences.IsFull()) + { + HBuffer gpuBuffer = RHI_MAKE_NULL_HANDLE(); + uint32_t gpuByteCount = 0; + uint32_t firstFrameIndex = 0; + int fileCount = 0; + + if(desc.gpuResident) + { + int startTimeMS = Sys_Milliseconds(); + + gpuByteCount = NANOVDB_GRID_BUFFER_ALIGNMENT; + char** fileList = ri.FS_ListFiles(desc.folderPath, ".nvdb", &fileCount); + for(int f = 0; f < fileCount; f++) + { + FileGrid grids[FileGrid::Count] = {}; + fileHandle_t fh; + const int byteCount = FS_FOpenFileRead(va("%s/%s", desc.folderPath, fileList[f]), &fh, qfalse); + if(byteCount > 0) + { + FindGrids(grids, fh, byteCount, desc); + } + if(byteCount >= 0) + { + FS_FCloseFile(fh); + } + for(uint32_t g = 0; g < FileGrid::Count; g++) + { + if(grids[g].byteCount > 0) + { + gpuByteCount += AlignUp(grids[g].byteCount, NANOVDB_GRID_BUFFER_ALIGNMENT); + } + } + } + ri.FS_FreeFileList(fileList); + if(fileCount <= 0 || gpuByteCount <= NANOVDB_GRID_BUFFER_ALIGNMENT) + { + ri.Printf(PRINT_WARNING, "^3WARNING: invalid NanoVDB folder '%s'\n", desc.folderPath); + return false; + } + + if(gpuByteCount >= uint32_t(1 << 31)) + { + ri.Printf(PRINT_WARNING, "^3WARNING: NanoVDB sequence '%s' too large for GPU storage\n", desc.folderPath); + VDBSequenceDesc newDesc = desc; + newDesc.gpuResident = false; + return AddSequence(newDesc); + } + + if(gpuFrames.count + fileCount > gpuFrames.capacity) + { + ri.Printf(PRINT_WARNING, "^3WARNING: NanoVDB frame limit reached\n"); + return false; + } + + ri.Printf(PRINT_ALL, "NanoVDB: analyzed %s in %d ms\n", desc.folderPath, Sys_Milliseconds() - startTimeMS); + startTimeMS = Sys_Milliseconds(); + + 
gpuByteCount = AlignUp(gpuByteCount, NANOVDB_GRID_BUFFER_ALIGNMENT);
+			BufferDesc bufferDesc("NanoVDB full sequence", gpuByteCount, ResourceStates::ComputeShaderAccessBit); // one dedicated buffer holds every grid of the sequence
+			bufferDesc.shortLifeTime = true;
+			bufferDesc.structureByteCount = 4;
+			gpuBuffer = CreateBuffer(bufferDesc);
+
+			uint32_t gpuByteOffset = NANOVDB_GRID_BUFFER_ALIGNMENT; // skip offset 0: the assert below treats a 0 offset as "grid missing"
+			firstFrameIndex = gpuFrames.count;
+			fileList = ri.FS_ListFiles(desc.folderPath, ".nvdb", &fileCount); // second pass over the same folder: upload the grids this time
+			for(int f = 0; f < fileCount; f++)
+			{
+				GPUFrame frame = {};
+				FileGrid grids[FileGrid::Count] = {};
+				fileHandle_t fh;
+				const int byteCount = FS_FOpenFileRead(va("%s/%s", desc.folderPath, fileList[f]), &fh, qfalse);
+				if(byteCount > 0)
+				{
+					FindGrids(grids, fh, byteCount, desc);
+					if(grids[FileGrid::Smoke].IsValid())
+					{
+						const uint32_t gridByteCount = AlignUp(grids[FileGrid::Smoke].byteCount, NANOVDB_GRID_BUFFER_ALIGNMENT); // upload range is padded, the read below is not
+						uint8_t* const cpuBuffer = BeginBufferUpload(gpuBuffer, gpuByteOffset, gridByteCount);
+						FS_Seek(fh, (int)grids[FileGrid::Smoke].byteOffset, FS_SEEK_SET);
+						FS_Read(cpuBuffer, (int)grids[FileGrid::Smoke].byteCount, fh);
+						EndBufferUpload(gpuBuffer);
+						frame.smokeByteOffset = gpuByteOffset;
+						gpuByteOffset += gridByteCount;
+					}
+					if(grids[FileGrid::Fire].IsValid())
+					{
+						const uint32_t gridByteCount = AlignUp(grids[FileGrid::Fire].byteCount, NANOVDB_GRID_BUFFER_ALIGNMENT);
+						uint8_t* const cpuBuffer = BeginBufferUpload(gpuBuffer, gpuByteOffset, gridByteCount);
+						FS_Seek(fh, (int)grids[FileGrid::Fire].byteOffset, FS_SEEK_SET);
+						FS_Read(cpuBuffer, (int)grids[FileGrid::Fire].byteCount, fh);
+						EndBufferUpload(gpuBuffer);
+						frame.fireByteOffset = gpuByteOffset;
+						gpuByteOffset += gridByteCount;
+					}
+
+					if(f == 0) // the transform of the first listed file is used for the whole sequence
+					{
+						ReadTransform(originOffset, scale, fh, grids);
+					}
+				}
+				if(byteCount >= 0)
+				{
+					FS_FCloseFile(fh);
+				}
+				Q_assert(frame.fireByteOffset > 0 || frame.smokeByteOffset > 0);
+				gpuFrames.Add(frame); // one entry per listed file, even when nothing could be uploaded for it
+			}
+			ri.FS_FreeFileList(fileList);
+
+			ri.Printf(PRINT_ALL, "NanoVDB: processed %s in %d 
ms\n", desc.folderPath, Sys_Milliseconds() - startTimeMS); + } + else + { + const int startTimeMS = Sys_Milliseconds(); + + firstFrameIndex = cpuFrames.count; + char** fileList = ri.FS_ListFiles(desc.folderPath, ".nvdb", &fileCount); + for(int f = 0; f < fileCount; f++) + { + FileGrid grids[FileGrid::Count] = {}; + const char* const filePath = va("%s/%s", desc.folderPath, fileList[f]); + fileHandle_t fh; + const int byteCount = FS_FOpenFileRead(filePath, &fh, qfalse); + if(byteCount > 0) + { + FindGrids(grids, fh, byteCount, desc); + CPUFrame frame = {}; + Q_strncpyz(frame.filePath, filePath, sizeof(frame.filePath)); + frame.fireByteOffset = grids[FileGrid::Fire].byteOffset; + frame.fireByteCount = grids[FileGrid::Fire].byteCount; + frame.smokeByteOffset = grids[FileGrid::Smoke].byteOffset; + frame.smokeByteCount = grids[FileGrid::Smoke].byteCount; + cpuFrames.Add(frame); + if(f == 0) + { + ReadTransform(originOffset, scale, fh, grids); + } + } + if(byteCount >= 0) + { + FS_FCloseFile(fh); + } + } + ri.FS_FreeFileList(fileList); + + if(fileCount <= 0) + { + ri.Printf(PRINT_WARNING, "^3WARNING: invalid NanoVDB folder '%s'\n", desc.folderPath); + return false; + } + + if(cpuFrames.count + fileCount > cpuFrames.capacity) + { + ri.Printf(PRINT_WARNING, "^3WARNING: NanoVDB frame limit reached\n"); + return false; + } + + ri.Printf(PRINT_ALL, "NanoVDB: analyzed %s in %d ms\n", desc.folderPath, Sys_Milliseconds() - startTimeMS); + } + + sequenceIndex = (int)sequences.count; + Sequence sequence = {}; + Q_strncpyz(sequence.folderPath, desc.folderPath, sizeof(sequence.folderPath)); + sequence.frameCount = (uint32_t)fileCount; + sequence.firstFrameIndex = firstFrameIndex; + sequence.buffer = gpuBuffer; + sequence.bufferByteCount = gpuByteCount; + VectorCopy(originOffset, sequence.originOffset); + VectorCopy(scale, sequence.scale); + sequences.Add(sequence); + } + if(sequenceIndex < 0) + { + ri.Printf(PRINT_WARNING, "^3WARNING: NanoVDB sequence limit reached\n"); + return 
false; + } + + Instance instance = {}; + instance.fireEmissionScale = desc.fireEmissionScale; + Q_strncpyz(instance.fireGridName, desc.fireGridName, sizeof(instance.fireGridName)); + instance.fireTemperatureScale = desc.fireTemperatureScale; + instance.frameRate = desc.frameRate; + instance.smokeExtinctionScale = desc.smokeExtinctionScale; + instance.smokeAlbedo = desc.smokeAlbedo; + Q_strncpyz(instance.smokeGridName, desc.smokeGridName, sizeof(instance.smokeGridName)); + instance.startTimeMS = desc.startTimeMS; + instance.startTimeUS = desc.startTimeUS; + instance.sequenceIndex = (uint32_t)sequenceIndex; + VectorMultiply(desc.scale, sequences[sequenceIndex].scale, instance.scale); + VectorCopy(desc.position, instance.position); + VectorCopy(desc.useSequenceOffset ? sequences[sequenceIndex].originOffset : desc.originOffset, instance.originOffset); + VectorCopy(desc.anglesRad, instance.anglesRad); + instance.loop = desc.loop; + instances.Add(instance); + + return true; +} + +void NanoVDBManager::MakeWorldToIndexMatrix(matrix3x3_t matrix, const Instance& instance) +{ + matrix3x3_t scale, rot, temp, temp2; + vec3_t scaleVector; + + for(int i = 0; i < 3; i++) + { + scaleVector[i] = 1.0f / instance.scale[i]; + } + ScaleMatrix(scale, scaleVector); + + RotationMatrixX(rot, -instance.anglesRad[0]); + MultMatrix(temp, scale, rot); + + RotationMatrixY(rot, -instance.anglesRad[1]); + MultMatrix(temp2, temp, rot); + + RotationMatrixZ(rot, -instance.anglesRad[2]); + MultMatrix(matrix, temp2, rot); +} + +void NanoVDBManager::DrawGUI() +{ + static const char* const sequencePopupTitle = "Add NanoVDB Sequence"; + + if(!tr.worldMapLoaded) + { + return; + } + + GUI_AddMainMenuItem(GUI_MainMenu::Tools, "Edit NanoVDB", "", &windowActive); + + if(!windowActive) + { + return; + } + + if(ImGui::Begin("NanoVDB Settings", &windowActive, ImGuiWindowFlags_AlwaysAutoResize)) + { + if(rhiInfo.forceNanoVDBPreviewMode) + { + static bool forcedPreviewMode = true; + ImGui::BeginDisabled(true); + 
ImGui::Checkbox("Preview mode (forced due to driver bug)", &forcedPreviewMode); + ImGui::EndDisabled(); + } + else + { + ImGui::Checkbox("Preview mode", &previewMode); + if(!previewMode) + { + ImGui::Checkbox("2x super-sampling", &supersampling); + ImGui::Checkbox("Linear interpolation", &linearInterpolation); + ImGui::Checkbox("Accurate overlap test", &accurateOverlapTest); + ImGui::Checkbox("Ambient lighting: higher angular LoD", &ambientIncreasedCoverage); + ImGui::SliderInt("Ambient lighting: sub-sampling", &ambientRaymarchLOD, 1, 8); + ImGui::SliderFloat("Emissive scattering scale", &emissiveScatterScale, 0.0f, 1.0f, "%g"); + } + } + + const uint64_t streamByteCount = (uint64_t)ARRAY_LEN(streamBuffers) * (uint64_t)streamBufferByteCount; + uint64_t dedicatedByteCount = 0; + for(uint32_t i = 0; i < sequences.count; i++) + { + dedicatedByteCount += (uint64_t)sequences[i].bufferByteCount; + } + ImGui::Text("%d sequence%s, %s dedicated, %s stream", + (int)sequences.count, sequences.count >= 2 ? "s" : "", + Com_FormatBytes(dedicatedByteCount), + Com_FormatBytes(streamByteCount)); + ImGui::Text("%d CPU frame%s, %d GPU frame%s", + (int)cpuFrames.count, cpuFrames.count >= 2 ? "s" : "", + (int)gpuFrames.count, gpuFrames.count >= 2 ? "s" : ""); + ImGui::Text("%d streamed file%s", (int)streamedFrames.count, streamedFrames.count >= 2 ? "s" : ""); + + ImGui::Separator(); + if(ImGui::Button("Purge unused sequences")) + { + Purge(); + } + + if(ImGui::BeginTabBar("Tabs#VDB", ImGuiTabBarFlags_AutoSelectNewTabs)) + { + for(uint32_t i = 0; i < instances.count; i++) + { + if(ImGui::BeginTabItem(va("#%d", i + 1))) + { + Instance& inst = instances[i]; + Sequence& seq = sequences[inst.sequenceIndex]; + + ImGui::Text("%s (%d frame%s, %s)", seq.folderPath, (int)seq.frameCount, + seq.frameCount >= 2 ? "s" : "", + IsNullHandle(seq.buffer) ? 
"streamed" : "in VRAM"); + ImGui::SliderFloat("Framerate", &inst.frameRate, 15.0f, 120.0f, "%g"); + ImGui::SliderFloat("Smoke extinction scale (thickness)", &inst.smokeExtinctionScale, 0.0f, 10.0f, "%g"); + ImGui::SliderFloat("Smoke albedo (reflectivity)", &inst.smokeAlbedo, 0.0f, 1.0f, "%g"); + ImGui::SliderFloat("Flame emission scale (brightness)", &inst.fireEmissionScale, 0.0f, 1.0f, "%g"); + ImGui::SliderFloat("Flame temperature scale (color)", &inst.fireTemperatureScale, 0.0f, 20000.0f, "%g"); + + vec3_t angles; + for(int a = 0; a < 3; a++) + { + angles[a] = RAD2DEG(inst.anglesRad[a]); + } + + ImGui::SliderFloat3("Origin offset (index space)", inst.originOffset, -1000.0f, 1000.0f, "%g"); + VectorScaleGUI(inst.originOffset, "origin"); + ImGui::SliderFloat3("Scale", inst.scale, 0.0f, 100.0f, "%g"); + VectorScaleGUI(inst.scale, "scale"); + ImGui::SliderFloat3("Position (world space)", inst.position, -100 * 1000.0f, 100 * 1000.0f, "%g"); + ImGui::SliderFloat3("Angles", angles, 0.0f, 360.0f, "%g"); + for(int a = 0; a < 3; a++) + { + inst.anglesRad[a] = DEG2RAD(angles[a]); + } + + ImGui::Separator(); + if(ImGui::Button("Remove")) + { + instances.Remove(i); + } + + ImGui::EndTabItem(); + } + } + + if(ImGui::BeginTabItem("Add")) + { + static char sequencePath[64]; + static char flamesGridName[64] = "flames"; + static char smokeGridName[64] = "density"; + static bool gpuResident = true; + + ImGui::InputText("Folder path", sequencePath, sizeof(sequencePath)); + ImGui::SameLine(); + if(ImGui::Button("...")) + { + OpenFolderDialog_Open("nanovdb"); + } + ImGui::InputText("Flames grid", flamesGridName, sizeof(flamesGridName)); + ImGui::InputText("Smoke grid", smokeGridName, sizeof(smokeGridName)); + ImGui::Checkbox("GPU resident", &gpuResident); + + ImGui::Separator(); + if(ImGui::Button("Add")) + { + VDBSequenceDesc desc = {}; + desc.fireGridName = flamesGridName; + desc.folderPath = sequencePath; + desc.gpuResident = gpuResident; + desc.loop = true; + desc.smokeGridName = 
smokeGridName; + AddSequence(desc); + } + + if(OpenFolderDialog_Do()) + { + Q_strncpyz(sequencePath, OpenFolderDialog_GetPath(), sizeof(sequencePath)); + } + + ImGui::EndTabItem(); + } + + ImGui::EndTabBar(); + } + } + ImGui::End(); +} + +void NanoVDBManager::Purge() +{ + // build sequence reference counts + uint32_t sequenceRefCounts[ARRAY_LEN(sequences.items)] = {}; + for(uint32_t i = 0; i < instances.count; i++) + { + const uint32_t s = instances[i].sequenceIndex; + sequenceRefCounts[s]++; + } + + // queue GPU buffer deletions + for(uint32_t s = 0; s < sequences.count; s++) + { + if(sequenceRefCounts[s] == 0 && + !IsNullHandle(sequences[s].buffer)) + { + DestroyBufferDelayed(sequences[s].buffer); + } + } + + // compact sequence array, build index map, remove frames, fix frame offsets + uint32_t sequenceRemap[ARRAY_LEN(sequences.items)] = {}; + uint32_t removed = 0; + uint32_t dst = 0; + uint32_t src = 0; + for(; src < sequences.count; src++) + { + if(sequenceRefCounts[src] == 0) + { + const uint32_t first = sequences[src].firstFrameIndex; + const uint32_t count = sequences[src].frameCount; + const bool streamed = IsNullHandle(sequences[src].buffer); + if(streamed) + { + cpuFrames.RemoveRange(first, count); + } + else + { + gpuFrames.RemoveRange(first, count); + } + for(uint32_t s = 0; s < sequences.count; s++) + { + if(sequences[s].firstFrameIndex > first) + { + sequences[s].firstFrameIndex -= count; + } + } + removed++; + continue; + } + + sequenceRemap[src] = dst; + if(src > dst) + { + sequences[dst] = sequences[src]; + } + dst++; + } + sequences.count -= removed; + + // fix sequence indices + for(uint32_t i = 0; i < instances.count; i++) + { + const uint32_t s = instances[i].sequenceIndex; + instances[i].sequenceIndex = sequenceRemap[s]; + } + +#if defined(_DEBUG) + for(uint32_t i = 0; i < instances.count; i++) + { + Q_assert(instances[i].sequenceIndex < sequences.count); + } + for(uint32_t s = 0; s < sequences.count; s++) + { + const Sequence& seq = 
sequences[s]; + const uint32_t frameCount = IsNullHandle(seq.buffer) ? cpuFrames.count : gpuFrames.count; + Q_assert(seq.firstFrameIndex + seq.frameCount <= frameCount); + } +#endif +} + +int NanoVDBManager::FindStreamedFrameIndex(uint32_t sequenceIndex, uint32_t frameIndex) +{ + int index = -1; + for(uint32_t f = 0; f < streamedFrames.count; f++) + { + if(streamedFrames[f].sequenceIndex == sequenceIndex && + streamedFrames[f].frameIndex == frameIndex) + { + index = (int)f; + break; + } + } + + return index; +} diff --git a/code/renderer/crp_particles.cpp b/code/renderer/crp_particles.cpp new file mode 100644 index 0000000..0902ec5 --- /dev/null +++ b/code/renderer/crp_particles.cpp @@ -0,0 +1,255 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . 
// @TODO:
// Hard-coded settings of the single emitter (index 0) driven by ParticleSystem::Draw.
// The #if 0 branch holds a tiny alternative configuration.
#if 0
static const uint32_t EmitterParticleCount = 5;
static const uint32_t EmitterEmitCount = 2;
static const float EmitterMaxSeconds = 2.0f / 60.0f;
#else
static const float EmitterMaxSeconds = 1.0f; // forwarded to the clear pass as GPSClearRC::maxSeconds
static const float EmitterFPS = 60.0f;
static const uint32_t EmitterParticleCount = 1024; // number of particles handed to the emitter by the clear pass
// integer division: 1024 / 60 == 17 with the values above
static const uint32_t EmitterEmitCount = EmitterParticleCount / (uint32_t)ceilf(EmitterMaxSeconds * EmitterFPS);
#endif


// The structures below are uploaded as-is with CmdSetComputeRootConstants (see ParticleSystem::Draw).
// NOTE(review): they presumably have to mirror the declarations in the matching compute shaders - confirm.
#pragma pack(push, 4)

// root constants of the "GPS Clear" pass
struct GPSClearRC
{
	uint32_t emitterBufferIndex; // UAV index of emitterBuffer
	uint32_t deadBufferIndex; // UAV index of deadBuffer
	uint32_t emitterIndex;
	uint32_t firstParticle;
	uint32_t particleCount;
	float maxSeconds;
};

// root constants of the "GPS Setup" pass
struct GPSSetupRC
{
	uint32_t emitterBufferIndex; // UAV index of emitterBuffer
	uint32_t indirectBufferIndex; // UAV index of indirectBuffer
	uint32_t emitterIndex;
	uint32_t emitCount;
};

// root constants of the "GPS Emit" pass
struct GPSEmitRC
{
	uint32_t particleBufferIndex; // UAV index of particleBuffer
	uint32_t liveBufferIndex; // UAV index of the live buffer currently being read
	uint32_t deadBufferIndex; // UAV index of deadBuffer
	uint32_t emitterBufferIndex; // UAV index of emitterBuffer
	uint32_t emitterIndex;
};

// root constants of the "GPS Simulate" pass
struct GPSSimulateRC
{
	uint32_t particleBufferIndex; // UAV index of particleBuffer
	uint32_t liveSrcBufferIndex; // UAV index of liveBuffers[liveBufferReadIndex]
	uint32_t liveDstBufferIndex; // UAV index of liveBuffers[liveBufferReadIndex ^ 1]
	uint32_t deadBufferIndex; // UAV index of deadBuffer
	uint32_t emitterBufferIndex; // UAV index of emitterBuffer
	uint32_t emitterIndex;
};

#pragma pack(pop)


// Creates the 4 compute pipelines and all GPU buffers of the particle system,
// uploads the initial indirect dispatch arguments and requests a clear pass for the next Draw.
void ParticleSystem::Init()
{
	Q_assert(EmitterParticleCount <= MAX_PARTICLES);

	clearPipeline = CreateComputePipeline("GPS Clear", ShaderByteCode(g_particles_clear_cs));
	setupPipeline = CreateComputePipeline("GPS Setup", ShaderByteCode(g_particles_setup_cs));
	emitPipeline = CreateComputePipeline("GPS Emit", ShaderByteCode(g_particles_emit_cs));
	simulatePipeline = CreateComputePipeline("GPS Simulate", ShaderByteCode(g_particles_simulate_cs));

	// particle storage: MAX_PARTICLES elements of type Particle
	{
		BufferDesc desc("particles", MAX_PARTICLES * sizeof(Particle), ResourceStates::UnorderedAccessBit);
		desc.shortLifeTime = true;
		desc.structureByteCount = sizeof(Particle);
		particleBuffer = CreateBuffer(desc);
	}

	// 3 buffers of MAX_PARTICLES 4-byte elements sharing one description, only the name differs
	// NOTE(review): presumably lists of particle indices - confirm against the shaders
	{
		BufferDesc desc("", MAX_PARTICLES * 4, ResourceStates::UnorderedAccessBit);
		desc.shortLifeTime = true;
		desc.structureByteCount = 4;
		desc.name = "live particles #1";
		liveBuffers[0] = CreateBuffer(desc);
		desc.name = "live particles #2";
		liveBuffers[1] = CreateBuffer(desc);
		desc.name = "dead particles";
		deadBuffer = CreateBuffer(desc);
	}

	// emitter storage: MAX_PARTICLE_EMITTERS elements of type ParticleEmitter
	{
		BufferDesc desc("particle emitters", MAX_PARTICLE_EMITTERS * sizeof(ParticleEmitter), ResourceStates::UnorderedAccessBit);
		desc.shortLifeTime = true;
		desc.structureByteCount = sizeof(ParticleEmitter);
		emitterBuffer = CreateBuffer(desc);
	}

	// indirect dispatch arguments: 2 sets of 3 group counts
	// Draw uses the set at byte offset 0 for the emit pass and the set at byte offset 12 for the simulate pass
	{
		const uint32_t dispatchData[] =
		{
			0, 1, 1,
			0, 1, 1
		};

		BufferDesc desc("particle dispatch", sizeof(dispatchData), ResourceStates::UnorderedAccessBit);
		desc.shortLifeTime = true;
		indirectBuffer = CreateBuffer(desc);

		uint8_t* const mapped = BeginBufferUpload(indirectBuffer);
		memcpy(mapped, dispatchData, sizeof(dispatchData));
		EndBufferUpload(indirectBuffer);
	}

	needsClearing = true;
	liveBufferReadIndex = 0;
}

// Records the particle compute passes of the frame for emitter 0:
// an optional clear (when needsClearing is set), then setup, emit and simulate.
// The live buffers are ping-ponged: simulate reads liveBuffers[liveBufferReadIndex],
// writes the other one, and the read index is flipped at the end.
void ParticleSystem::Draw()
{
#if 1 // @TODO: shouldn't be necessary once cgame is adding the emitters
	if(tr.world == NULL)
	{
		return;
	}
#endif

	// disabled debug helper: forces a clear pass every 4th call
#if 0
	static int counter = 0;
	counter++;
	if(counter == 4)
	{
		counter = 0;
		needsClearing = true;
	}
#endif

	SCOPED_RENDER_PASS("Particles", 1.0f, 1.0f, 1.0f);

	if(needsClearing)
	{
		SCOPED_DEBUG_LABEL("Clear", 1.0f, 1.0f, 1.0f);

		CmdBeginBarrier();
		CmdBufferBarrier(emitterBuffer, ResourceStates::UnorderedAccessBit);
		CmdBufferBarrier(deadBuffer, ResourceStates::UnorderedAccessBit);
		CmdEndBarrier();

		GPSClearRC rc = {};
		rc.emitterBufferIndex = GetBufferIndexUAV(emitterBuffer);
		rc.deadBufferIndex = GetBufferIndexUAV(deadBuffer);
		rc.emitterIndex = 0;
		rc.firstParticle = 0;
		rc.particleCount = EmitterParticleCount;
		rc.maxSeconds = EmitterMaxSeconds;

		CmdBindPipeline(clearPipeline);
		CmdSetComputeRootConstants(0, sizeof(rc), &rc);
		// enough groups to cover EmitterParticleCount when each group handles 64 particles
		// NOTE(review): assumes the clear shader runs 64 threads per group - confirm
		CmdDispatch((EmitterParticleCount + 63) / 64, 1, 1);

		needsClearing = false;
	}

	{
		SCOPED_DEBUG_LABEL("Setup", 1.0f, 1.0f, 1.0f);

		CmdBeginBarrier();
		CmdBufferBarrier(emitterBuffer, ResourceStates::UnorderedAccessBit);
		CmdBufferBarrier(indirectBuffer, ResourceStates::UnorderedAccessBit);
		CmdEndBarrier();

		GPSSetupRC rc = {};
		rc.emitterBufferIndex = GetBufferIndexUAV(emitterBuffer);
		rc.indirectBufferIndex = GetBufferIndexUAV(indirectBuffer);
		rc.emitterIndex = 0; // @TODO:
		rc.emitCount = EmitterEmitCount; // @TODO:

		CmdBindPipeline(setupPipeline);
		CmdSetComputeRootConstants(0, sizeof(rc), &rc);
		// a single group with the indirect buffer bound as UAV
		// NOTE(review): presumably writes the group counts consumed by the 2 indirect dispatches below - confirm
		CmdDispatch(1, 1, 1);
	}

	{
		SCOPED_DEBUG_LABEL("Emit", 1.0f, 1.0f, 1.0f);

		CmdBeginBarrier();
		// the indirect buffer is now consumed as dispatch arguments
		CmdBufferBarrier(indirectBuffer, ResourceStates::IndirectDispatchBit);
		CmdBufferBarrier(emitterBuffer, ResourceStates::UnorderedAccessBit);
		CmdBufferBarrier(deadBuffer, ResourceStates::UnorderedAccessBit);
		CmdBufferBarrier(liveBuffers[liveBufferReadIndex], ResourceStates::UnorderedAccessBit);
		CmdBufferBarrier(particleBuffer, ResourceStates::UnorderedAccessBit);
		CmdEndBarrier();

		GPSEmitRC rc = {};
		rc.emitterBufferIndex = GetBufferIndexUAV(emitterBuffer);
		rc.deadBufferIndex = GetBufferIndexUAV(deadBuffer);
		rc.liveBufferIndex = GetBufferIndexUAV(liveBuffers[liveBufferReadIndex]);
		rc.particleBufferIndex = GetBufferIndexUAV(particleBuffer);
		rc.emitterIndex = 0; // @TODO:

		CmdBindPipeline(emitPipeline);
		CmdSetComputeRootConstants(0, sizeof(rc), &rc);
		// first set of dispatch arguments
		CmdDispatchIndirect(indirectBuffer, 0);
	}

	{
		SCOPED_DEBUG_LABEL("Simulate", 1.0f, 1.0f, 1.0f);

		CmdBeginBarrier();
		CmdBufferBarrier(indirectBuffer, ResourceStates::IndirectDispatchBit);
		CmdBufferBarrier(emitterBuffer, ResourceStates::UnorderedAccessBit);
		CmdBufferBarrier(deadBuffer, ResourceStates::UnorderedAccessBit);
		CmdBufferBarrier(liveBuffers[liveBufferReadIndex], ResourceStates::UnorderedAccessBit);
		CmdBufferBarrier(liveBuffers[liveBufferReadIndex ^ 1], ResourceStates::UnorderedAccessBit);
		CmdBufferBarrier(particleBuffer, ResourceStates::UnorderedAccessBit);
		CmdEndBarrier();

		GPSSimulateRC rc = {};
		rc.particleBufferIndex = GetBufferIndexUAV(particleBuffer);
		rc.liveSrcBufferIndex = GetBufferIndexUAV(liveBuffers[liveBufferReadIndex]);
		rc.liveDstBufferIndex = GetBufferIndexUAV(liveBuffers[liveBufferReadIndex ^ 1]);
		rc.deadBufferIndex = GetBufferIndexUAV(deadBuffer);
		rc.emitterBufferIndex = GetBufferIndexUAV(emitterBuffer);
		rc.emitterIndex = 0; // @TODO:

		CmdBindPipeline(simulatePipeline);
		CmdSetComputeRootConstants(0, sizeof(rc), &rc);
		// second set of dispatch arguments: 12 == 3 * sizeof(uint32_t)
		CmdDispatchIndirect(indirectBuffer, 12);

		// this frame's destination becomes the next frame's source
		liveBufferReadIndex ^= 1;
	}
}
surfaces", &crp.sunlightData.intensityDL, 0.0f, 10.0f); ImGui::NewLine(); if(ImGui::Button("Save Config...")) diff --git a/code/renderer/crp_volumetric_light.cpp b/code/renderer/crp_volumetric_light.cpp index 643aa66..92a96a0 100644 --- a/code/renderer/crp_volumetric_light.cpp +++ b/code/renderer/crp_volumetric_light.cpp @@ -18,30 +18,39 @@ You should have received a copy of the GNU General Public License along with Challenge Quake 3. If not, see . =========================================================================== */ -// Cinematic Rendering Pipeline - fog and particles lit by the sun and local lights +// Cinematic Rendering Pipeline - fog, NanoVDB volumes and particles lit by the sun and local lights #include "crp_local.h" #include "../client/cl_imgui.h" #include "shaders/crp/scene_view.h.hlsli" +#include "shaders/crp/light_grid.h.hlsli" #include "shaders/crp/vl_common.h.hlsli" #include "compshaders/crp/fullscreen.h" -#if defined(VL_CPU_PARTICLES) -#include "compshaders/crp/vl_extinction_injection_particles.h" -#include "compshaders/crp/vl_frustum_injection_particles.h" -#include "compshaders/crp/vl_particles_dispatch.h" -#include "compshaders/crp/vl_particles_preprocess_extinction.h" -#include "compshaders/crp/vl_particles_preprocess_frustum.h" -#endif #include "compshaders/crp/vl_extinction_injection_fog.h" +#include "compshaders/crp/vl_extinction_injection_nanovdb.h" +//#include "compshaders/crp/vl_extinction_injection_particles.h" #include "compshaders/crp/vl_frustum_anisotropy_average.h" +#include "compshaders/crp/vl_frustum_depth_test.h" #include "compshaders/crp/vl_frustum_injection_fog.h" +#include "compshaders/crp/vl_frustum_injection_nanovdb.h" +#include "compshaders/crp/vl_frustum_injection_nanovdb_lq.h" +#include "compshaders/crp/vl_frustum_injection_particles.h" #include "compshaders/crp/vl_frustum_inscatter_ambient.h" #include "compshaders/crp/vl_frustum_inscatter_point_light.h" #include "compshaders/crp/vl_frustum_inscatter_sunlight.h" 
+#include "compshaders/crp/vl_frustum_light_propagation_nx.h" +#include "compshaders/crp/vl_frustum_light_propagation_ny.h" +#include "compshaders/crp/vl_frustum_light_propagation_px.h" +#include "compshaders/crp/vl_frustum_light_propagation_py.h" #include "compshaders/crp/vl_frustum_raymarch.h" #include "compshaders/crp/vl_frustum_sunlight_visibility.h" -#include "compshaders/crp/vl_frustum_temporal.h" +#include "compshaders/crp/vl_frustum_temporal_float.h" +#include "compshaders/crp/vl_frustum_temporal_float4.h" +#include "compshaders/crp/vl_particles_clear.h" +#include "compshaders/crp/vl_particles_hit.h" +#include "compshaders/crp/vl_particles_list.h" +#include "compshaders/crp/vl_particles_tiles.h" #include "compshaders/crp/vl_shadow_point_light.h" #include "compshaders/crp/vl_shadow_sun.h" #include "compshaders/crp/vl_debug_ambient.h" @@ -60,58 +69,6 @@ struct VLGlobalFogRC uint32_t materialTextureCIndex; }; -struct VLParticlePreProcessRC -{ - uvec3_t fullResolution; - uint32_t tileBufferIndex; - uvec3_t tileResolution; - uint32_t particleBufferIndex; - uvec3_t tileScale; - uint32_t particleCount; -}; - -struct VLParticlePreProcessExtinctionRC -{ - uvec3_t fullResolution; - uint32_t tileBufferIndex; - uvec3_t tileResolution; - uint32_t particleBufferIndex; - uvec3_t tileScale; - uint32_t particleCount; - float extinctionWorldScale; -}; - -struct VLParticleDispatchRC -{ - uvec3_t tileResolution; - uint32_t tileBufferIndex; - uint32_t dispatchBufferIndex; - uint32_t particleTileBufferIndex; -}; - -struct VLParticleRC -{ - uvec3_t tileScale; - uint32_t particleBufferIndex; - uint32_t particleCount; - uint32_t materialTextureAIndex; - uint32_t materialTextureBIndex; - uint32_t materialTextureCIndex; - uint32_t tileBufferIndex; - uint32_t tileCount; -}; - -struct VLParticleExtinctionRC -{ - uvec3_t tileScale; - uint32_t particleBufferIndex; - uint32_t particleCount; - uint32_t extinctionTextureIndex; - uint32_t tileBufferIndex; - uint32_t tileCount; - float 
// NOTE(review): the structures below follow this file's "RC" naming and are declared before
// the #pragma pack(pop); they look like the root constants of the particle and NanoVDB
// compute passes created in VolumetricLight::Init. Their use is not visible here - confirm.
// Fields named "...BufferIndex" / "...TextureIndex" are presumably resource descriptor indices - confirm.

// "VL Particle Clear" pass - presumably
struct VLParticleClearRC
{
	uint32_t counterBufferIndex;
	uint32_t tileBufferIndex;
	uint32_t tileCount;
};

// "VL Particle Hit" pass - presumably
struct VLParticleHitRC
{
	uvec3_t fullResolution;
	uint32_t tileBufferIndex;
	uvec3_t tileResolution;
	uint32_t pad0; // explicit padding, never meant to carry data
	uvec3_t tileScale;
	uint32_t pad1; // explicit padding, never meant to carry data
	uint32_t particleBufferIndex;
	uint32_t emitterBufferIndex;
	uint32_t liveBufferIndex;
	uint32_t emitterIndex;
};

// "VL Particle Tile List Build" pass - presumably
struct VLParticleTilesRC
{
	uint32_t counterBufferIndex;
	uint32_t tileBufferIndex;
	uint32_t tileIndexBufferIndex;
	uint32_t tileCount;
};

// "VL Particle List Build" pass - presumably
struct VLParticleListRC
{
	uvec3_t fullResolution;
	uint32_t emitterIndex;
	uvec3_t tileResolution;
	uint32_t maxParticleIndexCount;
	uvec3_t tileScale;
	uint32_t tileBufferIndex;
	uint32_t emitterBufferIndex;
	uint32_t particleBufferIndex;
	uint32_t liveBufferIndex;
	uint32_t indexBufferIndex;
};

// "VL Frustum Particles" pass - presumably
struct VLParticleInjectionRC
{
	uvec3_t tileScale;
	uint32_t pad0; // explicit padding, never meant to carry data
	uvec3_t tileResolution;
	uint32_t particleBufferIndex;
	uint32_t materialTextureAIndex;
	uint32_t materialTextureBIndex;
	uint32_t materialTextureCIndex;
	uint32_t tileBufferIndex;
	uint32_t tileIndexBufferIndex;
	uint32_t particleIndexBufferIndex;
	uint32_t counterBufferIndex;
	uint32_t tileCount;
};

// placement data of a NanoVDB volume, embedded at the start of both NanoVDB injection structures below
struct NanoVDBTransform
{
	matrix3x3_t worldToIndex;
	vec3_t originOffset;
	vec3_t translation;
	vec3_t stepSize;
};

// "VL Frustum VDB" / "VL Frustum VDB LQ" passes - presumably
// NOTE(review): the "...ByteOffset2" fields and 't' look like a second frame and a blend factor - confirm
struct VLNanoVDBFrustInjectionRC
{
	NanoVDBTransform transform;
	uint32_t nanovdbBufferIndex;
	uint32_t blackbodyTextureIndex;
	LightGridRC lightGrid;
	uint32_t materialTextureAIndex;
	uint32_t materialTextureBIndex;
	uint32_t materialTextureCIndex;
	uint32_t scatterExtTextureIndex;
	uint32_t frustumVisTextureIndex;
	uint32_t densityGridByteOffset;
	uint32_t flamesGridByteOffset;
	uint32_t densityGridByteOffset2;
	uint32_t flamesGridByteOffset2;
	uint32_t linearInterpolation;
	uint32_t accurateOverlapTest;
	uint32_t ambientAngularCoverage;
	float densityExtinctionScale;
	float densityAlbedo;
	float flamesEmissionScale;
	float flamesTemperatureScale;
	float stepScale;
	float transStepScale;
	float t;
};

// "VL Extinction VDB" pass - presumably
struct VLNanoVDBExtInjectionRC
{
	NanoVDBTransform transform;
	uint32_t nanovdbBufferIndex;
	uint32_t extinctionTextureIndex;
	uint32_t densityGridByteOffset;
	uint32_t densityGridByteOffset2;
	uint32_t linearInterpolation;
	float worldScale;
	float densityExtinctionScale;
	float t;
};

// "VL Frustum Light Prop" passes (-X, -Y, +X, +Y) - presumably
struct VLNanoVDBLightPropRC
{
	uint32_t materialTextureAIndex;
	uint32_t materialTextureBIndex;
	float emissiveScatter;
};

// "VL Frustum Z Test" pass - presumably
struct VLFrustumDepthTestRC
{
	uvec3_t frustumTextureSize;
	uint32_t frustumVisTextureIndex;
	uint32_t depthMip;
};
+ v[2] * m[8]; +} + +static float VoxelStepSize(const vec3_t dir, const vec3_t voxelSize) +{ + vec3_t stepSize3; + for(int i = 0; i < 3; i++) + { + stepSize3[i] = voxelSize[i] / max(fabsf(dir[i]), 0.000001f); + } + const float stepSize = min(stepSize3[0], min(stepSize3[1], stepSize3[2])); + + return stepSize; +} + static const float OpaqueTransmittanceThreshold = 1.0f / 256.0f; static const float LnOpaqueTransmittanceThreshold = logf(OpaqueTransmittanceThreshold); @@ -318,104 +408,71 @@ static float ExtinctionToOpaqueDistance(float extinction) return -LnOpaqueTransmittanceThreshold / extinction; } -#if defined(VL_CPU_PARTICLES) - -void CRP_AddParticle(const vec3_t position, float radius, float alpha) -{ - uint32_t& particleCount = crp.volumetricLight.particleCount; - if(particleCount >= MAX_PARTICLES) - { - return; - } - - Particle p = {}; - VectorCopy(position, p.position); - p.radius = radius; - p.scattering[0] = 0.1f * alpha; - p.scattering[1] = 0.1f * alpha; - p.scattering[2] = 0.1f * alpha; - p.absorption = 0.1f * alpha; - p.anisotropy = 0.0f; - p.isEmissive = 0; - - HBuffer buffer = crp.volumetricLight.particleBuffer; - Particle* const particle = (Particle*)MapBuffer(buffer) + particleCount; - memcpy(particle, &p, sizeof(p)); - UnmapBuffer(buffer); - particleCount++; -} - -// for use with billboarded quads such as CPMA's rocket smoke -void CRP_AddPolygonAsParticle(const polyVert_t* vertices, int vertexCount) -{ - vec3_t bounds[2]; - VectorCopy(vertices[0].xyz, bounds[0]); - VectorCopy(vertices[0].xyz, bounds[1]); - for(int i = 1; i < vertexCount; i++) - { - AddPointToBounds(vertices[i].xyz, bounds[0], bounds[1]); - } - - vec3_t position; - VectorAdd(bounds[0], bounds[1], position); - VectorScale(position, 0.5f, position); - - vec3_t extents; - VectorSubtract(bounds[1], bounds[0], extents); - - const float radius = max(max(extents[0], extents[1]), extents[2]) * 0.5f; - const float alpha = (float)vertices[0].modulate[3] / 255.0f; - - CRP_AddParticle(position, 
radius, alpha); -} - -#endif void VolumetricLight::Init() { if(srp.firstInit) { - VectorSet(ambientColor, 0.125f, 0.125f, 0.125f); - ambientIntensity = 1.0f; + VectorSet(ambientColorGUI, 1.0f, 1.0f, 1.0f); + VectorSet(ambientColor, 0.5f, 0.5f, 0.5f); + ambientIntensity = 0.1f; } - // patched on world load - VectorSet(mapBoxMin, -MaxFogCoordinate, -MaxFogCoordinate, -MaxFogCoordinate); - VectorSet(mapBoxMax, MaxFogCoordinate, MaxFogCoordinate, MaxFogCoordinate); + int xyDivisor = 1; + for(int i = 0; i < xySubsampling; i++) + { + xyDivisor *= 2; + } - VectorSet(frustumSize, glConfig.vidWidth / 8, glConfig.vidHeight / 8, 256); - depthMip = 3; - VectorSet(frustumTileScale, 8, 8, 16); // x*y*z == 1024, must match the shader + VectorSet(mapBoxMin, -MaxFogCoordinate, -MaxFogCoordinate, -MaxFogCoordinate); // patched on world load + VectorSet(mapBoxMax, MaxFogCoordinate, MaxFogCoordinate, MaxFogCoordinate); + VectorSet(frustumSize, glConfig.vidWidth / xyDivisor, glConfig.vidHeight / xyDivisor, zResolution); + depthMip = (uint32_t)xySubsampling; + VectorSet(frustumTileScale, 8, 8, 8); // x*y*z == 512, must match the shader VectorSet(frustumTileSize, (frustumSize[0] + frustumTileScale[0] - 1) / frustumTileScale[0], (frustumSize[1] + frustumTileScale[1] - 1) / frustumTileScale[1], (frustumSize[2] + frustumTileScale[2] - 1) / frustumTileScale[2]); - VectorSet(extinctionSize, 128, 128, 128); + VectorSet(extinctionSize, extinctionResolution, extinctionResolution, extinctionResolution); VectorSet(extinctionTileScale, 8, 8, 8); // 8*8*8 == 512, must match the shader VectorSet(extinctionTileSize, (extinctionSize[0] + extinctionTileScale[0] - 1) / extinctionTileScale[0], (extinctionSize[1] + extinctionTileScale[1] - 1) / extinctionTileScale[1], (extinctionSize[2] + extinctionTileScale[2] - 1) / extinctionTileScale[2]); Vector4Set(extinctionVolumeScale, 8, 16, 32, 64); // patched on world load - VectorSet(sunShadowSize, 128, 128, 128); + VectorSet(sunShadowSize, sunShadowResolution, 
sunShadowResolution, sunShadowResolution); Vector4Set(sunShadowVolumeScale, 8, 16, 32, 64); // patched on world load - shadowPixelCount = 64; + shadowPixelCount = pointShadowResolution; pointShadowVolumeScale = 8.0f; jitterCounter = 0; -#if defined(VL_CPU_PARTICLES) - extinctionParticlePipeline = CreateComputePipeline("VL Extinction Particles", ShaderByteCode(g_vl_extinction_injection_particles_cs)); - frustumParticlePipeline = CreateComputePipeline("VL Frustum Particles", ShaderByteCode(g_vl_frustum_injection_particles_cs)); - particleDispatchPipeline = CreateComputePipeline("VL Particles Dispatch", ShaderByteCode(g_vl_particles_dispatch_cs)); - particlePreProcessExtinctionPipeline = CreateComputePipeline("VL Particles Extinction Pre-process", ShaderByteCode(g_vl_particles_preprocess_extinction_cs)); - particlePreProcessFrustumPipeline = CreateComputePipeline("VL Particles Frustum Pre-process", ShaderByteCode(g_vl_particles_preprocess_frustum_cs)); -#endif extinctionFogPipeline = CreateComputePipeline("VL Extinction Fog", ShaderByteCode(g_vl_extinction_injection_fog_cs)); + extinctionVDBPipeline = CreateComputePipeline("VL Extinction VDB", ShaderByteCode(g_vl_extinction_injection_nanovdb_cs)); frustumAmbientPipeline = CreateComputePipeline("VL Frustum Ambient Light Scatter", ShaderByteCode(g_vl_frustum_inscatter_ambient_cs)); frustumAnisotropyPipeline = CreateComputePipeline("VL Frustum Finalize Material", ShaderByteCode(g_vl_frustum_anisotropy_average_cs)); frustumFogPipeline = CreateComputePipeline("VL Frustum Fog", ShaderByteCode(g_vl_frustum_injection_fog_cs)); + frustumLightPropNXPipeline = CreateComputePipeline("VL Frustum Light Prop -X", ShaderByteCode(g_vl_frustum_light_propagation_nx_cs)); + frustumLightPropNYPipeline = CreateComputePipeline("VL Frustum Light Prop -Y", ShaderByteCode(g_vl_frustum_light_propagation_ny_cs)); + frustumLightPropPXPipeline = CreateComputePipeline("VL Frustum Light Prop +X", 
ShaderByteCode(g_vl_frustum_light_propagation_px_cs)); + frustumLightPropPYPipeline = CreateComputePipeline("VL Frustum Light Prop +Y", ShaderByteCode(g_vl_frustum_light_propagation_py_cs)); + frustumParticlePipeline = CreateComputePipeline("VL Frustum Particles", ShaderByteCode(g_vl_frustum_injection_particles_cs)); frustumRaymarchPipeline = CreateComputePipeline("VL Frustum Raymarch", ShaderByteCode(g_vl_frustum_raymarch_cs)); - frustumTemporalPipeline = CreateComputePipeline("VL Frustum Temporal Reprojection", ShaderByteCode(g_vl_frustum_temporal_cs)); + frustumTemporalFloatPipeline = CreateComputePipeline("VL Frustum Temporal Reprojection float", ShaderByteCode(g_vl_frustum_temporal_float_cs)); + frustumTemporalFloat4Pipeline = CreateComputePipeline("VL Frustum Temporal Reprojection float4", ShaderByteCode(g_vl_frustum_temporal_float4_cs)); + frustumVDBLQPipeline = CreateComputePipeline("VL Frustum VDB LQ", ShaderByteCode(g_vl_frustum_injection_nanovdb_lq_cs)); + if(rhiInfo.forceNanoVDBPreviewMode) + { + frustumVDBPipeline = frustumVDBLQPipeline; + } + else + { + frustumVDBPipeline = CreateComputePipeline("VL Frustum VDB", ShaderByteCode(g_vl_frustum_injection_nanovdb_cs)); + } + frustumDepthTestPipeline = CreateComputePipeline("VL Frustum Z Test", ShaderByteCode(g_vl_frustum_depth_test_cs)); + particleClearPipeline = CreateComputePipeline("VL Particle Clear", ShaderByteCode(g_vl_particles_clear_cs)); + particleHitPipeline = CreateComputePipeline("VL Particle Hit", ShaderByteCode(g_vl_particles_hit_cs)); + particleListPipeline = CreateComputePipeline("VL Particle List Build", ShaderByteCode(g_vl_particles_list_cs)); + particleTilesPipeline = CreateComputePipeline("VL Particle Tile List Build", ShaderByteCode(g_vl_particles_tiles_cs)); pointLightShadowPipeline = CreateComputePipeline("VL Shadow Raymarch Point Light", ShaderByteCode(g_vl_shadow_point_light_cs)); sunlightScatterPipeline = CreateComputePipeline("VL Frustum Sunlight Scatter", 
ShaderByteCode(g_vl_frustum_inscatter_sunlight_cs)); sunlightShadowPipeline = CreateComputePipeline("VL Shadow Raymarch Sun", ShaderByteCode(g_vl_shadow_sun_cs)); @@ -439,7 +496,7 @@ void VolumetricLight::Init() desc.depthStencil.depthComparison = ComparisonFunction::GreaterEqual; desc.depthStencil.enableDepthTest = true; desc.depthStencil.enableDepthWrites = true; - desc.rasterizer.cullMode = CT_TWO_SIDED; // @TODO: + desc.rasterizer.cullMode = CT_BACK_SIDED; desc.AddRenderTarget(0, crp.renderTargetFormat); extinctionVizPipeline = CreateGraphicsPipeline(desc); } @@ -453,7 +510,7 @@ void VolumetricLight::Init() desc.depthStencil.depthComparison = ComparisonFunction::GreaterEqual; desc.depthStencil.enableDepthTest = true; desc.depthStencil.enableDepthWrites = true; - desc.rasterizer.cullMode = CT_TWO_SIDED; // @TODO: + desc.rasterizer.cullMode = CT_BACK_SIDED; desc.AddRenderTarget(0, crp.renderTargetFormat); sunShadowVizPipeline = CreateGraphicsPipeline(desc); } @@ -467,7 +524,7 @@ void VolumetricLight::Init() desc.depthStencil.depthComparison = ComparisonFunction::GreaterEqual; desc.depthStencil.enableDepthTest = true; desc.depthStencil.enableDepthWrites = true; - desc.rasterizer.cullMode = CT_TWO_SIDED; // @TODO: + desc.rasterizer.cullMode = CT_FRONT_SIDED; desc.AddRenderTarget(0, crp.renderTargetFormat); ambientVizPipeline = CreateGraphicsPipeline(desc); } @@ -535,59 +592,64 @@ void VolumetricLight::Init() desc.format = TextureFormat::R16_Float; pointShadowTexture = CreateTexture(desc); + desc.width = (glConfig.vidWidth + xyDivisor - 1) / xyDivisor; + desc.height = (glConfig.vidHeight + xyDivisor - 1) / xyDivisor; + desc.depth = 1; + desc.name = "VL frustum visibility"; + desc.format = TextureFormat::R16_UInt; + frustumVisTexture = CreateTexture(desc); + ambientLightTextureA = RHI_MAKE_NULL_HANDLE(); // created on world load when available ambientLightTextureB = RHI_MAKE_NULL_HANDLE(); // created on world load when available } -#if defined(VL_CPU_PARTICLES) const 
uint32_t tileCountF = frustumTileSize[0] * frustumTileSize[1] * frustumTileSize[2]; const uint32_t tileCountE = extinctionTileSize[0] * extinctionTileSize[1] * extinctionTileSize[2]; const uint32_t maxTileCount = max(tileCountF, tileCountE); { - BufferDesc desc("particle", MAX_PARTICLES * sizeof(Particle), ResourceStates::ComputeShaderAccessBit); - desc.shortLifeTime = true; - desc.memoryUsage = MemoryUsage::Upload; - desc.structureByteCount = sizeof(Particle); - particleBuffer = CreateBuffer(desc); - } - - { - const uint32_t tileSize = 4; // 1 uint + const uint32_t tileSize = sizeof(Tile); const uint32_t byteCount = maxTileCount * tileSize; - BufferDesc desc("particle hit", byteCount, ResourceStates::UnorderedAccessBit); - desc.shortLifeTime = true; - particleHitBuffer = CreateBuffer(desc); - } - - { - const uint32_t tileSize = 12; // 3 uint - const uint32_t byteCount = maxTileCount * tileSize; - BufferDesc desc("particle tile", byteCount, ResourceStates::UnorderedAccessBit); + BufferDesc desc("VL particle voxel tile", byteCount, ResourceStates::UnorderedAccessBit); desc.shortLifeTime = true; desc.structureByteCount = tileSize; particleTileBuffer = CreateBuffer(desc); } { - BufferDesc desc("particle dispatch", 12, ResourceStates::UnorderedAccessBit); + BufferDesc desc("VL particle global counters", sizeof(Counters), ResourceStates::UnorderedAccessBit); desc.shortLifeTime = true; - particleDispatchBuffer = CreateBuffer(desc); + desc.structureByteCount = sizeof(Counters); + particleCounterBuffer = CreateBuffer(desc); } { - BufferDesc desc("particle dispatch clear", 12, ResourceStates::CopySourceBit); + BufferDesc desc("VL particle tile index", maxTileCount * 4, ResourceStates::UnorderedAccessBit); desc.shortLifeTime = true; - desc.memoryUsage = MemoryUsage::Upload; // @TODO: not ideal... 
- particleDispatchClearBuffer = CreateBuffer(desc); - - uint32_t* const groupCounts = (uint32_t*)MapBuffer(particleDispatchClearBuffer); - groupCounts[0] = 0; - groupCounts[1] = 1; - groupCounts[2] = 1; - UnmapBuffer(particleDispatchClearBuffer); + desc.structureByteCount = 4; + particleTileIndexBuffer = CreateBuffer(desc); + } + + { + const uint32_t Magic = 64; // @TODO: adjust or use particle groups + BufferDesc desc("VL particle index", Magic * MAX_PARTICLES * 4, ResourceStates::UnorderedAccessBit); + desc.shortLifeTime = true; + desc.structureByteCount = 4; + particleIndexBuffer = CreateBuffer(desc); + maxParticleIndexCount = Magic * MAX_PARTICLES; + } + + { + BufferDesc desc("VL particle dispatch", 12, ResourceStates::UnorderedAccessBit); + desc.shortLifeTime = true; + particleDispatchBuffer = CreateBuffer(desc); + + uint32_t* const mapped = (uint32_t*)BeginBufferUpload(particleDispatchBuffer); + mapped[0] = 0; + mapped[1] = 1; + mapped[2] = 1; + EndBufferUpload(particleDispatchBuffer); } -#endif } void VolumetricLight::ProcessWorld(world_t& world) @@ -731,6 +793,7 @@ void VolumetricLight::DrawBegin() CmdTextureBarrier(materialTextureA, ResourceStates::UnorderedAccessBit); CmdTextureBarrier(materialTextureB, ResourceStates::UnorderedAccessBit); CmdTextureBarrier(materialTextureC, ResourceStates::UnorderedAccessBit); + CmdTextureBarrier(scatterExtTexture, ResourceStates::UnorderedAccessBit); for(int i = 0; i < 4; i++) { CmdTextureBarrier(extinctionTextures[i], ResourceStates::UnorderedAccessBit); @@ -741,18 +804,37 @@ void VolumetricLight::DrawBegin() CmdClearTextureUAV(materialTextureA, 0, values); CmdClearTextureUAV(materialTextureB, 0, values); CmdClearTextureUAV(materialTextureC, 0, values); + CmdClearTextureUAV(scatterExtTexture, 0, values); for(int i = 0; i < 4; i++) { CmdClearTextureUAV(extinctionTextures[i], 0, values); } } + { + SCOPED_RENDER_PASS("VL Frustum Depth", 1.0f, 1.0f, 1.0f); + + CmdBeginBarrier(); + CmdTextureBarrier(crp.depthMinMaxTexture, 
ResourceStates::ComputeShaderAccessBit); + CmdTextureBarrier(frustumVisTexture, ResourceStates::UnorderedAccessBit); + CmdEndBarrier(); + + VLFrustumDepthTestRC rc = {}; + rc.frustumVisTextureIndex = GetTextureIndexUAV(frustumVisTexture, 0); + rc.depthMip = depthMip; + VectorCopy(frustumSize, rc.frustumTextureSize); + + CmdBindPipeline(frustumDepthTestPipeline); + CmdSetComputeRootConstants(0, sizeof(rc), &rc); + CmdDispatch((frustumSize[0] + 7) / 8, (frustumSize[1] + 7) / 8, 1); + } + { SCOPED_RENDER_PASS("VL Fog", 1.0f, 1.0f, 1.0f); for(int f = 0; f < fogCount; f++) { - SCOPED_DEBUG_LABEL("VL Frustum Fog", 1.0f, 1.0f, 1.0f); + SCOPED_DEBUG_LABEL("Frustum", 1.0f, 1.0f, 1.0f); CmdBeginBarrier(); CmdTextureBarrier(materialTextureA, ResourceStates::UnorderedAccessBit); @@ -780,7 +862,7 @@ void VolumetricLight::DrawBegin() for(int c = 0; c < 4; c++) { - SCOPED_DEBUG_LABEL("VL Extinction Fog", 1.0f, 1.0f, 1.0f); + SCOPED_DEBUG_LABEL("Extinction", 1.0f, 1.0f, 1.0f); CmdBeginBarrier(); CmdTextureBarrier(extinctionTextures[c], ResourceStates::UnorderedAccessBit); @@ -796,89 +878,144 @@ void VolumetricLight::DrawBegin() } } -#if defined(VL_CPU_PARTICLES) - if(particleCount > 0) + if(0) // @TODO: { SCOPED_RENDER_PASS("VL Frustum Particles", 1.0f, 1.0f, 1.0f); + const ParticleSystem& ps = crp.particleSystem; + const uint32_t tileCount = frustumTileSize[0] * frustumTileSize[1] * frustumTileSize[2]; + { - SCOPED_DEBUG_LABEL("Pre-pass", 1.0f, 1.0f, 1.0f); + SCOPED_DEBUG_LABEL("Clear", 1.0f, 1.0f, 1.0f); CmdBeginBarrier(); - CmdBufferBarrier(particleBuffer, ResourceStates::ComputeShaderAccessBit); - CmdBufferBarrier(particleHitBuffer, ResourceStates::UnorderedAccessBit); + CmdBufferBarrier(particleCounterBuffer, ResourceStates::UnorderedAccessBit); + CmdBufferBarrier(particleTileBuffer, ResourceStates::UnorderedAccessBit); CmdEndBarrier(); - CmdClearBufferUAV(particleHitBuffer, 0); + VLParticleClearRC rc = {}; + rc.counterBufferIndex = GetBufferIndexUAV(particleCounterBuffer); + 
rc.tileBufferIndex = GetBufferIndexUAV(particleTileBuffer); + rc.tileCount = tileCount; + + CmdBindPipeline(particleClearPipeline); + CmdSetComputeRootConstants(0, sizeof(rc), &rc); + CmdDispatch((tileCount + 63) / 64, 1, 1); + } + + { + SCOPED_DEBUG_LABEL("Hit", 1.0f, 1.0f, 1.0f); CmdBeginBarrier(); - CmdBufferBarrier(particleHitBuffer, ResourceStates::UnorderedAccessBit); + CmdBufferBarrier(ps.indirectBuffer, ResourceStates::IndirectDispatchBit); + CmdBufferBarrier(ps.particleBuffer, ResourceStates::UnorderedAccessBit); + CmdBufferBarrier(ps.emitterBuffer, ResourceStates::UnorderedAccessBit); + CmdBufferBarrier(ps.liveBuffers[ps.liveBufferReadIndex], ResourceStates::UnorderedAccessBit); + CmdBufferBarrier(particleTileBuffer, ResourceStates::UnorderedAccessBit); CmdEndBarrier(); - VLParticlePreProcessRC rc = {}; - rc.tileBufferIndex = GetBufferIndexUAV(particleHitBuffer); - rc.particleBufferIndex = GetBufferIndexSRV(particleBuffer); - rc.particleCount = particleCount; + VLParticleHitRC rc = {}; + rc.particleBufferIndex = GetBufferIndexUAV(ps.particleBuffer); + rc.emitterBufferIndex = GetBufferIndexUAV(ps.emitterBuffer); + rc.liveBufferIndex = GetBufferIndexUAV(ps.liveBuffers[ps.liveBufferReadIndex]); + rc.emitterIndex = 0; // @TODO: + rc.tileBufferIndex = GetBufferIndexUAV(particleTileBuffer); VectorCopy(frustumSize, rc.fullResolution); VectorCopy(frustumTileSize, rc.tileResolution); VectorCopy(frustumTileScale, rc.tileScale); - CmdBindPipeline(particlePreProcessFrustumPipeline); + CmdBindPipeline(particleHitPipeline); CmdSetComputeRootConstants(0, sizeof(rc), &rc); - CmdDispatch((particleCount + 63) / 64, 1, 1); + CmdDispatchIndirect(ps.indirectBuffer, 12); } { - SCOPED_DEBUG_LABEL("Clear Dispatch", 1.0f, 1.0f, 1.0f); + SCOPED_DEBUG_LABEL("Tile List Build", 1.0f, 1.0f, 1.0f); CmdBeginBarrier(); - CmdBufferBarrier(particleDispatchClearBuffer, ResourceStates::CopySourceBit); + CmdBufferBarrier(particleCounterBuffer, ResourceStates::UnorderedAccessBit); + 
CmdBufferBarrier(particleTileBuffer, ResourceStates::UnorderedAccessBit); + CmdBufferBarrier(particleTileIndexBuffer, ResourceStates::UnorderedAccessBit); + CmdEndBarrier(); + + VLParticleTilesRC rc = {}; + rc.counterBufferIndex = GetBufferIndexUAV(particleCounterBuffer); + rc.tileBufferIndex = GetBufferIndexUAV(particleTileBuffer); + rc.tileIndexBufferIndex = GetBufferIndexUAV(particleTileIndexBuffer); + rc.tileCount = tileCount; + + CmdBindPipeline(particleTilesPipeline); + CmdSetComputeRootConstants(0, sizeof(rc), &rc); + CmdDispatch((tileCount + 63) / 64, 1, 1); + } + + { + SCOPED_DEBUG_LABEL("Particle List Build", 1.0f, 1.0f, 1.0f); + + CmdBeginBarrier(); + CmdBufferBarrier(ps.indirectBuffer, ResourceStates::IndirectDispatchBit); + CmdBufferBarrier(ps.particleBuffer, ResourceStates::UnorderedAccessBit); + CmdBufferBarrier(ps.emitterBuffer, ResourceStates::UnorderedAccessBit); + CmdBufferBarrier(ps.liveBuffers[ps.liveBufferReadIndex], ResourceStates::UnorderedAccessBit); + CmdBufferBarrier(particleTileBuffer, ResourceStates::UnorderedAccessBit); + CmdBufferBarrier(particleIndexBuffer, ResourceStates::UnorderedAccessBit); + CmdEndBarrier(); + + VLParticleListRC rc = {}; + rc.emitterBufferIndex = GetBufferIndexUAV(ps.emitterBuffer); + rc.indexBufferIndex = GetBufferIndexUAV(particleIndexBuffer); + rc.liveBufferIndex = GetBufferIndexUAV(ps.liveBuffers[ps.liveBufferReadIndex]); + rc.particleBufferIndex = GetBufferIndexUAV(ps.particleBuffer); + rc.tileBufferIndex = GetBufferIndexUAV(particleTileBuffer); + rc.emitterIndex = 0; // @TODO: + rc.maxParticleIndexCount = maxParticleIndexCount; + VectorCopy(frustumSize, rc.fullResolution); + VectorCopy(frustumTileSize, rc.tileResolution); + VectorCopy(frustumTileScale, rc.tileScale); + + CmdBindPipeline(particleListPipeline); + CmdSetComputeRootConstants(0, sizeof(rc), &rc); + CmdDispatchIndirect(ps.indirectBuffer, 12); + } + + { + SCOPED_DEBUG_LABEL("Set Dispatch Tile Count", 1.0f, 1.0f, 1.0f); + + CmdBeginBarrier(); + 
CmdBufferBarrier(particleCounterBuffer, ResourceStates::CopySourceBit); CmdBufferBarrier(particleDispatchBuffer, ResourceStates::CopyDestinationBit); CmdEndBarrier(); - CmdCopyBuffer(particleDispatchBuffer, particleDispatchClearBuffer); - } - - { - SCOPED_DEBUG_LABEL("Prepare Dispatch", 1.0f, 1.0f, 1.0f); - - CmdBeginBarrier(); - CmdBufferBarrier(particleHitBuffer, ResourceStates::UnorderedAccessBit); - CmdBufferBarrier(particleDispatchBuffer, ResourceStates::UnorderedAccessBit); - CmdBufferBarrier(particleTileBuffer, ResourceStates::UnorderedAccessBit); - CmdEndBarrier(); - - VLParticleDispatchRC rc = {}; - rc.tileBufferIndex = GetBufferIndexUAV(particleHitBuffer); - rc.dispatchBufferIndex = GetBufferIndexUAV(particleDispatchBuffer); - rc.particleTileBufferIndex = GetBufferIndexUAV(particleTileBuffer); - VectorCopy(frustumTileSize, rc.tileResolution); - - CmdBindPipeline(particleDispatchPipeline); - CmdSetComputeRootConstants(0, sizeof(rc), &rc); - CmdDispatch((frustumTileSize[0] + 3) / 4, (frustumTileSize[1] + 3) / 4, (frustumTileSize[2] + 3) / 4); + CmdCopyBuffer(particleDispatchBuffer, 0, particleCounterBuffer, 4, 4); } { SCOPED_DEBUG_LABEL("Injection", 1.0f, 1.0f, 1.0f); CmdBeginBarrier(); - CmdBufferBarrier(particleBuffer, ResourceStates::ComputeShaderAccessBit); + CmdBufferBarrier(ps.particleBuffer, ResourceStates::UnorderedAccessBit); + CmdBufferBarrier(particleTileBuffer, ResourceStates::UnorderedAccessBit); + CmdBufferBarrier(particleTileIndexBuffer, ResourceStates::UnorderedAccessBit); + CmdBufferBarrier(particleIndexBuffer, ResourceStates::UnorderedAccessBit); + CmdBufferBarrier(particleCounterBuffer, ResourceStates::UnorderedAccessBit); CmdTextureBarrier(materialTextureA, ResourceStates::UnorderedAccessBit); CmdTextureBarrier(materialTextureB, ResourceStates::UnorderedAccessBit); CmdTextureBarrier(materialTextureC, ResourceStates::UnorderedAccessBit); CmdBufferBarrier(particleDispatchBuffer, ResourceStates::IndirectDispatchBit); CmdEndBarrier(); - 
VLParticleRC rc = {}; + VLParticleInjectionRC rc = {}; + rc.particleBufferIndex = GetBufferIndexUAV(ps.particleBuffer); rc.materialTextureAIndex = GetTextureIndexUAV(materialTextureA, 0); rc.materialTextureBIndex = GetTextureIndexUAV(materialTextureB, 0); rc.materialTextureCIndex = GetTextureIndexUAV(materialTextureC, 0); rc.tileBufferIndex = GetBufferIndexUAV(particleTileBuffer); - rc.particleBufferIndex = GetBufferIndexSRV(particleBuffer); - rc.particleCount = particleCount; + rc.tileIndexBufferIndex = GetBufferIndexUAV(particleTileIndexBuffer); + rc.particleIndexBufferIndex = GetBufferIndexUAV(particleIndexBuffer); + rc.counterBufferIndex = GetBufferIndexUAV(particleCounterBuffer); rc.tileCount = frustumTileSize[0] * frustumTileSize[1] * frustumTileSize[2]; VectorCopy(frustumTileScale, rc.tileScale); + VectorCopy(frustumTileSize, rc.tileResolution); CmdBindPipeline(frustumParticlePipeline); CmdSetComputeRootConstants(0, sizeof(rc), &rc); @@ -886,96 +1023,190 @@ void VolumetricLight::DrawBegin() } } - if(particleCount > 0) + // NanoVDB: integrated GPUs are too slow and will trigger a TDR + if(!rhiInfo.isUMA) { - SCOPED_RENDER_PASS("VL Extinction Particles", 1.0f, 1.0f, 1.0f); + SCOPED_RENDER_PASS("VL NanoVDB", 1.0f, 1.0f, 1.0f); - for(int c = 0; c < 4; c++) + NanoVDBManager& vdb = crp.vdbManager; + + for(uint32_t i = 0; i < vdb.drawInstances.count; i++) { + const NanoVDBManager::Instance& instance = vdb.instances[i]; + const NanoVDBManager::DrawInstance& drawInstance = vdb.drawInstances[i]; + + NanoVDBTransform transform = {}; + vdb.MakeWorldToIndexMatrix(transform.worldToIndex, instance); + VectorCopy(instance.originOffset, transform.originOffset); + VectorCopy(instance.position, transform.translation); + + if(drawInstance.smokeByteOffset > 0 || drawInstance.fireByteOffset > 0) { - SCOPED_DEBUG_LABEL("Pre-pass", 1.0f, 1.0f, 1.0f); + SCOPED_DEBUG_LABEL("Frustum", 1.0f, 1.0f, 1.0f); CmdBeginBarrier(); - CmdBufferBarrier(particleBuffer, 
ResourceStates::ComputeShaderAccessBit); - CmdBufferBarrier(particleHitBuffer, ResourceStates::UnorderedAccessBit); + CmdBufferBarrier(drawInstance.buffer, ResourceStates::ComputeShaderAccessBit); + if(tr.world->lightGridData != NULL) + { + CmdTextureBarrier(ambientLightTextureA, ResourceStates::ComputeShaderAccessBit); + CmdTextureBarrier(ambientLightTextureB, ResourceStates::ComputeShaderAccessBit); + } + CmdTextureBarrier(materialTextureA, ResourceStates::UnorderedAccessBit); + CmdTextureBarrier(materialTextureB, ResourceStates::UnorderedAccessBit); + CmdTextureBarrier(materialTextureC, ResourceStates::UnorderedAccessBit); CmdEndBarrier(); - CmdClearBufferUAV(particleHitBuffer, 0); + // order change: fw, left, up -> left, up, fw + const orientationr_t& orient = backEnd.viewParms.orient; + vec3_t forward, left, up; + TransformPosition(forward, transform.worldToIndex, orient.axis[0]); + TransformPosition(left, transform.worldToIndex, orient.axis[1]); + TransformPosition(up, transform.worldToIndex, orient.axis[2]); + VectorNormalize(forward); + VectorNormalize(left); + VectorNormalize(up); + transform.stepSize[0] = VoxelStepSize(left, instance.scale); + transform.stepSize[1] = VoxelStepSize(up, instance.scale); + transform.stepSize[2] = VoxelStepSize(forward, instance.scale); - CmdBeginBarrier(); - CmdBufferBarrier(particleHitBuffer, ResourceStates::UnorderedAccessBit); - CmdEndBarrier(); + const float transStepScale = + max3(instance.scale[0], instance.scale[1], instance.scale[2]) * (float)vdb.ambientRaymarchLOD; - VLParticlePreProcessExtinctionRC rc = {}; - rc.tileBufferIndex = GetBufferIndexUAV(particleHitBuffer); - rc.particleBufferIndex = GetBufferIndexSRV(particleBuffer); - rc.particleCount = particleCount; - VectorCopy(extinctionSize, rc.fullResolution); - VectorCopy(extinctionTileSize, rc.tileResolution); - VectorCopy(extinctionTileScale, rc.tileScale); - rc.extinctionWorldScale = extinctionVolumeScale[c]; + VLNanoVDBFrustInjectionRC rc = {}; + 
SetLightGridRootConstants(rc.lightGrid); + rc.nanovdbBufferIndex = GetBufferIndexSRV(drawInstance.buffer); + rc.blackbodyTextureIndex = GetTextureIndexSRV(crp.blackbodyTexture); + rc.materialTextureAIndex = GetTextureIndexUAV(materialTextureA, 0); + rc.materialTextureBIndex = GetTextureIndexUAV(materialTextureB, 0); + rc.materialTextureCIndex = GetTextureIndexUAV(materialTextureC, 0); + rc.scatterExtTextureIndex = GetTextureIndexUAV(scatterExtTexture, 0); + rc.frustumVisTextureIndex = GetTextureIndexUAV(frustumVisTexture, 0); + rc.densityGridByteOffset = drawInstance.smokeByteOffset; + rc.flamesGridByteOffset = drawInstance.fireByteOffset; + rc.densityGridByteOffset2 = drawInstance.smokeByteOffset2; + rc.flamesGridByteOffset2 = drawInstance.fireByteOffset2; + rc.linearInterpolation = vdb.linearInterpolation ? 1 : 0; + rc.accurateOverlapTest = vdb.accurateOverlapTest ? 1 : 0; + rc.ambientAngularCoverage = vdb.ambientIncreasedCoverage ? 1 : 0; + rc.densityExtinctionScale = instance.smokeExtinctionScale; + rc.densityAlbedo = instance.smokeAlbedo; + rc.flamesEmissionScale = instance.fireEmissionScale; + rc.flamesTemperatureScale = instance.fireTemperatureScale; + rc.transform = transform; + rc.stepScale = vdb.supersampling ? 0.5f : 1.0f; + rc.transStepScale = transStepScale; + rc.t = drawInstance.t; - CmdBindPipeline(particlePreProcessExtinctionPipeline); + const bool previewMode = rhiInfo.forceNanoVDBPreviewMode != qfalse || vdb.previewMode; + const HPipeline pipeline = previewMode ? 
frustumVDBLQPipeline : frustumVDBPipeline; + CmdBindPipeline(pipeline); CmdSetComputeRootConstants(0, sizeof(rc), &rc); - CmdDispatch((particleCount + 63) / 64, 1, 1); + CmdDispatch((frustumSize[0] + 3) / 4, (frustumSize[1] + 3) / 4, (frustumSize[2] + 3) / 4); + } + + if(drawInstance.smokeByteOffset > 0 || drawInstance.smokeByteOffset2 > 0) + { + vec3_t axisX, axisY, axisZ; + VectorSet(axisX, 1, 0, 0); + VectorSet(axisY, 0, 1, 0); + VectorSet(axisZ, 0, 0, 1); + transform.stepSize[0] = VoxelStepSize(axisX, instance.scale); + transform.stepSize[1] = VoxelStepSize(axisY, instance.scale); + transform.stepSize[2] = VoxelStepSize(axisZ, instance.scale); + + for(int c = 0; c < 4; c++) + { + SCOPED_DEBUG_LABEL("Extinction", 1.0f, 1.0f, 1.0f); + + CmdBeginBarrier(); + CmdBufferBarrier(drawInstance.buffer, ResourceStates::ComputeShaderAccessBit); + CmdTextureBarrier(extinctionTextures[c], ResourceStates::UnorderedAccessBit); + CmdEndBarrier(); + + VLNanoVDBExtInjectionRC rc = {}; + rc.nanovdbBufferIndex = GetBufferIndexSRV(drawInstance.buffer); + rc.extinctionTextureIndex = GetTextureIndexUAV(extinctionTextures[c], 0); + rc.worldScale = extinctionVolumeScale[c]; + rc.densityGridByteOffset = drawInstance.smokeByteOffset; + rc.densityGridByteOffset2 = drawInstance.smokeByteOffset2; + rc.linearInterpolation = vdb.linearInterpolation ? 
1 : 0; + rc.densityExtinctionScale = instance.smokeExtinctionScale; + rc.transform = transform; + rc.t = drawInstance.t; + + CmdBindPipeline(extinctionVDBPipeline); + CmdSetComputeRootConstants(0, sizeof(rc), &rc); + CmdDispatch((extinctionSize[0] + 3) / 4, (extinctionSize[1] + 3) / 4, (extinctionSize[2] + 3) / 4); + } + } + } + + uint32_t fireOffsetMask = 0; + for(uint32_t i = 0; i < vdb.drawInstances.count; i++) + { + fireOffsetMask |= vdb.drawInstances[i].fireByteOffset; + } + const bool hasFire = fireOffsetMask > 0; + if(hasFire && !vdb.previewMode && vdb.emissiveScatterScale > 0.0f) + { + VLNanoVDBLightPropRC rc = {}; + rc.materialTextureAIndex = GetTextureIndexUAV(materialTextureA, 0); + rc.materialTextureBIndex = GetTextureIndexUAV(materialTextureB, 0); + rc.emissiveScatter = vdb.emissiveScatterScale; + + { + SCOPED_DEBUG_LABEL("Light Propagation +X", 1.0f, 1.0f, 1.0f); + + CmdBeginBarrier(); + CmdTextureBarrier(materialTextureA, ResourceStates::UnorderedAccessBit); + CmdTextureBarrier(materialTextureB, ResourceStates::UnorderedAccessBit); + CmdEndBarrier(); + + CmdBindPipeline(frustumLightPropPXPipeline); + CmdSetComputeRootConstants(0, sizeof(rc), &rc); + CmdDispatch((frustumSize[1] + 7) / 8, (frustumSize[2] + 7) / 8, 1); } { - SCOPED_DEBUG_LABEL("Clear Dispatch", 1.0f, 1.0f, 1.0f); + SCOPED_DEBUG_LABEL("Light Propagation -X", 1.0f, 1.0f, 1.0f); CmdBeginBarrier(); - CmdBufferBarrier(particleDispatchClearBuffer, ResourceStates::CopySourceBit); - CmdBufferBarrier(particleDispatchBuffer, ResourceStates::CopyDestinationBit); + CmdTextureBarrier(materialTextureA, ResourceStates::UnorderedAccessBit); + CmdTextureBarrier(materialTextureB, ResourceStates::UnorderedAccessBit); CmdEndBarrier(); - CmdCopyBuffer(particleDispatchBuffer, particleDispatchClearBuffer); + CmdBindPipeline(frustumLightPropNXPipeline); + CmdSetComputeRootConstants(0, sizeof(rc), &rc); + CmdDispatch((frustumSize[1] + 7) / 8, (frustumSize[2] + 7) / 8, 1); } { - SCOPED_DEBUG_LABEL("Prepare 
Dispatch", 1.0f, 1.0f, 1.0f); + SCOPED_DEBUG_LABEL("Light Propagation +Y", 1.0f, 1.0f, 1.0f); CmdBeginBarrier(); - CmdBufferBarrier(particleHitBuffer, ResourceStates::UnorderedAccessBit); - CmdBufferBarrier(particleDispatchBuffer, ResourceStates::UnorderedAccessBit); - CmdBufferBarrier(particleTileBuffer, ResourceStates::UnorderedAccessBit); + CmdTextureBarrier(materialTextureA, ResourceStates::UnorderedAccessBit); + CmdTextureBarrier(materialTextureB, ResourceStates::UnorderedAccessBit); CmdEndBarrier(); - VLParticleDispatchRC rc = {}; - rc.tileBufferIndex = GetBufferIndexUAV(particleHitBuffer); - rc.dispatchBufferIndex = GetBufferIndexUAV(particleDispatchBuffer); - rc.particleTileBufferIndex = GetBufferIndexUAV(particleTileBuffer); - VectorCopy(extinctionTileSize, rc.tileResolution); - - CmdBindPipeline(particleDispatchPipeline); + CmdBindPipeline(frustumLightPropPYPipeline); CmdSetComputeRootConstants(0, sizeof(rc), &rc); - CmdDispatch((extinctionTileSize[0] + 3) / 4, (extinctionTileSize[1] + 3) / 4, (extinctionTileSize[2] + 3) / 4); + CmdDispatch((frustumSize[0] + 7) / 8, (frustumSize[2] + 7) / 8, 1); } { - SCOPED_DEBUG_LABEL("Injection", 1.0f, 1.0f, 1.0f); + SCOPED_DEBUG_LABEL("Light Propagation -Y", 1.0f, 1.0f, 1.0f); CmdBeginBarrier(); - CmdBufferBarrier(particleBuffer, ResourceStates::ComputeShaderAccessBit); - CmdTextureBarrier(extinctionTextures[c], ResourceStates::UnorderedAccessBit); - CmdBufferBarrier(particleDispatchBuffer, ResourceStates::IndirectDispatchBit); + CmdTextureBarrier(materialTextureA, ResourceStates::UnorderedAccessBit); + CmdTextureBarrier(materialTextureB, ResourceStates::UnorderedAccessBit); CmdEndBarrier(); - VLParticleExtinctionRC rc = {}; - rc.extinctionTextureIndex = GetTextureIndexUAV(extinctionTextures[c], 0); - rc.tileBufferIndex = GetBufferIndexUAV(particleTileBuffer); - rc.particleBufferIndex = GetBufferIndexSRV(particleBuffer); - rc.particleCount = particleCount; - rc.tileCount = extinctionTileSize[0] * extinctionTileSize[1] 
* extinctionTileSize[2]; - rc.extinctionWorldScale = extinctionVolumeScale[c]; - VectorCopy(extinctionTileScale, rc.tileScale); - - CmdBindPipeline(extinctionParticlePipeline); + CmdBindPipeline(frustumLightPropNYPipeline); CmdSetComputeRootConstants(0, sizeof(rc), &rc); - CmdDispatchIndirect(particleDispatchBuffer, 0); + CmdDispatch((frustumSize[0] + 7) / 8, (frustumSize[2] + 7) / 8, 1); } } } -#endif { SCOPED_RENDER_PASS("VL Anisotropy Avg", 1.0f, 1.0f, 1.0f); @@ -1010,15 +1241,7 @@ void VolumetricLight::DrawBegin() VLAmbientRC rc = {}; rc.materialTextureAIndex = GetTextureIndexUAV(materialTextureA, 0); rc.scatterExtTextureIndex = GetTextureIndexUAV(scatterExtTexture, 0); - rc.isLightGridAvailable = tr.world->lightGridData != NULL; - if(rc.isLightGridAvailable) - { - rc.ambientLightTextureAIndex = GetTextureIndexSRV(ambientLightTextureA); - rc.ambientLightTextureBIndex = GetTextureIndexSRV(ambientLightTextureB); - VectorCopy(lightGridCenter, rc.centerPosition); - VectorCopy(tr.world->lightGridSize, rc.worldScale); - rc.ambientSamplerIndex = GetSamplerIndex(TW_CLAMP_TO_EDGE, TextureFilter::Linear); - } + SetLightGridRootConstants(rc.lightGrid); CmdBindPipeline(frustumAmbientPipeline); CmdSetComputeRootConstants(0, sizeof(rc), &rc); @@ -1082,6 +1305,11 @@ void VolumetricLight::DrawPointLight(const dlight_t& light) void VolumetricLight::DrawSunlight() { + if(!drawSunlight) + { + return; + } + SCOPED_RENDER_PASS("VL Sunlight", 1.0f, 1.0f, 1.0f); CmdBeginBarrier(); @@ -1099,7 +1327,7 @@ void VolumetricLight::DrawSunlight() // each cascade needs to sample the higher level for setting the initial transmittance value for(int c = 3; c >= 0; c--) { - SCOPED_DEBUG_LABEL("VL Sunlight Shadow", 1.0f, 1.0f, 1.0f); + SCOPED_DEBUG_LABEL("Shadow", 1.0f, 1.0f, 1.0f); CmdBeginBarrier(); CmdTextureBarrier(sunShadowTextures[c], ResourceStates::UnorderedAccessBit); @@ -1124,7 +1352,7 @@ void VolumetricLight::DrawSunlight() } { - SCOPED_DEBUG_LABEL("VL Sunlight Vis", 1.0f, 1.0f, 1.0f); 
+ SCOPED_DEBUG_LABEL("Vis", 1.0f, 1.0f, 1.0f); CmdBeginBarrier(); CmdTextureBarrier(crp.depthMinMaxTexture, ResourceStates::ComputeShaderAccessBit); @@ -1152,6 +1380,7 @@ void VolumetricLight::DrawSunlight() VLSunlightVisRC rc = {}; rc.visTextureIndex = GetTextureIndexUAV(sunlightVisTexture, 0); + rc.frustumVisTextureIndex = GetTextureIndexUAV(frustumVisTexture, 0); rc.depthMip = depthMip; rc.jitter[0] = (0.5f * jitterXY[2 * jitterXYSampleIndex + 0]) * maxJitterRadiusXY; rc.jitter[1] = (0.5f * jitterXY[2 * jitterXYSampleIndex + 1]) * maxJitterRadiusXY; @@ -1164,7 +1393,7 @@ void VolumetricLight::DrawSunlight() if(!firstFrame) { - SCOPED_DEBUG_LABEL("VL Sunlight Vis Temporal", 1.0f, 1.0f, 1.0f); + SCOPED_DEBUG_LABEL("Vis Temporal", 1.0f, 1.0f, 1.0f); CmdBeginBarrier(); CmdTextureBarrier(prevSunlightVisTexture, ResourceStates::ComputeShaderAccessBit); @@ -1177,13 +1406,13 @@ void VolumetricLight::DrawSunlight() rc.prevTextureSamplerIndex = GetSamplerIndex(TW_CLAMP_TO_EDGE, TextureFilter::Linear); rc.alpha = 0.9f; - CmdBindPipeline(frustumTemporalPipeline); + CmdBindPipeline(frustumTemporalFloatPipeline); CmdSetComputeRootConstants(0, sizeof(rc), &rc); CmdDispatch((frustumSize[0] + 3) / 4, (frustumSize[1] + 3) / 4, (frustumSize[2] + 3) / 4); } { - SCOPED_DEBUG_LABEL("VL Sunlight Vis Copy", 1.0f, 1.0f, 1.0f); + SCOPED_DEBUG_LABEL("Vis Copy", 1.0f, 1.0f, 1.0f); CmdBeginBarrier(); CmdTextureBarrier(sunlightVisTexture, ResourceStates::CopySourceBit); @@ -1194,7 +1423,7 @@ void VolumetricLight::DrawSunlight() } { - SCOPED_DEBUG_LABEL("VL Sunlight", 1.0f, 1.0f, 1.0f); + SCOPED_DEBUG_LABEL("Sunlight", 1.0f, 1.0f, 1.0f); CmdBeginBarrier(); for(int c = 0; c < 4; c++) @@ -1318,11 +1547,8 @@ void VolumetricLight::DrawDebug() CmdEndBarrier(); VLAmbientVizRC rc = {}; - rc.lightGridTextureAIndex = GetTextureIndexSRV(ambientLightTextureA); - rc.lightGridTextureBIndex = GetTextureIndexSRV(ambientLightTextureB); - VectorCopy(tr.world->lightGridSize, rc.worldScale); + 
SetLightGridRootConstants(rc.lightGrid); rc.sphereScale = debugSphereScale; - VectorCopy(lightGridCenter, rc.centerPosition); const uint32_t voxelCount = tr.world->lightGridBounds[0] * tr.world->lightGridBounds[1] * tr.world->lightGridBounds[2]; const uint32_t vertexCount = voxelCount * 6; @@ -1359,18 +1585,14 @@ void VolumetricLight::DrawGUI() ImGuiColorEditFlags_NoAlpha | ImGuiColorEditFlags_DisplayRGB | ImGuiColorEditFlags_InputRGB; - ImGui::ColorEdit3("Ambient light color", ambientColor, colorEditFlags); - ImGui::SliderFloat("Ambient light intensity", &ambientIntensity, 0.0f, 10.0f); - - ImGui::Separator(); if(fogCount < ARRAY_LEN(fogs) && ImGui::Button("Add Fog")) { const float fogSize = 40.0f; Fog& fog = fogs[fogCount++]; fog = {}; - fog.extinction = 0.1f; - fog.albedo = 0.5f; + fog.extinction = OpaqueDistanceToExtinction(5000.0f); + fog.albedo = 0.7f; VectorSet(fog.scatterColor, 1, 1, 1); fog.emissive = 0.0f; VectorSet(fog.emissiveColor, 1, 1, 1); @@ -1379,7 +1601,7 @@ void VolumetricLight::DrawGUI() fog.noiseSpatialPeriod = 50.0f; fog.noiseTimePeriod = 8.0f; fog.isHeightFog = false; - fog.isGlobalFog = false; + fog.isGlobalFog = true; for(int a = 0; a < 3; a++) { fog.boxMin[a] = backEnd.refdef.vieworg[a] - fogSize; @@ -1401,6 +1623,7 @@ void VolumetricLight::DrawGUI() const char* axisNames[3] = { "X", "Y", "Z" }; ImGui::BeginTabBar("Tabs#Fogs", ImGuiTabBarFlags_AutoSelectNewTabs); + for(int i = 0; i < fogCount; i++) { if(ImGui::BeginTabItem(va("#%d", i + 1))) @@ -1440,28 +1663,104 @@ void VolumetricLight::DrawGUI() } } } + + if(ImGui::BeginTabItem("Main")) + { + bool enableVL = crp_volLight->integer != 0; + if(ImGui::Checkbox("Enable volumetric lighting", &enableVL)) + { + Cvar_Set(crp_volLight->name, enableVL ? 
"1" : "0"); + } + ImGui::Checkbox("Enable sunlight", &drawSunlight); + + ImGui::Separator(); + ImGui::Checkbox("Enable light grid", &enableLightGrid); + ImGui::ColorEdit3("Ambient light color", ambientColorGUI, colorEditFlags); + ImGui::SliderFloat("Ambient light intensity", &ambientIntensity, 0.0f, 1.0f); + + ImGui::Separator(); + ImGui::SliderFloat("Sunlight intensity", &crp.sunlightData.intensityVL, 0.0f, 200.0f); + ImGui::SliderFloat("Point light intensity", &pointLightIntensity, 0.0f, 200.0f); + + const float brightness = Brightness(ambientColorGUI); + if(brightness > 0.000001f) + { + const float scale = 0.5f / brightness; + for(int i = 0; i < 3; i++) + { + ambientColor[i] = ambientColorGUI[i] * scale; + } + } + else + { + VectorSet(ambientColor, 0.5f, 0.5f, 0.5f); + } + + ImGui::EndTabItem(); + } + + if(ImGui::BeginTabItem("Resolution")) + { + const float titleWidth = 14.0f * ImGui::GetFontSize(); + RadioButton(&xySubsampling, titleWidth, "X/Y sub-sampling", 3, "2x", 1, "4x", 2, "8x", 3); + RadioButton(&zResolution, titleWidth, "Z resolution", 3, "128", 128, "256", 256, "512", 512); + RadioButton(&extinctionResolution, titleWidth, "Extinction resolution", 3, "64", 64, "128", 128, "256", 256); + RadioButton(&sunShadowResolution, titleWidth, "Sun shadow resolution", 3, "64", 64, "128", 128, "256", 256); + RadioButton(&pointShadowResolution, titleWidth, "Point shadow resolution", 3, "32", 32, "64", 64, "128", 128); + + int divisor = 1; + for(int i = 0; i < xySubsampling; i++) + { + divisor *= 2; + } + + if((glConfig.vidWidth / divisor) != frustumSize[0] || + (glConfig.vidHeight / divisor) != frustumSize[1] || + zResolution != frustumSize[2] || + extinctionResolution != extinctionSize[0] || + sunShadowResolution != sunShadowSize[0] || + pointShadowResolution != shadowPixelCount) + { + if(ImGui::Button("Video restart")) + { + Cbuf_AddText("vid_restart\n"); + } + } + + ImGui::EndTabItem(); + } + if(ImGui::BeginTabItem("Debug")) { if(tr.world->lightGridData != NULL) 
{ ImGui::Checkbox("Draw ambient light grid", &drawAmbientDebug); - ImGui::SliderFloat("Voxel sphere scale", &debugSphereScale, 0.25f, 1.0f); + if(drawAmbientDebug) + { + ImGui::SliderFloat("Voxel sphere scale", &debugSphereScale, 0.25f, 1.0f); + } } else { ImGui::TextColored(ImVec4(1.0f, 0.5f, 0.0f, 1.0f), "No valid light grid was found for this map"); } - ImGui::NewLine(); - ImGui::SliderInt("Extinction cascade", &debugExtinctionCascadeIndex, 0, 3); + ImGui::Separator(); ImGui::Checkbox("Draw extinction volume", &drawExtinctionDebug); - ImGui::SliderFloat("Extinction value scale", &debugExtinctionScale, 1.0f, 1000.0f); + if(drawExtinctionDebug) + { + ImGui::SliderInt("Extinction cascade", &debugExtinctionCascadeIndex, 0, 3); + ImGui::SliderFloat("Extinction value scale", &debugExtinctionScale, 1.0f, 1000.0f); + } - ImGui::NewLine(); - ImGui::SliderInt("Sun shadow cascade", &debugSunShadowCascadeIndex, 0, 3); + ImGui::Separator(); ImGui::Checkbox("Draw sun shadow volume", &drawSunShadowDebug); + if(drawSunShadowDebug) + { + ImGui::SliderInt("Sun shadow cascade", &debugSunShadowCascadeIndex, 0, 3); + } - ImGui::NewLine(); + ImGui::Separator(); ImGui::Checkbox("Lock camera position", &lockCameraPosition); ImGui::SliderFloat("Voxel box scale", &debugBoxScale, 0.25f, 1.0f); @@ -1544,3 +1843,16 @@ void VolumetricLight::SaveFogFile(const char* filePath) FS_EnableCNQ3FolderWrites(qfalse); } } + +void VolumetricLight::SetLightGridRootConstants(LightGridRC& rc) +{ + rc.isAvailable = tr.world->lightGridData != NULL && enableLightGrid; + if(rc.isAvailable) + { + rc.textureAIndex = GetTextureIndexSRV(ambientLightTextureA); + rc.textureBIndex = GetTextureIndexSRV(ambientLightTextureB); + VectorCopy(lightGridCenter, rc.centerPosition); + VectorCopy(tr.world->lightGridSize, rc.worldScale); + rc.samplerIndex = GetSamplerIndex(TW_CLAMP_TO_EDGE, TextureFilter::Linear); + } +} diff --git a/code/renderer/rhi_d3d12.cpp b/code/renderer/rhi_d3d12.cpp index 91130a9..5ebd9bd 100644 --- 
a/code/renderer/rhi_d3d12.cpp +++ b/code/renderer/rhi_d3d12.cpp @@ -3733,6 +3733,7 @@ namespace RHI rhiInfo.hasInlineRaytracing = hasInlineRaytracing; rhiInfo.hasBarycentrics = hasBarycentrics; rhiInfo.allocatedByteCount = 0; + rhiInfo.forceNanoVDBPreviewMode = rhi.vendorId == VENDORID_AMD || rhi.vendorId == VENDORID_INTEL; rhi.initialized = true; diff --git a/code/renderer/shaders/.gitignore b/code/renderer/shaders/.gitignore deleted file mode 100644 index c4f94b8..0000000 --- a/code/renderer/shaders/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -*.exe -*.pdb -*.h diff --git a/code/renderer/shaders/crp/PNanoVDB.h b/code/renderer/shaders/crp/PNanoVDB.h new file mode 100644 index 0000000..24fb684 --- /dev/null +++ b/code/renderer/shaders/crp/PNanoVDB.h @@ -0,0 +1,3384 @@ + +// Copyright Contributors to the OpenVDB Project +// SPDX-License-Identifier: MPL-2.0 + +/*! + \file PNanoVDB.h + + \author Andrew Reidmeyer + + \brief This file is a portable (e.g. pointer-less) C99/GLSL/HLSL port + of NanoVDB.h, which is compatible with most graphics APIs. 
+*/
+
+#ifndef NANOVDB_PNANOVDB_H_HAS_BEEN_INCLUDED
+#define NANOVDB_PNANOVDB_H_HAS_BEEN_INCLUDED
+
+// ------------------------------------------------ Configuration -----------------------------------------------------------
+
+// platforms
+//#define PNANOVDB_C
+//#define PNANOVDB_HLSL
+//#define PNANOVDB_GLSL
+
+// addressing mode
+// PNANOVDB_ADDRESS_32
+// PNANOVDB_ADDRESS_64
+#if defined(PNANOVDB_C)
+#ifndef PNANOVDB_ADDRESS_32
+#define PNANOVDB_ADDRESS_64
+#endif
+#elif defined(PNANOVDB_HLSL)
+#ifndef PNANOVDB_ADDRESS_64
+#define PNANOVDB_ADDRESS_32
+#endif
+#elif defined(PNANOVDB_GLSL)
+#ifndef PNANOVDB_ADDRESS_64
+#define PNANOVDB_ADDRESS_32
+#endif
+#endif
+
+// bounds checking
+//#define PNANOVDB_BUF_BOUNDS_CHECK
+
+// enable HDDA by default on HLSL/GLSL, make explicit on C
+#if defined(PNANOVDB_C)
+//#define PNANOVDB_HDDA
+#ifdef PNANOVDB_HDDA
+#ifndef PNANOVDB_CMATH
+#define PNANOVDB_CMATH
+#endif
+#endif
+#elif defined(PNANOVDB_HLSL)
+#define PNANOVDB_HDDA
+#elif defined(PNANOVDB_GLSL)
+#define PNANOVDB_HDDA
+#endif
+
+#ifdef PNANOVDB_CMATH
+#ifndef __CUDACC_RTC__
+#include <math.h>
+#endif
+#endif
+
+// ------------------------------------------------ Buffer -----------------------------------------------------------
+
+#if defined(PNANOVDB_BUF_CUSTOM)
+// NOP
+#elif defined(PNANOVDB_C)
+#define PNANOVDB_BUF_C
+#elif defined(PNANOVDB_HLSL)
+#define PNANOVDB_BUF_HLSL
+#elif defined(PNANOVDB_GLSL)
+#define PNANOVDB_BUF_GLSL
+#endif
+
+#if defined(PNANOVDB_BUF_C)
+#ifndef __CUDACC_RTC__
+#include <stdint.h>
+#endif
+#if defined(__CUDACC__)
+#define PNANOVDB_BUF_FORCE_INLINE static __host__ __device__ __forceinline__
+#elif defined(_WIN32)
+#define PNANOVDB_BUF_FORCE_INLINE static inline __forceinline
+#else
+#define PNANOVDB_BUF_FORCE_INLINE static inline __attribute__((always_inline))
+#endif
+typedef struct pnanovdb_buf_t
+{
+    uint32_t* data; // buffer contents, addressed as 32-bit words
+#ifdef PNANOVDB_BUF_BOUNDS_CHECK
+    uint64_t size_in_words; // word count used by the bounds-checked accessors
+#endif
+}pnanovdb_buf_t;
+PNANOVDB_BUF_FORCE_INLINE pnanovdb_buf_t
pnanovdb_make_buf(uint32_t* data, uint64_t size_in_words) +{ + pnanovdb_buf_t ret; + ret.data = data; +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + ret.size_in_words = size_in_words; +#endif + return ret; +} +#if defined(PNANOVDB_ADDRESS_32) +PNANOVDB_BUF_FORCE_INLINE uint32_t pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint32_t byte_offset) +{ + uint32_t wordaddress = (byte_offset >> 2u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + return wordaddress < buf.size_in_words ? buf.data[wordaddress] : 0u; +#else + return buf.data[wordaddress]; +#endif +} +PNANOVDB_BUF_FORCE_INLINE uint64_t pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint32_t byte_offset) +{ + uint64_t* data64 = (uint64_t*)buf.data; + uint32_t wordaddress64 = (byte_offset >> 3u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + uint64_t size_in_words64 = buf.size_in_words >> 1u; + return wordaddress64 < size_in_words64 ? data64[wordaddress64] : 0llu; +#else + return data64[wordaddress64]; +#endif +} +PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint32_t byte_offset, uint32_t value) +{ + uint32_t wordaddress = (byte_offset >> 2u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + if (wordaddress < buf.size_in_words) + { + buf.data[wordaddress] = value; +} +#else + buf.data[wordaddress] = value; +#endif +} +PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint32_t byte_offset, uint64_t value) +{ + uint64_t* data64 = (uint64_t*)buf.data; + uint32_t wordaddress64 = (byte_offset >> 3u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + uint64_t size_in_words64 = buf.size_in_words >> 1u; + if (wordaddress64 < size_in_words64) + { + data64[wordaddress64] = value; + } +#else + data64[wordaddress64] = value; +#endif +} +#elif defined(PNANOVDB_ADDRESS_64) +PNANOVDB_BUF_FORCE_INLINE uint32_t pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint64_t byte_offset) +{ + uint64_t wordaddress = (byte_offset >> 2u); +#ifdef PNANOVDB_BUF_BOUNDS_CHECK + return wordaddress < buf.size_in_words ? 
buf.data[wordaddress] : 0u;
+#else
+    return buf.data[wordaddress];
+#endif
+}
+PNANOVDB_BUF_FORCE_INLINE uint64_t pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint64_t byte_offset)
+{
+    uint64_t* data64 = (uint64_t*)buf.data;
+    uint64_t wordaddress64 = (byte_offset >> 3u);
+#ifdef PNANOVDB_BUF_BOUNDS_CHECK
+    uint64_t size_in_words64 = buf.size_in_words >> 1u;
+    return wordaddress64 < size_in_words64 ? data64[wordaddress64] : 0llu;
+#else
+    return data64[wordaddress64];
+#endif
+}
+PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint64_t byte_offset, uint32_t value)
+{
+    uint64_t wordaddress = (byte_offset >> 2u);
+#ifdef PNANOVDB_BUF_BOUNDS_CHECK
+    if (wordaddress < buf.size_in_words)
+    {
+        buf.data[wordaddress] = value;
+    }
+#else
+    buf.data[wordaddress] = value;
+#endif
+}
+PNANOVDB_BUF_FORCE_INLINE void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint64_t byte_offset, uint64_t value)
+{
+    uint64_t* data64 = (uint64_t*)buf.data;
+    uint64_t wordaddress64 = (byte_offset >> 3u);
+#ifdef PNANOVDB_BUF_BOUNDS_CHECK
+    uint64_t size_in_words64 = buf.size_in_words >> 1u;
+    if (wordaddress64 < size_in_words64)
+    {
+        data64[wordaddress64] = value;
+    }
+#else
+    data64[wordaddress64] = value;
+#endif
+}
+#endif
+typedef uint32_t pnanovdb_grid_type_t;
+#define PNANOVDB_GRID_TYPE_GET(grid_typeIn, nameIn) pnanovdb_grid_type_constants[grid_typeIn].nameIn
+#elif defined(PNANOVDB_BUF_HLSL)
+#if defined(PNANOVDB_ADDRESS_32)
+#define pnanovdb_buf_t StructuredBuffer<uint>
+uint pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint byte_offset)
+{
+    return buf[(byte_offset >> 2u)]; // the buffer is indexed in 32-bit words
+}
+uint2 pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint byte_offset)
+{
+    uint2 ret;
+    ret.x = pnanovdb_buf_read_uint32(buf, byte_offset + 0u);
+    ret.y = pnanovdb_buf_read_uint32(buf, byte_offset + 4u);
+    return ret;
+}
+void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint byte_offset, uint value)
+{
+    // NOP, by default no write in HLSL
+}
+void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint byte_offset, uint2 value)
+{
+    // NOP, by default no write in HLSL
+}
+#elif defined(PNANOVDB_ADDRESS_64)
+#define pnanovdb_buf_t StructuredBuffer<uint>
+uint pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint64_t byte_offset)
+{
+    return buf[uint(byte_offset >> 2u)]; // word index is truncated to 32 bits for the buffer subscript
+}
+uint64_t pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint64_t byte_offset)
+{
+    uint64_t ret;
+    ret = pnanovdb_buf_read_uint32(buf, byte_offset + 0u);
+    ret = ret + (uint64_t(pnanovdb_buf_read_uint32(buf, byte_offset + 4u)) << 32u);
+    return ret;
+}
+void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint64_t byte_offset, uint value)
+{
+    // NOP, by default no write in HLSL
+}
+void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint64_t byte_offset, uint64_t value)
+{
+    // NOP, by default no write in HLSL
+}
+#endif
+#define pnanovdb_grid_type_t uint
+#define PNANOVDB_GRID_TYPE_GET(grid_typeIn, nameIn) pnanovdb_grid_type_constants[grid_typeIn].nameIn
+#elif defined(PNANOVDB_BUF_GLSL)
+struct pnanovdb_buf_t
+{
+    uint unused; // to satisfy min struct size?
+}; +uint pnanovdb_buf_read_uint32(pnanovdb_buf_t buf, uint byte_offset) +{ + return pnanovdb_buf_data[(byte_offset >> 2u)]; +} +uvec2 pnanovdb_buf_read_uint64(pnanovdb_buf_t buf, uint byte_offset) +{ + uvec2 ret; + ret.x = pnanovdb_buf_read_uint32(buf, byte_offset + 0u); + ret.y = pnanovdb_buf_read_uint32(buf, byte_offset + 4u); + return ret; +} +void pnanovdb_buf_write_uint32(pnanovdb_buf_t buf, uint byte_offset, uint value) +{ + // NOP, by default no write in HLSL +} +void pnanovdb_buf_write_uint64(pnanovdb_buf_t buf, uint byte_offset, uvec2 value) +{ + // NOP, by default no write in HLSL +} +#define pnanovdb_grid_type_t uint +#define PNANOVDB_GRID_TYPE_GET(grid_typeIn, nameIn) pnanovdb_grid_type_constants[grid_typeIn].nameIn +#endif + +// ------------------------------------------------ Basic Types ----------------------------------------------------------- + +// force inline +#if defined(PNANOVDB_C) +#if defined(__CUDACC__) +#define PNANOVDB_FORCE_INLINE static __host__ __device__ __forceinline__ +#elif defined(_WIN32) +#define PNANOVDB_FORCE_INLINE static inline __forceinline +#else +#define PNANOVDB_FORCE_INLINE static inline __attribute__((always_inline)) +#endif +#elif defined(PNANOVDB_HLSL) +#define PNANOVDB_FORCE_INLINE +#elif defined(PNANOVDB_GLSL) +#define PNANOVDB_FORCE_INLINE +#endif + +// struct typedef, static const, inout +#if defined(PNANOVDB_C) +#define PNANOVDB_STRUCT_TYPEDEF(X) typedef struct X X; +#define PNANOVDB_STATIC_CONST static const +#define PNANOVDB_INOUT(X) X* +#define PNANOVDB_IN(X) const X* +#define PNANOVDB_DEREF(X) (*X) +#define PNANOVDB_REF(X) &X +#elif defined(PNANOVDB_HLSL) +#define PNANOVDB_STRUCT_TYPEDEF(X) +#define PNANOVDB_STATIC_CONST static const +#define PNANOVDB_INOUT(X) inout X +#define PNANOVDB_IN(X) X +#define PNANOVDB_DEREF(X) X +#define PNANOVDB_REF(X) X +#elif defined(PNANOVDB_GLSL) +#define PNANOVDB_STRUCT_TYPEDEF(X) +#define PNANOVDB_STATIC_CONST const +#define PNANOVDB_INOUT(X) inout X +#define PNANOVDB_IN(X) 
X
+#define PNANOVDB_DEREF(X) X
+#define PNANOVDB_REF(X) X
+#endif
+
+// basic types, type conversion
+#if defined(PNANOVDB_C)
+#define PNANOVDB_NATIVE_64
+#ifndef __CUDACC_RTC__
+#include <stdint.h>
+#endif
+#if !defined(PNANOVDB_MEMCPY_CUSTOM)
+#ifndef __CUDACC_RTC__
+#include <string.h>
+#endif
+#define pnanovdb_memcpy memcpy
+#endif
+typedef uint32_t pnanovdb_uint32_t;
+typedef int32_t pnanovdb_int32_t;
+typedef int32_t pnanovdb_bool_t;
+#define PNANOVDB_FALSE 0
+#define PNANOVDB_TRUE 1
+typedef uint64_t pnanovdb_uint64_t;
+typedef int64_t pnanovdb_int64_t;
+typedef struct pnanovdb_coord_t
+{
+    pnanovdb_int32_t x, y, z;
+}pnanovdb_coord_t;
+typedef struct pnanovdb_vec3_t
+{
+    float x, y, z;
+}pnanovdb_vec3_t;
+PNANOVDB_FORCE_INLINE pnanovdb_int32_t pnanovdb_uint32_as_int32(pnanovdb_uint32_t v) { return (pnanovdb_int32_t)v; }
+PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_uint64_as_int64(pnanovdb_uint64_t v) { return (pnanovdb_int64_t)v; }
+PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return (pnanovdb_uint64_t)v; }
+PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_int32_as_uint32(pnanovdb_int32_t v) { return (pnanovdb_uint32_t)v; }
+PNANOVDB_FORCE_INLINE float pnanovdb_uint32_as_float(pnanovdb_uint32_t v) { float vf; pnanovdb_memcpy(&vf, &v, sizeof(vf)); return vf; }
+PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_float_as_uint32(float v) { pnanovdb_uint32_t vi; pnanovdb_memcpy(&vi, &v, sizeof(vi)); return vi; } // bit copy instead of a pointer cast: no strict-aliasing violation
+PNANOVDB_FORCE_INLINE double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { double vf; pnanovdb_memcpy(&vf, &v, sizeof(vf)); return vf; }
+PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { pnanovdb_uint64_t vi; pnanovdb_memcpy(&vi, &v, sizeof(vi)); return vi; } // bit copy instead of a pointer cast: no strict-aliasing violation
+PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return (pnanovdb_uint32_t)v; }
+PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return (pnanovdb_uint32_t)(v >> 32u); }
+PNANOVDB_FORCE_INLINE pnanovdb_uint64_t
pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return ((pnanovdb_uint64_t)x) | (((pnanovdb_uint64_t)y) << 32u); } +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint32_as_uint64_low(pnanovdb_uint32_t x) { return ((pnanovdb_uint64_t)x); } +PNANOVDB_FORCE_INLINE pnanovdb_int32_t pnanovdb_uint64_is_equal(pnanovdb_uint64_t a, pnanovdb_uint64_t b) { return a == b; } +PNANOVDB_FORCE_INLINE pnanovdb_int32_t pnanovdb_int64_is_zero(pnanovdb_int64_t a) { return a == 0; } +#ifdef PNANOVDB_CMATH +PNANOVDB_FORCE_INLINE float pnanovdb_floor(float v) { return floorf(v); } +#endif +PNANOVDB_FORCE_INLINE pnanovdb_int32_t pnanovdb_float_to_int32(float v) { return (pnanovdb_int32_t)v; } +PNANOVDB_FORCE_INLINE float pnanovdb_int32_to_float(pnanovdb_int32_t v) { return (float)v; } +PNANOVDB_FORCE_INLINE float pnanovdb_uint32_to_float(pnanovdb_uint32_t v) { return (float)v; } +PNANOVDB_FORCE_INLINE float pnanovdb_min(float a, float b) { return a < b ? a : b; } +PNANOVDB_FORCE_INLINE float pnanovdb_max(float a, float b) { return a > b ? 
a : b; } +#elif defined(PNANOVDB_HLSL) +typedef uint pnanovdb_uint32_t; +typedef int pnanovdb_int32_t; +typedef bool pnanovdb_bool_t; +#define PNANOVDB_FALSE false +#define PNANOVDB_TRUE true +typedef int3 pnanovdb_coord_t; +typedef float3 pnanovdb_vec3_t; +pnanovdb_int32_t pnanovdb_uint32_as_int32(pnanovdb_uint32_t v) { return int(v); } +pnanovdb_uint32_t pnanovdb_int32_as_uint32(pnanovdb_int32_t v) { return uint(v); } +float pnanovdb_uint32_as_float(pnanovdb_uint32_t v) { return asfloat(v); } +pnanovdb_uint32_t pnanovdb_float_as_uint32(float v) { return asuint(v); } +float pnanovdb_floor(float v) { return floor(v); } +pnanovdb_int32_t pnanovdb_float_to_int32(float v) { return int(v); } +float pnanovdb_int32_to_float(pnanovdb_int32_t v) { return float(v); } +float pnanovdb_uint32_to_float(pnanovdb_uint32_t v) { return float(v); } +float pnanovdb_min(float a, float b) { return min(a, b); } +float pnanovdb_max(float a, float b) { return max(a, b); } +#if defined(PNANOVDB_ADDRESS_32) +typedef uint2 pnanovdb_uint64_t; +typedef int2 pnanovdb_int64_t; +pnanovdb_int64_t pnanovdb_uint64_as_int64(pnanovdb_uint64_t v) { return int2(v); } +pnanovdb_uint64_t pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return uint2(v); } +double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { return asdouble(v.x, v.y); } +pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { uint2 ret; asuint(v, ret.x, ret.y); return ret; } +pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return v.x; } +pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return v.y; } +pnanovdb_uint64_t pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return uint2(x, y); } +pnanovdb_uint64_t pnanovdb_uint32_as_uint64_low(pnanovdb_uint32_t x) { return uint2(x, 0); } +bool pnanovdb_uint64_is_equal(pnanovdb_uint64_t a, pnanovdb_uint64_t b) { return (a.x == b.x) && (a.y == b.y); } +bool pnanovdb_int64_is_zero(pnanovdb_int64_t a) { return a.x == 0 && a.y == 0; } +#else +typedef 
uint64_t pnanovdb_uint64_t; +typedef int64_t pnanovdb_int64_t; +pnanovdb_int64_t pnanovdb_uint64_as_int64(pnanovdb_uint64_t v) { return int64_t(v); } +pnanovdb_uint64_t pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return uint64_t(v); } +double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { return asdouble(uint(v), uint(v >> 32u)); } +pnanovdb_uint64_t pnanovdb_double_as_uint64(double v) { uint2 ret; asuint(v, ret.x, ret.y); return uint64_t(ret.x) + (uint64_t(ret.y) << 32u); } +pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return uint(v); } +pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return uint(v >> 32u); } +pnanovdb_uint64_t pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return uint64_t(x) + (uint64_t(y) << 32u); } +pnanovdb_uint64_t pnanovdb_uint32_as_uint64_low(pnanovdb_uint32_t x) { return uint64_t(x); } +bool pnanovdb_uint64_is_equal(pnanovdb_uint64_t a, pnanovdb_uint64_t b) { return a == b; } +bool pnanovdb_int64_is_zero(pnanovdb_int64_t a) { return a == 0; } +#endif +#elif defined(PNANOVDB_GLSL) +#define pnanovdb_uint32_t uint +#define pnanovdb_int32_t int +#define pnanovdb_bool_t bool +#define PNANOVDB_FALSE false +#define PNANOVDB_TRUE true +#define pnanovdb_uint64_t uvec2 +#define pnanovdb_int64_t ivec2 +#define pnanovdb_coord_t ivec3 +#define pnanovdb_vec3_t vec3 +pnanovdb_int32_t pnanovdb_uint32_as_int32(pnanovdb_uint32_t v) { return int(v); } +pnanovdb_int64_t pnanovdb_uint64_as_int64(pnanovdb_uint64_t v) { return ivec2(v); } +pnanovdb_uint64_t pnanovdb_int64_as_uint64(pnanovdb_int64_t v) { return uvec2(v); } +pnanovdb_uint32_t pnanovdb_int32_as_uint32(pnanovdb_int32_t v) { return uint(v); } +float pnanovdb_uint32_as_float(pnanovdb_uint32_t v) { return uintBitsToFloat(v); } +pnanovdb_uint32_t pnanovdb_float_as_uint32(float v) { return floatBitsToUint(v); } +double pnanovdb_uint64_as_double(pnanovdb_uint64_t v) { return packDouble2x32(uvec2(v.x, v.y)); } +pnanovdb_uint64_t 
pnanovdb_double_as_uint64(double v) { return unpackDouble2x32(v); } +pnanovdb_uint32_t pnanovdb_uint64_low(pnanovdb_uint64_t v) { return v.x; } +pnanovdb_uint32_t pnanovdb_uint64_high(pnanovdb_uint64_t v) { return v.y; } +pnanovdb_uint64_t pnanovdb_uint32_as_uint64(pnanovdb_uint32_t x, pnanovdb_uint32_t y) { return uvec2(x, y); } +pnanovdb_uint64_t pnanovdb_uint32_as_uint64_low(pnanovdb_uint32_t x) { return uvec2(x, 0); } +bool pnanovdb_uint64_is_equal(pnanovdb_uint64_t a, pnanovdb_uint64_t b) { return (a.x == b.x) && (a.y == b.y); } +bool pnanovdb_int64_is_zero(pnanovdb_int64_t a) { return a.x == 0 && a.y == 0; } +float pnanovdb_floor(float v) { return floor(v); } +pnanovdb_int32_t pnanovdb_float_to_int32(float v) { return int(v); } +float pnanovdb_int32_to_float(pnanovdb_int32_t v) { return float(v); } +float pnanovdb_uint32_to_float(pnanovdb_uint32_t v) { return float(v); } +float pnanovdb_min(float a, float b) { return min(a, b); } +float pnanovdb_max(float a, float b) { return max(a, b); } +#endif + +// ------------------------------------------------ Coord/Vec3 Utilties ----------------------------------------------------------- + +#if defined(PNANOVDB_C) +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_uniform(float a) +{ + pnanovdb_vec3_t v; + v.x = a; + v.y = a; + v.z = a; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_add(const pnanovdb_vec3_t a, const pnanovdb_vec3_t b) +{ + pnanovdb_vec3_t v; + v.x = a.x + b.x; + v.y = a.y + b.y; + v.z = a.z + b.z; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_sub(const pnanovdb_vec3_t a, const pnanovdb_vec3_t b) +{ + pnanovdb_vec3_t v; + v.x = a.x - b.x; + v.y = a.y - b.y; + v.z = a.z - b.z; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_mul(const pnanovdb_vec3_t a, const pnanovdb_vec3_t b) +{ + pnanovdb_vec3_t v; + v.x = a.x * b.x; + v.y = a.y * b.y; + v.z = a.z * b.z; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_div(const 
pnanovdb_vec3_t a, const pnanovdb_vec3_t b) +{ + pnanovdb_vec3_t v; + v.x = a.x / b.x; + v.y = a.y / b.y; + v.z = a.z / b.z; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_min(const pnanovdb_vec3_t a, const pnanovdb_vec3_t b) +{ + pnanovdb_vec3_t v; + v.x = a.x < b.x ? a.x : b.x; + v.y = a.y < b.y ? a.y : b.y; + v.z = a.z < b.z ? a.z : b.z; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_vec3_max(const pnanovdb_vec3_t a, const pnanovdb_vec3_t b) +{ + pnanovdb_vec3_t v; + v.x = a.x > b.x ? a.x : b.x; + v.y = a.y > b.y ? a.y : b.y; + v.z = a.z > b.z ? a.z : b.z; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_coord_to_vec3(const pnanovdb_coord_t coord) +{ + pnanovdb_vec3_t v; + v.x = pnanovdb_int32_to_float(coord.x); + v.y = pnanovdb_int32_to_float(coord.y); + v.z = pnanovdb_int32_to_float(coord.z); + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_coord_uniform(const pnanovdb_int32_t a) +{ + pnanovdb_coord_t v; + v.x = a; + v.y = a; + v.z = a; + return v; +} +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_coord_add(pnanovdb_coord_t a, pnanovdb_coord_t b) +{ + pnanovdb_coord_t v; + v.x = a.x + b.x; + v.y = a.y + b.y; + v.z = a.z + b.z; + return v; +} +#elif defined(PNANOVDB_HLSL) +pnanovdb_vec3_t pnanovdb_vec3_uniform(float a) { return float3(a, a, a); } +pnanovdb_vec3_t pnanovdb_vec3_add(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a + b; } +pnanovdb_vec3_t pnanovdb_vec3_sub(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a - b; } +pnanovdb_vec3_t pnanovdb_vec3_mul(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a * b; } +pnanovdb_vec3_t pnanovdb_vec3_div(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a / b; } +pnanovdb_vec3_t pnanovdb_vec3_min(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return min(a, b); } +pnanovdb_vec3_t pnanovdb_vec3_max(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return max(a, b); } +pnanovdb_vec3_t pnanovdb_coord_to_vec3(pnanovdb_coord_t coord) { return float3(coord); } 
+pnanovdb_coord_t pnanovdb_coord_uniform(pnanovdb_int32_t a) { return int3(a, a, a); } +pnanovdb_coord_t pnanovdb_coord_add(pnanovdb_coord_t a, pnanovdb_coord_t b) { return a + b; } +#elif defined(PNANOVDB_GLSL) +pnanovdb_vec3_t pnanovdb_vec3_uniform(float a) { return vec3(a, a, a); } +pnanovdb_vec3_t pnanovdb_vec3_add(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a + b; } +pnanovdb_vec3_t pnanovdb_vec3_sub(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a - b; } +pnanovdb_vec3_t pnanovdb_vec3_mul(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a * b; } +pnanovdb_vec3_t pnanovdb_vec3_div(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return a / b; } +pnanovdb_vec3_t pnanovdb_vec3_min(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return min(a, b); } +pnanovdb_vec3_t pnanovdb_vec3_max(pnanovdb_vec3_t a, pnanovdb_vec3_t b) { return max(a, b); } +pnanovdb_vec3_t pnanovdb_coord_to_vec3(const pnanovdb_coord_t coord) { return vec3(coord); } +pnanovdb_coord_t pnanovdb_coord_uniform(pnanovdb_int32_t a) { return ivec3(a, a, a); } +pnanovdb_coord_t pnanovdb_coord_add(pnanovdb_coord_t a, pnanovdb_coord_t b) { return a + b; } +#endif + +// ------------------------------------------------ Uint64 Utils ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint32_countbits(pnanovdb_uint32_t value) +{ +#if defined(PNANOVDB_C) +#if defined(_MSC_VER) && (_MSC_VER >= 1928) && defined(PNANOVDB_USE_INTRINSICS) + return __popcnt(value); +#elif (defined(__GNUC__) || defined(__clang__)) && defined(PNANOVDB_USE_INTRINSICS) + return __builtin_popcount(value); +#else + value = value - ((value >> 1) & 0x55555555); + value = (value & 0x33333333) + ((value >> 2) & 0x33333333); + value = (value + (value >> 4)) & 0x0F0F0F0F; + return (value * 0x01010101) >> 24; +#endif +#elif defined(PNANOVDB_HLSL) + return countbits(value); +#elif defined(PNANOVDB_GLSL) + return bitCount(value); +#endif +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t 
pnanovdb_uint64_countbits(pnanovdb_uint64_t value) +{ + return pnanovdb_uint32_countbits(pnanovdb_uint64_low(value)) + pnanovdb_uint32_countbits(pnanovdb_uint64_high(value)); +} + +#if defined(PNANOVDB_ADDRESS_32) +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_offset(pnanovdb_uint64_t a, pnanovdb_uint32_t b) +{ + pnanovdb_uint32_t low = pnanovdb_uint64_low(a); + pnanovdb_uint32_t high = pnanovdb_uint64_high(a); + low += b; + if (low < b) + { + high += 1u; + } + return pnanovdb_uint32_as_uint64(low, high); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_dec(pnanovdb_uint64_t a) +{ + pnanovdb_uint32_t low = pnanovdb_uint64_low(a); + pnanovdb_uint32_t high = pnanovdb_uint64_high(a); + if (low == 0u) + { + high -= 1u; + } + low -= 1u; + return pnanovdb_uint32_as_uint64(low, high); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_to_uint32_lsr(pnanovdb_uint64_t a, pnanovdb_uint32_t b) +{ + pnanovdb_uint32_t low = pnanovdb_uint64_low(a); + pnanovdb_uint32_t high = pnanovdb_uint64_high(a); + return (b >= 32u) ? + (high >> (b - 32)) : + ((low >> b) | ((b > 0) ? (high << (32u - b)) : 0u)); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_bit_mask(pnanovdb_uint32_t bit_idx) +{ + pnanovdb_uint32_t mask_low = bit_idx < 32u ? 1u << bit_idx : 0u; + pnanovdb_uint32_t mask_high = bit_idx >= 32u ? 
1u << (bit_idx - 32u) : 0u; + return pnanovdb_uint32_as_uint64(mask_low, mask_high); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_and(pnanovdb_uint64_t a, pnanovdb_uint64_t b) +{ + return pnanovdb_uint32_as_uint64( + pnanovdb_uint64_low(a) & pnanovdb_uint64_low(b), + pnanovdb_uint64_high(a) & pnanovdb_uint64_high(b) + ); +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_uint64_any_bit(pnanovdb_uint64_t a) +{ + return pnanovdb_uint64_low(a) != 0u || pnanovdb_uint64_high(a) != 0u; +} + +#else +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_offset(pnanovdb_uint64_t a, pnanovdb_uint32_t b) +{ + return a + b; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_dec(pnanovdb_uint64_t a) +{ + return a - 1u; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_uint64_to_uint32_lsr(pnanovdb_uint64_t a, pnanovdb_uint32_t b) +{ + return pnanovdb_uint64_low(a >> b); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_bit_mask(pnanovdb_uint32_t bit_idx) +{ + return 1llu << bit_idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_uint64_and(pnanovdb_uint64_t a, pnanovdb_uint64_t b) +{ + return a & b; +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_uint64_any_bit(pnanovdb_uint64_t a) +{ + return a != 0llu; +} +#endif + +// ------------------------------------------------ Address Type ----------------------------------------------------------- + +#if defined(PNANOVDB_ADDRESS_32) +struct pnanovdb_address_t +{ + pnanovdb_uint32_t byte_offset; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_address_t) + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset) +{ + pnanovdb_address_t ret = address; + ret.byte_offset += byte_offset; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset_neg(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset) +{ + pnanovdb_address_t ret = address; + ret.byte_offset -= byte_offset; + return ret; +} 
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset_product(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset, pnanovdb_uint32_t multiplier) +{ + pnanovdb_address_t ret = address; + ret.byte_offset += byte_offset * multiplier; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64(pnanovdb_address_t address, pnanovdb_uint64_t byte_offset) +{ + pnanovdb_address_t ret = address; + // lose high bits on 32-bit + ret.byte_offset += pnanovdb_uint64_low(byte_offset); + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64_product(pnanovdb_address_t address, pnanovdb_uint64_t byte_offset, pnanovdb_uint32_t multiplier) +{ + pnanovdb_address_t ret = address; + ret.byte_offset += pnanovdb_uint64_low(byte_offset) * multiplier; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_address_mask(pnanovdb_address_t address, pnanovdb_uint32_t mask) +{ + return address.byte_offset & mask; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_mask_inv(pnanovdb_address_t address, pnanovdb_uint32_t mask) +{ + pnanovdb_address_t ret = address; + ret.byte_offset &= (~mask); + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_null() +{ + pnanovdb_address_t ret = { 0 }; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_address_is_null(pnanovdb_address_t address) +{ + return address.byte_offset == 0u; +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_address_in_interval(pnanovdb_address_t address, pnanovdb_address_t min_address, pnanovdb_address_t max_address) +{ + return address.byte_offset >= min_address.byte_offset && address.byte_offset < max_address.byte_offset; +} +#elif defined(PNANOVDB_ADDRESS_64) +struct pnanovdb_address_t +{ + pnanovdb_uint64_t byte_offset; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_address_t) + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset) +{ + 
pnanovdb_address_t ret = address; + ret.byte_offset += byte_offset; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset_neg(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset) +{ + pnanovdb_address_t ret = address; + ret.byte_offset -= byte_offset; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset_product(pnanovdb_address_t address, pnanovdb_uint32_t byte_offset, pnanovdb_uint32_t multiplier) +{ + pnanovdb_address_t ret = address; + ret.byte_offset += pnanovdb_uint32_as_uint64_low(byte_offset) * pnanovdb_uint32_as_uint64_low(multiplier); + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64(pnanovdb_address_t address, pnanovdb_uint64_t byte_offset) +{ + pnanovdb_address_t ret = address; + ret.byte_offset += byte_offset; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_offset64_product(pnanovdb_address_t address, pnanovdb_uint64_t byte_offset, pnanovdb_uint32_t multiplier) +{ + pnanovdb_address_t ret = address; + ret.byte_offset += byte_offset * pnanovdb_uint32_as_uint64_low(multiplier); + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_address_mask(pnanovdb_address_t address, pnanovdb_uint32_t mask) +{ + return pnanovdb_uint64_low(address.byte_offset) & mask; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_mask_inv(pnanovdb_address_t address, pnanovdb_uint32_t mask) +{ + pnanovdb_address_t ret = address; + ret.byte_offset &= (~pnanovdb_uint32_as_uint64_low(mask)); + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_address_null() +{ + pnanovdb_address_t ret = { 0 }; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_address_is_null(pnanovdb_address_t address) +{ + return address.byte_offset == 0llu; +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_address_in_interval(pnanovdb_address_t address, pnanovdb_address_t min_address, pnanovdb_address_t max_address) +{ + return 
address.byte_offset >= min_address.byte_offset && address.byte_offset < max_address.byte_offset; +} +#endif + +// ------------------------------------------------ High Level Buffer Read ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_read_uint32(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + return pnanovdb_buf_read_uint32(buf, address.byte_offset); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_read_uint64(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + return pnanovdb_buf_read_uint64(buf, address.byte_offset); +} +PNANOVDB_FORCE_INLINE pnanovdb_int32_t pnanovdb_read_int32(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + return pnanovdb_uint32_as_int32(pnanovdb_read_uint32(buf, address)); +} +PNANOVDB_FORCE_INLINE float pnanovdb_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + return pnanovdb_uint32_as_float(pnanovdb_read_uint32(buf, address)); +} +PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_read_int64(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + return pnanovdb_uint64_as_int64(pnanovdb_read_uint64(buf, address)); +} +PNANOVDB_FORCE_INLINE double pnanovdb_read_double(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + return pnanovdb_uint64_as_double(pnanovdb_read_uint64(buf, address)); +} +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_read_coord(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_coord_t ret; + ret.x = pnanovdb_uint32_as_int32(pnanovdb_read_uint32(buf, pnanovdb_address_offset(address, 0u))); + ret.y = pnanovdb_uint32_as_int32(pnanovdb_read_uint32(buf, pnanovdb_address_offset(address, 4u))); + ret.z = pnanovdb_uint32_as_int32(pnanovdb_read_uint32(buf, pnanovdb_address_offset(address, 8u))); + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_read_vec3(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_vec3_t ret; + ret.x = pnanovdb_read_float(buf, pnanovdb_address_offset(address, 0u)); + ret.y = 
pnanovdb_read_float(buf, pnanovdb_address_offset(address, 4u)); + ret.z = pnanovdb_read_float(buf, pnanovdb_address_offset(address, 8u)); + return ret; +} + +/* Reads one 16-bit value: loads the 32-bit word containing `address` (byte offset with its low two bits cleared), shifts right by 0 or 16 bits depending on bit 1 of the byte offset, then masks to 16 bits so the neighbouring halfword never leaks into bits 16..31 of the result (same pattern as the `& 255` in pnanovdb_read_uint8). */ +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_read_uint16(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, pnanovdb_address_mask_inv(address, 3u)); + return (raw >> (pnanovdb_address_mask(address, 2) << 3)) & 65535u; +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_read_uint8(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, pnanovdb_address_mask_inv(address, 3u)); + return (raw >> (pnanovdb_address_mask(address, 3) << 3)) & 255; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_read_vec3u16(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_vec3_t ret; + const float scale = 1.f / 65535.f; + ret.x = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint16(buf, pnanovdb_address_offset(address, 0u))) - 0.5f; + ret.y = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint16(buf, pnanovdb_address_offset(address, 2u))) - 0.5f; + ret.z = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint16(buf, pnanovdb_address_offset(address, 4u))) - 0.5f; + return ret; +} +PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_read_vec3u8(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_vec3_t ret; + const float scale = 1.f / 255.f; + ret.x = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint8(buf, pnanovdb_address_offset(address, 0u))) - 0.5f; + ret.y = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint8(buf, pnanovdb_address_offset(address, 1u))) - 0.5f; + ret.z = scale * pnanovdb_uint32_to_float(pnanovdb_read_uint8(buf, pnanovdb_address_offset(address, 2u))) - 0.5f; + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_read_bit(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_uint32_t bit_offset) +{ + pnanovdb_address_t word_address = pnanovdb_address_mask_inv(address, 3u); + 
pnanovdb_uint32_t bit_index = (pnanovdb_address_mask(address, 3u) << 3u) + bit_offset; + pnanovdb_uint32_t value_word = pnanovdb_buf_read_uint32(buf, word_address.byte_offset); + return ((value_word >> bit_index) & 1) != 0u; +} + +#if defined(PNANOVDB_C) +PNANOVDB_FORCE_INLINE short pnanovdb_read_half(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, address); + return (short)(raw >> (pnanovdb_address_mask(address, 2) << 3)); +} +#elif defined(PNANOVDB_HLSL) +PNANOVDB_FORCE_INLINE float pnanovdb_read_half(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, address); + return f16tof32(raw >> (pnanovdb_address_mask(address, 2) << 3)); +} +#elif defined(PNANOVDB_GLSL) +PNANOVDB_FORCE_INLINE float pnanovdb_read_half(pnanovdb_buf_t buf, pnanovdb_address_t address) +{ + pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, address); + return unpackHalf2x16(raw >> (pnanovdb_address_mask(address, 2) << 3)).x; +} +#endif + +// ------------------------------------------------ High Level Buffer Write ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE void pnanovdb_write_uint32(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_uint32_t value) +{ + pnanovdb_buf_write_uint32(buf, address.byte_offset, value); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_uint64(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_uint64_t value) +{ + pnanovdb_buf_write_uint64(buf, address.byte_offset, value); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_int32(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_int32_t value) +{ + pnanovdb_write_uint32(buf, address, pnanovdb_int32_as_uint32(value)); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_int64(pnanovdb_buf_t buf, pnanovdb_address_t address, pnanovdb_int64_t value) +{ + pnanovdb_buf_write_uint64(buf, address.byte_offset, pnanovdb_int64_as_uint64(value)); +} +PNANOVDB_FORCE_INLINE void 
pnanovdb_write_float(pnanovdb_buf_t buf, pnanovdb_address_t address, float value) +{ + pnanovdb_write_uint32(buf, address, pnanovdb_float_as_uint32(value)); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_double(pnanovdb_buf_t buf, pnanovdb_address_t address, double value) +{ + pnanovdb_write_uint64(buf, address, pnanovdb_double_as_uint64(value)); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_coord(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) value) +{ + pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 0u), pnanovdb_int32_as_uint32(PNANOVDB_DEREF(value).x)); + pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 4u), pnanovdb_int32_as_uint32(PNANOVDB_DEREF(value).y)); + pnanovdb_write_uint32(buf, pnanovdb_address_offset(address, 8u), pnanovdb_int32_as_uint32(PNANOVDB_DEREF(value).z)); +} +PNANOVDB_FORCE_INLINE void pnanovdb_write_vec3(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_vec3_t) value) +{ + pnanovdb_write_float(buf, pnanovdb_address_offset(address, 0u), PNANOVDB_DEREF(value).x); + pnanovdb_write_float(buf, pnanovdb_address_offset(address, 4u), PNANOVDB_DEREF(value).y); + pnanovdb_write_float(buf, pnanovdb_address_offset(address, 8u), PNANOVDB_DEREF(value).z); +} + +// ------------------------------------------------ Core Structures ----------------------------------------------------------- + +#define PNANOVDB_MAGIC_NUMBER 0x304244566f6e614eUL// "NanoVDB0" in hex - little endian (uint64_t) +#define PNANOVDB_MAGIC_GRID 0x314244566f6e614eUL// "NanoVDB1" in hex - little endian (uint64_t) +#define PNANOVDB_MAGIC_FILE 0x324244566f6e614eUL// "NanoVDB2" in hex - little endian (uint64_t) + +#define PNANOVDB_MAJOR_VERSION_NUMBER 32// reflects changes to the ABI +#define PNANOVDB_MINOR_VERSION_NUMBER 6// reflects changes to the API but not ABI +#define PNANOVDB_PATCH_VERSION_NUMBER 0// reflects bug-fixes with no ABI or API changes + +#define PNANOVDB_GRID_TYPE_UNKNOWN 0 +#define 
PNANOVDB_GRID_TYPE_FLOAT 1 +#define PNANOVDB_GRID_TYPE_DOUBLE 2 +#define PNANOVDB_GRID_TYPE_INT16 3 +#define PNANOVDB_GRID_TYPE_INT32 4 +#define PNANOVDB_GRID_TYPE_INT64 5 +#define PNANOVDB_GRID_TYPE_VEC3F 6 +#define PNANOVDB_GRID_TYPE_VEC3D 7 +#define PNANOVDB_GRID_TYPE_MASK 8 +#define PNANOVDB_GRID_TYPE_HALF 9 +#define PNANOVDB_GRID_TYPE_UINT32 10 +#define PNANOVDB_GRID_TYPE_BOOLEAN 11 +#define PNANOVDB_GRID_TYPE_RGBA8 12 +#define PNANOVDB_GRID_TYPE_FP4 13 +#define PNANOVDB_GRID_TYPE_FP8 14 +#define PNANOVDB_GRID_TYPE_FP16 15 +#define PNANOVDB_GRID_TYPE_FPN 16 +#define PNANOVDB_GRID_TYPE_VEC4F 17 +#define PNANOVDB_GRID_TYPE_VEC4D 18 +#define PNANOVDB_GRID_TYPE_INDEX 19 +#define PNANOVDB_GRID_TYPE_ONINDEX 20 +#define PNANOVDB_GRID_TYPE_INDEXMASK 21 +#define PNANOVDB_GRID_TYPE_ONINDEXMASK 22 +#define PNANOVDB_GRID_TYPE_POINTINDEX 23 +#define PNANOVDB_GRID_TYPE_VEC3U8 24 +#define PNANOVDB_GRID_TYPE_VEC3U16 25 +#define PNANOVDB_GRID_TYPE_END 26 + +#define PNANOVDB_GRID_CLASS_UNKNOWN 0 +#define PNANOVDB_GRID_CLASS_LEVEL_SET 1 // narrow band level set, e.g. SDF +#define PNANOVDB_GRID_CLASS_FOG_VOLUME 2 // fog volume, e.g. density +#define PNANOVDB_GRID_CLASS_STAGGERED 3 // staggered MAC grid, e.g. velocity +#define PNANOVDB_GRID_CLASS_POINT_INDEX 4 // point index grid +#define PNANOVDB_GRID_CLASS_POINT_DATA 5 // point data grid +#define PNANOVDB_GRID_CLASS_TOPOLOGY 6 // grid with active states only (no values) +#define PNANOVDB_GRID_CLASS_VOXEL_VOLUME 7 // volume of geometric cubes, e.g. minecraft +#define PNANOVDB_GRID_CLASS_INDEX_GRID 8 // grid whose values are offsets, e.g. 
into an external array +#define PNANOVDB_GRID_CLASS_TENSOR_GRID 9 // grid which can have extra metadata and features +#define PNANOVDB_GRID_CLASS_END 10 + +#define PNANOVDB_GRID_FLAGS_HAS_LONG_GRID_NAME (1 << 0) +#define PNANOVDB_GRID_FLAGS_HAS_BBOX (1 << 1) +#define PNANOVDB_GRID_FLAGS_HAS_MIN_MAX (1 << 2) +#define PNANOVDB_GRID_FLAGS_HAS_AVERAGE (1 << 3) +#define PNANOVDB_GRID_FLAGS_HAS_STD_DEVIATION (1 << 4) +#define PNANOVDB_GRID_FLAGS_IS_BREADTH_FIRST (1 << 5) +#define PNANOVDB_GRID_FLAGS_END (1 << 6) + +#define PNANOVDB_LEAF_TYPE_DEFAULT 0 +#define PNANOVDB_LEAF_TYPE_LITE 1 +#define PNANOVDB_LEAF_TYPE_FP 2 +#define PNANOVDB_LEAF_TYPE_INDEX 3 +#define PNANOVDB_LEAF_TYPE_INDEXMASK 4 +#define PNANOVDB_LEAF_TYPE_POINTINDEX 5 + +// BuildType = Unknown, float, double, int16_t, int32_t, int64_t, Vec3f, Vec3d, Mask, ... +// bit count of values in leaf nodes, i.e. 8*sizeof(*nanovdb::LeafNode::mValues) or zero if no values are stored +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_value_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 96, 192, 0, 16, 32, 1, 32, 4, 8, 16, 0, 128, 256, 0, 0, 0, 0, 16, 24, 48 }; +// bit count of the Tile union in InternalNodes, i.e. 8*sizeof(nanovdb::InternalData::Tile) +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_table_strides_bits[PNANOVDB_GRID_TYPE_END] = { 64, 64, 64, 64, 64, 64, 128, 192, 64, 64, 64, 64, 64, 64, 64, 64, 64, 128, 256, 64, 64, 64, 64, 64, 64, 64 }; +// bit count of min/max values, i.e. 8*sizeof(nanovdb::LeafData::mMinimum) or zero if no min/max exists +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_minmax_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 96, 192, 8, 16, 32, 8, 32, 32, 32, 32, 32, 128, 256, 64, 64, 64, 64, 64, 24, 48 }; +// bit alignment of the value type, controlled by the smallest native type, which is why it is always 0, 8, 16, 32, or 64, e.g. 
for Vec3f it is 32 +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_minmax_aligns_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 16, 32, 64, 32, 64, 8, 16, 32, 8, 32, 32, 32, 32, 32, 32, 64, 64, 64, 64, 64, 64, 8, 16 }; +// bit count of the stats (avg/std-dev) types, e.g. 8*sizeof(nanovdb::LeafData::mAverage) +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_stat_strides_bits[PNANOVDB_GRID_TYPE_END] = { 0, 32, 64, 32, 32, 64, 32, 64, 8, 32, 32, 8, 32, 32, 32, 32, 32, 32, 64, 64, 64, 64, 64, 64, 32, 32 }; +// one of the 6 leaf types (PNANOVDB_LEAF_TYPE_*) defined above, e.g. PNANOVDB_LEAF_TYPE_INDEX = 3 +PNANOVDB_STATIC_CONST pnanovdb_uint32_t pnanovdb_grid_type_leaf_type[PNANOVDB_GRID_TYPE_END] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 2, 2, 2, 2, 0, 0, 3, 3, 4, 4, 5, 0, 0 }; + +struct pnanovdb_map_t +{ + float matf[9]; + float invmatf[9]; + float vecf[3]; + float taperf; + double matd[9]; + double invmatd[9]; + double vecd[3]; + double taperd; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_map_t) +struct pnanovdb_map_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_map_handle_t) + +#define PNANOVDB_MAP_SIZE 264 + +#define PNANOVDB_MAP_OFF_MATF 0 +#define PNANOVDB_MAP_OFF_INVMATF 36 +#define PNANOVDB_MAP_OFF_VECF 72 +#define PNANOVDB_MAP_OFF_TAPERF 84 +#define PNANOVDB_MAP_OFF_MATD 88 +#define PNANOVDB_MAP_OFF_INVMATD 160 +#define PNANOVDB_MAP_OFF_VECD 232 +#define PNANOVDB_MAP_OFF_TAPERD 256 + +PNANOVDB_FORCE_INLINE float pnanovdb_map_get_matf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_MATF + 4u * index)); +} +PNANOVDB_FORCE_INLINE float pnanovdb_map_get_invmatf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_INVMATF + 4u * index)); +} +PNANOVDB_FORCE_INLINE float pnanovdb_map_get_vecf(pnanovdb_buf_t buf, 
pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_VECF + 4u * index)); +} +PNANOVDB_FORCE_INLINE float pnanovdb_map_get_taperf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERF)); +} +PNANOVDB_FORCE_INLINE double pnanovdb_map_get_matd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_MATD + 8u * index)); +} +PNANOVDB_FORCE_INLINE double pnanovdb_map_get_invmatd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_INVMATD + 8u * index)); +} +PNANOVDB_FORCE_INLINE double pnanovdb_map_get_vecd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_VECD + 8u * index)); +} +PNANOVDB_FORCE_INLINE double pnanovdb_map_get_taperd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERD)); +} + +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_matf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float matf) { + pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_MATF + 4u * index), matf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_invmatf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float invmatf) { + pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_INVMATF + 4u * index), invmatf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_vecf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float vecf) { + pnanovdb_write_float(buf, 
pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_VECF + 4u * index), vecf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_taperf(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, float taperf) { + pnanovdb_write_float(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERF), taperf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_matd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double matd) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_MATD + 8u * index), matd); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_invmatd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double invmatd) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_INVMATD + 8u * index), invmatd); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_vecd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double vecd) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_VECD + 8u * index), vecd); +} +PNANOVDB_FORCE_INLINE void pnanovdb_map_set_taperd(pnanovdb_buf_t buf, pnanovdb_map_handle_t p, pnanovdb_uint32_t index, double taperd) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_MAP_OFF_TAPERD), taperd); +} + +struct pnanovdb_grid_t +{ + pnanovdb_uint64_t magic; // 8 bytes, 0 + pnanovdb_uint64_t checksum; // 8 bytes, 8 + pnanovdb_uint32_t version; // 4 bytes, 16 + pnanovdb_uint32_t flags; // 4 bytes, 20 + pnanovdb_uint32_t grid_index; // 4 bytes, 24 + pnanovdb_uint32_t grid_count; // 4 bytes, 28 + pnanovdb_uint64_t grid_size; // 8 bytes, 32 + pnanovdb_uint32_t grid_name[256 / 4]; // 256 bytes, 40 + pnanovdb_map_t map; // 264 bytes, 296 + double world_bbox[6]; // 48 bytes, 560 + double voxel_size[3]; // 24 bytes, 608 + pnanovdb_uint32_t grid_class; // 4 bytes, 632 + pnanovdb_uint32_t grid_type; // 4 bytes, 636 + pnanovdb_int64_t blind_metadata_offset; // 8 bytes, 640 + 
pnanovdb_uint32_t blind_metadata_count; // 4 bytes, 648 + pnanovdb_uint32_t pad[5]; // 20 bytes, 652 +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_grid_t) +struct pnanovdb_grid_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_grid_handle_t) + +#define PNANOVDB_GRID_SIZE 672 + +#define PNANOVDB_GRID_OFF_MAGIC 0 +#define PNANOVDB_GRID_OFF_CHECKSUM 8 +#define PNANOVDB_GRID_OFF_VERSION 16 +#define PNANOVDB_GRID_OFF_FLAGS 20 +#define PNANOVDB_GRID_OFF_GRID_INDEX 24 +#define PNANOVDB_GRID_OFF_GRID_COUNT 28 +#define PNANOVDB_GRID_OFF_GRID_SIZE 32 +#define PNANOVDB_GRID_OFF_GRID_NAME 40 +#define PNANOVDB_GRID_OFF_MAP 296 +#define PNANOVDB_GRID_OFF_WORLD_BBOX 560 +#define PNANOVDB_GRID_OFF_VOXEL_SIZE 608 +#define PNANOVDB_GRID_OFF_GRID_CLASS 632 +#define PNANOVDB_GRID_OFF_GRID_TYPE 636 +#define PNANOVDB_GRID_OFF_BLIND_METADATA_OFFSET 640 +#define PNANOVDB_GRID_OFF_BLIND_METADATA_COUNT 648 + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_grid_get_magic(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_MAGIC)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_grid_get_checksum(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_CHECKSUM)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_version(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_VERSION)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_flags(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_FLAGS)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_grid_index(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_INDEX)); +} 
+PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_grid_count(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_COUNT)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_grid_get_grid_size(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_SIZE)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_grid_name(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_NAME + 4u * index)); +} +PNANOVDB_FORCE_INLINE pnanovdb_map_handle_t pnanovdb_grid_get_map(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + pnanovdb_map_handle_t ret; + ret.address = pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_MAP); + return ret; +} +PNANOVDB_FORCE_INLINE double pnanovdb_grid_get_world_bbox(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_WORLD_BBOX + 8u * index)); +} +PNANOVDB_FORCE_INLINE double pnanovdb_grid_get_voxel_size(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_VOXEL_SIZE + 8u * index)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_grid_class(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_CLASS)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_grid_type(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_TYPE)); +} +PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_grid_get_blind_metadata_offset(pnanovdb_buf_t buf, 
pnanovdb_grid_handle_t p) { + return pnanovdb_read_int64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_OFFSET)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_grid_get_blind_metadata_count(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_COUNT)); +} + +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_magic(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t magic) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_MAGIC), magic); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_checksum(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t checksum) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_CHECKSUM), checksum); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_version(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t version) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_VERSION), version); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_flags(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t flags) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_FLAGS), flags); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_index(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_index) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_INDEX), grid_index); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_count(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_count) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_COUNT), grid_count); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_size(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t grid_size) { + pnanovdb_write_uint64(buf, 
pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_SIZE), grid_size); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_name(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index, pnanovdb_uint32_t grid_name) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_NAME + 4u * index), grid_name); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_world_bbox(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index, double world_bbox) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_WORLD_BBOX + 8u * index), world_bbox); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_voxel_size(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t index, double voxel_size) { + pnanovdb_write_double(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_VOXEL_SIZE + 8u * index), voxel_size); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_class(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_class) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_CLASS), grid_class); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_grid_type(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t grid_type) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_GRID_TYPE), grid_type); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_blind_metadata_offset(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint64_t blind_metadata_offset) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_OFFSET), blind_metadata_offset); +} +PNANOVDB_FORCE_INLINE void pnanovdb_grid_set_blind_metadata_count(pnanovdb_buf_t buf, pnanovdb_grid_handle_t p, pnanovdb_uint32_t metadata_count) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRID_OFF_BLIND_METADATA_COUNT), metadata_count); +} + +PNANOVDB_FORCE_INLINE 
pnanovdb_uint32_t pnanovdb_make_version(pnanovdb_uint32_t major, pnanovdb_uint32_t minor, pnanovdb_uint32_t patch_num) +{ + return (major << 21u) | (minor << 10u) | patch_num; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_version_get_major(pnanovdb_uint32_t version) +{ + return (version >> 21u) & ((1u << 11u) - 1u); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_version_get_minor(pnanovdb_uint32_t version) +{ + return (version >> 10u) & ((1u << 11u) - 1u); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_version_get_patch(pnanovdb_uint32_t version) +{ + return version & ((1u << 10u) - 1u); +} + +struct pnanovdb_gridblindmetadata_t +{ + pnanovdb_int64_t byte_offset; // 8 bytes, 0 + pnanovdb_uint64_t element_count; // 8 bytes, 8 + pnanovdb_uint32_t flags; // 4 bytes, 16 + pnanovdb_uint32_t semantic; // 4 bytes, 20 + pnanovdb_uint32_t data_class; // 4 bytes, 24 + pnanovdb_uint32_t data_type; // 4 bytes, 28 + pnanovdb_uint32_t name[256 / 4]; // 256 bytes, 32 +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_gridblindmetadata_t) +struct pnanovdb_gridblindmetadata_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_gridblindmetadata_handle_t) + +#define PNANOVDB_GRIDBLINDMETADATA_SIZE 288 + +#define PNANOVDB_GRIDBLINDMETADATA_OFF_BYTE_OFFSET 0 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_ELEMENT_COUNT 8 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_FLAGS 16 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_SEMANTIC 20 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_CLASS 24 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_TYPE 28 +#define PNANOVDB_GRIDBLINDMETADATA_OFF_NAME 32 + +PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_gridblindmetadata_get_byte_offset(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { + return pnanovdb_read_int64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_BYTE_OFFSET)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_gridblindmetadata_get_element_count(pnanovdb_buf_t buf, 
pnanovdb_gridblindmetadata_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_ELEMENT_COUNT)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_flags(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_FLAGS)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_semantic(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_SEMANTIC)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_data_class(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_CLASS)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_data_type(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_DATA_TYPE)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_gridblindmetadata_get_name(pnanovdb_buf_t buf, pnanovdb_gridblindmetadata_handle_t p, pnanovdb_uint32_t index) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_GRIDBLINDMETADATA_OFF_NAME + 4u * index)); +} + +struct pnanovdb_tree_t +{ + pnanovdb_uint64_t node_offset_leaf; + pnanovdb_uint64_t node_offset_lower; + pnanovdb_uint64_t node_offset_upper; + pnanovdb_uint64_t node_offset_root; + pnanovdb_uint32_t node_count_leaf; + pnanovdb_uint32_t node_count_lower; + pnanovdb_uint32_t node_count_upper; + pnanovdb_uint32_t tile_count_leaf; + pnanovdb_uint32_t tile_count_lower; + pnanovdb_uint32_t tile_count_upper; + pnanovdb_uint64_t voxel_count; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_tree_t) +struct 
pnanovdb_tree_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_tree_handle_t) + +#define PNANOVDB_TREE_SIZE 64 + +#define PNANOVDB_TREE_OFF_NODE_OFFSET_LEAF 0 +#define PNANOVDB_TREE_OFF_NODE_OFFSET_LOWER 8 +#define PNANOVDB_TREE_OFF_NODE_OFFSET_UPPER 16 +#define PNANOVDB_TREE_OFF_NODE_OFFSET_ROOT 24 +#define PNANOVDB_TREE_OFF_NODE_COUNT_LEAF 32 +#define PNANOVDB_TREE_OFF_NODE_COUNT_LOWER 36 +#define PNANOVDB_TREE_OFF_NODE_COUNT_UPPER 40 +#define PNANOVDB_TREE_OFF_TILE_COUNT_LEAF 44 +#define PNANOVDB_TREE_OFF_TILE_COUNT_LOWER 48 +#define PNANOVDB_TREE_OFF_TILE_COUNT_UPPER 52 +#define PNANOVDB_TREE_OFF_VOXEL_COUNT 56 + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_tree_get_node_offset_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_LEAF)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_tree_get_node_offset_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_LOWER)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_tree_get_node_offset_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_UPPER)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_tree_get_node_offset_root(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_ROOT)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_node_count_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_LEAF)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_node_count_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint32(buf, 
pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_LOWER)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_node_count_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_UPPER)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_tile_count_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_LEAF)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_tile_count_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_LOWER)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_tree_get_tile_count_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_UPPER)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_tree_get_voxel_count(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_VOXEL_COUNT)); +} + +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_leaf) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_LEAF), node_offset_leaf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_lower) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_LOWER), node_offset_lower); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_upper) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, 
PNANOVDB_TREE_OFF_NODE_OFFSET_UPPER), node_offset_upper); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_offset_root(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t node_offset_root) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_OFFSET_ROOT), node_offset_root); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_count_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t node_count_leaf) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_LEAF), node_count_leaf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_count_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t node_count_lower) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_LOWER), node_count_lower); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_node_count_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t node_count_upper) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_NODE_COUNT_UPPER), node_count_upper); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_tile_count_leaf(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t tile_count_leaf) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_LEAF), tile_count_leaf); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_tile_count_lower(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t tile_count_lower) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_LOWER), tile_count_lower); +} +PNANOVDB_FORCE_INLINE void pnanovdb_tree_set_tile_count_upper(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint32_t tile_count_upper) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_TILE_COUNT_UPPER), tile_count_upper); +} +PNANOVDB_FORCE_INLINE void 
pnanovdb_tree_set_voxel_count(pnanovdb_buf_t buf, pnanovdb_tree_handle_t p, pnanovdb_uint64_t voxel_count) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_TREE_OFF_VOXEL_COUNT), voxel_count); +} + +struct pnanovdb_root_t +{ + pnanovdb_coord_t bbox_min; + pnanovdb_coord_t bbox_max; + pnanovdb_uint32_t table_size; + pnanovdb_uint32_t pad1; // background can start here + // background, min, max +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_root_t) +struct pnanovdb_root_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_root_handle_t) + +#define PNANOVDB_ROOT_BASE_SIZE 28 + +#define PNANOVDB_ROOT_OFF_BBOX_MIN 0 +#define PNANOVDB_ROOT_OFF_BBOX_MAX 12 +#define PNANOVDB_ROOT_OFF_TABLE_SIZE 24 + +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_root_get_bbox_min(pnanovdb_buf_t buf, pnanovdb_root_handle_t p) { + return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_BBOX_MIN)); +} +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_root_get_bbox_max(pnanovdb_buf_t buf, pnanovdb_root_handle_t p) { + return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_BBOX_MAX)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_root_get_tile_count(pnanovdb_buf_t buf, pnanovdb_root_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_TABLE_SIZE)); +} + +PNANOVDB_FORCE_INLINE void pnanovdb_root_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_root_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_BBOX_MIN), bbox_min); +} +PNANOVDB_FORCE_INLINE void pnanovdb_root_set_bbox_max(pnanovdb_buf_t buf, pnanovdb_root_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_max) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_BBOX_MAX), bbox_max); +} +PNANOVDB_FORCE_INLINE void pnanovdb_root_set_tile_count(pnanovdb_buf_t buf, pnanovdb_root_handle_t 
p, pnanovdb_uint32_t tile_count) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_OFF_TABLE_SIZE), tile_count); +} + +struct pnanovdb_root_tile_t +{ + pnanovdb_uint64_t key; + pnanovdb_int64_t child; // signed byte offset from root to the child node, 0 means it is a constant tile, so use value + pnanovdb_uint32_t state; + pnanovdb_uint32_t pad1; // value can start here + // value +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_root_tile_t) +struct pnanovdb_root_tile_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_root_tile_handle_t) + +#define PNANOVDB_ROOT_TILE_BASE_SIZE 20 + +#define PNANOVDB_ROOT_TILE_OFF_KEY 0 +#define PNANOVDB_ROOT_TILE_OFF_CHILD 8 +#define PNANOVDB_ROOT_TILE_OFF_STATE 16 + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_tile_get_key(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_KEY)); +} +PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_root_tile_get_child(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p) { + return pnanovdb_read_int64(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_CHILD)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_root_tile_get_state(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_STATE)); +} + +PNANOVDB_FORCE_INLINE void pnanovdb_root_tile_set_key(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p, pnanovdb_uint64_t key) { + pnanovdb_write_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_KEY), key); +} +PNANOVDB_FORCE_INLINE void pnanovdb_root_tile_set_child(pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t p, pnanovdb_int64_t child) { + pnanovdb_write_int64(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_CHILD), child); +} +PNANOVDB_FORCE_INLINE void pnanovdb_root_tile_set_state(pnanovdb_buf_t buf, 
pnanovdb_root_tile_handle_t p, pnanovdb_uint32_t state) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_ROOT_TILE_OFF_STATE), state); +} + +struct pnanovdb_upper_t +{ + pnanovdb_coord_t bbox_min; + pnanovdb_coord_t bbox_max; + pnanovdb_uint64_t flags; + pnanovdb_uint32_t value_mask[1024]; + pnanovdb_uint32_t child_mask[1024]; + // min, max + // alignas(32) pnanovdb_uint32_t table[]; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_upper_t) +struct pnanovdb_upper_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_upper_handle_t) + +#define PNANOVDB_UPPER_TABLE_COUNT 32768 +#define PNANOVDB_UPPER_BASE_SIZE 8224 + +#define PNANOVDB_UPPER_OFF_BBOX_MIN 0 +#define PNANOVDB_UPPER_OFF_BBOX_MAX 12 +#define PNANOVDB_UPPER_OFF_FLAGS 24 +#define PNANOVDB_UPPER_OFF_VALUE_MASK 32 +#define PNANOVDB_UPPER_OFF_CHILD_MASK 4128 + +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_upper_get_bbox_min(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p) { + return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_BBOX_MIN)); +} +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_upper_get_bbox_max(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p) { + return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_BBOX_MAX)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_upper_get_flags(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_FLAGS)); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_upper_get_value_mask(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, pnanovdb_uint32_t bit_index) { + pnanovdb_uint32_t value = pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_VALUE_MASK + 4u * (bit_index >> 5u))); + return ((value >> (bit_index & 31u)) & 1) != 0u; +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_upper_get_child_mask(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, 
pnanovdb_uint32_t bit_index) { + pnanovdb_uint32_t value = pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_CHILD_MASK + 4u * (bit_index >> 5u))); + return ((value >> (bit_index & 31u)) & 1) != 0u; +} + +PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_BBOX_MIN), bbox_min); +} +PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_bbox_max(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_max) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_BBOX_MAX), bbox_max); +} +PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_child_mask(pnanovdb_buf_t buf, pnanovdb_upper_handle_t p, pnanovdb_uint32_t bit_index, pnanovdb_bool_t value) { + pnanovdb_address_t addr = pnanovdb_address_offset(p.address, PNANOVDB_UPPER_OFF_CHILD_MASK + 4u * (bit_index >> 5u)); + pnanovdb_uint32_t valueMask = pnanovdb_read_uint32(buf, addr); + if (!value) { valueMask &= ~(1u << (bit_index & 31u)); } + if (value) valueMask |= (1u << (bit_index & 31u)); + pnanovdb_write_uint32(buf, addr, valueMask); +} + +struct pnanovdb_lower_t +{ + pnanovdb_coord_t bbox_min; + pnanovdb_coord_t bbox_max; + pnanovdb_uint64_t flags; + pnanovdb_uint32_t value_mask[128]; + pnanovdb_uint32_t child_mask[128]; + // min, max + // alignas(32) pnanovdb_uint32_t table[]; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_lower_t) +struct pnanovdb_lower_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_lower_handle_t) + +#define PNANOVDB_LOWER_TABLE_COUNT 4096 +#define PNANOVDB_LOWER_BASE_SIZE 1056 + +#define PNANOVDB_LOWER_OFF_BBOX_MIN 0 +#define PNANOVDB_LOWER_OFF_BBOX_MAX 12 +#define PNANOVDB_LOWER_OFF_FLAGS 24 +#define PNANOVDB_LOWER_OFF_VALUE_MASK 32 +#define PNANOVDB_LOWER_OFF_CHILD_MASK 544 + +PNANOVDB_FORCE_INLINE pnanovdb_coord_t 
pnanovdb_lower_get_bbox_min(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p) { + return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_BBOX_MIN)); +} +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_lower_get_bbox_max(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p) { + return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_BBOX_MAX)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_lower_get_flags(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p) { + return pnanovdb_read_uint64(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_FLAGS)); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_lower_get_value_mask(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, pnanovdb_uint32_t bit_index) { + pnanovdb_uint32_t value = pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_VALUE_MASK + 4u * (bit_index >> 5u))); + return ((value >> (bit_index & 31u)) & 1) != 0u; +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_lower_get_child_mask(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, pnanovdb_uint32_t bit_index) { + pnanovdb_uint32_t value = pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_CHILD_MASK + 4u * (bit_index >> 5u))); + return ((value >> (bit_index & 31u)) & 1) != 0u; +} + +PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_BBOX_MIN), bbox_min); +} +PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_bbox_max(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_max) { + pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_BBOX_MAX), bbox_max); +} +PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_child_mask(pnanovdb_buf_t buf, pnanovdb_lower_handle_t p, pnanovdb_uint32_t bit_index, pnanovdb_bool_t value) { + pnanovdb_address_t 
addr = pnanovdb_address_offset(p.address, PNANOVDB_LOWER_OFF_CHILD_MASK + 4u * (bit_index >> 5u)); + pnanovdb_uint32_t valueMask = pnanovdb_read_uint32(buf, addr); + if (!value) { valueMask &= ~(1u << (bit_index & 31u)); } + if (value) valueMask |= (1u << (bit_index & 31u)); + pnanovdb_write_uint32(buf, addr, valueMask); +} + +struct pnanovdb_leaf_t +{ + pnanovdb_coord_t bbox_min; + pnanovdb_uint32_t bbox_dif_and_flags; + pnanovdb_uint32_t value_mask[16]; + // min, max + // alignas(32) pnanovdb_uint32_t values[]; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_leaf_t) +struct pnanovdb_leaf_handle_t { pnanovdb_address_t address; }; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_leaf_handle_t) + +#define PNANOVDB_LEAF_TABLE_COUNT 512 +#define PNANOVDB_LEAF_BASE_SIZE 80 + +#define PNANOVDB_LEAF_OFF_BBOX_MIN 0 +#define PNANOVDB_LEAF_OFF_BBOX_DIF_AND_FLAGS 12 +#define PNANOVDB_LEAF_OFF_VALUE_MASK 16 + +#define PNANOVDB_LEAF_TABLE_NEG_OFF_BBOX_DIF_AND_FLAGS 84 +#define PNANOVDB_LEAF_TABLE_NEG_OFF_MINIMUM 16 +#define PNANOVDB_LEAF_TABLE_NEG_OFF_QUANTUM 12 + +PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_leaf_get_bbox_min(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p) { + return pnanovdb_read_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_BBOX_MIN)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_get_bbox_dif_and_flags(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p) { + return pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_BBOX_DIF_AND_FLAGS)); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_get_value_mask(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p, pnanovdb_uint32_t bit_index) { + pnanovdb_uint32_t value = pnanovdb_read_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_VALUE_MASK + 4u * (bit_index >> 5u))); + return ((value >> (bit_index & 31u)) & 1) != 0u; +} + +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_set_bbox_min(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p, PNANOVDB_IN(pnanovdb_coord_t) bbox_min) { + 
pnanovdb_write_coord(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_BBOX_MIN), bbox_min); +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_set_bbox_dif_and_flags(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t p, pnanovdb_uint32_t bbox_dif_and_flags) { + pnanovdb_write_uint32(buf, pnanovdb_address_offset(p.address, PNANOVDB_LEAF_OFF_BBOX_DIF_AND_FLAGS), bbox_dif_and_flags); +} + +struct pnanovdb_grid_type_constants_t +{ + pnanovdb_uint32_t root_off_background; + pnanovdb_uint32_t root_off_min; + pnanovdb_uint32_t root_off_max; + pnanovdb_uint32_t root_off_ave; + pnanovdb_uint32_t root_off_stddev; + pnanovdb_uint32_t root_size; + pnanovdb_uint32_t value_stride_bits; + pnanovdb_uint32_t table_stride; + pnanovdb_uint32_t root_tile_off_value; + pnanovdb_uint32_t root_tile_size; + pnanovdb_uint32_t upper_off_min; + pnanovdb_uint32_t upper_off_max; + pnanovdb_uint32_t upper_off_ave; + pnanovdb_uint32_t upper_off_stddev; + pnanovdb_uint32_t upper_off_table; + pnanovdb_uint32_t upper_size; + pnanovdb_uint32_t lower_off_min; + pnanovdb_uint32_t lower_off_max; + pnanovdb_uint32_t lower_off_ave; + pnanovdb_uint32_t lower_off_stddev; + pnanovdb_uint32_t lower_off_table; + pnanovdb_uint32_t lower_size; + pnanovdb_uint32_t leaf_off_min; + pnanovdb_uint32_t leaf_off_max; + pnanovdb_uint32_t leaf_off_ave; + pnanovdb_uint32_t leaf_off_stddev; + pnanovdb_uint32_t leaf_off_table; + pnanovdb_uint32_t leaf_size; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_grid_type_constants_t) + +// The following table with offsets will need to be updated as new GridTypes are added in NanoVDB.h +PNANOVDB_STATIC_CONST pnanovdb_grid_type_constants_t pnanovdb_grid_type_constants[PNANOVDB_GRID_TYPE_END] = +{ +{28, 28, 28, 28, 28, 32, 0, 8, 20, 32, 8224, 8224, 8224, 8224, 8224, 270368, 1056, 1056, 1056, 1056, 1056, 33824, 80, 80, 80, 80, 96, 96}, +{28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, +{32, 40, 48, 56, 64, 
96, 64, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 88, 96, 104, 128, 4224}, +{28, 30, 32, 36, 40, 64, 16, 8, 20, 32, 8224, 8226, 8228, 8232, 8256, 270400, 1056, 1058, 1060, 1064, 1088, 33856, 80, 82, 84, 88, 96, 1120}, +{28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, +{32, 40, 48, 56, 64, 96, 64, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 88, 96, 104, 128, 4224}, +{28, 40, 52, 64, 68, 96, 96, 16, 20, 32, 8224, 8236, 8248, 8252, 8256, 532544, 1056, 1068, 1080, 1084, 1088, 66624, 80, 92, 104, 108, 128, 6272}, +{32, 56, 80, 104, 112, 128, 192, 24, 24, 64, 8224, 8248, 8272, 8280, 8288, 794720, 1056, 1080, 1104, 1112, 1120, 99424, 80, 104, 128, 136, 160, 12448}, +{28, 29, 30, 31, 32, 64, 0, 8, 20, 32, 8224, 8225, 8226, 8227, 8256, 270400, 1056, 1057, 1058, 1059, 1088, 33856, 80, 80, 80, 80, 96, 96}, +{28, 30, 32, 36, 40, 64, 16, 8, 20, 32, 8224, 8226, 8228, 8232, 8256, 270400, 1056, 1058, 1060, 1064, 1088, 33856, 80, 82, 84, 88, 96, 1120}, +{28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, +{28, 29, 30, 31, 32, 64, 1, 8, 20, 32, 8224, 8225, 8226, 8227, 8256, 270400, 1056, 1057, 1058, 1059, 1088, 33856, 80, 80, 80, 80, 96, 160}, +{28, 32, 36, 40, 44, 64, 32, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 80, 84, 88, 92, 96, 2144}, +{28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 352}, +{28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 608}, +{28, 32, 36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 1120}, +{28, 32, 
36, 40, 44, 64, 0, 8, 20, 32, 8224, 8228, 8232, 8236, 8256, 270400, 1056, 1060, 1064, 1068, 1088, 33856, 88, 90, 92, 94, 96, 96}, +{28, 44, 60, 76, 80, 96, 128, 16, 20, 64, 8224, 8240, 8256, 8260, 8288, 532576, 1056, 1072, 1088, 1092, 1120, 66656, 80, 96, 112, 116, 128, 8320}, +{32, 64, 96, 128, 136, 160, 256, 32, 24, 64, 8224, 8256, 8288, 8296, 8320, 1056896, 1056, 1088, 1120, 1128, 1152, 132224, 80, 112, 144, 152, 160, 16544}, +{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 96}, +{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 96}, +{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 160}, +{32, 40, 48, 56, 64, 96, 0, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 80, 80, 80, 80, 160}, +{32, 40, 48, 56, 64, 96, 16, 8, 24, 32, 8224, 8232, 8240, 8248, 8256, 270400, 1056, 1064, 1072, 1080, 1088, 33856, 80, 88, 96, 96, 96, 1120}, +{28, 31, 34, 40, 44, 64, 24, 8, 20, 32, 8224, 8227, 8232, 8236, 8256, 270400, 1056, 1059, 1064, 1068, 1088, 33856, 80, 83, 88, 92, 96, 1632}, +{28, 34, 40, 48, 52, 64, 48, 8, 20, 32, 8224, 8230, 8236, 8240, 8256, 270400, 1056, 1062, 1068, 1072, 1088, 33856, 80, 86, 92, 96, 128, 3200}, +}; + +// ------------------------------------------------ Basic Lookup ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_gridblindmetadata_handle_t pnanovdb_grid_get_gridblindmetadata(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, pnanovdb_uint32_t index) +{ + pnanovdb_gridblindmetadata_handle_t meta = { grid.address }; + pnanovdb_uint64_t byte_offset = pnanovdb_grid_get_blind_metadata_offset(buf, grid); + meta.address = pnanovdb_address_offset64(meta.address, byte_offset); + meta.address = 
pnanovdb_address_offset_product(meta.address, PNANOVDB_GRIDBLINDMETADATA_SIZE, index); + return meta; +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_grid_get_gridblindmetadata_value_address(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, pnanovdb_uint32_t index) +{ + pnanovdb_gridblindmetadata_handle_t meta = pnanovdb_grid_get_gridblindmetadata(buf, grid, index); + pnanovdb_int64_t byte_offset = pnanovdb_gridblindmetadata_get_byte_offset(buf, meta); + pnanovdb_address_t address = pnanovdb_address_offset64(meta.address, pnanovdb_int64_as_uint64(byte_offset)); + return address; +} + +PNANOVDB_FORCE_INLINE pnanovdb_tree_handle_t pnanovdb_grid_get_tree(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid) +{ + pnanovdb_tree_handle_t tree = { grid.address }; + tree.address = pnanovdb_address_offset(tree.address, PNANOVDB_GRID_SIZE); + return tree; +} + +PNANOVDB_FORCE_INLINE pnanovdb_root_handle_t pnanovdb_tree_get_root(pnanovdb_buf_t buf, pnanovdb_tree_handle_t tree) +{ + pnanovdb_root_handle_t root = { tree.address }; + pnanovdb_uint64_t byte_offset = pnanovdb_tree_get_node_offset_root(buf, tree); + root.address = pnanovdb_address_offset64(root.address, byte_offset); + return root; +} + +PNANOVDB_FORCE_INLINE pnanovdb_root_tile_handle_t pnanovdb_root_get_tile(pnanovdb_grid_type_t grid_type, pnanovdb_root_handle_t root, pnanovdb_uint32_t n) +{ + pnanovdb_root_tile_handle_t tile = { root.address }; + tile.address = pnanovdb_address_offset(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_size)); + tile.address = pnanovdb_address_offset_product(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_tile_size), n); + return tile; +} + +PNANOVDB_FORCE_INLINE pnanovdb_root_tile_handle_t pnanovdb_root_get_tile_zero(pnanovdb_grid_type_t grid_type, pnanovdb_root_handle_t root) +{ + pnanovdb_root_tile_handle_t tile = { root.address }; + tile.address = pnanovdb_address_offset(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_size)); + return tile; +} + 
+PNANOVDB_FORCE_INLINE pnanovdb_upper_handle_t pnanovdb_root_get_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, pnanovdb_root_tile_handle_t tile) /* Returns the upper-node handle found at root.address plus the child byte offset stored in the tile; grid_type is not used. */ +{ + pnanovdb_upper_handle_t upper = { root.address }; + upper.address = pnanovdb_address_offset64(upper.address, pnanovdb_int64_as_uint64(pnanovdb_root_tile_get_child(buf, tile))); + return upper; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_coord_to_key(PNANOVDB_IN(pnanovdb_coord_t) ijk) /* Builds the 64-bit root-tile key for ijk: each coordinate is shifted right by 12 bits, then z goes in the low bits, y starts at bit 21 and x at bit 42. */ +{ +#if defined(PNANOVDB_NATIVE_64) + pnanovdb_uint64_t iu = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).x) >> 12u; + pnanovdb_uint64_t ju = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).y) >> 12u; + pnanovdb_uint64_t ku = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).z) >> 12u; + return (ku) | (ju << 21u) | (iu << 42u); +#else + pnanovdb_uint32_t iu = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).x) >> 12u; + pnanovdb_uint32_t ju = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).y) >> 12u; + pnanovdb_uint32_t ku = pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).z) >> 12u; + pnanovdb_uint32_t key_x = ku | (ju << 21); + pnanovdb_uint32_t key_y = (iu << 10) | (ju >> 11); /* 32-bit path: key_x carries z and the low part of y, key_y the rest of y and x; presumably pnanovdb_uint32_as_uint64 takes (low word, high word) - confirm. */ + return pnanovdb_uint32_as_uint64(key_x, key_y); +#endif +} + +PNANOVDB_FORCE_INLINE pnanovdb_root_tile_handle_t pnanovdb_root_find_tile(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk) /* Linear search of the root's tiles, starting at tile zero and stepping by root_tile_size, for the tile whose key matches the key of ijk; returns a handle holding pnanovdb_address_null() when none matches. */ +{ + pnanovdb_uint32_t tile_count = pnanovdb_uint32_as_int32(pnanovdb_root_get_tile_count(buf, root)); /* NOTE(review): the count passes through pnanovdb_uint32_as_int32 and is stored unsigned again - looks like a no-op round trip, confirm. */ + pnanovdb_root_tile_handle_t tile = pnanovdb_root_get_tile_zero(grid_type, root); + pnanovdb_uint64_t key = pnanovdb_coord_to_key(ijk); + for (pnanovdb_uint32_t i = 0u; i < tile_count; i++) + { + if (pnanovdb_uint64_is_equal(key, pnanovdb_root_tile_get_key(buf, tile))) + { + return tile; + } + tile.address = pnanovdb_address_offset(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_tile_size)); + } + pnanovdb_root_tile_handle_t null_handle = { pnanovdb_address_null() }; + 
return null_handle; +} + +// ----------------------------- Leaf Node --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_coord_to_offset(PNANOVDB_IN(pnanovdb_coord_t) ijk) /* Linear voxel index inside a leaf, built from the low 3 bits of each coordinate: (x & 7) * 64 + (y & 7) * 8 + (z & 7). */ +{ + return (((PNANOVDB_DEREF(ijk).x & 7) >> 0) << (2 * 3)) + + (((PNANOVDB_DEREF(ijk).y & 7) >> 0) << (3)) + + ((PNANOVDB_DEREF(ijk).z & 7) >> 0); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_min_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t node) /* Address of the leaf's min field: node.address plus the grid type's leaf_off_min; buf is not read. */ +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, leaf_off_min); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_max_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t node) /* Same as the min variant, using leaf_off_max. */ +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, leaf_off_max); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_ave_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t node) /* Same as the min variant, using leaf_off_ave. */ +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, leaf_off_ave); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_stddev_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t node) /* Same as the min variant, using leaf_off_stddev. */ +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, leaf_off_stddev); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_table_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t node, pnanovdb_uint32_t n) /* Address of table entry n: leaf_off_table plus (value_stride_bits * n) >> 3, i.e. the bit offset truncated to whole bytes. */ +{ + pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, leaf_off_table) + ((PNANOVDB_GRID_TYPE_GET(grid_type, value_stride_bits) * n) >> 3u); + return pnanovdb_address_offset(node.address, byte_offset); +} + 
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, PNANOVDB_IN(pnanovdb_coord_t) ijk) /* Address of the table entry for voxel ijk: the table address at the voxel's linear offset inside the leaf. */ +{ + pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); + return pnanovdb_leaf_get_table_address(grid_type, buf, leaf, n); +} + +// ----------------------------- Leaf FP Types Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE float pnanovdb_leaf_fp_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t value_log_bits) /* Decodes the quantized value of voxel ijk from the table at address: codes of (1 << value_log_bits) bits are packed into 32-bit words, and the result is code * quantum + minimum, both floats being read from fixed negative offsets before the table. */ +{ + // value_log_bits // 2 3 4 + pnanovdb_uint32_t value_bits = 1u << value_log_bits; // 4 8 16 + pnanovdb_uint32_t value_mask = (1u << value_bits) - 1u; // 0xF 0xFF 0xFFFF + pnanovdb_uint32_t values_per_word_bits = 5u - value_log_bits; // 3 2 1 + pnanovdb_uint32_t values_per_word_mask = (1u << values_per_word_bits) - 1u; // 7 3 1 + + pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); + float minimum = pnanovdb_read_float(buf, pnanovdb_address_offset_neg(address, PNANOVDB_LEAF_TABLE_NEG_OFF_MINIMUM)); + float quantum = pnanovdb_read_float(buf, pnanovdb_address_offset_neg(address, PNANOVDB_LEAF_TABLE_NEG_OFF_QUANTUM)); + pnanovdb_uint32_t raw = pnanovdb_read_uint32(buf, pnanovdb_address_offset(address, ((n >> values_per_word_bits) << 2u))); /* 32-bit word holding code n: word index times 4 bytes */ + pnanovdb_uint32_t value_compressed = (raw >> ((n & values_per_word_mask) << value_log_bits)) & value_mask; /* shift the code down to bit 0, then mask it out */ + return pnanovdb_uint32_to_float(value_compressed) * quantum + minimum; +} + +PNANOVDB_FORCE_INLINE float pnanovdb_leaf_fp4_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk) /* Fixed-width variant with value_log_bits = 2 (4-bit codes). */ +{ + return pnanovdb_leaf_fp_read_float(buf, address, ijk, 2u); +} + +PNANOVDB_FORCE_INLINE float pnanovdb_leaf_fp8_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk) /* Fixed-width variant with value_log_bits = 3 (8-bit codes). */ +{ + return pnanovdb_leaf_fp_read_float(buf, address, ijk, 3u); +} + +PNANOVDB_FORCE_INLINE float 
pnanovdb_leaf_fp16_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk) /* Fixed-width variant with value_log_bits = 4 (16-bit codes). */ +{ + return pnanovdb_leaf_fp_read_float(buf, address, ijk, 4u); +} + +PNANOVDB_FORCE_INLINE float pnanovdb_leaf_fpn_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk) /* Variable-width variant: value_log_bits comes from the top 3 bits of the bbox_dif_and_flags word, read at a fixed negative offset from the table address. */ +{ + pnanovdb_uint32_t bbox_dif_and_flags = pnanovdb_read_uint32(buf, pnanovdb_address_offset_neg(address, PNANOVDB_LEAF_TABLE_NEG_OFF_BBOX_DIF_AND_FLAGS)); + pnanovdb_uint32_t flags = bbox_dif_and_flags >> 24u; + pnanovdb_uint32_t value_log_bits = flags >> 5; // b = 0, 1, 2, 3, 4 corresponding to 1, 2, 4, 8, 16 bits + return pnanovdb_leaf_fp_read_float(buf, address, ijk, value_log_bits); +} + +// ----------------------------- Leaf Index Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_index_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) /* True when bit 28 of the leaf's bbox_dif_and_flags is set. */ +{ + return (pnanovdb_leaf_get_bbox_dif_and_flags(buf, leaf) & (1u << 28u)) != 0u; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) /* 64-bit offset stored at min_address plus 512. */ +{ + return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, min_address), 512u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) /* 64-bit offset stored at max_address plus 513. */ +{ + return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, max_address), 513u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) /* 64-bit offset stored at ave_address plus 514. */ +{ + return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, ave_address), 514u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) /* 64-bit offset stored at dev_address plus 515. */ +{ + return pnanovdb_uint64_offset(pnanovdb_read_uint64(buf, dev_address), 515u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_index_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t 
value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) /* 64-bit offset stored at value_address plus the voxel's linear offset inside the leaf. */ +{ + pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); + pnanovdb_uint64_t offset = pnanovdb_read_uint64(buf, value_address); + return pnanovdb_uint64_offset(offset, n); +} + +// ----------------------------- Leaf IndexMask Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_indexmask_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) /* This and the next five IndexMask accessors forward unchanged to the Index implementations. */ +{ + return pnanovdb_leaf_index_has_stats(buf, leaf); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) +{ + return pnanovdb_leaf_index_get_min_index(buf, min_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) +{ + return pnanovdb_leaf_index_get_max_index(buf, max_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) +{ + return pnanovdb_leaf_index_get_ave_index(buf, ave_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) +{ + return pnanovdb_leaf_index_get_dev_index(buf, dev_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_indexmask_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ + return pnanovdb_leaf_index_get_value_index(buf, value_address, ijk); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_indexmask_get_mask_bit(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n) /* Tests bit n of the 32-bit mask words that start 96 bytes into the leaf. */ +{ + pnanovdb_uint32_t word_idx = n >> 5; + pnanovdb_uint32_t bit_idx = n & 31; + pnanovdb_uint32_t val_mask = + pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); + return (val_mask & (1u << bit_idx)) != 0u; +} +PNANOVDB_FORCE_INLINE void 
pnanovdb_leaf_indexmask_set_mask_bit(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n, pnanovdb_bool_t v) /* Sets (v true) or clears (v false) bit n of the mask words that start 96 bytes into the leaf, via a read-modify-write of one 32-bit word. */ +{ + pnanovdb_uint32_t word_idx = n >> 5; + pnanovdb_uint32_t bit_idx = n & 31; + pnanovdb_uint32_t val_mask = + pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); + if (v) + { + val_mask = val_mask | (1u << bit_idx); + } + else + { + val_mask = val_mask & ~(1u << bit_idx); + } + pnanovdb_write_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx), val_mask); +} + +// ----------------------------- Leaf OnIndex Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_onindex_get_value_count(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) /* Bit count of the 64-bit value-mask word at index 7 plus the 9-bit field taken from the word stored 8 bytes past leaf_off_table after a right shift by 54. */ +{ + pnanovdb_uint64_t val_mask = pnanovdb_read_uint64(buf, pnanovdb_address_offset(leaf.address, PNANOVDB_LEAF_OFF_VALUE_MASK + 8u * 7u)); + pnanovdb_uint64_t prefix_sum = pnanovdb_read_uint64( + buf, pnanovdb_address_offset(leaf.address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table) + 8u)); + return pnanovdb_uint64_countbits(val_mask) + (pnanovdb_uint64_to_uint32_lsr(prefix_sum, 54u) & 511u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_last_offset(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) /* 64-bit offset stored at leaf_off_table plus (value count - 1). */ +{ + return pnanovdb_uint64_offset( + pnanovdb_read_uint64(buf, pnanovdb_address_offset(leaf.address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table))), + pnanovdb_leaf_onindex_get_value_count(buf, leaf) - 1u); +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_onindex_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) /* True when bit 28 of the leaf's bbox_dif_and_flags is set. */ +{ + return (pnanovdb_leaf_get_bbox_dif_and_flags(buf, leaf) & (1u << 28u)) != 0u; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) /* Rebuilds the leaf handle by stepping back leaf_off_table bytes from min_address; yields 0 when the leaf has no stats, otherwise the last offset plus 1. */ +{ + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(min_address, 
PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) + { + idx = pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 1u); + } + return idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) /* Same scheme as the min variant, with the last offset plus 2. */ +{ + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(max_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) + { + idx = pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 2u); + } + return idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) /* Same scheme as the min variant, with the last offset plus 3. */ +{ + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(ave_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) + { + idx = pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 3u); + } + return idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) /* Same scheme as the min variant, with the last offset plus 4. */ +{ + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(dev_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + pnanovdb_uint64_t idx = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_leaf_onindex_has_stats(buf, leaf)) + { + idx = pnanovdb_uint64_offset(pnanovdb_leaf_onindex_get_last_offset(buf, leaf), 4u); + } + return idx; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindex_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) /* Returns 0 for a voxel whose value-mask bit is clear; otherwise the offset stored at value_address plus a count derived from the value mask and the packed prefix_sum word. */ +{ + pnanovdb_uint32_t n = 
pnanovdb_leaf_coord_to_offset(ijk); + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(value_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_ONINDEX, leaf_off_table)) }; + + pnanovdb_uint32_t word_idx = n >> 6u; + pnanovdb_uint32_t bit_idx = n & 63u; + pnanovdb_uint64_t val_mask = pnanovdb_read_uint64(buf, pnanovdb_address_offset(leaf.address, PNANOVDB_LEAF_OFF_VALUE_MASK + 8u * word_idx)); + pnanovdb_uint64_t mask = pnanovdb_uint64_bit_mask(bit_idx); + pnanovdb_uint64_t value_index = pnanovdb_uint32_as_uint64_low(0u); + if (pnanovdb_uint64_any_bit(pnanovdb_uint64_and(val_mask, mask))) + { + pnanovdb_uint32_t sum = 0u; + sum += pnanovdb_uint64_countbits(pnanovdb_uint64_and(val_mask, pnanovdb_uint64_dec(mask))); /* presumably counts the set bits below bit_idx in this word - confirm pnanovdb_uint64_dec is mask - 1 */ + if (word_idx > 0u) + { + pnanovdb_uint64_t prefix_sum = pnanovdb_read_uint64(buf, pnanovdb_address_offset(value_address, 8u)); + sum += pnanovdb_uint64_to_uint32_lsr(prefix_sum, 9u * (word_idx - 1u)) & 511u; /* adds the 9-bit field selected by word_idx - 1 from the packed prefix_sum word */ + } + pnanovdb_uint64_t offset = pnanovdb_read_uint64(buf, value_address); + value_index = pnanovdb_uint64_offset(offset, sum); + } + return value_index; +} + +// ----------------------------- Leaf OnIndexMask Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_onindexmask_get_value_count(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) /* This and the following three OnIndexMask accessors forward unchanged to the OnIndex implementations. */ +{ + return pnanovdb_leaf_onindex_get_value_count(buf, leaf); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_last_offset(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_leaf_onindex_get_last_offset(buf, leaf); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_onindexmask_has_stats(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ + return pnanovdb_leaf_onindex_has_stats(buf, leaf); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_min_index(pnanovdb_buf_t buf, pnanovdb_address_t min_address) +{ + return pnanovdb_leaf_onindex_get_min_index(buf, min_address); +}
+PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_max_index(pnanovdb_buf_t buf, pnanovdb_address_t max_address) +{ /* Delegates to the OnIndex leaf accessor. */+ return pnanovdb_leaf_onindex_get_max_index(buf, max_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_ave_index(pnanovdb_buf_t buf, pnanovdb_address_t ave_address) +{ /* Delegates to the OnIndex leaf accessor. */+ return pnanovdb_leaf_onindex_get_ave_index(buf, ave_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_dev_index(pnanovdb_buf_t buf, pnanovdb_address_t dev_address) +{ /* Delegates to the OnIndex leaf accessor. */+ return pnanovdb_leaf_onindex_get_dev_index(buf, dev_address); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_onindexmask_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t value_address, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ /* Delegates to the OnIndex leaf accessor. */+ return pnanovdb_leaf_onindex_get_value_index(buf, value_address, ijk); +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_onindexmask_get_mask_bit(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n) +{ /* True when bit n is set in the array of 32-bit words that starts 96 bytes into the leaf (word n >> 5, bit n & 31). */+ pnanovdb_uint32_t word_idx = n >> 5; + pnanovdb_uint32_t bit_idx = n & 31; + pnanovdb_uint32_t val_mask = + pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); + return (val_mask & (1u << bit_idx)) != 0u; +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_onindexmask_set_mask_bit(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t n, pnanovdb_bool_t v) +{ /* Read-modify-write of the 32-bit word holding bit n (96 bytes into the leaf): sets the bit when v is true, clears it otherwise. */+ pnanovdb_uint32_t word_idx = n >> 5; + pnanovdb_uint32_t bit_idx = n & 31; + pnanovdb_uint32_t val_mask = + pnanovdb_read_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx)); + if (v) + { + val_mask = val_mask | (1u << bit_idx); + } + else + { + val_mask = val_mask & ~(1u << bit_idx); + } + pnanovdb_write_uint32(buf, pnanovdb_address_offset(leaf.address, 96u + 4u * word_idx), val_mask); +} + +// ----------------------------- Leaf PointIndex Specialization --------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t
pnanovdb_leaf_pointindex_get_offset(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ /* Reads the 64-bit value stored at the leaf's min address. */+ return pnanovdb_read_uint64(buf, pnanovdb_leaf_get_min_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_point_count(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf) +{ /* Reads the 64-bit value stored at the leaf's max address. */+ return pnanovdb_read_uint64(buf, pnanovdb_leaf_get_max_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf)); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_first(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i) +{ /* Leaf point offset advanced by the 16-bit table entry i - 1, or by 0 when i is 0. */+ return pnanovdb_uint64_offset(pnanovdb_leaf_pointindex_get_offset(buf, leaf), + (i == 0u ? 0u : pnanovdb_read_uint16(buf, pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i - 1u)))); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_last(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i) +{ /* Leaf point offset advanced by the 16-bit table entry i. */+ return pnanovdb_uint64_offset(pnanovdb_leaf_pointindex_get_offset(buf, leaf), + pnanovdb_read_uint16(buf, pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i))); +} +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_leaf_pointindex_get_value(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i) +{ /* The 16-bit table entry i, widened to 64 bits. */+ return pnanovdb_uint32_as_uint64_low(pnanovdb_read_uint16(buf, pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i))); +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_pointindex_set_value_only(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i, pnanovdb_uint32_t value) +{ /* Merges value into the 32-bit word read at pnanovdb_address_mask_inv(addr, 3u): even i replaces the low 16 bits, odd i the high 16 bits. NOTE(review): the write-back targets addr itself, not the masked address used for the read; confirm pnanovdb_write_uint32 tolerates that. */+ pnanovdb_address_t addr = pnanovdb_leaf_get_table_address(PNANOVDB_GRID_TYPE_POINTINDEX, buf, leaf, i); + pnanovdb_uint32_t raw32 = pnanovdb_read_uint32(buf, pnanovdb_address_mask_inv(addr, 3u)); + if ((i & 1) == 0u) + { + raw32 = (raw32 & 0xFFFF0000) | (value & 0x0000FFFF); + } + else + { + raw32 = (raw32 & 0x0000FFFF) | (value << 16u); + } + pnanovdb_write_uint32(buf, addr, raw32);
+} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_pointindex_set_on(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i) +{ /* Sets bit i in the value mask, addressed as 32-bit words starting at PNANOVDB_LEAF_OFF_VALUE_MASK. */+ pnanovdb_uint32_t word_idx = i >> 5; + pnanovdb_uint32_t bit_idx = i & 31; + pnanovdb_address_t addr = pnanovdb_address_offset(leaf.address, PNANOVDB_LEAF_OFF_VALUE_MASK + 4u * word_idx); + pnanovdb_uint32_t val_mask = pnanovdb_read_uint32(buf, addr); + val_mask = val_mask | (1u << bit_idx); + pnanovdb_write_uint32(buf, addr, val_mask); +} +PNANOVDB_FORCE_INLINE void pnanovdb_leaf_pointindex_set_value(pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, pnanovdb_uint32_t i, pnanovdb_uint32_t value) +{ /* Turns entry i on in the value mask, then stores value into its table slot. */+ pnanovdb_leaf_pointindex_set_on(buf, leaf, i); + pnanovdb_leaf_pointindex_set_value_only(buf, leaf, i, value); +} + +// ------------------------------------------------ Lower Node ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_lower_coord_to_offset(PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ /* Packs bits 3..6 of x, y and z into a 12-bit offset: x in bits 8..11, y in bits 4..7, z in bits 0..3. */+ return (((PNANOVDB_DEREF(ijk).x & 127) >> 3) << (2 * 4)) + + (((PNANOVDB_DEREF(ijk).y & 127) >> 3) << (4)) + + ((PNANOVDB_DEREF(ijk).z & 127) >> 3); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_min_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node) +{ /* Node address advanced by the grid type's lower_off_min byte offset; buf is not read. */+ pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, lower_off_min); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_max_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node) +{ /* Node address advanced by the grid type's lower_off_max byte offset; buf is not read. */+ pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, lower_off_max); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_ave_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node) +{ /* Node address advanced by the grid type's lower_off_ave byte offset; buf is not read. */+ pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type,
lower_off_ave); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_stddev_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node) +{ /* Node address advanced by the grid type's lower_off_stddev byte offset; buf is not read. */+ pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, lower_off_stddev); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_table_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node, pnanovdb_uint32_t n) +{ /* Address of table entry n: node address advanced by lower_off_table and by n times table_stride. */+ pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, lower_off_table) + PNANOVDB_GRID_TYPE_GET(grid_type, table_stride) * n; + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_lower_get_table_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node, pnanovdb_uint32_t n) +{ /* Reads table entry n as a signed 64-bit value. */+ pnanovdb_address_t table_address = pnanovdb_lower_get_table_address(grid_type, buf, node, n); + return pnanovdb_read_int64(buf, table_address); +} + +PNANOVDB_FORCE_INLINE pnanovdb_leaf_handle_t pnanovdb_lower_get_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, pnanovdb_uint32_t n) +{ /* Leaf handle located at the lower node's own address advanced by the 64-bit value stored in table entry n. */+ pnanovdb_leaf_handle_t leaf = { lower.address }; + leaf.address = pnanovdb_address_offset64(leaf.address, pnanovdb_int64_as_uint64(pnanovdb_lower_get_table_child(grid_type, buf, lower, n))); + return leaf; +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_value_address_and_level(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_uint32_t) level) +{ /* When child-mask bit n is set, descends into the child leaf and reports level 0; otherwise returns this node's table entry address and reports level 1. */+ pnanovdb_uint32_t n = pnanovdb_lower_coord_to_offset(ijk); + pnanovdb_address_t value_address; + if (pnanovdb_lower_get_child_mask(buf, lower, n)) + { + pnanovdb_leaf_handle_t child = pnanovdb_lower_get_child(grid_type, buf, lower, n); +
value_address = pnanovdb_leaf_get_value_address(grid_type, buf, child, ijk); + PNANOVDB_DEREF(level) = 0u; + } + else + { + value_address = pnanovdb_lower_get_table_address(grid_type, buf, lower, n); + PNANOVDB_DEREF(level) = 1u; + } + return value_address; +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ /* Same lookup as the and_level variant, with the level output discarded. */+ pnanovdb_uint32_t level; + return pnanovdb_lower_get_value_address_and_level(grid_type, buf, lower, ijk, PNANOVDB_REF(level)); +} + +// ------------------------------------------------ Upper Node ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_upper_coord_to_offset(PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ /* Packs bits 7..11 of x, y and z into a 15-bit offset: x in bits 10..14, y in bits 5..9, z in bits 0..4. */+ return (((PNANOVDB_DEREF(ijk).x & 4095) >> 7) << (2 * 5)) + + (((PNANOVDB_DEREF(ijk).y & 4095) >> 7) << (5)) + + ((PNANOVDB_DEREF(ijk).z & 4095) >> 7); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_min_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node) +{ /* Node address advanced by the grid type's upper_off_min byte offset; buf is not read. */+ pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, upper_off_min); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_max_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node) +{ /* Node address advanced by the grid type's upper_off_max byte offset; buf is not read. */+ pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, upper_off_max); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_ave_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node) +{ /* Node address advanced by the grid type's upper_off_ave byte offset; buf is not read. */+ pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, upper_off_ave); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t
pnanovdb_upper_get_stddev_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node) +{ /* Node address advanced by the grid type's upper_off_stddev byte offset; buf is not read. */+ pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, upper_off_stddev); + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_table_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node, pnanovdb_uint32_t n) +{ /* Address of table entry n: node address advanced by upper_off_table and by n times table_stride. */+ pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, upper_off_table) + PNANOVDB_GRID_TYPE_GET(grid_type, table_stride) * n; + return pnanovdb_address_offset(node.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_int64_t pnanovdb_upper_get_table_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node, pnanovdb_uint32_t n) +{ /* Reads table entry n as a signed 64-bit value. */+ pnanovdb_address_t bufAddress = pnanovdb_upper_get_table_address(grid_type, buf, node, n); + return pnanovdb_read_int64(buf, bufAddress); +} + +PNANOVDB_FORCE_INLINE pnanovdb_lower_handle_t pnanovdb_upper_get_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, pnanovdb_uint32_t n) +{ /* Lower-node handle located at the upper node's own address advanced by the 64-bit value stored in table entry n. */+ pnanovdb_lower_handle_t lower = { upper.address }; + lower.address = pnanovdb_address_offset64(lower.address, pnanovdb_int64_as_uint64(pnanovdb_upper_get_table_child(grid_type, buf, upper, n))); + return lower; +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_value_address_and_level(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_uint32_t) level) +{ /* When child-mask bit n is set, continues in the child lower node (which sets level); otherwise returns this node's table entry address and reports level 2. */+ pnanovdb_uint32_t n = pnanovdb_upper_coord_to_offset(ijk); + pnanovdb_address_t value_address; + if (pnanovdb_upper_get_child_mask(buf, upper, n)) + { + pnanovdb_lower_handle_t child = pnanovdb_upper_get_child(grid_type, buf, upper, n); + value_address = pnanovdb_lower_get_value_address_and_level(grid_type, buf, child, ijk, level); + } + else + { + value_address =
pnanovdb_upper_get_table_address(grid_type, buf, upper, n); + PNANOVDB_DEREF(level) = 2u; + } + return value_address; +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ /* Same lookup as the and_level variant, with the level output discarded. */+ pnanovdb_uint32_t level; + return pnanovdb_upper_get_value_address_and_level(grid_type, buf, upper, ijk, PNANOVDB_REF(level)); +} + +PNANOVDB_FORCE_INLINE void pnanovdb_upper_set_table_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t node, pnanovdb_uint32_t n, pnanovdb_int64_t child) +{ /* Writes child as a signed 64-bit value into table entry n. */+ pnanovdb_address_t bufAddress = pnanovdb_upper_get_table_address(grid_type, buf, node, n); + pnanovdb_write_int64(buf, bufAddress, child); +} + +// ------------------------------------------------ Root ----------------------------------------------------------- + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_min_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root) +{ /* Root address advanced by the grid type's root_off_min byte offset; buf is not read. */+ pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, root_off_min); + return pnanovdb_address_offset(root.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_max_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root) +{ /* Root address advanced by the grid type's root_off_max byte offset; buf is not read. */+ pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, root_off_max); + return pnanovdb_address_offset(root.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_ave_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root) +{ /* Root address advanced by the grid type's root_off_ave byte offset; buf is not read. */+ pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, root_off_ave); + return pnanovdb_address_offset(root.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_stddev_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root) +{ /* Root address advanced by the grid type's root_off_stddev byte offset; buf is not read. */+
pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, root_off_stddev); + return pnanovdb_address_offset(root.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_tile_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_tile_handle_t root_tile) +{ /* Tile address advanced by the grid type's root_tile_off_value byte offset; buf is not read. */+ pnanovdb_uint32_t byte_offset = PNANOVDB_GRID_TYPE_GET(grid_type, root_tile_off_value); + return pnanovdb_address_offset(root_tile.address, byte_offset); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_value_address_and_level(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_uint32_t) level) +{ /* No tile found: address of the root background value, level 4. Tile whose child field is zero: address of the tile value, level 3. Otherwise continues in the child upper node, which sets level. */+ pnanovdb_root_tile_handle_t tile = pnanovdb_root_find_tile(grid_type, buf, root, ijk); + pnanovdb_address_t ret; + if (pnanovdb_address_is_null(tile.address)) + { + ret = pnanovdb_address_offset(root.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_off_background)); + PNANOVDB_DEREF(level) = 4u; + } + else if (pnanovdb_int64_is_zero(pnanovdb_root_tile_get_child(buf, tile))) + { + ret = pnanovdb_address_offset(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_tile_off_value)); + PNANOVDB_DEREF(level) = 3u; + } + else + { + pnanovdb_upper_handle_t child = pnanovdb_root_get_child(grid_type, buf, root, tile); + ret = pnanovdb_upper_get_value_address_and_level(grid_type, buf, child, ijk, level); + } + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ /* Same lookup as the and_level variant, with the level output discarded. */+ pnanovdb_uint32_t level; + return pnanovdb_root_get_value_address_and_level(grid_type, buf, root, ijk, PNANOVDB_REF(level)); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_value_address_bit(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk,
PNANOVDB_INOUT(pnanovdb_uint32_t) bit_index) +{ /* Root lookup that also outputs bit_index: the low three bits of ijk.x when the lookup ended at level 0, otherwise 0. */+ pnanovdb_uint32_t level; + pnanovdb_address_t address = pnanovdb_root_get_value_address_and_level(grid_type, buf, root, ijk, PNANOVDB_REF(level)); + PNANOVDB_DEREF(bit_index) = level == 0u ? pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).x & 7) : 0u; + return address; +} + +PNANOVDB_FORCE_INLINE float pnanovdb_root_fp4_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level) +{ /* Level 0 decodes through the fp4 leaf reader; any other level reads a plain float at address. */+ float ret; + if (level == 0) + { + ret = pnanovdb_leaf_fp4_read_float(buf, address, ijk); + } + else + { + ret = pnanovdb_read_float(buf, address); + } + return ret; +} + +PNANOVDB_FORCE_INLINE float pnanovdb_root_fp8_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level) +{ /* Level 0 decodes through the fp8 leaf reader; any other level reads a plain float at address. */+ float ret; + if (level == 0) + { + ret = pnanovdb_leaf_fp8_read_float(buf, address, ijk); + } + else + { + ret = pnanovdb_read_float(buf, address); + } + return ret; +} + +PNANOVDB_FORCE_INLINE float pnanovdb_root_fp16_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level) +{ /* Level 0 decodes through the fp16 leaf reader; any other level reads a plain float at address. */+ float ret; + if (level == 0) + { + ret = pnanovdb_leaf_fp16_read_float(buf, address, ijk); + } + else + { + ret = pnanovdb_read_float(buf, address); + } + return ret; +} + +PNANOVDB_FORCE_INLINE float pnanovdb_root_fpn_read_float(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level) +{ /* Level 0 decodes through the fpn leaf reader; any other level reads a plain float at address. */+ float ret; + if (level == 0) + { + ret = pnanovdb_leaf_fpn_read_float(buf, address, ijk); + } + else + { + ret = pnanovdb_read_float(buf, address); + } + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_index_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level) +{ /* Level 0 resolves through the Index leaf; any other level reads a 64-bit value at address. */+ pnanovdb_uint64_t ret; + if (level == 0) + { + ret = pnanovdb_leaf_index_get_value_index(buf, address, ijk); + } +
else + { + ret = pnanovdb_read_uint64(buf, address); + } + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_onindex_get_value_index(pnanovdb_buf_t buf, pnanovdb_address_t address, PNANOVDB_IN(pnanovdb_coord_t) ijk, pnanovdb_uint32_t level) +{ /* Level 0 resolves through the OnIndex leaf; any other level reads a 64-bit value at address. */+ pnanovdb_uint64_t ret; + if (level == 0) + { + ret = pnanovdb_leaf_onindex_get_value_index(buf, address, ijk); + } + else + { + ret = pnanovdb_read_uint64(buf, address); + } + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_pointindex_get_point_range( + pnanovdb_buf_t buf, + pnanovdb_address_t value_address, + PNANOVDB_IN(pnanovdb_coord_t) ijk, + pnanovdb_uint32_t level, + PNANOVDB_INOUT(pnanovdb_uint64_t)range_begin, + PNANOVDB_INOUT(pnanovdb_uint64_t)range_end +) +{ /* Outputs range_begin and range_end for ijk and returns the range length. Above level 0 the local begin, local end and offset all stay 0. At level 0 the end is the 16-bit entry at value_address, the begin is the preceding 16-bit entry (0 when n is 0), and both are applied on top of the leaf's point offset. */+ pnanovdb_uint32_t local_range_begin = 0u; + pnanovdb_uint32_t local_range_end = 0u; + pnanovdb_uint64_t offset = pnanovdb_uint32_as_uint64_low(0u); + if (level == 0) + { + pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); + // recover leaf address + pnanovdb_leaf_handle_t leaf = { pnanovdb_address_offset_neg(value_address, PNANOVDB_GRID_TYPE_GET(PNANOVDB_GRID_TYPE_POINTINDEX, leaf_off_table) + 2u * n) }; + if (n > 0u) + { + local_range_begin = pnanovdb_read_uint16(buf, pnanovdb_address_offset_neg(value_address, 2u)); + } + local_range_end = pnanovdb_read_uint16(buf, value_address); + offset = pnanovdb_leaf_pointindex_get_offset(buf, leaf); + } + PNANOVDB_DEREF(range_begin) = pnanovdb_uint64_offset(offset, local_range_begin); + PNANOVDB_DEREF(range_end) = pnanovdb_uint64_offset(offset, local_range_end); + return pnanovdb_uint32_as_uint64_low(local_range_end - local_range_begin); +} + +PNANOVDB_FORCE_INLINE pnanovdb_uint64_t pnanovdb_root_pointindex_get_point_address_range( + pnanovdb_buf_t buf, + pnanovdb_grid_type_t value_type, + pnanovdb_address_t value_address, + pnanovdb_address_t blindmetadata_value_address, + PNANOVDB_IN(pnanovdb_coord_t) ijk, + pnanovdb_uint32_t level, +
PNANOVDB_INOUT(pnanovdb_address_t)address_begin, + PNANOVDB_INOUT(pnanovdb_address_t)address_end +) +{ /* Turns the point range for ijk into a begin and end address derived from blindmetadata_value_address through pnanovdb_address_offset64_product; the stride passed is 3 for VEC3U8, 6 for VEC3U16 and 12 otherwise. Returns the range length. */+ pnanovdb_uint64_t range_begin; + pnanovdb_uint64_t range_end; + pnanovdb_uint64_t range_size = pnanovdb_root_pointindex_get_point_range(buf, value_address, ijk, level, PNANOVDB_REF(range_begin), PNANOVDB_REF(range_end)); + + pnanovdb_uint32_t stride = 12u; // vec3f + if (value_type == PNANOVDB_GRID_TYPE_VEC3U8) + { + stride = 3u; + } + else if (value_type == PNANOVDB_GRID_TYPE_VEC3U16) + { + stride = 6u; + } + PNANOVDB_DEREF(address_begin) = pnanovdb_address_offset64_product(blindmetadata_value_address, range_begin, stride); + PNANOVDB_DEREF(address_end) = pnanovdb_address_offset64_product(blindmetadata_value_address, range_end, stride); + return range_size; +} + +// ------------------------------------------------ ReadAccessor ----------------------------------------------------------- + +struct pnanovdb_readaccessor_t +{ /* Lookup cache: key is the coordinate recorded by the most recent caching descent; leaf, lower and upper hold the node handles recorded on the way down; root is the tree root the accessor was initialised with. */+ pnanovdb_coord_t key; + pnanovdb_leaf_handle_t leaf; + pnanovdb_lower_handle_t lower; + pnanovdb_upper_handle_t upper; + pnanovdb_root_handle_t root; +}; +PNANOVDB_STRUCT_TYPEDEF(pnanovdb_readaccessor_t) + +PNANOVDB_FORCE_INLINE void pnanovdb_readaccessor_init(PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, pnanovdb_root_handle_t root) +{ /* Sets every key component to 0x7FFFFFFF, nulls the three cached node handles and stores root. */+ PNANOVDB_DEREF(acc).key.x = 0x7FFFFFFF; + PNANOVDB_DEREF(acc).key.y = 0x7FFFFFFF; + PNANOVDB_DEREF(acc).key.z = 0x7FFFFFFF; + PNANOVDB_DEREF(acc).leaf.address = pnanovdb_address_null(); + PNANOVDB_DEREF(acc).lower.address = pnanovdb_address_null(); + PNANOVDB_DEREF(acc).upper.address = pnanovdb_address_null(); + PNANOVDB_DEREF(acc).root = root; +} + +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_readaccessor_iscached0(PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, int dirty) +{ /* False when no leaf is cached. When dirty has any bit set above its low 3 bits, drops the cached leaf and returns false; otherwise true. */+ if (pnanovdb_address_is_null(PNANOVDB_DEREF(acc).leaf.address)) { return PNANOVDB_FALSE; } + if ((dirty & ~((1u << 3) - 1u)) != 0) + { + PNANOVDB_DEREF(acc).leaf.address = pnanovdb_address_null(); + return PNANOVDB_FALSE; + } +
return PNANOVDB_TRUE; +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_readaccessor_iscached1(PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, int dirty) +{ /* False when no lower node is cached. When dirty has any bit set above its low 7 bits, drops the cached lower node and returns false; otherwise true. */+ if (pnanovdb_address_is_null(PNANOVDB_DEREF(acc).lower.address)) { return PNANOVDB_FALSE; } + if ((dirty & ~((1u << 7) - 1u)) != 0) + { + PNANOVDB_DEREF(acc).lower.address = pnanovdb_address_null(); + return PNANOVDB_FALSE; + } + return PNANOVDB_TRUE; +} +PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_readaccessor_iscached2(PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, int dirty) +{ /* False when no upper node is cached. When dirty has any bit set above its low 12 bits, drops the cached upper node and returns false; otherwise true. */+ if (pnanovdb_address_is_null(PNANOVDB_DEREF(acc).upper.address)) { return PNANOVDB_FALSE; } + if ((dirty & ~((1u << 12) - 1u)) != 0) + { + PNANOVDB_DEREF(acc).upper.address = pnanovdb_address_null(); + return PNANOVDB_FALSE; + } + return PNANOVDB_TRUE; +} +PNANOVDB_FORCE_INLINE int pnanovdb_readaccessor_computedirty(PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk) +{ /* OR of the per-axis XOR between ijk and the cached key: a bit is set wherever any axis differs from the key. */+ return (PNANOVDB_DEREF(ijk).x ^ PNANOVDB_DEREF(acc).key.x) | (PNANOVDB_DEREF(ijk).y ^ PNANOVDB_DEREF(acc).key.y) | (PNANOVDB_DEREF(ijk).z ^ PNANOVDB_DEREF(acc).key.z); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_leaf_get_value_address_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) +{ /* Address of the leaf table entry for ijk; acc is accepted but not touched here. */+ pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk); + return pnanovdb_leaf_get_table_address(grid_type, buf, leaf, n); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_value_address_and_level_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_INOUT(pnanovdb_uint32_t) level) +{ /* When child-mask bit n is set, records the child leaf and ijk in acc, resolves inside that leaf and reports level 0; otherwise returns this node's table entry address and reports level 1. */+ pnanovdb_uint32_t n = pnanovdb_lower_coord_to_offset(ijk); + pnanovdb_address_t value_address; + if (pnanovdb_lower_get_child_mask(buf, lower, n)) + { + pnanovdb_leaf_handle_t child =
pnanovdb_lower_get_child(grid_type, buf, lower, n); + PNANOVDB_DEREF(acc).leaf = child; + PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk); + value_address = pnanovdb_leaf_get_value_address_and_cache(grid_type, buf, child, ijk, acc); + PNANOVDB_DEREF(level) = 0u; + } + else + { + value_address = pnanovdb_lower_get_table_address(grid_type, buf, lower, n); + PNANOVDB_DEREF(level) = 1u; + } + return value_address; +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_lower_get_value_address_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) +{ /* Same caching lookup, with the level output discarded. */+ pnanovdb_uint32_t level; + return pnanovdb_lower_get_value_address_and_level_and_cache(grid_type, buf, lower, ijk, acc, PNANOVDB_REF(level)); +} + +PNANOVDB_FORCE_INLINE void pnanovdb_lower_set_table_child(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t node, pnanovdb_uint32_t n, pnanovdb_int64_t child) +{ /* Writes child as a signed 64-bit value into table entry n. */+ pnanovdb_address_t table_address = pnanovdb_lower_get_table_address(grid_type, buf, node, n); + pnanovdb_write_int64(buf, table_address, child); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_value_address_and_level_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_INOUT(pnanovdb_uint32_t) level) +{ /* When child-mask bit n is set, records the child lower node and ijk in acc and continues there (which sets level); otherwise returns this node's table entry address and reports level 2. */+ pnanovdb_uint32_t n = pnanovdb_upper_coord_to_offset(ijk); + pnanovdb_address_t value_address; + if (pnanovdb_upper_get_child_mask(buf, upper, n)) + { + pnanovdb_lower_handle_t child = pnanovdb_upper_get_child(grid_type, buf, upper, n); + PNANOVDB_DEREF(acc).lower = child; + PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk); + value_address = pnanovdb_lower_get_value_address_and_level_and_cache(grid_type, buf, child, ijk, acc, level); + } + else + { + value_address = pnanovdb_upper_get_table_address(grid_type, buf, upper, n); +
PNANOVDB_DEREF(level) = 2u; + } + return value_address; +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_upper_get_value_address_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) +{ /* Same caching lookup, with the level output discarded. */+ pnanovdb_uint32_t level; + return pnanovdb_upper_get_value_address_and_level_and_cache(grid_type, buf, upper, ijk, acc, PNANOVDB_REF(level)); +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_value_address_and_level_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_INOUT(pnanovdb_uint32_t) level) +{ /* No tile found: address of the root background value, level 4. Tile whose child field is zero: address of the tile value, level 3. Otherwise records the child upper node and ijk in acc and continues there, which sets level. */+ pnanovdb_root_tile_handle_t tile = pnanovdb_root_find_tile(grid_type, buf, root, ijk); + pnanovdb_address_t ret; + if (pnanovdb_address_is_null(tile.address)) + { + ret = pnanovdb_address_offset(root.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_off_background)); + PNANOVDB_DEREF(level) = 4u; + } + else if (pnanovdb_int64_is_zero(pnanovdb_root_tile_get_child(buf, tile))) + { + ret = pnanovdb_address_offset(tile.address, PNANOVDB_GRID_TYPE_GET(grid_type, root_tile_off_value)); + PNANOVDB_DEREF(level) = 3u; + } + else + { + pnanovdb_upper_handle_t child = pnanovdb_root_get_child(grid_type, buf, root, tile); + PNANOVDB_DEREF(acc).upper = child; + PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk); + ret = pnanovdb_upper_get_value_address_and_level_and_cache(grid_type, buf, child, ijk, acc, level); + } + return ret; +} + +PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_root_get_value_address_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc) +{ /* Same caching lookup, with the level output discarded. */+ pnanovdb_uint32_t level; + return pnanovdb_root_get_value_address_and_level_and_cache(grid_type, buf, root, ijk, acc, PNANOVDB_REF(level)); +} +
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_readaccessor_get_value_address_and_level(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_uint32_t) level)
+{ // Accessor entry point: starts the lookup at the first cached node (leaf, lower, upper) that the dirty mask accepts for ijk, falling back to the root; level receives where the lookup ended.
+    int dirty = pnanovdb_readaccessor_computedirty(acc, ijk);
+
+    pnanovdb_address_t value_address;
+    if (pnanovdb_readaccessor_iscached0(acc, dirty))
+    {
+        value_address = pnanovdb_leaf_get_value_address_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).leaf, ijk, acc);
+        PNANOVDB_DEREF(level) = 0u; // the leaf lookup takes no level argument, so level 0 is written here
+    }
+    else if (pnanovdb_readaccessor_iscached1(acc, dirty))
+    {
+        value_address = pnanovdb_lower_get_value_address_and_level_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).lower, ijk, acc, level);
+    }
+    else if (pnanovdb_readaccessor_iscached2(acc, dirty))
+    {
+        value_address = pnanovdb_upper_get_value_address_and_level_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).upper, ijk, acc, level);
+    }
+    else
+    {
+        value_address = pnanovdb_root_get_value_address_and_level_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).root, ijk, acc, level);
+    }
+    return value_address;
+}
+// Same as pnanovdb_readaccessor_get_value_address_and_level with the level discarded.
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_readaccessor_get_value_address(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk)
+{
+    pnanovdb_uint32_t level;
+    return pnanovdb_readaccessor_get_value_address_and_level(grid_type, buf, acc, ijk, PNANOVDB_REF(level));
+}
+// Returns the value address and additionally writes a bit index: (ijk.x & 7) when the lookup ended at level 0, otherwise 0.
+PNANOVDB_FORCE_INLINE pnanovdb_address_t pnanovdb_readaccessor_get_value_address_bit(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_uint32_t) bit_index)
+{
+    pnanovdb_uint32_t level;
+    pnanovdb_address_t address = pnanovdb_readaccessor_get_value_address_and_level(grid_type, buf, acc, ijk, PNANOVDB_REF(level));
+    PNANOVDB_DEREF(bit_index) = level == 0u ? pnanovdb_int32_as_uint32(PNANOVDB_DEREF(ijk).x & 7) : 0u;
+    return address;
+}
+
+// ------------------------------------------------ ReadAccessor GetDim -----------------------------------------------------------
+// The get_dim functions return the edge length, in voxels, of the node or tile that holds ijk: 1 at a leaf, 8 / 128 / 4096 for a childless lower / upper / root entry.
+PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_leaf_get_dim_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc)
+{
+    return 1u;
+}
+// Lower node: descends into the leaf child (recording it and ijk in the accessor) when the child mask is set at ijk's offset, otherwise reports 1 << 3.
+PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_lower_get_dim_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc)
+{
+    pnanovdb_uint32_t n = pnanovdb_lower_coord_to_offset(ijk);
+    pnanovdb_uint32_t ret;
+    if (pnanovdb_lower_get_child_mask(buf, lower, n))
+    {
+        pnanovdb_leaf_handle_t child = pnanovdb_lower_get_child(grid_type, buf, lower, n);
+        PNANOVDB_DEREF(acc).leaf = child;
+        PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk);
+        ret = pnanovdb_leaf_get_dim_and_cache(grid_type, buf, child, ijk, acc);
+    }
+    else
+    {
+        ret = (1u << (3u)); // node 0 dim
+    }
+    return ret;
+}
+// Upper node: descends into the lower child (recording it and ijk in the accessor) when the child mask is set, otherwise reports 1 << 7.
+PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_upper_get_dim_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc)
+{
+    pnanovdb_uint32_t n = pnanovdb_upper_coord_to_offset(ijk);
+    pnanovdb_uint32_t ret;
+    if (pnanovdb_upper_get_child_mask(buf, upper, n))
+    {
+        pnanovdb_lower_handle_t child = pnanovdb_upper_get_child(grid_type, buf, upper, n);
+        PNANOVDB_DEREF(acc).lower = child;
+        PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk);
+        ret = pnanovdb_lower_get_dim_and_cache(grid_type, buf, child, ijk, acc);
+    }
+    else
+    {
+        ret = (1u << (4u + 3u)); // node 1 dim
+    }
+    return ret;
+}
+// Root: reports 1 << 12 for both the background case (no tile) and a childless tile; otherwise records the upper child in the accessor and descends.
+PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_root_get_dim_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc)
+{
+    pnanovdb_root_tile_handle_t tile = pnanovdb_root_find_tile(grid_type, buf, root, ijk);
+    pnanovdb_uint32_t ret;
+    if (pnanovdb_address_is_null(tile.address))
+    {
+        ret = 1u << (5u + 4u + 3u); // background, node 2 dim
+    }
+    else if (pnanovdb_int64_is_zero(pnanovdb_root_tile_get_child(buf, tile)))
+    {
+        ret = 1u << (5u + 4u + 3u); // tile value, node 2 dim
+    }
+    else
+    {
+        pnanovdb_upper_handle_t child = pnanovdb_root_get_child(grid_type, buf, root, tile);
+        PNANOVDB_DEREF(acc).upper = child;
+        PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk);
+        ret = pnanovdb_upper_get_dim_and_cache(grid_type, buf, child, ijk, acc);
+    }
+    return ret;
+}
+// Accessor entry point for get_dim: same cached-node dispatch as the value-address lookup.
+PNANOVDB_FORCE_INLINE pnanovdb_uint32_t pnanovdb_readaccessor_get_dim(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk)
+{
+    int dirty = pnanovdb_readaccessor_computedirty(acc, ijk);
+
+    pnanovdb_uint32_t dim;
+    if (pnanovdb_readaccessor_iscached0(acc, dirty))
+    {
+        dim = pnanovdb_leaf_get_dim_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).leaf, ijk, acc);
+    }
+    else if (pnanovdb_readaccessor_iscached1(acc, dirty))
+    {
+        dim = pnanovdb_lower_get_dim_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).lower, ijk, acc);
+    }
+    else if (pnanovdb_readaccessor_iscached2(acc, dirty))
+    {
+        dim = pnanovdb_upper_get_dim_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).upper, ijk, acc);
+    }
+    else
+    {
+        dim = pnanovdb_root_get_dim_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).root, ijk, acc);
+    }
+    return dim;
+}
+
+// ------------------------------------------------ ReadAccessor IsActive -----------------------------------------------------------
+// Leaf: active state is the leaf's value-mask bit at ijk's offset.
+PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_leaf_is_active_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_leaf_handle_t leaf, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc)
+{
+    pnanovdb_uint32_t n = pnanovdb_leaf_coord_to_offset(ijk);
+    return pnanovdb_leaf_get_value_mask(buf, leaf, n);
+}
+// Lower node: defers to the leaf child (recording it in the accessor) when one exists, otherwise returns the node's own value-mask bit.
+PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_lower_is_active_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_lower_handle_t lower, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc)
+{
+    pnanovdb_uint32_t n = pnanovdb_lower_coord_to_offset(ijk);
+    pnanovdb_bool_t is_active;
+    if (pnanovdb_lower_get_child_mask(buf, lower, n))
+    {
+        pnanovdb_leaf_handle_t child = pnanovdb_lower_get_child(grid_type, buf, lower, n);
+        PNANOVDB_DEREF(acc).leaf = child;
+        PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk);
+        is_active = pnanovdb_leaf_is_active_and_cache(grid_type, buf, child, ijk, acc);
+    }
+    else
+    {
+        is_active = pnanovdb_lower_get_value_mask(buf, lower, n);
+    }
+    return is_active;
+}
+// Upper node: defers to the lower child (recording it in the accessor) when one exists, otherwise returns the node's own value-mask bit.
+PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_upper_is_active_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_upper_handle_t upper, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc)
+{
+    pnanovdb_uint32_t n = pnanovdb_upper_coord_to_offset(ijk);
+    pnanovdb_bool_t is_active;
+    if (pnanovdb_upper_get_child_mask(buf, upper, n))
+    {
+        pnanovdb_lower_handle_t child = pnanovdb_upper_get_child(grid_type, buf, upper, n);
+        PNANOVDB_DEREF(acc).lower = child;
+        PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk);
+        is_active = pnanovdb_lower_is_active_and_cache(grid_type, buf, child, ijk, acc);
+    }
+    else
+    {
+        is_active = pnanovdb_upper_get_value_mask(buf, upper, n);
+    }
+    return is_active;
+}
+// Root: inactive when no tile is found, the tile's state flag for a childless tile, otherwise the result of the upper child (recorded in the accessor).
+PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_root_is_active_and_cache(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, pnanovdb_root_handle_t root, PNANOVDB_IN(pnanovdb_coord_t) ijk, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc)
+{
+    pnanovdb_root_tile_handle_t tile = pnanovdb_root_find_tile(grid_type, buf, root, ijk);
+    pnanovdb_bool_t is_active;
+    if (pnanovdb_address_is_null(tile.address))
+    {
+        is_active = PNANOVDB_FALSE; // background
+    }
+    else if (pnanovdb_int64_is_zero(pnanovdb_root_tile_get_child(buf, tile)))
+    {
+        pnanovdb_uint32_t state = pnanovdb_root_tile_get_state(buf, tile);
+        is_active = state != 0u; // tile value
+    }
+    else
+    {
+        pnanovdb_upper_handle_t child = pnanovdb_root_get_child(grid_type, buf, root, tile);
+        PNANOVDB_DEREF(acc).upper = child;
+        PNANOVDB_DEREF(acc).key = PNANOVDB_DEREF(ijk);
+        is_active = pnanovdb_upper_is_active_and_cache(grid_type, buf, child, ijk, acc);
+    }
+    return is_active;
+}
+// Accessor entry point for is_active: same cached-node dispatch as the value-address lookup.
+PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_readaccessor_is_active(pnanovdb_grid_type_t grid_type, pnanovdb_buf_t buf, PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc, PNANOVDB_IN(pnanovdb_coord_t) ijk)
+{
+    int dirty = pnanovdb_readaccessor_computedirty(acc, ijk);
+
+    pnanovdb_bool_t is_active;
+    if (pnanovdb_readaccessor_iscached0(acc, dirty))
+    {
+        is_active = pnanovdb_leaf_is_active_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).leaf, ijk, acc);
+    }
+    else if (pnanovdb_readaccessor_iscached1(acc, dirty))
+    {
+        is_active = pnanovdb_lower_is_active_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).lower, ijk, acc);
+    }
+    else if (pnanovdb_readaccessor_iscached2(acc, dirty))
+    {
+        is_active = pnanovdb_upper_is_active_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).upper, ijk, acc);
+    }
+    else
+    {
+        is_active = pnanovdb_root_is_active_and_cache(grid_type, buf, PNANOVDB_DEREF(acc).root, ijk, acc);
+    }
+    return is_active;
+}
+
+// ------------------------------------------------ Map Transforms -----------------------------------------------------------
+// Affine map: dst = M * src + v, where M is read as nine floats (entries 0..2 feed dst.x, 3..5 dst.y, 6..8 dst.z) and v as three floats.
+PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_map_apply(pnanovdb_buf_t buf, pnanovdb_map_handle_t map, PNANOVDB_IN(pnanovdb_vec3_t) src)
+{
+    pnanovdb_vec3_t dst;
+    float sx = PNANOVDB_DEREF(src).x;
+    float sy = PNANOVDB_DEREF(src).y;
+    float sz = PNANOVDB_DEREF(src).z;
+    dst.x = sx * pnanovdb_map_get_matf(buf, map, 0) + sy * pnanovdb_map_get_matf(buf, map, 1) + sz * pnanovdb_map_get_matf(buf, map, 2) + pnanovdb_map_get_vecf(buf, map, 0);
+    dst.y = sx * pnanovdb_map_get_matf(buf, map, 3) + sy * pnanovdb_map_get_matf(buf, map, 4) + sz * pnanovdb_map_get_matf(buf, map, 5) + pnanovdb_map_get_vecf(buf, map, 1);
+    dst.z = sx * pnanovdb_map_get_matf(buf, map, 6) + sy * pnanovdb_map_get_matf(buf, map, 7) + sz * pnanovdb_map_get_matf(buf, map, 8) + pnanovdb_map_get_vecf(buf, map, 2);
+    return dst;
+}
+// Inverse affine map: subtracts the translation first, then multiplies by the stored inverse matrix.
+PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_map_apply_inverse(pnanovdb_buf_t buf, pnanovdb_map_handle_t map, PNANOVDB_IN(pnanovdb_vec3_t) src)
+{
+    pnanovdb_vec3_t dst;
+    float sx = PNANOVDB_DEREF(src).x - pnanovdb_map_get_vecf(buf, map, 0);
+    float sy = PNANOVDB_DEREF(src).y - pnanovdb_map_get_vecf(buf, map, 1);
+    float sz = PNANOVDB_DEREF(src).z - pnanovdb_map_get_vecf(buf, map, 2);
+    dst.x = sx * pnanovdb_map_get_invmatf(buf, map, 0) + sy * pnanovdb_map_get_invmatf(buf, map, 1) + sz * pnanovdb_map_get_invmatf(buf, map, 2);
+    dst.y = sx * pnanovdb_map_get_invmatf(buf, map, 3) + sy * pnanovdb_map_get_invmatf(buf, map, 4) + sz * pnanovdb_map_get_invmatf(buf, map, 5);
+    dst.z = sx * pnanovdb_map_get_invmatf(buf, map, 6) + sy * pnanovdb_map_get_invmatf(buf, map, 7) + sz * pnanovdb_map_get_invmatf(buf, map, 8);
+    return dst;
+}
+// Matrix-only part of pnanovdb_map_apply: multiplies by M without adding the translation.
+PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_map_apply_jacobi(pnanovdb_buf_t buf, pnanovdb_map_handle_t map, PNANOVDB_IN(pnanovdb_vec3_t) src)
+{
+    pnanovdb_vec3_t dst;
+    float sx = PNANOVDB_DEREF(src).x;
+    float sy = PNANOVDB_DEREF(src).y;
+    float sz = PNANOVDB_DEREF(src).z;
+    dst.x = sx * pnanovdb_map_get_matf(buf, map, 0) + sy * pnanovdb_map_get_matf(buf, map, 1) + sz * pnanovdb_map_get_matf(buf, map, 2);
+    dst.y = sx * pnanovdb_map_get_matf(buf, map, 3) + sy * pnanovdb_map_get_matf(buf, map, 4) + sz * pnanovdb_map_get_matf(buf, map, 5);
+    dst.z = sx * pnanovdb_map_get_matf(buf, map, 6) + sy * pnanovdb_map_get_matf(buf, map, 7) + sz * pnanovdb_map_get_matf(buf, map, 8);
+    return dst;
+}
+// Matrix-only part of pnanovdb_map_apply_inverse: multiplies by the stored inverse matrix without touching the translation.
+PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_map_apply_inverse_jacobi(pnanovdb_buf_t buf, pnanovdb_map_handle_t map, PNANOVDB_IN(pnanovdb_vec3_t) src)
+{
+    pnanovdb_vec3_t dst;
+    float sx = PNANOVDB_DEREF(src).x;
+    float sy = PNANOVDB_DEREF(src).y;
+    float sz = PNANOVDB_DEREF(src).z;
+    dst.x = sx * pnanovdb_map_get_invmatf(buf, map, 0) + sy * pnanovdb_map_get_invmatf(buf, map, 1) + sz * pnanovdb_map_get_invmatf(buf, map, 2);
+    dst.y = sx * pnanovdb_map_get_invmatf(buf, map, 3) + sy * pnanovdb_map_get_invmatf(buf, map, 4) + sz * pnanovdb_map_get_invmatf(buf, map, 5);
+    dst.z = sx * pnanovdb_map_get_invmatf(buf, map, 6) + sy * pnanovdb_map_get_invmatf(buf, map, 7) + sz * pnanovdb_map_get_invmatf(buf, map, 8);
+    return dst;
+}
+// World-space position to index space: applies the inverse of the grid's map.
+PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_grid_world_to_indexf(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, PNANOVDB_IN(pnanovdb_vec3_t) src)
+{
+    pnanovdb_map_handle_t map = pnanovdb_grid_get_map(buf, grid);
+    return pnanovdb_map_apply_inverse(buf, map, src);
+}
+// Index-space position to world space: applies the grid's map.
+PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_grid_index_to_worldf(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, PNANOVDB_IN(pnanovdb_vec3_t) src)
+{
+    pnanovdb_map_handle_t map = pnanovdb_grid_get_map(buf, grid);
+    return pnanovdb_map_apply(buf, map, src);
+}
+// World-space direction to index space: inverse matrix only, no translation.
+PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_grid_world_to_index_dirf(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, PNANOVDB_IN(pnanovdb_vec3_t) src)
+{
+    pnanovdb_map_handle_t map = pnanovdb_grid_get_map(buf, grid);
+    return pnanovdb_map_apply_inverse_jacobi(buf, map, src);
+}
+// Index-space direction to world space: matrix only, no translation.
+PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_grid_index_to_world_dirf(pnanovdb_buf_t buf, pnanovdb_grid_handle_t grid, PNANOVDB_IN(pnanovdb_vec3_t) src)
+{
+    pnanovdb_map_handle_t map = pnanovdb_grid_get_map(buf, grid);
+    return pnanovdb_map_apply_jacobi(buf, map, src);
+}
+
+// ------------------------------------------------ DitherLUT -----------------------------------------------------------
+
+// This table was generated with
+/**************
+
+static constexpr inline uint32
+SYSwang_inthash(uint32 key)
+{
+    // From http://www.concentric.net/~Ttwang/tech/inthash.htm
+    key += ~(key << 16);
+    key ^= (key >> 5);
+    key += (key << 3);
+    key ^= (key >> 13);
+    key += ~(key << 9);
+    key ^= (key >> 17);
+    return key;
+}
+
+static void
+ut_initDitherR(float *pattern, float offset,
+        int x, int y, int z, int res, int goalres)
+{
+    // These offsets are designed to maximize the difference between
+    // dither values in nearby voxels within a given 2x2x2 cell, without
+    // producing axis-aligned artifacts. They are organized in row-major
+    // order.
+    static const float theDitherOffset[] = {0,4,6,2,5,1,3,7};
+    static const float theScale = 0.125F;
+    int key = (((z << res) + y) << res) + x;
+
+    if (res == goalres)
+    {
+        pattern[key] = offset;
+        return;
+    }
+
+    // Randomly flip (on each axis) the dithering patterns used by the
+    // subcells. This key is xor'd with the subcell index below before
+    // looking up in the dither offset list.
+    key = SYSwang_inthash(key) & 7;
+
+    x <<= 1;
+    y <<= 1;
+    z <<= 1;
+
+    offset *= theScale;
+    for (int i = 0; i < 8; i++)
+        ut_initDitherR(pattern, offset+theDitherOffset[i ^ key]*theScale,
+                x+(i&1), y+((i&2)>>1), z+((i&4)>>2), res+1, goalres);
+}
+
+// This is a compact algorithm that accomplishes essentially the same thing
+// as ut_initDither() above. We should eventually switch to use this and
+// clean the dead code.
+static fpreal32 *
+ut_initDitherRecursive(int goalres)
+{
+    const int nfloat = 1 << (goalres*3);
+    float *pattern = new float[nfloat];
+    ut_initDitherR(pattern, 1.0F, 0, 0, 0, 0, goalres);
+
+    // This has built an even spacing from 1/nfloat to 1.0.
+    // however, our dither pattern should be 1/(nfloat+1) to nfloat/(nfloat+1)
+    // So we do a correction here. Note that the earlier calculations are
+    // done with powers of 2 so are exact, so it does make sense to delay
+    // the renormalization to this pass.
+    float correctionterm = nfloat / (nfloat+1.0F);
+    for (int i = 0; i < nfloat; i++)
+        pattern[i] *= correctionterm;
+    return pattern;
+}
+
+    theDitherMatrix = ut_initDitherRecursive(3);
+
+    for (int i = 0; i < 512/8; i ++)
+    {
+        for (int j = 0; j < 8; j ++)
+            std::cout << theDitherMatrix[i*8+j] << "f, ";
+        std::cout << std::endl;
+    }
+
+ **************/
+// 512-entry dither table printed by the generator quoted above (goalres 3, i.e. 1 << 9 entries); per that generator the values are spaced between 1/513 and 512/513.
+PNANOVDB_STATIC_CONST float pnanovdb_dither_lut[512] =
+{
+    0.14425f, 0.643275f, 0.830409f, 0.331384f, 0.105263f, 0.604289f, 0.167641f, 0.666667f,
+    0.892788f, 0.393762f, 0.0818713f, 0.580897f, 0.853801f, 0.354776f, 0.916179f, 0.417154f,
+    0.612086f, 0.11306f, 0.79922f, 0.300195f, 0.510721f, 0.0116959f, 0.947368f, 0.448343f,
+    0.362573f, 0.861598f, 0.0506823f, 0.549708f, 0.261209f, 0.760234f, 0.19883f, 0.697856f,
+    0.140351f, 0.639376f, 0.576998f, 0.0779727f, 0.522417f, 0.0233918f, 0.460039f, 0.959064f,
+    0.888889f, 0.389864f, 0.327485f, 0.826511f, 0.272904f, 0.77193f, 0.709552f, 0.210526f,
+    0.483431f, 0.982456f, 0.296296f, 0.795322f, 0.116959f, 0.615984f, 0.0545809f, 0.553606f,
+    0.732943f, 0.233918f, 0.545809f, 0.0467836f, 0.865497f, 0.366472f, 0.803119f, 0.304094f,
+    0.518519f, 0.0194932f, 0.45614f, 0.955166f, 0.729045f, 0.230019f, 0.54191f, 0.042885f,
+    0.269006f, 0.768031f, 0.705653f, 0.206628f, 0.479532f, 0.978558f, 0.292398f, 0.791423f,
+    0.237817f, 0.736842f, 0.424951f, 0.923977f, 0.136452f, 0.635478f, 0.323587f, 0.822612f,
+    0.986355f, 0.487329f, 0.674464f, 0.175439f, 0.88499f, 0.385965f, 0.573099f, 0.0740741f,
+    0.51462f, 0.0155945f, 0.202729f, 0.701754f, 0.148148f, 0.647174f, 0.834308f, 0.335283f,
+    0.265107f, 0.764133f, 0.951267f, 0.452242f, 0.896686f, 0.397661f, 0.08577f, 0.584795f,
+    0.8577f, 0.358674f, 0.920078f, 0.421053f, 0.740741f, 0.241715f, 0.678363f, 0.179337f,
+    0.109162f, 0.608187f, 0.17154f, 0.670565f, 0.491228f, 0.990253f, 0.42885f, 0.927875f,
+    0.0662768f, 0.565302f, 0.62768f, 0.128655f, 0.183236f, 0.682261f, 0.744639f, 0.245614f,
+    0.814815f, 0.315789f, 0.378168f, 0.877193f, 0.931774f, 0.432749f, 0.495127f, 0.994152f,
+    0.0350877f, 0.534113f, 0.97076f, 0.471735f, 0.214425f, 0.71345f, 0.526316f, 0.0272904f,
+    0.783626f, 0.2846f, 0.222222f, 0.721248f, 0.962963f, 0.463938f, 0.276803f, 0.775828f,
+    0.966862f, 0.467836f, 0.405458f, 0.904483f, 0.0701754f, 0.569201f, 0.881092f, 0.382066f,
+    0.218324f, 0.717349f, 0.654971f, 0.155945f, 0.818713f, 0.319688f, 0.132554f, 0.631579f,
+    0.0623782f, 0.561404f, 0.748538f, 0.249513f, 0.912281f, 0.413255f, 0.974659f, 0.475634f,
+    0.810916f, 0.311891f, 0.499025f, 0.998051f, 0.163743f, 0.662768f, 0.226121f, 0.725146f,
+    0.690058f, 0.191033f, 0.00389864f, 0.502924f, 0.557505f, 0.0584795f, 0.120858f, 0.619883f,
+    0.440546f, 0.939571f, 0.752437f, 0.253411f, 0.307992f, 0.807018f, 0.869396f, 0.37037f,
+    0.658869f, 0.159844f, 0.346979f, 0.846004f, 0.588694f, 0.0896686f, 0.152047f, 0.651072f,
+    0.409357f, 0.908382f, 0.596491f, 0.0974659f, 0.339181f, 0.838207f, 0.900585f, 0.401559f,
+    0.34308f, 0.842105f, 0.779727f, 0.280702f, 0.693957f, 0.194932f, 0.25731f, 0.756335f,
+    0.592593f, 0.0935673f, 0.0311891f, 0.530214f, 0.444444f, 0.94347f, 0.506823f, 0.00779727f,
+    0.68616f, 0.187135f, 0.124756f, 0.623782f, 0.288499f, 0.787524f, 0.350877f, 0.849903f,
+    0.436647f, 0.935673f, 0.873294f, 0.374269f, 0.538012f, 0.0389864f, 0.60039f, 0.101365f,
+    0.57115f, 0.0721248f, 0.758285f, 0.259259f, 0.719298f, 0.220273f, 0.532164f, 0.0331384f,
+    0.321637f, 0.820663f, 0.00974659f, 0.508772f, 0.469786f, 0.968811f, 0.282651f, 0.781676f,
+    0.539961f, 0.0409357f, 0.727096f, 0.22807f, 0.500975f, 0.00194932f, 0.563353f, 0.0643275f,
+    0.290448f, 0.789474f, 0.477583f, 0.976608f, 0.251462f, 0.750487f, 0.31384f, 0.812865f,
+    0.94152f, 0.442495f, 0.879142f, 0.380117f, 0.37232f, 0.871345f, 0.309942f, 0.808967f,
+    0.192982f, 0.692008f, 0.130604f, 0.62963f, 0.621832f, 0.122807f, 0.559454f, 0.0604289f,
+    0.660819f, 0.161793f, 0.723197f, 0.224172f, 0.403509f, 0.902534f, 0.840156f, 0.341131f,
+    0.411306f, 0.910331f, 0.473684f, 0.97271f, 0.653021f, 0.153996f, 0.0916179f, 0.590643f,
+    0.196881f, 0.695906f, 0.384016f, 0.883041f, 0.0955166f, 0.594542f, 0.157895f, 0.65692f,
+    0.945419f, 0.446394f, 0.633528f, 0.134503f, 0.844055f, 0.345029f, 0.906433f, 0.407407f,
+    0.165692f, 0.664717f, 0.103314f, 0.602339f, 0.126706f, 0.625731f, 0.189084f, 0.688109f,
+    0.91423f, 0.415205f, 0.851852f, 0.352827f, 0.875244f, 0.376218f, 0.937622f, 0.438596f,
+    0.317739f, 0.816764f, 0.255361f, 0.754386f, 0.996101f, 0.497076f, 0.933723f, 0.434698f,
+    0.567251f, 0.0682261f, 0.504873f, 0.00584795f, 0.247563f, 0.746589f, 0.185185f, 0.684211f,
+    0.037037f, 0.536062f, 0.0994152f, 0.598441f, 0.777778f, 0.278752f, 0.465887f, 0.964912f,
+    0.785575f, 0.28655f, 0.847953f, 0.348928f, 0.0292398f, 0.528265f, 0.7154f, 0.216374f,
+    0.39961f, 0.898636f, 0.961014f, 0.461988f, 0.0487329f, 0.547758f, 0.111111f, 0.610136f,
+    0.649123f, 0.150097f, 0.212476f, 0.711501f, 0.797271f, 0.298246f, 0.859649f, 0.360624f,
+    0.118908f, 0.617934f, 0.0565302f, 0.555556f, 0.329435f, 0.82846f, 0.516569f, 0.0175439f,
+    0.867446f, 0.368421f, 0.805068f, 0.306043f, 0.578947f, 0.079922f, 0.267057f, 0.766082f,
+    0.270955f, 0.76998f, 0.707602f, 0.208577f, 0.668616f, 0.169591f, 0.606238f, 0.107212f,
+    0.520468f, 0.0214425f, 0.45809f, 0.957115f, 0.419103f, 0.918129f, 0.356725f, 0.855751f,
+    0.988304f, 0.489279f, 0.426901f, 0.925926f, 0.450292f, 0.949318f, 0.512671f, 0.0136452f,
+    0.239766f, 0.738791f, 0.676413f, 0.177388f, 0.699805f, 0.20078f, 0.263158f, 0.762183f,
+    0.773879f, 0.274854f, 0.337232f, 0.836257f, 0.672515f, 0.173489f, 0.734893f, 0.235867f,
+    0.0253411f, 0.524366f, 0.586745f, 0.0877193f, 0.423002f, 0.922027f, 0.48538f, 0.984405f,
+    0.74269f, 0.243665f, 0.680312f, 0.181287f, 0.953216f, 0.454191f, 0.1423f, 0.641326f,
+    0.493177f, 0.992203f, 0.430799f, 0.929825f, 0.204678f, 0.703704f, 0.890838f, 0.391813f,
+    0.894737f, 0.395712f, 0.0838207f, 0.582846f, 0.0448343f, 0.54386f, 0.231969f, 0.730994f,
+    0.146199f, 0.645224f, 0.832359f, 0.333333f, 0.793372f, 0.294347f, 0.980507f, 0.481481f,
+    0.364522f, 0.863548f, 0.80117f, 0.302144f, 0.824561f, 0.325536f, 0.138402f, 0.637427f,
+    0.614035f, 0.11501f, 0.0526316f, 0.551657f, 0.0760234f, 0.575049f, 0.88694f, 0.387914f,
+};
+// Returns the table entry at (offset & 511) when dithering is enabled, and the constant 0.5f otherwise.
+PNANOVDB_FORCE_INLINE float pnanovdb_dither_lookup(pnanovdb_bool_t enabled, int offset)
+{
+    return enabled ? pnanovdb_dither_lut[offset & 511] : 0.5f;
+}
+
+// ------------------------------------------------ HDDA -----------------------------------------------------------
+// Everything from here to the matching #endif is compiled only when PNANOVDB_HDDA is defined.
+#ifdef PNANOVDB_HDDA
+
+// Comment out to disable this explicit round-off check
+#define PNANOVDB_ENFORCE_FORWARD_STEPPING
+// Stored as the "next crossing" parameter for axes along which the ray direction is exactly zero.
+#define PNANOVDB_HDDA_FLOAT_MAX 1e38f
+// Traversal state of the hierarchical DDA.
+struct pnanovdb_hdda_t
+{
+    pnanovdb_int32_t dim; // step size in voxels; voxel is aligned to it
+    float tmin; // current ray parameter
+    float tmax; // ray parameter at which stepping reports failure
+    pnanovdb_coord_t voxel; // current dim-aligned voxel coordinate
+    pnanovdb_coord_t step; // per-axis step sign: -1, 0 or +1
+    pnanovdb_vec3_t delta; // per-axis |1 / direction| (0 when the direction component is 0)
+    pnanovdb_vec3_t next; // per-axis ray parameter of the next boundary crossing
+};
+PNANOVDB_STRUCT_TYPEDEF(pnanovdb_hdda_t)
+// Converts a floating-point position to integer coordinates by flooring each component.
+PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_hdda_pos_to_ijk(PNANOVDB_IN(pnanovdb_vec3_t) pos)
+{
+    pnanovdb_coord_t voxel;
+    voxel.x = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).x));
+    voxel.y = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).y));
+    voxel.z = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).z));
+    return voxel;
+}
+// Like pnanovdb_hdda_pos_to_ijk, but clears the low bits with ~(dim - 1), which snaps each coordinate down to a multiple of dim when dim is a power of two.
+PNANOVDB_FORCE_INLINE pnanovdb_coord_t pnanovdb_hdda_pos_to_voxel(PNANOVDB_IN(pnanovdb_vec3_t) pos, int dim)
+{
+    pnanovdb_coord_t voxel;
+    voxel.x = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).x)) & (~(dim - 1));
+    voxel.y = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).y)) & (~(dim - 1));
+    voxel.z = pnanovdb_float_to_int32(pnanovdb_floor(PNANOVDB_DEREF(pos).z)) & (~(dim - 1));
+    return voxel;
+}
+// Evaluates the ray at parameter tmin: origin + direction * tmin.
+PNANOVDB_FORCE_INLINE pnanovdb_vec3_t pnanovdb_hdda_ray_start(PNANOVDB_IN(pnanovdb_vec3_t) origin, float tmin, PNANOVDB_IN(pnanovdb_vec3_t) direction)
+{
+    pnanovdb_vec3_t pos = pnanovdb_vec3_add(
+        pnanovdb_vec3_mul(PNANOVDB_DEREF(direction), pnanovdb_vec3_uniform(tmin)),
+        PNANOVDB_DEREF(origin)
+    );
+    return pos;
+}
+// Initializes the traversal at ray parameter tmin: snaps the start position to a dim-aligned voxel and fills step, delta and next for each axis. Axes with a zero direction component get step 0, delta 0 and next = PNANOVDB_HDDA_FLOAT_MAX.
+PNANOVDB_FORCE_INLINE void pnanovdb_hdda_init(PNANOVDB_INOUT(pnanovdb_hdda_t) hdda, PNANOVDB_IN(pnanovdb_vec3_t) origin, float tmin, PNANOVDB_IN(pnanovdb_vec3_t) direction, float tmax, int dim)
+{
+    PNANOVDB_DEREF(hdda).dim = dim;
+    PNANOVDB_DEREF(hdda).tmin = tmin;
+    PNANOVDB_DEREF(hdda).tmax = tmax;
+
+    pnanovdb_vec3_t pos = pnanovdb_hdda_ray_start(origin, tmin, direction);
+    pnanovdb_vec3_t dir_inv = pnanovdb_vec3_div(pnanovdb_vec3_uniform(1.f), PNANOVDB_DEREF(direction));
+
+    PNANOVDB_DEREF(hdda).voxel = pnanovdb_hdda_pos_to_voxel(PNANOVDB_REF(pos), dim);
+
+    // x
+    if (PNANOVDB_DEREF(direction).x == 0.f)
+    {
+        PNANOVDB_DEREF(hdda).next.x = PNANOVDB_HDDA_FLOAT_MAX;
+        PNANOVDB_DEREF(hdda).step.x = 0;
+        PNANOVDB_DEREF(hdda).delta.x = 0.f;
+    }
+    else if (dir_inv.x > 0.f) // moving towards +x: the next crossing is the far face of the cell (voxel + dim)
+    {
+        PNANOVDB_DEREF(hdda).step.x = 1;
+        PNANOVDB_DEREF(hdda).next.x = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.x + dim - pos.x) * dir_inv.x;
+        PNANOVDB_DEREF(hdda).delta.x = dir_inv.x;
+    }
+    else // moving towards -x: the next crossing is the near face of the cell (voxel)
+    {
+        PNANOVDB_DEREF(hdda).step.x = -1;
+        PNANOVDB_DEREF(hdda).next.x = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.x - pos.x) * dir_inv.x;
+        PNANOVDB_DEREF(hdda).delta.x = -dir_inv.x;
+    }
+
+    // y
+    if (PNANOVDB_DEREF(direction).y == 0.f)
+    {
+        PNANOVDB_DEREF(hdda).next.y = PNANOVDB_HDDA_FLOAT_MAX;
+        PNANOVDB_DEREF(hdda).step.y = 0;
+        PNANOVDB_DEREF(hdda).delta.y = 0.f;
+    }
+    else if (dir_inv.y > 0.f)
+    {
+        PNANOVDB_DEREF(hdda).step.y = 1;
+        PNANOVDB_DEREF(hdda).next.y = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.y + dim - pos.y) * dir_inv.y;
+        PNANOVDB_DEREF(hdda).delta.y = dir_inv.y;
+    }
+    else
+    {
+        PNANOVDB_DEREF(hdda).step.y = -1;
+        PNANOVDB_DEREF(hdda).next.y = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.y - pos.y) * dir_inv.y;
+        PNANOVDB_DEREF(hdda).delta.y = -dir_inv.y;
+    }
+
+    // z
+    if (PNANOVDB_DEREF(direction).z == 0.f)
+    {
+        PNANOVDB_DEREF(hdda).next.z = PNANOVDB_HDDA_FLOAT_MAX;
+        PNANOVDB_DEREF(hdda).step.z = 0;
+        PNANOVDB_DEREF(hdda).delta.z = 0.f;
+    }
+    else if (dir_inv.z > 0.f)
+    {
+        PNANOVDB_DEREF(hdda).step.z = 1;
+        PNANOVDB_DEREF(hdda).next.z = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.z + dim - pos.z) * dir_inv.z;
+        PNANOVDB_DEREF(hdda).delta.z = dir_inv.z;
+    }
+    else
+    {
+        PNANOVDB_DEREF(hdda).step.z = -1;
+        PNANOVDB_DEREF(hdda).next.z = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.z - pos.z) * dir_inv.z;
+        PNANOVDB_DEREF(hdda).delta.z = -dir_inv.z;
+    }
+}
+// Switches the traversal to a new dim at the current tmin. Returns PNANOVDB_FALSE and changes nothing when dim is unchanged; otherwise recomputes voxel and next (step and delta are kept) and returns PNANOVDB_TRUE.
+PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_hdda_update(PNANOVDB_INOUT(pnanovdb_hdda_t) hdda, PNANOVDB_IN(pnanovdb_vec3_t) origin, PNANOVDB_IN(pnanovdb_vec3_t) direction, int dim)
+{
+    if (PNANOVDB_DEREF(hdda).dim == dim)
+    {
+        return PNANOVDB_FALSE;
+    }
+    PNANOVDB_DEREF(hdda).dim = dim;
+
+    pnanovdb_vec3_t pos = pnanovdb_vec3_add(
+        pnanovdb_vec3_mul(PNANOVDB_DEREF(direction), pnanovdb_vec3_uniform(PNANOVDB_DEREF(hdda).tmin)),
+        PNANOVDB_DEREF(origin)
+    );
+    pnanovdb_vec3_t dir_inv = pnanovdb_vec3_div(pnanovdb_vec3_uniform(1.f), PNANOVDB_DEREF(direction));
+
+    PNANOVDB_DEREF(hdda).voxel = pnanovdb_hdda_pos_to_voxel(PNANOVDB_REF(pos), dim);
+
+    if (PNANOVDB_DEREF(hdda).step.x != 0) // axes with step 0 keep the next value set by init
+    {
+        PNANOVDB_DEREF(hdda).next.x = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.x - pos.x) * dir_inv.x;
+        if (PNANOVDB_DEREF(hdda).step.x > 0)
+        {
+            PNANOVDB_DEREF(hdda).next.x += dim * dir_inv.x;
+        }
+    }
+    if (PNANOVDB_DEREF(hdda).step.y != 0)
+    {
+        PNANOVDB_DEREF(hdda).next.y = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.y - pos.y) * dir_inv.y;
+        if (PNANOVDB_DEREF(hdda).step.y > 0)
+        {
+            PNANOVDB_DEREF(hdda).next.y += dim * dir_inv.y;
+        }
+    }
+    if (PNANOVDB_DEREF(hdda).step.z != 0)
+    {
+        PNANOVDB_DEREF(hdda).next.z = PNANOVDB_DEREF(hdda).tmin + (PNANOVDB_DEREF(hdda).voxel.z - pos.z) * dir_inv.z;
+        if (PNANOVDB_DEREF(hdda).step.z > 0)
+        {
+            PNANOVDB_DEREF(hdda).next.z += dim * dir_inv.z;
+        }
+    }
+
+    return PNANOVDB_TRUE;
+}
+// Advances across the nearest pending boundary (x wins only when strictly smallest, then y over z, else z): tmin becomes that crossing, next and voxel move by dim along the chosen axis. Returns whether the new tmin is still <= tmax.
+PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_hdda_step(PNANOVDB_INOUT(pnanovdb_hdda_t) hdda)
+{
+    pnanovdb_bool_t ret;
+    if (PNANOVDB_DEREF(hdda).next.x < PNANOVDB_DEREF(hdda).next.y && PNANOVDB_DEREF(hdda).next.x < PNANOVDB_DEREF(hdda).next.z)
+    {
+#ifdef PNANOVDB_ENFORCE_FORWARD_STEPPING
+        if (PNANOVDB_DEREF(hdda).next.x <= PNANOVDB_DEREF(hdda).tmin) // crossing is not ahead of tmin: nudge it forward before stepping
+        {
+            PNANOVDB_DEREF(hdda).next.x += PNANOVDB_DEREF(hdda).tmin - 0.999999f * PNANOVDB_DEREF(hdda).next.x + 1.0e-6f;
+        }
+#endif
+        PNANOVDB_DEREF(hdda).tmin = PNANOVDB_DEREF(hdda).next.x;
+        PNANOVDB_DEREF(hdda).next.x += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).delta.x;
+        PNANOVDB_DEREF(hdda).voxel.x += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).step.x;
+        ret = PNANOVDB_DEREF(hdda).tmin <= PNANOVDB_DEREF(hdda).tmax;
+    }
+    else if (PNANOVDB_DEREF(hdda).next.y < PNANOVDB_DEREF(hdda).next.z)
+    {
+#ifdef PNANOVDB_ENFORCE_FORWARD_STEPPING
+        if (PNANOVDB_DEREF(hdda).next.y <= PNANOVDB_DEREF(hdda).tmin)
+        {
+            PNANOVDB_DEREF(hdda).next.y += PNANOVDB_DEREF(hdda).tmin - 0.999999f * PNANOVDB_DEREF(hdda).next.y + 1.0e-6f;
+        }
+#endif
+        PNANOVDB_DEREF(hdda).tmin = PNANOVDB_DEREF(hdda).next.y;
+        PNANOVDB_DEREF(hdda).next.y += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).delta.y;
+        PNANOVDB_DEREF(hdda).voxel.y += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).step.y;
+        ret = PNANOVDB_DEREF(hdda).tmin <= PNANOVDB_DEREF(hdda).tmax;
+    }
+    else
+    {
+#ifdef PNANOVDB_ENFORCE_FORWARD_STEPPING
+        if (PNANOVDB_DEREF(hdda).next.z <= PNANOVDB_DEREF(hdda).tmin)
+        {
+            PNANOVDB_DEREF(hdda).next.z += PNANOVDB_DEREF(hdda).tmin - 0.999999f * PNANOVDB_DEREF(hdda).next.z + 1.0e-6f;
+        }
+#endif
+        PNANOVDB_DEREF(hdda).tmin = PNANOVDB_DEREF(hdda).next.z;
+        PNANOVDB_DEREF(hdda).next.z += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).delta.z;
+        PNANOVDB_DEREF(hdda).voxel.z += PNANOVDB_DEREF(hdda).dim * PNANOVDB_DEREF(hdda).step.z;
+        ret = PNANOVDB_DEREF(hdda).tmin <= PNANOVDB_DEREF(hdda).tmax;
+    }
+    return ret;
+}
+// Clips the ray against the box [bbox_min, bbox_max]: computes per-axis entry/exit parameters, takes the largest entry (tnear) and smallest exit (tfar), and returns tnear <= tfar. tmin and tmax are tightened to tnear/tfar whether or not the box is hit.
+PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_hdda_ray_clip(
+    PNANOVDB_IN(pnanovdb_vec3_t) bbox_min,
+    PNANOVDB_IN(pnanovdb_vec3_t) bbox_max,
+    PNANOVDB_IN(pnanovdb_vec3_t) origin, PNANOVDB_INOUT(float) tmin,
+    PNANOVDB_IN(pnanovdb_vec3_t) direction, PNANOVDB_INOUT(float) tmax
+)
+{
+    pnanovdb_vec3_t dir_inv = pnanovdb_vec3_div(pnanovdb_vec3_uniform(1.f), PNANOVDB_DEREF(direction));
+    pnanovdb_vec3_t t0 = pnanovdb_vec3_mul(pnanovdb_vec3_sub(PNANOVDB_DEREF(bbox_min), PNANOVDB_DEREF(origin)), dir_inv);
+    pnanovdb_vec3_t t1 = pnanovdb_vec3_mul(pnanovdb_vec3_sub(PNANOVDB_DEREF(bbox_max), PNANOVDB_DEREF(origin)), dir_inv);
+    pnanovdb_vec3_t tmin3 = pnanovdb_vec3_min(t0, t1);
+    pnanovdb_vec3_t tmax3 = pnanovdb_vec3_max(t0, t1);
+    float tnear = pnanovdb_max(tmin3.x, pnanovdb_max(tmin3.y, tmin3.z));
+    float tfar = pnanovdb_min(tmax3.x, pnanovdb_min(tmax3.y, tmax3.z));
+    pnanovdb_bool_t hit = tnear <= tfar;
+    PNANOVDB_DEREF(tmin) = pnanovdb_max(PNANOVDB_DEREF(tmin), tnear);
+    PNANOVDB_DEREF(tmax) = pnanovdb_min(PNANOVDB_DEREF(tmax), tfar);
+    return hit;
+}
+// Marches the ray through the grid, clipped to the root bounding box, and returns PNANOVDB_TRUE at the first active voxel whose float value has the opposite sign of the value at the clipped ray start (v * v0 < 0); thit and v receive the hit parameter and value. NOTE(review): value and dim lookups pass PNANOVDB_GRID_TYPE_FLOAT while is_active passes grid_type - presumably intentional since values are read with pnanovdb_read_float; confirm.
+PNANOVDB_FORCE_INLINE pnanovdb_bool_t pnanovdb_hdda_zero_crossing(
+    pnanovdb_grid_type_t grid_type,
+    pnanovdb_buf_t buf,
+    PNANOVDB_INOUT(pnanovdb_readaccessor_t) acc,
+    PNANOVDB_IN(pnanovdb_vec3_t) origin, float tmin,
+    PNANOVDB_IN(pnanovdb_vec3_t) direction, float tmax,
+    PNANOVDB_INOUT(float) thit,
+    PNANOVDB_INOUT(float) v
+)
+{
+    pnanovdb_coord_t bbox_min = pnanovdb_root_get_bbox_min(buf, PNANOVDB_DEREF(acc).root);
+    pnanovdb_coord_t bbox_max = pnanovdb_root_get_bbox_max(buf, PNANOVDB_DEREF(acc).root);
+    pnanovdb_vec3_t bbox_minf = pnanovdb_coord_to_vec3(bbox_min);
+    pnanovdb_vec3_t bbox_maxf = pnanovdb_coord_to_vec3(pnanovdb_coord_add(bbox_max, pnanovdb_coord_uniform(1))); // +1 on every axis so the box also covers the voxel at bbox_max
+
+    pnanovdb_bool_t hit = pnanovdb_hdda_ray_clip(PNANOVDB_REF(bbox_minf), PNANOVDB_REF(bbox_maxf), origin, PNANOVDB_REF(tmin), direction, PNANOVDB_REF(tmax));
+    if (!hit || tmax > 1.0e20f) // give up when the box is missed or the clipped tmax is still above 1e20
+    {
+        return PNANOVDB_FALSE;
+    }
+
+    pnanovdb_vec3_t pos = pnanovdb_hdda_ray_start(origin, tmin, direction);
+    pnanovdb_coord_t ijk = pnanovdb_hdda_pos_to_ijk(PNANOVDB_REF(pos));
+
+    pnanovdb_address_t address = pnanovdb_readaccessor_get_value_address(PNANOVDB_GRID_TYPE_FLOAT, buf, acc, PNANOVDB_REF(ijk));
+    float v0 = pnanovdb_read_float(buf, address); // reference value whose sign a hit must differ from
+
+    pnanovdb_int32_t dim = pnanovdb_uint32_as_int32(pnanovdb_readaccessor_get_dim(PNANOVDB_GRID_TYPE_FLOAT, buf, acc, PNANOVDB_REF(ijk)));
+    pnanovdb_hdda_t hdda;
+    pnanovdb_hdda_init(PNANOVDB_REF(hdda), origin, tmin, direction, tmax, dim);
+    while (pnanovdb_hdda_step(PNANOVDB_REF(hdda)))
+    {
+        pnanovdb_vec3_t pos_start = pnanovdb_hdda_ray_start(origin, hdda.tmin + 1.0001f, direction); // sample slightly past the boundary just crossed
+        ijk = pnanovdb_hdda_pos_to_ijk(PNANOVDB_REF(pos_start));
+        dim = pnanovdb_uint32_as_int32(pnanovdb_readaccessor_get_dim(PNANOVDB_GRID_TYPE_FLOAT, buf, acc, PNANOVDB_REF(ijk)));
+        pnanovdb_hdda_update(PNANOVDB_REF(hdda), origin, direction, dim);
+        if (hdda.dim > 1 || !pnanovdb_readaccessor_is_active(grid_type, buf, acc, PNANOVDB_REF(ijk))) // keep stepping until at single-voxel resolution on an active voxel
+        {
+            continue;
+        }
+        while (pnanovdb_hdda_step(PNANOVDB_REF(hdda)) && pnanovdb_readaccessor_is_active(grid_type, buf, acc, PNANOVDB_REF(hdda.voxel)))
+        {
+            ijk = hdda.voxel;
+            pnanovdb_address_t address = pnanovdb_readaccessor_get_value_address(PNANOVDB_GRID_TYPE_FLOAT, buf, acc, PNANOVDB_REF(ijk));
+            PNANOVDB_DEREF(v) = pnanovdb_read_float(buf, address);
+            if (PNANOVDB_DEREF(v) * v0 < 0.f)
+            {
+                PNANOVDB_DEREF(thit) = hdda.tmin;
+                return PNANOVDB_TRUE;
+            }
+        }
+    }
+    return PNANOVDB_FALSE;
+}
+
+#endif
+
+#endif // end of NANOVDB_PNANOVDB_H_HAS_BEEN_INCLUDED
diff --git a/code/renderer/shaders/crp/common.hlsli b/code/renderer/shaders/crp/common.hlsli
index 98378d3..bca4908 100644
--- a/code/renderer/shaders/crp/common.hlsli
+++ b/code/renderer/shaders/crp/common.hlsli
@@ -48,6 +48,12 @@ along with Challenge Quake 3. If not, see .
#define UINT32_MAX 0xFFFFFFFF #define UINT64_MAX 0xFFFFFFFFFFFFFFFF +#define FLT_INF asfloat(0x7F800000) +#define FLT_NAN asfloat(0xFFC00000) +#define FLT_MAX asfloat(0x7F7FFFFF) + +#define HALF_MAX 65504.0 + typedef RaytracingAccelerationStructure RTAS; @@ -69,6 +75,20 @@ float Brightness(float3 color) return brightness; } +float3 ColorAtBrightness(float3 color, float targetBrightness) +{ + float brightness = Brightness(color); + if(brightness <= 0.000001) + { + color = float3(0.5, 0.5, 0.5); + brightness = Brightness(color); + } + float brightnessScale = targetBrightness / brightness; + float3 result = color * brightnessScale; + + return result; +} + float4 MakeGreyscale(float4 input, float amount) { float grey = dot(input.rgb, float3(0.299, 0.587, 0.114)); @@ -163,6 +183,13 @@ float EaseInQuad(float x) return x * x; } +float EaseOutQuad(float x) +{ + float y = 1.0 - x; + + return 1.0 - y * y; +} + float EaseInExp(float x) { return x == 0.0 ? 0.0 : pow(2.0, 10.0 * x - 10.0); @@ -404,6 +431,111 @@ float AnimateBlueNoise(float blueNoise, uint frameIndex) return frac(blueNoise + float(frameIndex % 32) * 0.61803399); } +// credit: David Hoskins +float Hash1To1(float p) +{ + p = frac(p * .1031); + p *= p + 33.33; + p *= p + p; + return frac(p); +} + +// credit: David Hoskins +float Hash2To1(float2 p) +{ + float3 p3 = frac(float3(p.xyx) * .1031); + p3 += dot(p3, p3.yzx + 33.33); + return frac((p3.x + p3.y) * p3.z); +} + +// credit: David Hoskins +float Hash3To1(float3 p3) +{ + p3 = frac(p3 * .1031); + p3 += dot(p3, p3.yzx + 33.33); + return frac((p3.x + p3.y) * p3.z); +} + +// credit: David Hoskins +float2 Hash1To2(float p) +{ + float3 p3 = frac(float3(p, p, p) * float3(.1031, .1030, .0973)); + p3 += dot(p3, p3.yzx + 33.33); + return frac((p3.xx + p3.yz) * p3.zy); +} + +// credit: David Hoskins +float2 Hash2To2(float2 p) +{ + float3 p3 = frac(float3(p.xyx) * float3(.1031, .1030, .0973)); + p3 += dot(p3, p3.yzx + 33.33); + return frac((p3.xx + p3.yz) * p3.zy); +} + +// 
credit: David Hoskins +float2 Hash3To2(float3 p3) +{ + p3 = frac(p3 * float3(.1031, .1030, .0973)); + p3 += dot(p3, p3.yzx + 33.33); + return frac((p3.xx + p3.yz) * p3.zy); +} + +// credit: David Hoskins +float3 Hash1To3(float p) +{ + float3 p3 = frac(float3(p, p, p) * float3(.1031, .1030, .0973)); + p3 += dot(p3, p3.yzx + 33.33); + return frac((p3.xxy + p3.yzz) * p3.zyx); +} + +// credit: David Hoskins +float3 Hash2To3(float2 p) +{ + float3 p3 = frac(float3(p.xyx) * float3(.1031, .1030, .0973)); + p3 += dot(p3, p3.yxz + 33.33); + return frac((p3.xxy + p3.yzz) * p3.zyx); +} + +// credit: David Hoskins +float3 Hash3To3(float3 p3) +{ + p3 = frac(p3 * float3(.1031, .1030, .0973)); + p3 += dot(p3, p3.yxz + 33.33); + return frac((p3.xxy + p3.yxx) * p3.zyx); +} + +// credit: David Hoskins +float4 Hash1To4(float p) +{ + float4 p4 = frac(float4(p, p, p, p) * float4(.1031, .1030, .0973, .1099)); + p4 += dot(p4, p4.wzxy + 33.33); + return frac((p4.xxyz + p4.yzzw) * p4.zywx); +} + +// credit: David Hoskins +float4 Hash2To4(float2 p) +{ + float4 p4 = frac(float4(p.xyxy) * float4(.1031, .1030, .0973, .1099)); + p4 += dot(p4, p4.wzxy + 33.33); + return frac((p4.xxyz + p4.yzzw) * p4.zywx); +} + +// credit: David Hoskins +float4 Hash3To4(float3 p) +{ + float4 p4 = frac(float4(p.xyzx) * float4(.1031, .1030, .0973, .1099)); + p4 += dot(p4, p4.wzxy + 33.33); + return frac((p4.xxyz + p4.yzzw) * p4.zywx); +} + +// credit: David Hoskins +float4 Hash4To4(float4 p4) +{ + p4 = frac(p4 * float4(.1031, .1030, .0973, .1099)); + p4 += dot(p4, p4.wzxy + 33.33); + return frac((p4.xxyz + p4.yzzw) * p4.zywx); +} + float2 NDCToTC(float2 ndc) { float2 tc = ndc * float2(0.5, -0.5) + float2(0.5, 0.5); @@ -515,12 +647,13 @@ int FlattenIndex(int3 tileIndex, int3 tileResolution) uint3 UnflattenIndex(uint flatIndex, uint3 tileResolution) { + uint w = tileResolution.x; uint h = tileResolution.y; - uint wh = tileResolution.x * h; + uint wh = w * h; uint z = flatIndex / wh; flatIndex -= z * wh; - uint y = 
flatIndex / h; - uint x = flatIndex - y * h; + uint y = flatIndex / w; + uint x = flatIndex - y * w; uint3 result = uint3(x, y, z); return result; @@ -528,12 +661,13 @@ uint3 UnflattenIndex(uint flatIndex, uint3 tileResolution) int3 UnflattenIndex(int flatIndex, int3 tileResolution) { + int w = tileResolution.x; int h = tileResolution.y; - int wh = tileResolution.x * h; + int wh = w * h; int z = flatIndex / wh; flatIndex -= z * wh; - int y = flatIndex / h; - int x = flatIndex - y * h; + int y = flatIndex / w; + int x = flatIndex - y * w; int3 result = int3(x, y, z); return result; @@ -545,6 +679,12 @@ void ClearBoundingBox(out int3 boxMin, out int3 boxMax) boxMax = int3(INT32_MIN, INT32_MIN, INT32_MIN); } +void ClearBoundingBox(out float3 boxMin, out float3 boxMax) +{ + boxMin = float3(FLT_MAX, FLT_MAX, FLT_MAX); + boxMax = float3(-FLT_MAX, -FLT_MAX, -FLT_MAX); +} + template void ExpandBoundingBox(inout T boxMin, inout T boxMax, T newPoint) { @@ -699,17 +839,103 @@ float3 DirectionFromLongLat(float longitude01, float latitude01) return direction; } -float3 AmbientColor(float4 payloadA, float4 payloadB, float3 normal, float3 fallbackColor) +float SphereVolume(float radius) { - float3 ambColor = payloadA.rgb; - float3 localColor = float3(payloadA.a, payloadB.rg); - float3 localDir = DirectionFromLongLat(payloadB.b, payloadB.a); - float localScale = dot(localDir, normal) * 0.5 + 0.5; // wraps around - float3 interpColor = ambColor + localColor * localScale; - float brightNew = Brightness(interpColor); - float brightFall = Brightness(fallbackColor); - float t = saturate(brightNew / max(brightFall, 0.001)); - float3 color = lerp(fallbackColor, interpColor, t); + float volume = ((4.0 / 3.0) * PI) * radius * radius * radius; - return color; + return volume; +} + +// "2D Polyhedral Bounds of a Clipped, Perspective-Projected 3D Sphere" by Mara and McGuire +float2 ProjectedSphereExtentsAxis(float xy, float z, float r) +{ + float t = sqrt(xy * xy + z * z - r * r); + float min 
= (t * xy - r * z) / (t * z + r * xy); + float max = (t * xy + r * z) / (t * z - r * xy); + float2 result = float2(min, max); + + return result; +} + +// "2D Polyhedral Bounds of a Clipped, Perspective-Projected 3D Sphere" by Mara and McGuire +float4 ProjectedSphereExtentsNDC(float3 spherePositionWS, float sphereRadius, matrix viewMatrix, matrix projMatrix) +{ + float4 spherePosVSw = mul(viewMatrix, float4(spherePositionWS, 1)); + float3 spherePosVS = spherePosVSw.xyz / spherePosVSw.w; + float2 extentsX = ProjectedSphereExtentsAxis(spherePosVS.x, -spherePosVS.z, sphereRadius) * projMatrix[0][0] + projMatrix[2][0]; + float2 extentsY = ProjectedSphereExtentsAxis(spherePosVS.y, spherePosVS.z, sphereRadius) * projMatrix[1][1] + projMatrix[2][1]; + float4 result = float4(extentsX.x, extentsY.x, extentsX.y, extentsY.y); + + return result; +} + +float3 Project(float3 P, matrix m) +{ + float4 Qw = mul(m, float4(P, 1)); + float3 Q = Qw.xyz / Qw.w; + + return Q; +} + +float VoxelStepSize(float3 dir, float3 voxelSize) +{ + float3 stepSize3 = voxelSize / max(abs(dir), float(0.000001).xxx); + float stepSize = min3(stepSize3.x, stepSize3.y, stepSize3.z); + + return stepSize; +} + +float2x2 RandomRotationMatrix2x2(float3 position) +{ + float angle = Hash3To1(position) * 2.0 * PI; + float sin, cos; + sincos(angle, sin, cos); + float2x2 result = float2x2(cos, -sin, sin, cos); + + return result; +} + +float3x3 RandomRotationMatrix3x3(float3 position) +{ + float3 angles = Hash3To3(position) * 2.0 * PI; + float3 sin, cos; + sincos(angles.x, sin.x, cos.x); + sincos(angles.y, sin.y, cos.y); + sincos(angles.z, sin.z, cos.z); + float r0 = cos.x * cos.y; + float r1 = cos.x * sin.y * sin.z - sin.x * cos.z; + float r2 = cos.x * sin.y * cos.z + sin.x * sin.z; + float r3 = sin.x * cos.y; + float r4 = sin.x * sin.y * sin.z + cos.x * cos.z; + float r5 = sin.x * sin.y * cos.z - cos.x * sin.z; + float r6 = -sin.y; + float r7 = cos.y * sin.z; + float r8 = cos.y * cos.z; + float3x3 result = 
float3x3(r0, r1, r2, r3, r4, r5, r6, r7, r8); + + return result; +} + +float2x2 IdentityMatrix2x2() +{ + return float2x2( + 1, 0, + 0, 1); +} + +float3x3 IdentityMatrix3x3() +{ + return float3x3( + 1, 0, 0, + 0, 1, 0, + 0, 0, 1); +} + +matrix IdentityMatrix4x4() +{ + return matrix( + 1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, 0, + 0, 0, 0, 1); } diff --git a/code/renderer/shaders/crp/light_grid.h.hlsli b/code/renderer/shaders/crp/light_grid.h.hlsli new file mode 100644 index 0000000..009c028 --- /dev/null +++ b/code/renderer/shaders/crp/light_grid.h.hlsli @@ -0,0 +1,141 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . 
+=========================================================================== +*/ +// shared structure for the Quake 3 light grid + + +#pragma once + + +#include "typedefs.h.hlsli" +#if !defined(__cplusplus) +# include "common.hlsli" +#endif + +#if defined(__cplusplus) +# pragma pack(push, 4) +#endif + +struct LightGridRC +{ + float3 centerPosition; + uint textureAIndex; + float3 worldScale; + uint textureBIndex; + uint samplerIndex; + uint isAvailable; +}; + +#if defined(__cplusplus) +# pragma pack(pop) +#endif + +#if defined(__cplusplus) + static_assert(sizeof(LightGridRC) == 40, "sizeof(LightGridRC) is wrong"); +#endif + +#if !defined(__cplusplus) + +struct LightGridSample +{ + float4 a; + float4 b; + + float3 GetLightDirection() + { + return DirectionFromLongLat(b.z, b.w); + } + + float3 GetLocalColor() + { + return float3(a.w, b.xy); + } + + float3 GetGlobalColor() + { + return a.xyz; + } + + float3 GetAmbientColor(float3 normal, float3 fallbackColor, float ambColorScale, float localColorScale) + { + float3 ambColor = GetGlobalColor(); + float3 localColor = GetLocalColor(); + float3 localDir = GetLightDirection(); + float localScale = dot(localDir, normal) * 0.5 + 0.5; // wraps around + float3 interpColor = ambColor * ambColorScale + localColor * localScale * localColorScale; + float brightNew = Brightness(interpColor); + float brightFall = Brightness(fallbackColor); + float t = saturate(brightNew / max(brightFall, 0.001)); + float3 color = lerp(fallbackColor, interpColor, t); + + return color; + } +}; + +struct LightGrid +{ + float3 centerPosition; + float3 worldScale; + float3 textureSize; + Texture3D textureA; + Texture3D textureB; + SamplerState sampler0; + + LightGridSample SampleAtPosition(float3 positionWS) + { + float3 ambientTC = AABoxWorldSpaceToTC(positionWS, centerPosition, textureSize, worldScale); + LightGridSample sample; + sample.a = textureA.SampleLevel(sampler0, ambientTC, 0); + sample.b = textureB.SampleLevel(sampler0, ambientTC, 0); + + 
return sample; + } + + LightGridSample SampleAtIndex(int3 index) + { + LightGridSample sample; + sample.a = textureA[index]; + sample.b = textureB[index]; + + return sample; + } + + float3 IndexToWorldSpace(int3 voxelIndex) + { + float3 voxelCenter = AABoxIndexToWorldSpace(voxelIndex, centerPosition, textureSize, worldScale); + + return voxelCenter; + } +}; + +LightGrid GetLightGrid(LightGridRC rc) +{ + LightGrid grid; + grid.textureA = ResourceDescriptorHeap[rc.textureAIndex]; + grid.textureB = ResourceDescriptorHeap[rc.textureBIndex]; + grid.sampler0 = SamplerDescriptorHeap[rc.samplerIndex]; + grid.centerPosition = rc.centerPosition; + grid.worldScale = rc.worldScale; + grid.textureSize = float3(GetTextureSize(grid.textureA)); + + return grid; +} + +#endif diff --git a/code/renderer/shaders/crp/particles_clear.hlsl b/code/renderer/shaders/crp/particles_clear.hlsl new file mode 100644 index 0000000..196ed9f --- /dev/null +++ b/code/renderer/shaders/crp/particles_clear.hlsl @@ -0,0 +1,62 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . 
+=========================================================================== +*/ +// GPU particle system: emitter and particle free list initialization + + +#include "common.hlsli" +#include "scene_view.h.hlsli" + + +cbuffer RootConstants +{ + uint emitterBufferIndex; + uint deadBufferIndex; + uint emitterIndex; + uint firstParticle; + uint particleCount; + float maxSeconds; +} + +[numthreads(64, 1, 1)] +void cs(uint3 id : SV_DispatchThreadID) +{ + if(id.x >= particleCount) + { + return; + } + + if(id.x == 0) + { + RWStructuredBuffer emitterBuffer = ResourceDescriptorHeap[emitterBufferIndex]; + ParticleEmitter e; + e.deadCount = particleCount; + e.firstIndex = firstParticle; + e.liveCount = 0; + e.liveCount2 = 0; + e.totalCount = particleCount; + e.emitCount = 0; + e.maxSeconds = maxSeconds; + emitterBuffer[emitterIndex] = e; + } + + RWStructuredBuffer deadBuffer = ResourceDescriptorHeap[deadBufferIndex]; + deadBuffer[firstParticle + id.x] = particleCount - 1 - id.x; +} diff --git a/code/renderer/shaders/crp/particles_emit.hlsl b/code/renderer/shaders/crp/particles_emit.hlsl new file mode 100644 index 0000000..44c84b7 --- /dev/null +++ b/code/renderer/shaders/crp/particles_emit.hlsl @@ -0,0 +1,74 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. 
If not, see . +=========================================================================== +*/ +// GPU particle system: particle emission step + + +#include "common.hlsli" +#include "scene_view.h.hlsli" + + +cbuffer RootConstants +{ + uint particleBufferIndex; + uint liveBufferIndex; + uint deadBufferIndex; + uint emitterBufferIndex; + uint emitterIndex; +} + +[numthreads(64, 1, 1)] +void cs(uint3 id : SV_DispatchThreadID) +{ + RWStructuredBuffer emitterBuffer = ResourceDescriptorHeap[emitterBufferIndex]; + uint emitCount = emitterBuffer[emitterIndex].emitCount; + if(id.x >= emitCount) + { + return; + } + + RWStructuredBuffer particleBuffer = ResourceDescriptorHeap[particleBufferIndex]; + RWStructuredBuffer deadBuffer = ResourceDescriptorHeap[deadBufferIndex]; + RWStructuredBuffer liveBuffer = ResourceDescriptorHeap[liveBufferIndex]; + SceneView scene = GetSceneView(); + + uint firstIndex = emitterBuffer[emitterIndex].firstIndex; + uint oldDeadCount; + InterlockedAdd(emitterBuffer[emitterIndex].deadCount, -1, oldDeadCount); + uint newIndex = deadBuffer[firstIndex + oldDeadCount - 1]; + uint oldLiveCount; + InterlockedAdd(emitterBuffer[emitterIndex].liveCount, 1, oldLiveCount); + liveBuffer[firstIndex + oldLiveCount] = newIndex; + + Particle p; +#if 1 // @TODO: insert proper logic here + p.absorption = 1.0; + p.anisotropy = 0.5; + p.isEmissive = 0; + p.position = float3(694, -42, -300); + p.velocity = (Hash1To3(scene.frameSeed + 17.69 * float(id.x)) * 2.0 - 1.0) * 20.0; + p.radius = 10.0; + p.scattering = float3(1.0, 0.5, 0.0); + p.lifeTime = 0.0; + p.froxelMin = int3(0, 0, 0); + p.froxelMax = int3(-1, -1, -1); +#endif + particleBuffer[firstIndex + newIndex] = p; +} diff --git a/code/renderer/shaders/crp/particles_setup.hlsl b/code/renderer/shaders/crp/particles_setup.hlsl new file mode 100644 index 0000000..f969267 --- /dev/null +++ b/code/renderer/shaders/crp/particles_setup.hlsl @@ -0,0 +1,50 @@ +/* 
+=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . +=========================================================================== +*/ +// GPU particle system: sets up buffers for 1 emission step and 1 simulation step + + +#include "common.hlsli" +#include "scene_view.h.hlsli" + + +cbuffer RootConstants +{ + uint emitterBufferIndex; + uint indirectBufferIndex; + uint emitterIndex; + uint emitCountRequest; +} + +[numthreads(1, 1, 1)] +void cs(uint3 id : SV_DispatchThreadID) +{ + RWStructuredBuffer emitterBuffer = ResourceDescriptorHeap[emitterBufferIndex]; + RWByteAddressBuffer indirectBuffer = ResourceDescriptorHeap[indirectBufferIndex]; + + uint deadCount = emitterBuffer[emitterIndex].deadCount; + uint liveCount = emitterBuffer[emitterIndex].liveCount2; + uint emitCount = min(deadCount, emitCountRequest); + indirectBuffer.Store(0, (emitCount + 63) / 64); // emit.x + indirectBuffer.Store(12, (liveCount + emitCount + 63) / 64); // simulate.x + emitterBuffer[emitterIndex].liveCount = liveCount; + emitterBuffer[emitterIndex].liveCount2 = 0; + emitterBuffer[emitterIndex].emitCount = emitCount; +} diff --git a/code/renderer/shaders/crp/particles_simulate.hlsl b/code/renderer/shaders/crp/particles_simulate.hlsl new file mode 100644 index 0000000..eaef713 --- 
/dev/null +++ b/code/renderer/shaders/crp/particles_simulate.hlsl @@ -0,0 +1,86 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . +=========================================================================== +*/ +// GPU particle system: particle simulation step + + +#include "common.hlsli" +#include "scene_view.h.hlsli" + + +cbuffer RootConstants +{ + uint particleBufferIndex; + uint liveSrcBufferIndex; + uint liveDstBufferIndex; + uint deadBufferIndex; + uint emitterBufferIndex; + uint emitterIndex; +} + +[numthreads(64, 1, 1)] +void cs(uint3 id : SV_DispatchThreadID) +{ + RWStructuredBuffer emitterBuffer = ResourceDescriptorHeap[emitterBufferIndex]; + if(id.x >= emitterBuffer[emitterIndex].liveCount) + { + return; + } + + RWStructuredBuffer particleBuffer = ResourceDescriptorHeap[particleBufferIndex]; + RWStructuredBuffer liveSrcBuffer = ResourceDescriptorHeap[liveSrcBufferIndex]; + + uint firstIndex = emitterBuffer[emitterIndex].firstIndex; + uint particleIndex = liveSrcBuffer[firstIndex + id.x]; + Particle p = particleBuffer[firstIndex + particleIndex]; + if(p.lifeTime >= emitterBuffer[emitterIndex].maxSeconds) + { + RWStructuredBuffer deadBuffer = ResourceDescriptorHeap[deadBufferIndex]; + uint oldDeadCount; + 
InterlockedAdd(emitterBuffer[emitterIndex].deadCount, 1, oldDeadCount); + deadBuffer[firstIndex + oldDeadCount] = particleIndex; + return; + } + + RWStructuredBuffer liveDstBuffer = ResourceDescriptorHeap[liveDstBufferIndex]; +#if 1 // @TODO: insert proper logic here + float dt = 1.0 / 60.0; + float t = p.lifeTime / emitterBuffer[emitterIndex].maxSeconds; + float fadeOut = 1.0 - EaseOutQuad(t); + float3 velocityNoise = Hash3To3(p.position); // want Curl + float scatterNoise = Hash2To1(float2(particleIndex * 1337.0, p.lifeTime)); // want Simplex + float3 color0 = float3(1, 1, 1); + float3 color1 = float3(0.25, 0.25, 0.25); + float3 color = lerp(color0, color1, t); + p.lifeTime += dt; + p.position += p.velocity * dt; + p.velocity *= 0.85; + p.velocity += velocityNoise * (1.0 + 0.077 * 100.0); + p.velocity += float3(0, 0, 1) * (1.0 + 0.210 * 100.0); + p.absorption = fadeOut * 0.02; + p.scattering = fadeOut * (color + float(0.5 + scatterNoise).xxx) * 0.02; + p.radius = 5.0 + 2.0 * EaseInCubic(t); +#endif + particleBuffer[firstIndex + particleIndex] = p; + + uint oldLiveCount; + InterlockedAdd(emitterBuffer[emitterIndex].liveCount2, 1, oldLiveCount); + liveDstBuffer[firstIndex + oldLiveCount] = particleIndex; +} diff --git a/code/renderer/shaders/crp/scene_view.h.hlsli b/code/renderer/shaders/crp/scene_view.h.hlsli index 1608e2b..0ebaf5b 100644 --- a/code/renderer/shaders/crp/scene_view.h.hlsli +++ b/code/renderer/shaders/crp/scene_view.h.hlsli @@ -33,19 +33,39 @@ along with Challenge Quake 3. If not, see . 
# pragma pack(push, 4) #endif +// @TODO: move out +struct ParticleEmitter +{ + uint firstIndex; + uint liveCount; // post-emission, pre-simulation + uint liveCount2; // post-simulation + uint deadCount; + uint totalCount; + uint emitCount; // how many added this frame + float maxSeconds; +}; + // @TODO: move out struct Particle { + // needed for injection, set by user float3 position; float radius; float3 scattering; // or emissive float absorption; float anisotropy; uint isEmissive; + // needed for injection, private + int3 froxelMin; + int3 froxelMax; + // extra data for simulation only + float3 velocity; + float lifeTime; // in seconds }; // @TODO: move out -#define MAX_PARTICLES 8192 +#define MAX_PARTICLES (1 << 20) +#define MAX_PARTICLE_EMITTERS (1 << 10) // @TODO: move out struct DynamicLight @@ -56,9 +76,6 @@ struct DynamicLight float padding; }; -// @TODO: move out -#define SCENE_VIEW_MAX_LIGHTS 32 - #if !defined(__cplusplus) struct ExtinctionCascade { @@ -230,7 +247,7 @@ struct SceneView float4 clipPlane; float4 debug; float3 cameraPosition; - float sunIntensity; + float sunIntensityDL; float3 sunDirection; float zNear; float3 sunColor; @@ -240,9 +257,9 @@ struct SceneView float3 cameraForward; float prevZFar; float3 cameraLeft; - float padding0; + float sunIntensityVL; float3 cameraUp; - float padding1; + float pointLightIntensityVL; float3 linearDepthConstants; float frameSeed; float3 ambientColor; @@ -282,7 +299,7 @@ struct SceneView return ray; } -#if 1 // exponential depth distribution like The Last of Us Part II +#if 0 // exponential depth distribution like The Last of Us Part II float TLOU2SliceToViewDepth(float slice, float C) { @@ -295,7 +312,8 @@ struct SceneView float TLOU2ViewDepthToSlice(float viewDepth, float C) { const float Q = 1.0; - float slice = C * (log2(exp2(Q) + viewDepth) - Q); + float logArg = exp2(Q) + viewDepth; + float slice = logArg <= 0.0 ? 
-666.0 : (C * (log2(logArg) - Q)); return slice; } @@ -328,6 +346,24 @@ struct SceneView return viewDepth; } +#elif 1 // quadratic depth distribution + + float FroxelViewDepthToZ01Ex(float viewDepth, float sliceCount, float zn, float zf) + { + float depth01 = (viewDepth - zn) / (zf - zn); + depth01 = sqrt(depth01); + + return depth01; + } + + float FroxelZ01ToViewDepth(float depth01, float sliceCount) + { + depth01 *= depth01; + float viewDepth = zNear + (zFar - zNear) * depth01; + + return viewDepth; + } + #else // linear depth distribution float FroxelViewDepthToZ01Ex(float viewDepth, float sliceCount, float zn, float zf) @@ -400,6 +436,19 @@ struct SceneView return index; } + int2 FroxelSphereZExtents(float3 positionWS, float radius, float3 textureSize) + { + float4 positionVSw = mul(viewMatrix, float4(positionWS, 1)); + float viewDepth = -positionVSw.z / positionVSw.w; + float viewDepthMin = viewDepth - radius; + float viewDepthMax = viewDepth + radius; + float zMin01 = FroxelViewDepthToZ01(viewDepthMin, textureSize.z); + float zMax01 = FroxelViewDepthToZ01(viewDepthMax, textureSize.z); + int2 extents = int2(zMin01 * textureSize.z, ceil(zMax01 * textureSize.z)); + + return extents; + } + // @TODO: validate new logic float3 FroxelReproject01(int3 currIndex, float3 textureSize) { @@ -420,6 +469,82 @@ struct SceneView return prevTC; } + float FroxelVolume(uint3 index, float3 textureSize) + { + float3 tcBase = (float3(index) + float3(0.5, 0.5, 0.5)) / textureSize; + float3 halfTexel = float3(0.5, 0.5, 0.5) / textureSize; + float3 posL = FroxelTCToWorldSpace(tcBase + float3(-halfTexel.x, 0, 0), textureSize); + float3 posR = FroxelTCToWorldSpace(tcBase + float3(halfTexel.x, 0, 0), textureSize); + float w = distance(posL, posR); + float3 posU = FroxelTCToWorldSpace(tcBase + float3(0, halfTexel.y, 0), textureSize); + float3 posD = FroxelTCToWorldSpace(tcBase + float3(0, -halfTexel.y, 0), textureSize); + float h = distance(posU, posD); + float3 posF = 
FroxelTCToWorldSpace(tcBase + float3(0, 0, halfTexel.z), textureSize); + float3 posB = FroxelTCToWorldSpace(tcBase + float3(0, 0, -halfTexel.z), textureSize); + float d = distance(posF, posB); + float volume = w * h * d; + + return volume; + } + + float3 FroxelAverageDimensions(uint3 index, float3 textureSize) + { + float3 tcBase = (float3(index) + float3(0.5, 0.5, 0.5)) / textureSize; + float3 halfTexel = float3(0.5, 0.5, 0.5) / textureSize; + float3 posL = FroxelTCToWorldSpace(tcBase + float3(-halfTexel.x, 0, 0), textureSize); + float3 posR = FroxelTCToWorldSpace(tcBase + float3(halfTexel.x, 0, 0), textureSize); + float w = distance(posL, posR); + float3 posU = FroxelTCToWorldSpace(tcBase + float3(0, halfTexel.y, 0), textureSize); + float3 posD = FroxelTCToWorldSpace(tcBase + float3(0, -halfTexel.y, 0), textureSize); + float h = distance(posU, posD); + float3 posF = FroxelTCToWorldSpace(tcBase + float3(0, 0, -halfTexel.z), textureSize); + float3 posB = FroxelTCToWorldSpace(tcBase + float3(0, 0, halfTexel.z), textureSize); + float d = distance(posF, posB); + float3 dimensions = float3(w, h, d); + + return dimensions; + } + + float3 FroxelMaxDimensions(uint3 index, float3 textureSize) + { + float3 tcBase = (float3(index) + float3(0.5, 0.5, 0.5)) / textureSize; + float3 halfTexel = float3(0.5, 0.5, 0.5) / textureSize; + float3 posL = FroxelTCToWorldSpace(tcBase + float3(-halfTexel.x, 0, halfTexel.z), textureSize); + float3 posR = FroxelTCToWorldSpace(tcBase + float3(halfTexel.x, 0, halfTexel.z), textureSize); + float w = distance(posL, posR); + float3 posU = FroxelTCToWorldSpace(tcBase + float3(0, halfTexel.y, halfTexel.z), textureSize); + float3 posD = FroxelTCToWorldSpace(tcBase + float3(0, -halfTexel.y, halfTexel.z), textureSize); + float h = distance(posU, posD); + float3 posF = FroxelTCToWorldSpace(tcBase + float3(0, 0, -halfTexel.z), textureSize); + float3 posB = FroxelTCToWorldSpace(tcBase + float3(0, 0, halfTexel.z), textureSize); + float d = distance(posF, 
posB); + float3 dimensions = float3(w, h, d); + + return dimensions; + } + +#if 0 + void FroxelAABB(out float boxMin, out float3 boxMax, int3 index, float3 textureSize) + { + float3 tc = (float3(index) + float3(0.5, 0.5, 0.5)) / textureSize; + float3 halfTexel = float3(0.5, 0.5, 0.5) / textureSize; + float3 pointWS; + ClearBoundingBox(boxMin, boxMax); + pointWS = FroxelTCToWorldSpace(tc + float3(-halfTexel.x, 0, 0), textureSize); + ExpandBoundingBox(boxMin, boxMax, pointWS); + pointWS = FroxelTCToWorldSpace(tc + float3(halfTexel.x, 0, 0), textureSize); + ExpandBoundingBox(boxMin, boxMax, pointWS); + pointWS = FroxelTCToWorldSpace(tc + float3(0, -halfTexel.y, 0), textureSize); + ExpandBoundingBox(boxMin, boxMax, pointWS); + pointWS = FroxelTCToWorldSpace(tc + float3(0, halfTexel.y, 0), textureSize); + ExpandBoundingBox(boxMin, boxMax, pointWS); + pointWS = FroxelTCToWorldSpace(tc + float3(0, 0, -halfTexel.z), textureSize); + ExpandBoundingBox(boxMin, boxMax, pointWS); + pointWS = FroxelTCToWorldSpace(tc + float3(0, 0, halfTexel.z), textureSize); + ExpandBoundingBox(boxMin, boxMax, pointWS); + } +#endif + float3 ExtinctionIndexToWorldSpace(int3 index, float3 textureSize, float worldScale) { return AABoxIndexToWorldSpace(index, cameraPosition, textureSize, worldScale); diff --git a/code/renderer/shaders/crp/sun_blur.hlsl b/code/renderer/shaders/crp/sun_blur.hlsl index ff7ef8c..c81f0e8 100644 --- a/code/renderer/shaders/crp/sun_blur.hlsl +++ b/code/renderer/shaders/crp/sun_blur.hlsl @@ -106,12 +106,11 @@ float4 ps(VOut input) : SV_Target } } - // @TODO: all light intensities are 50x for light scattering? float visOpaque = weightSum > 0.0 ? 
visSum / weightSum : 0.0; float visVolume = cascade.TransmittanceAt(positionWS); float vis = visOpaque * visVolume; float lambert = max(dot(normalWS, scene.sunDirection), 0.0); - float3 color = vis * scene.sunColor * min(scene.sunIntensity / 10.0, 5.0) * lambert; + float3 color = vis * scene.sunColor * scene.sunIntensityDL * lambert; float4 result = float4(color, 1); return result; diff --git a/code/renderer/shaders/crp/vl_common.h.hlsli b/code/renderer/shaders/crp/vl_common.h.hlsli index 13e8e60..a2e0589 100644 --- a/code/renderer/shaders/crp/vl_common.h.hlsli +++ b/code/renderer/shaders/crp/vl_common.h.hlsli @@ -70,13 +70,27 @@ struct FogVolume #endif }; +struct Tile +{ + uint firstParticle; + uint particleCount; + uint particleIndex; + uint pad0; +}; + +struct Counters +{ + uint particleCount; + uint tileCount; +}; + #if defined(__cplusplus) #pragma pack(pop) #endif #if !defined(__cplusplus) -// defines voxel sampling offsets for super-sampled fog injection +// defines voxel sampling offsets for super-sampled fog/particle injection #if defined(VOXEL_SUPERSAMPLING_1X) static const int VoxelSampleCount = 1; @@ -203,4 +217,46 @@ static const float3 VoxelSamples[65] = }; #endif +// defines sphere sampling offsets for super-sampled particle injection + +#if defined(SPHERE_SUPERSAMPLING_1X) +static const int SphereSampleCount = 1; +static const float3 SphereSamples[1] = +{ + float3(0, 0, 0) +}; +#elif defined(SPHERE_SUPERSAMPLING_2X) +static const int SphereSampleCount = 13; +static const float3 SphereSamples[13] = +{ + float3(-0.000070998815798, -0.000005560663499, -0.666666662862852), + float3(-0.184273763669020, -0.567104158597683, -0.298128324331843), + float3(0.482401099870031, -0.350493177757963, -0.298141167291191), + float3(-0.596343845795264, -0.000022877791848, -0.298024263279293), + float3(0.482326151451980, 0.350363917328204, -0.298414231403936), + float3(-0.184272548445118, 0.567103656272574, -0.298130030986924), + float3(0.000000000000000, 
0.000000000000000, 0.000000000000000), + float3(0.184241177452589, -0.567097520996929, 0.298161088431179), + float3(-0.482404802195943, -0.350487719732367, 0.298141593172679), + float3(0.596236953290593, -0.000018337625111, 0.298238058669458), + float3(-0.482641375963473, 0.350268310738867, 0.298016538374417), + float3(0.184262272798761, 0.567101475888716, 0.298140529469443), + float3(-0.000135987657896, -0.000010037080285, 0.666666652721627) +}; +#elif defined(SPHERE_SUPERSAMPLING_2X_OLD) // 8x with 0,0,0 added +static const int SphereSampleCount = 9; +static const float3 SphereSamples[9] = +{ + float3(0, 0, 0), + float3(-0.378024926878978, -0.378024806866870, -0.317879684372474), + float3(0.378024762722181, -0.378024944670528, -0.317879715711806), + float3(-0.378024789075323, 0.378024882734286, -0.317879758027500), + float3(0.378024900525835, 0.378024744930628, -0.317879789366832), + float3(-0.000000084265328, -0.534607831462839, 0.317879788951622), + float3(-0.534607849254392, 0.000000128410024, 0.317879759029907), + float3(0.534607875607536, -0.000000066473779, 0.317879714709398), + float3(0.000000110618471, 0.534607893399083, 0.317879684787684) +}; +#endif + #endif diff --git a/code/renderer/shaders/crp/vl_debug_ambient.hlsl b/code/renderer/shaders/crp/vl_debug_ambient.hlsl index 4f53cf4..89335d1 100644 --- a/code/renderer/shaders/crp/vl_debug_ambient.hlsl +++ b/code/renderer/shaders/crp/vl_debug_ambient.hlsl @@ -23,15 +23,13 @@ along with Challenge Quake 3. If not, see . 
#include "common.hlsli" #include "scene_view.h.hlsli" +#include "light_grid.h.hlsli" cbuffer RootConstants { - float3 centerPosition; + LightGridRC lightGridRC; float sphereScale; - float3 worldScale; - uint lightGridTextureAIndex; - uint lightGridTextureBIndex; } struct VOut @@ -44,16 +42,15 @@ struct VOut VOut vs(uint vertexId : SV_VertexID) { - Texture3D lightGridTexture = ResourceDescriptorHeap[lightGridTextureAIndex]; SceneView scene = GetSceneView(); + LightGrid lightGrid = GetLightGrid(lightGridRC); - uint3 textureSize = GetTextureSize(lightGridTexture); uint flatVoxelIndex = vertexId / 6; uint vertexIndex = vertexId % 6; - int3 voxelIndex = int3(UnflattenIndex(flatVoxelIndex, textureSize)); - float3 voxelCenter = AABoxIndexToWorldSpace(voxelIndex, centerPosition, float3(textureSize), worldScale); + int3 voxelIndex = int3(UnflattenIndex(flatVoxelIndex, lightGrid.textureSize)); + float3 voxelCenter = lightGrid.IndexToWorldSpace(voxelIndex); float2 quadPosition = QuadFromVertexID(vertexIndex); - float radius = 0.5 * sphereScale * min3(worldScale.x, worldScale.y, worldScale.z); + float radius = 0.5 * sphereScale * min3(lightGrid.worldScale.x, lightGrid.worldScale.y, lightGrid.worldScale.z); float3 up = scene.cameraUp; float3 forward = normalize(voxelCenter - scene.cameraPosition); float3 right = normalize(cross(forward, up)); @@ -79,9 +76,8 @@ VOut vs(uint vertexId : SV_VertexID) float4 ps(VOut input) : SV_Target { - Texture3D lightGridTextureA = ResourceDescriptorHeap[lightGridTextureAIndex]; - Texture3D lightGridTextureB = ResourceDescriptorHeap[lightGridTextureBIndex]; SceneView scene = GetSceneView(); + LightGrid lightGrid = GetLightGrid(lightGridRC); float3 rayDir = normalize(input.positionWS - scene.cameraPosition); float t = RaytraceSphere(scene.cameraPosition, rayDir, input.sphere.xyz, input.sphere.w); @@ -90,11 +86,10 @@ float4 ps(VOut input) : SV_Target discard; } - float4 payloadA = lightGridTextureA[input.voxelIndex]; - float4 payloadB = 
lightGridTextureB[input.voxelIndex]; + LightGridSample ambient = lightGrid.SampleAtIndex(input.voxelIndex); float3 hitPosition = scene.cameraPosition + rayDir * t; float3 normal = normalize(hitPosition - input.sphere.xyz); - float3 color = AmbientColor(payloadA, payloadB, normal, scene.ambientColor); + float3 color = ambient.GetAmbientColor(normal, scene.ambientColor, 1, 1); float4 result = float4(color * 0.5, 1); return result; diff --git a/code/renderer/shaders/crp/vl_extinction_injection_nanovdb.hlsl b/code/renderer/shaders/crp/vl_extinction_injection_nanovdb.hlsl new file mode 100644 index 0000000..c173b91 --- /dev/null +++ b/code/renderer/shaders/crp/vl_extinction_injection_nanovdb.hlsl @@ -0,0 +1,113 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . 
+=========================================================================== +*/ +// volumetric lighting: inject a NanoVDB volume into the extinction volume + + +#include "common.hlsli" +#include "scene_view.h.hlsli" +#include "vl_common.h.hlsli" +#include "vl_nanovdb.hlsli" + + +#define LOW_QUALITY_INJECTION_MODE 1 + + +cbuffer RootConstants +{ + float4 packedTransform[4]; + float2 packedTransform2; + uint nanovdbBufferIndex; + uint extinctionTextureIndex; + uint densityGridByteOffset; + uint densityGridByteOffset2; + uint linearInterpolation; + float worldScale; + float densityExtinctionScale; + float t; +} + +[numthreads(4, 4, 4)] +void cs(uint3 id : SV_DispatchThreadID) +{ + RWTexture3D extinctionTexture = ResourceDescriptorHeap[extinctionTextureIndex]; + uint3 textureSize = GetTextureSize(extinctionTexture); + if(any(id >= textureSize)) + { + return; + } + + pnanovdb_buf_t nanovdbBuffer = ResourceDescriptorHeap[nanovdbBufferIndex]; + SceneView scene = GetSceneView(); + + float3 textureSizeF = float3(textureSize); + float3 tcBase = (float3(id) + float3(0.5, 0.5, 0.5)) / textureSizeF; + float3 voxelPosition = scene.ExtinctionIndexToWorldSpace(id, textureSizeF, worldScale); + Transform transform = DecodeTransform(packedTransform, packedTransform2); +#if LOW_QUALITY_INJECTION_MODE + // big perf boost, tiny visual impact + transform.stepSize = 0.5 * worldScale.xxx; +#endif + + if(linearInterpolation != 0u) + { + SampleResult extResult1 = CreateSampleResult(); + if(densityGridByteOffset) + { + Grid grid1 = GetGrid(nanovdbBuffer, densityGridByteOffset); + if(grid1.OverlapsAxisAlignedBox(scene, id, textureSizeF, worldScale, transform)) + { + extResult1 = grid1.GetAxisAlignedBoxAverage(scene, voxelPosition, worldScale.xxx, transform); + } + } + + SampleResult extResult2 = CreateSampleResult(); + if(densityGridByteOffset2 > 0) + { + Grid grid2 = GetGrid(nanovdbBuffer, densityGridByteOffset2); + if(grid2.OverlapsAxisAlignedBox(scene, id, textureSizeF, worldScale, 
transform)) + { + extResult2 = grid2.GetAxisAlignedBoxAverage(scene, voxelPosition, worldScale.xxx, transform); + } + } + + if(extResult1.sum > 0.0 || extResult2.sum > 0.0) + { + float extinction1 = (extResult1.sum / float(extResult1.maxSampleCount)); + float extinction2 = (extResult2.sum / float(extResult2.maxSampleCount)); + float extinction = lerp(extinction1, extinction2, t) * densityExtinctionScale; + extinctionTexture[id] += extinction; + } + } + else if(densityGridByteOffset > 0) + { + Grid grid = GetGrid(nanovdbBuffer, densityGridByteOffset); + if(grid.OverlapsAxisAlignedBox(scene, id, textureSizeF, worldScale, transform)) + { + SampleResult extResult = CreateSampleResult(); + extResult = grid.GetAxisAlignedBoxAverage(scene, voxelPosition, worldScale.xxx, transform); + if(extResult.sum > 0.0) + { + float extinction = (extResult.sum / float(extResult.maxSampleCount)) * densityExtinctionScale; + extinctionTexture[id] += extinction; + } + } + } +} diff --git a/code/renderer/shaders/crp/vl_extinction_injection_particles.hlsl b/code/renderer/shaders/crp/vl_extinction_injection_particles.hlsl deleted file mode 100644 index 29d0f00..0000000 --- a/code/renderer/shaders/crp/vl_extinction_injection_particles.hlsl +++ /dev/null @@ -1,100 +0,0 @@ -/* -=========================================================================== -Copyright (C) 2024 Gian 'myT' Schellenbaum - -This file is part of Challenge Quake 3 (CNQ3). - -Challenge Quake 3 is free software; you can redistribute it -and/or modify it under the terms of the GNU General Public License as -published by the Free Software Foundation; either version 2 of the License, -or (at your option) any later version. - -Challenge Quake 3 is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with Challenge Quake 3. If not, see . -=========================================================================== -*/ -// volumetric lighting: inject particles into the extinction volume - - -#include "common.hlsli" -#include "scene_view.h.hlsli" -#define VOXEL_SUPERSAMPLING_2X -#include "vl_common.h.hlsli" - - -cbuffer RootConstants -{ - uint3 tileScale; - uint particleBufferIndex; - uint particleCount; - uint extinctionTextureIndex; - uint tileBufferIndex; - uint tileCount; - float extinctionWorldScale; -} - -[numthreads(512, 1, 1)] -void cs(uint3 dtid : SV_DispatchThreadID, uint gidx : SV_GroupIndex) -{ - uint tileIndex = dtid.x / 512; - if(tileIndex >= tileCount) - { - return; - } - - RWStructuredBuffer tileBuffer = ResourceDescriptorHeap[tileBufferIndex]; - RWTexture3D extinctionTexture = ResourceDescriptorHeap[extinctionTextureIndex]; - uint3 textureSize = GetTextureSize(extinctionTexture); - uint3 tileCornerIndex = tileBuffer[tileIndex]; - uint3 tileThreadIndex = UnflattenIndex(gidx, tileScale); - uint3 id = tileCornerIndex * tileScale + tileThreadIndex; - if(any(id >= textureSize)) - { - return; - } - - StructuredBuffer particleBuffer = ResourceDescriptorHeap[particleBufferIndex]; - SceneView scene = GetSceneView(); - - float3 textureSizeF = float3(textureSize); - float3 tcBase = (float3(id) + float3(0.5, 0.5, 0.5)) / textureSizeF; - float accumExtinction = 0.0; - for(uint i = 0; i < particleCount; i++) - { - Particle particle = particleBuffer[i]; - float extinction = particle.absorption; - [flatten] - if(particle.isEmissive == 0) - { - extinction += Brightness(particle.scattering); - } - - float particleCoverage = 0.0; - for(uint s = 0; s < VoxelSampleCount; s++) - { - float3 tcSample = tcBase + VoxelSamples[s] / textureSizeF; - float3 position = scene.ExtinctionTCToWorldSpace(tcSample, textureSizeF, extinctionWorldScale); - float dist = distance(position, particle.position); - 
if(dist >= particle.radius) - { - continue; - } - - float coverage = sqrt(saturate(1.0 - dist / particle.radius)); - particleCoverage += coverage; - } - particleCoverage /= float(VoxelSampleCount); - - accumExtinction += particleCoverage * extinction; - } - - if(accumExtinction > 0.0) - { - extinctionTexture[id] += accumExtinction; - } -} diff --git a/code/renderer/shaders/crp/vl_frustum_depth_test.hlsl b/code/renderer/shaders/crp/vl_frustum_depth_test.hlsl new file mode 100644 index 0000000..3bf4391 --- /dev/null +++ b/code/renderer/shaders/crp/vl_frustum_depth_test.hlsl @@ -0,0 +1,68 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . 
+=========================================================================== +*/ +// volumetric lighting: depth test screen tiles + + +#include "common.hlsli" +#include "scene_view.h.hlsli" + + +cbuffer RootConstants +{ + uint3 frustumTextureSize; + uint frustumVisTextureIndex; + uint depthMip; +} + +[numthreads(8, 8, 1)] +void cs(uint3 id : SV_DispatchThreadID) +{ + SceneView scene = GetSceneView(); + RWTexture2D frustumVisTexture = ResourceDescriptorHeap[frustumVisTextureIndex]; + uint2 visTextureSize = GetTextureSize(frustumVisTexture); + uint2 tileIndex = id.xy; + if(any(tileIndex >= visTextureSize.xy)) + { + return; + } + + Texture2D depthMinMaxTexture = ResourceDescriptorHeap[scene.depthMinMaxTextureIndex]; + float3 frustumTextureSizeF = float3(frustumTextureSize); + // x=min is furthest with reverse Z + float tileDepthZW = depthMinMaxTexture.mips[depthMip][id.xy].x; + float tileDepth = scene.LinearDepth(tileDepthZW); + uint furthestVisibleIndex = 0; + for(uint d = 1; d < frustumTextureSize.z; d++) + { + // Z offset is 0 because we want the closest part of the froxel + float3 froxelTC = (float3(tileIndex.xy, d) + float3(0.5, 0.5, 0)) / frustumTextureSizeF; + float froxelDepth = scene.FroxelZ01ToViewDepth(froxelTC.z, frustumTextureSizeF.z); + if(froxelDepth < tileDepth) + { + furthestVisibleIndex = d; + } + else + { + break; + } + } + frustumVisTexture[tileIndex] = furthestVisibleIndex; +} diff --git a/code/renderer/shaders/crp/vl_frustum_injection_nanovdb.hlsl b/code/renderer/shaders/crp/vl_frustum_injection_nanovdb.hlsl new file mode 100644 index 0000000..bc54708 --- /dev/null +++ b/code/renderer/shaders/crp/vl_frustum_injection_nanovdb.hlsl @@ -0,0 +1,398 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). 
+ +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . +=========================================================================== +*/ +// volumetric lighting: inject a NanoVDB volume into the material textures + + +//#define PREVIEW_MODE 1 +// preview mode is a faster and less complex version of the shader +// that doesn't crash the AMD/Intel shader compilers + +// this shader considers smoke to be fully isotropic (g = 0) +// in "Creating the Atmospheric World of Red Dead Redemption 2" by Fabian Bauer, +// g is 0.1 with strong backscatter for smoke +// RDR2 uses a phase function with multiple octaves of HG +// it fakes backscattering for a specific extinction range +// the phase function returns the max. 
of the sum of octaves and the backscatter + + +#include "common.hlsli" +#include "scene_view.h.hlsli" +#include "light_grid.h.hlsli" +#include "vl_common.h.hlsli" +#include "vl_nanovdb.hlsli" + + +cbuffer RootConstants +{ + float4 packedTransform[4]; + float2 packedTransform2; + uint nanovdbBufferIndex; + uint blackbodyTextureIndex; + LightGridRC lightGridRC; + uint materialTextureAIndex; + uint materialTextureBIndex; + uint materialTextureCIndex; + uint scatterExtTextureIndex; + uint frustumVisTextureIndex; + uint densityGridByteOffset; + uint flamesGridByteOffset; + uint densityGridByteOffset2; + uint flamesGridByteOffset2; + uint linearInterpolation; + uint accurateOverlapTest; + uint ambientAngularCoverage; + float densityExtinctionScale; + float densityAlbedo; + float flamesEmissionScale; + float flamesTemperatureScale; + float stepScale; // for super-sampling froxel average + float transStepScale; // for under-sampling ambient transmittance + float t; +} + +static const float SqrtOneThird = sqrt(1.0 / 3.0); +#if PREVIEW_MODE +static const float3 Dirs[4] = +{ + float3(-SqrtOneThird, -SqrtOneThird, SqrtOneThird), + float3(SqrtOneThird, SqrtOneThird, SqrtOneThird), + float3(-SqrtOneThird, SqrtOneThird, SqrtOneThird), + float3(SqrtOneThird, -SqrtOneThird, SqrtOneThird) +}; +#else +static const float3 Dirs[6 + 8] = +{ + float3(1, 0, 0), + float3(-1, 0, 0), + float3(0, 1, 0), + float3(0, -1, 0), + float3(0, 0, 1), + float3(0, 0, -1), + float3(-SqrtOneThird, -SqrtOneThird, -SqrtOneThird), + float3(-SqrtOneThird, -SqrtOneThird, SqrtOneThird), + float3(-SqrtOneThird, SqrtOneThird, -SqrtOneThird), + float3(-SqrtOneThird, SqrtOneThird, SqrtOneThird), + float3(SqrtOneThird, -SqrtOneThird, -SqrtOneThird), + float3(SqrtOneThird, -SqrtOneThird, SqrtOneThird), + float3(SqrtOneThird, SqrtOneThird, -SqrtOneThird), + float3(SqrtOneThird, SqrtOneThird, SqrtOneThird) +}; +#endif + +float3 BlackbodyColor(float temperatureK, Texture2D blackbodyTexture, SamplerState 
blackbodySampler) +{ + const float minT = 800; + const float maxT = 12000; + float t = saturate((temperatureK - minT) / (maxT - minT)); + float3 emission = blackbodyTexture.SampleLevel(blackbodySampler, float2(t, 0.5), 0.0).rgb; + + return emission; +} + +// Stefan-Boltzmann law +float BlackbodyRadiation(float temperatureK) +{ + const float sigma = 5.670373e-8; + float T = temperatureK; + float T2 = T * T; + float T4 = T2 * T2; + float radiation = (T4 * sigma) / PI; + + return radiation; +} + +float3 BlackbodyEmission(float temperatureK, Texture2D blackbodyTexture, SamplerState blackbodySampler) +{ + const float scale = 1.0e-6; + float3 color = BlackbodyColor(temperatureK, blackbodyTexture, blackbodySampler); + float radiation = BlackbodyRadiation(temperatureK); + float colorBrightness = Brightness(color); + float3 result = (color * radiation * scale) / colorBrightness; + + return result; +} + +struct SampleRequest +{ + pnanovdb_buf_t buffer; + uint gridByteOffset; + float3 froxelPosition; + int3 froxelIndex; + float3 textureSize; + float3 froxelSize; + Transform transform; + SceneView scene; + bool ambientLight; + int3 froxelId; + LightGrid lightGrid; +}; + +#if PREVIEW_MODE +SampleResult SampleFroxel(SampleRequest request) +{ + SampleResult result = CreateSampleResult(); + if(request.gridByteOffset == 0) + { + // no grid requested + return result; + } + + Grid grid = GetGrid(request.buffer, request.gridByteOffset); + float3 indexF = grid.WorldToIndex(request.froxelPosition, request.transform); + bool overlaps = IsInRange(indexF, grid.bboxMin, grid.bboxMax); + if(!overlaps) + { + // no grid/froxel intersection + return result; + } + + result = grid.GetFroxelAverage(request.scene, request.froxelPosition, request.froxelSize, request.transform); + if(!request.ambientLight) + { + // no ambient light requested + return result; + } + + const float inScatterScale = 2.0; + const float stepScale = 8.0; + const uint dirCount = 4; + float jitterScale = 0.5 * 
Hash3To1(request.froxelId); + float extScale = densityExtinctionScale; + float trans = 0.0; + [unroll] + for(uint i = 0; i < dirCount; i++) + { + float3 dir = Dirs[i]; + float3 step = dir * stepScale; + trans += grid.RaymarchTransmittance(request.froxelPosition + step * jitterScale, step, request.transform, extScale); + } + result.inScatteredLight = request.scene.ambientColor * (inScatterScale * trans * request.scene.ambientIntensity); + + return result; +} +#else +SampleResult SampleFroxel(SampleRequest request) +{ + SampleResult result = CreateSampleResult(); + if(request.gridByteOffset == 0) + { + // no grid requested + return result; + } + + Grid grid = GetGrid(request.buffer, request.gridByteOffset); + bool overlaps; + if(accurateOverlapTest != 0u) + { + overlaps = grid.OverlapsFroxel(request.scene, request.froxelIndex, request.textureSize, request.transform); + } + else + { + float3 indexF = grid.WorldToIndex(request.froxelPosition, request.transform); + overlaps = IsInRange(indexF, grid.bboxMin, grid.bboxMax); + } + if(!overlaps) + { + // no grid/froxel intersection + return result; + } + + result = grid.GetFroxelAverage(request.scene, request.froxelPosition, request.froxelSize, request.transform); + if(!request.ambientLight) + { + // no ambient light requested + return result; + } + + float3x3 rotation = RandomRotationMatrix3x3(float3(request.froxelId)); + float jitterScale = 0.5 * Hash3To1(request.froxelId); + float stepScale = transStepScale; + uint dirCount = ambientAngularCoverage != 0 ? 14 : 6; + float localWeight = ambientAngularCoverage != 0 ? 
1.75 : 0.75; + if(lightGridRC.isAvailable) + { + LightGridSample ambient = request.lightGrid.SampleAtPosition(request.froxelPosition); + float extScale = densityExtinctionScale; + float3 lightDir = ambient.GetLightDirection(); + float3 dirTransStep = lightDir * stepScale; + float dirTrans = grid.RaymarchTransmittance(request.froxelPosition + dirTransStep * jitterScale, dirTransStep, request.transform, extScale); + float3 globalColor = float3(0, 0, 0); + for(uint i = 0; i < dirCount; i++) + { + float3 dir = mul(rotation, Dirs[i]); + float scale = stepScale; + float3 step = dir * stepScale; + LightGridSample sample = request.lightGrid.SampleAtPosition(request.froxelPosition + dir * scale * 0.5); + float trans = grid.RaymarchTransmittance(request.froxelPosition + step * jitterScale, step, request.transform, extScale); + float3 color = ColorAtBrightness(sample.GetGlobalColor(), 0.5); + globalColor += color * trans; + } + float3 cameraDir = normalize(request.froxelPosition - request.scene.cameraPosition); + float localScale = dot(-cameraDir, lightDir) * 0.5 + 0.5; // wraps around + float3 localColor = ColorAtBrightness(ambient.GetLocalColor(), 0.5) * localScale * dirTrans; + float3 color = (globalColor + localColor * localWeight) / (float(dirCount) + localWeight); + result.inScatteredLight = color * request.scene.ambientIntensity; + } + else + { + float extScale = densityExtinctionScale; + float trans = 0.0; + for(uint i = 0; i < dirCount; i++) + { + float3 dir = Dirs[i]; + float3 step = dir * stepScale; + trans += grid.RaymarchTransmittance(request.froxelPosition + step * jitterScale, step, request.transform, extScale); + } + trans /= float(dirCount); + result.inScatteredLight = request.scene.ambientColor * (trans * request.scene.ambientIntensity); + } + + return result; +} +#endif + +[numthreads(4, 4, 4)] +void cs(uint3 id : SV_DispatchThreadID) +{ + RWTexture3D materialTextureA = ResourceDescriptorHeap[materialTextureAIndex]; + uint3 textureSize = 
GetTextureSize(materialTextureA); + if(any(id >= textureSize)) + { + return; + } + + RWTexture2D frustumVisTexture = ResourceDescriptorHeap[frustumVisTextureIndex]; + uint furthestVisibleFroxelZIndex = frustumVisTexture[id.xy]; + if(id.z > furthestVisibleFroxelZIndex) + { + return; + } + + SceneView scene = GetSceneView(); + LightGrid lightGrid; + if(lightGridRC.isAvailable) + { + lightGrid = GetLightGrid(lightGridRC); + } + pnanovdb_buf_t nanovdbBuffer = ResourceDescriptorHeap[nanovdbBufferIndex]; + Texture2D blackbodyTexture = ResourceDescriptorHeap[blackbodyTextureIndex]; + SamplerState blackbodySampler = SamplerDescriptorHeap[scene.linearClampSamplerIndex]; + RWTexture3D materialTextureB = ResourceDescriptorHeap[materialTextureBIndex]; + RWTexture3D materialTextureC = ResourceDescriptorHeap[materialTextureCIndex]; + RWTexture3D scatterExtTexture = ResourceDescriptorHeap[scatterExtTextureIndex]; + + float3 textureSizeF = float3(textureSize); + float3 froxelPosition = scene.FroxelIndexToWorldSpace(int3(id), textureSizeF); + float3 froxelSize = scene.FroxelMaxDimensions(id, textureSizeF); + Transform transform = DecodeTransform(packedTransform, packedTransform2); + transform.stepSize *= stepScale; + +#if !PREVIEW_MODE + if(linearInterpolation != 0u) + { + SampleRequest r; + r.buffer = nanovdbBuffer; + r.froxelIndex = id; + r.froxelPosition = froxelPosition; + r.froxelSize = froxelSize; + r.gridByteOffset = densityGridByteOffset; + r.scene = scene; + r.textureSize = textureSizeF; + r.transform = transform; + r.lightGrid = lightGrid; + r.froxelId = id; + r.ambientLight = true; + SampleResult extResult1 = SampleFroxel(r); + r.gridByteOffset = densityGridByteOffset2; + SampleResult extResult2 = SampleFroxel(r); + + r.ambientLight = false; + r.gridByteOffset = flamesGridByteOffset; + SampleResult emResult1 = SampleFroxel(r); + r.gridByteOffset = flamesGridByteOffset2; + SampleResult emResult2 = SampleFroxel(r); + + if(extResult1.sum > 0.0 || extResult2.sum > 0.0) + { + 
float extinction1 = (extResult1.sum / float(extResult1.maxSampleCount)); + float extinction2 = (extResult2.sum / float(extResult2.maxSampleCount)); + float extinction = lerp(extinction1, extinction2, t) * densityExtinctionScale; + float scatter = extinction * densityAlbedo; + float absorption = extinction - scatter; + float coverage1 = float(extResult1.sampleCount) / float(extResult1.maxSampleCount); + float coverage2 = float(extResult2.sampleCount) / float(extResult2.maxSampleCount); + float coverage = lerp(coverage1, coverage2, t); + float3 inScattered = lerp(extResult1.inScatteredLight, extResult2.inScatteredLight, t); + materialTextureA[id] += float4(scatter.xxx, absorption); + materialTextureC[id] += coverage; + scatterExtTexture[id] += float4(scatter * inScattered, 0); + } + + if(emResult1.sum > 0.0 || emResult2.sum > 0.0) + { + float Tnorm1 = emResult1.sum / float(emResult1.maxSampleCount); + float Tnorm2 = emResult2.sum / float(emResult2.maxSampleCount); + float T = lerp(Tnorm1, Tnorm2, t) * flamesTemperatureScale; + float3 emission = BlackbodyEmission(T, blackbodyTexture, blackbodySampler) * flamesEmissionScale; + materialTextureB[id] += float4(emission, 0.0); + } + } + else +#endif + { + SampleRequest r; + r.buffer = nanovdbBuffer; + r.froxelIndex = id; + r.froxelPosition = froxelPosition; + r.froxelSize = froxelSize; + r.gridByteOffset = densityGridByteOffset; + r.scene = scene; + r.textureSize = textureSizeF; + r.transform = transform; + r.lightGrid = lightGrid; + r.froxelId = id; + r.ambientLight = true; + SampleResult extResult = SampleFroxel(r); + + r.gridByteOffset = flamesGridByteOffset; + r.ambientLight = false; + SampleResult emResult = SampleFroxel(r); + + if(extResult.sum > 0.0) + { + float extinction = (extResult.sum / float(extResult.maxSampleCount)) * densityExtinctionScale; + float scatter = extinction * densityAlbedo; + float absorption = extinction - scatter; + float coverage = float(extResult.sampleCount) / 
float(extResult.maxSampleCount); + materialTextureA[id] += float4(scatter.xxx, absorption); + materialTextureC[id] += coverage; + scatterExtTexture[id] += float4(scatter * extResult.inScatteredLight, 0); + } + + if(emResult.sum > 0.0) + { + float Tnorm = emResult.sum / float(emResult.maxSampleCount); + float T = Tnorm * flamesTemperatureScale; + float3 emission = BlackbodyEmission(T, blackbodyTexture, blackbodySampler) * flamesEmissionScale; + materialTextureB[id] += float4(emission, 0.0); + } + } +} diff --git a/code/renderer/shaders/crp/vl_frustum_injection_particles.hlsl b/code/renderer/shaders/crp/vl_frustum_injection_particles.hlsl index caa907f..752b6b3 100644 --- a/code/renderer/shaders/crp/vl_frustum_injection_particles.hlsl +++ b/code/renderer/shaders/crp/vl_frustum_injection_particles.hlsl @@ -18,60 +18,129 @@ You should have received a copy of the GNU General Public License along with Challenge Quake 3. If not, see . =========================================================================== */ -// volumetric lighting: inject particles into the material textures +// volumetric lighting: inject particles into the frustum material textures + + +// 0 -> particle is a point +// 1 -> particle is a sphere, no super-sampling +// 2 -> particle is a sphere, 2x super-sampling +#define QUALITY 1 #include "common.hlsli" #include "scene_view.h.hlsli" +#if QUALITY >= 2 #define VOXEL_SUPERSAMPLING_2X +#define SPHERE_SUPERSAMPLING_2X +#else +#define VOXEL_SUPERSAMPLING_1X +#define SPHERE_SUPERSAMPLING_1X +#endif #include "vl_common.h.hlsli" cbuffer RootConstants { uint3 tileScale; + uint pad0; + uint3 tileResolution; uint particleBufferIndex; - uint particleCount; uint materialTextureAIndex; uint materialTextureBIndex; uint materialTextureCIndex; uint tileBufferIndex; + uint tileIndexBufferIndex; + uint particleIndexBufferIndex; + uint counterBufferIndex; uint tileCount; } -[numthreads(1024, 1, 1)] -void cs(uint3 dtid : SV_DispatchThreadID, uint gidx : SV_GroupIndex) -{ 
- uint tileIndex = dtid.x / 1024; - if(tileIndex >= tileCount) - { - return; - } +#define VOXEL_COUNT 512 +#define THREAD_COUNT 512 +groupshared uint s_scatterR[VOXEL_COUNT]; +groupshared uint s_scatterG[VOXEL_COUNT]; +groupshared uint s_scatterB[VOXEL_COUNT]; +groupshared uint s_absorption[VOXEL_COUNT]; +groupshared uint s_emissiveR[VOXEL_COUNT]; +groupshared uint s_emissiveG[VOXEL_COUNT]; +groupshared uint s_emissiveB[VOXEL_COUNT]; +groupshared uint s_anisotropy[VOXEL_COUNT]; +groupshared uint s_coverage[VOXEL_COUNT]; +static const float g_materialScale = 131072.0; +static const float g_anisotropyScale = 1024.0; +static const float g_coverageScale = 1024.0; - RWStructuredBuffer tileBuffer = ResourceDescriptorHeap[tileBufferIndex]; +float FroxelMinSize(SceneView scene, uint3 id, float3 textureSize) +{ + float3 center = scene.FroxelIndexToWorldSpace(id, textureSize); + float w = distance(center, scene.FroxelIndexToWorldSpace(id + uint3(1, 0, 0), textureSize)); + float h = distance(center, scene.FroxelIndexToWorldSpace(id + uint3(0, 1, 0), textureSize)); + float d = distance(center, scene.FroxelIndexToWorldSpace(id + uint3(0, 0, 1), textureSize)); + float size = min3(w, h, d); + + return size; +} + +[numthreads(THREAD_COUNT, 1, 1)] +void cs(uint3 dtid : SV_DispatchThreadID, uint gtidx : SV_GroupIndex) +{ + uint tileIndexIndex = dtid.x / THREAD_COUNT; +#if 0 + RWStructuredBuffer counterBuffer = ResourceDescriptorHeap[counterBufferIndex]; + Counters counters = counterBuffer[0]; + //if(tileIndexIndex >= tileCount) + if(tileIndexIndex >= counters.tileCount) + { + return; // should never happen + } +#endif + + RWStructuredBuffer tileIndexBuffer = ResourceDescriptorHeap[tileIndexBufferIndex]; RWTexture3D materialTextureA = ResourceDescriptorHeap[materialTextureAIndex]; uint3 textureSize = GetTextureSize(materialTextureA); - uint3 tileCornerIndex = tileBuffer[tileIndex]; - uint3 tileThreadIndex = UnflattenIndex(gidx, tileScale); + uint tileIndex = 
tileIndexBuffer[tileIndexIndex]; + uint3 tileCornerIndex = UnflattenIndex(tileIndex, tileResolution); + uint3 tileThreadIndex = UnflattenIndex(gtidx, tileScale); uint3 id = tileCornerIndex * tileScale + tileThreadIndex; - if(any(id >= textureSize)) + int3 froxelIndexMin = int3(tileCornerIndex * tileScale); + int3 froxelIndexMax = int3(tileCornerIndex * tileScale) + int3(tileScale) - int3(1, 1, 1); + uint smIndex = FlattenIndex(id - uint3(froxelIndexMin), tileScale); + if(smIndex < VOXEL_COUNT) { - return; + s_scatterR[smIndex] = 0; + s_scatterG[smIndex] = 0; + s_scatterB[smIndex] = 0; + s_absorption[smIndex] = 0; + s_emissiveR[smIndex] = 0; + s_emissiveG[smIndex] = 0; + s_emissiveB[smIndex] = 0; + s_anisotropy[smIndex] = 0; + s_coverage[smIndex] = 0; } + GroupMemoryBarrierWithGroupSync(); RWTexture3D materialTextureB = ResourceDescriptorHeap[materialTextureBIndex]; RWTexture3D materialTextureC = ResourceDescriptorHeap[materialTextureCIndex]; - StructuredBuffer particleBuffer = ResourceDescriptorHeap[particleBufferIndex]; + RWStructuredBuffer particleBuffer = ResourceDescriptorHeap[particleBufferIndex]; + RWStructuredBuffer tileBuffer = ResourceDescriptorHeap[tileBufferIndex]; + RWStructuredBuffer particleIndexBuffer = ResourceDescriptorHeap[particleIndexBufferIndex]; SceneView scene = GetSceneView(); + Tile tile = tileBuffer[tileIndex]; float3 textureSizeF = float3(textureSize); - float3 tcBase = (float3(id) + float3(0.5, 0.5, 0.5)) / textureSizeF; - float4 accumScatterAbs = float4(0, 0, 0, 0); - float4 accumEmissiveAniso = float4(0, 0, 0, 0); - float accumCoverage = 0.0; - for(uint i = 0; i < particleCount; i++) +#if QUALITY > 0 + float3 left = scene.cameraLeft; + float3 up = scene.cameraUp; + float3 fwd = scene.cameraForward; + float froxelMinSize = FroxelMinSize(scene, (tileCornerIndex * tileScale) + (tileScale / 2) - uint3(1, 1, 1), textureSizeF); +#endif + uint particleCount = tile.particleCount; + uint firstParticle = tile.firstParticle; + for(uint i = 
smIndex; i < particleCount; i += THREAD_COUNT) { - Particle particle = particleBuffer[i]; + uint particleIndex = particleIndexBuffer[firstParticle + i]; + Particle particle = particleBuffer[particleIndex]; + float3 scattering; float3 emissive; [flatten] @@ -86,29 +155,124 @@ void cs(uint3 dtid : SV_DispatchThreadID, uint gidx : SV_GroupIndex) emissive = float3(0, 0, 0); } - float particleCoverage = 0.0; - for(uint s = 0; s < VoxelSampleCount; s++) +#if QUALITY > 0 + + bool isBigParticle = particle.radius >= froxelMinSize; + bool isMediumParticle = particle.radius >= 0.125 * froxelMinSize; + int3 boxMin = particle.froxelMin - froxelIndexMin; + int3 boxMax = particle.froxelMax - froxelIndexMin; + boxMin = max(boxMin, int3(0, 0, 0)); + boxMax = min(boxMax, int3(tileScale) - int3(1, 1, 1)); + + if(all(boxMax < boxMin)) { - float3 tcSample = tcBase + VoxelSamples[s] / textureSizeF; - float3 position = scene.FroxelTCToWorldSpace(tcSample, textureSizeF); - float dist = distance(position, particle.position); - if(dist >= particle.radius) - { - continue; - } - - float coverage = sqrt(saturate(1.0 - dist / particle.radius)); - particleCoverage += coverage; + continue; } - particleCoverage /= float(VoxelSampleCount); - accumScatterAbs += particleCoverage * float4(scattering, particle.absorption); - accumEmissiveAniso += particleCoverage * float4(emissive, particle.anisotropy); - accumCoverage += particleCoverage; + for(int z = boxMin.z; z <= boxMax.z; z++) + { + for(int y = boxMin.y; y <= boxMax.y; y++) + { + for(int x = boxMin.x; x <= boxMax.x; x++) + { + uint3 froxelGroupThreadId = uint3(x, y, z); + uint froxelFlatIndex = FlattenIndex(froxelGroupThreadId, tileScale); + uint3 froxelThreadId = tileCornerIndex * tileScale + froxelGroupThreadId; + float particleCoverage = 0.0; + + if(isBigParticle) + { + float3 tcBase = (float3(froxelThreadId) + float3(0.5, 0.5, 0.5)) / textureSizeF; + for(uint s = 0; s < VoxelSampleCount; s++) + { + float3 tcSample = tcBase + VoxelSamples[s] / 
textureSizeF; + float3 position = scene.FroxelTCToWorldSpace(tcSample, textureSizeF); + float dist = distance(position, particle.position); + float coverage = sqrt(saturate(1.0 - dist / particle.radius)); + coverage *= 0.25 + 0.75 * SimplexNoise3D(0.25 * (position - particle.position)); + particleCoverage += coverage; + } + particleCoverage /= float(VoxelSampleCount); + } + else if(isMediumParticle) + { + float3 basePosition = scene.FroxelIndexToWorldSpace(froxelThreadId, textureSizeF); + for(uint s = 0; s < SphereSampleCount; s++) + { + float3 position = basePosition + particle.radius * SphereSamples[s]; + int3 sampleVoxelIdx = scene.FroxelWorldSpaceToIndex(position, textureSizeF); + bool isInVoxel = all(froxelThreadId == uint3(sampleVoxelIdx)); + float dist = isInVoxel ? distance(position, particle.position) : 0.0; + float coverage = sqrt(saturate(1.0 - dist / particle.radius)); + particleCoverage += coverage; + } + particleCoverage /= float(SphereSampleCount); + particleCoverage *= min(SphereVolume(particle.radius) / scene.FroxelVolume(froxelThreadId, textureSizeF), 1.0); + } + else + { + // assumes the sphere's density is not 1 but 1/distance + float density = 2.0 * PI * particle.radius * particle.radius; + particleCoverage = min(density / scene.FroxelVolume(froxelThreadId, textureSizeF), 1.0); + } + + if(particleCoverage == 0.0) + { + continue; + } + + uint4 scatterAbs = g_materialScale * particleCoverage * float4(scattering, particle.absorption); + uint4 emissiveAniso = float4(g_materialScale.xxx, g_anisotropyScale) * particleCoverage * float4(emissive, particle.anisotropy); + uint coverage = g_coverageScale * particleCoverage; + InterlockedAdd(s_scatterR[froxelFlatIndex], scatterAbs.r); + InterlockedAdd(s_scatterG[froxelFlatIndex], scatterAbs.g); + InterlockedAdd(s_scatterB[froxelFlatIndex], scatterAbs.b); + InterlockedAdd(s_absorption[froxelFlatIndex], scatterAbs.w); + InterlockedAdd(s_emissiveR[froxelFlatIndex], emissiveAniso.r); + 
InterlockedAdd(s_emissiveG[froxelFlatIndex], emissiveAniso.g); + InterlockedAdd(s_emissiveB[froxelFlatIndex], emissiveAniso.b); + InterlockedAdd(s_anisotropy[froxelFlatIndex], emissiveAniso.w); + InterlockedAdd(s_coverage[froxelFlatIndex], coverage); + } + } + } + +#else + + int3 froxelIndex = scene.FroxelWorldSpaceToIndex(particle.position, textureSizeF); + if(!IsInRange(froxelIndex, froxelIndexMin, froxelIndexMax)) + { + continue; + } + uint froxelFlatIndex = FlattenIndex(uint3(froxelIndex) - uint3(froxelIndexMin), tileScale); + float particleCoverage = 1.0; + + uint4 scatterAbs = g_materialScale * particleCoverage * float4(scattering, particle.absorption); + uint4 emissiveAniso = float4(g_materialScale.xxx, g_anisotropyScale) * particleCoverage * float4(emissive, particle.anisotropy); + uint coverage = g_coverageScale * particleCoverage; + InterlockedAdd(s_scatterR[froxelFlatIndex], scatterAbs.r); + InterlockedAdd(s_scatterG[froxelFlatIndex], scatterAbs.g); + InterlockedAdd(s_scatterB[froxelFlatIndex], scatterAbs.b); + InterlockedAdd(s_absorption[froxelFlatIndex], scatterAbs.w); + InterlockedAdd(s_emissiveR[froxelFlatIndex], emissiveAniso.r); + InterlockedAdd(s_emissiveG[froxelFlatIndex], emissiveAniso.g); + InterlockedAdd(s_emissiveB[froxelFlatIndex], emissiveAniso.b); + InterlockedAdd(s_anisotropy[froxelFlatIndex], emissiveAniso.w); + InterlockedAdd(s_coverage[froxelFlatIndex], coverage); + +#endif } - if(accumCoverage > 0.0) + GroupMemoryBarrierWithGroupSync(); + + if(smIndex < VOXEL_COUNT && + s_coverage[smIndex] > 0 && + all(id < textureSize)) { + float4 accumScatterAbs = float4(s_scatterR[smIndex], s_scatterG[smIndex], s_scatterB[smIndex], s_absorption[smIndex]) / g_materialScale; + float4 accumEmissiveAniso = float4(s_emissiveR[smIndex], s_emissiveG[smIndex], s_emissiveB[smIndex], s_anisotropy[smIndex]) / float4(g_materialScale.xxx, g_anisotropyScale); + float accumCoverage = s_coverage[smIndex] / g_coverageScale; + materialTextureA[id] += 
accumScatterAbs; materialTextureB[id] += accumEmissiveAniso; materialTextureC[id] += accumCoverage; diff --git a/code/renderer/shaders/crp/vl_frustum_inscatter_ambient.hlsl b/code/renderer/shaders/crp/vl_frustum_inscatter_ambient.hlsl index b002745..c90ea3e 100644 --- a/code/renderer/shaders/crp/vl_frustum_inscatter_ambient.hlsl +++ b/code/renderer/shaders/crp/vl_frustum_inscatter_ambient.hlsl @@ -23,18 +23,14 @@ along with Challenge Quake 3. If not, see . #include "common.hlsli" #include "scene_view.h.hlsli" +#include "light_grid.h.hlsli" cbuffer RootConstants { - float3 centerPosition; + LightGridRC lightGridRC; uint materialTextureAIndex; - float3 worldScale; uint scatterExtTextureIndex; - uint ambientLightTextureAIndex; - uint ambientLightTextureBIndex; - uint ambientSamplerIndex; - uint isLightGridAvailable; } [numthreads(4, 4, 4)] @@ -47,40 +43,28 @@ void cs(uint3 id : SV_DispatchThreadID) return; } - if(isLightGridAvailable != 0) - { - SceneView scene = GetSceneView(); - RWTexture3D materialTextureA = ResourceDescriptorHeap[materialTextureAIndex]; - Texture3D ambientLightTextureA = ResourceDescriptorHeap[ambientLightTextureAIndex]; - Texture3D ambientLightTextureB = ResourceDescriptorHeap[ambientLightTextureBIndex]; - SamplerState ambientSampler = SamplerDescriptorHeap[ambientSamplerIndex]; - float3 ambientTextureSize = float3(GetTextureSize(ambientLightTextureA)); + SceneView scene = GetSceneView(); + RWTexture3D materialTextureA = ResourceDescriptorHeap[materialTextureAIndex]; + ExtinctionCascade cascade = scene.GetExtinctionCascade(scene.extinctionWorldScale.y); + float3 positionWS = scene.FroxelIndexToWorldSpace(id, textureSize); + float3 normalWS = normalize(scene.cameraPosition - positionWS); + float4 scatterAbs = materialTextureA[id]; + float3 scattering = scatterAbs.rgb; + float extinction = Brightness(scattering) + scatterAbs.a; - float3 positionWS = scene.FroxelIndexToWorldSpace(id, textureSize); - float3 normalWS = normalize(scene.cameraPosition - 
positionWS); - float4 scatterAbs = materialTextureA[id]; - float3 scattering = scatterAbs.rgb; - float extinction = Brightness(scattering) + scatterAbs.a; - float3 ambientTC = AABoxWorldSpaceToTC(positionWS, centerPosition, ambientTextureSize, worldScale); - float4 ambientA = ambientLightTextureA.SampleLevel(ambientSampler, ambientTC, 0); - float4 ambientB = ambientLightTextureB.SampleLevel(ambientSampler, ambientTC, 0); - float3 ambientColor = AmbientColor(ambientA, ambientB, normalWS, scene.ambientColor); + if(lightGridRC.isAvailable != 0) + { + LightGrid lightGrid = GetLightGrid(lightGridRC); + LightGridSample ambient = lightGrid.SampleAtPosition(positionWS); + float3 ambientColor = ambient.GetAmbientColor(normalWS, scene.ambientColor, 1, 1); float3 inScattering = scattering * ambientColor * scene.ambientIntensity; - scatterExtTexture[id] = float4(inScattering, extinction); + scatterExtTexture[id] += float4(inScattering, extinction); } else { - SceneView scene = GetSceneView(); - RWTexture3D materialTextureA = ResourceDescriptorHeap[materialTextureAIndex]; - - float3 positionWS = scene.FroxelIndexToWorldSpace(id, textureSize); - float3 normalWS = normalize(scene.cameraPosition - positionWS); - float4 scatterAbs = materialTextureA[id]; - float3 scattering = scatterAbs.rgb; - float extinction = Brightness(scattering) + scatterAbs.a; float3 inScattering = scattering * scene.ambientColor * scene.ambientIntensity; - scatterExtTexture[id] = float4(inScattering, extinction); + scatterExtTexture[id] += float4(inScattering, extinction); } } diff --git a/code/renderer/shaders/crp/vl_frustum_inscatter_point_light.hlsl b/code/renderer/shaders/crp/vl_frustum_inscatter_point_light.hlsl index fa46bec..5456de7 100644 --- a/code/renderer/shaders/crp/vl_frustum_inscatter_point_light.hlsl +++ b/code/renderer/shaders/crp/vl_frustum_inscatter_point_light.hlsl @@ -70,7 +70,7 @@ void cs(uint3 id : SV_DispatchThreadID) float3 shadowTC = AABoxWorldSpaceToTC(froxelPosition, 
lightPosition, GetTextureSize(transmittanceTexture), shadowWorldScale); float trans = transmittanceTexture.SampleLevel(transmittanceSampler, shadowTC, 0); float intensity = saturate(1.0 - dist / radius); - float3 lightRaw = light.color * intensity * 50.0; // @TODO: + float3 lightRaw = light.color * intensity * scene.pointLightIntensityVL; float2 froxelTC = (float2(id.xy) + float2(0.5, 0.5)) / float2(textureSize.xy); float2 froxelNDC = TCToNDC(froxelTC); float3 cameraRay = scene.CamerayRay(froxelNDC); diff --git a/code/renderer/shaders/crp/vl_frustum_inscatter_sunlight.hlsl b/code/renderer/shaders/crp/vl_frustum_inscatter_sunlight.hlsl index 9061daa..ae09b26 100644 --- a/code/renderer/shaders/crp/vl_frustum_inscatter_sunlight.hlsl +++ b/code/renderer/shaders/crp/vl_frustum_inscatter_sunlight.hlsl @@ -34,24 +34,6 @@ cbuffer RootConstants uint sunlightVisTextureIndex; } -float FroxelSize(uint3 index, float3 textureSize, SceneView scene) -{ - float3 tcBase = (float3(index) + float3(0.5, 0.5, 0.5)) / textureSize; - float3 halfTexel = float3(0.5, 0.5, 0.5) / textureSize; - float3 posL = scene.FroxelTCToWorldSpace(tcBase + float3(-halfTexel.x, 0, 0), textureSize); - float3 posR = scene.FroxelTCToWorldSpace(tcBase + float3( halfTexel.x, 0, 0), textureSize); - float w = distance(posL, posR); - float3 posU = scene.FroxelTCToWorldSpace(tcBase + float3(0, halfTexel.y, 0), textureSize); - float3 posD = scene.FroxelTCToWorldSpace(tcBase + float3(0, -halfTexel.y, 0), textureSize); - float h = distance(posU, posD); - float3 posF = scene.FroxelTCToWorldSpace(tcBase + float3(0, 0, halfTexel.z), textureSize); - float3 posB = scene.FroxelTCToWorldSpace(tcBase + float3(0, 0, -halfTexel.z), textureSize); - float d = distance(posF, posB); - float size = max3(w, h, d); - - return size; -} - [numthreads(4, 4, 4)] void cs(uint3 id : SV_DispatchThreadID) { @@ -66,7 +48,8 @@ void cs(uint3 id : SV_DispatchThreadID) RWTexture3D materialTextureB = ResourceDescriptorHeap[materialTextureBIndex]; 
RWTexture3D sunlightVisTexture = ResourceDescriptorHeap[sunlightVisTextureIndex]; SceneView scene = GetSceneView(); - float froxelSize = FroxelSize(id, float3(textureSize), scene); + float3 froxelSize3 = scene.FroxelAverageDimensions(id, float3(textureSize)); + float froxelSize = max3(froxelSize3.x, froxelSize3.y, froxelSize3.z); SunVShadowCascade cascade = scene.GetSunVShadowCascade(froxelSize); float3 positionWS = scene.FroxelIndexToWorldSpace(id, textureSize); @@ -80,7 +63,7 @@ void cs(uint3 id : SV_DispatchThreadID) float3 scattering = materialTextureA[id].rgb; float anisotropy = materialTextureB[id].a; float phase = HenyeyGreenstein(cosTheta, anisotropy); - float3 inScattering = vis * scene.sunColor * scene.sunIntensity * scattering * phase; + float3 inScattering = vis * scene.sunColor * scene.sunIntensityVL * scattering * phase; scatterExtTexture[id].rgb += inScattering; } diff --git a/code/renderer/shaders/crp/vl_frustum_light_propagation.hlsl b/code/renderer/shaders/crp/vl_frustum_light_propagation.hlsl new file mode 100644 index 0000000..4a367ab --- /dev/null +++ b/code/renderer/shaders/crp/vl_frustum_light_propagation.hlsl @@ -0,0 +1,157 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . 
+=========================================================================== +*/ +// volumetric lighting: raymarch froxels on the X/Y axis to propagate light from emissive froxels + + +#include "common.hlsli" +#include "scene_view.h.hlsli" + + +cbuffer RootConstants +{ + uint materialTextureAIndex; + uint materialTextureBIndex; + float emissiveScatter; // how much of the light emitted sideways is reflected towards the camera +} + +[numthreads(8, 8, 1)] +void cs(uint3 id : SV_DispatchThreadID) +{ + RWTexture3D materialTextureA = ResourceDescriptorHeap[materialTextureAIndex]; + uint3 textureSize = GetTextureSize(materialTextureA); +#if DIRECTION_NX || DIRECTION_PX + if(any(id.xy >= textureSize.yz)) +#else + if(any(id.xy >= textureSize.xz)) +#endif + { + return; + } + + SceneView scene = GetSceneView(); + RWTexture3D materialTextureB = ResourceDescriptorHeap[materialTextureBIndex]; + +#if DIRECTION_PX + uint3 coords = uint3(0, id.x, id.y); + uint3 index0 = coords; + float3 tc0 = (float3(index0) + float3(0, 0.5, 0.5)) / textureSize; + float3 prevPosition = scene.FroxelTCToWorldSpace(tc0, float3(textureSize)); + float3 accumEmit = float3(0, 0, 0); + for(uint x = 1; x < textureSize.x; x++) + { + uint3 prevIndex = uint3(x - 1, coords.yz); + uint3 currIndex = uint3(x, coords.yz); + float3 tc = (float3(currIndex) + float3(0, 0.5, 0.5)) / textureSize; + float3 currPosition = scene.FroxelTCToWorldSpace(tc, float3(textureSize)); + float4 scatterAbs = materialTextureA[prevIndex]; + float3 emissive = materialTextureB[prevIndex].rgb; + float scatter = Brightness(scatterAbs.xyz); + float extinction = scatter + scatterAbs.w; + float dist = distance(currPosition, prevPosition); + float froxelTrans = Transmittance(dist, extinction); + accumEmit = (accumEmit + emissive) * froxelTrans; + float3 extraEmit = accumEmit * emissiveScatter * scatter; + if(any(extraEmit > 0.0)) + { + materialTextureB[currIndex].rgb += extraEmit; + } + prevPosition = currPosition; + } +#elif DIRECTION_NX + 
uint3 coords = uint3(textureSize.x - 1, id.x, id.y); + uint3 index0 = coords; + float3 tc0 = (float3(index0) + float3(1, 0.5, 0.5)) / textureSize; + float3 prevPosition = scene.FroxelTCToWorldSpace(tc0, float3(textureSize)); + float3 accumEmit = float3(0, 0, 0); + for(int x = textureSize.x - 2; x >= 0; x--) + { + uint3 prevIndex = uint3(x + 1, coords.yz); + uint3 currIndex = uint3(x, coords.yz); + float3 tc = (float3(currIndex) + float3(1, 0.5, 0.5)) / textureSize; + float3 currPosition = scene.FroxelTCToWorldSpace(tc, float3(textureSize)); + float4 scatterAbs = materialTextureA[prevIndex]; + float3 emissive = materialTextureB[prevIndex].rgb; + float scatter = Brightness(scatterAbs.xyz); + float extinction = scatter + scatterAbs.w; + float dist = distance(currPosition, prevPosition); + float froxelTrans = Transmittance(dist, extinction); + accumEmit = (accumEmit + emissive) * froxelTrans; + float3 extraEmit = accumEmit * emissiveScatter * scatter; + if(any(extraEmit > 0.0)) + { + materialTextureB[currIndex].rgb += extraEmit; + } + prevPosition = currPosition; + } +#elif DIRECTION_PY + uint3 index0 = uint3(id.x, 0, id.y); + float3 tc0 = (float3(index0) + float3(0.5, 0, 0.5)) / textureSize; + float3 prevPosition = scene.FroxelTCToWorldSpace(tc0, float3(textureSize)); + float3 accumEmit = float3(0, 0, 0); + for(uint y = 1; y < textureSize.y; y++) + { + uint3 prevIndex = uint3(id.x, y - 1, id.y); + uint3 currIndex = uint3(id.x, y + 0, id.y); + float3 tc = (float3(currIndex) + float3(0.5, 0, 0.5)) / textureSize; + float3 currPosition = scene.FroxelTCToWorldSpace(tc, float3(textureSize)); + float4 scatterAbs = materialTextureA[prevIndex]; + float3 emissive = materialTextureB[prevIndex].rgb; + float scatter = Brightness(scatterAbs.xyz); + float extinction = scatter + scatterAbs.w; + float dist = distance(currPosition, prevPosition); + float froxelTrans = Transmittance(dist, extinction); + accumEmit = (accumEmit + emissive) * froxelTrans; + float3 extraEmit = accumEmit * 
emissiveScatter * scatter; + if(any(extraEmit > 0.0)) + { + materialTextureB[currIndex].rgb += extraEmit; + } + prevPosition = currPosition; + } +#elif DIRECTION_NY + uint3 index0 = uint3(id.x, textureSize.y - 1, id.y); + float3 tc0 = (float3(index0) + float3(0.5, 1, 0.5)) / textureSize; + float3 prevPosition = scene.FroxelTCToWorldSpace(tc0, float3(textureSize)); + float3 accumEmit = float3(0, 0, 0); + for(int y = textureSize.y - 2; y >= 0; y--) + { + uint3 prevIndex = uint3(id.x, y + 1, id.y); + uint3 currIndex = uint3(id.x, y + 0, id.y); + float3 tc = (float3(currIndex) + float3(0.5, 1, 0.5)) / textureSize; + float3 currPosition = scene.FroxelTCToWorldSpace(tc, float3(textureSize)); + float4 scatterAbs = materialTextureA[prevIndex]; + float3 emissive = materialTextureB[prevIndex].rgb; + float scatter = Brightness(scatterAbs.xyz); + float extinction = scatter + scatterAbs.w; + float dist = distance(currPosition, prevPosition); + float froxelTrans = Transmittance(dist, extinction); + accumEmit = (accumEmit + emissive) * froxelTrans; + float3 extraEmit = accumEmit * emissiveScatter * scatter; + if(any(extraEmit > 0.0)) + { + materialTextureB[currIndex].rgb += extraEmit; + } + prevPosition = currPosition; + } +#else + float MissingDirectionMacro[-1]; +#endif +} diff --git a/code/renderer/shaders/crp/vl_frustum_raymarch.hlsl b/code/renderer/shaders/crp/vl_frustum_raymarch.hlsl index 87dc778..3a9d086 100644 --- a/code/renderer/shaders/crp/vl_frustum_raymarch.hlsl +++ b/code/renderer/shaders/crp/vl_frustum_raymarch.hlsl @@ -42,8 +42,19 @@ void cs(uint3 id : SV_DispatchThreadID) return; } - // integScatter is computed using Frostbite's analytical solution: - // Int(S * T(Z) * dZ) == S * (1 - T(Z)) / extinction + // Even if the extinction coefficient is constant all along a given line segment of length Z, + // the transmittance is different at every point. + // We compute the final scatter/emissive using an analytical solution like Frostbite does. 
+ // Integral(T(x) * dx) == Integral(e^(-extinction*x) * dx) [0 to Z] + // == [e^(-extinction*x) / (-extinction)] [0 to Z] + // == (-1 / extinction) * [e^(-extinction*x)] [0 to Z] + // == (-1 / extinction) * [T(x)] [0 to Z] + // == (-1 / extinction) * (T(Z) - T(0)) + // == (-1 / extinction) * (T(Z) - 1) + // == (1 / extinction) * (1 - T(Z)) + // == (1 - T(Z)) / extinction + // The scatter/emissive coefficients are considered uniform in each froxel. + // They are therefore constants that can be pulled out of the integral, hence their omission. SceneView scene = GetSceneView(); RWTexture3D resolveTexture = ResourceDescriptorHeap[resolveTextureIndex]; @@ -58,14 +69,14 @@ void cs(uint3 id : SV_DispatchThreadID) uint3 index = uint3(id.xy, d); float3 tc = (float3(index) + float3(0.5, 0.5, 1)) / textureSize; // far edge of current voxel float4 froxelScatterExt = scatterTexture[index]; - float3 emissive = materialTextureB[index].rgb; - float3 froxelScatter = froxelScatterExt.rgb + emissive; + float3 froxelEmissive = materialTextureB[index].rgb; + float3 froxelScatter = froxelScatterExt.rgb; float froxelExtinction = froxelScatterExt.a; float3 currPosition = scene.FroxelTCToWorldSpace(tc, float3(textureSize)); float depthStep = distance(currPosition, prevPosition); float froxelTrans = Transmittance(depthStep, froxelExtinction); - float3 integScatter = froxelScatter * (1.0 - froxelTrans) / (froxelExtinction == 0.0 ? 1.0 : froxelExtinction); - accumScatter += accumTrans * integScatter; + float froxelTransInteg = (1.0 - froxelTrans) / (froxelExtinction == 0.0 ? 
1.0 : froxelExtinction); + accumScatter += (accumTrans * froxelTransInteg) * (froxelScatter + froxelEmissive); accumTrans *= froxelTrans; resolveTexture[index] = float4(accumScatter, accumTrans); prevPosition = currPosition; diff --git a/code/renderer/shaders/crp/vl_frustum_sunlight_visibility.hlsl b/code/renderer/shaders/crp/vl_frustum_sunlight_visibility.hlsl index c96fe24..629db05 100644 --- a/code/renderer/shaders/crp/vl_frustum_sunlight_visibility.hlsl +++ b/code/renderer/shaders/crp/vl_frustum_sunlight_visibility.hlsl @@ -30,6 +30,7 @@ cbuffer RootConstants { float3 jitter; uint visTextureIndex; + uint frustumVisTextureIndex; uint depthMip; } @@ -43,9 +44,17 @@ void cs(uint3 id : SV_DispatchThreadID) return; } + RWTexture2D frustumVisTexture = ResourceDescriptorHeap[frustumVisTextureIndex]; + uint furthestVisibleFroxelZIndex = frustumVisTexture[id.xy]; + if(id.z > furthestVisibleFroxelZIndex) + { + // this helps fix issues like dark spots around opaque geometry set against the skybox + visTexture[id] = 0.0; + return; + } + SceneView scene = GetSceneView(); RTAS rtas = ResourceDescriptorHeap[scene.tlasBufferIndex]; - Texture2D depthMinMaxTexture = ResourceDescriptorHeap[scene.depthMinMaxTextureIndex]; float2 tc = (float2(id.xy) + float2(0.5, 0.5)) / float2(textureSize.xy); float2 ndc = TCToNDC(tc); @@ -57,14 +66,5 @@ void cs(uint3 id : SV_DispatchThreadID) jitter.z * cameraRay; float vis = TraceVisibilityWithoutAT(rtas, froxelPosition, scene.sunDirection, 10000.0); - // this helps fix dark spots around opaque geometry set against the skybox - float storedDepth = depthMinMaxTexture.mips[depthMip][id.xy].x; - float4 positionCS = mul(scene.projectionMatrix, mul(scene.viewMatrix, float4(froxelPosition, 1))); - float froxelDepth = positionCS.z / positionCS.w; - if(froxelDepth < storedDepth) - { - vis = 0.0; - } - visTexture[id] = vis; } diff --git a/code/renderer/shaders/crp/vl_frustum_temporal.hlsl b/code/renderer/shaders/crp/vl_frustum_temporal.hlsl index 
b53c51c..4adfb0c 100644 --- a/code/renderer/shaders/crp/vl_frustum_temporal.hlsl +++ b/code/renderer/shaders/crp/vl_frustum_temporal.hlsl @@ -25,6 +25,15 @@ along with Challenge Quake 3. If not, see . #include "scene_view.h.hlsli" +#if defined(TYPE_FLOAT4) +typedef float4 Type; +#elif defined(TYPE_FLOAT) +typedef float Type; +#else +#pragma message "define TYPE_FLOAT4 or TYPE_FLOAT" +#endif + + cbuffer RootConstants { uint currTextureIndex; @@ -36,7 +45,7 @@ cbuffer RootConstants [numthreads(4, 4, 4)] void cs(uint3 id : SV_DispatchThreadID) { - RWTexture3D currTexture = ResourceDescriptorHeap[currTextureIndex]; + RWTexture3D currTexture = ResourceDescriptorHeap[currTextureIndex]; uint3 textureSize = GetTextureSize(currTexture); if(any(id >= textureSize)) { @@ -44,17 +53,17 @@ void cs(uint3 id : SV_DispatchThreadID) } SceneView scene = GetSceneView(); - Texture3D prevTexture = ResourceDescriptorHeap[prevTextureIndex]; + Texture3D prevTexture = ResourceDescriptorHeap[prevTextureIndex]; SamplerState prevTextureSampler = SamplerDescriptorHeap[prevTextureSamplerIndex]; float3 tc = scene.FroxelReproject01(id, float3(textureSize)); - float currValue = currTexture[id]; + Type currValue = currTexture[id]; float3 halfPixelSize = float3(0.5, 0.5, 0.5) / float3(textureSize); if(IsInRange(tc, halfPixelSize, float3(1, 1, 1) - halfPixelSize)) { - float prevValue = prevTexture.SampleLevel(prevTextureSampler, tc, 0); - float finalValue = lerp(currValue, prevValue, alpha); - if(finalValue != currValue) + Type prevValue = prevTexture.SampleLevel(prevTextureSampler, tc, 0); + Type finalValue = lerp(currValue, prevValue, alpha); + if(any(finalValue != currValue)) { currTexture[id] = finalValue; } diff --git a/code/renderer/shaders/crp/vl_nanovdb.hlsli b/code/renderer/shaders/crp/vl_nanovdb.hlsli new file mode 100644 index 0000000..f658116 --- /dev/null +++ b/code/renderer/shaders/crp/vl_nanovdb.hlsli @@ -0,0 +1,279 @@ +/* 
+=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . +=========================================================================== +*/ +// volumetric lighting: sampling NanoVDB volumes over oriented froxels and axis-aligned boxes + + +#pragma once + + +#include "common.hlsli" +#include "scene_view.h.hlsli" +#define PNANOVDB_HLSL +#include "PNanoVDB.h" + + +struct Transform +{ + float3x3 worldToIndex; + float3 originOffset; + float3 translation; + float3 stepSize; + + void DecodeTransform(float4 transform[4], float2 transform2) + { + worldToIndex = float3x3( + transform[0].x, transform[0].y, transform[0].z, + transform[0].w, transform[1].x, transform[1].y, + transform[1].z, transform[1].w, transform[2].x); + originOffset = float3(transform[2].yzw); + translation = float3(transform[3].xyz); + stepSize = float3(transform[3].w, transform2.x, transform2.y); + } +}; + +Transform DecodeTransform(float4 packed[4], float2 packed2) +{ + Transform transform; + transform.DecodeTransform(packed, packed2); + + return transform; +} + +struct SampleResult +{ + float sum; + int maxSampleCount; + int sampleCount; + float transmittance; + float3 inScatteredLight; + + void Clear() + { + sum = 0.0; + maxSampleCount = 1; + sampleCount = 0; + transmittance = 1.0; + 
inScatteredLight = float3(0, 0, 0); + } +}; + +SampleResult CreateSampleResult() +{ + SampleResult result; + result.Clear(); + + return result; +} + +struct Grid +{ + pnanovdb_buf_t buffer; + pnanovdb_grid_handle_t grid; + pnanovdb_tree_handle_t tree; + pnanovdb_root_handle_t root; + pnanovdb_vec3_t bboxMin; + pnanovdb_vec3_t bboxMax; + pnanovdb_uint32_t gridType; + pnanovdb_readaccessor_t accessor; + + void Init(pnanovdb_buf_t gridBuffer, uint gridByteOffset) + { + buffer = gridBuffer; + grid.address.byte_offset = gridByteOffset; + /* Assign the tree/root members directly: same-named locals would shadow them + and leave the members of the returned Grid uninitialized. */ + tree = pnanovdb_grid_get_tree(buffer, grid); + root = pnanovdb_tree_get_root(buffer, tree); + bboxMin = pnanovdb_coord_to_vec3(pnanovdb_root_get_bbox_min(buffer, root)); + bboxMax = pnanovdb_coord_to_vec3(pnanovdb_root_get_bbox_max(buffer, root)); + gridType = pnanovdb_grid_get_grid_type(buffer, grid); + pnanovdb_readaccessor_init(accessor, root); + } + + float3 WorldToIndex(float3 worldPosition, Transform transform) + { + float3 index = mul(transform.worldToIndex, worldPosition - transform.translation) - transform.originOffset; + + return index; + } + + float ReadFloat(pnanovdb_coord_t coords) + { + pnanovdb_uint32_t level; + pnanovdb_address_t address = pnanovdb_readaccessor_get_value_address_and_level(gridType, buffer, accessor, coords, level); + float result; + if(level == 0u && gridType != PNANOVDB_GRID_TYPE_FLOAT) + { + if(gridType == PNANOVDB_GRID_TYPE_FPN) + { + result = pnanovdb_leaf_fpn_read_float(buffer, address, coords); + } + else + { + result = pnanovdb_leaf_fp_read_float(buffer, address, coords, gridType - PNANOVDB_GRID_TYPE_FP4 + 2u); + } + } + else + { + result = pnanovdb_read_float(buffer, address); + } + + return result; + } + + SampleResult GetFroxelAverage(SceneView scene, float3 froxelPosition, float3 froxelSize, Transform transform) + { + int3 steps = int3(ceil(froxelSize / (2.0 * transform.stepSize))); + + 
SampleResult result = CreateSampleResult(); /* also defaults transmittance and inScatteredLight, which this function never writes */ + result.maxSampleCount = (2 * steps.x + 1) * (2 * steps.y + 1) * (2 * steps.z + 1); + result.sampleCount = 0; + result.sum = 0.0; + for(int z = -steps.z; z <= steps.z; z++) + { + float3 offZ = float(z) * transform.stepSize.z * scene.cameraForward; + for(int y = -steps.y; y <= steps.y; y++) + { + float3 offY = float(y) * transform.stepSize.y * scene.cameraUp; + for(int x = -steps.x; x <= steps.x; x++) + { + float3 offX = float(x) * transform.stepSize.x * scene.cameraLeft; + float3 worldPosition = froxelPosition + offX + offY + offZ; + float3 indexF = WorldToIndex(worldPosition, transform); + if(IsInRange(indexF, bboxMin, bboxMax)) + { + int3 index = int3(indexF); + float value = ReadFloat(index); + result.sum += value; + result.sampleCount += value > 0.0 ? 1.0 : 0.0; + } + } + } + } + + return result; + } + + SampleResult GetAxisAlignedBoxAverage(SceneView scene, float3 voxelPosition, float3 voxelSize, Transform transform) + { + int3 steps = int3(voxelSize / (2.0 * transform.stepSize)); + + SampleResult result = CreateSampleResult(); /* also defaults transmittance and inScatteredLight, which this function never writes */ + result.maxSampleCount = (2 * steps.x + 1) * (2 * steps.y + 1) * (2 * steps.z + 1); + result.sampleCount = 0; + result.sum = 0.0; + for(int z = -steps.z; z <= steps.z; z++) + { + for(int y = -steps.y; y <= steps.y; y++) + { + for(int x = -steps.x; x <= steps.x; x++) + { + float3 worldPosition = voxelPosition + float3(x, y, z) * transform.stepSize; + float3 indexF = WorldToIndex(worldPosition, transform); + int3 index = int3(indexF); + float value = ReadFloat(index); + result.sum += value; + result.sampleCount += value > 0.0 ? 
1.0 : 0.0; + } + } + } + + return result; + } + + void ExpandFroxelBoundingBox(inout float3 boxMin, inout float3 boxMax, SceneView scene, float3 tc, float3 textureSize, Transform transform) + { + float3 pointWS = scene.FroxelTCToWorldSpace(tc, textureSize); + float3 indexF = WorldToIndex(pointWS, transform); + ExpandBoundingBox(boxMin, boxMax, indexF); + } + + bool OverlapsFroxel(SceneView scene, int3 index, float3 textureSize, Transform transform) + { + float3 tc = (float3(index) + float3(0.5, 0.5, 0.5)) / textureSize; + float3 halfTexel = float3(0.5, 0.5, 0.5) / textureSize; + float3 froxelMin, froxelMax; + ClearBoundingBox(froxelMin, froxelMax); + ExpandFroxelBoundingBox(froxelMin, froxelMax, scene, tc + float3(-halfTexel.x, 0, 0), textureSize, transform); + ExpandFroxelBoundingBox(froxelMin, froxelMax, scene, tc + float3(halfTexel.x, 0, 0), textureSize, transform); + ExpandFroxelBoundingBox(froxelMin, froxelMax, scene, tc + float3(0, -halfTexel.y, 0), textureSize, transform); + ExpandFroxelBoundingBox(froxelMin, froxelMax, scene, tc + float3(0, halfTexel.y, 0), textureSize, transform); + ExpandFroxelBoundingBox(froxelMin, froxelMax, scene, tc + float3(0, 0, -halfTexel.z), textureSize, transform); + ExpandFroxelBoundingBox(froxelMin, froxelMax, scene, tc + float3(0, 0, halfTexel.z), textureSize, transform); + bool overlaps = all(froxelMax >= bboxMin) && all(froxelMin <= bboxMax); + + return overlaps; + } + + void ExpandAABB(inout float3 boxMin, inout float3 boxMax, SceneView scene, float3 tc, float3 textureSize, float worldScale, Transform transform) + { + float3 pointWS = scene.ExtinctionTCToWorldSpace(tc, textureSize, worldScale); + float3 indexF = WorldToIndex(pointWS, transform); + ExpandBoundingBox(boxMin, boxMax, indexF); + } + + bool OverlapsAxisAlignedBox(SceneView scene, int3 index, float3 textureSize, float worldScale, Transform transform) + { + float3 tc = (float3(index) + float3(0.5, 0.5, 0.5)) / textureSize; + float3 halfTexel = float3(0.5, 0.5, 
0.5) / textureSize; + float3 voxelMin, voxelMax; + ClearBoundingBox(voxelMin, voxelMax); + ExpandAABB(voxelMin, voxelMax, scene, tc + float3(-halfTexel.x, 0, 0), textureSize, worldScale, transform); + ExpandAABB(voxelMin, voxelMax, scene, tc + float3(halfTexel.x, 0, 0), textureSize, worldScale, transform); + ExpandAABB(voxelMin, voxelMax, scene, tc + float3(0, -halfTexel.y, 0), textureSize, worldScale, transform); + ExpandAABB(voxelMin, voxelMax, scene, tc + float3(0, halfTexel.y, 0), textureSize, worldScale, transform); + ExpandAABB(voxelMin, voxelMax, scene, tc + float3(0, 0, -halfTexel.z), textureSize, worldScale, transform); + ExpandAABB(voxelMin, voxelMax, scene, tc + float3(0, 0, halfTexel.z), textureSize, worldScale, transform); + bool overlaps = all(voxelMax >= bboxMin) && all(voxelMin <= bboxMax); + + return overlaps; + } + + float RaymarchTransmittance(float3 position, float3 step, Transform transform, float extinctionScale) + { + float stepDist = length(step); + float accumTrans = 1.0; + while(true) + { + position += step; + float3 indexF = WorldToIndex(position, transform); + if(!IsInRange(indexF, bboxMin, bboxMax)) + { + break; + } + + int3 index = int3(indexF); + float ext = ReadFloat(index) * extinctionScale; + float sampleTrans = Transmittance(stepDist, ext); + accumTrans *= saturate(sampleTrans); + } + + return accumTrans; + } +}; + +Grid GetGrid(pnanovdb_buf_t buffer, uint gridByteOffset) +{ + Grid grid; + grid.Init(buffer, gridByteOffset); + + return grid; +} diff --git a/code/renderer/shaders/crp/vl_particles_dispatch.hlsl b/code/renderer/shaders/crp/vl_particles_clear.hlsl similarity index 59% rename from code/renderer/shaders/crp/vl_particles_dispatch.hlsl rename to code/renderer/shaders/crp/vl_particles_clear.hlsl index 0683886..7d5d82d 100644 --- a/code/renderer/shaders/crp/vl_particles_dispatch.hlsl +++ b/code/renderer/shaders/crp/vl_particles_clear.hlsl @@ -18,38 +18,42 @@ You should have received a copy of the GNU General Public License 
along with Challenge Quake 3. If not, see . =========================================================================== */ -// volumetric lighting: update indirect dispatch buffer for particle injection +// volumetric lighting particles: clear all froxel tiles and global counters #include "common.hlsli" +#include "vl_common.h.hlsli" +#include "scene_view.h.hlsli" cbuffer RootConstants { - uint3 tileResolution; + uint counterBufferIndex; uint tileBufferIndex; - uint dispatchBufferIndex; - uint particleTileBufferIndex; + uint tileCount; } -[numthreads(4, 4, 4)] +[numthreads(64, 1, 1)] void cs(uint3 id : SV_DispatchThreadID) { - if(any(id >= tileResolution)) + if(id.x >= tileCount) { return; } - RWByteAddressBuffer dispatchBuffer = ResourceDescriptorHeap[dispatchBufferIndex]; - RWByteAddressBuffer tileHitBuffer = ResourceDescriptorHeap[tileBufferIndex]; - RWStructuredBuffer tileWorkBuffer = ResourceDescriptorHeap[particleTileBufferIndex]; - - uint tileIndex = FlattenIndex(id, tileResolution); - uint hasParticle = tileHitBuffer.Load(tileIndex * 4); - if(hasParticle != 0) + RWStructuredBuffer tileBuffer= ResourceDescriptorHeap[tileBufferIndex]; + if(id.x == 0) { - uint workIndex; - dispatchBuffer.InterlockedAdd(0, 1, workIndex); - tileWorkBuffer[workIndex] = id; + RWStructuredBuffer counterBuffer = ResourceDescriptorHeap[counterBufferIndex]; + Counters counters; + counters.particleCount = 0; + counters.tileCount = 0; + counterBuffer[0] = counters; } + Tile tile; + tile.firstParticle = 0; + tile.particleCount = 0; + tile.particleIndex = 0; + tile.pad0 = 0; + tileBuffer[id.x] = tile; } diff --git a/code/renderer/shaders/crp/vl_particles_hit.hlsl b/code/renderer/shaders/crp/vl_particles_hit.hlsl new file mode 100644 index 0000000..54e8a55 --- /dev/null +++ b/code/renderer/shaders/crp/vl_particles_hit.hlsl @@ -0,0 +1,88 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of 
Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . +=========================================================================== +*/ +// volumetric lighting particles: count the number of particles in each froxel tile + + +#include "common.hlsli" +#include "vl_common.h.hlsli" +#include "scene_view.h.hlsli" + + +cbuffer RootConstants +{ + uint3 fullResolution; + uint tileBufferIndex; + uint3 tileResolution; + uint pad0; + uint3 tileScale; + uint pad1; + uint particleBufferIndex; + uint emitterBufferIndex; + uint liveBufferIndex; + uint emitterIndex; +} + +[numthreads(64, 1, 1)] +void cs(uint3 id : SV_DispatchThreadID) +{ + RWStructuredBuffer emitterBuffer = ResourceDescriptorHeap[emitterBufferIndex]; + ParticleEmitter emitter = emitterBuffer[emitterIndex]; + if(id.x >= emitter.liveCount2) + { + return; + } + + RWStructuredBuffer particleBuffer = ResourceDescriptorHeap[particleBufferIndex]; + RWStructuredBuffer tileBuffer= ResourceDescriptorHeap[tileBufferIndex]; + RWStructuredBuffer liveBuffer = ResourceDescriptorHeap[liveBufferIndex]; + SceneView scene = GetSceneView(); + + uint firstIndex = emitter.firstIndex; + uint particleIndex = liveBuffer[firstIndex + id.x]; + Particle particle = particleBuffer[firstIndex + particleIndex]; + float3 P = particle.position; + float r = particle.radius; + float3 fwd = scene.cameraForward; + float4 extentsXYNDC = ProjectedSphereExtentsNDC(particle.position, 
particle.radius, scene.viewMatrix, scene.projectionMatrix); + float4 extentsXYFroxel = (extentsXYNDC * 0.5 + float(0.5).xxxx) * float4(fullResolution.xy, fullResolution.xy); + int2 extentsZFroxel = scene.FroxelSphereZExtents(particle.position, particle.radius, fullResolution); + int3 boxMin = int3(extentsXYFroxel.x, extentsXYFroxel.y, extentsZFroxel.x); + int3 boxMax = int3(ceil(extentsXYFroxel.z), ceil(extentsXYFroxel.w), extentsZFroxel.y); + particleBuffer[firstIndex + particleIndex].froxelMin = max(boxMin, int3(0, 0, 0)); + particleBuffer[firstIndex + particleIndex].froxelMax = min(boxMax, int3(fullResolution) - int3(1, 1, 1)); + boxMin /= int3(tileScale); + boxMax /= int3(tileScale); + boxMin = max(boxMin, int3(0, 0, 0)); + boxMax = min(boxMax, int3(tileResolution) - int3(1, 1, 1)); + + for(int x = boxMin.x; x <= boxMax.x; x++) + { + for(int y = boxMin.y; y <= boxMax.y; y++) + { + for(int z = boxMin.z; z <= boxMax.z; z++) + { + uint3 tileIndex = uint3(x, y, z); + uint flatTileIndex = FlattenIndex(tileIndex, tileResolution); + InterlockedAdd(tileBuffer[flatTileIndex].particleCount, 1); + } + } + } +} diff --git a/code/renderer/shaders/crp/vl_particles_preprocess_frustum.hlsl b/code/renderer/shaders/crp/vl_particles_list.hlsl similarity index 50% rename from code/renderer/shaders/crp/vl_particles_preprocess_frustum.hlsl rename to code/renderer/shaders/crp/vl_particles_list.hlsl index 84158af..f1bcc27 100644 --- a/code/renderer/shaders/crp/vl_particles_preprocess_frustum.hlsl +++ b/code/renderer/shaders/crp/vl_particles_list.hlsl @@ -18,53 +18,49 @@ You should have received a copy of the GNU General Public License along with Challenge Quake 3. If not, see . 
=========================================================================== */ -// volumetric lighting: pre-process particles for frustum volume injection +// volumetric lighting particles: create final live particle index array #include "common.hlsli" +#include "vl_common.h.hlsli" #include "scene_view.h.hlsli" cbuffer RootConstants { uint3 fullResolution; - uint tileBufferIndex; + uint emitterIndex; uint3 tileResolution; - uint particleBufferIndex; + uint maxParticleIndexCount; uint3 tileScale; - uint particleCount; + uint tileBufferIndex; + uint emitterBufferIndex; + uint particleBufferIndex; + uint liveBufferIndex; + uint indexBufferIndex; } [numthreads(64, 1, 1)] void cs(uint3 id : SV_DispatchThreadID) { - uint particleIndex = id.x; - if(particleIndex >= particleCount) + RWStructuredBuffer emitterBuffer = ResourceDescriptorHeap[emitterBufferIndex]; + ParticleEmitter emitter = emitterBuffer[emitterIndex]; + if(id.x >= emitter.liveCount2) { return; } - StructuredBuffer particleBuffer = ResourceDescriptorHeap[particleBufferIndex]; - RWByteAddressBuffer tileBuffer = ResourceDescriptorHeap[tileBufferIndex]; + RWStructuredBuffer particleBuffer = ResourceDescriptorHeap[particleBufferIndex]; + RWStructuredBuffer tileBuffer = ResourceDescriptorHeap[tileBufferIndex]; + RWStructuredBuffer liveBuffer = ResourceDescriptorHeap[liveBufferIndex]; + RWStructuredBuffer indexBuffer = ResourceDescriptorHeap[indexBufferIndex]; SceneView scene = GetSceneView(); - Particle particle = particleBuffer[particleIndex]; - float3 P = particle.position; - float r = particle.radius * 1.0625; - float3 left = scene.cameraLeft; - float3 up = scene.cameraUp; - float3 fwd = scene.cameraForward; - int3 boxMin; - int3 boxMax; - ClearBoundingBox(boxMin, boxMax); - ExpandBoundingBox(boxMin, boxMax, scene.FroxelWorldSpaceToIndex(P + r * left, fullResolution)); - ExpandBoundingBox(boxMin, boxMax, scene.FroxelWorldSpaceToIndex(P - r * left, fullResolution)); - ExpandBoundingBox(boxMin, boxMax, 
scene.FroxelWorldSpaceToIndex(P + r * up, fullResolution)); - ExpandBoundingBox(boxMin, boxMax, scene.FroxelWorldSpaceToIndex(P - r * up, fullResolution)); - ExpandBoundingBox(boxMin, boxMax, scene.FroxelWorldSpaceToIndex(P + r * fwd, fullResolution)); - ExpandBoundingBox(boxMin, boxMax, scene.FroxelWorldSpaceToIndex(P - r * fwd, fullResolution)); - boxMin /= int3(tileScale); - boxMax /= int3(tileScale); + uint firstIndex = emitter.firstIndex; + uint particleIndex = liveBuffer[firstIndex + id.x]; + Particle particle = particleBuffer[firstIndex + particleIndex]; + int3 boxMin = particle.froxelMin / int3(tileScale); + int3 boxMax = particle.froxelMax / int3(tileScale); boxMin = max(boxMin, int3(0, 0, 0)); boxMax = min(boxMax, int3(tileResolution) - int3(1, 1, 1)); for(int x = boxMin.x; x <= boxMax.x; x++) @@ -74,8 +70,15 @@ void cs(uint3 id : SV_DispatchThreadID) for(int z = boxMin.z; z <= boxMax.z; z++) { uint3 tileIndex = uint3(x, y, z); - uint index = FlattenIndex(tileIndex, tileResolution); - tileBuffer.Store(index * 4, 1); + uint flatTileIndex = FlattenIndex(tileIndex, tileResolution); + uint particleWriteOffset; + InterlockedAdd(tileBuffer[flatTileIndex].particleIndex, 1, particleWriteOffset); + uint particleWriteIndex = tileBuffer[flatTileIndex].firstParticle + particleWriteOffset; + if(particleWriteOffset < tileBuffer[flatTileIndex].particleCount && + particleWriteIndex < maxParticleIndexCount) + { + indexBuffer[particleWriteIndex] = firstIndex + particleIndex; + } } } } diff --git a/code/renderer/shaders/crp/vl_particles_preprocess_extinction.hlsl b/code/renderer/shaders/crp/vl_particles_preprocess_extinction.hlsl deleted file mode 100644 index d8332eb..0000000 --- a/code/renderer/shaders/crp/vl_particles_preprocess_extinction.hlsl +++ /dev/null @@ -1,73 +0,0 @@ -/* -=========================================================================== -Copyright (C) 2024 Gian 'myT' Schellenbaum - -This file is part of Challenge Quake 3 (CNQ3). 
- -Challenge Quake 3 is free software; you can redistribute it -and/or modify it under the terms of the GNU General Public License as -published by the Free Software Foundation; either version 2 of the License, -or (at your option) any later version. - -Challenge Quake 3 is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with Challenge Quake 3. If not, see . -=========================================================================== -*/ -// volumetric lighting: pre-process particles for extinction volume injection - - -#include "common.hlsli" -#include "scene_view.h.hlsli" - - -cbuffer RootConstants -{ - uint3 fullResolution; - uint tileBufferIndex; - uint3 tileResolution; - uint particleBufferIndex; - uint3 tileScale; - uint particleCount; - float extinctionWorldScale; -} - -[numthreads(64, 1, 1)] -void cs(uint3 id : SV_DispatchThreadID) -{ - uint particleIndex = id.x; - if(particleIndex >= particleCount) - { - return; - } - - StructuredBuffer particleBuffer = ResourceDescriptorHeap[particleBufferIndex]; - RWByteAddressBuffer tileBuffer = ResourceDescriptorHeap[tileBufferIndex]; - SceneView scene = GetSceneView(); - - Particle particle = particleBuffer[particleIndex]; - float3 P = particle.position; - float r = particle.radius; - int3 boxMin = scene.ExtinctionWorldSpaceToIndex(P - float3(r, r, r), fullResolution, extinctionWorldScale); - int3 boxMax = scene.ExtinctionWorldSpaceToIndex(P + float3(r, r, r), fullResolution, extinctionWorldScale); - boxMin /= int3(tileScale); - boxMax /= int3(tileScale); - boxMin = max(boxMin, int3(0, 0, 0)); - boxMax = min(boxMax, int3(tileResolution) - int3(1, 1, 1)); - for(int x = boxMin.x; x <= boxMax.x; x++) - { - for(int y = boxMin.y; y <= boxMax.y; y++) - { - for(int z = boxMin.z; z 
<= boxMax.z; z++) - { - uint3 tileIndex = uint3(x, y, z); - uint index = FlattenIndex(tileIndex, tileResolution); - tileBuffer.Store(index * 4, 1); - } - } - } -} diff --git a/code/renderer/shaders/crp/vl_particles_tiles.hlsl b/code/renderer/shaders/crp/vl_particles_tiles.hlsl new file mode 100644 index 0000000..21f7f8f --- /dev/null +++ b/code/renderer/shaders/crp/vl_particles_tiles.hlsl @@ -0,0 +1,59 @@ +/* +=========================================================================== +Copyright (C) 2024 Gian 'myT' Schellenbaum + +This file is part of Challenge Quake 3 (CNQ3). + +Challenge Quake 3 is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Challenge Quake 3 is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Challenge Quake 3. If not, see . 
+=========================================================================== +*/ +// volumetric lighting particles: create compacted froxel tile array and compute per-tile particle offset + + +#include "common.hlsli" +#include "vl_common.h.hlsli" +#include "scene_view.h.hlsli" + + +cbuffer RootConstants +{ + uint counterBufferIndex; + uint tileBufferIndex; + uint tileIndexBufferIndex; + uint tileCount; +} + +[numthreads(64, 1, 1)] +void cs(uint3 id : SV_DispatchThreadID) +{ + uint tileIndex = id.x; + if(tileIndex >= tileCount) + { + return; + } + + RWStructuredBuffer tileBuffer = ResourceDescriptorHeap[tileBufferIndex]; + if(tileBuffer[tileIndex].particleCount == 0) + { + return; + } + + RWStructuredBuffer counterBuffer = ResourceDescriptorHeap[counterBufferIndex]; + RWStructuredBuffer tileIndexBuffer = ResourceDescriptorHeap[tileIndexBufferIndex]; + + InterlockedAdd(counterBuffer[0].particleCount, tileBuffer[tileIndex].particleCount, tileBuffer[tileIndex].firstParticle); + uint tileIndexIndex; + InterlockedAdd(counterBuffer[0].tileCount, 1, tileIndexIndex); + tileIndexBuffer[tileIndexIndex] = tileIndex; +} diff --git a/code/renderer/tr_cmds.cpp b/code/renderer/tr_cmds.cpp index 0467be9..10ac9ee 100644 --- a/code/renderer/tr_cmds.cpp +++ b/code/renderer/tr_cmds.cpp @@ -337,6 +337,7 @@ void RE_BeginFrame( stereoFrame_t stereoFrame ) tr.renderMode = RM_NONE; tr.sceneCounterRT = 0; tr.numRTSurfs = 0; + tr.hasWorldRender = qfalse; // delayed screenshot if ( r_delayedScreenshotPending ) { diff --git a/code/renderer/tr_local.h b/code/renderer/tr_local.h index 540ce44..7ba63e7 100644 --- a/code/renderer/tr_local.h +++ b/code/renderer/tr_local.h @@ -1008,6 +1008,11 @@ typedef struct { renderMode_t renderMode; + // world rendering info for the current frame + qbool hasWorldRender; + int worldRenderTimeMS; + int worldRenderTimeUS; // [0;999] + // the following are only to be used after calling R_UpdateShader() // the save state boolean is needed because otherwise, a delayed 
shader load // could change the error and warning messages of the edited shader @@ -1669,6 +1674,7 @@ void R_TransposeMatrix( const matrix4x4_t in, matrix4x4_t out ); void R_CameraPositionFromMatrix( const matrix4x4_t modelView, vec3_t cameraPos ); void R_CameraAxisVectorsFromMatrix( const matrix4x4_t modelView, vec3_t axisX, vec3_t axisY, vec3_t axisZ ); void R_MakeIdentityMatrix( matrix4x4_t m ); +void R_MakeIdentityMatrix3x3( matrix3x3_t m ); void R_MakeOrthoProjectionMatrix( matrix4x4_t m, float w, float h ); // LinearDepth(depthZW, A, B, C) -> A / (B + depthZW * C) @@ -1774,6 +1780,7 @@ struct RHIInfo qbool isCacheCoherentUMA; qbool hasInlineRaytracing; qbool hasBarycentrics; + qbool forceNanoVDBPreviewMode; // work-around for driver crashes (shader compiler) }; extern RHIInfo rhiInfo; diff --git a/code/renderer/tr_main.cpp b/code/renderer/tr_main.cpp index 3b0a215..f247145 100644 --- a/code/renderer/tr_main.cpp +++ b/code/renderer/tr_main.cpp @@ -456,6 +456,20 @@ void R_MakeIdentityMatrix( matrix4x4_t m ) } +void R_MakeIdentityMatrix3x3( matrix3x3_t m ) +{ + m[0] = 1.0f; + m[1] = 0.0f; + m[2] = 0.0f; + m[3] = 0.0f; + m[4] = 1.0f; + m[5] = 0.0f; + m[6] = 0.0f; + m[7] = 0.0f; + m[8] = 1.0f; +} + + void R_MakeOrthoProjectionMatrix( matrix4x4_t m, float w, float h ) { // 2/(r-l) 0 0 0 diff --git a/code/renderer/tr_scene.cpp b/code/renderer/tr_scene.cpp index ed5a9cf..de5f973 100644 --- a/code/renderer/tr_scene.cpp +++ b/code/renderer/tr_scene.cpp @@ -292,6 +292,11 @@ void RE_RenderScene( const refdef_t* fd, int us ) if ((tr.refdef.rdflags & RDF_NOWORLDMODEL) == 0) { tr.sceneCounterRT++; } + if (!tr.hasWorldRender && (tr.refdef.rdflags & RDF_NOWORLDMODEL) == 0) { + tr.hasWorldRender = qtrue; + tr.worldRenderTimeMS = tr.refdef.time; + tr.worldRenderTimeUS = tr.refdef.microSeconds; + } // setup view parms for the initial view // diff --git a/code/shadercomp/shadercomp.cpp b/code/shadercomp/shadercomp.cpp index a79e965..47ad66e 100644 --- 
a/code/shadercomp/shadercomp.cpp +++ b/code/shadercomp/shadercomp.cpp @@ -268,20 +268,31 @@ void CompilePixelShader(const char* headerPath, const char* shaderPath, const ch CompileShader(args, psExtraCount, psExtras); } -void CompileCompute(const char* headerPath, const char* shaderPath, const char* varName) +void CompileCompute(const char* headerPath, const char* shaderPath, const char* varName, int csOptionCount = 0, ...) { - const char* extras[] = + int csExtraCount = 4; + const char* csExtras[64] = { "-D", "COMPUTE_SHADER=1", "-Vn", HeaderVariable(va("g_%s_cs", varName)) }; + assert(csExtraCount + csOptionCount <= _countof(csExtras)); + + va_list argPtr; + va_start(argPtr, csOptionCount); + for(int i = 0; i < csOptionCount; i++) + { + csExtras[csExtraCount++] = va_arg(argPtr, const char*); + } + va_end(argPtr); + ShaderArgs args; args.entryPoint = "cs"; args.headerPath = headerPath; args.shaderPath = shaderPath; args.targetProfile = targetCS; - CompileShader(args, _countof(extras), extras); + CompileShader(args, csExtraCount, csExtras); } void CompileUberVS(const char* headerPath, const char* shaderPath, int stageCount) @@ -455,22 +466,23 @@ void ProcessCRP() CompilePixelShader("sun_blur.h", "sun_blur.hlsl", "sun_blur"); const char* vlComputeShaders[] = { -#if 0 - "vl_particles_dispatch", - "vl_particles_preprocess_extinction", - "vl_particles_preprocess_frustum", - "vl_extinction_injection_particles", - "vl_frustum_injection_particles", -#endif "vl_extinction_injection_fog", + "vl_extinction_injection_nanovdb", + //"vl_extinction_injection_particles", "vl_frustum_anisotropy_average", + "vl_frustum_depth_test", "vl_frustum_injection_fog", + "vl_frustum_injection_nanovdb", + "vl_frustum_injection_particles", "vl_frustum_inscatter_ambient", "vl_frustum_inscatter_point_light", "vl_frustum_inscatter_sunlight", "vl_frustum_raymarch", "vl_frustum_sunlight_visibility", - "vl_frustum_temporal", + "vl_particles_clear", + "vl_particles_hit", + "vl_particles_list", + 
"vl_particles_tiles", "vl_shadow_point_light", "vl_shadow_sun" }; @@ -479,10 +491,21 @@ void ProcessCRP() const char* const s = vlComputeShaders[i]; CompileCompute(va("%s.h", s), va("%s.hlsl", s), s); } + CompileCompute("vl_frustum_temporal_float4.h", "vl_frustum_temporal.hlsl", "vl_frustum_temporal_float4", 1, "-D TYPE_FLOAT4=1"); + CompileCompute("vl_frustum_temporal_float.h", "vl_frustum_temporal.hlsl", "vl_frustum_temporal_float", 1, "-D TYPE_FLOAT=1"); + CompileCompute("vl_frustum_injection_nanovdb_lq.h", "vl_frustum_injection_nanovdb.hlsl", "vl_frustum_injection_nanovdb_lq", 1, "-D PREVIEW_MODE=1"); + CompileCompute("vl_frustum_light_propagation_nx.h", "vl_frustum_light_propagation.hlsl", "vl_frustum_light_propagation_nx", 1, "-D DIRECTION_NX=1"); + CompileCompute("vl_frustum_light_propagation_ny.h", "vl_frustum_light_propagation.hlsl", "vl_frustum_light_propagation_ny", 1, "-D DIRECTION_NY=1"); + CompileCompute("vl_frustum_light_propagation_px.h", "vl_frustum_light_propagation.hlsl", "vl_frustum_light_propagation_px", 1, "-D DIRECTION_PX=1"); + CompileCompute("vl_frustum_light_propagation_py.h", "vl_frustum_light_propagation.hlsl", "vl_frustum_light_propagation_py", 1, "-D DIRECTION_PY=1"); CompileGraphics("vl_debug_ambient.h", "vl_debug_ambient.hlsl", "vl_debug_ambient"); CompileGraphics("vl_debug_extinction.h", "vl_debug_extinction.hlsl", "vl_debug_extinction"); CompileGraphics("vl_debug_shadow_sun.h", "vl_debug_shadow_sun.hlsl", "vl_debug_shadow_sun"); CompileCompute("depth_pyramid.h", "depth_pyramid.hlsl", "depth_pyramid"); + CompileCompute("particles_clear.h", "particles_clear.hlsl", "particles_clear"); + CompileCompute("particles_setup.h", "particles_setup.hlsl", "particles_setup"); + CompileCompute("particles_emit.h", "particles_emit.hlsl", "particles_emit"); + CompileCompute("particles_simulate.h", "particles_simulate.hlsl", "particles_simulate"); } int main(int /*argc*/, const char** argv) diff --git a/makefiles/premake5.lua b/makefiles/premake5.lua 
index 198b798..b30636f 100644 --- a/makefiles/premake5.lua +++ b/makefiles/premake5.lua @@ -650,6 +650,7 @@ solution "cnq3" kind "StaticLib" language "C++" AddSourcesAndHeaders("renderer") + includedirs { string.format("%s", path_src) } if os.istarget("bsd") then includedirs { "/usr/local/include" } end @@ -668,6 +669,7 @@ solution "cnq3" flags { "ExcludeFromBuild" } filter { } end + files { string.format("%s/renderer/shaders/**.h", path_src) } ApplyLibProjectSettings() includedirs { path_src.."/imgui" } filter "action:gmake" diff --git a/makefiles/windows_vs2019/renderer.vcxproj b/makefiles/windows_vs2019/renderer.vcxproj index 75ee910..2d6295e 100644 --- a/makefiles/windows_vs2019/renderer.vcxproj +++ b/makefiles/windows_vs2019/renderer.vcxproj @@ -59,7 +59,7 @@ NotUsing Level4 DEBUG;_DEBUG;_CRT_SECURE_NO_WARNINGS;WIN32;_WIN32;_HAS_EXCEPTIONS=0;%(PreprocessorDefinitions) - ..\..\..\cnq3tools\aftermath;..\..\..\cnq3tools\nvapi;..\..\code\imgui;%(AdditionalIncludeDirectories) + ..\..\code;..\..\..\cnq3tools\aftermath;..\..\..\cnq3tools\nvapi;..\..\code\imgui;%(AdditionalIncludeDirectories) ProgramDatabase Disabled MultiThreadedDebug @@ -83,7 +83,7 @@ Level4 Default NDEBUG;_CRT_SECURE_NO_WARNINGS;WIN32;_WIN32;_HAS_EXCEPTIONS=0;%(PreprocessorDefinitions) - ..\..\..\cnq3tools\aftermath;..\..\..\cnq3tools\nvapi;..\..\code\imgui;%(AdditionalIncludeDirectories) + ..\..\code;..\..\..\cnq3tools\aftermath;..\..\..\cnq3tools\nvapi;..\..\code\imgui;%(AdditionalIncludeDirectories) ProgramDatabase MinSpace true @@ -117,6 +117,7 @@ + @@ -135,7 +136,9 @@ + + @@ -273,6 +276,18 @@ true + + true + + + true + + + true + + + true + true @@ -315,15 +330,21 @@ true - + true true + + true + true + + true + true @@ -336,6 +357,9 @@ true + + true + true @@ -345,13 +369,16 @@ true - + true - + true - + + true + + true @@ -421,12 +448,14 @@ + + diff --git a/makefiles/windows_vs2019/renderer.vcxproj.filters b/makefiles/windows_vs2019/renderer.vcxproj.filters index 0491571..74cfde0 100644 --- 
a/makefiles/windows_vs2019/renderer.vcxproj.filters +++ b/makefiles/windows_vs2019/renderer.vcxproj.filters @@ -21,6 +21,9 @@ + + shaders\crp + @@ -39,7 +42,9 @@ + + @@ -177,6 +182,18 @@ shaders\crp + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\crp + shaders\crp @@ -219,15 +236,21 @@ shaders\crp - + shaders\crp shaders\crp + + shaders\crp + shaders\crp + + shaders\crp + shaders\crp @@ -240,6 +263,9 @@ shaders\crp + + shaders\crp + shaders\crp @@ -249,13 +275,16 @@ shaders\crp - + shaders\crp - + shaders\crp - + + shaders\crp + + shaders\crp @@ -341,6 +370,9 @@ shaders\crp + + shaders\crp + shaders\crp @@ -359,6 +391,9 @@ shaders\crp + + shaders\crp + shaders\grp diff --git a/makefiles/windows_vs2022/renderer.vcxproj b/makefiles/windows_vs2022/renderer.vcxproj index 40b4f8f..0dbcbf4 100644 --- a/makefiles/windows_vs2022/renderer.vcxproj +++ b/makefiles/windows_vs2022/renderer.vcxproj @@ -59,7 +59,7 @@ NotUsing Level4 DEBUG;_DEBUG;_CRT_SECURE_NO_WARNINGS;WIN32;_WIN32;_HAS_EXCEPTIONS=0;%(PreprocessorDefinitions) - ..\..\..\cnq3tools\aftermath;..\..\..\cnq3tools\nvapi;..\..\code\imgui;%(AdditionalIncludeDirectories) + ..\..\code;..\..\..\cnq3tools\aftermath;..\..\..\cnq3tools\nvapi;..\..\code\imgui;%(AdditionalIncludeDirectories) ProgramDatabase Disabled MultiThreadedDebug @@ -84,7 +84,7 @@ Level4 Default NDEBUG;_CRT_SECURE_NO_WARNINGS;WIN32;_WIN32;_HAS_EXCEPTIONS=0;%(PreprocessorDefinitions) - ..\..\..\cnq3tools\aftermath;..\..\..\cnq3tools\nvapi;..\..\code\imgui;%(AdditionalIncludeDirectories) + ..\..\code;..\..\..\cnq3tools\aftermath;..\..\..\cnq3tools\nvapi;..\..\code\imgui;%(AdditionalIncludeDirectories) ProgramDatabase MinSpace true @@ -119,6 +119,7 @@ + @@ -137,7 +138,9 @@ + + @@ -275,6 +278,18 @@ true + + true + + + true + + + true + + + true + true @@ -317,15 +332,21 @@ true - + true true + + true + true + + true + true @@ -338,6 +359,9 @@ true + + true + true @@ -347,13 +371,16 @@ true - + true - + true - + + true + + true @@ -423,12 
+450,14 @@ + + diff --git a/makefiles/windows_vs2022/renderer.vcxproj.filters b/makefiles/windows_vs2022/renderer.vcxproj.filters index 0491571..74cfde0 100644 --- a/makefiles/windows_vs2022/renderer.vcxproj.filters +++ b/makefiles/windows_vs2022/renderer.vcxproj.filters @@ -21,6 +21,9 @@ + + shaders\crp + @@ -39,7 +42,9 @@ + + @@ -177,6 +182,18 @@ shaders\crp + + shaders\crp + + + shaders\crp + + + shaders\crp + + + shaders\crp + shaders\crp @@ -219,15 +236,21 @@ shaders\crp - + shaders\crp shaders\crp + + shaders\crp + shaders\crp + + shaders\crp + shaders\crp @@ -240,6 +263,9 @@ shaders\crp + + shaders\crp + shaders\crp @@ -249,13 +275,16 @@ shaders\crp - + shaders\crp - + shaders\crp - + + shaders\crp + + shaders\crp @@ -341,6 +370,9 @@ shaders\crp + + shaders\crp + shaders\crp @@ -359,6 +391,9 @@ shaders\crp + + shaders\crp + shaders\grp