cnq3/code/renderer/crp_nano_vdb.cpp

/*
===========================================================================
Copyright (C) 2024 Gian 'myT' Schellenbaum

This file is part of Challenge Quake 3 (CNQ3).

Challenge Quake 3 is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.

Challenge Quake 3 is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
===========================================================================
*/
// Cinematic Rendering Pipeline - NanoVDB support


#include "crp_local.h"
#include "../client/cl_imgui.h"
#include "../im3d/im3d.h"

// 8-byte tags, stored as little-endian integers so that the bytes spell the quoted ASCII text
// the file header accepts NUMBER or FILE, each grid's data must start with NUMBER or GRID
#define NANOVDB_MAGIC_NUMBER 0x304244566F6E614Eul // "NanoVDB0"
#define NANOVDB_MAGIC_GRID   0x314244566F6E614Eul // "NanoVDB1"
#define NANOVDB_MAGIC_FILE   0x324244566F6E614Eul // "NanoVDB2"

// every grid copied into a GPU buffer has its byte count rounded up to a multiple of this
#define NANOVDB_GRID_BUFFER_ALIGNMENT 32


/*
File structure:
FileHeader [GridHeader GridName]+ [GridData]+
*/

#pragma pack(push, 1)

// First 16 bytes of a .nvdb file, read straight from disk (hence the 1-byte packing).
struct FileHeader
{
	uint64_t magic; // must be NANOVDB_MAGIC_NUMBER or NANOVDB_MAGIC_FILE
	uint32_t version;
	uint16_t gridCount; // number of [FileGridHeader, grid name] pairs that follow
	uint16_t codec;

	// Returns true when the magic number identifies a NanoVDB file.
	bool IsValid() const
	{
		return magic == NANOVDB_MAGIC_NUMBER || magic == NANOVDB_MAGIC_FILE;
	}
};

static_assert(sizeof(FileHeader) == 16, "Invalid FileHeader size");

// Per-grid meta-data block, read straight from disk.
// Each one is immediately followed in the file by the grid's name (gridNameLength bytes).
struct FileGridHeader
{
	uint64_t memoryByteCount;
	uint64_t fileByteCount; // size of the grid's data as stored in the file
	uint64_t gridNameHashKey;
	uint64_t activeVoxelCount;
	uint32_t gridType;
	uint32_t gridClass;
	double worldBBox[6]; // AABB in world space
	int32_t indexBBox[6]; // AABB in index space
	double voxelSize[3]; // in world units
	uint32_t gridNameLength; // it includes the NULL terminator
	uint32_t nodeCount[4]; // # nodes per level
	uint32_t tileCount[3]; // # of active tiles per level
	uint16_t codec;
	uint16_t padding;
	uint32_t versionNumber;
};

static_assert(sizeof(FileGridHeader) == 176, "Invalid FileGridHeader size");

#pragma pack(pop)

// Byte range occupied by one grid's data inside a .nvdb file.
struct FileGrid
{
	// the grids the renderer looks for, usable as array indices
	enum Id
	{
		Smoke,
		Fire,
		Count
	};

	uint32_t byteOffset;
	uint32_t byteCount;

	// A grid is only usable once both its location and its size are known.
	bool IsValid() const
	{
		const bool hasLocation = byteOffset != 0;
		const bool hasData = byteCount != 0;
		return hasLocation && hasData;
	}
};


// Writes a 3x3 matrix whose diagonal is 'scale' and whose other elements are 0.
static void ScaleMatrix(matrix3x3_t m, const vec3_t scale)
{
	const float sx = scale[0];
	const float sy = scale[1];
	const float sz = scale[2];

	for(int e = 0; e < 9; e++)
	{
		m[e] = 0.0f;
	}

	// the diagonal lives at indices 0, 4 and 8
	m[0] = sx;
	m[4] = sy;
	m[8] = sz;
}

// Writes the 3x3 matrix of a rotation by 'angleRad' radians around the X axis.
static void RotationMatrixX(matrix3x3_t m, float angleRad)
{
	const float cosine = cosf(angleRad);
	const float sine = sinf(angleRad);

	m[0] = 1.0f; m[1] = 0.0f;   m[2] = 0.0f;
	m[3] = 0.0f; m[4] = cosine; m[5] = -sine;
	m[6] = 0.0f; m[7] = sine;   m[8] = cosine;
}

// Writes the 3x3 matrix of a rotation by 'angleRad' radians around the Y axis.
static void RotationMatrixY(matrix3x3_t m, float angleRad)
{
	const float cosine = cosf(angleRad);
	const float sine = sinf(angleRad);

	m[0] = cosine; m[1] = 0.0f; m[2] = sine;
	m[3] = 0.0f;   m[4] = 1.0f; m[5] = 0.0f;
	m[6] = -sine;  m[7] = 0.0f; m[8] = cosine;
}

// Writes the 3x3 matrix of a rotation by 'angleRad' radians around the Z axis.
static void RotationMatrixZ(matrix3x3_t m, float angleRad)
{
	const float cosine = cosf(angleRad);
	const float sine = sinf(angleRad);

	m[0] = cosine; m[1] = -sine;  m[2] = 0.0f;
	m[3] = sine;   m[4] = cosine; m[5] = 0.0f;
	m[6] = 0.0f;   m[7] = 0.0f;   m[8] = 1.0f;
}

// Computes m = a * b for 3x3 matrices stored as 9 consecutive floats (3 per row).
// Elements are written in index order 0..8, so 'm' should not alias 'a' or 'b'.
static void MultMatrix(matrix3x3_t m, const matrix3x3_t a, const matrix3x3_t b)
{
	for(int row = 0; row < 3; row++)
	{
		const int r = row * 3;
		for(int col = 0; col < 3; col++)
		{
			// dot product of row 'row' of a with column 'col' of b
			m[r + col] = a[r + 0] * b[0 + col] + a[r + 1] * b[3 + col] + a[r + 2] * b[6 + col];
		}
	}
}

// similar to Python's math.isclose but the second value is the target for relative tolerance
// i.e. abs(b) is used instead of max(abs(a), abs(b))
// returns true when 'value' is within max(absTol, relTol * abs(target)) of 'target'
static bool IsCloseToTarget(float value, float target)
{
	const float relTol = 1.e-6f; // relative tolerance
	const float absTol = 1.e-9f; // absolute tolerance

	// the larger of the two tolerances wins
	const float scaledTol = relTol * fabsf(target);
	const float tolerance = absTol > scaledTol ? absTol : scaledTol;

	return fabsf(value - target) <= tolerance;
}

// adapted from "Computing Euler angles from a rotation matrix" by Gregory G. Slabaugh
// Extracts 3 angles (in radians) from the 3x3 rotation matrix 'm' into 'anglesRad'.
// anglesRad[0], [1] and [2] are called psi, theta and phi below.
// NOTE(review): presumably the inverse of the X/Y/Z rotation composition used elsewhere in this file - confirm the convention
static void AnglesRadFromMatrix(vec3_t anglesRad, const matrix3x3_t m)
{
// R(x,y) reads element m[y*3 + x]; the arguments are not parenthesized, so only pass plain literals
#define R(x,y) m[y*3 + x]
#define psi anglesRad[0]
#define theta anglesRad[1]
#define phi anglesRad[2]

	// phi is only computed in the general case, it stays 0 in the two special cases below
	phi = 0.0f;
	if(IsCloseToTarget(R(2, 0), -1.0f))
	{
		// special case: theta is +90 degrees, cos(theta) would be 0 in the general formula
		theta = M_PI / 2.0f;
		psi = atan2f(R(0, 1), R(0, 2));
	}
	else if(IsCloseToTarget(R(2, 0), 1.0f))
	{
		// special case: theta is -90 degrees, cos(theta) would be 0 in the general formula
		theta = -M_PI / 2.0f;
		psi = atan2f(-R(0, 1), -R(0, 2));
	}
	else
	{
		// general case: only the first of the two possible solutions is used
		theta = -asinf(R(2, 0)); // theta2 = M_PI + asinf(R(2, 0));
		const float recCosTheta = 1.0f / cosf(theta);
		psi = atan2f(R(2, 1) * recCosTheta, R(2, 2) * recCosTheta);
		phi = atan2f(R(1, 0) * recCosTheta, R(0, 0) * recCosTheta);
	}

#undef R
#undef psi
#undef theta
#undef phi
}

// Walks the headers of an opened .nvdb file and locates the smoke and fire grids.
//
// grids     - output, FileGrid::Count entries indexed by FileGrid::Id
//             always reset first; only filled in when the whole file could be walked
// fh        - file opened for reading, positioned at the start of the file
// byteCount - size of the file in bytes (currently unused)
// desc      - provides the names of the smoke and fire grids to look for
//
// A grid matches when its name contains the requested name (case-insensitive).
// When several grids match the same slot, the last one in the file wins.
// Malformed files (bad magic, too many grids, oversized names or grids) leave all entries invalid.
static void FindGrids(FileGrid* grids, fileHandle_t fh, int byteCount, const VDBSequenceDesc& desc)
{
	for(int g = 0; g < FileGrid::Count; g++)
	{
		grids[g].byteOffset = 0;
		grids[g].byteCount = 0;
	}

	FileHeader fileHeader = {};
	FS_Read(&fileHeader, sizeof(fileHeader), fh);
	if(!fileHeader.IsValid())
	{
		return;
	}

	// per-grid bookkeeping, indexed by the grid's position in the file
	// both arrays must be able to hold every grid of the file, not just the grids of interest
	const uint32_t maxFileGridCount = 16;
	uint32_t gridByteCounts[maxFileGridCount] = {};
	int fileToCNQ3[maxFileGridCount];
	for(uint32_t g = 0; g < maxFileGridCount; g++)
	{
		fileToCNQ3[g] = -1;
	}

	const uint32_t fileGridCount = (uint32_t)fileHeader.gridCount;
	Q_assert(fileGridCount <= maxFileGridCount);
	if(fileGridCount > maxFileGridCount)
	{
		return;
	}

	// results are staged locally so that a failure half-way through leaves 'grids' invalid
	FileGrid found[FileGrid::Count] = {};

	// first pass: grid headers and names
	for(uint32_t g = 0; g < fileGridCount; g++)
	{
		FileGridHeader gridHeader = {};
		FS_Read(&gridHeader, sizeof(gridHeader), fh);

		char gridName[64] = {};
		Q_assert(gridHeader.gridNameLength <= ARRAY_LEN(gridName));
		if(gridHeader.gridNameLength > sizeof(gridName))
		{
			return;
		}
		FS_Read(gridName, (int)gridHeader.gridNameLength, fh);
		gridName[sizeof(gridName) - 1] = '\0'; // never trust the file to terminate the string

		// each grid starts with an 8-byte magic number and sizes are handled as 32-bit values
		if(gridHeader.fileByteCount < 8 || gridHeader.fileByteCount > 0x7FFFFFFFull)
		{
			return;
		}

		// vdb_lod.exe auto-renames "density" to "density_level_2" for mip level 2
		if(Q_stristr(gridName, desc.smokeGridName) != NULL)
		{
			fileToCNQ3[g] = (int)FileGrid::Smoke;
		}
		else if(Q_stristr(gridName, desc.fireGridName) != NULL)
		{
			fileToCNQ3[g] = (int)FileGrid::Fire;
		}
		gridByteCounts[g] = (uint32_t)gridHeader.fileByteCount;

		if(fileToCNQ3[g] >= 0 && fileToCNQ3[g] < FileGrid::Count)
		{
			found[fileToCNQ3[g]].byteOffset = 0;
			found[fileToCNQ3[g]].byteCount = (uint32_t)gridHeader.fileByteCount;
		}
	}

	// second pass: the grids' data follows, in the same order as the headers
	for(uint32_t g = 0; g < fileGridCount; g++)
	{
		uint64_t magic = 0;
		FS_Read(&magic, sizeof(magic), fh);
		Q_assert(magic == NANOVDB_MAGIC_NUMBER || magic == NANOVDB_MAGIC_GRID);
		if(magic != NANOVDB_MAGIC_NUMBER && magic != NANOVDB_MAGIC_GRID)
		{
			return;
		}
		if(fileToCNQ3[g] >= 0 && fileToCNQ3[g] < FileGrid::Count)
		{
			// the grid starts at the magic number that was just read
			found[fileToCNQ3[g]].byteOffset = (uint32_t)FS_FTell(fh) - 8;
		}
		FS_Seek(fh, gridByteCounts[g] - 8, FS_SEEK_CUR);
	}

	for(int g = 0; g < FileGrid::Count; g++)
	{
		grids[g] = found[g];
	}

	Q_assert(grids[FileGrid::Smoke].IsValid() || grids[FileGrid::Fire].IsValid());
}

// Reads the scale and origin offset of the grid that starts 'byteOffset' bytes into the file.
// The 3 scale values are single 4-byte reads, the origin offset is one 12-byte read.
// NOTE(review): the hard-coded offsets presumably address the transform stored in the NanoVDB grid header - confirm against the format
static void ReadTransform(vec3_t originOffset, vec3_t scale, fileHandle_t fh, int byteOffset)
{
	// positions of the 3 scale components, relative to the start of the grid
	static const int scaleByteOffsets[3] = { 296, 312, 328 };
	// position of the 3 consecutive origin offset components
	static const int originByteOffset = 368;

	for(int axis = 0; axis < 3; axis++)
	{
		FS_Seek(fh, byteOffset + scaleByteOffsets[axis], FS_SEEK_SET);
		FS_Read(&scale[axis], 4, fh);
	}

	FS_Seek(fh, byteOffset + originByteOffset, FS_SEEK_SET);
	FS_Read(originOffset, 12, fh);
}

// Reads the transform from the smoke grid when it is valid, from the fire grid otherwise.
// Leaves both outputs untouched when neither grid is valid.
static void ReadTransform(vec3_t originOffset, vec3_t scale, fileHandle_t fh, const FileGrid* grids)
{
	const FileGrid& smoke = grids[FileGrid::Smoke];
	const FileGrid& fire = grids[FileGrid::Fire];

	const bool useSmoke = smoke.IsValid();
	if(!useSmoke && !fire.IsValid())
	{
		return;
	}

	const FileGrid& source = useSmoke ? smoke : fire;
	ReadTransform(originOffset, scale, fh, source.byteOffset);
}

// Draws an indented "x2" and "/2" button pair that doubles or halves 'vector' in place.
// 'id' keeps the buttons' ImGui IDs unique between call sites.
static void VectorScaleGUI(vec3_t vector, const char* id)
{
	ImGui::Text("  ");

	ImGui::SameLine();
	const bool doubleIt = ImGui::Button(va("x2##%s", id));
	if(doubleIt)
	{
		VectorScale(vector, 2.0f, vector);
	}

	ImGui::SameLine();
	const bool halveIt = ImGui::Button(va("/2##%s", id));
	if(halveIt)
	{
		VectorScale(vector, 0.5f, vector);
	}
}

// Copies the smoke and fire grids of 'frame' from its file into 'buffer'.
//
// smokeByteOffset, fireByteOffset - set to where each grid was written; untouched for grids that were not uploaded
// gpuBufferOffset                 - write cursor into 'buffer', advanced by the aligned size of each uploaded grid
//
// Does nothing when the frame has no grid data or when the file can't be read.
static void UploadFrame(
	uint32_t& smokeByteOffset, uint32_t& fireByteOffset, uint32_t& gpuBufferOffset,
	HBuffer buffer, const NanoVDBManager::CPUFrame& frame)
{
	const bool hasSmoke = frame.smokeByteCount > 0;
	const bool hasFire = frame.fireByteCount > 0;
	if(!hasFire && !hasSmoke)
	{
		return;
	}

	fileHandle_t file;
	const int fileSize = FS_FOpenFileRead(frame.filePath, &file, qfalse);
	if(fileSize < 0)
	{
		return;
	}

	if(fileSize > 0)
	{
		if(hasSmoke)
		{
			const uint32_t alignedByteCount = AlignUp<uint32_t>(frame.smokeByteCount, NANOVDB_GRID_BUFFER_ALIGNMENT);
			smokeByteOffset = gpuBufferOffset;
			FS_Seek(file, frame.smokeByteOffset, FS_SEEK_SET);
			uint8_t* const dest = BeginBufferUpload(buffer, gpuBufferOffset, alignedByteCount);
			FS_Read(dest, frame.smokeByteCount, file);
			EndBufferUpload(buffer);
			gpuBufferOffset += alignedByteCount;
		}

		if(hasFire)
		{
			const uint32_t alignedByteCount = AlignUp<uint32_t>(frame.fireByteCount, NANOVDB_GRID_BUFFER_ALIGNMENT);
			fireByteOffset = gpuBufferOffset;
			FS_Seek(file, frame.fireByteOffset, FS_SEEK_SET);
			uint8_t* const dest = BeginBufferUpload(buffer, gpuBufferOffset, alignedByteCount);
			FS_Read(dest, frame.fireByteCount, file);
			EndBufferUpload(buffer);
			gpuBufferOffset += alignedByteCount;
		}
	}

	// the handle is valid even for empty files
	FS_FCloseFile(file);
}

// Combines a millisecond count and a microsecond count into one 64-bit microsecond time stamp.
static int64_t GetTimeStampUS(int ms, int us)
{
	const int64_t usPerMS = 1000;
	const int64_t wholeMS = (int64_t)ms;
	const int64_t extraUS = (int64_t)us;

	return usPerMS * wholeMS + extraUS;
}


// Empties all sequence/instance/frame lists and creates the buffers used for streaming.
void NanoVDBManager::Init()
{
	// start from a clean slate
	sequences.Clear();
	instances.Clear();
	drawInstances.Clear();
	cpuFrames.Clear();
	gpuFrames.Clear();
	streamBufferIndex = 0;

	// all stream buffers share the same description, only the name differs
	streamBufferByteCount = 256 << 20; // @TODO: CVar
	BufferDesc desc("", streamBufferByteCount, ResourceStates::ComputeShaderAccessBit);
	desc.shortLifeTime = true;
	desc.structureByteCount = 4;

	for(int b = 0; b < ARRAY_LEN(streamBuffers); b++)
	{
		const int displayNumber = b + 1;
		desc.name = va("NanoVDB stream #%d", displayNumber);
		streamBuffers[b] = CreateBuffer(desc);
	}
}

void NanoVDBManager::BeforeFrame()
{
	drawInstances.Clear();
	streamedFrames.Clear();
	if(!tr.hasWorldRender)
	{
		return;
	}

	streamBufferIndex = (streamBufferIndex + 1) % ARRAY_LEN(streamBuffers);
	const HBuffer streamBuffer = streamBuffers[streamBufferIndex];

	uint32_t gpuBufferOffset = NANOVDB_GRID_BUFFER_ALIGNMENT;

	const int64_t renderTimeUS = GetTimeStampUS(tr.worldRenderTimeMS, tr.worldRenderTimeUS);
	for(int i = (int)instances.count - 1; i >= 0; i--)
	{
		const Instance& inst = instances[i];
		if(inst.loop)
		{
			continue;
		}

		const Sequence& seq = sequences[inst.sequenceIndex];
		const float durationSec = (float)seq.frameCount / inst.frameRate;
		const int64_t durationUS = (int64_t)ceilf(durationSec * 1000000.0f);
		const int64_t endTimeUS = GetTimeStampUS(inst.startTimeMS, inst.startTimeUS) + durationUS;
		if(renderTimeUS >= endTimeUS)
		{
			instances.RemoveUnordered((uint32_t)i);
		}
	}

	for(uint32_t i = 0; i < instances.count; i++)
	{
		const Instance& inst = instances[i];
		const Sequence& seq = sequences[inst.sequenceIndex];
		const int64_t startTimeUS = GetTimeStampUS(inst.startTimeMS, inst.startTimeUS);
		const int64_t usPerFrame = (int64_t)(1000000.0f / instances[i].frameRate);
		const uint32_t frameIndex = (uint32_t)((renderTimeUS - startTimeUS) / usPerFrame) % seq.frameCount;
		const uint32_t remainder = (uint32_t)((renderTimeUS - startTimeUS) % usPerFrame);
		const uint32_t frameIndex2 = min(frameIndex + 1, seq.frameCount - 1);
		const float t = (float)remainder / (float)usPerFrame; // lerp(frame, frame2, t)

		DrawInstance drawInst = {};
		if(IsNullHandle(seq.buffer))
		{
			const CPUFrame& frame = cpuFrames[seq.firstFrameIndex + frameIndex];
			const CPUFrame& frame2 = cpuFrames[seq.firstFrameIndex + frameIndex2];
			const int sf1 = FindStreamedFrameIndex(inst.sequenceIndex, frameIndex);
			const int sf2 = FindStreamedFrameIndex(inst.sequenceIndex, frameIndex2);

			uint32_t requestedByteCount = 0;
			if(sf1 >= 0)
			{
				drawInst.smokeByteOffset = streamedFrames[sf1].smokeByteOffset;
				drawInst.fireByteOffset = streamedFrames[sf1].flamesByteOffset;
			}
			else
			{
				requestedByteCount += frame.smokeByteCount + frame.fireByteCount;
			}
			if(sf2 >= 0)
			{
				drawInst.smokeByteOffset2 = streamedFrames[sf2].smokeByteOffset;
				drawInst.fireByteOffset2 = streamedFrames[sf2].flamesByteOffset;
			}
			else
			{
				requestedByteCount += frame2.smokeByteOffset + frame2.fireByteCount;
			}

			drawInst.buffer = streamBuffer;
			if(requestedByteCount > 0 &&
				gpuBufferOffset + requestedByteCount <= streamBufferByteCount)
			{
				UploadFrame(drawInst.smokeByteOffset, drawInst.fireByteOffset, gpuBufferOffset, streamBuffer, frame);
				UploadFrame(drawInst.smokeByteOffset2, drawInst.fireByteOffset2, gpuBufferOffset, streamBuffer, frame2);

				StreamedFrame sf = {};
				sf.sequenceIndex = inst.sequenceIndex;
				if(drawInst.smokeByteOffset > 0 || drawInst.fireByteOffset > 0)
				{
					sf.frameIndex = frameIndex;
					sf.smokeByteOffset = drawInst.smokeByteOffset;
					sf.flamesByteOffset = drawInst.fireByteOffset;
					streamedFrames.Add(sf);
				}
				if(drawInst.smokeByteOffset2 > 0 || drawInst.fireByteOffset2 > 0)
				{
					sf.frameIndex = frameIndex2;
					sf.smokeByteOffset = drawInst.smokeByteOffset2;
					sf.flamesByteOffset = drawInst.fireByteOffset2;
					streamedFrames.Add(sf);
				}
			}
		}
		else
		{
			const NanoVDBManager::GPUFrame& frame = gpuFrames[seq.firstFrameIndex + frameIndex];
			const NanoVDBManager::GPUFrame& frame2 = gpuFrames[seq.firstFrameIndex + frameIndex2];
			drawInst.buffer = seq.buffer;
			drawInst.fireByteOffset = frame.fireByteOffset;
			drawInst.fireByteOffset2 = frame2.fireByteOffset;
			drawInst.smokeByteOffset = frame.smokeByteOffset;
			drawInst.smokeByteOffset2 = frame2.smokeByteOffset;
		}
		drawInst.t = t;
		drawInstances.Add(drawInst);
	}
}

// Creates a new instance of the sequence stored in desc.folderPath.
//
// The sequence is loaded first if no sequence with the same folder path exists yet:
// - GPU-resident: all the grids of all the .nvdb files are uploaded into one dedicated buffer
//   (falls back to streaming when the total size reaches 2 GiB)
// - streamed: only the grids' file locations are recorded, the data gets uploaded on demand
//
// Returns false, after printing a warning where applicable, when no map is loaded,
// a limit (instances, sequences, frames) was reached or the folder has no usable file.
bool NanoVDBManager::AddSequence(const VDBSequenceDesc& desc)
{
	if(!tr.worldMapLoaded)
	{
		return false;
	}

	if(instances.IsFull())
	{
		ri.Printf(PRINT_WARNING, "^3WARNING: NanoVDB instance limit reached\n");
		return false;
	}

	vec3_t originOffset = {};
	vec3_t scale;
	VectorSet(scale, 1, 1, 1);

	// re-use the sequence if it's already loaded
	int sequenceIndex = -1;
	for(uint32_t i = 0; i < sequences.count; i++)
	{
		if(Q_stricmp(sequences[i].folderPath, desc.folderPath) == 0)
		{
			sequenceIndex = (int)i;
			break;
		}
	}
	if(sequenceIndex < 0 && !sequences.IsFull())
	{
		HBuffer gpuBuffer = RHI_MAKE_NULL_HANDLE();
		uint32_t gpuByteCount = 0;
		uint32_t firstFrameIndex = 0;
		uint32_t frameCount = 0; // number of frames actually added for this sequence
		int fileCount = 0;

		if(desc.gpuResident)
		{
			int startTimeMS = Sys_Milliseconds();

			// first pass: compute the size of the buffer
			// the sum is 64 bits wide so that huge sequences can't wrap around
			uint64_t totalByteCount = NANOVDB_GRID_BUFFER_ALIGNMENT;
			char** fileList = ri.FS_ListFiles(desc.folderPath, ".nvdb", &fileCount);
			for(int f = 0; f < fileCount; f++)
			{
				FileGrid grids[FileGrid::Count] = {};
				fileHandle_t fh;
				const int byteCount = FS_FOpenFileRead(va("%s/%s", desc.folderPath, fileList[f]), &fh, qfalse);
				if(byteCount > 0)
				{
					FindGrids(grids, fh, byteCount, desc);
				}
				if(byteCount >= 0)
				{
					FS_FCloseFile(fh);
				}
				for(uint32_t g = 0; g < FileGrid::Count; g++)
				{
					if(grids[g].byteCount > 0)
					{
						totalByteCount += AlignUp<uint32_t>(grids[g].byteCount, NANOVDB_GRID_BUFFER_ALIGNMENT);
					}
				}
			}
			ri.FS_FreeFileList(fileList);
			if(fileCount <= 0 || totalByteCount <= NANOVDB_GRID_BUFFER_ALIGNMENT)
			{
				ri.Printf(PRINT_WARNING, "^3WARNING: invalid NanoVDB folder '%s'\n", desc.folderPath);
				return false;
			}

			if(totalByteCount >= (uint64_t(1) << 31))
			{
				ri.Printf(PRINT_WARNING, "^3WARNING: NanoVDB sequence '%s' too large for GPU storage\n", desc.folderPath);
				VDBSequenceDesc newDesc = desc;
				newDesc.gpuResident = false;
				return AddSequence(newDesc);
			}
			gpuByteCount = (uint32_t)totalByteCount;

			if(gpuFrames.count + fileCount > gpuFrames.capacity)
			{
				ri.Printf(PRINT_WARNING, "^3WARNING: NanoVDB frame limit reached\n");
				return false;
			}

			ri.Printf(PRINT_ALL, "NanoVDB: analyzed %s in %d ms\n", desc.folderPath, Sys_Milliseconds() - startTimeMS);
			startTimeMS = Sys_Milliseconds();

			gpuByteCount = AlignUp<uint32_t>(gpuByteCount, NANOVDB_GRID_BUFFER_ALIGNMENT);
			BufferDesc bufferDesc("NanoVDB full sequence", gpuByteCount, ResourceStates::ComputeShaderAccessBit);
			bufferDesc.shortLifeTime = true;
			bufferDesc.structureByteCount = 4;
			gpuBuffer = CreateBuffer(bufferDesc);

			// second pass: upload the grids
			// one frame is added per file, even for unreadable ones, so that frame indices match file indices
			uint32_t gpuByteOffset = NANOVDB_GRID_BUFFER_ALIGNMENT;
			firstFrameIndex = gpuFrames.count;
			fileList = ri.FS_ListFiles(desc.folderPath, ".nvdb", &fileCount);
			for(int f = 0; f < fileCount; f++)
			{
				GPUFrame frame = {};
				FileGrid grids[FileGrid::Count] = {};
				fileHandle_t fh;
				const int byteCount = FS_FOpenFileRead(va("%s/%s", desc.folderPath, fileList[f]), &fh, qfalse);
				if(byteCount > 0)
				{
					FindGrids(grids, fh, byteCount, desc);
					if(grids[FileGrid::Smoke].IsValid())
					{
						const uint32_t gridByteCount = AlignUp<uint32_t>(grids[FileGrid::Smoke].byteCount, NANOVDB_GRID_BUFFER_ALIGNMENT);
						uint8_t* const cpuBuffer = BeginBufferUpload(gpuBuffer, gpuByteOffset, gridByteCount);
						FS_Seek(fh, (int)grids[FileGrid::Smoke].byteOffset, FS_SEEK_SET);
						FS_Read(cpuBuffer, (int)grids[FileGrid::Smoke].byteCount, fh);
						EndBufferUpload(gpuBuffer);
						frame.smokeByteOffset = gpuByteOffset;
						gpuByteOffset += gridByteCount;
					}
					if(grids[FileGrid::Fire].IsValid())
					{
						const uint32_t gridByteCount = AlignUp<uint32_t>(grids[FileGrid::Fire].byteCount, NANOVDB_GRID_BUFFER_ALIGNMENT);
						uint8_t* const cpuBuffer = BeginBufferUpload(gpuBuffer, gpuByteOffset, gridByteCount);
						FS_Seek(fh, (int)grids[FileGrid::Fire].byteOffset, FS_SEEK_SET);
						FS_Read(cpuBuffer, (int)grids[FileGrid::Fire].byteCount, fh);
						EndBufferUpload(gpuBuffer);
						frame.fireByteOffset = gpuByteOffset;
						gpuByteOffset += gridByteCount;
					}

					// the transform of the first file is used for the whole sequence
					if(f == 0)
					{
						ReadTransform(originOffset, scale, fh, grids);
					}
				}
				if(byteCount >= 0)
				{
					FS_FCloseFile(fh);
				}
				Q_assert(frame.fireByteOffset > 0 || frame.smokeByteOffset > 0);
				gpuFrames.Add(frame);
			}
			ri.FS_FreeFileList(fileList);
			frameCount = (uint32_t)fileCount;

			ri.Printf(PRINT_ALL, "NanoVDB: processed %s in %d ms\n", desc.folderPath, Sys_Milliseconds() - startTimeMS);
		}
		else
		{
			const int startTimeMS = Sys_Milliseconds();

			firstFrameIndex = cpuFrames.count;
			char** fileList = ri.FS_ListFiles(desc.folderPath, ".nvdb", &fileCount);
			if(fileCount <= 0)
			{
				ri.FS_FreeFileList(fileList);
				ri.Printf(PRINT_WARNING, "^3WARNING: invalid NanoVDB folder '%s'\n", desc.folderPath);
				return false;
			}

			// the limit has to be checked before any frame gets added
			if(cpuFrames.count + fileCount > cpuFrames.capacity)
			{
				ri.FS_FreeFileList(fileList);
				ri.Printf(PRINT_WARNING, "^3WARNING: NanoVDB frame limit reached\n");
				return false;
			}

			for(int f = 0; f < fileCount; f++)
			{
				// copy the path right away since va returns a temporary string
				CPUFrame frame = {};
				Q_strncpyz(frame.filePath, va("%s/%s", desc.folderPath, fileList[f]), sizeof(frame.filePath));

				FileGrid grids[FileGrid::Count] = {};
				fileHandle_t fh;
				const int byteCount = FS_FOpenFileRead(frame.filePath, &fh, qfalse);
				if(byteCount > 0)
				{
					FindGrids(grids, fh, byteCount, desc);
					frame.fireByteOffset = grids[FileGrid::Fire].byteOffset;
					frame.fireByteCount = grids[FileGrid::Fire].byteCount;
					frame.smokeByteOffset = grids[FileGrid::Smoke].byteOffset;
					frame.smokeByteCount = grids[FileGrid::Smoke].byteCount;
					cpuFrames.Add(frame);

					// the transform of the first readable file is used for the whole sequence
					if(frameCount == 0)
					{
						ReadTransform(originOffset, scale, fh, grids);
					}
					frameCount++;
				}
				if(byteCount >= 0)
				{
					FS_FCloseFile(fh);
				}
			}
			ri.FS_FreeFileList(fileList);

			// unreadable files add no frame, so the folder can still turn out to be unusable
			if(frameCount == 0)
			{
				ri.Printf(PRINT_WARNING, "^3WARNING: invalid NanoVDB folder '%s'\n", desc.folderPath);
				return false;
			}

			ri.Printf(PRINT_ALL, "NanoVDB: analyzed %s in %d ms\n", desc.folderPath, Sys_Milliseconds() - startTimeMS);
		}

		sequenceIndex = (int)sequences.count;
		Sequence sequence = {};
		Q_strncpyz(sequence.folderPath, desc.folderPath, sizeof(sequence.folderPath));
		sequence.frameCount = frameCount; // must match the number of frames added above
		sequence.firstFrameIndex = firstFrameIndex;
		sequence.buffer = gpuBuffer;
		sequence.bufferByteCount = gpuByteCount;
		VectorCopy(originOffset, sequence.originOffset);
		VectorCopy(scale, sequence.scale);
		sequences.Add(sequence);
	}
	if(sequenceIndex < 0)
	{
		ri.Printf(PRINT_WARNING, "^3WARNING: NanoVDB sequence limit reached\n");
		return false;
	}

	Instance instance = {};
	instance.fireEmissionScale = desc.fireEmissionScale;
	Q_strncpyz(instance.fireGridName, desc.fireGridName, sizeof(instance.fireGridName));
	instance.fireTemperatureScale = desc.fireTemperatureScale;
	instance.frameRate = desc.frameRate;
	instance.smokeExtinctionScale = desc.smokeExtinctionScale;
	instance.smokeAlbedo = desc.smokeAlbedo;
	Q_strncpyz(instance.smokeGridName, desc.smokeGridName, sizeof(instance.smokeGridName));
	instance.startTimeMS = desc.startTimeMS;
	instance.startTimeUS = desc.startTimeUS;
	instance.sequenceIndex = (uint32_t)sequenceIndex;
	// the instance's scale is applied on top of the scale read from the sequence's files
	VectorMultiply(desc.scale, sequences[sequenceIndex].scale, instance.scale);
	VectorCopy(desc.position, instance.position);
	VectorCopy(desc.useSequenceOffset ? sequences[sequenceIndex].originOffset : desc.originOffset, instance.originOffset);
	VectorCopy(desc.anglesRad, instance.anglesRad);
	instance.loop = desc.loop;
	instances.Add(instance);

	return true;
}

// Builds invScale * rotX(-angles[0]) * rotY(-angles[1]) * rotZ(-angles[2]) for the instance,
// where invScale is the diagonal matrix of the reciprocals of the instance's scale.
void NanoVDBManager::MakeWorldToIndexMatrix(matrix3x3_t matrix, const Instance& instance)
{
	vec3_t invScale;
	invScale[0] = 1.0f / instance.scale[0];
	invScale[1] = 1.0f / instance.scale[1];
	invScale[2] = 1.0f / instance.scale[2];

	matrix3x3_t scaleMatrix, rotX, rotY, rotZ;
	ScaleMatrix(scaleMatrix, invScale);
	RotationMatrixX(rotX, -instance.anglesRad[0]);
	RotationMatrixY(rotY, -instance.anglesRad[1]);
	RotationMatrixZ(rotZ, -instance.anglesRad[2]);

	// multiply left to right: ((scale * X) * Y) * Z
	matrix3x3_t scaleX, scaleXY;
	MultMatrix(scaleX, scaleMatrix, rotX);
	MultMatrix(scaleXY, scaleX, rotY);
	MultMatrix(matrix, scaleXY, rotZ);
}

// Draws the "NanoVDB Settings" window: global options, memory statistics,
// one tab per instance to edit or remove it and an "Add" tab to create new instances.
// Also updates activeInstanceIndex (the instance whose tab is open, -1 if none), which DrawIm3d reads.
void NanoVDBManager::DrawGUI()
{
	static const char* const sequencePopupTitle = "Add NanoVDB Sequence";

	// reset every frame, only set again below when an instance tab is open
	activeInstanceIndex = -1;
	if(!tr.worldMapLoaded)
	{
		return;
	}

	GUI_AddMainMenuItem(GUI_MainMenu::Tools, "Edit NanoVDB", "", &windowActive);

	if(!windowActive)
	{
		return;
	}

	if(ImGui::Begin("NanoVDB Settings", &windowActive, ImGuiWindowFlags_AlwaysAutoResize))
	{
		if(rhiInfo.forceNanoVDBPreviewMode)
		{
			// shown as a disabled, always checked box so the user knows why there are no other options
			static bool forcedPreviewMode = true;
			ImGui::BeginDisabled(true);
			ImGui::Checkbox("Preview mode (forced due to driver bug)", &forcedPreviewMode);
			ImGui::EndDisabled();
		}
		else
		{
			ImGui::Checkbox("Preview mode", &previewMode);
			if(!previewMode)
			{
				ImGui::Checkbox("2x super-sampling", &supersampling);
				ImGui::Checkbox("Linear interpolation", &linearInterpolation);
				ImGui::Checkbox("Accurate overlap test", &accurateOverlapTest);
				ImGui::Checkbox("Ambient lighting: higher angular LoD", &ambientIncreasedCoverage);
				ImGui::SliderInt("Ambient lighting: sub-sampling", &ambientRaymarchLOD, 1, 8);
				ImGui::SliderFloat("Emissive scattering scale", &emissiveScatterScale, 0.0f, 1.0f, "%g");
			}
		}

		// memory statistics: stream buffers (fixed size) vs. per-sequence dedicated buffers
		const uint64_t streamByteCount = (uint64_t)ARRAY_LEN(streamBuffers) * (uint64_t)streamBufferByteCount;
		uint64_t dedicatedByteCount = 0;
		for(uint32_t i = 0; i < sequences.count; i++)
		{
			dedicatedByteCount += (uint64_t)sequences[i].bufferByteCount;
		}
		// NOTE(review): the plural "s" is only added for counts of 2 and up, so 0 prints as singular
		ImGui::Text("%d sequence%s, %s dedicated, %s stream",
			(int)sequences.count, sequences.count >= 2 ? "s" : "",
			Com_FormatBytes(dedicatedByteCount),
			Com_FormatBytes(streamByteCount));
		ImGui::Text("%d CPU frame%s, %d GPU frame%s",
			(int)cpuFrames.count, cpuFrames.count >= 2 ? "s" : "",
			(int)gpuFrames.count, gpuFrames.count >= 2 ? "s" : "");
		ImGui::Text("%d streamed file%s", (int)streamedFrames.count, streamedFrames.count >= 2 ? "s" : "");

		ImGui::Separator();
		if(ImGui::Button("Purge unused sequences"))
		{
			Purge();
		}

		if(ImGui::BeginTabBar("Tabs#VDB", ImGuiTabBarFlags_AutoSelectNewTabs))
		{
			// one tab per instance, labeled with its 1-based index
			for(uint32_t i = 0; i < instances.count; i++)
			{
				if(ImGui::BeginTabItem(va("#%d", i + 1)))
				{
					activeInstanceIndex = (int)i;

					Instance& inst = instances[i];
					Sequence& seq = sequences[inst.sequenceIndex];

					ImGui::Text("%s (%d frame%s, %s)", seq.folderPath, (int)seq.frameCount,
						seq.frameCount >= 2 ? "s" : "",
						IsNullHandle(seq.buffer) ? "streamed" : "in VRAM");
					ImGui::SliderFloat("Framerate", &inst.frameRate, 15.0f, 120.0f, "%g");
					ImGui::SliderFloat("Smoke extinction scale (thickness)", &inst.smokeExtinctionScale, 0.0f, 10.0f, "%g");
					ImGui::SliderFloat("Smoke albedo (reflectivity)", &inst.smokeAlbedo, 0.0f, 1.0f, "%g");
					ImGui::SliderFloat("Flame emission scale (brightness)", &inst.fireEmissionScale, 0.0f, 1.0f, "%g");
					ImGui::SliderFloat("Flame temperature scale (color)", &inst.fireTemperatureScale, 0.0f, 20000.0f, "%g");

					// the angles are stored in radians but edited in degrees
					vec3_t angles;
					for(int a = 0; a < 3; a++)
					{
						angles[a] = RAD2DEG(inst.anglesRad[a]);
					}

					ImGui::SliderFloat3("Origin offset (index space)", inst.originOffset, -1000.0f, 1000.0f, "%g");
					VectorScaleGUI(inst.originOffset, "origin");
					// NOTE(review): the scale can be dragged down to 0 and MakeWorldToIndexMatrix divides by it
					ImGui::SliderFloat3("Scale", inst.scale, 0.0f, 100.0f, "%g");
					VectorScaleGUI(inst.scale, "scale");
					ImGui::SliderFloat3("Position (world space)", inst.position, -100 * 1000.0f, 100 * 1000.0f, "%g");
					ImGui::SliderFloat3("Angles", angles, 0.0f, 360.0f, "%g");
					// written back every frame, whether the sliders were touched or not
					for(int a = 0; a < 3; a++)
					{
						inst.anglesRad[a] = DEG2RAD(angles[a]);
					}

					ImGui::Separator();
					if(ImGui::Button("Remove"))
					{
						// NOTE(review): removing while iterating means the loop moves on to i + 1 right after
						// and activeInstanceIndex keeps the value i until the next call - confirm this is acceptable
						instances.Remove(i);
					}

					ImGui::EndTabItem();
				}
			}

			if(ImGui::BeginTabItem("Add"))
			{
				// static so that the text fields keep their content between frames
				static char sequencePath[64];
				static char flamesGridName[64] = "flames";
				static char smokeGridName[64] = "density";
				static bool gpuResident = true;

				ImGui::InputText("Folder path", sequencePath, sizeof(sequencePath));
				ImGui::SameLine();
				if(ImGui::Button("..."))
				{
					OpenFolderDialog_Open("nanovdb");
				}
				ImGui::InputText("Flames grid", flamesGridName, sizeof(flamesGridName));
				ImGui::InputText("Smoke grid", smokeGridName, sizeof(smokeGridName));
				ImGui::Checkbox("GPU resident", &gpuResident);

				ImGui::Separator();
				if(ImGui::Button("Add"))
				{
					// every field that isn't set here keeps the value given by "= {}"
					VDBSequenceDesc desc = {};
					desc.fireGridName = flamesGridName;
					desc.folderPath = sequencePath;
					desc.gpuResident = gpuResident;
					desc.loop = true;
					desc.smokeGridName = smokeGridName;
					AddSequence(desc);
				}

				// the folder dialog is driven every frame, its result replaces the typed path
				if(OpenFolderDialog_Do())
				{
					Q_strncpyz(sequencePath, OpenFolderDialog_GetPath(), sizeof(sequencePath));
				}

				ImGui::EndTabItem();
			}

			ImGui::EndTabBar();
		}
	}
	// ImGui::End is called whether or not ImGui::Begin returned true
	ImGui::End();
}

void NanoVDBManager::DrawIm3d()
{
	if((uint32_t)activeInstanceIndex < instances.count &&
		crp.im3d.ShouldDrawGizmos())
	{
		Instance& instance = instances[activeInstanceIndex];
		matrix3x3_t rotation, temp0, temp1, temp2;
		RotationMatrixX(temp0, -instance.anglesRad[0]);
		RotationMatrixY(temp1, -instance.anglesRad[1]);
		MultMatrix(temp2, temp0, temp1);
		RotationMatrixZ(temp0, -instance.anglesRad[2]);
		MultMatrix(rotation, temp2, temp0);
		const char* const id = va("VDB #%d", activeInstanceIndex);
		if(Im3d::Gizmo(id, instance.position, rotation, instance.scale))
		{
			AnglesRadFromMatrix(instance.anglesRad, rotation);
		}
	}
}

// Frees every sequence that no instance references anymore:
// - queues the destruction of its dedicated GPU buffer, if it has one
// - removes its frames from cpuFrames (streamed) or gpuFrames (GPU-resident)
// - compacts the sequence array and fixes up the indices that refer to what moved
void NanoVDBManager::Purge()
{
	// build sequence reference counts
	uint32_t sequenceRefCounts[ARRAY_LEN(sequences.items)] = {};
	for(uint32_t i = 0; i < instances.count; i++)
	{
		const uint32_t s = instances[i].sequenceIndex;
		sequenceRefCounts[s]++;
	}

	// queue GPU buffer deletions
	for(uint32_t s = 0; s < sequences.count; s++)
	{
		if(sequenceRefCounts[s] == 0 &&
			!IsNullHandle(sequences[s].buffer))
		{
			DestroyBufferDelayed(sequences[s].buffer);
		}
	}

	// compact sequence array, build index map, remove frames, fix frame offsets
	uint32_t sequenceRemap[ARRAY_LEN(sequences.items)] = {};
	uint32_t removed = 0;
	uint32_t dst = 0;
	uint32_t src = 0;
	for(; src < sequences.count; src++)
	{
		if(sequenceRefCounts[src] == 0)
		{
			const uint32_t first = sequences[src].firstFrameIndex;
			const uint32_t count = sequences[src].frameCount;
			const bool streamed = IsNullHandle(sequences[src].buffer);
			if(streamed)
			{
				cpuFrames.RemoveRange(first, count);
			}
			else
			{
				gpuFrames.RemoveRange(first, count);
			}
			for(uint32_t s = 0; s < sequences.count; s++)
			{
				// cpuFrames and gpuFrames are separate arrays:
				// only the sequences whose frames live in the array that just shrank are affected
				const bool sameFrameArray = IsNullHandle(sequences[s].buffer) == streamed;
				if(sameFrameArray && sequences[s].firstFrameIndex > first)
				{
					sequences[s].firstFrameIndex -= count;
				}
			}
			removed++;
			continue;
		}

		sequenceRemap[src] = dst;
		if(src > dst)
		{
			sequences[dst] = sequences[src];
		}
		dst++;
	}
	sequences.count -= removed;

	// fix sequence indices
	for(uint32_t i = 0; i < instances.count; i++)
	{
		const uint32_t s = instances[i].sequenceIndex;
		instances[i].sequenceIndex = sequenceRemap[s];
	}

#if defined(_DEBUG)
	// every index must still point inside its array
	for(uint32_t i = 0; i < instances.count; i++)
	{
		Q_assert(instances[i].sequenceIndex < sequences.count);
	}
	for(uint32_t s = 0; s < sequences.count; s++)
	{
		const Sequence& seq = sequences[s];
		const uint32_t frameCount = IsNullHandle(seq.buffer) ? cpuFrames.count : gpuFrames.count;
		Q_assert(seq.firstFrameIndex + seq.frameCount <= frameCount);
	}
#endif
}

// Returns the index into 'streamedFrames' of the first entry for the given frame
// of the given sequence, or -1 when there is no such entry.
int NanoVDBManager::FindStreamedFrameIndex(uint32_t sequenceIndex, uint32_t frameIndex)
{
	for(uint32_t f = 0; f < streamedFrames.count; f++)
	{
		if(streamedFrames[f].sequenceIndex != sequenceIndex)
		{
			continue;
		}
		if(streamedFrames[f].frameIndex != frameIndex)
		{
			continue;
		}

		return (int)f;
	}

	return -1;
}