cnq3/code/renderer/crp_nano_vdb.cpp
2024-07-23 18:24:44 +02:00

1008 lines
29 KiB
C++

/*
===========================================================================
Copyright (C) 2024 Gian 'myT' Schellenbaum
This file is part of Challenge Quake 3 (CNQ3).
Challenge Quake 3 is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.
Challenge Quake 3 is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
===========================================================================
*/
// Cinematic Rendering Pipeline - NanoVDB support
#include "crp_local.h"
#include "../client/cl_imgui.h"
#include "../im3d/im3d.h"
// 64-bit magic values: each one is the quoted 8-character ASCII tag
// with its first character stored in the lowest byte
#define NANOVDB_MAGIC_NUMBER 0x304244566F6E614Eul // "NanoVDB0"
#define NANOVDB_MAGIC_GRID 0x314244566F6E614Eul // "NanoVDB1"
#define NANOVDB_MAGIC_FILE 0x324244566F6E614Eul // "NanoVDB2"
// every grid uploaded to a GPU buffer starts at, and is padded to, a multiple of this byte count
#define NANOVDB_GRID_BUFFER_ALIGNMENT 32
/*
File structure:
FileHeader [GridHeader GridName]+ [GridData]+
*/
#pragma pack(push, 1)
// Fixed-size header read from the very start of a NanoVDB file.
struct FileHeader
{
	uint64_t magic; // must be one of the accepted NANOVDB_MAGIC_* values, see IsValid
	uint32_t version;
	uint16_t gridCount; // number of FileGridHeader/grid data pairs that follow
	uint16_t codec;

	// Returns true when the magic number is one this loader accepts.
	// const so that it can be called on read-only headers, like FileGrid::IsValid.
	bool IsValid() const
	{
		return magic == NANOVDB_MAGIC_NUMBER || magic == NANOVDB_MAGIC_FILE;
	}
};
static_assert(sizeof(FileHeader) == 16, "Invalid FileHeader size");
// Per-grid metadata stored after the file header; each one is immediately
// followed by the grid's name (gridNameLength bytes).
struct FileGridHeader
{
	uint64_t memoryByteCount; // presumably the in-memory (uncompressed) size - not used by this file
	uint64_t fileByteCount; // size of the grid's data block as stored in the file
	uint64_t gridNameHashKey;
	uint64_t activeVoxelCount;
	uint32_t gridType;
	uint32_t gridClass;
	double worldBBox[6]; // AABB in world space
	int32_t indexBBox[6]; // AABB in index space
	double voxelSize[3]; // in world units
	uint32_t gridNameLength; // it includes the NULL terminator
	uint32_t nodeCount[4]; // # nodes per level
	uint32_t tileCount[3]; // # of active tiles per level
	uint16_t codec;
	uint16_t padding;
	uint32_t versionNumber;
};
// the message used to name FileHeader, which made a size mismatch here misleading to diagnose
static_assert(sizeof(FileGridHeader) == 176, "Invalid FileGridHeader size");
#pragma pack(pop)
// Location of one grid's data block inside a NanoVDB file.
struct FileGrid
{
	// the grids this renderer looks for in each file
	enum Id
	{
		Smoke,
		Fire,
		Count
	};

	uint32_t byteOffset; // offset of the grid data from the start of the file
	uint32_t byteCount; // size of the grid data in the file

	// A grid that was found has both a non-zero offset and a non-zero size.
	bool IsValid() const
	{
		if(byteOffset == 0)
		{
			return false;
		}

		return byteCount != 0;
	}
};
// Fills m with a 3x3 scaling matrix: the per-axis factors go on the diagonal
// (elements 0, 4 and 8) and every other element is zero.
static void ScaleMatrix(matrix3x3_t m, const vec3_t scale)
{
	for(int element = 0; element < 9; element++)
	{
		const bool onDiagonal = (element % 4) == 0;
		m[element] = onDiagonal ? scale[element / 4] : 0.0f;
	}
}
// Fills m with the 3x3 matrix of a rotation by angleRad radians around the X axis.
static void RotationMatrixX(matrix3x3_t m, float angleRad)
{
	const float cosine = cosf(angleRad);
	const float sine = sinf(angleRad);
	const float elements[9] =
	{
		1.0f, 0.0f, 0.0f,
		0.0f, cosine, -sine,
		0.0f, sine, cosine
	};

	for(int e = 0; e < 9; e++)
	{
		m[e] = elements[e];
	}
}
// Fills m with the 3x3 matrix of a rotation by angleRad radians around the Y axis.
static void RotationMatrixY(matrix3x3_t m, float angleRad)
{
	const float cosine = cosf(angleRad);
	const float sine = sinf(angleRad);
	const float elements[9] =
	{
		cosine, 0.0f, sine,
		0.0f, 1.0f, 0.0f,
		-sine, 0.0f, cosine
	};

	for(int e = 0; e < 9; e++)
	{
		m[e] = elements[e];
	}
}
// Fills m with the 3x3 matrix of a rotation by angleRad radians around the Z axis.
static void RotationMatrixZ(matrix3x3_t m, float angleRad)
{
	const float cosine = cosf(angleRad);
	const float sine = sinf(angleRad);
	const float elements[9] =
	{
		cosine, -sine, 0.0f,
		sine, cosine, 0.0f,
		0.0f, 0.0f, 1.0f
	};

	for(int e = 0; e < 9; e++)
	{
		m[e] = elements[e];
	}
}
// Computes the 3x3 matrix product m = a * b.
// Elements are stored row after row: element (row, column) lives at index row * 3 + column.
// The destination is written in index order 0 through 8, so m must not alias a or b.
static void MultMatrix(matrix3x3_t m, const matrix3x3_t a, const matrix3x3_t b)
{
	for(int row = 0; row < 3; row++)
	{
		const int rowStart = row * 3;
		for(int column = 0; column < 3; column++)
		{
			m[rowStart + column] =
				a[rowStart + 0] * b[0 + column] +
				a[rowStart + 1] * b[3 + column] +
				a[rowStart + 2] * b[6 + column];
		}
	}
}
// similar to Python's math.isclose but the second value is the target for relative tolerance
// i.e. abs(b) is used instead of max(abs(a), abs(b))
// returns true when value lies within the larger of the absolute tolerance
// and the relative tolerance scaled by abs(target)
// the result is a yes/no answer, hence bool rather than the previous float return type
static bool IsCloseToTarget(float value, float target)
{
	const float relTol = 1.e-6f; // relative tolerance
	const float absTol = 1.e-9f; // absolute tolerance
	const float scaledTol = relTol * fabsf(target);
	const float tolerance = absTol > scaledTol ? absTol : scaledTol;

	return fabsf(value - target) <= tolerance;
}
// adapted from "Computing Euler angles from a rotation matrix" by Gregory G. Slabaugh
// Extracts 3 angles (in radians) from the 3x3 matrix m and writes them to anglesRad.
// When element R(2, 0) is close to -1 or +1, the second angle is set to +/- pi/2,
// the third angle is left at 0 and the first angle is computed from R(0, 1) and R(0, 2).
static void AnglesRadFromMatrix(vec3_t anglesRad, const matrix3x3_t m)
{
// R(x,y) reads element m[y*3 + x]; only ever invoked with integer literals here
#define R(x,y) m[y*3 + x]
// the paper's angle names, mapped onto the output vector
#define psi anglesRad[0]
#define theta anglesRad[1]
#define phi anglesRad[2]
// default for the two special cases below, overwritten in the general case
phi = 0.0f;
if(IsCloseToTarget(R(2, 0), -1.0f))
{
theta = M_PI / 2.0f;
psi = atan2f(R(0, 1), R(0, 2));
}
else if(IsCloseToTarget(R(2, 0), 1.0f))
{
theta = -M_PI / 2.0f;
psi = atan2f(-R(0, 1), -R(0, 2));
}
else
{
// general case: of the 2 possible solutions, only the first one is computed
theta = -asinf(R(2, 0)); // theta2 = M_PI + asinf(R(2, 0));
const float recCosTheta = 1.0f / cosf(theta);
psi = atan2f(R(2, 1) * recCosTheta, R(2, 2) * recCosTheta);
phi = atan2f(R(1, 0) * recCosTheta, R(0, 0) * recCosTheta);
}
// keep the helper macros local to this function
#undef R
#undef psi
#undef theta
#undef phi
}
static void FindGrids(FileGrid* grids, fileHandle_t fh, int byteCount, const VDBSequenceDesc& desc)
{
FileHeader fileHeader;
FS_Read(&fileHeader, sizeof(fileHeader), fh);
if(!fileHeader.IsValid())
{
return;
}
// for all grids
uint32_t gridByteCounts[16] = {};
Q_assert(fileHeader.gridCount <= ARRAY_LEN(gridByteCounts));
// for grids of interest
int fileToCNQ3[FileGrid::Count];
for(int g = 0; g < FileGrid::Count; g++)
{
fileToCNQ3[g] = -1;
grids[g].byteOffset = 0;
grids[g].byteCount = 0;
}
const uint32_t fileGridCount = (uint32_t)fileHeader.gridCount;
for(uint32_t g = 0; g < fileGridCount; g++)
{
FileGridHeader gridHeader;
FS_Read(&gridHeader, sizeof(gridHeader), fh);
char gridName[64];
Q_assert(gridHeader.gridNameLength <= ARRAY_LEN(gridName));
FS_Read(gridName, (int)gridHeader.gridNameLength, fh);
// vdb_lod.exe auto-renames "density" to "density_level_2" for mip level 2
if(Q_stristr(gridName, desc.smokeGridName) != NULL)
{
fileToCNQ3[g] = (int)FileGrid::Smoke;
}
else if(Q_stristr(gridName, desc.fireGridName) != NULL)
{
fileToCNQ3[g] = (int)FileGrid::Fire;
}
gridByteCounts[g] = gridHeader.fileByteCount;
if(fileToCNQ3[g] >= 0 && fileToCNQ3[g] < FileGrid::Count)
{
grids[fileToCNQ3[g]].byteOffset = 0;
grids[fileToCNQ3[g]].byteCount = gridHeader.fileByteCount;
}
}
for(uint32_t g = 0; g < fileGridCount; g++)
{
uint64_t magic;
FS_Read(&magic, sizeof(magic), fh);
Q_assert(magic == NANOVDB_MAGIC_NUMBER || magic == NANOVDB_MAGIC_GRID);
if(fileToCNQ3[g] >= 0 && fileToCNQ3[g] < FileGrid::Count)
{
grids[fileToCNQ3[g]].byteOffset = (uint32_t)FS_FTell(fh) - 8;
}
FS_Seek(fh, gridByteCounts[g] - 8, FS_SEEK_CUR);
}
Q_assert(grids[FileGrid::Smoke].IsValid() || grids[FileGrid::Fire].IsValid());
}
// Reads the scale and origin offset stored in the grid that starts at byteOffset in the file.
// scale receives three 4-byte values read 16 bytes apart, originOffset receives 12 contiguous bytes.
// NOTE(review): the offsets presumably address the diagonal of the grid's 3x3 float
// transform matrix and its float translation vector - confirm against the NanoVDB grid layout
static void ReadTransform(vec3_t originOffset, vec3_t scale, fileHandle_t fh, int byteOffset)
{
	// grid-relative byte offsets of the 3 scale factors
	static const int scaleByteOffsets[3] = { 296, 312, 328 };
	// grid-relative byte offset of the origin offset vector
	static const int originByteOffset = 368;

	for(int axis = 0; axis < 3; axis++)
	{
		FS_Seek(fh, byteOffset + scaleByteOffsets[axis], FS_SEEK_SET);
		FS_Read(&scale[axis], 4, fh);
	}

	FS_Seek(fh, byteOffset + originByteOffset, FS_SEEK_SET);
	FS_Read(originOffset, 12, fh);
}
// Reads the transform of the first valid grid, preferring smoke over fire.
// The outputs are left untouched when neither grid is valid.
static void ReadTransform(vec3_t originOffset, vec3_t scale, fileHandle_t fh, const FileGrid* grids)
{
	static const FileGrid::Id priority[2] = { FileGrid::Smoke, FileGrid::Fire };

	for(int p = 0; p < 2; p++)
	{
		const FileGrid& candidate = grids[priority[p]];
		if(candidate.IsValid())
		{
			ReadTransform(originOffset, scale, fh, candidate.byteOffset);
			return;
		}
	}
}
// Draws a row with a "x2" and a "/2" button that double or halve all components of the vector.
// id makes the button identifiers unique when this is used for several vectors in the same window.
static void VectorScaleGUI(vec3_t vector, const char* id)
{
	struct ScaleButton
	{
		const char* labelFormat;
		float factor;
	};
	static const ScaleButton buttons[2] =
	{
		{ "x2##%s", 2.0f },
		{ "/2##%s", 0.5f }
	};

	// blank text that comes first on the row
	ImGui::Text(" ");
	for(int b = 0; b < 2; b++)
	{
		ImGui::SameLine();
		if(ImGui::Button(va(buttons[b].labelFormat, id)))
		{
			VectorScale(vector, buttons[b].factor, vector);
		}
	}
}
// Copies the smoke and fire grids of a streamed frame from its file into the GPU buffer.
// smokeByteOffset/fireByteOffset: receive the GPU buffer offset of each grid that was uploaded
// gpuBufferOffset: next free offset in the buffer, advanced by the padded size of each upload
// buffer: destination GPU buffer
// frame: file path plus the file offset and size of each grid
// Nothing happens when the frame has no grid data or its file can't be opened.
static void UploadFrame(
	uint32_t& smokeByteOffset, uint32_t& fireByteOffset, uint32_t& gpuBufferOffset,
	HBuffer buffer, const NanoVDBManager::CPUFrame& frame)
{
	const bool hasGridData = frame.fireByteCount > 0 || frame.smokeByteCount > 0;
	if(!hasGridData)
	{
		return;
	}

	fileHandle_t file;
	const int fileSize = FS_FOpenFileRead(frame.filePath, &file, qfalse);
	if(fileSize < 0)
	{
		return;
	}

	if(fileSize > 0)
	{
		if(frame.smokeByteCount > 0)
		{
			smokeByteOffset = gpuBufferOffset;
			FS_Seek(file, frame.smokeByteOffset, FS_SEEK_SET);
			// the upload size is padded, only the grid's own bytes come from the file
			const uint32_t paddedByteCount = AlignUp<uint32_t>(frame.smokeByteCount, NANOVDB_GRID_BUFFER_ALIGNMENT);
			uint8_t* const destination = BeginBufferUpload(buffer, gpuBufferOffset, paddedByteCount);
			FS_Read(destination, frame.smokeByteCount, file);
			EndBufferUpload(buffer);
			gpuBufferOffset += paddedByteCount;
		}

		if(frame.fireByteCount > 0)
		{
			fireByteOffset = gpuBufferOffset;
			FS_Seek(file, frame.fireByteOffset, FS_SEEK_SET);
			const uint32_t paddedByteCount = AlignUp<uint32_t>(frame.fireByteCount, NANOVDB_GRID_BUFFER_ALIGNMENT);
			uint8_t* const destination = BeginBufferUpload(buffer, gpuBufferOffset, paddedByteCount);
			FS_Read(destination, frame.fireByteCount, file);
			EndBufferUpload(buffer);
			gpuBufferOffset += paddedByteCount;
		}
	}

	FS_FCloseFile(file);
}
// Combines a millisecond count and a microsecond count into a single 64-bit time stamp in microseconds.
static int64_t GetTimeStampUS(int ms, int us)
{
	const int64_t microSecondsPerMilliSecond = 1000;
	const int64_t fromMilliSeconds = microSecondsPerMilliSecond * static_cast<int64_t>(ms);

	return fromMilliSeconds + static_cast<int64_t>(us);
}
void NanoVDBManager::Init()
{
sequences.Clear();
instances.Clear();
drawInstances.Clear();
cpuFrames.Clear();
gpuFrames.Clear();
streamBufferIndex = 0;
{
streamBufferByteCount = 256 << 20; // @TODO: CVar
BufferDesc desc("", streamBufferByteCount, ResourceStates::ComputeShaderAccessBit);
desc.shortLifeTime = true;
desc.structureByteCount = 4;
for(int i = 0; i < ARRAY_LEN(streamBuffers); i++)
{
desc.name = va("NanoVDB stream #%d", i + 1);
streamBuffers[i] = CreateBuffer(desc);
}
}
}
void NanoVDBManager::BeforeFrame()
{
drawInstances.Clear();
streamedFrames.Clear();
if(!tr.hasWorldRender)
{
return;
}
streamBufferIndex = (streamBufferIndex + 1) % ARRAY_LEN(streamBuffers);
const HBuffer streamBuffer = streamBuffers[streamBufferIndex];
uint32_t gpuBufferOffset = NANOVDB_GRID_BUFFER_ALIGNMENT;
const int64_t renderTimeUS = GetTimeStampUS(tr.worldRenderTimeMS, tr.worldRenderTimeUS);
for(int i = (int)instances.count - 1; i >= 0; i--)
{
const Instance& inst = instances[i];
if(inst.loop)
{
continue;
}
const Sequence& seq = sequences[inst.sequenceIndex];
const float durationSec = (float)seq.frameCount / inst.frameRate;
const int64_t durationUS = (int64_t)ceilf(durationSec * 1000000.0f);
const int64_t endTimeUS = GetTimeStampUS(inst.startTimeMS, inst.startTimeUS) + durationUS;
if(renderTimeUS >= endTimeUS)
{
instances.RemoveUnordered((uint32_t)i);
}
}
for(uint32_t i = 0; i < instances.count; i++)
{
const Instance& inst = instances[i];
const Sequence& seq = sequences[inst.sequenceIndex];
const int64_t startTimeUS = GetTimeStampUS(inst.startTimeMS, inst.startTimeUS);
const int64_t usPerFrame = (int64_t)(1000000.0f / instances[i].frameRate);
const uint32_t frameIndex = (uint32_t)((renderTimeUS - startTimeUS) / usPerFrame) % seq.frameCount;
const uint32_t remainder = (uint32_t)((renderTimeUS - startTimeUS) % usPerFrame);
const uint32_t frameIndex2 = min(frameIndex + 1, seq.frameCount - 1);
const float t = (float)remainder / (float)usPerFrame; // lerp(frame, frame2, t)
DrawInstance drawInst = {};
if(IsNullHandle(seq.buffer))
{
const CPUFrame& frame = cpuFrames[seq.firstFrameIndex + frameIndex];
const CPUFrame& frame2 = cpuFrames[seq.firstFrameIndex + frameIndex2];
const int sf1 = FindStreamedFrameIndex(inst.sequenceIndex, frameIndex);
const int sf2 = FindStreamedFrameIndex(inst.sequenceIndex, frameIndex2);
uint32_t requestedByteCount = 0;
if(sf1 >= 0)
{
drawInst.smokeByteOffset = streamedFrames[sf1].smokeByteOffset;
drawInst.fireByteOffset = streamedFrames[sf1].flamesByteOffset;
}
else
{
requestedByteCount += frame.smokeByteCount + frame.fireByteCount;
}
if(sf2 >= 0)
{
drawInst.smokeByteOffset2 = streamedFrames[sf2].smokeByteOffset;
drawInst.fireByteOffset2 = streamedFrames[sf2].flamesByteOffset;
}
else
{
requestedByteCount += frame2.smokeByteOffset + frame2.fireByteCount;
}
drawInst.buffer = streamBuffer;
if(requestedByteCount > 0 &&
gpuBufferOffset + requestedByteCount <= streamBufferByteCount)
{
UploadFrame(drawInst.smokeByteOffset, drawInst.fireByteOffset, gpuBufferOffset, streamBuffer, frame);
UploadFrame(drawInst.smokeByteOffset2, drawInst.fireByteOffset2, gpuBufferOffset, streamBuffer, frame2);
StreamedFrame sf = {};
sf.sequenceIndex = inst.sequenceIndex;
if(drawInst.smokeByteOffset > 0 || drawInst.fireByteOffset > 0)
{
sf.frameIndex = frameIndex;
sf.smokeByteOffset = drawInst.smokeByteOffset;
sf.flamesByteOffset = drawInst.fireByteOffset;
streamedFrames.Add(sf);
}
if(drawInst.smokeByteOffset2 > 0 || drawInst.fireByteOffset2 > 0)
{
sf.frameIndex = frameIndex2;
sf.smokeByteOffset = drawInst.smokeByteOffset2;
sf.flamesByteOffset = drawInst.fireByteOffset2;
streamedFrames.Add(sf);
}
}
}
else
{
const NanoVDBManager::GPUFrame& frame = gpuFrames[seq.firstFrameIndex + frameIndex];
const NanoVDBManager::GPUFrame& frame2 = gpuFrames[seq.firstFrameIndex + frameIndex2];
drawInst.buffer = seq.buffer;
drawInst.fireByteOffset = frame.fireByteOffset;
drawInst.fireByteOffset2 = frame2.fireByteOffset;
drawInst.smokeByteOffset = frame.smokeByteOffset;
drawInst.smokeByteOffset2 = frame2.smokeByteOffset;
}
drawInst.t = t;
drawInstances.Add(drawInst);
}
}
// Adds a new instance of the sequence stored in desc.folderPath (one .nvdb file per frame).
// The sequence itself is only loaded the first time its folder is requested:
// - GPU-resident sequences are uploaded in full to a dedicated GPU buffer
// - streamed sequences only record where each grid lives in each file
// Sequences that are too large for a dedicated buffer fall back to streaming.
// Returns true when the instance was added, false otherwise (after printing a warning,
// except when no world map is loaded).
bool NanoVDBManager::AddSequence(const VDBSequenceDesc& desc)
{
	if(!tr.worldMapLoaded)
	{
		return false;
	}

	if(instances.IsFull())
	{
		ri.Printf(PRINT_WARNING, "^3WARNING: NanoVDB instance limit reached\n");
		return false;
	}

	// defaults used when the transform can't be read from the first file
	vec3_t originOffset = {};
	vec3_t scale;
	VectorSet(scale, 1, 1, 1);

	// look for an already loaded sequence
	int sequenceIndex = -1;
	for(uint32_t i = 0; i < sequences.count; i++)
	{
		if(Q_stricmp(sequences[i].folderPath, desc.folderPath) == 0)
		{
			sequenceIndex = (int)i;
			break;
		}
	}

	if(sequenceIndex < 0 && !sequences.IsFull())
	{
		HBuffer gpuBuffer = RHI_MAKE_NULL_HANDLE();
		uint32_t gpuByteCount = 0;
		uint32_t firstFrameIndex = 0;
		int fileCount = 0;
		if(desc.gpuResident)
		{
			int startTimeMS = Sys_Milliseconds();

			// pass 1: compute the total buffer size
			// the total is kept in 64 bits so that huge sequences can't wrap around
			// and sneak past the size limit check below
			uint64_t totalByteCount = NANOVDB_GRID_BUFFER_ALIGNMENT;
			char** fileList = ri.FS_ListFiles(desc.folderPath, ".nvdb", &fileCount);
			for(int f = 0; f < fileCount; f++)
			{
				FileGrid grids[FileGrid::Count] = {};
				fileHandle_t fh;
				const int byteCount = FS_FOpenFileRead(va("%s/%s", desc.folderPath, fileList[f]), &fh, qfalse);
				if(byteCount > 0)
				{
					FindGrids(grids, fh, byteCount, desc);
				}
				if(byteCount >= 0)
				{
					FS_FCloseFile(fh);
				}
				for(uint32_t g = 0; g < FileGrid::Count; g++)
				{
					if(grids[g].byteCount > 0)
					{
						totalByteCount += AlignUp<uint32_t>(grids[g].byteCount, NANOVDB_GRID_BUFFER_ALIGNMENT);
					}
				}
			}
			ri.FS_FreeFileList(fileList);

			if(fileCount <= 0 || totalByteCount <= NANOVDB_GRID_BUFFER_ALIGNMENT)
			{
				ri.Printf(PRINT_WARNING, "^3WARNING: invalid NanoVDB folder '%s'\n", desc.folderPath);
				return false;
			}

			// 2 GiB limit, computed without shifting into the sign bit of an int
			if(totalByteCount >= (uint64_t(1) << 31))
			{
				ri.Printf(PRINT_WARNING, "^3WARNING: NanoVDB sequence '%s' too large for GPU storage\n", desc.folderPath);
				VDBSequenceDesc newDesc = desc;
				newDesc.gpuResident = false;
				return AddSequence(newDesc);
			}

			if(gpuFrames.count + fileCount > gpuFrames.capacity)
			{
				ri.Printf(PRINT_WARNING, "^3WARNING: NanoVDB frame limit reached\n");
				return false;
			}

			ri.Printf(PRINT_ALL, "NanoVDB: analyzed %s in %d ms\n", desc.folderPath, Sys_Milliseconds() - startTimeMS);

			startTimeMS = Sys_Milliseconds();

			// pass 2: create the buffer and upload all the grids
			gpuByteCount = AlignUp<uint32_t>((uint32_t)totalByteCount, NANOVDB_GRID_BUFFER_ALIGNMENT);
			BufferDesc bufferDesc("NanoVDB full sequence", gpuByteCount, ResourceStates::ComputeShaderAccessBit);
			bufferDesc.shortLifeTime = true;
			bufferDesc.structureByteCount = 4;
			gpuBuffer = CreateBuffer(bufferDesc);

			// offset 0 is never used because a grid offset of 0 means "no grid"
			uint32_t gpuByteOffset = NANOVDB_GRID_BUFFER_ALIGNMENT;
			firstFrameIndex = gpuFrames.count;
			fileList = ri.FS_ListFiles(desc.folderPath, ".nvdb", &fileCount);
			for(int f = 0; f < fileCount; f++)
			{
				GPUFrame frame = {};
				FileGrid grids[FileGrid::Count] = {};
				fileHandle_t fh;
				const int byteCount = FS_FOpenFileRead(va("%s/%s", desc.folderPath, fileList[f]), &fh, qfalse);
				if(byteCount > 0)
				{
					FindGrids(grids, fh, byteCount, desc);
					if(grids[FileGrid::Smoke].IsValid())
					{
						const uint32_t gridByteCount = AlignUp<uint32_t>(grids[FileGrid::Smoke].byteCount, NANOVDB_GRID_BUFFER_ALIGNMENT);
						uint8_t* const cpuBuffer = BeginBufferUpload(gpuBuffer, gpuByteOffset, gridByteCount);
						FS_Seek(fh, (int)grids[FileGrid::Smoke].byteOffset, FS_SEEK_SET);
						FS_Read(cpuBuffer, (int)grids[FileGrid::Smoke].byteCount, fh);
						EndBufferUpload(gpuBuffer);
						frame.smokeByteOffset = gpuByteOffset;
						gpuByteOffset += gridByteCount;
					}
					if(grids[FileGrid::Fire].IsValid())
					{
						const uint32_t gridByteCount = AlignUp<uint32_t>(grids[FileGrid::Fire].byteCount, NANOVDB_GRID_BUFFER_ALIGNMENT);
						uint8_t* const cpuBuffer = BeginBufferUpload(gpuBuffer, gpuByteOffset, gridByteCount);
						FS_Seek(fh, (int)grids[FileGrid::Fire].byteOffset, FS_SEEK_SET);
						FS_Read(cpuBuffer, (int)grids[FileGrid::Fire].byteCount, fh);
						EndBufferUpload(gpuBuffer);
						frame.fireByteOffset = gpuByteOffset;
						gpuByteOffset += gridByteCount;
					}
					if(f == 0)
					{
						// the transform of the first frame is used for the entire sequence
						ReadTransform(originOffset, scale, fh, grids);
					}
				}
				if(byteCount >= 0)
				{
					FS_FCloseFile(fh);
				}
				Q_assert(frame.fireByteOffset > 0 || frame.smokeByteOffset > 0);
				// always add the frame, even when empty, so that frame indices match file indices
				gpuFrames.Add(frame);
			}
			ri.FS_FreeFileList(fileList);

			ri.Printf(PRINT_ALL, "NanoVDB: processed %s in %d ms\n", desc.folderPath, Sys_Milliseconds() - startTimeMS);
		}
		else
		{
			const int startTimeMS = Sys_Milliseconds();

			char** fileList = ri.FS_ListFiles(desc.folderPath, ".nvdb", &fileCount);

			// validate before adding anything so that a failure leaves no orphaned frames behind
			if(fileCount <= 0)
			{
				ri.FS_FreeFileList(fileList);
				ri.Printf(PRINT_WARNING, "^3WARNING: invalid NanoVDB folder '%s'\n", desc.folderPath);
				return false;
			}

			if(cpuFrames.count + fileCount > cpuFrames.capacity)
			{
				ri.FS_FreeFileList(fileList);
				ri.Printf(PRINT_WARNING, "^3WARNING: NanoVDB frame limit reached\n");
				return false;
			}

			firstFrameIndex = cpuFrames.count;
			for(int f = 0; f < fileCount; f++)
			{
				FileGrid grids[FileGrid::Count] = {};
				CPUFrame frame = {};
				const char* const filePath = va("%s/%s", desc.folderPath, fileList[f]);
				Q_strncpyz(frame.filePath, filePath, sizeof(frame.filePath));
				fileHandle_t fh;
				const int byteCount = FS_FOpenFileRead(frame.filePath, &fh, qfalse);
				if(byteCount > 0)
				{
					FindGrids(grids, fh, byteCount, desc);
					frame.fireByteOffset = grids[FileGrid::Fire].byteOffset;
					frame.fireByteCount = grids[FileGrid::Fire].byteCount;
					frame.smokeByteOffset = grids[FileGrid::Smoke].byteOffset;
					frame.smokeByteCount = grids[FileGrid::Smoke].byteCount;
					if(f == 0)
					{
						// the transform of the first frame is used for the entire sequence
						ReadTransform(originOffset, scale, fh, grids);
					}
				}
				if(byteCount >= 0)
				{
					FS_FCloseFile(fh);
				}
				// always add the frame, like the GPU-resident path does:
				// the sequence's frame count is the file count, so skipping unreadable files
				// would shift all the following frames and index past the sequence's range
				// a frame with no grid data is simply not uploaded
				cpuFrames.Add(frame);
			}
			ri.FS_FreeFileList(fileList);

			ri.Printf(PRINT_ALL, "NanoVDB: analyzed %s in %d ms\n", desc.folderPath, Sys_Milliseconds() - startTimeMS);
		}

		sequenceIndex = (int)sequences.count;
		Sequence sequence = {};
		Q_strncpyz(sequence.folderPath, desc.folderPath, sizeof(sequence.folderPath));
		sequence.frameCount = (uint32_t)fileCount;
		sequence.firstFrameIndex = firstFrameIndex;
		sequence.buffer = gpuBuffer;
		sequence.bufferByteCount = gpuByteCount;
		VectorCopy(originOffset, sequence.originOffset);
		VectorCopy(scale, sequence.scale);
		sequences.Add(sequence);
	}

	// still no sequence: it wasn't loaded yet and the sequence list is full
	if(sequenceIndex < 0)
	{
		ri.Printf(PRINT_WARNING, "^3WARNING: NanoVDB sequence limit reached\n");
		return false;
	}

	Instance instance = {};
	instance.fireEmissionScale = desc.fireEmissionScale;
	Q_strncpyz(instance.fireGridName, desc.fireGridName, sizeof(instance.fireGridName));
	instance.fireTemperatureScale = desc.fireTemperatureScale;
	instance.frameRate = desc.frameRate;
	instance.smokeExtinctionScale = desc.smokeExtinctionScale;
	instance.smokeAlbedo = desc.smokeAlbedo;
	Q_strncpyz(instance.smokeGridName, desc.smokeGridName, sizeof(instance.smokeGridName));
	instance.startTimeMS = desc.startTimeMS;
	instance.startTimeUS = desc.startTimeUS;
	instance.sequenceIndex = (uint32_t)sequenceIndex;
	// the instance scale combines the requested scale with the one read from the sequence
	VectorMultiply(desc.scale, sequences[sequenceIndex].scale, instance.scale);
	VectorCopy(desc.position, instance.position);
	VectorCopy(desc.useSequenceOffset ? sequences[sequenceIndex].originOffset : desc.originOffset, instance.originOffset);
	VectorCopy(desc.anglesRad, instance.anglesRad);
	instance.loop = desc.loop;
	instances.Add(instance);

	return true;
}
// Builds the 3x3 part of the instance's world-to-index transform:
// inverse scale * rotX(-angle x) * rotY(-angle y) * rotZ(-angle z)
void NanoVDBManager::MakeWorldToIndexMatrix(matrix3x3_t matrix, const Instance& instance)
{
	// inverse scale
	vec3_t inverseScale;
	inverseScale[0] = 1.0f / instance.scale[0];
	inverseScale[1] = 1.0f / instance.scale[1];
	inverseScale[2] = 1.0f / instance.scale[2];
	matrix3x3_t scaling;
	ScaleMatrix(scaling, inverseScale);

	// rotations by the negated angles, applied one axis at a time
	matrix3x3_t rotation, scaledX, scaledXY;
	RotationMatrixX(rotation, -instance.anglesRad[0]);
	MultMatrix(scaledX, scaling, rotation);

	RotationMatrixY(rotation, -instance.anglesRad[1]);
	MultMatrix(scaledXY, scaledX, rotation);

	RotationMatrixZ(rotation, -instance.anglesRad[2]);
	MultMatrix(matrix, scaledXY, rotation);
}
// Draws the "NanoVDB Settings" window: global rendering options, memory/frame statistics,
// one tab per instance to edit or remove it, and an "Add" tab to create a new instance.
// Also updates activeInstanceIndex, which is reset to -1 and then set to the index of
// the instance whose tab is open (if any).
void NanoVDBManager::DrawGUI()
{
static const char* const sequencePopupTitle = "Add NanoVDB Sequence";
// no active instance until a tab item proves otherwise
activeInstanceIndex = -1;
if(!tr.worldMapLoaded)
{
return;
}
// the menu entry toggles windowActive
GUI_AddMainMenuItem(GUI_MainMenu::Tools, "Edit NanoVDB", "", &windowActive);
if(!windowActive)
{
return;
}
if(ImGui::Begin("NanoVDB Settings", &windowActive, ImGuiWindowFlags_AlwaysAutoResize))
{
// global rendering options
if(rhiInfo.forceNanoVDBPreviewMode)
{
// shown as a checked, disabled check box so the user knows why the options are missing
static bool forcedPreviewMode = true;
ImGui::BeginDisabled(true);
ImGui::Checkbox("Preview mode (forced due to driver bug)", &forcedPreviewMode);
ImGui::EndDisabled();
}
else
{
ImGui::Checkbox("Preview mode", &previewMode);
if(!previewMode)
{
ImGui::Checkbox("2x super-sampling", &supersampling);
ImGui::Checkbox("Linear interpolation", &linearInterpolation);
ImGui::Checkbox("Accurate overlap test", &accurateOverlapTest);
ImGui::Checkbox("Ambient lighting: higher angular LoD", &ambientIncreasedCoverage);
ImGui::SliderInt("Ambient lighting: sub-sampling", &ambientRaymarchLOD, 1, 8);
ImGui::SliderFloat("Emissive scattering scale", &emissiveScatterScale, 0.0f, 1.0f, "%g");
}
}
// statistics: stream buffers total vs the sum of the sequences' dedicated buffers
const uint64_t streamByteCount = (uint64_t)ARRAY_LEN(streamBuffers) * (uint64_t)streamBufferByteCount;
uint64_t dedicatedByteCount = 0;
for(uint32_t i = 0; i < sequences.count; i++)
{
dedicatedByteCount += (uint64_t)sequences[i].bufferByteCount;
}
ImGui::Text("%d sequence%s, %s dedicated, %s stream",
(int)sequences.count, sequences.count >= 2 ? "s" : "",
Com_FormatBytes(dedicatedByteCount),
Com_FormatBytes(streamByteCount));
ImGui::Text("%d CPU frame%s, %d GPU frame%s",
(int)cpuFrames.count, cpuFrames.count >= 2 ? "s" : "",
(int)gpuFrames.count, gpuFrames.count >= 2 ? "s" : "");
ImGui::Text("%d streamed file%s", (int)streamedFrames.count, streamedFrames.count >= 2 ? "s" : "");
ImGui::Separator();
if(ImGui::Button("Purge unused sequences"))
{
Purge();
}
if(ImGui::BeginTabBar("Tabs#VDB", ImGuiTabBarFlags_AutoSelectNewTabs))
{
// one tab per instance, labeled with its 1-based index
for(uint32_t i = 0; i < instances.count; i++)
{
if(ImGui::BeginTabItem(va("#%d", i + 1)))
{
activeInstanceIndex = (int)i;
Instance& inst = instances[i];
Sequence& seq = sequences[inst.sequenceIndex];
ImGui::Text("%s (%d frame%s, %s)", seq.folderPath, (int)seq.frameCount,
seq.frameCount >= 2 ? "s" : "",
IsNullHandle(seq.buffer) ? "streamed" : "in VRAM");
ImGui::SliderFloat("Framerate", &inst.frameRate, 15.0f, 120.0f, "%g");
ImGui::SliderFloat("Smoke extinction scale (thickness)", &inst.smokeExtinctionScale, 0.0f, 10.0f, "%g");
ImGui::SliderFloat("Smoke albedo (reflectivity)", &inst.smokeAlbedo, 0.0f, 1.0f, "%g");
ImGui::SliderFloat("Flame emission scale (brightness)", &inst.fireEmissionScale, 0.0f, 1.0f, "%g");
ImGui::SliderFloat("Flame temperature scale (color)", &inst.fireTemperatureScale, 0.0f, 20000.0f, "%g");
// angles are stored in radians but edited in degrees
vec3_t angles;
for(int a = 0; a < 3; a++)
{
angles[a] = RAD2DEG(inst.anglesRad[a]);
}
ImGui::SliderFloat3("Origin offset (index space)", inst.originOffset, -1000.0f, 1000.0f, "%g");
VectorScaleGUI(inst.originOffset, "origin");
ImGui::SliderFloat3("Scale", inst.scale, 0.0f, 100.0f, "%g");
VectorScaleGUI(inst.scale, "scale");
ImGui::SliderFloat3("Position (world space)", inst.position, -100 * 1000.0f, 100 * 1000.0f, "%g");
ImGui::SliderFloat3("Angles", angles, 0.0f, 360.0f, "%g");
// write the (possibly edited) angles back in radians
for(int a = 0; a < 3; a++)
{
inst.anglesRad[a] = DEG2RAD(angles[a]);
}
ImGui::Separator();
if(ImGui::Button("Remove"))
{
// inst and seq must not be used past this point
instances.Remove(i);
}
ImGui::EndTabItem();
}
}
if(ImGui::BeginTabItem("Add"))
{
// static so that the text fields keep their content between frames
static char sequencePath[64];
static char flamesGridName[64] = "flames";
static char smokeGridName[64] = "density";
static bool gpuResident = true;
ImGui::InputText("Folder path", sequencePath, sizeof(sequencePath));
ImGui::SameLine();
if(ImGui::Button("..."))
{
OpenFolderDialog_Open("nanovdb");
}
ImGui::InputText("Flames grid", flamesGridName, sizeof(flamesGridName));
ImGui::InputText("Smoke grid", smokeGridName, sizeof(smokeGridName));
ImGui::Checkbox("GPU resident", &gpuResident);
ImGui::Separator();
if(ImGui::Button("Add"))
{
// every field not set below keeps the value given by the empty initializer
VDBSequenceDesc desc = {};
desc.fireGridName = flamesGridName;
desc.folderPath = sequencePath;
desc.gpuResident = gpuResident;
desc.loop = true;
desc.smokeGridName = smokeGridName;
AddSequence(desc);
}
// a true result means the folder dialog has a path to copy into the text field
if(OpenFolderDialog_Do())
{
Q_strncpyz(sequencePath, OpenFolderDialog_GetPath(), sizeof(sequencePath));
}
ImGui::EndTabItem();
}
ImGui::EndTabBar();
}
}
// End is called regardless of what Begin returned
ImGui::End();
}
void NanoVDBManager::DrawIm3d()
{
if((uint32_t)activeInstanceIndex < instances.count &&
crp.im3d.ShouldDrawGizmos())
{
Instance& instance = instances[activeInstanceIndex];
matrix3x3_t rotation, temp0, temp1, temp2;
RotationMatrixX(temp0, -instance.anglesRad[0]);
RotationMatrixY(temp1, -instance.anglesRad[1]);
MultMatrix(temp2, temp0, temp1);
RotationMatrixZ(temp0, -instance.anglesRad[2]);
MultMatrix(rotation, temp2, temp0);
const char* const id = va("VDB #%d", activeInstanceIndex);
if(Im3d::Gizmo(id, instance.position, rotation, instance.scale))
{
AnglesRadFromMatrix(instance.anglesRad, rotation);
}
}
}
// Removes all sequences that no instance references anymore:
// queues their GPU buffers for deletion, removes their frames,
// compacts the sequence array and fixes up all the indices that changed as a result.
void NanoVDBManager::Purge()
{
	// build sequence reference counts
	uint32_t sequenceRefCounts[ARRAY_LEN(sequences.items)] = {};
	for(uint32_t i = 0; i < instances.count; i++)
	{
		const uint32_t s = instances[i].sequenceIndex;
		sequenceRefCounts[s]++;
	}

	// queue GPU buffer deletions
	for(uint32_t s = 0; s < sequences.count; s++)
	{
		if(sequenceRefCounts[s] == 0 &&
			!IsNullHandle(sequences[s].buffer))
		{
			DestroyBufferDelayed(sequences[s].buffer);
		}
	}

	// compact sequence array, build index map, remove frames, fix frame offsets
	uint32_t sequenceRemap[ARRAY_LEN(sequences.items)] = {};
	uint32_t removed = 0;
	uint32_t dst = 0;
	uint32_t src = 0;
	for(; src < sequences.count; src++)
	{
		if(sequenceRefCounts[src] == 0)
		{
			const uint32_t first = sequences[src].firstFrameIndex;
			const uint32_t count = sequences[src].frameCount;
			const bool streamed = IsNullHandle(sequences[src].buffer);
			if(streamed)
			{
				cpuFrames.RemoveRange(first, count);
			}
			else
			{
				gpuFrames.RemoveRange(first, count);
			}

			for(uint32_t s = 0; s < sequences.count; s++)
			{
				// streamed and GPU-resident sequences index 2 different frame arrays:
				// only the sequences sharing the array that just shrank need fixing
				const bool otherStreamed = IsNullHandle(sequences[s].buffer);
				if(otherStreamed != streamed)
				{
					continue;
				}

				if(sequences[s].firstFrameIndex > first)
				{
					sequences[s].firstFrameIndex -= count;
				}
			}

			removed++;
			continue;
		}

		sequenceRemap[src] = dst;
		if(src > dst)
		{
			sequences[dst] = sequences[src];
		}
		dst++;
	}
	sequences.count -= removed;

	// fix sequence indices
	for(uint32_t i = 0; i < instances.count; i++)
	{
		const uint32_t s = instances[i].sequenceIndex;
		instances[i].sequenceIndex = sequenceRemap[s];
	}

#if defined(_DEBUG)
	// validate the results: all indices and frame ranges must be in bounds
	for(uint32_t i = 0; i < instances.count; i++)
	{
		Q_assert(instances[i].sequenceIndex < sequences.count);
	}
	for(uint32_t s = 0; s < sequences.count; s++)
	{
		const Sequence& seq = sequences[s];
		const uint32_t frameCount = IsNullHandle(seq.buffer) ? cpuFrames.count : gpuFrames.count;
		Q_assert(seq.firstFrameIndex + seq.frameCount <= frameCount);
	}
#endif
}
// Looks for a frame of a sequence among the frames already uploaded to the stream buffer this frame.
// Returns the index of the first match in streamedFrames, or -1 when there is none.
int NanoVDBManager::FindStreamedFrameIndex(uint32_t sequenceIndex, uint32_t frameIndex)
{
	for(uint32_t candidate = 0; candidate < streamedFrames.count; candidate++)
	{
		const bool sameSequence = streamedFrames[candidate].sequenceIndex == sequenceIndex;
		if(sameSequence && streamedFrames[candidate].frameIndex == frameIndex)
		{
			return (int)candidate;
		}
	}

	return -1;
}