Masked Software Occlusion Culling kinda works

This commit is contained in:
Robert Beckebans 2024-08-22 00:05:04 +02:00
parent 9e919c8d76
commit 1fda97ec25
8 changed files with 280 additions and 32 deletions

View file

@ -1670,9 +1670,9 @@ endif()
# needs to come after nvrhi_d3d11 etc. for link order
target_link_libraries(RBDoom3BFG nvrhi)
target_link_libraries(RBDoom3BFG ShaderMakeBlob)
target_link_libraries(RBDoom3BFG ShaderMakeBlob MaskedOcclusionCulling)
add_dependencies(Shaders ShaderMake)
add_dependencies(RBDoom3BFG Shaders)
add_dependencies(RBDoom3BFG Shaders MaskedOcclusionCulling)

View file

@ -350,7 +350,7 @@ float idConsoleLocal::DrawFPS( float y )
if( com_showFPS.GetInteger() > 2 )
statsWindowWidth += 230;
statsWindowHeight += 120;
statsWindowHeight += 135;
ImVec2 pos;
@ -501,6 +501,12 @@ float idConsoleLocal::DrawFPS( float y )
//ImGui::Text( "Cull: %i box in %i box out\n",
// commonLocal.stats_frontend.c_box_cull_in, commonLocal.stats_frontend.c_box_cull_out );
ImGui::TextColored( colorLtGrey, "MASKCULL: tests:%-3i culls:%i maskVerts:%i maskTris:%i",
commonLocal.stats_frontend.c_mocIndexes );
ImGui::TextColored( colorLtGrey, "ADDMODEL: callback:%-2i createInteractions:%i createShadowVolumes:%i",

View file

@ -64,6 +64,7 @@ SURFACES
#include "ModelOverlay.h"
#include "Interaction.h"
class MaskedOcclusionCulling; // RB
class idRenderWorldLocal;
struct viewEntity_t;
struct viewLight_t;
@ -1044,6 +1045,10 @@ public:
idRenderBackend backend;
MaskedOcclusionCulling* maskedOcclusionCulling;
idVec4 maskZeroOneCubeVerts[8];
unsigned int maskZeroOneCubeIndexes[36];
bool bInitialized;
bool omitSwapBuffers;
void R_FillMaskedOcclusionBufferWithModels();
void R_FillMaskedOcclusionBufferWithModels( viewDef_t* viewDef );

View file

@ -126,6 +126,11 @@ struct performanceCounters_t
int c_lightReferences;
int c_guiSurfs;
int c_mocVerts;
int c_mocIndexes;
int c_mocTests;
int c_mocCulls;
uint64 frontEndMicroSec; // sum of time in all RE_RenderScene's in a frame

View file

@ -33,6 +33,7 @@ If you have questions concerning this license or the applicable additional terms
#pragma hdrstop
#include "imgui.h"
#include "../libs/moc/MaskedOcclusionCulling.h"
#include "RenderCommon.h"
@ -52,8 +53,6 @@ If you have questions concerning this license or the applicable additional terms
#include "../framework/Common_local.h"
//#include "idlib/HandleManager.h"
// DeviceContext bypasses RenderSystem to work directly with this
idGuiModel* tr_guiModel;
@ -1935,6 +1934,87 @@ static srfTriangles_t* R_MakeZeroOneCubeTris()
// RB begin
static void R_MakeZeroOneCubeTrisForMaskedOcclusionCulling()
//idDrawVert* verts = tri->verts;
const float low = 0.0f;
const float high = 1.0f;
idVec3 center( 0.0f );
idVec3 mx( low, 0.0f, 0.0f );
idVec3 px( high, 0.0f, 0.0f );
idVec3 my( 0.0f, low, 0.0f );
idVec3 py( 0.0f, high, 0.0f );
idVec3 mz( 0.0f, 0.0f, low );
idVec3 pz( 0.0f, 0.0f, high );
idVec4* verts = tr.maskZeroOneCubeVerts;
verts[0].ToVec3() = center + mx + my + mz;
verts[1].ToVec3() = center + px + my + mz;
verts[2].ToVec3() = center + px + py + mz;
verts[3].ToVec3() = center + mx + py + mz;
verts[4].ToVec3() = center + mx + my + pz;
verts[5].ToVec3() = center + px + my + pz;
verts[6].ToVec3() = center + px + py + pz;
verts[7].ToVec3() = center + mx + py + pz;
verts[0].w = 1;
verts[1].w = 1;
verts[2].w = 1;
verts[3].w = 1;
verts[4].w = 1;
verts[5].w = 1;
verts[6].w = 1;
verts[7].w = 1;
unsigned int* indexes = tr.maskZeroOneCubeIndexes;
// bottom
indexes[ 0 * 3 + 0] = 2;
indexes[ 0 * 3 + 1] = 3;
indexes[ 0 * 3 + 2] = 0;
indexes[ 1 * 3 + 0] = 1;
indexes[ 1 * 3 + 1] = 2;
indexes[ 1 * 3 + 2] = 0;
// back
indexes[ 2 * 3 + 0] = 5;
indexes[ 2 * 3 + 1] = 1;
indexes[ 2 * 3 + 2] = 0;
indexes[ 3 * 3 + 0] = 4;
indexes[ 3 * 3 + 1] = 5;
indexes[ 3 * 3 + 2] = 0;
// left
indexes[ 4 * 3 + 0] = 7;
indexes[ 4 * 3 + 1] = 4;
indexes[ 4 * 3 + 2] = 0;
indexes[ 5 * 3 + 0] = 3;
indexes[ 5 * 3 + 1] = 7;
indexes[ 5 * 3 + 2] = 0;
// right
indexes[ 6 * 3 + 0] = 1;
indexes[ 6 * 3 + 1] = 5;
indexes[ 6 * 3 + 2] = 6;
indexes[ 7 * 3 + 0] = 2;
indexes[ 7 * 3 + 1] = 1;
indexes[ 7 * 3 + 2] = 6;
// front
indexes[ 8 * 3 + 0] = 3;
indexes[ 8 * 3 + 1] = 2;
indexes[ 8 * 3 + 2] = 6;
indexes[ 9 * 3 + 0] = 7;
indexes[ 9 * 3 + 1] = 3;
indexes[ 9 * 3 + 2] = 6;
// top
indexes[10 * 3 + 0] = 4;
indexes[10 * 3 + 1] = 7;
indexes[10 * 3 + 2] = 6;
indexes[11 * 3 + 0] = 5;
indexes[11 * 3 + 1] = 4;
indexes[11 * 3 + 2] = 6;
static srfTriangles_t* R_MakeZeroOneSphereTris()
srfTriangles_t* tri = ( srfTriangles_t* )Mem_ClearedAlloc( sizeof( *tri ), TAG_RENDER_TOOLS );
@ -2170,6 +2250,12 @@ void idRenderSystemLocal::Init()
omitSwapBuffers = true;
// Flush denorms to zero to avoid performance issues with small values
_mm_setcsr( _mm_getcsr() | 0x8040 );
maskedOcclusionCulling = MaskedOcclusionCulling::Create();
// make sure the command buffers are ready to accept the first screen update
SwapCommandBuffers( NULL, NULL, NULL, NULL, NULL, NULL );

View file

@ -30,6 +30,8 @@ If you have questions concerning this license or the applicable additional terms
#include "precompiled.h"
#pragma hdrstop
#include "../libs/moc/MaskedOcclusionCulling.h"
#include "RenderCommon.h"
#include "Model_local.h"
@ -667,7 +669,92 @@ void R_AddSingleModel( viewEntity_t* vEntity )
// than the entire entity reference bounds
// If the entire model wasn't visible, there is no need to check the
// individual surfaces.
const bool surfaceDirectlyVisible = modelIsVisible && !idRenderMatrix::CullBoundsToMVP( vEntity->mvp, tri->bounds );
bool surfaceDirectlyVisible = modelIsVisible && !idRenderMatrix::CullBoundsToMVP( vEntity->mvp, tri->bounds );
#if 0
// RB: test surface visibility by drawing the triangles of the bounds
if( tr.maskedOcclusionCulling != NULL )
idRenderMatrix modelRenderMatrix;
idRenderMatrix::CreateFromOriginAxis( renderEntity->origin, renderEntity->axis, modelRenderMatrix );
idRenderMatrix inverseBaseModelProject;
idRenderMatrix::OffsetScaleForBounds( modelRenderMatrix, tri->bounds, inverseBaseModelProject );
idRenderMatrix invProjectMVPMatrix;
idRenderMatrix::Multiply( viewDef->worldSpace.mvp, inverseBaseModelProject, invProjectMVPMatrix );
idRenderMatrix invProjectMVPMatrix2;
idRenderMatrix::Transpose( invProjectMVPMatrix, invProjectMVPMatrix2 );
// query the triangle
MaskedOcclusionCulling::CullingResult result;
result = tr.maskedOcclusionCulling->TestTriangles( ( float* )tr.maskZeroOneCubeVerts, tr.maskZeroOneCubeIndexes, 36, ( float* )&invProjectMVPMatrix2[0][0], MaskedOcclusionCulling::BACKFACE_CCW );
if( result == MaskedOcclusionCulling::OCCLUDED )
surfaceDirectlyVisible = false;
tr.pc.c_mocCulls += 1;
#elif 1
idVec4 triVerts[3];
unsigned int triIndices[] = { 0, 1, 2 };
tr.pc.c_mocIndexes += tri->numIndexes;
tr.pc.c_mocVerts += tri->numIndexes;
tr.pc.c_mocTests += 1;
bool maskVisible = false;
for( int i = 0, face = 0; i < tri->numIndexes; i += 3, face++ )
const idDrawVert& v0 = tri->verts[tri->indexes[i + 0]];
const idDrawVert& v1 = tri->verts[tri->indexes[i + 1]];
const idDrawVert& v2 = tri->verts[tri->indexes[i + 2]];
// transform to clip space
vEntity->mvp.TransformPoint( idVec4(,,, 1 ), triVerts[0] );
vEntity->mvp.TransformPoint( idVec4(,,, 1 ), triVerts[1] );
vEntity->mvp.TransformPoint( idVec4(,,, 1 ), triVerts[2] );
MaskedOcclusionCulling::CullingResult result = tr.maskedOcclusionCulling->TestTriangles( ( float* )triVerts, triIndices, 1, NULL, MaskedOcclusionCulling::BACKFACE_CCW );
if( result == MaskedOcclusionCulling::VISIBLE )
maskVisible = true;
if( !maskVisible )
tr.pc.c_mocCulls += 1;
surfaceDirectlyVisible = false;
idVec4 triVerts[3];
unsigned int triIndices[] = { 0, 1, 2 };
tr.pc.c_mocIndexes += tri->numIndexes;
tr.pc.c_mocVerts += tri->numIndexes;
for( int i = 0, face = 0; i < tri->numIndexes; i += 3, face++ )
const idDrawVert& v0 = tri->verts[tri->indexes[i + 0]];
const idDrawVert& v1 = tri->verts[tri->indexes[i + 1]];
const idDrawVert& v2 = tri->verts[tri->indexes[i + 2]];
// transform to clip space
vEntity->mvp.TransformPoint( idVec4(,,, 1 ), triVerts[0] );
vEntity->mvp.TransformPoint( idVec4(,,, 1 ), triVerts[1] );
vEntity->mvp.TransformPoint( idVec4(,,, 1 ), triVerts[2] );
tr.maskedOcclusionCulling->RenderTriangles( ( float* )triVerts, triIndices, 1, NULL, MaskedOcclusionCulling::BACKFACE_CCW );
// RB: added check whether GPU skinning is available at all
const bool gpuSkinned = ( tri->staticModelWithJoints != NULL && r_useGPUSkinning.GetBool() );

View file

@ -646,7 +646,7 @@ void R_RenderView( viewDef_t* parms )
// RB: render worldspawn geometry to the software culling buffer
R_FillMaskedOcclusionBufferWithModels( tr.viewDef );
// make sure that interactions exist for all light / entity combinations that are visible
// add any pre-generated light shadows, and calculate the light shader values

View file

@ -114,9 +114,6 @@ void R_RenderSingleModel( viewEntity_t* vEntity )
// we will add all interaction surfs here, to be chained to the lights in later serial code
vEntity->drawSurfs = NULL;
// RB
vEntity->useLightGrid = false;
// globals we really should pass in...
const viewDef_t* viewDef = tr.viewDef;
@ -424,28 +421,35 @@ void R_RenderSingleModel( viewEntity_t* vEntity )
// TODO render to masked occlusion buffer
// make sure we have an ambient cache and all necessary normals / tangents
if( !vertexCache.CacheIsCurrent( tri->indexCache ) )
#if 1
// super simple bruteforce
idVec4 triVerts[3];
unsigned int triIndices[] = { 0, 1, 2 };
tr.pc.c_mocIndexes += tri->numIndexes;
tr.pc.c_mocVerts += tri->numIndexes;
for( int i = 0, face = 0; i < tri->numIndexes; i += 3, face++ )
tri->indexCache = vertexCache.AllocIndex( tri->indexes, tri->numIndexes );
const idDrawVert& v0 = tri->verts[tri->indexes[i + 0]];
const idDrawVert& v1 = tri->verts[tri->indexes[i + 1]];
const idDrawVert& v2 = tri->verts[tri->indexes[i + 2]];
// transform to clip space
vEntity->mvp.TransformPoint( idVec4(,,, 1 ), triVerts[0] );
vEntity->mvp.TransformPoint( idVec4(,,, 1 ), triVerts[1] );
vEntity->mvp.TransformPoint( idVec4(,,, 1 ), triVerts[2] );
// tri->indexes is unsigned short instead of uint
//triIndices[0] = tri->indexes[i + 0];
//triIndices[1] = tri->indexes[i + 1];
//triIndices[2] = tri->indexes[i + 2];
tr.maskedOcclusionCulling->RenderTriangles( ( float* )triVerts, triIndices, 1, NULL, MaskedOcclusionCulling::BACKFACE_CCW );
if( !vertexCache.CacheIsCurrent( tri->ambientCache ) )
// we are going to use it for drawing, so make sure we have the tangents and normals
if( shader->ReceivesLighting() && !tri->tangentsCalculated )
assert( tri->staticModelWithJoints == NULL );
R_DeriveTangents( tri );
// RB: this was hit by parametric particle models ..
//assert( false ); // this should no longer be hit
// RB end
tri->ambientCache = vertexCache.AllocVertex( tri->verts, tri->numVerts );
// TODO write faster alternative
// add the surface for drawing
@ -517,10 +521,19 @@ void R_RenderSingleModel( viewEntity_t* vEntity )
void R_FillMaskedOcclusionBufferWithModels()
void R_FillMaskedOcclusionBufferWithModels( viewDef_t* viewDef )
SCOPED_PROFILE_EVENT( "R_FillMaskedOcclusionBufferWithModels" );
const int viewWidth = viewDef->viewport.x2 - viewDef->viewport.x1 + 1;
const int viewHeight = viewDef->viewport.y2 - viewDef->viewport.y1 + 1;
const float zNear = ( viewDef->renderView.cramZNear ) ? ( r_znear.GetFloat() * 0.25f ) : r_znear.GetFloat();
tr.maskedOcclusionCulling->SetResolution( viewWidth, viewHeight );
tr.maskedOcclusionCulling->SetNearClipPlane( zNear );
tr.viewDef->viewEntitys = R_SortViewEntities( tr.viewDef->viewEntitys );
@ -548,10 +561,56 @@ void R_FillMaskedOcclusionBufferWithModels()
// skip after rendering BSP area models
if( !model->IsStaticWorldModel() )
R_RenderSingleModel( vEntity );
// Converts a per-pixel depth buffer into a greyscale RGB image for visualization.
//
// depth  - w * h depth samples; samples that are not > 0 are treated as cleared pixels
// image  - output buffer of w * h * 3 bytes, written as identical R, G and B values
// w, h   - dimensions of both buffers in pixels
//
// Cleared pixels are written as 0. Valid samples are mapped linearly from the
// [min, max] range of the valid samples onto the intensities [32, 255].
static void TonemapDepth( float* depth, unsigned char* image, int w, int h )
{
	const int numPixels = w * h;

	// Find min/max w coordinate (discard cleared pixels)
	float minW = FLT_MAX;
	float maxW = 0.0f;
	for( int i = 0; i < numPixels; ++i )
	{
		if( depth[i] > 0.0f )
		{
			minW = std::min( minW, depth[i] );
			maxW = std::max( maxW, depth[i] );
		}
	}

	// zero when all valid samples share one depth; dividing by it would yield NaN
	const float range = maxW - minW;

	// Tonemap depth values
	for( int i = 0; i < numPixels; ++i )
	{
		int intensity = 0;
		if( depth[i] > 0.0f )
		{
			// a flat buffer has no gradient to show, so use the lowest valid intensity
			double value = 32.0;
			if( range > 0.0f )
			{
				value = 223.0 * ( depth[i] - minW ) / range + 32.0;
			}

			// keep the value representable before narrowing; the negated compare also
			// catches NaN (e.g. from infinite samples), whose conversion is undefined
			if( !( value >= 32.0 ) )
			{
				value = 32.0;
			}
			else if( value > 255.0 )
			{
				value = 255.0;
			}

			intensity = ( unsigned char )value;
		}

		image[i * 3 + 0] = intensity;
		image[i * 3 + 1] = intensity;
		image[i * 3 + 2] = intensity;
	}
}
// Console command "maskShot": expands the masked occlusion culling buffer into a
// per-pixel depth buffer, tonemaps it to greyscale and writes it out as
// "occlusion_buffer.png" through R_WritePNG.
CONSOLE_COMMAND( maskShot, "Dumping masked occlusion culling buffer", NULL )
{
	// nothing to dump if the occlusion culling object was never created
	if( tr.maskedOcclusionCulling == NULL )
	{
		return;
	}

	unsigned int width, height;
	tr.maskedOcclusionCulling->GetResolution( width, height );

	// compute a per pixel depth buffer from the hierarchical depth buffer, used for visualization
	float* perPixelZBuffer = new float[width * height];
	tr.maskedOcclusionCulling->ComputePixelDepthBuffer( perPixelZBuffer, false );

	// Tonemap the image
	unsigned char* image = new unsigned char[width * height * 3];
	TonemapDepth( perPixelZBuffer, image, width, height );

	R_WritePNG( "occlusion_buffer.png", image, 3, width, height, "fs_basepath" );

	// release both temporary buffers; the depth buffer was previously leaked on every call
	delete[] image;
	delete[] perPixelZBuffer;
}