From 5f1a5db3df2d5e54dce5ba81d38e24a8a7ee5ac6 Mon Sep 17 00:00:00 2001 From: Robert Beckebans Date: Fri, 11 Oct 2024 13:53:12 +0200 Subject: [PATCH] Deleted renderer/CullingThreadpool.cpp, we stick to the single threaded solution for now --- neo/renderer/CullingThreadpool.cpp | 506 ----------------------------- neo/renderer/CullingThreadpool.h | 311 ------------------ 2 files changed, 817 deletions(-) delete mode 100644 neo/renderer/CullingThreadpool.cpp delete mode 100644 neo/renderer/CullingThreadpool.h diff --git a/neo/renderer/CullingThreadpool.cpp b/neo/renderer/CullingThreadpool.cpp deleted file mode 100644 index c7c71cad..00000000 --- a/neo/renderer/CullingThreadpool.cpp +++ /dev/null @@ -1,506 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright 2017 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); you may not -// use this file except in compliance with the License. You may obtain a copy -// of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -// License for the specific language governing permissions and limitations -// under the License. -//////////////////////////////////////////////////////////////////////////////// -#include "precompiled.h" -#pragma hdrstop - -#include -#include "CullingThreadpool.h" - -#define SAFE_DELETE(X) {if (X != nullptr) delete X; X = nullptr;} -#define SAFE_DELETE_ARRAY(X) {if (X != nullptr) delete[] X; X = nullptr;} - -template CullingThreadpool::StateData::StateData( unsigned int maxJobs ) : - mMaxJobs( maxJobs ), - mCurrentIdx( ~0 ) -{ - mData = new T[mMaxJobs]; -} - -template CullingThreadpool::StateData::~StateData() -{ - SAFE_DELETE_ARRAY( mData ); -} - -template void CullingThreadpool::StateData::AddData( const T& data ) -{ - mCurrentIdx++; - mData[mCurrentIdx % mMaxJobs] = data; -} - -template const T* CullingThreadpool::StateData::GetData() const -{ - return &mData[mCurrentIdx % mMaxJobs]; -} - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Helper class: Mostly lockless queue for render jobs -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -CullingThreadpool::RenderJobQueue::RenderJobQueue( unsigned int nBins, unsigned int maxJobs ) : - mNumBins( nBins ), - mMaxJobs( maxJobs ) -{ - mRenderPtrs = new std::atomic_uint[mNumBins]; - mBinMutexes = new std::atomic_uint[mNumBins]; - for( unsigned int i = 0; i < mNumBins; ++i ) - { - mBinMutexes[i] = 0; - } - - mJobs = new Job[mMaxJobs]; - for( unsigned int i = 0; i < mMaxJobs; ++i ) - { - mJobs[i].mRenderJobs = new TriList[mNumBins]; - } - - // Compute worst case job size (we allocate memory for the worst case) - const unsigned int TriSize = 3 * 3; - const unsigned int MaxTrisPerJob = TRIS_PER_JOB * 6; - const unsigned int MaxJobSize = MaxTrisPerJob * TriSize; - mTrilistData = new float[MaxJobSize * mMaxJobs * mNumBins]; - - // Setup trilist objects used for binning - for( unsigned int i = 0; i < mMaxJobs; ++i ) - { - for( unsigned int j = 0; j < mNumBins; ++j ) - { - int idx = i * mNumBins + j; - TriList& tList = mJobs[i].mRenderJobs[j]; - tList.mNumTriangles = MaxTrisPerJob; - tList.mTriIdx = 0; - tList.mPtr = mTrilistData + idx * MaxJobSize; - } - } - - // Clear render queue - Reset(); -} - -CullingThreadpool::RenderJobQueue::~RenderJobQueue() -{ - SAFE_DELETE_ARRAY( mRenderPtrs ); - SAFE_DELETE_ARRAY( mBinMutexes ); - for( unsigned int i = 0; i < mMaxJobs; ++i ) - { - SAFE_DELETE_ARRAY( mJobs[i].mRenderJobs ); - } - SAFE_DELETE_ARRAY( mJobs ); - SAFE_DELETE_ARRAY( mTrilistData ); -} - -inline unsigned int CullingThreadpool::RenderJobQueue::GetMinRenderPtr() const -{ - unsigned int minRenderPtr = mRenderPtrs[0]; - for( unsigned int i = 1; i < mNumBins; ++i ) - { - unsigned int renderPtr = mRenderPtrs[i]; - minRenderPtr = renderPtr < minRenderPtr ? renderPtr : minRenderPtr; - } - return minRenderPtr; -} - -inline void CullingThreadpool::RenderJobQueue::AdvanceRenderJob( int binIdx ) -{ - mRenderPtrs[binIdx]++; - mBinMutexes[binIdx] = 0; -} - -inline unsigned int CullingThreadpool::RenderJobQueue::GetBestGlobalQueue() const -{ - // Find least advanced queue - unsigned int bestBin = ~0, bestPtr = mWritePtr; - for( unsigned int i = 0; i < mNumBins; ++i ) - { - if( mRenderPtrs[i] < bestPtr && mBinMutexes[i] == 0 ) - { - bestBin = i; - bestPtr = mRenderPtrs[i]; - } - } - return bestBin; -} - -inline bool CullingThreadpool::RenderJobQueue::IsPipelineEmpty() const -{ - return GetMinRenderPtr() == mWritePtr; -} - -inline bool CullingThreadpool::RenderJobQueue::CanWrite() const -{ - return mWritePtr - GetMinRenderPtr() < mMaxJobs; -} - -inline bool CullingThreadpool::RenderJobQueue::CanBin() const -{ - return mBinningPtr < mWritePtr && mBinningPtr - GetMinRenderPtr() < mMaxJobs; -} - -inline CullingThreadpool::RenderJobQueue::Job* CullingThreadpool::RenderJobQueue::GetWriteJob() -{ - return &mJobs[mWritePtr % mMaxJobs]; -} - -inline void CullingThreadpool::RenderJobQueue::AdvanceWriteJob() -{ - mWritePtr++; -} - -inline CullingThreadpool::RenderJobQueue::Job* CullingThreadpool::RenderJobQueue::GetBinningJob() -{ - unsigned int binningPtr = mBinningPtr; - if( binningPtr < mWritePtr && binningPtr - GetMinRenderPtr() < mMaxJobs ) - { - if( mBinningPtr.compare_exchange_strong( binningPtr, binningPtr + 1 ) ) - { - mJobs[binningPtr % mMaxJobs].mBinningJobStartedIdx = binningPtr; - return &mJobs[binningPtr % mMaxJobs]; - } - } - return nullptr; -} - -inline void CullingThreadpool::RenderJobQueue::FinishedBinningJob( Job* job ) -{ - job->mBinningJobCompletedIdx = job->mBinningJobStartedIdx; -} - -inline CullingThreadpool::RenderJobQueue::Job* CullingThreadpool::RenderJobQueue::GetRenderJob( int binIdx ) -{ - // Attempt to lock bin mutex - unsigned int expected = 0; - if( !mBinMutexes[binIdx].compare_exchange_strong( expected, 1 ) ) - { - return nullptr; - } - - // Check any items in the queue, and bail if empty - if( mRenderPtrs[binIdx] != mJobs[mRenderPtrs[binIdx] % mMaxJobs].mBinningJobCompletedIdx ) - { - mBinMutexes[binIdx] = 0; - return nullptr; - } - - return &mJobs[mRenderPtrs[binIdx] % mMaxJobs]; -} - -void CullingThreadpool::RenderJobQueue::Reset() -{ - mWritePtr = 0; - mBinningPtr = 0; - - for( unsigned int i = 0; i < mNumBins; ++i ) - { - mRenderPtrs[i] = 0; - } - - for( unsigned int i = 0; i < mMaxJobs; ++i ) - { - mJobs[i].mBinningJobCompletedIdx = -1; - mJobs[i].mBinningJobStartedIdx = -1; - } -} - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Culling threadpool private helper functions -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -void CullingThreadpool::SetupScissors() -{ - unsigned int width, height; - mMOC->GetResolution( width, height ); - - unsigned int binWidth; - unsigned int binHeight; - mMOC->ComputeBinWidthHeight( mBinsW, mBinsH, binWidth, binHeight ); - - for( unsigned int ty = 0; ty < mBinsH; ++ty ) - { - for( unsigned int tx = 0; tx < mBinsW; ++tx ) - { - unsigned int threadIdx = tx + ty * mBinsW; - - // Adjust rects on final row / col to match resolution - mRects[threadIdx].mMinX = tx * binWidth; - mRects[threadIdx].mMaxX = tx + 1 == mBinsW ? width : ( tx + 1 ) * binWidth; - mRects[threadIdx].mMinY = ty * binHeight; - mRects[threadIdx].mMaxY = ty + 1 == mBinsH ? height : ( ty + 1 ) * binHeight; - } - } -} - -void CullingThreadpool::ThreadRun( CullingThreadpool* threadPool, unsigned int threadId ) -{ - threadPool->ThreadMain( threadId ); -} - -void CullingThreadpool::ThreadMain( unsigned int threadIdx ) -{ - while( true ) - { - bool threadIsIdle = true; - unsigned int threadBinIdx = threadIdx; - - // Wait for threads to be woken up (low CPU load sleep) - std::unique_lock lock( mSuspendedMutex ); - mNumSuspendedThreads++; - mSuspendedCV.wait( lock, [&] {return !mSuspendThreads; } ); - mNumSuspendedThreads--; - lock.unlock(); - - // Loop until suspended again - while( !mSuspendThreads || !threadIsIdle ) - { - if( mKillThreads ) - { - return; - } - - threadIsIdle = false; - - // Prio 1: Process any render jobs local to this thread - unsigned int binIdx = threadBinIdx; - threadBinIdx = threadBinIdx + mNumThreads < mNumBins ? threadBinIdx + mNumThreads : threadIdx; - RenderJobQueue::Job* job = mRenderQueue->GetRenderJob( binIdx ); - if( job != nullptr ) - { - if( job->mRenderJobs[binIdx].mTriIdx > 0 ) - { - mMOC->RenderTrilist( job->mRenderJobs[binIdx], &mRects[binIdx] ); - } - - mRenderQueue->AdvanceRenderJob( binIdx ); - continue; - } - - // Prio 2: Process any outstanding setup/binning jobs - if( mRenderQueue->CanBin() ) - { - // If no more rasterization jobs, get next binning job - RenderJobQueue::Job* job = mRenderQueue->GetBinningJob(); - if( job != nullptr ) - { - RenderJobQueue::BinningJob& sjob = job->mBinningJob; - for( unsigned int i = 0; i < mNumBins; ++i ) - { - job->mRenderJobs[i].mTriIdx = 0; - } - mMOC->BinTriangles( sjob.mVerts, sjob.mTris, sjob.nTris, job->mRenderJobs, mBinsW, mBinsH, sjob.mMatrix, sjob.mBfWinding, sjob.mClipPlanes, *sjob.mVtxLayout ); - mRenderQueue->FinishedBinningJob( job ); - } - continue; - } - - // Prio 3: No work is available, work steal from another thread's queue - if( mNumBins > mNumThreads ) - { - binIdx = mRenderQueue->GetBestGlobalQueue(); - if( binIdx < mRenderQueue->mNumBins ) - { - RenderJobQueue::Job* job = mRenderQueue->GetRenderJob( binIdx ); - if( job != nullptr ) - { - if( job->mRenderJobs[binIdx].mTriIdx > 0 ) - { - mMOC->RenderTrilist( job->mRenderJobs[binIdx], &mRects[binIdx] ); - } - - mRenderQueue->AdvanceRenderJob( binIdx ); - } - continue; - } - } - - // No work available: Yield this thread - std::this_thread::yield(); - threadIsIdle = true; - } - } -} - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Culling threadpool public API, similar to the MaskedOcclusionCulling class -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -CullingThreadpool::CullingThreadpool( unsigned int numThreads, unsigned int binsW, unsigned int binsH, unsigned int maxJobs ) : - mNumThreads( numThreads ), - mMaxJobs( maxJobs ), - mBinsW( binsW ), - mBinsH( binsH ), - mKillThreads( false ), - mSuspendThreads( true ), - mNumSuspendedThreads( 0 ), - mModelToClipMatrices( maxJobs ), - mVertexLayouts( maxJobs ), - mMOC( nullptr ) -{ - mNumBins = mBinsW * mBinsH; - assert( mNumBins >= mNumThreads ); // Having less bins than threads is a bad idea! - - mRects = new ScissorRect[mNumBins]; - mRenderQueue = new RenderJobQueue( mNumBins, mMaxJobs ); - - // Add default vertex layout and matrix - mVertexLayouts.AddData( VertexLayout( 16, 4, 12 ) ); - mCurrentMatrix = nullptr; - - mThreads = new std::thread[mNumThreads]; - for( unsigned int i = 0; i < mNumThreads; ++i ) - { - mThreads[i] = std::thread( ThreadRun, this, i ); - } - -} - -CullingThreadpool::~CullingThreadpool() -{ - // Wait for threads to terminate - if( mThreads != nullptr || !mKillThreads ) - { - //WakeThreads(); - mKillThreads = true; - for( unsigned int i = 0; i < mNumThreads; ++i ) - { - mThreads[i].join(); - } - - } - - // Free memory - SAFE_DELETE( mRenderQueue ); - SAFE_DELETE_ARRAY( mRects ); - SAFE_DELETE_ARRAY( mThreads ); -} - -void CullingThreadpool::WakeThreads() -{ - // Wait for all threads to be in suspended mode - while( mNumSuspendedThreads < mNumThreads ) - { - std::this_thread::yield(); - } - - // Send wake up event - std::unique_lock lock( mSuspendedMutex ); - mSuspendThreads = false; - lock.unlock(); - mSuspendedCV.notify_all(); -} - -void CullingThreadpool::SuspendThreads() -{ - // Signal threads to go into suspended mode (after finishing all outstanding work) - mSuspendThreads = true; -} - -void CullingThreadpool::Flush() -{ - // Wait for pipeline to be empty (i.e. all work is finished) - while( !mRenderQueue->IsPipelineEmpty() ) - { - std::this_thread::yield(); - } - - // Reset queue counters - mRenderQueue->Reset(); -} - -void CullingThreadpool::SetBuffer( MaskedOcclusionCulling* moc ) -{ - Flush(); - mMOC = moc; - SetupScissors(); -} - -void CullingThreadpool::SetResolution( unsigned int width, unsigned int height ) -{ - Flush(); - mMOC->SetResolution( width, height ); - SetupScissors(); -} - -void CullingThreadpool::SetNearClipPlane( float nearDist ) -{ - Flush(); - mMOC->SetNearClipPlane( nearDist ); -} - -void CullingThreadpool::SetMatrix( const float* modelToClipMatrix ) -{ - // Treat nullptr matrix as a special case, otherwise copy the contents of the pointer and add to state - if( modelToClipMatrix == nullptr ) - { - mCurrentMatrix = nullptr; - } - else - { - mModelToClipMatrices.AddData( Matrix4x4( modelToClipMatrix ) ); - mCurrentMatrix = mModelToClipMatrices.GetData()->mValues; - } -} - -void CullingThreadpool::SetVertexLayout( const VertexLayout& vtxLayout ) -{ - mVertexLayouts.AddData( vtxLayout ); -} - -void CullingThreadpool::ClearBuffer() -{ - Flush(); - mMOC->ClearBuffer(); -} - -void CullingThreadpool::RenderTriangles( const float* inVtx, const unsigned int* inTris, int nTris, BackfaceWinding bfWinding, ClipPlanes clipPlaneMask ) -{ -#if MOC_RECORDER_ENABLE != 0 - mMOC->RecordRenderTriangles( inVtx, inTris, nTris, mCurrentMatrix, clipPlaneMask, bfWinding, *mVertexLayouts.GetData( ) ); -#endif - - for( int i = 0; i < nTris; i += TRIS_PER_JOB ) - { - // Yield if work queue is full - while( !mRenderQueue->CanWrite() ) - { - std::this_thread::yield(); - } - - // Create new renderjob - RenderJobQueue::Job* job = mRenderQueue->GetWriteJob(); - job->mBinningJob.mVerts = inVtx; - job->mBinningJob.mTris = inTris + i * 3; - job->mBinningJob.nTris = nTris - i < TRIS_PER_JOB ? nTris - i : TRIS_PER_JOB; - job->mBinningJob.mMatrix = mCurrentMatrix; - job->mBinningJob.mClipPlanes = clipPlaneMask; - job->mBinningJob.mBfWinding = bfWinding; - job->mBinningJob.mVtxLayout = mVertexLayouts.GetData(); - mRenderQueue->AdvanceWriteJob(); - } -} - -CullingThreadpool::CullingResult CullingThreadpool::TestRect( float xmin, float ymin, float xmax, float ymax, float wmin ) -{ - return mMOC->TestRect( xmin, ymin, xmax, ymax, wmin ); -} - -CullingThreadpool::CullingResult CullingThreadpool::TestTriangles( const float* inVtx, const unsigned int* inTris, int nTris, BackfaceWinding bfWinding, ClipPlanes clipPlaneMask ) -{ - return mMOC->TestTriangles( inVtx, inTris, nTris, mCurrentMatrix, bfWinding, clipPlaneMask, *mVertexLayouts.GetData() ); -} - -void CullingThreadpool::ComputePixelDepthBuffer( float* depthData, bool flipY ) -{ - Flush(); - mMOC->ComputePixelDepthBuffer( depthData, flipY ); -} diff --git a/neo/renderer/CullingThreadpool.h b/neo/renderer/CullingThreadpool.h deleted file mode 100644 index 7d4f8052..00000000 --- a/neo/renderer/CullingThreadpool.h +++ /dev/null @@ -1,311 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Copyright 2017 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); you may not -// use this file except in compliance with the License. You may obtain a copy -// of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -// License for the specific language governing permissions and limitations -// under the License. -//////////////////////////////////////////////////////////////////////////////// -#pragma once - -/*! - * \file CullingThreadpool.h - * \brief Worker threadpool example for threaded masked occlusion culling. - * - * This class implements a threadpool for occluder rendering. Calls to CullingThreadpool::RenderTriangle() - * will immediately return, after adding work items to a queue, and occluder rendering is performed - * by worker threads as quickly as possible. Occlusion queries are performed directly on the calling - * threadand can be performed either synchronosly, by calling Flush() before executing the query, or - * asynchronosly, by performing the query without waiting for the worker threads to finish. - * - * Note that this implementation should be considered an example rather than the best threading - * solution. You may want to integrate threading in your own task system, and it may also be beneficial - * to thread the traversal code. Refer to MaskedOcclusionCulling::BinTriangles() and - * MaskedOcclusionCulling::RenderTrilist() for functions that can be used to make your own - * threaded culling system. - */ - -#include -#include -#include -#include - -#include "MaskedOcclusionCulling.h" - -class CullingThreadpool -{ -protected: - static const int TRIS_PER_JOB = 1024; // Maximum number of triangles per job (bigger drawcalls are split), affects memory requirements - - typedef MaskedOcclusionCulling::CullingResult CullingResult; - typedef MaskedOcclusionCulling::ClipPlanes ClipPlanes; - typedef MaskedOcclusionCulling::BackfaceWinding BackfaceWinding; - typedef MaskedOcclusionCulling::ScissorRect ScissorRect; - typedef MaskedOcclusionCulling::VertexLayout VertexLayout; - typedef MaskedOcclusionCulling::TriList TriList; - - // Small utility class for 4x4 matrices - struct Matrix4x4 - { - float mValues[16]; - Matrix4x4() {} - Matrix4x4( const float* matrix ) - { - for( int i = 0; i < 16; ++i ) - { - mValues[i] = matrix[i]; - } - } - }; - - // Internal utility class for a (mostly) lockless queue for binning & rendering jobs - struct RenderJobQueue - { - struct BinningJob - { - const float* mVerts; - const unsigned int* mTris; - unsigned int nTris; - - const float* mMatrix; - ClipPlanes mClipPlanes; - BackfaceWinding mBfWinding; - const VertexLayout* mVtxLayout; - }; - - struct Job - { - volatile unsigned int mBinningJobStartedIdx; - volatile unsigned int mBinningJobCompletedIdx; - BinningJob mBinningJob; - TriList* mRenderJobs; - }; - - unsigned int mNumBins; - unsigned int mMaxJobs; - - volatile unsigned int mWritePtr; - std::atomic_uint mBinningPtr; - std::atomic_uint* mRenderPtrs; - std::atomic_uint* mBinMutexes; - - float* mTrilistData; - Job* mJobs; - - RenderJobQueue( unsigned int nBins, unsigned int maxJobs ); - ~RenderJobQueue(); - - unsigned int GetMinRenderPtr() const; - unsigned int GetBestGlobalQueue() const; - bool IsPipelineEmpty() const; - - bool CanWrite() const; - bool CanBin() const; - - Job* GetWriteJob(); - void AdvanceWriteJob(); - - Job* GetBinningJob(); - void FinishedBinningJob( Job* job ); - - Job* GetRenderJob( int binIdx ); - void AdvanceRenderJob( int binIdx ); - - void Reset(); - }; - - // Internal utility class for state (matrix / vertex layout) - template struct StateData - { - unsigned int mMaxJobs; - unsigned int mCurrentIdx; - T* mData; - - StateData( unsigned int maxJobs ); - ~StateData(); - void AddData( const T& data ); - const T* GetData() const; - }; - - // Number of worker threads and bins - unsigned int mNumThreads; - unsigned int mNumBins; - unsigned int mMaxJobs; - unsigned int mBinsW; - unsigned int mBinsH; - - // Threads and control variables - std::mutex mSuspendedMutex; - std::condition_variable mSuspendedCV; - volatile bool mKillThreads; - volatile bool mSuspendThreads; - volatile unsigned int mNumSuspendedThreads; - std::thread* mThreads; - - // State variables and command queue - const float* mCurrentMatrix; - StateData mModelToClipMatrices; - StateData mVertexLayouts; - RenderJobQueue* mRenderQueue; - - // Occlusion culling object and related scissor rectangles - ScissorRect* mRects; - MaskedOcclusionCulling* mMOC; - - void SetupScissors(); - - static void ThreadRun( CullingThreadpool* threadPool, unsigned int threadId ); - void ThreadMain( unsigned int threadIdx ); - -public: - /*! - * \brief Creates a new threadpool for masked occlusion culling. This object has a - * similar API to the MaskedOcclusionCulling class, but performs occluder - * rendering asynchronously on worker threads (similar to how DX/GL works). - * - * \param numThreads Number of worker threads to perform occluder rendering. Best - * balance may be scene/machine dependent, but it's good practice to leave at - * least one full core (2 threads with hyperthreading) for the main thread. - * \param binsW The screen is divided into binsW x binsH rectangular bins for load - * balancing. The number of bins should be atleast equal to the number of - * worker threads. - * \param binsH See description for the binsW parameter. - * \param maxJobs Maximum number of jobs that may be in flight at any given time. If - * the caller thread generates jobs faster than the worker threads can finish - * them, then the job queue will fill up and the caller thread will stall once - * "maxJobs" items have been queued up. For culling systems interleaving occlusion - * queries and rendering, this value should be kept quite low to minimize false - * positives (see TestRect()). We've observed that 32 [default] items typically - * works well for our interleaved queries, while also allowing good load-balancing, - * and this is the recommended setting. - */ - CullingThreadpool( unsigned int numThreads, unsigned int binsW, unsigned int binsH, unsigned int maxJobs = 32 ); - - /*! - * \brief Destroys the threadpool and terminates all worker threads. - */ - ~CullingThreadpool(); - - /*! - * \brief Wakes up culling worker threads from suspended sleep, and puts them in a - * ready state (using an idle spinlock with significantly higher CPU overhead). - * - * It may take on the order of 100us to wake up the threads, so this function should - * preferably be called slightly ahead of starting occlusion culling work. - */ - void WakeThreads(); - - /*! - * \brief Suspend all culling worker threads to a low CPU overhead sleep state. - * - * For performance and latency reasons, the culling work is performed in an active - * processing loop (with no thread sleeping) with high CPU overhead. In a system - * with more worker threads it's important to put the culling worker threads in a - * low overhead sleep state after occlusion culling work has completed. - */ - void SuspendThreads(); - - /*! - * \brief Waits for all outstanding occluder rendering work to complete. Can be used - * to ensure that rendering has completed before performing a TestRect() or - * TestTriangles() call. - */ - void Flush(); - - /* - * \brief Sets the MaskedOcclusionCulling object (buffer) to be used for rendering and - * testing calls. This method causes a Flush() to ensure that all unfinished - * rendering is completed. - */ - void SetBuffer( MaskedOcclusionCulling* moc ); - - /* - * \brief Changes the resolution of the occlusion buffer, see MaskedOcclusionCulling::SetResolution(). - * This method causes a Flush() to ensure that all unfinished rendering is completed. - */ - void SetResolution( unsigned int width, unsigned int height ); - - /* - * \brief Sets the near clipping plane, see MaskedOcclusionCulling::SetNearClipPlane(). This - * method causes a Flush() to ensure that all unfinished rendering is completed. - */ - void SetNearClipPlane( float nearDist ); - - /* - * \brief Sets the model to clipspace transform matrix used for the RenderTriangles() and TestTriangles() - * function calls. The contents of the matrix is copied, and it's safe to modify it without calling - * Flush(). The copy may be costly, which is the reason for passing this parameter as "state". - * - * \param modelToClipMatrix All vertices will be transformed by the specified model to clipspace matrix. - * Passing nullptr [default] disables the transform (equivalent to using an identity matrix). - */ - void SetMatrix( const float* modelToClipMatrix = nullptr ); - - /* - * \brief Sets the vertex layout used for the RenderTriangles() and TestTriangles() function calls. - * The vertex layout is copied, and it's safe to modify it without calling Flush(). The copy - * may be costly, which is the reason for passing this parameter as "state". - * - * \param vtxLayout A struct specifying the vertex layout (see struct for detailed - * description). For best performance, it is advicable to store position data - * as compactly in memory as possible. - */ - void SetVertexLayout( const VertexLayout& vtxLayout = VertexLayout( 16, 4, 12 ) ); - - /* - * \brief Clears the occlusion buffer, see MaskedOcclusionCulling::ClearBuffer(). This method - * causes a Flush() to ensure that all unfinished rendering is completed. - */ - void ClearBuffer(); - - /* - * \brief Asynchronously render occluder triangles, see MaskedOcclusionCulling::RenderTriangles(). - * - * This method puts the drawcall into a command queue, and immediately returns. The rendering is - * performed by the worker threads at the earliest opportunity. - * - * Important: As rendering is performed asynchronously, the application is not allowed to - * change the contents of the *inVtx or *inTris buffers until after rendering is completed. If - * you wish to use dynamic buffers, the application must perform a Flush() to ensure that rendering - * is finished, or make sure to rotate between more buffers than the maximum number of outstanding - * render jobs (see the CullingThreadpool() constructor). - */ - void RenderTriangles( const float* inVtx, const unsigned int* inTris, int nTris, BackfaceWinding bfWinding = MaskedOcclusionCulling::BACKFACE_CW, ClipPlanes clipPlaneMask = MaskedOcclusionCulling::CLIP_PLANE_ALL ); - - /* - * \brief Occlusion query for a rectangle with a given depth, see MaskedOcclusionCulling::TestRect(). - * - * Important: This method is performed on the main thread and does not wait for outstanding - * occluder rendering to be finished. To ensure that all occluder rendering is completed you must - * perform a Flush() prior to calling this function. - * - * It is conservatively correct to perform occlusion queries without calling Flush() (it may only - * lead to objects being incorrectly classified as visible), and it can lead to much better performance - * if occlusion queries are used for traversing a BVH or similar data structure. It's possible to - * use "asynchronous" queries during traversal, and removing false positives later, when rendering - * has completed. - */ - CullingResult TestRect( float xmin, float ymin, float xmax, float ymax, float wmin ); - - /* - * \brief Occlusion query for a mesh, see MaskedOcclusionCulling::TestTriangles(). - * - * Important: See the TestRect() method for a brief discussion about asynchronous occlusion - * queries. - */ - CullingResult TestTriangles( const float* inVtx, const unsigned int* inTris, int nTris, BackfaceWinding bfWinding = MaskedOcclusionCulling::BACKFACE_CW, ClipPlanes clipPlaneMask = MaskedOcclusionCulling::CLIP_PLANE_ALL ); - - /*! - * \brief Creates a per-pixel depth buffer from the hierarchical z buffer representation, see - * MaskedOcclusionCulling::ComputePixelDepthBuffer(). This method causes a Flush() to - * ensure that all unfinished rendering is completed. - */ - void ComputePixelDepthBuffer( float* depthData, bool flipY ); -};