mirror of
https://github.com/id-Software/DOOM-3-BFG.git
synced 2025-03-14 06:34:10 +00:00
Merge pull request #932 from SRSaunders/moc-opt2-pch-fixes
MOC branch: PCH & Portability fixes for Linux x86_64 and macOS x86_64/arm64
This commit is contained in:
commit
eb8825a5d4
6 changed files with 687 additions and 67 deletions
|
@ -69,16 +69,16 @@ set(NVRHI_INSTALL OFF)
|
|||
|
||||
set(CPU_TYPE "" CACHE STRING "When set, passes this string as CPU-ID which will be embedded into the binary.")
|
||||
|
||||
# SRS - Turn on optimization when cross-compiling from Apple arm64 to x86_64
|
||||
# SRS - Turn on compiler optimizations for x86 and also when cross-compiling from Apple arm64 to x86_64
|
||||
if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)" OR CMAKE_OSX_ARCHITECTURES MATCHES "x86_64")
|
||||
set(CPU_OPTIMIZATION "-mmmx -msse -msse2" CACHE STRING "Which CPU specific optimitations should be used beside the compiler's default?")
|
||||
set(CPU_OPTIMIZATION "-mmmx -msse -msse2" CACHE STRING "Which CPU-specific optimizations should be used besides the compiler's default?")
|
||||
endif()
|
||||
|
||||
# SRS - Turn off MMX/SSE intrinsics when cross-compiling from Apple x86_64 to arm64
|
||||
# SRS - Turn on MMX/SSE intrinsics for x86 but not when cross-compiling from Apple x86_64 to arm64
|
||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)|(e2k)|(E2K)" AND NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
|
||||
option(USE_INTRINSICS_SSE "Compile using SSE intrinsics (e.g mmx, sse, msse2)" ON)
|
||||
option(USE_INTRINSICS_SSE "Compile using x86 MMX/SSE intrinsics (e.g SSE SIMD instructions)" ON)
|
||||
else()
|
||||
option(USE_INTRINSICS_SSE "Compile using SSE intrinsics (e.g mmx, sse, msse2)" OFF)
|
||||
option(USE_INTRINSICS_SSE "Compile using x86 MMX/SSE intrinsics (e.g SSE SIMD instructions)" OFF)
|
||||
endif()
|
||||
|
||||
if(FFMPEG AND BINKDEC)
|
||||
|
@ -122,12 +122,12 @@ endif()
|
|||
|
||||
if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
|
||||
add_definitions(-pipe)
|
||||
#add_definitions(-Wall)
|
||||
add_definitions(-Werror=format-security)
|
||||
add_definitions(-Werror=format)
|
||||
add_definitions(-Wno-format-zero-length)
|
||||
add_definitions(-Wno-nonnull)
|
||||
add_compile_options(-pipe)
|
||||
#add_compile_options(-Wall)
|
||||
add_compile_options(-Werror=format-security)
|
||||
add_compile_options(-Werror=format)
|
||||
add_compile_options(-Wno-format-zero-length)
|
||||
add_compile_options(-Wno-nonnull)
|
||||
|
||||
# Compiler check (needs -std=c++17 flag)
|
||||
include(CheckCXXCompilerFlag)
|
||||
|
@ -144,7 +144,8 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
|||
add_definitions(-DCPUSTRING="${CPU_TYPE}")
|
||||
endif()
|
||||
if (CPU_OPTIMIZATION)
|
||||
add_definitions(${CPU_OPTIMIZATION})
|
||||
SEPARATE_ARGUMENTS(CPU_OPTIMIZATION)
|
||||
add_compile_options(${CPU_OPTIMIZATION})
|
||||
endif()
|
||||
if(WIN32)
|
||||
# require msvcr70.dll or newer for _aligned_malloc etc
|
||||
|
@ -166,14 +167,14 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
|||
# add clang-specific settings for warnings (the second one make sure clang doesn't complain
|
||||
# about unknown -W flags, like -Wno-unused-but-set-variable)
|
||||
# SRS - Add -Wno-expansion-to-defined, Wno-nullability-completeness and -Wno-shorten-64-to-32 to list of warning settings
|
||||
add_definitions(-Wno-local-type-template-args -Wno-unknown-warning-option -Wno-inline-new-delete -Wno-switch-enum -Wno-expansion-to-defined -Wno-nullability-completeness -Wno-shorten-64-to-32)
|
||||
add_compile_options(-Wno-local-type-template-args -Wno-unknown-warning-option -Wno-inline-new-delete -Wno-switch-enum -Wno-expansion-to-defined -Wno-nullability-completeness -Wno-shorten-64-to-32)
|
||||
endif()
|
||||
|
||||
if(NOT CMAKE_CROSSCOMPILING AND ONATIVE)
|
||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "((powerpc|ppc)64le)|(mips64)")
|
||||
add_definitions(-mcpu=native)
|
||||
add_compile_options(-mcpu=native)
|
||||
else()
|
||||
add_definitions(-march=native)
|
||||
add_compile_options(-march=native)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
@ -182,13 +183,13 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
|||
# SRS - Make sure OSX can find system headers and add support for minimum OSX runtime version
|
||||
if(APPLE)
|
||||
# SRS - Also add -fasm-blocks otherwise Xcode complains and -Qunused-arguments to silence MMX/SSE unused arg warnings when compiling for Apple arm64
|
||||
add_definitions(-fasm-blocks -Qunused-arguments)
|
||||
add_compile_options(-fasm-blocks -Qunused-arguments)
|
||||
if(CMAKE_OSX_SYSROOT)
|
||||
add_definitions(-isysroot "${CMAKE_OSX_SYSROOT}")
|
||||
add_compile_options(-isysroot ${CMAKE_OSX_SYSROOT})
|
||||
message(STATUS "Using macOS sysroot: " ${CMAKE_OSX_SYSROOT})
|
||||
endif()
|
||||
if(CMAKE_OSX_DEPLOYMENT_TARGET)
|
||||
add_definitions(-mmacosx-version-min="${CMAKE_OSX_DEPLOYMENT_TARGET}")
|
||||
add_compile_options(-mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
@ -1511,7 +1512,11 @@ else()
|
|||
|
||||
list(REMOVE_DUPLICATES RBDOOM3_SOURCES)
|
||||
|
||||
if(USE_PRECOMPILED_HEADERS)
|
||||
if(USE_PRECOMPILED_HEADERS)
|
||||
# we need to recreate the precompiled header for RBDoom3BFG
|
||||
# (i.e. can't use the one created for idlib before)
|
||||
# because some definitions (e.g. -D__IDLIB__ -D__DOOM_DLL__) differ
|
||||
|
||||
set(RBDOOM3_PRECOMPILED_SOURCES ${RBDOOM3_SOURCES})
|
||||
list(REMOVE_ITEM RBDOOM3_PRECOMPILED_SOURCES ${TIMIDITY_SOURCES} ${ZLIB_SOURCES} ${BINKDEC_SOURCES} ${IMGUI_SOURCES} ${MIKKTSPACE_SOURCES} ${OGGVORBIS_SOURCES} ${OPTICK_SOURCES})
|
||||
list(REMOVE_ITEM RBDOOM3_PRECOMPILED_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/libs/zlib/minizip/ioapi.c)
|
||||
|
@ -1535,6 +1540,30 @@ else()
|
|||
FOREACH(item ${_directory_flags})
|
||||
LIST(APPEND _compiler_FLAGS " -I${item}")
|
||||
ENDFOREACH(item)
|
||||
|
||||
GET_DIRECTORY_PROPERTY(_directory_flags COMPILE_OPTIONS)
|
||||
LIST(APPEND _compiler_FLAGS ${_directory_flags})
|
||||
SEPARATE_ARGUMENTS(_compiler_FLAGS)
|
||||
|
||||
GET_DIRECTORY_PROPERTY(_directory_flags COMPILE_DEFINITIONS)
|
||||
FOREACH(item ${_directory_flags})
|
||||
LIST(APPEND _compiler_FLAGS "-D${item}")
|
||||
ENDFOREACH(item)
|
||||
|
||||
if(OPTICK)
|
||||
LIST(APPEND _compiler_FLAGS -DUSE_OPTICK=1)
|
||||
else()
|
||||
LIST(APPEND _compiler_FLAGS -DUSE_OPTICK=0)
|
||||
endif()
|
||||
|
||||
add_custom_target(precomp_header_rbdoom3bfg ALL
|
||||
COMMAND ${CMAKE_CXX_COMPILER} ${_compiler_FLAGS} -x c++-header idlib/precompiled.h -o idlib/precompiled.h.gch
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
COMMENT "Creating idlib/precompiled.h.gch for RBDoom3BFG"
|
||||
)
|
||||
|
||||
# make sure this is run after creating idlib
|
||||
add_dependencies(precomp_header_rbdoom3bfg idlib)
|
||||
endif()
|
||||
|
||||
# SRS - disable certain gcc/clang warnings for select third-party source libraries, consider updating versions in the future?
|
||||
|
@ -1554,29 +1583,6 @@ else()
|
|||
)
|
||||
endif()
|
||||
|
||||
GET_DIRECTORY_PROPERTY(_directory_flags DEFINITIONS)
|
||||
LIST(APPEND _compiler_FLAGS ${_directory_flags})
|
||||
SEPARATE_ARGUMENTS(_compiler_FLAGS)
|
||||
|
||||
if(USE_PRECOMPILED_HEADERS)
|
||||
# we need to recreate the precompiled header for RBDoom3BFG
|
||||
# (i.e. can't use the one created for idlib before)
|
||||
# because some definitions (e.g. -D__IDLIB__ -D__DOOM_DLL__) differ
|
||||
if(OPTICK)
|
||||
LIST(APPEND _compiler_FLAGS -DUSE_OPTICK=1)
|
||||
else()
|
||||
LIST(APPEND _compiler_FLAGS -DUSE_OPTICK=0)
|
||||
endif()
|
||||
add_custom_target(precomp_header_rbdoom3bfg ALL
|
||||
COMMAND ${CMAKE_CXX_COMPILER} ${_compiler_FLAGS} -x c++-header idlib/precompiled.h -o idlib/precompiled.h.gch
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
COMMENT "Creating idlib/precompiled.h.gch for RBDoom3BFG"
|
||||
)
|
||||
|
||||
# make sure this is run after creating idlib
|
||||
add_dependencies(precomp_header_rbdoom3bfg idlib)
|
||||
endif()
|
||||
|
||||
if(MACOSX_BUNDLE)
|
||||
# define contents of macOS app bundle Resources folder
|
||||
set(MACOS_RESOURCES sys/posix/res/Doom3BFG.icns ../base)
|
||||
|
@ -1602,6 +1608,7 @@ else()
|
|||
if(USE_VULKAN)
|
||||
add_dependencies(RBDoom3BFG nvrhi_vk)
|
||||
endif()
|
||||
|
||||
if (USE_PRECOMPILED_HEADERS)
|
||||
# make sure precompiled header is created before executable is compiled
|
||||
add_dependencies(RBDoom3BFG precomp_header_rbdoom3bfg)
|
||||
|
|
|
@ -122,11 +122,15 @@ else()
|
|||
LIST(APPEND _compiler_FLAGS " -I${item}")
|
||||
ENDFOREACH(item)
|
||||
|
||||
GET_DIRECTORY_PROPERTY(_directory_flags DEFINITIONS)
|
||||
GET_DIRECTORY_PROPERTY(_directory_flags COMPILE_OPTIONS)
|
||||
LIST(APPEND _compiler_FLAGS ${_directory_flags})
|
||||
|
||||
SEPARATE_ARGUMENTS(_compiler_FLAGS)
|
||||
|
||||
GET_DIRECTORY_PROPERTY(_directory_flags COMPILE_DEFINITIONS)
|
||||
FOREACH(item ${_directory_flags})
|
||||
LIST(APPEND _compiler_FLAGS "-D${item}")
|
||||
ENDFOREACH(item)
|
||||
|
||||
if(OPTICK)
|
||||
LIST(APPEND _compiler_FLAGS -DUSE_OPTICK=1)
|
||||
else()
|
||||
|
|
|
@ -46,6 +46,16 @@ FORCE_INLINE void aligned_free( void* ptr )
|
|||
_aligned_free( ptr );
|
||||
}
|
||||
|
||||
FORCE_INLINE void cpuidex( int cpuinfo[4], int function, int subfunction )
|
||||
{
|
||||
__cpuidex( cpuinfo, function, subfunction );
|
||||
}
|
||||
|
||||
FORCE_INLINE unsigned long long xgetbv( unsigned int index )
|
||||
{
|
||||
return _xgetbv( index );
|
||||
}
|
||||
|
||||
#elif defined(__GNUG__) || defined(__clang__) // G++ or clang
|
||||
#include <cpuid.h>
|
||||
#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__)
|
||||
|
@ -69,7 +79,9 @@ FORCE_INLINE unsigned long find_clear_lsb( unsigned int* mask )
|
|||
|
||||
FORCE_INLINE void* aligned_alloc( size_t alignment, size_t size )
|
||||
{
|
||||
return memalign( alignment, size );
|
||||
void* ret;
|
||||
posix_memalign( &ret, alignment, size );
|
||||
return ret;
|
||||
}
|
||||
|
||||
FORCE_INLINE void aligned_free( void* ptr )
|
||||
|
@ -77,14 +89,12 @@ FORCE_INLINE void aligned_free( void* ptr )
|
|||
free( ptr );
|
||||
}
|
||||
|
||||
// RB: commented out
|
||||
#if 0
|
||||
FORCE_INLINE void __cpuidex( int* cpuinfo, int function, int subfunction )
|
||||
FORCE_INLINE void cpuidex( int cpuinfo[4], int function, int subfunction )
|
||||
{
|
||||
__cpuid_count( function, subfunction, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3] );
|
||||
}
|
||||
|
||||
FORCE_INLINE unsigned long long _xgetbv( unsigned int index )
|
||||
FORCE_INLINE unsigned long long xgetbv( unsigned int index )
|
||||
{
|
||||
unsigned int eax, edx;
|
||||
__asm__ __volatile__(
|
||||
|
@ -94,7 +104,6 @@ FORCE_INLINE unsigned long long _xgetbv( unsigned int index )
|
|||
);
|
||||
return ( ( unsigned long long )edx << 32 ) | eax;
|
||||
}
|
||||
#endif
|
||||
|
||||
#else
|
||||
#error Unsupported compiler
|
||||
|
|
|
@ -42,7 +42,7 @@ static MaskedOcclusionCulling::Implementation DetectCPUFeatures( MaskedOcclusion
|
|||
|
||||
// Get regular CPUID values
|
||||
int regs[4];
|
||||
__cpuidex( regs, 0, 0 );
|
||||
cpuidex( regs, 0, 0 );
|
||||
|
||||
// MOCVectorAllocator<CpuInfo> mocalloc( alignedAlloc, alignedFree );
|
||||
// std::vector<CpuInfo, MOCVectorAllocator<CpuInfo>> cpuId( mocalloc ), cpuIdEx( mocalloc );
|
||||
|
@ -52,11 +52,11 @@ static MaskedOcclusionCulling::Implementation DetectCPUFeatures( MaskedOcclusion
|
|||
|
||||
for( size_t i = 0; i < cpuIdCount; ++i )
|
||||
{
|
||||
__cpuidex( cpuId[i].regs, ( int )i, 0 );
|
||||
cpuidex( cpuId[i].regs, ( int )i, 0 );
|
||||
}
|
||||
|
||||
// Get extended CPUID values
|
||||
__cpuidex( regs, 0x80000000, 0 );
|
||||
cpuidex( regs, 0x80000000, 0 );
|
||||
|
||||
//cpuIdEx.resize(regs[0] - 0x80000000);
|
||||
size_t cpuIdExCount = regs[0] - 0x80000000;
|
||||
|
@ -64,15 +64,15 @@ static MaskedOcclusionCulling::Implementation DetectCPUFeatures( MaskedOcclusion
|
|||
|
||||
for( size_t i = 0; i < cpuIdExCount; ++i )
|
||||
{
|
||||
__cpuidex( cpuIdEx[i].regs, 0x80000000 + ( int )i, 0 );
|
||||
cpuidex( cpuIdEx[i].regs, 0x80000000 + ( int )i, 0 );
|
||||
}
|
||||
|
||||
#define TEST_BITS(A, B) (((A) & (B)) == (B))
|
||||
#define TEST_FMA_MOVE_OXSAVE (cpuIdCount >= 1 && TEST_BITS(cpuId[1].regs[2], (1 << 12) | (1 << 22) | (1 << 27)))
|
||||
#define TEST_LZCNT (cpuIdExCount >= 1 && TEST_BITS(cpuIdEx[1].regs[2], 0x20))
|
||||
#define TEST_SSE41 (cpuIdCount >= 1 && TEST_BITS(cpuId[1].regs[2], (1 << 19)))
|
||||
#define TEST_XMM_YMM (cpuIdCount >= 1 && TEST_BITS(_xgetbv(0), (1 << 2) | (1 << 1)))
|
||||
#define TEST_OPMASK_ZMM (cpuIdCount >= 1 && TEST_BITS(_xgetbv(0), (1 << 7) | (1 << 6) | (1 << 5)))
|
||||
#define TEST_XMM_YMM (cpuIdCount >= 1 && TEST_BITS(xgetbv(0), (1 << 2) | (1 << 1)))
|
||||
#define TEST_OPMASK_ZMM (cpuIdCount >= 1 && TEST_BITS(xgetbv(0), (1 << 7) | (1 << 6) | (1 << 5)))
|
||||
#define TEST_BMI1_BMI2_AVX2 (cpuIdCount >= 7 && TEST_BITS(cpuId[7].regs[1], (1 << 3) | (1 << 5) | (1 << 8)))
|
||||
#define TEST_AVX512_F_BW_DQ (cpuIdCount >= 7 && TEST_BITS(cpuId[7].regs[1], (1 << 16) | (1 << 17) | (1 << 30)))
|
||||
|
||||
|
|
596
neo/renderer/MaskedOcclusionCulling.h
Normal file
596
neo/renderer/MaskedOcclusionCulling.h
Normal file
|
@ -0,0 +1,596 @@
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright 2017 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
// use this file except in compliance with the License. You may obtain a copy
|
||||
// of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
// License for the specific language governing permissions and limitations
|
||||
// under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
#pragma once
|
||||
|
||||
/*!
|
||||
* \file MaskedOcclusionCulling.h
|
||||
* \brief Masked Occlusion Culling
|
||||
*
|
||||
* General information
|
||||
* - Input to all API functions are (x,y,w) clip-space coordinates (x positive left, y positive up, w positive away from camera).
|
||||
* We entirely skip the z component and instead compute it as 1 / w, see next bullet. For TestRect the input is NDC (x/w, y/w).
|
||||
* - We use a simple z = 1 / w transform, which is a bit faster than OGL/DX depth transforms. Thus, depth is REVERSED and z = 0 at
|
||||
* the far plane and z = inf at w = 0. We also have to use a GREATER depth function, which explains why all the conservative
|
||||
* tests will be reversed compared to what you might be used to (for example zMaxTri >= zMinBuffer is a visibility test)
|
||||
* - We support different layouts for vertex data (basic AoS and SoA), but note that it's beneficial to store the position data
|
||||
* as tightly in memory as possible to reduce cache misses. Big strides are bad, so it's beneficial to keep position as a separate
|
||||
* stream (rather than bundled with attributes) or to keep a copy of the position data for the occlusion culling system.
|
||||
* - The resolution width must be a multiple of 8 and height a multiple of 4.
|
||||
* - The hierarchical Z buffer is stored OpenGL-style with the y axis pointing up. This includes the scissor box.
|
||||
* - This code is only tested with Visual Studio 2015, but should hopefully be easy to port to other compilers.
|
||||
*/
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Defines used to configure the implementation
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef QUICK_MASK
|
||||
/*!
|
||||
* Configure the algorithm used for updating and merging hierarchical z buffer entries. If QUICK_MASK
|
||||
* is defined to 1, use the algorithm from the paper "Masked Software Occlusion Culling", which has good
|
||||
* balance between performance and low leakage. If QUICK_MASK is defined to 0, use the algorithm from
|
||||
* "Masked Depth Culling for Graphics Hardware" which has less leakage, but also lower performance.
|
||||
*/
|
||||
#define QUICK_MASK 1
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef USE_D3D
|
||||
/*!
|
||||
* Configures the library for use with Direct3D (default) or OpenGL rendering. This changes whether the
|
||||
* screen space Y axis points downwards (D3D) or upwards (OGL), and is primarily important in combination
|
||||
* with the PRECISE_COVERAGE define, where this is important to ensure correct rounding and tie-breaker
|
||||
* behaviour. It also affects the ScissorRect screen space coordinates.
|
||||
*/
|
||||
#define USE_D3D 1
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef PRECISE_COVERAGE
|
||||
/*!
|
||||
* Define PRECISE_COVERAGE to 1 to more closely match GPU rasterization rules. The increased precision comes
|
||||
* at a cost of slightly lower performance.
|
||||
*/
|
||||
#define PRECISE_COVERAGE 1
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef USE_AVX512
|
||||
/*!
|
||||
* Define USE_AVX512 to 1 to enable experimental AVX-512 support. It's currently mostly untested and only
|
||||
* validated on simple examples using Intel SDE. Older compilers may not support AVX-512 intrinsics.
|
||||
*/
|
||||
#define USE_AVX512 0
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef CLIPPING_PRESERVES_ORDER
|
||||
/*!
|
||||
* Define CLIPPING_PRESERVES_ORDER to 1 to prevent clipping from reordering triangle rasterization
|
||||
* order; This comes at a cost (approx 3-4%) but removes one source of temporal frame-to-frame instability.
|
||||
*/
|
||||
#define CLIPPING_PRESERVES_ORDER 1
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef ENABLE_STATS
|
||||
/*!
|
||||
* Define ENABLE_STATS to 1 to gather various statistics during occlusion culling. Can be used for profiling
|
||||
* and debugging. Note that enabling this function will reduce performance significantly.
|
||||
*/
|
||||
#define ENABLE_STATS 0
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef MOC_RECORDER_ENABLE
|
||||
/*!
|
||||
* Define MOC_RECORDER_ENABLE to 1 to enable frame recorder (see FrameRecorder.h/cpp for details)
|
||||
*/
|
||||
#define MOC_RECORDER_ENABLE 0
|
||||
|
||||
#endif
|
||||
|
||||
#if MOC_RECORDER_ENABLE
|
||||
#ifndef MOC_RECORDER_ENABLE_PLAYBACK
|
||||
/*!
|
||||
* Define MOC_RECORDER_ENABLE_PLAYBACK to 1 to enable compilation of the playback code (not needed
|
||||
for recording)
|
||||
*/
|
||||
#define MOC_RECORDER_ENABLE_PLAYBACK 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#if MOC_RECORDER_ENABLE
|
||||
|
||||
#include <mutex>
|
||||
|
||||
class FrameRecorder;
|
||||
|
||||
#endif // #if MOC_RECORDER_ENABLE
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Masked occlusion culling class
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class MaskedOcclusionCulling
|
||||
{
|
||||
public:
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Memory management callback functions
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
typedef void* ( *pfnAlignedAlloc )( size_t alignment, size_t size );
|
||||
typedef void ( *pfnAlignedFree )( void* ptr );
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Enums
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
enum Implementation
|
||||
{
|
||||
SSE2 = 0,
|
||||
SSE41 = 1,
|
||||
AVX2 = 2,
|
||||
AVX512 = 3
|
||||
};
|
||||
|
||||
enum BackfaceWinding
|
||||
{
|
||||
BACKFACE_NONE = 0,
|
||||
BACKFACE_CW = 1,
|
||||
BACKFACE_CCW = 2,
|
||||
};
|
||||
|
||||
enum CullingResult
|
||||
{
|
||||
VISIBLE = 0x0,
|
||||
OCCLUDED = 0x1,
|
||||
VIEW_CULLED = 0x3
|
||||
};
|
||||
|
||||
enum ClipPlanes
|
||||
{
|
||||
CLIP_PLANE_NONE = 0x00,
|
||||
CLIP_PLANE_NEAR = 0x01,
|
||||
CLIP_PLANE_LEFT = 0x02,
|
||||
CLIP_PLANE_RIGHT = 0x04,
|
||||
CLIP_PLANE_BOTTOM = 0x08,
|
||||
CLIP_PLANE_TOP = 0x10,
|
||||
CLIP_PLANE_SIDES = ( CLIP_PLANE_LEFT | CLIP_PLANE_RIGHT | CLIP_PLANE_BOTTOM | CLIP_PLANE_TOP ),
|
||||
CLIP_PLANE_ALL = ( CLIP_PLANE_LEFT | CLIP_PLANE_RIGHT | CLIP_PLANE_BOTTOM | CLIP_PLANE_TOP | CLIP_PLANE_NEAR )
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Structs
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*!
|
||||
* Used to specify custom vertex layout. Memory offsets to y and z coordinates are set through
|
||||
* mOffsetY and mOffsetW, and vertex stride is given by mStride. It's possible to configure both
|
||||
* AoS and SoA layouts. Note that large strides may cause more cache misses and decrease
|
||||
* performance. It is advisable to store position data as compactly in memory as possible.
|
||||
*/
|
||||
struct VertexLayout
|
||||
{
|
||||
VertexLayout() {}
|
||||
VertexLayout( int stride, int offsetY, int offsetZW ) :
|
||||
mStride( stride ), mOffsetY( offsetY ), mOffsetW( offsetZW ) {}
|
||||
|
||||
int mStride; //!< byte stride between vertices
|
||||
int mOffsetY; //!< byte offset from X to Y coordinate
|
||||
union
|
||||
{
|
||||
int mOffsetZ; //!< byte offset from X to Z coordinate
|
||||
int mOffsetW; //!< byte offset from X to W coordinate
|
||||
};
|
||||
};
|
||||
|
||||
/*!
|
||||
* Used to control scissoring during rasterization. Note that we only provide coarse scissor support.
|
||||
* The scissor box x coordinates must be a multiple of 32, and the y coordinates a multiple of 8.
|
||||
* Scissoring is mainly meant as a means of enabling binning (sort middle) rasterizers in case
|
||||
* application developers want to use that approach for multithreading.
|
||||
*/
|
||||
struct ScissorRect
|
||||
{
|
||||
ScissorRect() {}
|
||||
ScissorRect( int minX, int minY, int maxX, int maxY ) :
|
||||
mMinX( minX ), mMinY( minY ), mMaxX( maxX ), mMaxY( maxY ) {}
|
||||
|
||||
int mMinX; //!< Screen space X coordinate for left side of scissor rect, inclusive and must be a multiple of 32
|
||||
int mMinY; //!< Screen space Y coordinate for bottom side of scissor rect, inclusive and must be a multiple of 8
|
||||
int mMaxX; //!< Screen space X coordinate for right side of scissor rect, <B>non</B> inclusive and must be a multiple of 32
|
||||
int mMaxY; //!< Screen space Y coordinate for top side of scissor rect, <B>non</B> inclusive and must be a multiple of 8
|
||||
};
|
||||
|
||||
/*!
|
||||
* Used to specify storage area for a binlist, containing triangles. This struct is used for binning
|
||||
* and multithreading. The host application is responsible for allocating memory for the binlists.
|
||||
*/
|
||||
struct TriList
|
||||
{
|
||||
unsigned int mNumTriangles; //!< Maximum number of triangles that may be stored in mPtr
|
||||
unsigned int mTriIdx; //!< Index of next triangle to be written, clear before calling BinTriangles to start from the beginning of the list
|
||||
float* mPtr; //!< Scratchpad buffer allocated by the host application
|
||||
};
|
||||
|
||||
/*!
|
||||
* Statistics that can be gathered during occluder rendering and visibility to aid debugging
|
||||
* and profiling. Must be enabled by changing the ENABLE_STATS define.
|
||||
*/
|
||||
struct OcclusionCullingStatistics
|
||||
{
|
||||
struct
|
||||
{
|
||||
long long mNumProcessedTriangles; //!< Number of occluder triangles processed in total
|
||||
long long mNumRasterizedTriangles; //!< Number of occluder triangles passing view frustum and backface culling
|
||||
long long mNumTilesTraversed; //!< Number of tiles traversed by the rasterizer
|
||||
long long mNumTilesUpdated; //!< Number of tiles where the hierarchical z buffer was updated
|
||||
long long mNumTilesMerged; //!< Number of tiles where the hierarchical z buffer was updated
|
||||
} mOccluders;
|
||||
|
||||
struct
|
||||
{
|
||||
long long mNumProcessedRectangles; //!< Number of rects processed (TestRect())
|
||||
long long mNumProcessedTriangles; //!< Number of ocludee triangles processed (TestTriangles())
|
||||
long long mNumRasterizedTriangles; //!< Number of ocludee triangle passing view frustum and backface culling
|
||||
long long mNumTilesTraversed; //!< Number of tiles traversed by triangle & rect rasterizers
|
||||
} mOccludees;
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Functions
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*!
|
||||
* \brief Creates a new object with default state, no z buffer attached/allocated.
|
||||
*/
|
||||
static MaskedOcclusionCulling* Create( Implementation RequestedSIMD = AVX512 );
|
||||
|
||||
/*!
|
||||
* \brief Creates a new object with default state, no z buffer attached/allocated.
|
||||
* \param alignedAlloc Pointer to a callback function used when allocating memory
|
||||
* \param alignedFree Pointer to a callback function used when freeing memory
|
||||
*/
|
||||
static MaskedOcclusionCulling* Create( Implementation RequestedSIMD, pfnAlignedAlloc alignedAlloc, pfnAlignedFree alignedFree );
|
||||
|
||||
/*!
|
||||
* \brief Destroys an object and frees the z buffer memory. Note that you cannot
|
||||
* use the delete operator, and should rather use this function to free up memory.
|
||||
*/
|
||||
static void Destroy( MaskedOcclusionCulling* moc );
|
||||
|
||||
/*!
|
||||
* \brief Sets the resolution of the hierarchical depth buffer. This function will
|
||||
* re-allocate the current depth buffer (if present). The contents of the
|
||||
* buffer is undefined until ClearBuffer() is called.
|
||||
*
|
||||
* \param witdh The width of the buffer in pixels, must be a multiple of 8
|
||||
* \param height The height of the buffer in pixels, must be a multiple of 4
|
||||
*/
|
||||
virtual void SetResolution( unsigned int width, unsigned int height ) = 0;
|
||||
|
||||
/*!
|
||||
* \brief Gets the resolution of the hierarchical depth buffer.
|
||||
*
|
||||
* \param witdh Output: The width of the buffer in pixels
|
||||
* \param height Output: The height of the buffer in pixels
|
||||
*/
|
||||
virtual void GetResolution( unsigned int& width, unsigned int& height ) const = 0;
|
||||
|
||||
/*!
|
||||
* \brief Returns the tile size for the current implementation.
|
||||
*
|
||||
* \param nBinsW Number of vertical bins, the screen is divided into nBinsW x nBinsH
|
||||
* rectangular bins.
|
||||
* \param nBinsH Number of horizontal bins, the screen is divided into nBinsW x nBinsH
|
||||
* rectangular bins.
|
||||
* \param outBinWidth Output: The width of the single bin in pixels (except for the
|
||||
* rightmost bin width, which is extended to resolution width)
|
||||
* \param outBinHeight Output: The height of the single bin in pixels (except for the
|
||||
* bottommost bin height, which is extended to resolution height)
|
||||
*/
|
||||
virtual void ComputeBinWidthHeight( unsigned int nBinsW, unsigned int nBinsH, unsigned int& outBinWidth, unsigned int& outBinHeight ) = 0;
|
||||
|
||||
/*!
|
||||
* \brief Sets the distance for the near clipping plane. Default is nearDist = 0.
|
||||
*
|
||||
* \param nearDist The distance to the near clipping plane, given as clip space w
|
||||
*/
|
||||
virtual void SetNearClipPlane( float nearDist ) = 0;
|
||||
|
||||
/*!
|
||||
* \brief Gets the distance for the near clipping plane.
|
||||
*/
|
||||
virtual float GetNearClipPlane() const = 0;
|
||||
|
||||
/*!
|
||||
* \brief Clears the hierarchical depth buffer.
|
||||
*/
|
||||
virtual void ClearBuffer() = 0;
|
||||
|
||||
/*!
|
||||
* \brief Merge a second hierarchical depth buffer into the main buffer.
|
||||
*/
|
||||
virtual void MergeBuffer( MaskedOcclusionCulling* BufferB ) = 0;
|
||||
|
||||
/*!
|
||||
* \brief Renders a mesh of occluder triangles and updates the hierarchical z buffer
|
||||
* with conservative depth values.
|
||||
*
|
||||
* This function is optimized for vertex layouts with stride 16 and y and w
|
||||
* offsets of 4 and 12 bytes, respectively.
|
||||
*
|
||||
* \param inVtx Pointer to an array of input vertices, should point to the x component
|
||||
* of the first vertex. The input vertices are given as (x,y,w) coordinates
|
||||
* in clip space. The memory layout can be changed using vtxLayout.
|
||||
* \param inTris Pointer to an array of vertex indices. Each triangle is created
|
||||
* from three indices consecutively fetched from the array.
|
||||
* \param nTris The number of triangles to render (inTris must contain atleast 3*nTris
|
||||
* entries)
|
||||
* \param modelToClipMatrix all vertices will be transformed by this matrix before
|
||||
* performing projection. If nullptr is passed the transform step will be skipped
|
||||
* \param bfWinding Sets triangle winding order to consider backfacing, must be one one
|
||||
* of (BACKFACE_NONE, BACKFACE_CW and BACKFACE_CCW). Back-facing triangles are culled
|
||||
* and will not be rasterized. You may use BACKFACE_NONE to disable culling for
|
||||
* double sided geometry
|
||||
* \param clipPlaneMask A mask indicating which clip planes should be considered by the
|
||||
* triangle clipper. Can be used as an optimization if your application can
|
||||
* determine (for example during culling) that a group of triangles does not
|
||||
* intersect a certain frustum plane. However, setting an incorrect mask may
|
||||
* cause out of bounds memory accesses.
|
||||
* \param vtxLayout A struct specifying the vertex layout (see struct for detailed
|
||||
* description). For best performance, it is advisable to store position data
|
||||
* as compactly in memory as possible.
|
||||
* \return Will return VIEW_CULLED if all triangles are either outside the frustum or
|
||||
* backface culled, returns VISIBLE otherwise.
|
||||
*/
|
||||
virtual CullingResult RenderTriangles( const float* inVtx, const unsigned int* inTris, int nTris, const float* modelToClipMatrix = nullptr, BackfaceWinding bfWinding = BACKFACE_CW, ClipPlanes clipPlaneMask = CLIP_PLANE_ALL, const VertexLayout& vtxLayout = VertexLayout( 16, 4, 12 ) ) = 0;
|
||||
|
||||
/*!
|
||||
* \brief Occlusion query for a rectangle with a given depth. The rectangle is given
|
||||
* in normalized device coordinates where (x,y) coordinates between [-1,1] map
|
||||
* to the visible screen area. The query uses a GREATER_EQUAL (reversed) depth
|
||||
* test meaning that depth values equal to the contents of the depth buffer are
|
||||
* counted as visible.
|
||||
*
|
||||
* \param xmin NDC coordinate of the left side of the rectangle.
|
||||
* \param ymin NDC coordinate of the bottom side of the rectangle.
|
||||
* \param xmax NDC coordinate of the right side of the rectangle.
|
||||
* \param ymax NDC coordinate of the top side of the rectangle.
|
||||
* \param ymax NDC coordinate of the top side of the rectangle.
|
||||
* \param wmin Clip space W coordinate for the rectangle.
|
||||
* \return The query will return VISIBLE if the rectangle may be visible, OCCLUDED
|
||||
* if the rectangle is occluded by a previously rendered object, or VIEW_CULLED
|
||||
* if the rectangle is outside the view frustum.
|
||||
*/
|
||||
virtual CullingResult TestRect( float xmin, float ymin, float xmax, float ymax, float wmin ) const = 0;
|
||||
|
||||
/*!
|
||||
* \brief This function is similar to RenderTriangles(), but performs an occlusion
|
||||
* query instead and does not update the hierarchical z buffer. The query uses
|
||||
* a GREATER_EQUAL (reversed) depth test meaning that depth values equal to the
|
||||
* contents of the depth buffer are counted as visible.
|
||||
*
|
||||
* This function is optimized for vertex layouts with stride 16 and y and w
|
||||
* offsets of 4 and 12 bytes, respectively.
|
||||
*
|
||||
* \param inVtx Pointer to an array of input vertices, should point to the x component
|
||||
* of the first vertex. The input vertices are given as (x,y,w) coordinates
|
||||
* in clip space. The memory layout can be changed using vtxLayout.
|
||||
* \param inTris Pointer to an array of triangle indices. Each triangle is created
|
||||
* from three indices consecutively fetched from the array.
|
||||
* \param nTris The number of triangles to render (inTris must contain atleast 3*nTris
|
||||
* entries)
|
||||
* \param modelToClipMatrix all vertices will be transformed by this matrix before
|
||||
* performing projection. If nullptr is passed the transform step will be skipped
|
||||
* \param bfWinding Sets triangle winding order to consider backfacing, must be one one
|
||||
* of (BACKFACE_NONE, BACKFACE_CW and BACKFACE_CCW). Back-facing triangles are culled
|
||||
* and will not be occlusion tested. You may use BACKFACE_NONE to disable culling
|
||||
* for double sided geometry
|
||||
* \param clipPlaneMask A mask indicating which clip planes should be considered by the
|
||||
* triangle clipper. Can be used as an optimization if your application can
|
||||
* determine (for example during culling) that a group of triangles does not
|
||||
* intersect a certain frustum plane. However, setting an incorrect mask may
|
||||
* cause out of bounds memory accesses.
|
||||
* \param vtxLayout A struct specifying the vertex layout (see struct for detailed
|
||||
* description). For best performance, it is advisable to store position data
|
||||
* as compactly in memory as possible.
|
||||
* \return The query will return VISIBLE if the triangle mesh may be visible, OCCLUDED
|
||||
* if the mesh is occluded by a previously rendered object, or VIEW_CULLED if all
|
||||
* triangles are entirely outside the view frustum or backface culled.
|
||||
*/
|
||||
virtual CullingResult TestTriangles( const float* inVtx, const unsigned int* inTris, int nTris, const float* modelToClipMatrix = nullptr, BackfaceWinding bfWinding = BACKFACE_CW, ClipPlanes clipPlaneMask = CLIP_PLANE_ALL, const VertexLayout& vtxLayout = VertexLayout( 16, 4, 12 ) ) = 0;
|
||||
|
||||
/*!
|
||||
* \brief Perform input assembly, clipping , projection, triangle setup, and write
|
||||
* triangles to the screen space bins they overlap. This function can be used to
|
||||
* distribute work for threading (See the CullingThreadpool class for an example)
|
||||
*
|
||||
* \param inVtx Pointer to an array of input vertices, should point to the x component
|
||||
* of the first vertex. The input vertices are given as (x,y,w) coordinates
|
||||
* in clip space. The memory layout can be changed using vtxLayout.
|
||||
* \param inTris Pointer to an array of vertex indices. Each triangle is created
|
||||
* from three indices consecutively fetched from the array.
|
||||
* \param nTris The number of triangles to render (inTris must contain atleast 3*nTris
|
||||
* entries)
|
||||
* \param triLists Pointer to an array of TriList objects with one TriList object per
|
||||
* bin. If a triangle overlaps a bin, it will be written to the corresponding
|
||||
* trilist. Note that this method appends the triangles to the current list, to
|
||||
* start writing from the beginning of the list, set triList.mTriIdx = 0
|
||||
* \param nBinsW Number of vertical bins, the screen is divided into nBinsW x nBinsH
|
||||
* rectangular bins.
|
||||
* \param nBinsH Number of horizontal bins, the screen is divided into nBinsW x nBinsH
|
||||
* rectangular bins.
|
||||
* \param modelToClipMatrix all vertices will be transformed by this matrix before
|
||||
* performing projection. If nullptr is passed the transform step will be skipped
|
||||
* \param clipPlaneMask A mask indicating which clip planes should be considered by the
|
||||
* triangle clipper. Can be used as an optimization if your application can
|
||||
* determine (for example during culling) that a group of triangles does not
|
||||
* intersect a certain frustum plane. However, setting an incorrect mask may
|
||||
* cause out of bounds memory accesses.
|
||||
* \param vtxLayout A struct specifying the vertex layout (see struct for detailed
|
||||
* description). For best performance, it is advisable to store position data
|
||||
* as compactly in memory as possible.
|
||||
* \param bfWinding Sets triangle winding order to consider backfacing, must be one one
|
||||
* of (BACKFACE_NONE, BACKFACE_CW and BACKFACE_CCW). Back-facing triangles are culled
|
||||
* and will not be binned / rasterized. You may use BACKFACE_NONE to disable culling
|
||||
* for double sided geometry
|
||||
*/
|
||||
virtual void BinTriangles( const float* inVtx, const unsigned int* inTris, int nTris, TriList* triLists, unsigned int nBinsW, unsigned int nBinsH, const float* modelToClipMatrix = nullptr, BackfaceWinding bfWinding = BACKFACE_CW, ClipPlanes clipPlaneMask = CLIP_PLANE_ALL, const VertexLayout& vtxLayout = VertexLayout( 16, 4, 12 ) ) = 0;
|
||||
|
||||
/*!
|
||||
* \brief Renders all occluder triangles in a trilist. This function can be used in
|
||||
* combination with BinTriangles() to create a threded (binning) rasterizer. The
|
||||
* bins can be processed independently by different threads without risking writing
|
||||
* to overlapping memory regions.
|
||||
*
|
||||
* \param triLists A triangle list, filled using the BinTriangles() function that is to
|
||||
* be rendered.
|
||||
* \param scissor A scissor box limiting the rendering region to the bin. The size of each
|
||||
* bin must be a multiple of 32x8 pixels due to implementation constraints. For a
|
||||
* render target with (width, height) resolution and (nBinsW, nBinsH) bins, the
|
||||
* size of a bin is:
|
||||
* binWidth = (width / nBinsW) - (width / nBinsW) % 32;
|
||||
* binHeight = (height / nBinsH) - (height / nBinsH) % 8;
|
||||
* The last row and column of tiles have a different size:
|
||||
* lastColBinWidth = width - (nBinsW-1)*binWidth;
|
||||
* lastRowBinHeight = height - (nBinsH-1)*binHeight;
|
||||
*/
|
||||
virtual void RenderTrilist( const TriList& triList, const ScissorRect* scissor ) = 0;
|
||||
|
||||
/*!
|
||||
* \brief Creates a per-pixel depth buffer from the hierarchical z buffer representation.
|
||||
* Intended for visualizing the hierarchical depth buffer for debugging. The
|
||||
* buffer is written in scanline order, from the top to bottom (D3D) or bottom to
|
||||
* top (OGL) of the surface. See the USE_D3D define.
|
||||
*
|
||||
* \param depthData Pointer to memory where the per-pixel depth data is written. Must
|
||||
* hold storage for atleast width*height elements as set by setResolution.
|
||||
*/
|
||||
virtual void ComputePixelDepthBuffer( float* depthData, bool flipY ) = 0;
|
||||
|
||||
/*!
|
||||
* \brief Fetch occlusion culling statistics, returns zeroes if ENABLE_STATS define is
|
||||
* not defined. The statistics can be used for profiling or debugging.
|
||||
*/
|
||||
virtual OcclusionCullingStatistics GetStatistics() = 0;
|
||||
|
||||
/*!
|
||||
* \brief Returns the implementation (CPU instruction set) version of this object.
|
||||
*/
|
||||
virtual Implementation GetImplementation() = 0;
|
||||
|
||||
/*!
|
||||
* \brief Utility function for transforming vertices and outputting them to an (x,y,z,w)
|
||||
* format suitable for the occluder rasterization and occludee testing functions.
|
||||
*
|
||||
* \param mtx Pointer to matrix data. The matrix should column major for post
|
||||
* multiplication (OGL) and row major for pre-multiplication (DX). This is
|
||||
* consistent with OpenGL / DirectX behavior.
|
||||
* \param inVtx Pointer to an array of input vertices. The input vertices are given as
|
||||
* (x,y,z) coordinates. The memory layout can be changed using vtxLayout.
|
||||
* \param xfVtx Pointer to an array to store transformed vertices. The transformed
|
||||
* vertices are always stored as array of structs (AoS) (x,y,z,w) packed in memory.
|
||||
* \param nVtx Number of vertices to transform.
|
||||
* \param vtxLayout A struct specifying the vertex layout (see struct for detailed
|
||||
* description). For best performance, it is advisable to store position data
|
||||
* as compactly in memory as possible. Note that for this function, the
|
||||
* w-component is assumed to be 1.0.
|
||||
*/
|
||||
static void TransformVertices( const float* mtx, const float* inVtx, float* xfVtx, unsigned int nVtx, const VertexLayout& vtxLayout = VertexLayout( 12, 4, 8 ) );
|
||||
|
||||
/*!
|
||||
* \brief Get used memory alloc/free callbacks.
|
||||
*/
|
||||
void GetAllocFreeCallback( pfnAlignedAlloc& allocCallback, pfnAlignedFree& freeCallback )
|
||||
{
|
||||
allocCallback = mAlignedAllocCallback, freeCallback = mAlignedFreeCallback;
|
||||
}
|
||||
|
||||
#if MOC_RECORDER_ENABLE
|
||||
/*!
|
||||
* \brief Start recording subsequent rasterization and testing calls using the FrameRecorder.
|
||||
* The function calls that are recorded are:
|
||||
* - ClearBuffer
|
||||
* - RenderTriangles
|
||||
* - TestTriangles
|
||||
* - TestRect
|
||||
* All inputs and outputs are recorded, which can be used for correctness validation
|
||||
* and performance testing.
|
||||
*
|
||||
* \param outputFilePath Pointer to name of the output file.
|
||||
* \return 'true' if recording was started successfully, 'false' otherwise (file access error).
|
||||
*/
|
||||
bool RecorderStart( const char* outputFilePath ) const;
|
||||
|
||||
/*!
|
||||
* \brief Stop recording, flush output and release used memory.
|
||||
*/
|
||||
void RecorderStop( ) const;
|
||||
|
||||
/*!
|
||||
* \brief Manually record triangles. This is called automatically from MaskedOcclusionCulling::RenderTriangles
|
||||
* if the recording is started, but not from BinTriangles/RenderTrilist (used in multithreaded codepath), in
|
||||
* which case it has to be called manually.
|
||||
*
|
||||
* \param inVtx Pointer to an array of input vertices, should point to the x component
|
||||
* of the first vertex. The input vertices are given as (x,y,w) coordinates
|
||||
* in clip space. The memory layout can be changed using vtxLayout.
|
||||
* \param inTris Pointer to an array of triangle indices. Each triangle is created
|
||||
* from three indices consecutively fetched from the array.
|
||||
* \param nTris The number of triangles to render (inTris must contain atleast 3*nTris
|
||||
* entries)
|
||||
* \param modelToClipMatrix all vertices will be transformed by this matrix before
|
||||
* performing projection. If nullptr is passed the transform step will be skipped
|
||||
* \param bfWinding Sets triangle winding order to consider backfacing, must be one one
|
||||
* of (BACKFACE_NONE, BACKFACE_CW and BACKFACE_CCW). Back-facing triangles are culled
|
||||
* and will not be occlusion tested. You may use BACKFACE_NONE to disable culling
|
||||
* for double sided geometry
|
||||
* \param clipPlaneMask A mask indicating which clip planes should be considered by the
|
||||
* triangle clipper. Can be used as an optimization if your application can
|
||||
* determine (for example during culling) that a group of triangles does not
|
||||
* intersect a certain frustum plane. However, setting an incorrect mask may
|
||||
* cause out of bounds memory accesses.
|
||||
* \param vtxLayout A struct specifying the vertex layout (see struct for detailed
|
||||
* description). For best performance, it is advisable to store position data
|
||||
* as compactly in memory as possible.
|
||||
* \param cullingResult cull result value expected to be returned by executing the
|
||||
* RenderTriangles call with recorded parameters.
|
||||
*/
|
||||
//
|
||||
// merge the binned data back into original layout; in this case, call it manually from your Threadpool implementation (already added to CullingThreadpool).
|
||||
// If recording is not enabled, calling this function will do nothing.
|
||||
void RecordRenderTriangles( const float* inVtx, const unsigned int* inTris, int nTris, const float* modelToClipMatrix = nullptr, ClipPlanes clipPlaneMask = CLIP_PLANE_ALL, BackfaceWinding bfWinding = BACKFACE_CW, const VertexLayout& vtxLayout = VertexLayout( 16, 4, 12 ), CullingResult cullingResult = ( CullingResult ) - 1 );
|
||||
#endif // #if MOC_RECORDER_ENABLE
|
||||
|
||||
protected:
|
||||
pfnAlignedAlloc mAlignedAllocCallback;
|
||||
pfnAlignedFree mAlignedFreeCallback;
|
||||
|
||||
mutable OcclusionCullingStatistics mStats;
|
||||
|
||||
#if MOC_RECORDER_ENABLE
|
||||
mutable FrameRecorder* mRecorder;
|
||||
mutable std::mutex mRecorderMutex;
|
||||
#endif // #if MOC_RECORDER_ENABLE
|
||||
|
||||
virtual ~MaskedOcclusionCulling() {}
|
||||
};
|
|
@ -295,14 +295,6 @@ if(MSVC)
|
|||
else()
|
||||
include_directories(.)
|
||||
|
||||
# SRS - disable certain gcc/clang warnings for select third-party source libraries, consider updating versions in the future?
|
||||
set_source_files_properties(
|
||||
${MC_ZLIB_SOURCES}
|
||||
${MC_MINIZIP_SOURCES}
|
||||
PROPERTIES
|
||||
COMPILE_FLAGS "-Wno-stringop-overread -Wno-deprecated-non-prototype"
|
||||
)
|
||||
|
||||
if (USE_PRECOMPILED_HEADERS)
|
||||
foreach( src_file ${MC_PRECOMPILED_SOURCES} )
|
||||
#message(STATUS "-include precompiled.h for ${src_file}")
|
||||
|
@ -321,11 +313,15 @@ else()
|
|||
LIST(APPEND _compiler_FLAGS " -I${item}")
|
||||
ENDFOREACH(item)
|
||||
|
||||
GET_DIRECTORY_PROPERTY(_directory_flags DEFINITIONS)
|
||||
GET_DIRECTORY_PROPERTY(_directory_flags COMPILE_OPTIONS)
|
||||
LIST(APPEND _compiler_FLAGS ${_directory_flags})
|
||||
|
||||
SEPARATE_ARGUMENTS(_compiler_FLAGS)
|
||||
|
||||
GET_DIRECTORY_PROPERTY(_directory_flags COMPILE_DEFINITIONS)
|
||||
FOREACH(item ${_directory_flags})
|
||||
LIST(APPEND _compiler_FLAGS "-D${item}")
|
||||
ENDFOREACH(item)
|
||||
|
||||
# SRS - USE_OPTICK not useful for rbdmap, but definition required to avoid mismatch with idlib precompiled header
|
||||
if(OPTICK)
|
||||
LIST(APPEND _compiler_FLAGS -DUSE_OPTICK=1)
|
||||
|
@ -339,7 +335,15 @@ else()
|
|||
COMMENT "Creating tools/compilers/precompiled.h.gch for rbdmap"
|
||||
)
|
||||
endif()
|
||||
|
||||
|
||||
# SRS - disable certain gcc/clang warnings for select third-party source libraries, consider updating versions in the future?
|
||||
set_source_files_properties(
|
||||
${MC_ZLIB_SOURCES}
|
||||
${MC_MINIZIP_SOURCES}
|
||||
PROPERTIES
|
||||
COMPILE_FLAGS "-Wno-stringop-overread -Wno-deprecated-non-prototype"
|
||||
)
|
||||
|
||||
add_executable(rbdmap ${MC_SOURCES_ALL} ${MC_INCLUDES_ALL})
|
||||
add_dependencies(rbdmap idlib)
|
||||
|
||||
|
|
Loading…
Reference in a new issue