Win/Linux/macOS: Update MOC cpuidex() and xgetbv() defs for cross-platform portability

This commit is contained in:
SRSaunders 2024-09-08 20:30:48 -04:00
parent 666f554c31
commit f1ce7a787d
2 changed files with 21 additions and 12 deletions

View file

@ -46,6 +46,16 @@ FORCE_INLINE void aligned_free( void* ptr )
_aligned_free( ptr );
}
// Portable-name wrapper for the compiler's __cpuidex intrinsic.
//   cpuinfo     - receives the four 32-bit values produced by the query
//   function    - CPUID leaf to query
//   subfunction - CPUID sub-leaf to query
FORCE_INLINE void cpuidex( int cpuinfo[4], int function, int subfunction )
{
	// The array parameter is already a pointer to its first element; spell that out.
	__cpuidex( &cpuinfo[0], function, subfunction );
}
// Portable-name wrapper for the compiler's _xgetbv intrinsic.
//   index - selects which extended control register to read
// Returns the 64-bit value reported by _xgetbv for that register.
FORCE_INLINE unsigned long long xgetbv( unsigned int index )
{
	const unsigned long long xcrValue = _xgetbv( index );
	return xcrValue;
}
#elif defined(__GNUG__) || defined(__clang__) // G++ or clang
#include <cpuid.h>
#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__)
@ -69,7 +79,9 @@ FORCE_INLINE unsigned long find_clear_lsb( unsigned int* mask )
// Allocates `size` bytes whose address is a multiple of `alignment`.
//   alignment - requested alignment in bytes; expected to be a power of two
//   size      - number of bytes to allocate
// Returns the allocated pointer, or nullptr if the allocation fails.
// Memory is obtained from posix_memalign(), so it is released with free().
FORCE_INLINE void* aligned_alloc( size_t alignment, size_t size )
{
	// posix_memalign() rejects alignments that are not a multiple of sizeof( void* ).
	// Raising a smaller power-of-two alignment to that minimum still satisfies the request.
	if( alignment < sizeof( void* ) )
	{
		alignment = sizeof( void* );
	}

	// On failure posix_memalign() may leave the output pointer untouched, so start
	// from nullptr and check the result instead of returning an indeterminate value.
	void* ret = nullptr;
	if( posix_memalign( &ret, alignment, size ) != 0 )
	{
		return nullptr;
	}
	return ret;
}
FORCE_INLINE void aligned_free( void* ptr )
@ -77,14 +89,12 @@ FORCE_INLINE void aligned_free( void* ptr )
free( ptr );
}
// RB: commented out
#if 0
FORCE_INLINE void __cpuidex( int* cpuinfo, int function, int subfunction )
// Portable-name CPUID query built on the __cpuid_count macro from <cpuid.h>.
//   cpuinfo     - receives the four 32-bit values produced by the query
//   function    - CPUID leaf to query
//   subfunction - CPUID sub-leaf to query
FORCE_INLINE void cpuidex( int cpuinfo[4], int function, int subfunction )
{
	// Name the four output slots; __cpuid_count writes straight through these references.
	int& outA = cpuinfo[0];
	int& outB = cpuinfo[1];
	int& outC = cpuinfo[2];
	int& outD = cpuinfo[3];
	__cpuid_count( function, subfunction, outA, outB, outC, outD );
}
FORCE_INLINE unsigned long long _xgetbv( unsigned int index )
FORCE_INLINE unsigned long long xgetbv( unsigned int index )
{
unsigned int eax, edx;
__asm__ __volatile__(
@ -94,7 +104,6 @@ FORCE_INLINE unsigned long long _xgetbv( unsigned int index )
);
return ( ( unsigned long long )edx << 32 ) | eax;
}
#endif
#else
#error Unsupported compiler

View file

@ -42,7 +42,7 @@ static MaskedOcclusionCulling::Implementation DetectCPUFeatures( MaskedOcclusion
// Get regular CPUID values
int regs[4];
__cpuidex( regs, 0, 0 );
cpuidex( regs, 0, 0 );
// MOCVectorAllocator<CpuInfo> mocalloc( alignedAlloc, alignedFree );
// std::vector<CpuInfo, MOCVectorAllocator<CpuInfo>> cpuId( mocalloc ), cpuIdEx( mocalloc );
@ -52,11 +52,11 @@ static MaskedOcclusionCulling::Implementation DetectCPUFeatures( MaskedOcclusion
for( size_t i = 0; i < cpuIdCount; ++i )
{
__cpuidex( cpuId[i].regs, ( int )i, 0 );
cpuidex( cpuId[i].regs, ( int )i, 0 );
}
// Get extended CPUID values
__cpuidex( regs, 0x80000000, 0 );
cpuidex( regs, 0x80000000, 0 );
//cpuIdEx.resize(regs[0] - 0x80000000);
size_t cpuIdExCount = regs[0] - 0x80000000;
@ -64,15 +64,15 @@ static MaskedOcclusionCulling::Implementation DetectCPUFeatures( MaskedOcclusion
for( size_t i = 0; i < cpuIdExCount; ++i )
{
__cpuidex( cpuIdEx[i].regs, 0x80000000 + ( int )i, 0 );
cpuidex( cpuIdEx[i].regs, 0x80000000 + ( int )i, 0 );
}
#define TEST_BITS(A, B) (((A) & (B)) == (B))
#define TEST_FMA_MOVE_OXSAVE (cpuIdCount >= 1 && TEST_BITS(cpuId[1].regs[2], (1 << 12) | (1 << 22) | (1 << 27)))
#define TEST_LZCNT (cpuIdExCount >= 1 && TEST_BITS(cpuIdEx[1].regs[2], 0x20))
#define TEST_SSE41 (cpuIdCount >= 1 && TEST_BITS(cpuId[1].regs[2], (1 << 19)))
#define TEST_XMM_YMM (cpuIdCount >= 1 && TEST_BITS(_xgetbv(0), (1 << 2) | (1 << 1)))
#define TEST_OPMASK_ZMM (cpuIdCount >= 1 && TEST_BITS(_xgetbv(0), (1 << 7) | (1 << 6) | (1 << 5)))
#define TEST_XMM_YMM (cpuIdCount >= 1 && TEST_BITS(xgetbv(0), (1 << 2) | (1 << 1)))
#define TEST_OPMASK_ZMM (cpuIdCount >= 1 && TEST_BITS(xgetbv(0), (1 << 7) | (1 << 6) | (1 << 5)))
#define TEST_BMI1_BMI2_AVX2 (cpuIdCount >= 7 && TEST_BITS(cpuId[7].regs[1], (1 << 3) | (1 << 5) | (1 << 8)))
#define TEST_AVX512_F_BW_DQ (cpuIdCount >= 7 && TEST_BITS(cpuId[7].regs[1], (1 << 16) | (1 << 17) | (1 << 30)))