mirror of
https://github.com/ZDoom/Raze.git
synced 2024-12-11 05:11:07 +00:00
aa54d3af10
LZMA update plus several ZScript improvements.
823 lines
21 KiB
C
823 lines
21 KiB
C
/* CpuArch.c -- CPU specific code
|
|
2023-05-18 : Igor Pavlov : Public domain */
|
|
|
|
#include "Precomp.h"
|
|
|
|
// #include <stdio.h>
|
|
|
|
#include "CpuArch.h"
|
|
|
|
#ifdef MY_CPU_X86_OR_AMD64
|
|
|
|
#undef NEED_CHECK_FOR_CPUID
|
|
#if !defined(MY_CPU_AMD64)
|
|
#define NEED_CHECK_FOR_CPUID
|
|
#endif
|
|
|
|
/*
|
|
cpuid instruction supports (subFunction) parameter in ECX,
|
|
that is used only with some specific (function) parameter values.
|
|
But we always use only (subFunction==0).
|
|
*/
|
|
/*
|
|
__cpuid(): MSVC and GCC/CLANG use same function/macro name
|
|
but parameters are different.
|
|
We use MSVC __cpuid() parameters style for our z7_x86_cpuid() function.
|
|
*/
|
|
|
|
#if defined(__GNUC__) /* && (__GNUC__ >= 10) */ \
|
|
|| defined(__clang__) /* && (__clang_major__ >= 10) */
|
|
|
|
/* there was some CLANG/GCC compilers that have issues with
|
|
rbx(ebx) handling in asm blocks in -fPIC mode (__PIC__ is defined).
|
|
compiler's <cpuid.h> contains the macro __cpuid() that is similar to our code.
|
|
The history of __cpuid() changes in CLANG/GCC:
|
|
GCC:
|
|
2007: it preserved ebx for (__PIC__ && __i386__)
|
|
2013: it preserved rbx and ebx for __PIC__
|
|
2014: it doesn't preserves rbx and ebx anymore
|
|
we suppose that (__GNUC__ >= 5) fixed that __PIC__ ebx/rbx problem.
|
|
CLANG:
|
|
2014+: it preserves rbx, but only for 64-bit code. No __PIC__ check.
|
|
Why CLANG cares about 64-bit mode only, and doesn't care about ebx (in 32-bit)?
|
|
Do we need __PIC__ test for CLANG or we must care about rbx even if
|
|
__PIC__ is not defined?
|
|
*/
|
|
|
|
#define ASM_LN "\n"
|
|
|
|
#if defined(MY_CPU_AMD64) && defined(__PIC__) \
|
|
&& ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
|
|
|
|
#define x86_cpuid_MACRO(p, func) { \
|
|
__asm__ __volatile__ ( \
|
|
ASM_LN "mov %%rbx, %q1" \
|
|
ASM_LN "cpuid" \
|
|
ASM_LN "xchg %%rbx, %q1" \
|
|
: "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
|
|
|
|
/* "=&r" selects free register. It can select even rbx, if that register is free.
|
|
"=&D" for (RDI) also works, but the code can be larger with "=&D"
|
|
"2"(0) means (subFunction = 0),
|
|
2 is (zero-based) index in the output constraint list "=c" (ECX). */
|
|
|
|
#elif defined(MY_CPU_X86) && defined(__PIC__) \
|
|
&& ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
|
|
|
|
#define x86_cpuid_MACRO(p, func) { \
|
|
__asm__ __volatile__ ( \
|
|
ASM_LN "mov %%ebx, %k1" \
|
|
ASM_LN "cpuid" \
|
|
ASM_LN "xchg %%ebx, %k1" \
|
|
: "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
|
|
|
|
#else
|
|
|
|
#define x86_cpuid_MACRO(p, func) { \
|
|
__asm__ __volatile__ ( \
|
|
ASM_LN "cpuid" \
|
|
: "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
|
|
|
|
#endif
|
|
|
|
|
|
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
|
|
{
|
|
x86_cpuid_MACRO(p, func)
|
|
}
|
|
|
|
|
|
Z7_NO_INLINE
|
|
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
|
|
{
|
|
#if defined(NEED_CHECK_FOR_CPUID)
|
|
#define EFALGS_CPUID_BIT 21
|
|
UInt32 a;
|
|
__asm__ __volatile__ (
|
|
ASM_LN "pushf"
|
|
ASM_LN "pushf"
|
|
ASM_LN "pop %0"
|
|
// ASM_LN "movl %0, %1"
|
|
// ASM_LN "xorl $0x200000, %0"
|
|
ASM_LN "btc %1, %0"
|
|
ASM_LN "push %0"
|
|
ASM_LN "popf"
|
|
ASM_LN "pushf"
|
|
ASM_LN "pop %0"
|
|
ASM_LN "xorl (%%esp), %0"
|
|
|
|
ASM_LN "popf"
|
|
ASM_LN
|
|
: "=&r" (a) // "=a"
|
|
: "i" (EFALGS_CPUID_BIT)
|
|
);
|
|
if ((a & (1 << EFALGS_CPUID_BIT)) == 0)
|
|
return 0;
|
|
#endif
|
|
{
|
|
UInt32 p[4];
|
|
x86_cpuid_MACRO(p, 0)
|
|
return p[0];
|
|
}
|
|
}
|
|
|
|
#undef ASM_LN
|
|
|
|
#elif !defined(_MSC_VER)
|
|
|
|
/*
|
|
// for gcc/clang and other: we can try to use __cpuid macro:
|
|
#include <cpuid.h>
|
|
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
|
|
{
|
|
__cpuid(func, p[0], p[1], p[2], p[3]);
|
|
}
|
|
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
|
|
{
|
|
return (UInt32)__get_cpuid_max(0, NULL);
|
|
}
|
|
*/
|
|
// for unsupported cpuid:
|
|
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
|
|
{
|
|
UNUSED_VAR(func)
|
|
p[0] = p[1] = p[2] = p[3] = 0;
|
|
}
|
|
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
#else // _MSC_VER
|
|
|
|
#if !defined(MY_CPU_AMD64)
|
|
|
|
UInt32 __declspec(naked) Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
|
|
{
|
|
#if defined(NEED_CHECK_FOR_CPUID)
|
|
#define EFALGS_CPUID_BIT 21
|
|
__asm pushfd
|
|
__asm pushfd
|
|
/*
|
|
__asm pop eax
|
|
// __asm mov edx, eax
|
|
__asm btc eax, EFALGS_CPUID_BIT
|
|
__asm push eax
|
|
*/
|
|
__asm btc dword ptr [esp], EFALGS_CPUID_BIT
|
|
__asm popfd
|
|
__asm pushfd
|
|
__asm pop eax
|
|
// __asm xor eax, edx
|
|
__asm xor eax, [esp]
|
|
// __asm push edx
|
|
__asm popfd
|
|
__asm and eax, (1 shl EFALGS_CPUID_BIT)
|
|
__asm jz end_func
|
|
#endif
|
|
__asm push ebx
|
|
__asm xor eax, eax // func
|
|
__asm xor ecx, ecx // subFunction (optional) for (func == 0)
|
|
__asm cpuid
|
|
__asm pop ebx
|
|
#if defined(NEED_CHECK_FOR_CPUID)
|
|
end_func:
|
|
#endif
|
|
__asm ret 0
|
|
}
|
|
|
|
void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
|
|
{
|
|
UNUSED_VAR(p)
|
|
UNUSED_VAR(func)
|
|
__asm push ebx
|
|
__asm push edi
|
|
__asm mov edi, ecx // p
|
|
__asm mov eax, edx // func
|
|
__asm xor ecx, ecx // subfunction (optional) for (func == 0)
|
|
__asm cpuid
|
|
__asm mov [edi ], eax
|
|
__asm mov [edi + 4], ebx
|
|
__asm mov [edi + 8], ecx
|
|
__asm mov [edi + 12], edx
|
|
__asm pop edi
|
|
__asm pop ebx
|
|
__asm ret 0
|
|
}
|
|
|
|
#else // MY_CPU_AMD64
|
|
|
|
#if _MSC_VER >= 1600
|
|
#include <intrin.h>
|
|
#define MY_cpuidex __cpuidex
|
|
#else
|
|
/*
|
|
__cpuid (func == (0 or 7)) requires subfunction number in ECX.
|
|
MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
|
|
__cpuid() in new MSVC clears ECX.
|
|
__cpuid() in old MSVC (14.00) x64 doesn't clear ECX
|
|
We still can use __cpuid for low (func) values that don't require ECX,
|
|
but __cpuid() in old MSVC will be incorrect for some func values: (func == 7).
|
|
So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,
|
|
where ECX value is first parameter for FASTCALL / NO_INLINE func,
|
|
So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and
|
|
old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.
|
|
|
|
DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!!
|
|
*/
|
|
static
|
|
Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(UInt32 subFunction, UInt32 func, int *CPUInfo)
|
|
{
|
|
UNUSED_VAR(subFunction)
|
|
__cpuid(CPUInfo, func);
|
|
}
|
|
#define MY_cpuidex(info, func, func2) MY_cpuidex_HACK(func2, func, info)
|
|
#pragma message("======== MY_cpuidex_HACK WAS USED ========")
|
|
#endif // _MSC_VER >= 1600
|
|
|
|
#if !defined(MY_CPU_AMD64)
|
|
/* inlining for __cpuid() in MSVC x86 (32-bit) produces big ineffective code,
|
|
so we disable inlining here */
|
|
Z7_NO_INLINE
|
|
#endif
|
|
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
|
|
{
|
|
MY_cpuidex((int *)p, (int)func, 0);
|
|
}
|
|
|
|
Z7_NO_INLINE
|
|
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
|
|
{
|
|
int a[4];
|
|
MY_cpuidex(a, 0, 0);
|
|
return a[0];
|
|
}
|
|
|
|
#endif // MY_CPU_AMD64
|
|
#endif // _MSC_VER
|
|
|
|
#if defined(NEED_CHECK_FOR_CPUID)
|
|
#define CHECK_CPUID_IS_SUPPORTED { if (z7_x86_cpuid_GetMaxFunc() == 0) return 0; }
|
|
#else
|
|
#define CHECK_CPUID_IS_SUPPORTED
|
|
#endif
|
|
#undef NEED_CHECK_FOR_CPUID
|
|
|
|
|
|
static
|
|
BoolInt x86cpuid_Func_1(UInt32 *p)
|
|
{
|
|
CHECK_CPUID_IS_SUPPORTED
|
|
z7_x86_cpuid(p, 1);
|
|
return True;
|
|
}
|
|
|
|
/*
|
|
static const UInt32 kVendors[][1] =
|
|
{
|
|
{ 0x756E6547 }, // , 0x49656E69, 0x6C65746E },
|
|
{ 0x68747541 }, // , 0x69746E65, 0x444D4163 },
|
|
{ 0x746E6543 } // , 0x48727561, 0x736C7561 }
|
|
};
|
|
*/
|
|
|
|
/*
|
|
typedef struct
|
|
{
|
|
UInt32 maxFunc;
|
|
UInt32 vendor[3];
|
|
UInt32 ver;
|
|
UInt32 b;
|
|
UInt32 c;
|
|
UInt32 d;
|
|
} Cx86cpuid;
|
|
|
|
enum
|
|
{
|
|
CPU_FIRM_INTEL,
|
|
CPU_FIRM_AMD,
|
|
CPU_FIRM_VIA
|
|
};
|
|
int x86cpuid_GetFirm(const Cx86cpuid *p);
|
|
#define x86cpuid_ver_GetFamily(ver) (((ver >> 16) & 0xff0) | ((ver >> 8) & 0xf))
|
|
#define x86cpuid_ver_GetModel(ver) (((ver >> 12) & 0xf0) | ((ver >> 4) & 0xf))
|
|
#define x86cpuid_ver_GetStepping(ver) (ver & 0xf)
|
|
|
|
int x86cpuid_GetFirm(const Cx86cpuid *p)
|
|
{
|
|
unsigned i;
|
|
for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[0]); i++)
|
|
{
|
|
const UInt32 *v = kVendors[i];
|
|
if (v[0] == p->vendor[0]
|
|
// && v[1] == p->vendor[1]
|
|
// && v[2] == p->vendor[2]
|
|
)
|
|
return (int)i;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
BoolInt CPU_Is_InOrder()
|
|
{
|
|
Cx86cpuid p;
|
|
UInt32 family, model;
|
|
if (!x86cpuid_CheckAndRead(&p))
|
|
return True;
|
|
|
|
family = x86cpuid_ver_GetFamily(p.ver);
|
|
model = x86cpuid_ver_GetModel(p.ver);
|
|
|
|
switch (x86cpuid_GetFirm(&p))
|
|
{
|
|
case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && (
|
|
// In-Order Atom CPU
|
|
model == 0x1C // 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330
|
|
|| model == 0x26 // 45 nm, Z6xx
|
|
|| model == 0x27 // 32 nm, Z2460
|
|
|| model == 0x35 // 32 nm, Z2760
|
|
|| model == 0x36 // 32 nm, N2xxx, D2xxx
|
|
)));
|
|
case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));
|
|
case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF));
|
|
}
|
|
return False; // v23 : unknown processors are not In-Order
|
|
}
|
|
*/
|
|
|
|
#ifdef _WIN32
|
|
#include "7zWindows.h"
|
|
#endif
|
|
|
|
#if !defined(MY_CPU_AMD64) && defined(_WIN32)
|
|
|
|
/* for legacy SSE ia32: there is no user-space cpu instruction to check
|
|
that OS supports SSE register storing/restoring on context switches.
|
|
So we need some OS-specific function to check that it's safe to use SSE registers.
|
|
*/
|
|
|
|
Z7_FORCE_INLINE
|
|
static BoolInt CPU_Sys_Is_SSE_Supported(void)
|
|
{
|
|
#ifdef _MSC_VER
|
|
#pragma warning(push)
|
|
#pragma warning(disable : 4996) // `GetVersion': was declared deprecated
|
|
#endif
|
|
/* low byte is major version of Windows
|
|
We suppose that any Windows version since
|
|
Windows2000 (major == 5) supports SSE registers */
|
|
return (Byte)GetVersion() >= 5;
|
|
#if defined(_MSC_VER)
|
|
#pragma warning(pop)
|
|
#endif
|
|
}
|
|
#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;
|
|
#else
|
|
#define CHECK_SYS_SSE_SUPPORT
|
|
#endif
|
|
|
|
|
|
#if !defined(MY_CPU_AMD64)
|
|
|
|
BoolInt CPU_IsSupported_CMOV(void)
|
|
{
|
|
UInt32 a[4];
|
|
if (!x86cpuid_Func_1(&a[0]))
|
|
return 0;
|
|
return (a[3] >> 15) & 1;
|
|
}
|
|
|
|
BoolInt CPU_IsSupported_SSE(void)
|
|
{
|
|
UInt32 a[4];
|
|
CHECK_SYS_SSE_SUPPORT
|
|
if (!x86cpuid_Func_1(&a[0]))
|
|
return 0;
|
|
return (a[3] >> 25) & 1;
|
|
}
|
|
|
|
BoolInt CPU_IsSupported_SSE2(void)
|
|
{
|
|
UInt32 a[4];
|
|
CHECK_SYS_SSE_SUPPORT
|
|
if (!x86cpuid_Func_1(&a[0]))
|
|
return 0;
|
|
return (a[3] >> 26) & 1;
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
static UInt32 x86cpuid_Func_1_ECX(void)
|
|
{
|
|
UInt32 a[4];
|
|
CHECK_SYS_SSE_SUPPORT
|
|
if (!x86cpuid_Func_1(&a[0]))
|
|
return 0;
|
|
return a[2];
|
|
}
|
|
|
|
BoolInt CPU_IsSupported_AES(void)
|
|
{
|
|
return (x86cpuid_Func_1_ECX() >> 25) & 1;
|
|
}
|
|
|
|
BoolInt CPU_IsSupported_SSSE3(void)
|
|
{
|
|
return (x86cpuid_Func_1_ECX() >> 9) & 1;
|
|
}
|
|
|
|
BoolInt CPU_IsSupported_SSE41(void)
|
|
{
|
|
return (x86cpuid_Func_1_ECX() >> 19) & 1;
|
|
}
|
|
|
|
BoolInt CPU_IsSupported_SHA(void)
|
|
{
|
|
CHECK_SYS_SSE_SUPPORT
|
|
|
|
if (z7_x86_cpuid_GetMaxFunc() < 7)
|
|
return False;
|
|
{
|
|
UInt32 d[4];
|
|
z7_x86_cpuid(d, 7);
|
|
return (d[1] >> 29) & 1;
|
|
}
|
|
}
|
|
|
|
/*
|
|
MSVC: _xgetbv() intrinsic is available since VS2010SP1.
|
|
MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in
|
|
<immintrin.h> that we can use or check.
|
|
For any 32-bit x86 we can use asm code in MSVC,
|
|
but MSVC asm code is huge after compilation.
|
|
So _xgetbv() is better
|
|
|
|
ICC: _xgetbv() intrinsic is available (in what version of ICC?)
|
|
ICC defines (__GNUC___) and it supports gnu assembler
|
|
also ICC supports MASM style code with -use-msasm switch.
|
|
but ICC doesn't support __attribute__((__target__))
|
|
|
|
GCC/CLANG 9:
|
|
_xgetbv() is macro that works via __builtin_ia32_xgetbv()
|
|
and we need __attribute__((__target__("xsave")).
|
|
But with __target__("xsave") the function will be not
|
|
inlined to function that has no __target__("xsave") attribute.
|
|
If we want _xgetbv() call inlining, then we should use asm version
|
|
instead of calling _xgetbv().
|
|
Note:intrinsic is broke before GCC 8.2:
|
|
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85684
|
|
*/
|
|
|
|
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1100) \
|
|
|| defined(_MSC_VER) && (_MSC_VER >= 1600) && (_MSC_FULL_VER >= 160040219) \
|
|
|| defined(__GNUC__) && (__GNUC__ >= 9) \
|
|
|| defined(__clang__) && (__clang_major__ >= 9)
|
|
// we define ATTRIB_XGETBV, if we want to use predefined _xgetbv() from compiler
|
|
#if defined(__INTEL_COMPILER)
|
|
#define ATTRIB_XGETBV
|
|
#elif defined(__GNUC__) || defined(__clang__)
|
|
// we don't define ATTRIB_XGETBV here, because asm version is better for inlining.
|
|
// #define ATTRIB_XGETBV __attribute__((__target__("xsave")))
|
|
#else
|
|
#define ATTRIB_XGETBV
|
|
#endif
|
|
#endif
|
|
|
|
#if defined(ATTRIB_XGETBV)
|
|
#include <immintrin.h>
|
|
#endif
|
|
|
|
|
|
// XFEATURE_ENABLED_MASK/XCR0
|
|
#define MY_XCR_XFEATURE_ENABLED_MASK 0
|
|
|
|
#if defined(ATTRIB_XGETBV)
|
|
ATTRIB_XGETBV
|
|
#endif
|
|
static UInt64 x86_xgetbv_0(UInt32 num)
|
|
{
|
|
#if defined(ATTRIB_XGETBV)
|
|
{
|
|
return
|
|
#if (defined(_MSC_VER))
|
|
_xgetbv(num);
|
|
#else
|
|
__builtin_ia32_xgetbv(
|
|
#if !defined(__clang__)
|
|
(int)
|
|
#endif
|
|
num);
|
|
#endif
|
|
}
|
|
|
|
#elif defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_CC)
|
|
|
|
UInt32 a, d;
|
|
#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
|
|
__asm__
|
|
(
|
|
"xgetbv"
|
|
: "=a"(a), "=d"(d) : "c"(num) : "cc"
|
|
);
|
|
#else // is old gcc
|
|
__asm__
|
|
(
|
|
".byte 0x0f, 0x01, 0xd0" "\n\t"
|
|
: "=a"(a), "=d"(d) : "c"(num) : "cc"
|
|
);
|
|
#endif
|
|
return ((UInt64)d << 32) | a;
|
|
// return a;
|
|
|
|
#elif defined(_MSC_VER) && !defined(MY_CPU_AMD64)
|
|
|
|
UInt32 a, d;
|
|
__asm {
|
|
push eax
|
|
push edx
|
|
push ecx
|
|
mov ecx, num;
|
|
// xor ecx, ecx // = MY_XCR_XFEATURE_ENABLED_MASK
|
|
_emit 0x0f
|
|
_emit 0x01
|
|
_emit 0xd0
|
|
mov a, eax
|
|
mov d, edx
|
|
pop ecx
|
|
pop edx
|
|
pop eax
|
|
}
|
|
return ((UInt64)d << 32) | a;
|
|
// return a;
|
|
|
|
#else // it's unknown compiler
|
|
// #error "Need xgetbv function"
|
|
UNUSED_VAR(num)
|
|
// for MSVC-X64 we could call external function from external file.
|
|
/* Actually we had checked OSXSAVE/AVX in cpuid before.
|
|
So it's expected that OS supports at least AVX and below. */
|
|
// if (num != MY_XCR_XFEATURE_ENABLED_MASK) return 0; // if not XCR0
|
|
return
|
|
// (1 << 0) | // x87
|
|
(1 << 1) // SSE
|
|
| (1 << 2); // AVX
|
|
|
|
#endif
|
|
}
|
|
|
|
#ifdef _WIN32
|
|
/*
|
|
Windows versions do not know about new ISA extensions that
|
|
can be introduced. But we still can use new extensions,
|
|
even if Windows doesn't report about supporting them,
|
|
But we can use new extensions, only if Windows knows about new ISA extension
|
|
that changes the number or size of registers: SSE, AVX/XSAVE, AVX512
|
|
So it's enough to check
|
|
MY_PF_AVX_INSTRUCTIONS_AVAILABLE
|
|
instead of
|
|
MY_PF_AVX2_INSTRUCTIONS_AVAILABLE
|
|
*/
|
|
#define MY_PF_XSAVE_ENABLED 17
|
|
// #define MY_PF_SSSE3_INSTRUCTIONS_AVAILABLE 36
|
|
// #define MY_PF_SSE4_1_INSTRUCTIONS_AVAILABLE 37
|
|
// #define MY_PF_SSE4_2_INSTRUCTIONS_AVAILABLE 38
|
|
// #define MY_PF_AVX_INSTRUCTIONS_AVAILABLE 39
|
|
// #define MY_PF_AVX2_INSTRUCTIONS_AVAILABLE 40
|
|
// #define MY_PF_AVX512F_INSTRUCTIONS_AVAILABLE 41
|
|
#endif
|
|
|
|
BoolInt CPU_IsSupported_AVX(void)
|
|
{
|
|
#ifdef _WIN32
|
|
if (!IsProcessorFeaturePresent(MY_PF_XSAVE_ENABLED))
|
|
return False;
|
|
/* PF_AVX_INSTRUCTIONS_AVAILABLE probably is supported starting from
|
|
some latest Win10 revisions. But we need AVX in older Windows also.
|
|
So we don't use the following check: */
|
|
/*
|
|
if (!IsProcessorFeaturePresent(MY_PF_AVX_INSTRUCTIONS_AVAILABLE))
|
|
return False;
|
|
*/
|
|
#endif
|
|
|
|
/*
|
|
OS must use new special XSAVE/XRSTOR instructions to save
|
|
AVX registers when it required for context switching.
|
|
At OS statring:
|
|
OS sets CR4.OSXSAVE flag to signal the processor that OS supports the XSAVE extensions.
|
|
Also OS sets bitmask in XCR0 register that defines what
|
|
registers will be processed by XSAVE instruction:
|
|
XCR0.SSE[bit 0] - x87 registers and state
|
|
XCR0.SSE[bit 1] - SSE registers and state
|
|
XCR0.AVX[bit 2] - AVX registers and state
|
|
CR4.OSXSAVE is reflected to CPUID.1:ECX.OSXSAVE[bit 27].
|
|
So we can read that bit in user-space.
|
|
XCR0 is available for reading in user-space by new XGETBV instruction.
|
|
*/
|
|
{
|
|
const UInt32 c = x86cpuid_Func_1_ECX();
|
|
if (0 == (1
|
|
& (c >> 28) // AVX instructions are supported by hardware
|
|
& (c >> 27))) // OSXSAVE bit: XSAVE and related instructions are enabled by OS.
|
|
return False;
|
|
}
|
|
|
|
/* also we can check
|
|
CPUID.1:ECX.XSAVE [bit 26] : that shows that
|
|
XSAVE, XRESTOR, XSETBV, XGETBV instructions are supported by hardware.
|
|
But that check is redundant, because if OSXSAVE bit is set, then XSAVE is also set */
|
|
|
|
/* If OS have enabled XSAVE extension instructions (OSXSAVE == 1),
|
|
in most cases we expect that OS also will support storing/restoring
|
|
for AVX and SSE states at least.
|
|
But to be ensure for that we call user-space instruction
|
|
XGETBV(0) to get XCR0 value that contains bitmask that defines
|
|
what exact states(registers) OS have enabled for storing/restoring.
|
|
*/
|
|
|
|
{
|
|
const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
|
|
// printf("\n=== XGetBV=%d\n", bm);
|
|
return 1
|
|
& (bm >> 1) // SSE state is supported (set by OS) for storing/restoring
|
|
& (bm >> 2); // AVX state is supported (set by OS) for storing/restoring
|
|
}
|
|
// since Win7SP1: we can use GetEnabledXStateFeatures();
|
|
}
|
|
|
|
|
|
BoolInt CPU_IsSupported_AVX2(void)
|
|
{
|
|
if (!CPU_IsSupported_AVX())
|
|
return False;
|
|
if (z7_x86_cpuid_GetMaxFunc() < 7)
|
|
return False;
|
|
{
|
|
UInt32 d[4];
|
|
z7_x86_cpuid(d, 7);
|
|
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
|
|
return 1
|
|
& (d[1] >> 5); // avx2
|
|
}
|
|
}
|
|
|
|
BoolInt CPU_IsSupported_VAES_AVX2(void)
|
|
{
|
|
if (!CPU_IsSupported_AVX())
|
|
return False;
|
|
if (z7_x86_cpuid_GetMaxFunc() < 7)
|
|
return False;
|
|
{
|
|
UInt32 d[4];
|
|
z7_x86_cpuid(d, 7);
|
|
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
|
|
return 1
|
|
& (d[1] >> 5) // avx2
|
|
// & (d[1] >> 31) // avx512vl
|
|
& (d[2] >> 9); // vaes // VEX-256/EVEX
|
|
}
|
|
}
|
|
|
|
BoolInt CPU_IsSupported_PageGB(void)
|
|
{
|
|
CHECK_CPUID_IS_SUPPORTED
|
|
{
|
|
UInt32 d[4];
|
|
z7_x86_cpuid(d, 0x80000000);
|
|
if (d[0] < 0x80000001)
|
|
return False;
|
|
z7_x86_cpuid(d, 0x80000001);
|
|
return (d[3] >> 26) & 1;
|
|
}
|
|
}
|
|
|
|
|
|
#elif defined(MY_CPU_ARM_OR_ARM64)
|
|
|
|
#ifdef _WIN32
|
|
|
|
#include "7zWindows.h"
|
|
|
|
BoolInt CPU_IsSupported_CRC32(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
|
|
BoolInt CPU_IsSupported_CRYPTO(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
|
|
BoolInt CPU_IsSupported_NEON(void) { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
|
|
|
|
#else
|
|
|
|
#if defined(__APPLE__)
|
|
|
|
/*
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
static void Print_sysctlbyname(const char *name)
|
|
{
|
|
size_t bufSize = 256;
|
|
char buf[256];
|
|
int res = sysctlbyname(name, &buf, &bufSize, NULL, 0);
|
|
{
|
|
int i;
|
|
printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize);
|
|
for (i = 0; i < 20; i++)
|
|
printf(" %2x", (unsigned)(Byte)buf[i]);
|
|
|
|
}
|
|
}
|
|
*/
|
|
/*
|
|
Print_sysctlbyname("hw.pagesize");
|
|
Print_sysctlbyname("machdep.cpu.brand_string");
|
|
*/
|
|
|
|
static BoolInt z7_sysctlbyname_Get_BoolInt(const char *name)
|
|
{
|
|
UInt32 val = 0;
|
|
if (z7_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
|
|
return 1;
|
|
return 0;
|
|
}
|
|
|
|
BoolInt CPU_IsSupported_CRC32(void)
|
|
{
|
|
return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
|
|
}
|
|
|
|
BoolInt CPU_IsSupported_NEON(void)
|
|
{
|
|
return z7_sysctlbyname_Get_BoolInt("hw.optional.neon");
|
|
}
|
|
|
|
#ifdef MY_CPU_ARM64
|
|
#define APPLE_CRYPTO_SUPPORT_VAL 1
|
|
#else
|
|
#define APPLE_CRYPTO_SUPPORT_VAL 0
|
|
#endif
|
|
|
|
BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
|
|
BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
|
|
BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
|
|
|
|
|
|
#else // __APPLE__
|
|
|
|
#include <sys/auxv.h>
|
|
|
|
#define USE_HWCAP
|
|
|
|
#ifdef USE_HWCAP
|
|
|
|
#include <asm/hwcap.h>
|
|
|
|
#define MY_HWCAP_CHECK_FUNC_2(name1, name2) \
|
|
BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; }
|
|
|
|
#ifdef MY_CPU_ARM64
|
|
#define MY_HWCAP_CHECK_FUNC(name) \
|
|
MY_HWCAP_CHECK_FUNC_2(name, name)
|
|
MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)
|
|
// MY_HWCAP_CHECK_FUNC (ASIMD)
|
|
#elif defined(MY_CPU_ARM)
|
|
#define MY_HWCAP_CHECK_FUNC(name) \
|
|
BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; }
|
|
MY_HWCAP_CHECK_FUNC_2(NEON, NEON)
|
|
#endif
|
|
|
|
#else // USE_HWCAP
|
|
|
|
#define MY_HWCAP_CHECK_FUNC(name) \
|
|
BoolInt CPU_IsSupported_ ## name() { return 0; }
|
|
MY_HWCAP_CHECK_FUNC(NEON)
|
|
|
|
#endif // USE_HWCAP
|
|
|
|
MY_HWCAP_CHECK_FUNC (CRC32)
|
|
MY_HWCAP_CHECK_FUNC (SHA1)
|
|
MY_HWCAP_CHECK_FUNC (SHA2)
|
|
MY_HWCAP_CHECK_FUNC (AES)
|
|
|
|
#endif // __APPLE__
|
|
#endif // _WIN32
|
|
|
|
#endif // MY_CPU_ARM_OR_ARM64
|
|
|
|
|
|
|
|
#ifdef __APPLE__
|
|
|
|
#include <sys/sysctl.h>
|
|
|
|
int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
|
|
{
|
|
return sysctlbyname(name, buf, bufSize, NULL, 0);
|
|
}
|
|
|
|
int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
|
|
{
|
|
size_t bufSize = sizeof(*val);
|
|
const int res = z7_sysctlbyname_Get(name, val, &bufSize);
|
|
if (res == 0 && bufSize != sizeof(*val))
|
|
return EFAULT;
|
|
return res;
|
|
}
|
|
|
|
#endif
|