Mirror of https://github.com/DarkPlacesEngine/gmqcc.git, synced 2025-02-01 12:20:49 +00:00
Remove SSE hash, it's just too much effort to maintain.
This commit is contained in:
parent 31e13e6e64
commit f24bdced10

1 changed file with 0 additions and 141 deletions

hash.c | 141
@@ -261,149 +261,8 @@ static GMQCC_FORCEINLINE GMQCC_USED uint32_t hash_native(const void *GMQCC_RESTR
    return hash_native_result(hash, carry, length);
}

/*
 * Inline assembly optimized SSE version for when SSE is present via CPUID
 * or the host compiler has __SSE__. This is about 16 cycles faster than
 * native at -O2 for GCC and 11 cycles for -O3.
 *
 * Tested with -m32 on a Phenom II X4 with:
 *  gcc version 4.8.1 20130725 (prerelease) (GCC)
 */
#if defined(__GNUC__) && defined(__i386__)
static GMQCC_FORCEINLINE uint32_t hash_sse(const void *GMQCC_RESTRICT key, size_t length) {
    uint32_t ret;
    __asm__ __volatile__ (
        "   mov %%eax, %%ebx\n"
        "   mov %2, %%eax\n"
        "   movd %%eax, %%xmm7\n"
        "   shufps $0, %%xmm7, %%xmm7\n"
        "   mov %3, %%eax\n"
        "   movd %%eax, %%xmm6\n"
        "   shufps $0, %%xmm6, %%xmm6\n"
        "   lea (%%esi, %%ecx, 1), %%edi\n"
        "   jmp 2f\n"
        "1:\n"
        "   movaps (%%esi), %%xmm0\n"
        "   pmulld %%xmm7, %%xmm0\n"
        "   movaps %%xmm0, %%xmm2\n"
        "   pslld $15, %%xmm0\n"
        "   psrld $17, %%xmm2\n"
        "   orps %%xmm2, %%xmm0\n"
        "   pmulld %%xmm6, %%xmm0\n"
        "   movd %%xmm0, %%eax\n"
        "   xor %%eax, %%ebx\n"
        "   rol $13, %%ebx\n"
        "   imul $5, %%ebx\n"
        "   add $0xE6546B64, %%ebx\n"
        "   shufps $0x39, %%xmm0, %%xmm0\n"
        "   movd %%xmm0, %%eax\n"
        "   xor %%eax, %%ebx\n"
        "   rol $13, %%ebx\n"
        "   imul $5, %%ebx\n"
        "   add $0xE6546B64, %%ebx\n"
        "   shufps $0x39, %%xmm0, %%xmm0\n"
        "   movd %%xmm0, %%eax\n"
        "   xor %%eax, %%ebx\n"
        "   rol $13, %%ebx\n"
        "   imul $5, %%ebx\n"
        "   add $0xE6546B64, %%ebx\n"
        "   shufps $0x39, %%xmm0, %%xmm0\n"
        "   movd %%xmm0, %%eax\n"
        "   xor %%eax, %%ebx\n"
        "   rol $13, %%ebx\n"
        "   imul $5, %%ebx\n"
        "   add $0xE6546B64, %%ebx\n"
        "   add $16, %%esi\n"
        "2:\n"
        "   cmp %%esi, %%edi\n"
        "   jne 1b\n"
        "   xor %%ecx, %%ebx\n"
        "   mov %%ebx, %%eax\n"
        "   shr $16, %%ebx\n"
        "   xor %%ebx, %%eax\n"
        "   imul $0x85EBCA6b, %%eax\n"
        "   mov %%eax, %%ebx\n"
        "   shr $13, %%ebx\n"
        "   xor %%ebx, %%eax\n"
        "   imul $0xC2B2AE35, %%eax\n"
        "   mov %%eax, %%ebx\n"
        "   shr $16, %%ebx\n"
        "   xor %%ebx, %%eax\n"
        : "=a" (ret)

        : "a" (HASH_SEED),
          "i" (HASH_MASK1),
          "i" (HASH_MASK2),
          "S" (key),
          "c" (length)

        : "%ebx",
          "%edi"
    );
    return ret;
}
#endif
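
For readers mapping the removed assembly back to C: each loop iteration loads one aligned 16-byte block, applies a MurmurHash3-style mix to its four 32-bit lanes, and folds them into the running hash; the instructions after the loop are the usual avalanche finalisation. The following is an editorial scalar sketch of that computation, not code from hash.c: the function and helper names are hypothetical, the HASH_SEED/HASH_MASK1/HASH_MASK2 values are assumed placeholders (the real definitions live elsewhere in hash.c), and, like the assembly, it assumes length is a multiple of 16 and key is suitably aligned.

#include <stdint.h>
#include <stddef.h>

/* Assumed placeholders: the real constants are defined elsewhere in hash.c.
 * The mask values below are the standard MurmurHash3 c1/c2 constants and are
 * only a guess at what HASH_MASK1/HASH_MASK2 expand to. */
#define HASH_SEED  0u             /* placeholder seed */
#define HASH_MASK1 0xCC9E2D51u    /* assumed value    */
#define HASH_MASK2 0x1B873593u    /* assumed value    */

static uint32_t rotl32(uint32_t v, int s) {
    return (v << s) | (v >> (32 - s));
}

/* Hypothetical scalar rendering of the removed hash_sse loop + finalizer. */
static uint32_t hash_scalar_sketch(const void *key, size_t length) {
    const uint32_t *words = (const uint32_t *)key;
    uint32_t hash = HASH_SEED;           /* mov %eax, %ebx                 */
    size_t i;

    for (i = 0; i < length / 4; i++) {   /* asm: 4 lanes per 16-byte step  */
        uint32_t k = words[i];
        k = k * HASH_MASK1;              /* pmulld %xmm7, %xmm0            */
        k = rotl32(k, 15);               /* pslld / psrld / orps           */
        k = k * HASH_MASK2;              /* pmulld %xmm6, %xmm0            */
        hash ^= k;                       /* xor %eax, %ebx                 */
        hash  = rotl32(hash, 13);        /* rol $13, %ebx                  */
        hash  = hash * 5 + 0xE6546B64u;  /* imul $5 ; add $0xE6546B64      */
    }

    hash ^= (uint32_t)length;            /* xor %ecx, %ebx                 */
    hash ^= hash >> 16;                  /* avalanche finalisation         */
    hash *= 0x85EBCA6Bu;
    hash ^= hash >> 13;
    hash *= 0xC2B2AE35u;
    hash ^= hash >> 16;
    return hash;
}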

#if defined (__GNUC__) && defined(__i386__) && !defined(__SSE__)
/*
 * Emulate MSVC _cpuid intrinsic for GCC/MinGW/Clang, this will be used
 * to determine if we should use the SSE route.
 */
static GMQCC_FORCEINLINE void hash_cpuid(int *lanes, int entry) {
    __asm__ __volatile__ (
        "cpuid"
        : "=a"(lanes[0]),
          "=b"(lanes[1]),
          "=c"(lanes[2]),
          "=d"(lanes[3])
        : "a" (entry)
    );
}

#endif /* !(defined(__GNUC__) && defined(__i386__) */
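
An aside on the cpuid emulation above: GCC and Clang also ship a <cpuid.h> header that wraps the same instruction and already knows the feature-bit layout. The sketch below is an alternative, not what hash.c did: have_sse is a hypothetical helper, __get_cpuid and bit_SSE come from <cpuid.h>, and the SSE flag is bit 25 of EDX for CPUID leaf 1.

#include <stdbool.h>
#include <cpuid.h>   /* GCC/Clang wrapper around the cpuid instruction */

/* Hypothetical alternative to hash_cpuid: query leaf 1 and test the SSE
 * feature flag (EDX bit 25). __get_cpuid returns 0 when the requested
 * leaf is unsupported, so this degrades safely on very old CPUs. */
static bool have_sse(void) {
    unsigned int eax, ebx, ecx, edx;
    if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
        return false;
    return (edx & bit_SSE) != 0;
}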

static uint32_t hash_entry(const void *GMQCC_RESTRICT key, size_t length) {
    /*
     * No host SSE instruction set assumed, do a runtime test instead. This
     * is for MinGW32 mostly, which doesn't define __SSE__.
     */
#if defined (__GNUC__) && defined(__i386__) && !defined(__SSE__)
    static bool memoize = false;
    static bool sse = false;

    if (GMQCC_UNLIKELY(!memoize)) {
        /*
         * Only calculate SSE one time, thus it's unlikely that this branch
         * is taken more than once.
         */
        static int lanes[4];
        hash_cpuid(lanes, 0);
        /*
         * It's very likely that lanes[0] will contain a value unless it
         * isn't a modern x86.
         */
        if (GMQCC_LIKELY(*lanes >= 1))
            sse = (lanes[3] & ((int)1 << 25)) != 0;
        memoize = true;
    }

    return (GMQCC_LIKELY(sse))
        ? hash_sse(key, length)
        : hash_native(key, length);
    /*
     * Same as above but this time the host compiler was built with SSE
     * support. This handles MinGW32 builds for i686+.
     */
#elif defined (__GNUC__) && defined(__i386__) && defined(__SSE__)
    return hash_sse(key, length);
#else
    /*
     * Go the native route, which itself is highly optimized as well for
     * unaligned load/store when dealing with LE.
     */
    return hash_native(key, length);
#endif
}
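
A related note on the dispatch just above: on GCC 4.8+ (the compiler version the removed comment mentions) and recent Clang, __builtin_cpu_supports performs the same run-time feature probe and caches it internally, so the memoize/sse statics become unnecessary. A minimal, self-contained sketch follows; hash_route is a hypothetical stand-in for the real dispatch, which would call hash_sse or hash_native instead of returning a string.

#include <stdio.h>

/* Hypothetical sketch: __builtin_cpu_supports hides the cpuid probe and the
 * memoization behind a compiler builtin. */
static const char *hash_route(void) {
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
    if (__builtin_cpu_supports("sse"))
        return "hash_sse";       /* SSE available at run time */
#endif
    return "hash_native";        /* portable fallback         */
}

int main(void) {
    printf("selected: %s\n", hash_route());
    return 0;
}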

#define HASH_LEN_ALIGN (sizeof(size_t))