diff --git a/source/thirdparty/include/xxh3.h b/source/thirdparty/include/xxh3.h index e3cb45f70..b3a829584 100644 --- a/source/thirdparty/include/xxh3.h +++ b/source/thirdparty/include/xxh3.h @@ -122,7 +122,8 @@ # define XXH_VECTOR XXH_SSE2 # elif defined(__GNUC__) /* msvc support maybe later */ \ && (defined(__ARM_NEON__) || defined(__ARM_NEON)) \ - && defined(__LITTLE_ENDIAN__) /* ARM big endian is a thing */ + && (defined(__LITTLE_ENDIAN__) /* We only support little endian NEON */ \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) # define XXH_VECTOR XXH_NEON # elif defined(__PPC64__) && defined(__POWER8_VECTOR__) && defined(__GNUC__) # define XXH_VECTOR XXH_VSX @@ -147,12 +148,12 @@ # endif #endif -/* U64 XXH_mult32to64(U32 a, U64 b) { return (U64)a * (U64)b; } */ +/* xxh_u64 XXH_mult32to64(xxh_u32 a, xxh_u64 b) { return (xxh_u64)a * (xxh_u64)b; } */ #if defined(_MSC_VER) && defined(_M_IX86) # include <intrin.h> # define XXH_mult32to64(x, y) __emulu(x, y) #else -# define XXH_mult32to64(x, y) ((U64)((x) & 0xFFFFFFFF) * (U64)((y) & 0xFFFFFFFF)) +# define XXH_mult32to64(x, y) ((xxh_u64)((x) & 0xFFFFFFFF) * (xxh_u64)((y) & 0xFFFFFFFF)) #endif /* VSX stuff. It's a lot because VSX support is mediocre across compilers and @@ -165,7 +166,8 @@ typedef __vector unsigned char U8x16; typedef __vector unsigned U32x4; #ifndef XXH_VSX_BE -# ifdef __BIG_ENDIAN__ +# if defined(__BIG_ENDIAN__) \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) # define XXH_VSX_BE 1 # elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__ # warning "-maltivec=be is not recommended. Please use native endianness." @@ -201,7 +203,7 @@ XXH_FORCE_INLINE U64x2 XXH_vec_revb(U64x2 val) * return ret; * } * - * Because both of the main loops load the key, swap, and xor it with data, + * Because both of the main loops load the key, swap, and xor it with input, * we can combine the key swap into this instruction. */ # ifdef vec_permxor @@ -246,7 +248,7 @@ XXH_FORCE_INLINE U64x2 XXH_vec_mule(U32x4 a, U32x4 b) { # error "default keyset is not large enough" #endif -XXH_ALIGN(64) static const BYTE kSecret[XXH_SECRET_DEFAULT_SIZE] = { +XXH_ALIGN(64) static const xxh_u8 kSecret[XXH_SECRET_DEFAULT_SIZE] = { 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, @@ -274,7 +276,7 @@ XXH_ALIGN(64) static const BYTE kSecret[XXH_SECRET_DEFAULT_SIZE] = { __attribute__((__target__("no-sse"))) #endif static XXH128_hash_t -XXH_mult64to128(U64 lhs, U64 rhs) +XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs) { /* * GCC/Clang __uint128_t method. @@ -296,13 +298,13 @@ XXH_mult64to128(U64 lhs, U64 rhs) || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) __uint128_t product = (__uint128_t)lhs * (__uint128_t)rhs; - XXH128_hash_t const r128 = { (U64)(product), (U64)(product >> 64) }; + XXH128_hash_t const r128 = { (xxh_u64)(product), (xxh_u64)(product >> 64) }; return r128; /* * MSVC for x64's _umul128 method. * - * U64 _umul128(U64 Multiplier, U64 Multiplicand, U64 *HighProduct); + * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct); * * This compiles to single operand MUL on x64. 
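 *
 * (An illustrative aside, not part of the patched code: whichever branch
 * is taken, XXH3_mul128_fold64() below reduces the 128-bit product to
 * 64 bits. Assuming a compiler that provides __uint128_t, that fold is
 * simply:
 *
 *     __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs;
 *     return (xxh_u64)product ^ (xxh_u64)(product >> 64);
 * )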
*/ @@ -311,8 +313,8 @@ XXH_mult64to128(U64 lhs, U64 rhs) #ifndef _MSC_VER # pragma intrinsic(_umul128) #endif - U64 product_high; - U64 const product_low = _umul128(lhs, rhs, &product_high); + xxh_u64 product_high; + xxh_u64 const product_low = _umul128(lhs, rhs, &product_high); XXH128_hash_t const r128 = { product_low, product_high }; return r128; @@ -344,11 +346,11 @@ XXH_mult64to128(U64 lhs, U64 rhs) * 2. It hints for, and on Clang, compiles to, the powerful UMAAL * instruction available in ARMv6+ A32/T32, which is shown below: * - * void UMAAL(U32 *RdLo, U32 *RdHi, U32 Rn, U32 Rm) + * void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm) * { - * U64 product = (U64)*RdLo * (U64)*RdHi + Rn + Rm; - * *RdLo = (U32)(product & 0xFFFFFFFF); - * *RdHi = (U32)(product >> 32); + * xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm; + * *RdLo = (xxh_u32)(product & 0xFFFFFFFF); + * *RdHi = (xxh_u32)(product >> 32); * } * * This instruction was designed for efficient long multiplication, @@ -360,15 +362,15 @@ XXH_mult64to128(U64 lhs, U64 rhs) */ /* First calculate all of the cross products. */ - U64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF); - U64 const hi_lo = XXH_mult32to64(lhs >> 32, rhs & 0xFFFFFFFF); - U64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32); - U64 const hi_hi = XXH_mult32to64(lhs >> 32, rhs >> 32); + xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF); + xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32, rhs & 0xFFFFFFFF); + xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32); + xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32, rhs >> 32); /* Now add the products together. These will never overflow. */ - U64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi; - U64 const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; - U64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF); + xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi; + xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; + xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF); XXH128_hash_t r128 = { lower, upper }; return r128; @@ -387,15 +389,15 @@ XXH_mult64to128(U64 lhs, U64 rhs) #if defined(__GNUC__) && !defined(__clang__) && defined(__i386__) __attribute__((__target__("no-sse"))) #endif -static U64 -XXH3_mul128_fold64(U64 lhs, U64 rhs) +static xxh_u64 +XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs) { XXH128_hash_t product = XXH_mult64to128(lhs, rhs); return product.low64 ^ product.high64; } -static XXH64_hash_t XXH3_avalanche(U64 h64) +static XXH64_hash_t XXH3_avalanche(xxh_u64 h64) { h64 ^= h64 >> 37; h64 *= PRIME64_3; @@ -409,57 +411,56 @@ static XXH64_hash_t XXH3_avalanche(U64 h64) * ========================================== */ XXH_FORCE_INLINE XXH64_hash_t -XXH3_len_1to3_64b(const void* data, size_t len, const void* keyPtr, XXH64_hash_t seed) +XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) { - XXH_ASSERT(data != NULL); + XXH_ASSERT(input != NULL); XXH_ASSERT(1 <= len && len <= 3); - XXH_ASSERT(keyPtr != NULL); - { BYTE const c1 = ((const BYTE*)data)[0]; - BYTE const c2 = ((const BYTE*)data)[len >> 1]; - BYTE const c3 = ((const BYTE*)data)[len - 1]; - U32 const combined = ((U32)c1) + (((U32)c2) << 8) + (((U32)c3) << 16) + (((U32)len) << 24); - U64 const keyed = (U64)combined ^ (XXH_readLE32(keyPtr) + seed); - U64 const mixed = keyed * PRIME64_1; + XXH_ASSERT(secret != NULL); + { xxh_u8 const c1 = input[0]; + xxh_u8 const c2 = input[len >> 1]; + xxh_u8 
const c3 = input[len - 1]; + xxh_u32 const combined = ((xxh_u32)c1) | (((xxh_u32)c2) << 8) | (((xxh_u32)c3) << 16) | (((xxh_u32)len) << 24); + xxh_u64 const keyed = (xxh_u64)combined ^ (XXH_readLE32(secret) + seed); + xxh_u64 const mixed = keyed * PRIME64_1; return XXH3_avalanche(mixed); } } XXH_FORCE_INLINE XXH64_hash_t -XXH3_len_4to8_64b(const void* data, size_t len, const void* keyPtr, XXH64_hash_t seed) +XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) { - XXH_ASSERT(data != NULL); - XXH_ASSERT(keyPtr != NULL); + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); XXH_ASSERT(4 <= len && len <= 8); - { U32 const in1 = XXH_readLE32(data); - U32 const in2 = XXH_readLE32((const BYTE*)data + len - 4); - U64 const in64 = in1 + ((U64)in2 << 32); - U64 const keyed = in64 ^ (XXH_readLE64(keyPtr) + seed); - U64 const mix64 = len + ((keyed ^ (keyed >> 51)) * PRIME32_1); + { xxh_u32 const input_lo = XXH_readLE32(input); + xxh_u32 const input_hi = XXH_readLE32(input + len - 4); + xxh_u64 const input_64 = input_lo | ((xxh_u64)input_hi << 32); + xxh_u64 const keyed = input_64 ^ (XXH_readLE64(secret) + seed); + xxh_u64 const mix64 = len + ((keyed ^ (keyed >> 51)) * PRIME32_1); return XXH3_avalanche((mix64 ^ (mix64 >> 47)) * PRIME64_2); } } XXH_FORCE_INLINE XXH64_hash_t -XXH3_len_9to16_64b(const void* data, size_t len, const void* keyPtr, XXH64_hash_t seed) +XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) { - XXH_ASSERT(data != NULL); - XXH_ASSERT(keyPtr != NULL); + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); XXH_ASSERT(9 <= len && len <= 16); - { const U64* const key64 = (const U64*) keyPtr; - U64 const ll1 = XXH_readLE64(data) ^ (XXH_readLE64(key64) + seed); - U64 const ll2 = XXH_readLE64((const BYTE*)data + len - 8) ^ (XXH_readLE64(key64+1) - seed); - U64 const acc = len + (ll1 + ll2) + XXH3_mul128_fold64(ll1, ll2); + { xxh_u64 const input_lo = XXH_readLE64(input) ^ (XXH_readLE64(secret) + seed); + xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ (XXH_readLE64(secret + 8) - seed); + xxh_u64 const acc = len + (input_lo + input_hi) + XXH3_mul128_fold64(input_lo, input_hi); return XXH3_avalanche(acc); } } XXH_FORCE_INLINE XXH64_hash_t -XXH3_len_0to16_64b(const void* data, size_t len, const void* keyPtr, XXH64_hash_t seed) +XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) { XXH_ASSERT(len <= 16); - { if (len > 8) return XXH3_len_9to16_64b(data, len, keyPtr, seed); - if (len >= 4) return XXH3_len_4to8_64b(data, len, keyPtr, seed); - if (len) return XXH3_len_1to3_64b(data, len, keyPtr, seed); + { if (len > 8) return XXH3_len_9to16_64b(input, len, secret, seed); + if (len >= 4) return XXH3_len_4to8_64b(input, len, secret, seed); + if (len) return XXH3_len_1to3_64b(input, len, secret, seed); return 0; } } @@ -469,59 +470,59 @@ XXH3_len_0to16_64b(const void* data, size_t len, const void* keyPtr, XXH64_hash_ #define STRIPE_LEN 64 #define XXH_SECRET_CONSUME_RATE 8 /* nb of secret bytes consumed at each accumulation */ -#define ACC_NB (STRIPE_LEN / sizeof(U64)) +#define ACC_NB (STRIPE_LEN / sizeof(xxh_u64)) typedef enum { XXH3_acc_64bits, XXH3_acc_128bits } XXH3_accWidth_e; XXH_FORCE_INLINE void XXH3_accumulate_512( void* XXH_RESTRICT acc, - const void* XXH_RESTRICT data, - const void* XXH_RESTRICT key, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret, XXH3_accWidth_e accWidth) { #if (XXH_VECTOR == XXH_AVX2) XXH_ASSERT((((size_t)acc) & 
31) == 0); { XXH_ALIGN(32) __m256i* const xacc = (__m256i *) acc; - const __m256i* const xdata = (const __m256i *) data; /* not really aligned, just for ptr arithmetic, and because _mm256_loadu_si256() requires this type */ - const __m256i* const xkey = (const __m256i *) key; /* not really aligned, just for ptr arithmetic, and because _mm256_loadu_si256() requires this type */ + const __m256i* const xinput = (const __m256i *) input; /* not really aligned, just for ptr arithmetic, and because _mm256_loadu_si256() requires this type */ + const __m256i* const xsecret = (const __m256i *) secret; /* not really aligned, just for ptr arithmetic, and because _mm256_loadu_si256() requires this type */ size_t i; for (i=0; i < STRIPE_LEN/sizeof(__m256i); i++) { - __m256i const d = _mm256_loadu_si256 (xdata+i); - __m256i const k = _mm256_loadu_si256 (xkey+i); - __m256i const dk = _mm256_xor_si256 (d,k); /* uint32 dk[8] = {d0+k0, d1+k1, d2+k2, d3+k3, ...} */ - __m256i const mul = _mm256_mul_epu32 (dk, _mm256_shuffle_epi32 (dk, 0x31)); /* uint64 mul[4] = {dk0*dk1, dk2*dk3, ...} */ + __m256i const data_vec = _mm256_loadu_si256 (xinput+i); + __m256i const key_vec = _mm256_loadu_si256 (xsecret+i); + __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec); /* uint32 dk[8] = {d0^k0, d1^k1, d2^k2, d3^k3, ...} */ + __m256i const product = _mm256_mul_epu32 (data_key, _mm256_shuffle_epi32 (data_key, 0x31)); /* uint64 mul[4] = {dk0*dk1, dk2*dk3, ...} */ if (accWidth == XXH3_acc_128bits) { - __m256i const dswap = _mm256_shuffle_epi32(d, _MM_SHUFFLE(1,0,3,2)); - __m256i const add = _mm256_add_epi64(xacc[i], dswap); - xacc[i] = _mm256_add_epi64(mul, add); + __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2)); + __m256i const sum = _mm256_add_epi64(xacc[i], data_swap); + xacc[i] = _mm256_add_epi64(product, sum); } else { /* XXH3_acc_64bits */ - __m256i const add = _mm256_add_epi64(xacc[i], d); - xacc[i] = _mm256_add_epi64(mul, add); + __m256i const sum = _mm256_add_epi64(xacc[i], data_vec); + xacc[i] = _mm256_add_epi64(product, sum); } } } #elif (XXH_VECTOR == XXH_SSE2) XXH_ASSERT((((size_t)acc) & 15) == 0); - { XXH_ALIGN(16) __m128i* const xacc = (__m128i *) acc; /* presumed */ - const __m128i* const xdata = (const __m128i *) data; /* not really aligned, just for ptr arithmetic, and because _mm_loadu_si128() requires this type */ - const __m128i* const xkey = (const __m128i *) key; /* not really aligned, just for ptr arithmetic, and because _mm_loadu_si128() requires this type */ + { XXH_ALIGN(16) __m128i* const xacc = (__m128i *) acc; + const __m128i* const xinput = (const __m128i *) input; /* not really aligned, just for ptr arithmetic, and because _mm_loadu_si128() requires this type */ + const __m128i* const xsecret = (const __m128i *) secret; /* not really aligned, just for ptr arithmetic, and because _mm_loadu_si128() requires this type */ size_t i; for (i=0; i < STRIPE_LEN/sizeof(__m128i); i++) { - __m128i const d = _mm_loadu_si128 (xdata+i); - __m128i const k = _mm_loadu_si128 (xkey+i); - __m128i const dk = _mm_xor_si128 (d,k); /* uint32 dk[4] = {d0+k0, d1+k1, d2+k2, d3+k3} */ - __m128i const mul = _mm_mul_epu32 (dk, _mm_shuffle_epi32 (dk, 0x31)); /* uint64 mul[2] = {dk0*dk1,dk2*dk3} */ + __m128i const data_vec = _mm_loadu_si128 (xinput+i); + __m128i const key_vec = _mm_loadu_si128 (xsecret+i); + __m128i const data_key = _mm_xor_si128 (data_vec, key_vec); /* uint32 dk[4] = {d0^k0, d1^k1, d2^k2, d3^k3} */ + __m128i const product = _mm_mul_epu32 (data_key, 
_mm_shuffle_epi32 (data_key, 0x31)); /* uint64 mul[2] = {dk0*dk1, dk2*dk3} */ if (accWidth == XXH3_acc_128bits) { - __m128i const dswap = _mm_shuffle_epi32(d, _MM_SHUFFLE(1,0,3,2)); - __m128i const add = _mm_add_epi64(xacc[i], dswap); - xacc[i] = _mm_add_epi64(mul, add); + __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2)); + __m128i const sum = _mm_add_epi64(xacc[i], data_swap); + xacc[i] = _mm_add_epi64(product, sum); } else { /* XXH3_acc_64bits */ - __m128i const add = _mm_add_epi64(xacc[i], d); - xacc[i] = _mm_add_epi64(mul, add); + __m128i const sum = _mm_add_epi64(xacc[i], data_vec); + xacc[i] = _mm_add_epi64(product, sum); } } } @@ -531,8 +532,8 @@ XXH3_accumulate_512( void* XXH_RESTRICT acc, { XXH_ALIGN(16) uint64x2_t* const xacc = (uint64x2_t *) acc; /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */ - uint32_t const* const xdata = (const uint32_t *) data; - uint32_t const* const xkey = (const uint32_t *) key; + uint8_t const* const xinput = (const uint8_t *) input; + uint8_t const* const xsecret = (const uint8_t *) secret; size_t i; for (i=0; i < STRIPE_LEN / sizeof(uint64x2_t); i++) { @@ -549,26 +550,26 @@ XXH3_accumulate_512( void* XXH_RESTRICT acc, * ...to do what ARM does in one: * vzip.32 d0, d1 // Interleave high and low bits and overwrite. */ - /* data_vec = xdata[i]; */ - uint32x4_t const data_vec = vld1q_u32(xdata + (i * 4)); - /* key_vec = xkey[i]; */ - uint32x4_t const key_vec = vld1q_u32(xkey + (i * 4)); + /* data_vec = xinput[i]; */ + uint8x16_t const data_vec = vld1q_u8(xinput + (i * 16)); + /* key_vec = xsecret[i]; */ + uint8x16_t const key_vec = vld1q_u8(xsecret + (i * 16)); /* data_key = data_vec ^ key_vec; */ uint32x4_t data_key; if (accWidth == XXH3_acc_64bits) { /* Add first to prevent register swaps */ /* xacc[i] += data_vec; */ - xacc[i] = vaddq_u64 (xacc[i], vreinterpretq_u64_u32(data_vec)); + xacc[i] = vaddq_u64 (xacc[i], vreinterpretq_u64_u8(data_vec)); } else { /* XXH3_acc_128bits */ /* xacc[i] += swap(data_vec); */ /* can probably be optimized better */ - uint64x2_t const data64 = vreinterpretq_u64_u32(data_vec); + uint64x2_t const data64 = vreinterpretq_u64_u8(data_vec); uint64x2_t const swapped= vextq_u64(data64, data64, 1); xacc[i] = vaddq_u64 (xacc[i], swapped); } - data_key = veorq_u32(data_vec, key_vec); + data_key = vreinterpretq_u32_u8(veorq_u8(data_vec, key_vec)); /* Here's the magic. We use the quirkiness of vzip to shuffle data_key in place. * shuffle: data_key[0, 1, 2, 3] = data_key[0, 2, 1, 3] */ @@ -579,22 +580,22 @@ XXH3_accumulate_512( void* XXH_RESTRICT acc, #else /* On aarch64, vshrn/vmovn seems to be equivalent to, if not faster than, the vzip method. 
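 *
 * A rough sketch of that idea (illustrative only; assumes <arm_neon.h>):
 *
 *     uint32x2_t const lo = vmovn_u64(data_key);        // data_key & 0xFFFFFFFF
 *     uint32x2_t const hi = vshrn_n_u64(data_key, 32);  // data_key >> 32
 *     acc = vmlal_u32(acc, lo, hi);                     // acc += (xxh_u64)lo * hi, per lane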
*/ - /* data_vec = xdata[i]; */ - uint32x4_t const data_vec = vld1q_u32(xdata + (i * 4)); - /* key_vec = xkey[i]; */ - uint32x4_t const key_vec = vld1q_u32(xkey + (i * 4)); + /* data_vec = xinput[i]; */ + uint8x16_t const data_vec = vld1q_u8(xinput + (i * 16)); + /* key_vec = xsecret[i]; */ + uint8x16_t const key_vec = vld1q_u8(xsecret + (i * 16)); /* data_key = data_vec ^ key_vec; */ - uint32x4_t const data_key = veorq_u32(data_vec, key_vec); + uint64x2_t const data_key = vreinterpretq_u64_u8(veorq_u8(data_vec, key_vec)); /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF); */ - uint32x2_t const data_key_lo = vmovn_u64 (vreinterpretq_u64_u32(data_key)); + uint32x2_t const data_key_lo = vmovn_u64 (data_key); /* data_key_hi = (uint32x2_t) (data_key >> 32); */ - uint32x2_t const data_key_hi = vshrn_n_u64 (vreinterpretq_u64_u32(data_key), 32); + uint32x2_t const data_key_hi = vshrn_n_u64 (data_key, 32); if (accWidth == XXH3_acc_64bits) { /* xacc[i] += data_vec; */ - xacc[i] = vaddq_u64 (xacc[i], vreinterpretq_u64_u32(data_vec)); + xacc[i] = vaddq_u64 (xacc[i], vreinterpretq_u64_u8(data_vec)); } else { /* XXH3_acc_128bits */ /* xacc[i] += swap(data_vec); */ - uint64x2_t const data64 = vreinterpretq_u64_u32(data_vec); + uint64x2_t const data64 = vreinterpretq_u64_u8(data_vec); uint64x2_t const swapped= vextq_u64(data64, data64, 1); xacc[i] = vaddq_u64 (xacc[i], swapped); } @@ -607,8 +608,8 @@ XXH3_accumulate_512( void* XXH_RESTRICT acc, #elif (XXH_VECTOR == XXH_VSX) U64x2* const xacc = (U64x2*) acc; /* presumed aligned */ - U64x2 const* const xdata = (U64x2 const*) data; /* no alignment restriction */ - U64x2 const* const xkey = (U64x2 const*) key; /* no alignment restriction */ + U64x2 const* const xinput = (U64x2 const*) input; /* no alignment restriction */ + U64x2 const* const xsecret = (U64x2 const*) secret; /* no alignment restriction */ U64x2 const v32 = { 32, 32 }; #if XXH_VSX_BE U8x16 const vXorSwap = { 0x07, 0x16, 0x25, 0x34, 0x43, 0x52, 0x61, 0x70, @@ -616,17 +617,17 @@ XXH3_accumulate_512( void* XXH_RESTRICT acc, #endif size_t i; for (i = 0; i < STRIPE_LEN / sizeof(U64x2); i++) { - /* data_vec = xdata[i]; */ - /* key_vec = xkey[i]; */ + /* data_vec = xinput[i]; */ + /* key_vec = xsecret[i]; */ #if XXH_VSX_BE /* byteswap */ - U64x2 const data_vec = XXH_vec_revb(vec_vsx_ld(0, xdata + i)); - U64x2 const key_raw = vec_vsx_ld(0, xkey + i); - /* See comment above. data_key = data_vec ^ swap(xkey[i]); */ + U64x2 const data_vec = XXH_vec_revb(vec_vsx_ld(0, xinput + i)); + U64x2 const key_raw = vec_vsx_ld(0, xsecret + i); + /* See comment above. 
data_key = data_vec ^ swap(xsecret[i]); */ U64x2 const data_key = (U64x2)XXH_vec_permxor((U8x16)data_vec, (U8x16)key_raw, vXorSwap); #else - U64x2 const data_vec = vec_vsx_ld(0, xdata + i); - U64x2 const key_vec = vec_vsx_ld(0, xkey + i); + U64x2 const data_vec = vec_vsx_ld(0, xinput + i); + U64x2 const key_vec = vec_vsx_ld(0, xsecret + i); U64x2 const data_key = data_vec ^ key_vec; #endif /* shuffled = (data_key << 32) | (data_key >> 32); */ @@ -646,83 +647,76 @@ XXH3_accumulate_512( void* XXH_RESTRICT acc, #else /* scalar variant of Accumulator - universal */ - XXH_ALIGN(XXH_ACC_ALIGN) U64* const xacc = (U64*) acc; /* presumed aligned on 32-bytes boundaries, little hint for the auto-vectorizer */ - const char* const xdata = (const char*) data; /* no alignment restriction */ - const char* const xkey = (const char*) key; /* no alignment restriction */ + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned on 32-bytes boundaries, little hint for the auto-vectorizer */ + const xxh_u8* const xinput = (const xxh_u8*) input; /* no alignment restriction */ + const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */ size_t i; XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0); - for (i=0; i < ACC_NB; i+=2) { - U64 const in1 = XXH_readLE64(xdata + 8*i); - U64 const in2 = XXH_readLE64(xdata + 8*(i+1)); - U64 const key1 = XXH_readLE64(xkey + 8*i); - U64 const key2 = XXH_readLE64(xkey + 8*(i+1)); - U64 const data_key1 = key1 ^ in1; - U64 const data_key2 = key2 ^ in2; - xacc[i] += XXH_mult32to64(data_key1 & 0xFFFFFFFF, data_key1 >> 32); - xacc[i+1] += XXH_mult32to64(data_key2 & 0xFFFFFFFF, data_key2 >> 32); - if (accWidth == XXH3_acc_128bits) { - xacc[i] += in2; - xacc[i+1] += in1; - } else { /* XXH3_acc_64bits */ - xacc[i] += in1; - xacc[i+1] += in2; + for (i=0; i < ACC_NB; i++) { + xxh_u64 const data_val = XXH_readLE64(xinput + 8*i); + xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + i*8); + + if (accWidth == XXH3_acc_64bits) { + xacc[i] += data_val; + } else { + xacc[i ^ 1] += data_val; /* swap adjacent lanes */ } + xacc[i] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32); } #endif } XXH_FORCE_INLINE void -XXH3_scrambleAcc(void* XXH_RESTRICT acc, const void* XXH_RESTRICT key) +XXH3_scrambleAcc(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) { #if (XXH_VECTOR == XXH_AVX2) XXH_ASSERT((((size_t)acc) & 31) == 0); { XXH_ALIGN(32) __m256i* const xacc = (__m256i*) acc; - const __m256i* const xkey = (const __m256i *) key; /* not really aligned, just for ptr arithmetic, and because _mm256_loadu_si256() requires this argument type */ + const __m256i* const xsecret = (const __m256i *) secret; /* not really aligned, just for ptr arithmetic, and because _mm256_loadu_si256() requires this argument type */ const __m256i prime32 = _mm256_set1_epi32((int)PRIME32_1); size_t i; for (i=0; i < STRIPE_LEN/sizeof(__m256i); i++) { - __m256i data = xacc[i]; - __m256i const shifted = _mm256_srli_epi64(data, 47); - data = _mm256_xor_si256(data, shifted); + /* xacc[i] ^= (xacc[i] >> 47) */ + __m256i const acc_vec = xacc[i]; + __m256i const shifted = _mm256_srli_epi64 (acc_vec, 47); + __m256i const data_vec = _mm256_xor_si256 (acc_vec, shifted); + /* xacc[i] ^= xsecret; */ + __m256i const key_vec = _mm256_loadu_si256 (xsecret+i); + __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec); - { __m256i const k = _mm256_loadu_si256 (xkey+i); - __m256i const dk = _mm256_xor_si256 (data, k); - - __m256i const dk1 = _mm256_mul_epu32 (dk, 
prime32); - - __m256i const d2 = _mm256_shuffle_epi32 (dk, 0x31); - __m256i const dk2 = _mm256_mul_epu32 (d2, prime32); - __m256i const dk2h= _mm256_slli_epi64 (dk2, 32); - - xacc[i] = _mm256_add_epi64(dk1, dk2h); - } } + /* xacc[i] *= PRIME32_1; */ + __m256i const data_key_hi = _mm256_shuffle_epi32 (data_key, 0x31); + __m256i const prod_lo = _mm256_mul_epu32 (data_key, prime32); + __m256i const prod_hi = _mm256_mul_epu32 (data_key_hi, prime32); + xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32)); + } } #elif (XXH_VECTOR == XXH_SSE2) + XXH_ASSERT((((size_t)acc) & 15) == 0); { XXH_ALIGN(16) __m128i* const xacc = (__m128i*) acc; - const __m128i* const xkey = (const __m128i *) key; /* not really aligned, just for ptr arithmetic */ + const __m128i* const xsecret = (const __m128i *) secret; /* not really aligned, just for ptr arithmetic, and because _mm_loadu_si128() requires this argument type */ const __m128i prime32 = _mm_set1_epi32((int)PRIME32_1); size_t i; for (i=0; i < STRIPE_LEN/sizeof(__m128i); i++) { - __m128i data = xacc[i]; - __m128i const shifted = _mm_srli_epi64(data, 47); - data = _mm_xor_si128(data, shifted); + /* xacc[i] ^= (xacc[i] >> 47) */ + __m128i const acc_vec = xacc[i]; + __m128i const shifted = _mm_srli_epi64 (acc_vec, 47); + __m128i const data_vec = _mm_xor_si128 (acc_vec, shifted); + /* xacc[i] ^= xsecret; */ + __m128i const key_vec = _mm_loadu_si128 (xsecret+i); + __m128i const data_key = _mm_xor_si128 (data_vec, key_vec); - { __m128i const k = _mm_loadu_si128 (xkey+i); - __m128i const dk = _mm_xor_si128 (data,k); - - __m128i const dk1 = _mm_mul_epu32 (dk, prime32); - - __m128i const d2 = _mm_shuffle_epi32 (dk, 0x31); - __m128i const dk2 = _mm_mul_epu32 (d2, prime32); - __m128i const dk2h= _mm_slli_epi64(dk2, 32); - - xacc[i] = _mm_add_epi64(dk1, dk2h); - } } + /* xacc[i] *= PRIME32_1; */ + __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, 0x31); + __m128i const prod_lo = _mm_mul_epu32 (data_key, prime32); + __m128i const prod_hi = _mm_mul_epu32 (data_key_hi, prime32); + xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32)); + } } #elif (XXH_VECTOR == XXH_NEON) @@ -730,7 +724,7 @@ XXH3_scrambleAcc(void* XXH_RESTRICT acc, const void* XXH_RESTRICT key) XXH_ASSERT((((size_t)acc) & 15) == 0); { uint64x2_t* const xacc = (uint64x2_t*) acc; - uint32_t const* const xkey = (uint32_t const*) key; + uint8_t const* const xsecret = (uint8_t const*) secret; uint32x2_t const prime = vdup_n_u32 (PRIME32_1); size_t i; @@ -740,8 +734,8 @@ XXH3_scrambleAcc(void* XXH_RESTRICT acc, const void* XXH_RESTRICT key) uint64x2_t const shifted = vshrq_n_u64 (acc_vec, 47); uint64x2_t const data_vec = veorq_u64 (acc_vec, shifted); - /* key_vec = xkey[i]; */ - uint32x4_t const key_vec = vld1q_u32 (xkey + (i * 4)); + /* key_vec = xsecret[i]; */ + uint32x4_t const key_vec = vreinterpretq_u32_u8(vld1q_u8(xsecret + (i * 16))); /* data_key = data_vec ^ key_vec; */ uint32x4_t const data_key = veorq_u32 (vreinterpretq_u32_u64(data_vec), key_vec); /* shuffled = { data_key[0, 2], data_key[1, 3] }; */ @@ -760,7 +754,7 @@ XXH3_scrambleAcc(void* XXH_RESTRICT acc, const void* XXH_RESTRICT key) #elif (XXH_VECTOR == XXH_VSX) U64x2* const xacc = (U64x2*) acc; - const U64x2* const xkey = (const U64x2*) key; + const U64x2* const xsecret = (const U64x2*) secret; /* constants */ U64x2 const v32 = { 32, 32 }; U64x2 const v47 = { 47, 47 }; @@ -774,13 +768,13 @@ XXH3_scrambleAcc(void* XXH_RESTRICT acc, const void* XXH_RESTRICT key) for (i = 0; i < STRIPE_LEN / sizeof(U64x2); i++) { U64x2 
const acc_vec = xacc[i]; U64x2 const data_vec = acc_vec ^ (acc_vec >> v47); - /* key_vec = xkey[i]; */ + /* key_vec = xsecret[i]; */ #if XXH_VSX_BE /* swap bytes words */ - U64x2 const key_raw = vec_vsx_ld(0, xkey + i); - U64x2 const data_key = (U64x2)XXH_vec_permxor((U8x16)data_vec, (U8x16)key_raw, vXorSwap); + U64x2 const key_raw = vec_vsx_ld(0, xsecret + i); + U64x2 const data_key = (U64x2)XXH_vec_permxor((U8x16)data_vec, (U8x16)key_raw, vXorSwap); #else - U64x2 const key_vec = vec_vsx_ld(0, xkey + i); + U64x2 const key_vec = vec_vsx_ld(0, xsecret + i); U64x2 const data_key = data_vec ^ key_vec; #endif @@ -795,14 +789,13 @@ XXH3_scrambleAcc(void* XXH_RESTRICT acc, const void* XXH_RESTRICT key) #else /* scalar variant of Scrambler - universal */ - XXH_ALIGN(XXH_ACC_ALIGN) U64* const xacc = (U64*) acc; /* presumed aligned on 32-bytes boundaries, little hint for the auto-vectorizer */ - const char* const xkey = (const char*) key; /* no alignment restriction */ - int i; + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned on 32-bytes boundaries, little hint for the auto-vectorizer */ + const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */ + size_t i; XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0); - - for (i=0; i < (int)ACC_NB; i++) { - U64 const key64 = XXH_readLE64(xkey + 8*i); - U64 acc64 = xacc[i]; + for (i=0; i < ACC_NB; i++) { + xxh_u64 const key64 = XXH_readLE64(xsecret + 8*i); + xxh_u64 acc64 = xacc[i]; acc64 ^= acc64 >> 47; acc64 ^= key64; acc64 *= PRIME32_1; @@ -814,17 +807,17 @@ XXH3_scrambleAcc(void* XXH_RESTRICT acc, const void* XXH_RESTRICT key) /* assumption : nbStripes will not overflow secret size */ XXH_FORCE_INLINE void -XXH3_accumulate( U64* XXH_RESTRICT acc, - const void* XXH_RESTRICT data, - const void* XXH_RESTRICT secret, +XXH3_accumulate( xxh_u64* XXH_RESTRICT acc, + const xxh_u8* XXH_RESTRICT input, + const xxh_u8* XXH_RESTRICT secret, size_t nbStripes, XXH3_accWidth_e accWidth) { size_t n; for (n = 0; n < nbStripes; n++ ) { XXH3_accumulate_512(acc, - (const char*)data + n*STRIPE_LEN, - (const char*)secret + n*XXH_SECRET_CONSUME_RATE, + input + n*STRIPE_LEN, + secret + n*XXH_SECRET_CONSUME_RATE, accWidth); } } @@ -834,14 +827,15 @@ XXH3_accumulate( U64* XXH_RESTRICT acc, * However, it auto-vectorizes better with AVX2 if it is `FORCE_INLINE` * Pretty much all other modes and compilers prefer `FORCE_INLINE`. 
*/ -#if defined(__clang__) && (XXH_VECTOR==0) && !defined(__AVX2__) + +#if defined(__clang__) && (XXH_VECTOR==0) && !defined(__AVX2__) && !defined(__arm__) && !defined(__thumb__) static void #else XXH_FORCE_INLINE void #endif -XXH3_hashLong_internal_loop( U64* XXH_RESTRICT acc, - const void* XXH_RESTRICT data, size_t len, - const void* XXH_RESTRICT secret, size_t secretSize, +XXH3_hashLong_internal_loop( xxh_u64* XXH_RESTRICT acc, + const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, XXH3_accWidth_e accWidth) { size_t const nb_rounds = (secretSize - STRIPE_LEN) / XXH_SECRET_CONSUME_RATE; @@ -853,42 +847,41 @@ XXH3_hashLong_internal_loop( U64* XXH_RESTRICT acc, XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); for (n = 0; n < nb_blocks; n++) { - XXH3_accumulate(acc, (const char*)data + n*block_len, secret, nb_rounds, accWidth); - XXH3_scrambleAcc(acc, (const char*)secret + secretSize - STRIPE_LEN); + XXH3_accumulate(acc, input + n*block_len, secret, nb_rounds, accWidth); + XXH3_scrambleAcc(acc, secret + secretSize - STRIPE_LEN); } /* last partial block */ XXH_ASSERT(len > STRIPE_LEN); { size_t const nbStripes = (len - (block_len * nb_blocks)) / STRIPE_LEN; XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE)); - XXH3_accumulate(acc, (const char*)data + nb_blocks*block_len, secret, nbStripes, accWidth); + XXH3_accumulate(acc, input + nb_blocks*block_len, secret, nbStripes, accWidth); /* last stripe */ if (len & (STRIPE_LEN - 1)) { - const void* const p = (const char*)data + len - STRIPE_LEN; + const xxh_u8* const p = input + len - STRIPE_LEN; #define XXH_SECRET_LASTACC_START 7 /* do not align on 8, so that secret is different from scrambler */ - XXH3_accumulate_512(acc, p, (const char*)secret + secretSize - STRIPE_LEN - XXH_SECRET_LASTACC_START, accWidth); + XXH3_accumulate_512(acc, p, secret + secretSize - STRIPE_LEN - XXH_SECRET_LASTACC_START, accWidth); } } } -XXH_FORCE_INLINE U64 -XXH3_mix2Accs(const U64* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +XXH_FORCE_INLINE xxh_u64 +XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret) { - const U64* const key64 = (const U64*)secret; return XXH3_mul128_fold64( - acc[0] ^ XXH_readLE64(key64), - acc[1] ^ XXH_readLE64(key64+1) ); + acc[0] ^ XXH_readLE64(secret), + acc[1] ^ XXH_readLE64(secret+8) ); } static XXH64_hash_t -XXH3_mergeAccs(const U64* XXH_RESTRICT acc, const void* XXH_RESTRICT secret, U64 start) +XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start) { - U64 result64 = start; + xxh_u64 result64 = start; - result64 += XXH3_mix2Accs(acc+0, (const char*)secret + 0); - result64 += XXH3_mix2Accs(acc+2, (const char*)secret + 16); - result64 += XXH3_mix2Accs(acc+4, (const char*)secret + 32); - result64 += XXH3_mix2Accs(acc+6, (const char*)secret + 48); + result64 += XXH3_mix2Accs(acc+0, secret + 0); + result64 += XXH3_mix2Accs(acc+2, secret + 16); + result64 += XXH3_mix2Accs(acc+4, secret + 32); + result64 += XXH3_mix2Accs(acc+6, secret + 48); return XXH3_avalanche(result64); } @@ -897,56 +890,54 @@ XXH3_mergeAccs(const U64* XXH_RESTRICT acc, const void* XXH_RESTRICT secret, U64 PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1 }; XXH_FORCE_INLINE XXH64_hash_t -XXH3_hashLong_internal(const void* XXH_RESTRICT data, size_t len, - const void* XXH_RESTRICT secret, size_t secretSize) +XXH3_hashLong_internal(const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize) { - 
XXH_ALIGN(XXH_ACC_ALIGN) U64 acc[ACC_NB] = XXH3_INIT_ACC; + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[ACC_NB] = XXH3_INIT_ACC; - XXH3_hashLong_internal_loop(acc, data, len, secret, secretSize, XXH3_acc_64bits); + XXH3_hashLong_internal_loop(acc, input, len, secret, secretSize, XXH3_acc_64bits); /* converge into final hash */ XXH_STATIC_ASSERT(sizeof(acc) == 64); #define XXH_SECRET_MERGEACCS_START 11 /* do not align on 8, so that secret is different from accumulator */ XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); - return XXH3_mergeAccs(acc, (const char*)secret + XXH_SECRET_MERGEACCS_START, (U64)len * PRIME64_1); + return XXH3_mergeAccs(acc, secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * PRIME64_1); } XXH_NO_INLINE XXH64_hash_t /* It's important for performance that XXH3_hashLong is not inlined. Not sure why (uop cache maybe ?), but difference is large and easily measurable */ -XXH3_hashLong_64b_defaultSecret(const void* XXH_RESTRICT data, size_t len) +XXH3_hashLong_64b_defaultSecret(const xxh_u8* XXH_RESTRICT input, size_t len) { - return XXH3_hashLong_internal(data, len, kSecret, sizeof(kSecret)); + return XXH3_hashLong_internal(input, len, kSecret, sizeof(kSecret)); } XXH_NO_INLINE XXH64_hash_t /* It's important for performance that XXH3_hashLong is not inlined. Not sure why (uop cache maybe ?), but difference is large and easily measurable */ -XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT data, size_t len, - const void* XXH_RESTRICT secret, size_t secretSize) +XXH3_hashLong_64b_withSecret(const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize) { - return XXH3_hashLong_internal(data, len, secret, secretSize); + return XXH3_hashLong_internal(input, len, secret, secretSize); } -XXH_FORCE_INLINE void XXH_writeLE64(void* dst, U64 v64) +XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64) { if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64); memcpy(dst, &v64, sizeof(v64)); } -/* XXH3_initKeySeed() : +/* XXH3_initCustomSecret() : * destination `customSecret` is presumed allocated and same size as `kSecret`. */ -XXH_FORCE_INLINE void XXH3_initKeySeed(void* customSecret, U64 seed64) +XXH_FORCE_INLINE void XXH3_initCustomSecret(xxh_u8* customSecret, xxh_u64 seed64) { - char* const dst = (char*)customSecret; - const char* const src = (const char*)kSecret; int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16; int i; XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0); for (i=0; i < nbRounds; i++) { - XXH_writeLE64(dst + 16*i, XXH_readLE64(src + 16*i) + seed64); - XXH_writeLE64(dst + 16*i + 8, XXH_readLE64(src + 16*i + 8) - seed64); + XXH_writeLE64(customSecret + 16*i, XXH_readLE64(kSecret + 16*i) + seed64); + XXH_writeLE64(customSecret + 16*i + 8, XXH_readLE64(kSecret + 16*i + 8) - seed64); } } @@ -959,53 +950,49 @@ XXH_FORCE_INLINE void XXH3_initKeySeed(void* customSecret, U64 seed64) * Try to avoid it whenever possible (typically when seed==0). */ XXH_NO_INLINE XXH64_hash_t /* It's important for performance that XXH3_hashLong is not inlined. 
Not sure why (uop cache maybe ?), but difference is large and easily measurable */ -XXH3_hashLong_64b_withSeed(const void* data, size_t len, XXH64_hash_t seed) +XXH3_hashLong_64b_withSeed(const xxh_u8* input, size_t len, XXH64_hash_t seed) { - XXH_ALIGN(8) char secret[XXH_SECRET_DEFAULT_SIZE]; - if (seed==0) return XXH3_hashLong_64b_defaultSecret(data, len); - XXH3_initKeySeed(secret, seed); - return XXH3_hashLong_internal(data, len, secret, sizeof(secret)); + XXH_ALIGN(8) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; + if (seed==0) return XXH3_hashLong_64b_defaultSecret(input, len); + XXH3_initCustomSecret(secret, seed); + return XXH3_hashLong_internal(input, len, secret, sizeof(secret)); } -XXH_FORCE_INLINE U64 XXH3_mix16B(const void* XXH_RESTRICT data, - const void* XXH_RESTRICT key, U64 seed64) +XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input, + const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64) { - const U64* const key64 = (const U64*)key; - U64 const ll1 = XXH_readLE64(data); - U64 const ll2 = XXH_readLE64((const BYTE*)data+8); + xxh_u64 const input_lo = XXH_readLE64(input); + xxh_u64 const input_hi = XXH_readLE64(input+8); return XXH3_mul128_fold64( - ll1 ^ (XXH_readLE64(key64) + seed64), - ll2 ^ (XXH_readLE64(key64+1) - seed64) ); + input_lo ^ (XXH_readLE64(secret) + seed64), + input_hi ^ (XXH_readLE64(secret+8) - seed64) ); } XXH_FORCE_INLINE XXH64_hash_t -XXH3_len_17to128_64b(const void* XXH_RESTRICT data, size_t len, - const void* XXH_RESTRICT secret, size_t secretSize, +XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, XXH64_hash_t seed) { - const BYTE* const p = (const BYTE*)data; - const char* const key = (const char*)secret; - XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; XXH_ASSERT(16 < len && len <= 128); - { U64 acc = len * PRIME64_1; + { xxh_u64 acc = len * PRIME64_1; if (len > 32) { if (len > 64) { if (len > 96) { - acc += XXH3_mix16B(p+48, key+96, seed); - acc += XXH3_mix16B(p+len-64, key+112, seed); + acc += XXH3_mix16B(input+48, secret+96, seed); + acc += XXH3_mix16B(input+len-64, secret+112, seed); } - acc += XXH3_mix16B(p+32, key+64, seed); - acc += XXH3_mix16B(p+len-48, key+80, seed); + acc += XXH3_mix16B(input+32, secret+64, seed); + acc += XXH3_mix16B(input+len-48, secret+80, seed); } - acc += XXH3_mix16B(p+16, key+32, seed); - acc += XXH3_mix16B(p+len-32, key+48, seed); + acc += XXH3_mix16B(input+16, secret+32, seed); + acc += XXH3_mix16B(input+len-32, secret+48, seed); } - acc += XXH3_mix16B(p+0, key+0, seed); - acc += XXH3_mix16B(p+len-16, key+16, seed); + acc += XXH3_mix16B(input+0, secret+0, seed); + acc += XXH3_mix16B(input+len-16, secret+16, seed); return XXH3_avalanche(acc); } @@ -1014,67 +1001,64 @@ XXH3_len_17to128_64b(const void* XXH_RESTRICT data, size_t len, #define XXH3_MIDSIZE_MAX 240 XXH_NO_INLINE XXH64_hash_t -XXH3_len_129to240_64b(const void* XXH_RESTRICT data, size_t len, - const void* XXH_RESTRICT secret, size_t secretSize, +XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, XXH64_hash_t seed) { - const BYTE* const p = (const BYTE*)data; - const char* const key = (const char*)secret; - XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX); #define XXH3_MIDSIZE_STARTOFFSET 3 #define XXH3_MIDSIZE_LASTOFFSET 17 - { U64 acc = len * PRIME64_1; + { xxh_u64 acc = len * PRIME64_1; int const nbRounds = (int)len / 
16; int i; for (i=0; i<8; i++) { - acc += XXH3_mix16B(p+(16*i), key+(16*i), seed); + acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed); } acc = XXH3_avalanche(acc); XXH_ASSERT(nbRounds >= 8); for (i=8 ; i < nbRounds; i++) { - acc += XXH3_mix16B(p+(16*i), key+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed); + acc += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed); } /* last bytes */ - acc += XXH3_mix16B(p + len - 16, key + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed); + acc += XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed); return XXH3_avalanche(acc); } } /* === Public entry point === */ -XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* data, size_t len) +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* input, size_t len) { - if (len <= 16) return XXH3_len_0to16_64b(data, len, kSecret, 0); - if (len <= 128) return XXH3_len_17to128_64b(data, len, kSecret, sizeof(kSecret), 0); - if (len <= XXH3_MIDSIZE_MAX) return XXH3_len_129to240_64b(data, len, kSecret, sizeof(kSecret), 0); - return XXH3_hashLong_64b_defaultSecret(data, len); + if (len <= 16) return XXH3_len_0to16_64b((const xxh_u8*)input, len, kSecret, 0); + if (len <= 128) return XXH3_len_17to128_64b((const xxh_u8*)input, len, kSecret, sizeof(kSecret), 0); + if (len <= XXH3_MIDSIZE_MAX) return XXH3_len_129to240_64b((const xxh_u8*)input, len, kSecret, sizeof(kSecret), 0); + return XXH3_hashLong_64b_defaultSecret((const xxh_u8*)input, len); } XXH_PUBLIC_API XXH64_hash_t -XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize) +XXH3_64bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize) { XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); /* if an action must be taken should `secret` conditions not be respected, * it should be done here. * For now, it's a contract pre-condition. 
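 * (Purely as illustration, a hypothetical guard here would read:
 *     if (secret == NULL || secretSize < XXH3_SECRET_SIZE_MIN) return 0;
 * )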
* Adding a check and a branch here would cost performance at every hash */ - if (len <= 16) return XXH3_len_0to16_64b(data, len, secret, 0); - if (len <= 128) return XXH3_len_17to128_64b(data, len, secret, secretSize, 0); - if (len <= XXH3_MIDSIZE_MAX) return XXH3_len_129to240_64b(data, len, secret, secretSize, 0); - return XXH3_hashLong_64b_withSecret(data, len, secret, secretSize); + if (len <= 16) return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, 0); + if (len <= 128) return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, 0); + if (len <= XXH3_MIDSIZE_MAX) return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, 0); + return XXH3_hashLong_64b_withSecret((const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize); } XXH_PUBLIC_API XXH64_hash_t -XXH3_64bits_withSeed(const void* data, size_t len, XXH64_hash_t seed) +XXH3_64bits_withSeed(const void* input, size_t len, XXH64_hash_t seed) { - if (len <= 16) return XXH3_len_0to16_64b(data, len, kSecret, seed); - if (len <= 128) return XXH3_len_17to128_64b(data, len, kSecret, sizeof(kSecret), seed); - if (len <= XXH3_MIDSIZE_MAX) return XXH3_len_129to240_64b(data, len, kSecret, sizeof(kSecret), seed); - return XXH3_hashLong_64b_withSeed(data, len, seed); + if (len <= 16) return XXH3_len_0to16_64b((const xxh_u8*)input, len, kSecret, seed); + if (len <= 128) return XXH3_len_17to128_64b((const xxh_u8*)input, len, kSecret, sizeof(kSecret), seed); + if (len <= XXH3_MIDSIZE_MAX) return XXH3_len_129to240_64b((const xxh_u8*)input, len, kSecret, sizeof(kSecret), seed); + return XXH3_hashLong_64b_withSeed((const xxh_u8*)input, len, seed); } /* === XXH3 streaming === */ @@ -1099,7 +1083,7 @@ XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state) static void XXH3_64bits_reset_internal(XXH3_state_t* statePtr, XXH64_hash_t seed, - const void* secret, size_t secretSize) + const xxh_u8* secret, size_t secretSize) { XXH_ASSERT(statePtr != NULL); memset(statePtr, 0, sizeof(*statePtr)); @@ -1131,7 +1115,7 @@ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize) { if (statePtr == NULL) return XXH_ERROR; - XXH3_64bits_reset_internal(statePtr, 0, secret, secretSize); + XXH3_64bits_reset_internal(statePtr, 0, (const xxh_u8*)secret, secretSize); if (secret == NULL) return XXH_ERROR; if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR; return XXH_OK; @@ -1142,34 +1126,34 @@ XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed) { if (statePtr == NULL) return XXH_ERROR; XXH3_64bits_reset_internal(statePtr, seed, kSecret, XXH_SECRET_DEFAULT_SIZE); - XXH3_initKeySeed(statePtr->customSecret, seed); + XXH3_initCustomSecret(statePtr->customSecret, seed); statePtr->secret = statePtr->customSecret; return XXH_OK; } XXH_FORCE_INLINE void -XXH3_consumeStripes( U64* acc, - XXH32_hash_t* nbStripesSoFarPtr, XXH32_hash_t nbStripesPerBlock, - const void* data, size_t totalStripes, - const void* secret, size_t secretLimit, - XXH3_accWidth_e accWidth) +XXH3_consumeStripes( xxh_u64* acc, + XXH32_hash_t* nbStripesSoFarPtr, XXH32_hash_t nbStripesPerBlock, + const xxh_u8* input, size_t totalStripes, + const xxh_u8* secret, size_t secretLimit, + XXH3_accWidth_e accWidth) { XXH_ASSERT(*nbStripesSoFarPtr < nbStripesPerBlock); if (nbStripesPerBlock - *nbStripesSoFarPtr <= totalStripes) { /* need a scrambling operation */ size_t const nbStripes = nbStripesPerBlock - *nbStripesSoFarPtr; - 
XXH3_accumulate(acc, data, (const char*)secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripes, accWidth); - XXH3_scrambleAcc(acc, (const char*)secret + secretLimit); - XXH3_accumulate(acc, (const char*)data + nbStripes * STRIPE_LEN, secret, totalStripes - nbStripes, accWidth); + XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripes, accWidth); + XXH3_scrambleAcc(acc, secret + secretLimit); + XXH3_accumulate(acc, input + nbStripes * STRIPE_LEN, secret, totalStripes - nbStripes, accWidth); *nbStripesSoFarPtr = (XXH32_hash_t)(totalStripes - nbStripes); } else { - XXH3_accumulate(acc, data, (const char*)secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, totalStripes, accWidth); + XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, totalStripes, accWidth); *nbStripesSoFarPtr += (XXH32_hash_t)totalStripes; } } XXH_FORCE_INLINE XXH_errorcode -XXH3_update(XXH3_state_t* state, const void* input, size_t len, XXH3_accWidth_e accWidth) +XXH3_update(XXH3_state_t* state, const xxh_u8* input, size_t len, XXH3_accWidth_e accWidth) { if (input==NULL) #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) @@ -1178,8 +1162,7 @@ XXH3_update(XXH3_state_t* state, const void* input, size_t len, XXH3_accWidth_e return XXH_ERROR; #endif - { const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; + { const xxh_u8* const bEnd = input + len; state->totalLen += len; @@ -1193,10 +1176,10 @@ XXH3_update(XXH3_state_t* state, const void* input, size_t len, XXH3_accWidth_e #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / STRIPE_LEN) XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % STRIPE_LEN == 0); /* clean multiple */ - if (state->bufferedSize) { /* some data within internal buffer: fill then consume it */ + if (state->bufferedSize) { /* some input within internal buffer: fill then consume it */ size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize; XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize); - p += loadSize; + input += loadSize; XXH3_consumeStripes(state->acc, &state->nbStripesSoFar, state->nbStripesPerBlock, state->buffer, XXH3_INTERNALBUFFER_STRIPES, @@ -1206,21 +1189,21 @@ XXH3_update(XXH3_state_t* state, const void* input, size_t len, XXH3_accWidth_e } /* consume input by full buffer quantities */ - if (p+XXH3_INTERNALBUFFER_SIZE <= bEnd) { - const BYTE* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE; + if (input+XXH3_INTERNALBUFFER_SIZE <= bEnd) { + const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE; do { XXH3_consumeStripes(state->acc, &state->nbStripesSoFar, state->nbStripesPerBlock, - p, XXH3_INTERNALBUFFER_STRIPES, + input, XXH3_INTERNALBUFFER_STRIPES, state->secret, state->secretLimit, accWidth); - p += XXH3_INTERNALBUFFER_SIZE; - } while (p<=limit); + input += XXH3_INTERNALBUFFER_SIZE; + } while (input<=limit); } - if (p < bEnd) { /* some remaining input data : buffer it */ - XXH_memcpy(state->buffer, p, (size_t)(bEnd-p)); - state->bufferedSize = (XXH32_hash_t)(bEnd-p); + if (input < bEnd) { /* some remaining input : buffer it */ + XXH_memcpy(state->buffer, input, (size_t)(bEnd-input)); + state->bufferedSize = (XXH32_hash_t)(bEnd-input); } } @@ -1230,14 +1213,14 @@ XXH3_update(XXH3_state_t* state, const void* input, size_t len, XXH3_accWidth_e XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update(XXH3_state_t* state, const void* input, size_t len) { - return XXH3_update(state, input, len, XXH3_acc_64bits); + return 
XXH3_update(state, (const xxh_u8*)input, len, XXH3_acc_64bits); } XXH_FORCE_INLINE void XXH3_digest_long (XXH64_hash_t* acc, const XXH3_state_t* state, XXH3_accWidth_e accWidth) { - memcpy(acc, state->acc, sizeof(state->acc)); /* digest locally, state remains unaltered, and can continue ingesting more data afterwards */ + memcpy(acc, state->acc, sizeof(state->acc)); /* digest locally, state remains unaltered, and can continue ingesting more input afterwards */ if (state->bufferedSize >= STRIPE_LEN) { size_t const totalNbStripes = state->bufferedSize / STRIPE_LEN; XXH32_hash_t nbStripesSoFar = state->nbStripesSoFar; @@ -1249,18 +1232,18 @@ XXH3_digest_long (XXH64_hash_t* acc, const XXH3_state_t* state, XXH3_accWidth_e if (state->bufferedSize % STRIPE_LEN) { /* one last partial stripe */ XXH3_accumulate_512(acc, state->buffer + state->bufferedSize - STRIPE_LEN, - (const char*)state->secret + state->secretLimit - XXH_SECRET_LASTACC_START, + state->secret + state->secretLimit - XXH_SECRET_LASTACC_START, accWidth); } } else { /* bufferedSize < STRIPE_LEN */ if (state->bufferedSize) { /* one last stripe */ - char lastStripe[STRIPE_LEN]; + xxh_u8 lastStripe[STRIPE_LEN]; size_t const catchupSize = STRIPE_LEN - state->bufferedSize; - memcpy(lastStripe, (const char*)state->buffer + sizeof(state->buffer) - catchupSize, catchupSize); + memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize); memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize); XXH3_accumulate_512(acc, lastStripe, - (const char*)state->secret + state->secretLimit - XXH_SECRET_LASTACC_START, + state->secret + state->secretLimit - XXH_SECRET_LASTACC_START, accWidth); } } } @@ -1270,7 +1253,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state) if (state->totalLen > XXH3_MIDSIZE_MAX) { XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[ACC_NB]; XXH3_digest_long(acc, state, XXH3_acc_64bits); - return XXH3_mergeAccs(acc, (const char*)state->secret + XXH_SECRET_MERGEACCS_START, (U64)state->totalLen * PRIME64_1); + return XXH3_mergeAccs(acc, state->secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)state->totalLen * PRIME64_1); } /* len <= XXH3_MIDSIZE_MAX : short code */ if (state->seed) @@ -1283,21 +1266,20 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state) * ========================================== */ XXH_FORCE_INLINE XXH128_hash_t -XXH3_len_1to3_128b(const void* data, size_t len, const void* keyPtr, XXH64_hash_t seed) +XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) { - XXH_ASSERT(data != NULL); + XXH_ASSERT(input != NULL); XXH_ASSERT(1 <= len && len <= 3); - XXH_ASSERT(keyPtr != NULL); - { const U32* const key32 = (const U32*) keyPtr; - BYTE const c1 = ((const BYTE*)data)[0]; - BYTE const c2 = ((const BYTE*)data)[len >> 1]; - BYTE const c3 = ((const BYTE*)data)[len - 1]; - U32 const combinedl = ((U32)c1) + (((U32)c2) << 8) + (((U32)c3) << 16) + (((U32)len) << 24); - U32 const combinedh = XXH_swap32(combinedl); - U64 const keyedl = (U64)combinedl ^ (XXH_readLE32(key32) + seed); - U64 const keyedh = (U64)combinedh ^ (XXH_readLE32(key32+1) - seed); - U64 const mixedl = keyedl * PRIME64_1; - U64 const mixedh = keyedh * PRIME64_5; + XXH_ASSERT(secret != NULL); + { xxh_u8 const c1 = input[0]; + xxh_u8 const c2 = input[len >> 1]; + xxh_u8 const c3 = input[len - 1]; + xxh_u32 const combinedl = ((xxh_u32)c1) + (((xxh_u32)c2) << 8) + (((xxh_u32)c3) << 16) + (((xxh_u32)len) << 24); + xxh_u32 const combinedh = 
XXH_swap32(combinedl); + xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ (XXH_readLE32(secret) + seed); + xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ (XXH_readLE32(secret+4) - seed); + xxh_u64 const mixedl = keyed_lo * PRIME64_1; + xxh_u64 const mixedh = keyed_hi * PRIME64_5; XXH128_hash_t const h128 = { XXH3_avalanche(mixedl) /*low64*/, XXH3_avalanche(mixedh) /*high64*/ }; return h128; } @@ -1305,39 +1287,38 @@ XXH3_len_1to3_128b(const void* data, size_t len, const void* keyPtr, XXH64_hash_ XXH_FORCE_INLINE XXH128_hash_t -XXH3_len_4to8_128b(const void* data, size_t len, const void* keyPtr, XXH64_hash_t seed) +XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) { - XXH_ASSERT(data != NULL); - XXH_ASSERT(keyPtr != NULL); + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); XXH_ASSERT(4 <= len && len <= 8); - { U32 const in1 = XXH_readLE32(data); - U32 const in2 = XXH_readLE32((const BYTE*)data + len - 4); - U64 const in64l = in1 + ((U64)in2 << 32); - U64 const in64h = XXH_swap64(in64l); - U64 const keyedl = in64l ^ (XXH_readLE64(keyPtr) + seed); - U64 const keyedh = in64h ^ (XXH_readLE64((const char*)keyPtr + 8) - seed); - U64 const mix64l1 = len + ((keyedl ^ (keyedl >> 51)) * PRIME32_1); - U64 const mix64l2 = (mix64l1 ^ (mix64l1 >> 47)) * PRIME64_2; - U64 const mix64h1 = ((keyedh ^ (keyedh >> 47)) * PRIME64_1) - len; - U64 const mix64h2 = (mix64h1 ^ (mix64h1 >> 43)) * PRIME64_4; + { xxh_u32 const input_lo = XXH_readLE32(input); + xxh_u32 const input_hi = XXH_readLE32(input + len - 4); + xxh_u64 const input_64_lo = input_lo + ((xxh_u64)input_hi << 32); + xxh_u64 const input_64_hi = XXH_swap64(input_64_lo); + xxh_u64 const keyed_lo = input_64_lo ^ (XXH_readLE64(secret) + seed); + xxh_u64 const keyed_hi = input_64_hi ^ (XXH_readLE64(secret + 8) - seed); + xxh_u64 const mix64l1 = len + ((keyed_lo ^ (keyed_lo >> 51)) * PRIME32_1); + xxh_u64 const mix64l2 = (mix64l1 ^ (mix64l1 >> 47)) * PRIME64_2; + xxh_u64 const mix64h1 = ((keyed_hi ^ (keyed_hi >> 47)) * PRIME64_1) - len; + xxh_u64 const mix64h2 = (mix64h1 ^ (mix64h1 >> 43)) * PRIME64_4; { XXH128_hash_t const h128 = { XXH3_avalanche(mix64l2) /*low64*/, XXH3_avalanche(mix64h2) /*high64*/ }; return h128; } } } XXH_FORCE_INLINE XXH128_hash_t -XXH3_len_9to16_128b(const void* data, size_t len, const void* keyPtr, XXH64_hash_t seed) +XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) { - XXH_ASSERT(data != NULL); - XXH_ASSERT(keyPtr != NULL); + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); XXH_ASSERT(9 <= len && len <= 16); - { const U64* const key64 = (const U64*) keyPtr; - U64 const ll1 = XXH_readLE64(data) ^ (XXH_readLE64(key64) + seed); - U64 const ll2 = XXH_readLE64((const BYTE*)data + len - 8) ^ (XXH_readLE64(key64+1) - seed); - U64 const inlow = ll1 ^ ll2; - XXH128_hash_t m128 = XXH_mult64to128(inlow, PRIME64_1); - U64 const lenContrib = (U64)(U32)len * (U64)PRIME32_5; m128.low64 += lenContrib; - m128.high64 += ll2 * PRIME64_1; + { xxh_u64 const input_lo = XXH_readLE64(input) ^ (XXH_readLE64(secret) + seed); + xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ (XXH_readLE64(secret+8) - seed); + XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi, PRIME64_1); + xxh_u64 const lenContrib = XXH_mult32to64(len, PRIME32_5); + m128.low64 += lenContrib; + m128.high64 += input_hi * PRIME64_1; m128.low64 ^= (m128.high64 >> 32); { XXH128_hash_t h128 = XXH_mult64to128(m128.low64, PRIME64_2); h128.high64 += m128.high64 * PRIME64_2; @@ -1350,166 
+1331,166 @@ XXH3_len_9to16_128b(const void* data, size_t len, const void* keyPtr, XXH64_hash /* Assumption : `secret` size is >= 16 * Note : it should be >= XXH3_SECRET_SIZE_MIN anyway */ XXH_FORCE_INLINE XXH128_hash_t -XXH3_len_0to16_128b(const void* data, size_t len, const void* secret, XXH64_hash_t seed) +XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) { XXH_ASSERT(len <= 16); - { if (len > 8) return XXH3_len_9to16_128b(data, len, secret, seed); - if (len >= 4) return XXH3_len_4to8_128b(data, len, secret, seed); - if (len) return XXH3_len_1to3_128b(data, len, secret, seed); + { if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed); + if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed); + if (len) return XXH3_len_1to3_128b(input, len, secret, seed); { XXH128_hash_t const h128 = { 0, 0 }; return h128; } } } XXH_FORCE_INLINE XXH128_hash_t -XXH3_hashLong_128b_internal(const void* XXH_RESTRICT data, size_t len, - const void* XXH_RESTRICT secret, size_t secretSize) +XXH3_hashLong_128b_internal(const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize) { - XXH_ALIGN(XXH_ACC_ALIGN) U64 acc[ACC_NB] = XXH3_INIT_ACC; + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[ACC_NB] = XXH3_INIT_ACC; - XXH3_hashLong_internal_loop(acc, data, len, secret, secretSize, XXH3_acc_128bits); + XXH3_hashLong_internal_loop(acc, input, len, secret, secretSize, XXH3_acc_128bits); /* converge into final hash */ XXH_STATIC_ASSERT(sizeof(acc) == 64); XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); - { U64 const low64 = XXH3_mergeAccs(acc, (const char*)secret + XXH_SECRET_MERGEACCS_START, (U64)len * PRIME64_1); - U64 const high64 = XXH3_mergeAccs(acc, (const char*)secret + secretSize - sizeof(acc) - XXH_SECRET_MERGEACCS_START, ~((U64)len * PRIME64_2)); + { xxh_u64 const low64 = XXH3_mergeAccs(acc, secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * PRIME64_1); + xxh_u64 const high64 = XXH3_mergeAccs(acc, secret + secretSize - sizeof(acc) - XXH_SECRET_MERGEACCS_START, ~((xxh_u64)len * PRIME64_2)); XXH128_hash_t const h128 = { low64, high64 }; return h128; } } XXH_NO_INLINE XXH128_hash_t /* It's important for performance that XXH3_hashLong is not inlined. Not sure why (uop cache maybe ?), but difference is large and easily measurable */ -XXH3_hashLong_128b_defaultSecret(const void* data, size_t len) +XXH3_hashLong_128b_defaultSecret(const xxh_u8* input, size_t len) { - return XXH3_hashLong_128b_internal(data, len, kSecret, sizeof(kSecret)); + return XXH3_hashLong_128b_internal(input, len, kSecret, sizeof(kSecret)); } XXH_NO_INLINE XXH128_hash_t /* It's important for performance that XXH3_hashLong is not inlined. Not sure why (uop cache maybe ?), but difference is large and easily measurable */ -XXH3_hashLong_128b_withSecret(const void* data, size_t len, - const void* secret, size_t secretSize) +XXH3_hashLong_128b_withSecret(const xxh_u8* input, size_t len, + const xxh_u8* secret, size_t secretSize) { - return XXH3_hashLong_128b_internal(data, len, secret, secretSize); + return XXH3_hashLong_128b_internal(input, len, secret, secretSize); } XXH_NO_INLINE XXH128_hash_t /* It's important for performance that XXH3_hashLong is not inlined. 
Not sure why (uop cache maybe ?), but difference is large and easily measurable */ -XXH3_hashLong_128b_withSeed(const void* data, size_t len, XXH64_hash_t seed) +XXH3_hashLong_128b_withSeed(const xxh_u8* input, size_t len, XXH64_hash_t seed) { - XXH_ALIGN(8) char secret[XXH_SECRET_DEFAULT_SIZE]; - if (seed == 0) return XXH3_hashLong_128b_defaultSecret(data, len); - XXH3_initKeySeed(secret, seed); - return XXH3_hashLong_128b_internal(data, len, secret, sizeof(secret)); + XXH_ALIGN(8) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; + if (seed == 0) return XXH3_hashLong_128b_defaultSecret(input, len); + XXH3_initCustomSecret(secret, seed); + return XXH3_hashLong_128b_internal(input, len, secret, sizeof(secret)); +} + + +XXH_FORCE_INLINE XXH128_hash_t +XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2, const xxh_u8* secret, XXH64_hash_t seed) +{ + acc.low64 += XXH3_mix16B (input_1, secret+0, seed); + acc.low64 ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8); + acc.high64 += XXH3_mix16B (input_2, secret+16, seed); + acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8); + return acc; } XXH_NO_INLINE XXH128_hash_t -XXH3_len_129to240_128b(const void* XXH_RESTRICT data, size_t len, - const void* XXH_RESTRICT secret, size_t secretSize, - XXH64_hash_t seed) +XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH64_hash_t seed) { - const BYTE* const p = (const BYTE*)data; - const char* const key = (const char*)secret; - XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX); - { U64 acc1 = len * PRIME64_1; - U64 acc2 = 0; + { XXH128_hash_t acc; int const nbRounds = (int)len / 32; int i; + acc.low64 = len * PRIME64_1; + acc.high64 = 0; for (i=0; i<4; i++) { - acc1 += XXH3_mix16B(p+(32*i), key+(32*i), seed); - acc2 += XXH3_mix16B(p+(32*i)+16, key+(32*i)+16, 0ULL-seed); + acc = XXH128_mix32B(acc, input+(32*i), input+(32*i)+16, secret+(32*i), seed); } - acc1 = XXH3_avalanche(acc1); - acc2 = XXH3_avalanche(acc2); + acc.low64 = XXH3_avalanche(acc.low64); + acc.high64 = XXH3_avalanche(acc.high64); XXH_ASSERT(nbRounds >= 4); for (i=4 ; i < nbRounds; i++) { - acc1 += XXH3_mix16B(p+(32*i) , key+(32*(i-4)) + XXH3_MIDSIZE_STARTOFFSET, seed); - acc2 += XXH3_mix16B(p+(32*i)+16, key+(32*(i-4))+16 + XXH3_MIDSIZE_STARTOFFSET, 0ULL-seed); + acc = XXH128_mix32B(acc, input+(32*i), input+(32*i)+16, secret+XXH3_MIDSIZE_STARTOFFSET+(32*(i-4)), seed); } /* last bytes */ - acc1 += XXH3_mix16B(p + len - 16, key + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET , seed); - acc2 += XXH3_mix16B(p + len - 32, key + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16, 0ULL-seed); + acc = XXH128_mix32B(acc, input + len - 16, input + len - 32, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16, 0ULL - seed); - { U64 const low64 = acc1 + acc2; - U64 const high64 = (acc1 * PRIME64_1) + (acc2 * PRIME64_4) + ((len - seed) * PRIME64_2); + { xxh_u64 const low64 = acc.low64 + acc.high64; + xxh_u64 const high64 = (acc.low64 * PRIME64_1) + (acc.high64 * PRIME64_4) + ((len - seed) * PRIME64_2); XXH128_hash_t const h128 = { XXH3_avalanche(low64), (XXH64_hash_t)0 - XXH3_avalanche(high64) }; return h128; } } } -XXH_FORCE_INLINE XXH128_hash_t -XXH3_len_17to128_128b(const void* XXH_RESTRICT data, size_t len, - const void* XXH_RESTRICT secret, size_t secretSize, - XXH64_hash_t seed) -{ - const BYTE* const p = (const BYTE*)data; - const char* const key = (const 
char*)secret; +XXH_FORCE_INLINE XXH128_hash_t +XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH64_hash_t seed) +{ XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; XXH_ASSERT(16 < len && len <= 128); - { U64 acc1 = len * PRIME64_1; - U64 acc2 = 0; + { XXH128_hash_t acc; + acc.low64 = len * PRIME64_1; + acc.high64 = 0; if (len > 32) { if (len > 64) { if (len > 96) { - acc1 += XXH3_mix16B(p+48, key+96, seed); - acc2 += XXH3_mix16B(p+len-64, key+112, seed); + acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed); } - acc1 += XXH3_mix16B(p+32, key+64, seed); - acc2 += XXH3_mix16B(p+len-48, key+80, seed); + acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed); } - acc1 += XXH3_mix16B(p+16, key+32, seed); - acc2 += XXH3_mix16B(p+len-32, key+48, seed); + acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed); } - acc1 += XXH3_mix16B(p+0, key+0, seed); - acc2 += XXH3_mix16B(p+len-16, key+16, seed); - - { U64 const low64 = acc1 + acc2; - U64 const high64 = (acc1 * PRIME64_1) + (acc2 * PRIME64_4) + ((len - seed) * PRIME64_2); + acc = XXH128_mix32B(acc, input, input+len-16, secret, seed); + { xxh_u64 const low64 = acc.low64 + acc.high64; + xxh_u64 const high64 = (acc.low64 * PRIME64_1) + (acc.high64 * PRIME64_4) + ((len - seed) * PRIME64_2); XXH128_hash_t const h128 = { XXH3_avalanche(low64), (XXH64_hash_t)0 - XXH3_avalanche(high64) }; return h128; } } } -XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* data, size_t len) +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* input, size_t len) { - if (len <= 16) return XXH3_len_0to16_128b(data, len, kSecret, 0); - if (len <= 128) return XXH3_len_17to128_128b(data, len, kSecret, sizeof(kSecret), 0); - if (len <= XXH3_MIDSIZE_MAX) return XXH3_len_129to240_128b(data, len, kSecret, sizeof(kSecret), 0); - return XXH3_hashLong_128b_defaultSecret(data, len); + if (len <= 16) return XXH3_len_0to16_128b((const xxh_u8*)input, len, kSecret, 0); + if (len <= 128) return XXH3_len_17to128_128b((const xxh_u8*)input, len, kSecret, sizeof(kSecret), 0); + if (len <= XXH3_MIDSIZE_MAX) return XXH3_len_129to240_128b((const xxh_u8*)input, len, kSecret, sizeof(kSecret), 0); + return XXH3_hashLong_128b_defaultSecret((const xxh_u8*)input, len); } XXH_PUBLIC_API XXH128_hash_t -XXH3_128bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize) +XXH3_128bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize) { XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); /* if an action must be taken should `secret` conditions not be respected, * it should be done here. * For now, it's a contract pre-condition. 
* Adding a check and a branch here would cost performance at every hash */ - if (len <= 16) return XXH3_len_0to16_128b(data, len, secret, 0); - if (len <= 128) return XXH3_len_17to128_128b(data, len, secret, secretSize, 0); - if (len <= XXH3_MIDSIZE_MAX) return XXH3_len_129to240_128b(data, len, secret, secretSize, 0); - return XXH3_hashLong_128b_withSecret(data, len, secret, secretSize); + if (len <= 16) return XXH3_len_0to16_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, 0); + if (len <= 128) return XXH3_len_17to128_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, 0); + if (len <= XXH3_MIDSIZE_MAX) return XXH3_len_129to240_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, 0); + return XXH3_hashLong_128b_withSecret((const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize); } XXH_PUBLIC_API XXH128_hash_t -XXH3_128bits_withSeed(const void* data, size_t len, XXH64_hash_t seed) +XXH3_128bits_withSeed(const void* input, size_t len, XXH64_hash_t seed) { - if (len <= 16) return XXH3_len_0to16_128b(data, len, kSecret, seed); - if (len <= 128) return XXH3_len_17to128_128b(data, len, kSecret, sizeof(kSecret), seed); - if (len <= XXH3_MIDSIZE_MAX) return XXH3_len_129to240_128b(data, len, kSecret, sizeof(kSecret), seed); - return XXH3_hashLong_128b_withSeed(data, len, seed); + if (len <= 16) return XXH3_len_0to16_128b((const xxh_u8*)input, len, kSecret, seed); + if (len <= 128) return XXH3_len_17to128_128b((const xxh_u8*)input, len, kSecret, sizeof(kSecret), seed); + if (len <= XXH3_MIDSIZE_MAX) return XXH3_len_129to240_128b((const xxh_u8*)input, len, kSecret, sizeof(kSecret), seed); + return XXH3_hashLong_128b_withSeed((const xxh_u8*)input, len, seed); } XXH_PUBLIC_API XXH128_hash_t -XXH128(const void* data, size_t len, XXH64_hash_t seed) +XXH128(const void* input, size_t len, XXH64_hash_t seed) { - return XXH3_128bits_withSeed(data, len, seed); + return XXH3_128bits_withSeed(input, len, seed); } @@ -1522,7 +1503,7 @@ XXH128(const void* data, size_t len, XXH64_hash_t seed) static void XXH3_128bits_reset_internal(XXH3_state_t* statePtr, XXH64_hash_t seed, - const void* secret, size_t secretSize) + const xxh_u8* secret, size_t secretSize) { XXH3_64bits_reset_internal(statePtr, seed, secret, secretSize); } @@ -1539,7 +1520,7 @@ XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize) { if (statePtr == NULL) return XXH_ERROR; - XXH3_128bits_reset_internal(statePtr, 0, secret, secretSize); + XXH3_128bits_reset_internal(statePtr, 0, (const xxh_u8*)secret, secretSize); if (secret == NULL) return XXH_ERROR; if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR; return XXH_OK; @@ -1550,7 +1531,7 @@ XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed) { if (statePtr == NULL) return XXH_ERROR; XXH3_128bits_reset_internal(statePtr, seed, kSecret, XXH_SECRET_DEFAULT_SIZE); - XXH3_initKeySeed(statePtr->customSecret, seed); + XXH3_initCustomSecret(statePtr->customSecret, seed); statePtr->secret = statePtr->customSecret; return XXH_OK; } @@ -1558,7 +1539,7 @@ XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed) XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len) { - return XXH3_update(state, input, len, XXH3_acc_128bits); + return XXH3_update(state, (const xxh_u8*)input, len, XXH3_acc_128bits); } XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state) @@ -1567,8 +1548,8 @@ XXH_PUBLIC_API 
XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state) XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[ACC_NB]; XXH3_digest_long(acc, state, XXH3_acc_128bits); XXH_ASSERT(state->secretLimit + STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); - { U64 const low64 = XXH3_mergeAccs(acc, (const char*)state->secret + XXH_SECRET_MERGEACCS_START, (U64)state->totalLen * PRIME64_1); - U64 const high64 = XXH3_mergeAccs(acc, (const char*)state->secret + state->secretLimit + STRIPE_LEN - sizeof(acc) - XXH_SECRET_MERGEACCS_START, ~((U64)state->totalLen * PRIME64_2)); + { xxh_u64 const low64 = XXH3_mergeAccs(acc, state->secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)state->totalLen * PRIME64_1); + xxh_u64 const high64 = XXH3_mergeAccs(acc, state->secret + state->secretLimit + STRIPE_LEN - sizeof(acc) - XXH_SECRET_MERGEACCS_START, ~((xxh_u64)state->totalLen * PRIME64_2)); XXH128_hash_t const h128 = { low64, high64 }; return h128; } diff --git a/source/thirdparty/include/xxhash.h b/source/thirdparty/include/xxhash.h index 412ef238a..60435bed1 100644 --- a/source/thirdparty/include/xxhash.h +++ b/source/thirdparty/include/xxhash.h @@ -178,7 +178,16 @@ XXH_PUBLIC_API unsigned XXH_versionNumber (void); # include <stdint.h> typedef uint32_t XXH32_hash_t; #else - typedef unsigned int XXH32_hash_t; +# include <limits.h> +# if UINT_MAX == 0xFFFFFFFFUL + typedef unsigned int XXH32_hash_t; +# else +# if ULONG_MAX == 0xFFFFFFFFUL + typedef unsigned long XXH32_hash_t; +# else +# error "unsupported platform : need a 32-bit type" +# endif +# endif #endif /*! XXH32() : The memory between input & input+length must be valid (allocated and read-accessible). "seed" can be used to alter the result predictably. Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */ -XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed); /*====== Streaming ======*/ -typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ -XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); -XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); -XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state); - -XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); -XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); -XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); /* - * Streaming functions generate the xxHash of an input provided in multiple segments. - * Note that, for small input, they are slower than single-call functions, due to state management. + * Streaming functions generate the xxHash value from an incremental input. + * This method is slower than single-call functions, due to state management. * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized. * * XXH state must first be allocated, using XXH*_createState() . @@ -214,23 +215,41 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); * This function returns the nn-bits hash as an int or long long. * * It's still possible to continue inserting input into the hash state after a digest, - * and generate some new hashes later on, by calling again XXH*_digest(). + * and generate some new hash values later on, by invoking again XXH*_digest().
* - * When done, free XXH state space if it was allocated dynamically. + * When done, release the state, using XXH*_freeState(). */ +typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state); + +XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed); +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); + /*====== Canonical representation ======*/ +/* Default return values from XXH functions are basic unsigned 32 and 64 bits. + * This is the simplest and fastest format for further post-processing. + * However, this leaves open the question of what is the order of bytes, + * since little and big endian conventions will write the same number differently. + * + * The canonical representation settles this issue, + * by mandating big-endian convention, + * aka, the same convention as human-readable numbers (large digits first). + * When writing hash values to storage, sending them over a network, or printing them, + * it's highly recommended to use the canonical representation, + * to ensure portability across a wider range of systems, present and future. + * + * The following functions allow transformation of hash values into and from canonical format. + */ + typedef struct { unsigned char digest[4]; } XXH32_canonical_t; XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); -/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. - * The canonical representation uses human-readable write convention, aka big-endian (large digits first). - * These functions allow transformation of hash result into and from its canonical format. - * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. - */ - #ifndef XXH_NO_LONG_LONG /*-********************************************************************** @@ -242,6 +261,7 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src # include <stdint.h> typedef uint64_t XXH64_hash_t; #else + /* the following type must have a width of 64 bits */ typedef unsigned long long XXH64_hash_t; #endif @@ -250,7 +270,7 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src "seed" can be used to alter the result predictably. This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
*/ -XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, XXH64_hash_t seed); /*====== Streaming ======*/ typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ @@ -258,7 +278,7 @@ XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state); -XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, XXH64_hash_t seed); XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); @@ -336,9 +356,9 @@ struct XXH64_state_s { * * The XXH3 algorithm is still considered experimental. * Produced results can still change between versions. - * For example, results produced by v0.7.1 are not comparable with results from v0.7.0 . + * Results produced by v0.7.x are not comparable with results from v0.7.y . * It's nonetheless possible to use XXH3 for ephemeral data (local sessions), - * but avoid storing values in long-term storage for later re-use. + * but avoid storing values in long-term storage for later reads. * * The API supports one-shot hashing, streaming mode, and custom secrets. * @@ -351,20 +371,39 @@ struct XXH64_state_s { * However, at field level, they are identical on all platforms. * The canonical representation solves the issue of identical byte-level representation across platforms, * which is necessary for serialization. - * Would there be a better representation for a 128-bit hash result ? - * Are the names of the inner 64-bit fields important ? Should they be changed ? + * Q1 : Would there be a better representation for a 128-bit hash result ? + * Q2 : Are the names of the inner 64-bit fields important ? Should they be changed ? * - * - Seed type for 128-bits variant : currently, it's a single 64-bit value, like the 64-bit variant. + * - Prototype XXH128() : XXH128() uses the same arguments as XXH64(), for consistency. + * It means it maps to XXH3_128bits_withSeed(). + * This variant is slightly slower than XXH3_128bits(), + * because the seed is now part of the algorithm, and can't be simplified. + * Is that a good idea ? + * + * - Seed type for XXH128() : currently, it's a single 64-bit value, like the 64-bit variant. * It could be argued that it's more logical to offer a 128-bit seed input parameter for a 128-bit hash. * But 128-bit seed is more difficult to use, since it requires to pass a structure instead of a scalar value. * Such a variant could either replace current one, or become an additional one. * Farmhash, for example, offers both variants (the 128-bits seed variant is called `doubleSeed`). - * If both 64-bit and 128-bit seeds are possible, which variant should be called XXH128 ? + * Follow up question : if both 64-bit and 128-bit seeds are allowed, which variant should be called XXH128 ? * - * - Result for len==0 : Currently, the result of hashing a zero-length input is `0`. - * It seems okay as a return value when using all "default" secret and seed (it used to be a request for XXH32/XXH64). + * - Result for len==0 : Currently, the result of hashing a zero-length input is always `0`. + * It seems okay as a return value when using "default" secret and seed. 
* But is it still fine to return `0` when secret or seed are non-default ? * Are there use cases which could depend on generating a different hash result for zero-length input when the secret is different ? + * + * - Consistency (1) : Streaming XXH128 uses an XXH3 state, which is the same state as XXH3_64bits(). + * It means a 128bit streaming loop must invoke the following symbols : + * XXH3_createState(), XXH3_128bits_reset(), XXH3_128bits_update() (loop), XXH3_128bits_digest(), XXH3_freeState(). + * Is that consistent enough ? + * + * - Consistency (2) : The canonical representation of `XXH3_64bits` is provided by existing functions + * XXH64_canonicalFromHash(), and reverse operation XXH64_hashFromCanonical(). + * As a mirror, canonical functions for XXH128_hash_t results generated by `XXH3_128bits` + * are XXH128_canonicalFromHash() and XXH128_hashFromCanonical(). + * Which means, `XXH3` doesn't appear in the names, because canonical functions operate on a type, + * independently of which algorithm was used to generate that type. + * Is that consistent enough ? */ #ifdef XXH_NAMESPACE @@ -427,8 +466,8 @@ typedef struct XXH3_state_s XXH3_state_t; #define XXH3_INTERNALBUFFER_SIZE 256 struct XXH3_state_s { XXH_ALIGN(64) XXH64_hash_t acc[8]; - XXH_ALIGN(64) char customSecret[XXH3_SECRET_DEFAULT_SIZE]; /* used to store a custom secret generated from the seed. Makes state larger. Design might change */ - XXH_ALIGN(64) char buffer[XXH3_INTERNALBUFFER_SIZE]; + XXH_ALIGN(64) unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]; /* used to store a custom secret generated from the seed. Makes state larger. Design might change */ + XXH_ALIGN(64) unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]; XXH32_hash_t bufferedSize; XXH32_hash_t nbStripesPerBlock; XXH32_hash_t nbStripesSoFar; @@ -438,7 +477,7 @@ struct XXH3_state_s { XXH64_hash_t totalLen; XXH64_hash_t seed; XXH64_hash_t reserved64; - const void* secret; /* note : there is some padding after, due to alignment on 64 bytes */ + const unsigned char* secret; /* note : there is some padding after, due to alignment on 64 bytes */ }; /* typedef'd to XXH3_state_t */ /* Streaming requires state maintenance. 
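As a usage illustration of the streaming contract documented in the header comments above (allocate, reset, update in segments, digest, free), here is a minimal sketch of a 128-bit streaming loop, following the symbol sequence listed in the "Consistency (1)" note. It is not part of the patch; the function name and the `xxh3.h` include are illustrative, assuming the XXH3 declarations are in scope.

```c
#include <string.h>
#include "xxh3.h"   /* assumed entry point for the experimental XXH3 API */

/* Hash an input supplied in two segments, using the 128-bit streaming API. */
static XXH128_hash_t hash_in_two_parts(const char* part1, const char* part2)
{
    XXH128_hash_t h128 = { 0, 0 };
    XXH3_state_t* const state = XXH3_createState();
    if (state == NULL) return h128;                 /* allocation failure */
    if (XXH3_128bits_reset(state) == XXH_OK) {
        XXH3_128bits_update(state, part1, strlen(part1));
        XXH3_128bits_update(state, part2, strlen(part2));
        h128 = XXH3_128bits_digest(state);          /* state stays usable */
    }
    XXH3_freeState(state);
    return h128;
}
```

As the comments above note, it remains legal to keep feeding input after a digest and to invoke XXH3_128bits_digest() again later.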
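The canonical-representation comment added in this diff recommends big-endian serialization for any hash value that leaves the process. A small sketch of the round-trip, using the XXH64 canonical helpers mirrored by the XXH128 ones discussed above (the file I/O details are illustrative):

```c
#include <stdio.h>
#include "xxhash.h"

/* Store a hash in canonical (big-endian) form, portable across platforms. */
static int save_hash(FILE* out, XXH64_hash_t hash)
{
    XXH64_canonical_t canonical;
    XXH64_canonicalFromHash(&canonical, hash);      /* always big-endian */
    return fwrite(canonical.digest, sizeof(canonical.digest), 1, out) == 1;
}

/* Read it back and convert to the native integer representation. */
static XXH64_hash_t load_hash(FILE* in)
{
    XXH64_canonical_t canonical;
    if (fread(canonical.digest, sizeof(canonical.digest), 1, in) != 1) return 0;
    return XXH64_hashFromCanonical(&canonical);
}
```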
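The "Prototype XXH128()" question raised above can be made concrete: per the implementation in this diff, XXH128() forwards its arguments to XXH3_128bits_withSeed(), so the two calls below produce the same value. A sketch under that assumption (`demo` is a hypothetical name):

```c
#include <assert.h>
#include <string.h>
#include "xxh3.h"

static void demo(const char* msg)
{
    XXH64_hash_t const seed = 42;
    XXH128_hash_t const a = XXH128(msg, strlen(msg), seed);
    XXH128_hash_t const b = XXH3_128bits_withSeed(msg, strlen(msg), seed);
    assert(a.low64 == b.low64 && a.high64 == b.high64);
}
```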
@@ -507,7 +546,7 @@ XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH3_state_t* statePtr, const XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr); -/* Note : for better performance, following functions should be inlined, +/* Note : for better performance, following functions can be inlined, * using XXH_INLINE_ALL */ /* return : 1 is equal, 0 if different */ diff --git a/source/thirdparty/src/xxhash.c b/source/thirdparty/src/xxhash.c index 50812c1ba..3f49b7d1d 100644 --- a/source/thirdparty/src/xxhash.c +++ b/source/thirdparty/src/xxhash.c @@ -33,6 +33,12 @@ */ +/* since xxhash.c can be included (via XXH_INLINE_ALL), + * it's good practice to protect it with a guard + * in case of multiple inclusions */ +#ifndef XXHASH_C_01393879 +#define XXHASH_C_01393879 + /* ************************************* * Tuning parameters ***************************************/ @@ -161,20 +167,15 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp /* ************************************* * Basic Types ***************************************/ -#ifndef MEM_MODULE -# if !defined (__VMS) \ - && (defined (__cplusplus) \ - || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include <stdint.h> - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; -# else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; -# endif +#if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include <stdint.h> + typedef uint8_t xxh_u8; +#else + typedef unsigned char xxh_u8; #endif +typedef XXH32_hash_t xxh_u32; /* === Memory access === */ @@ -182,23 +183,23 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) /* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ -static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } +static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; } #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ /* currently only defined for gcc and icc */ -typedef union { U32 u32; } __attribute__((packed)) unalign; -static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +typedef union { xxh_u32 u32; } __attribute__((packed)) unalign; +static xxh_u32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } #else /* portable and safe solution. Generally efficient. 
* see : http://stackoverflow.com/a/32095106/646947 */ -static U32 XXH_read32(const void* memPtr) +static xxh_u32 XXH_read32(const void* memPtr) { - U32 val; + xxh_u32 val; memcpy(&val, memPtr, sizeof(val)); return val; } @@ -211,12 +212,21 @@ typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; /* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ #ifndef XXH_CPU_LITTLE_ENDIAN +# if defined(_WIN32) /* Windows is always little endian */ \ + || defined(__LITTLE_ENDIAN__) \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define XXH_CPU_LITTLE_ENDIAN 1 +# elif defined(__BIG_ENDIAN__) \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define XXH_CPU_LITTLE_ENDIAN 0 +# else static int XXH_isLittleEndian(void) { - const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 }; /* don't use static : performance detrimental */ return one.c[0]; } # define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian() +# endif #endif @@ -248,7 +258,7 @@ static int XXH_isLittleEndian(void) #elif XXH_GCC_VERSION >= 403 # define XXH_swap32 __builtin_bswap32 #else -static U32 XXH_swap32 (U32 x) +static xxh_u32 XXH_swap32 (xxh_u32 x) { return ((x << 24) & 0xff000000 ) | ((x << 8) & 0x00ff0000 ) | @@ -263,23 +273,23 @@ static U32 XXH_swap32 (U32 x) *****************************/ typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; -XXH_FORCE_INLINE U32 XXH_readLE32(const void* ptr) +XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr) { return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); } -static U32 XXH_readBE32(const void* ptr) +static xxh_u32 XXH_readBE32(const void* ptr) { return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); } -XXH_FORCE_INLINE U32 +XXH_FORCE_INLINE xxh_u32 XXH_readLE32_align(const void* ptr, XXH_alignment align) { if (align==XXH_unaligned) { return XXH_readLE32(ptr); } else { - return XXH_CPU_LITTLE_ENDIAN ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); + return XXH_CPU_LITTLE_ENDIAN ? 
*(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr); } } @@ -293,13 +303,13 @@ XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } /* ******************************************************************* * 32-bit hash functions *********************************************************************/ -static const U32 PRIME32_1 = 0x9E3779B1U; /* 0b10011110001101110111100110110001 */ -static const U32 PRIME32_2 = 0x85EBCA77U; /* 0b10000101111010111100101001110111 */ -static const U32 PRIME32_3 = 0xC2B2AE3DU; /* 0b11000010101100101010111000111101 */ -static const U32 PRIME32_4 = 0x27D4EB2FU; /* 0b00100111110101001110101100101111 */ -static const U32 PRIME32_5 = 0x165667B1U; /* 0b00010110010101100110011110110001 */ +static const xxh_u32 PRIME32_1 = 0x9E3779B1U; /* 0b10011110001101110111100110110001 */ +static const xxh_u32 PRIME32_2 = 0x85EBCA77U; /* 0b10000101111010111100101001110111 */ +static const xxh_u32 PRIME32_3 = 0xC2B2AE3DU; /* 0b11000010101100101010111000111101 */ +static const xxh_u32 PRIME32_4 = 0x27D4EB2FU; /* 0b00100111110101001110101100101111 */ +static const xxh_u32 PRIME32_5 = 0x165667B1U; /* 0b00010110010101100110011110110001 */ -static U32 XXH32_round(U32 acc, U32 input) +static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input) { acc += input * PRIME32_2; acc = XXH_rotl32(acc, 13); @@ -352,7 +362,7 @@ static U32 XXH32_round(U32 acc, U32 input) } /* mix all bits */ -static U32 XXH32_avalanche(U32 h32) +static xxh_u32 XXH32_avalanche(xxh_u32 h32) { h32 ^= h32 >> 15; h32 *= PRIME32_2; @@ -364,18 +374,16 @@ static U32 XXH32_avalanche(U32 h32) #define XXH_get32bits(p) XXH_readLE32_align(p, align) -static U32 -XXH32_finalize(U32 h32, const void* ptr, size_t len, XXH_alignment align) +static xxh_u32 +XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align) { - const BYTE* p = (const BYTE*)ptr; - #define PROCESS1 \ - h32 += (*p++) * PRIME32_5; \ + h32 += (*ptr++) * PRIME32_5; \ h32 = XXH_rotl32(h32, 11) * PRIME32_1 ; #define PROCESS4 \ - h32 += XXH_get32bits(p) * PRIME32_3; \ - p+=4; \ + h32 += XXH_get32bits(ptr) * PRIME32_3; \ + ptr+=4; \ h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; /* Compact rerolled version */ @@ -435,33 +443,32 @@ XXH32_finalize(U32 h32, const void* ptr, size_t len, XXH_alignment align) } } -XXH_FORCE_INLINE U32 -XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_alignment align) +XXH_FORCE_INLINE xxh_u32 +XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align) { - const BYTE* p = (const BYTE*)input; - const BYTE* bEnd = p + len; - U32 h32; + const xxh_u8* bEnd = input + len; + xxh_u32 h32; #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) - if (p==NULL) { + if (input==NULL) { len=0; - bEnd=p=(const BYTE*)(size_t)16; + bEnd=input=(const xxh_u8*)(size_t)16; } #endif if (len>=16) { - const BYTE* const limit = bEnd - 15; - U32 v1 = seed + PRIME32_1 + PRIME32_2; - U32 v2 = seed + PRIME32_2; - U32 v3 = seed + 0; - U32 v4 = seed - PRIME32_1; + const xxh_u8* const limit = bEnd - 15; + xxh_u32 v1 = seed + PRIME32_1 + PRIME32_2; + xxh_u32 v2 = seed + PRIME32_2; + xxh_u32 v3 = seed + 0; + xxh_u32 v4 = seed - PRIME32_1; do { - v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; - v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; - v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; - v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; - } while (p < limit); + v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4; + v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4; + v3 
= XXH32_round(v3, XXH_get32bits(input)); input += 4; + v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4; + } while (input < limit); h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); @@ -469,29 +476,29 @@ XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_alignment align) h32 = seed + PRIME32_5; } - h32 += (U32)len; + h32 += (xxh_u32)len; - return XXH32_finalize(h32, p, len&15, align); + return XXH32_finalize(h32, input, len&15, align); } -XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, unsigned int seed) +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed) { #if 0 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ XXH32_state_t state; XXH32_reset(&state, seed); - XXH32_update(&state, input, len); + XXH32_update(&state, (const xxh_u8*)input, len); return XXH32_digest(&state); #else if (XXH_FORCE_ALIGN_CHECK) { if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ - return XXH32_endian_align(input, len, seed, XXH_aligned); + return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned); } } - return XXH32_endian_align(input, len, seed, XXH_unaligned); + return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned); #endif } @@ -514,7 +521,7 @@ XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t memcpy(dstState, srcState, sizeof(*dstState)); } -XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed) { XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ memset(&state, 0, sizeof(state)); @@ -538,21 +545,21 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len) return XXH_ERROR; #endif - { const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; + { const xxh_u8* p = (const xxh_u8*)input; + const xxh_u8* const bEnd = p + len; state->total_len_32 += (XXH32_hash_t)len; state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16)); if (state->memsize + len < 16) { /* fill in tmp buffer */ - XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); + XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len); state->memsize += (XXH32_hash_t)len; return XXH_OK; } if (state->memsize) { /* some data left from previous update */ - XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); - { const U32* p32 = state->mem32; + XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize); + { const xxh_u32* p32 = state->mem32; state->v1 = XXH32_round(state->v1, XXH_readLE32(p32)); p32++; state->v2 = XXH32_round(state->v2, XXH_readLE32(p32)); p32++; state->v3 = XXH32_round(state->v3, XXH_readLE32(p32)); p32++; @@ -563,11 +570,11 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len) } if (p <= bEnd-16) { - const BYTE* const limit = bEnd - 16; - U32 v1 = state->v1; - U32 v2 = state->v2; - U32 v3 = state->v3; - U32 v4 = state->v4; + const xxh_u8* const limit = bEnd - 16; + xxh_u32 v1 = state->v1; + xxh_u32 v2 = state->v2; + xxh_u32 v3 = state->v3; + xxh_u32 v4 = state->v4; do { v1 = XXH32_round(v1, XXH_readLE32(p)); p+=4; @@ -594,7 +601,7 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len) XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* state) { - U32 h32; + xxh_u32 h32; if 
(state->large_len) { h32 = XXH_rotl32(state->v1, 1) @@ -607,7 +614,7 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* state) h32 += state->total_len_32; - return XXH32_finalize(h32, state->mem32, state->memsize, XXH_aligned); + return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned); } @@ -640,18 +647,8 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src /*====== Memory access ======*/ -#ifndef MEM_MODULE -# define MEM_MODULE -# if !defined (__VMS) \ - && (defined (__cplusplus) \ - || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include - typedef uint64_t U64; -# else - /* if compiler doesn't support unsigned long long, replace by another 64-bit type */ - typedef unsigned long long U64; -# endif -#endif +typedef XXH64_hash_t xxh_u64; + /*! XXH_REROLL_XXH64: * Whether to reroll the XXH64_finalize() loop. @@ -682,14 +679,14 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) /* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ -static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } +static xxh_u64 XXH_read64(const void* memPtr) { return *(const xxh_u64*) memPtr; } #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ /* currently only defined for gcc and icc */ -typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign64; -static U64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; } +typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64; +static xxh_u64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; } #else @@ -697,9 +694,9 @@ static U64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; } * see : http://stackoverflow.com/a/32095106/646947 */ -static U64 XXH_read64(const void* memPtr) +static xxh_u64 XXH_read64(const void* memPtr) { - U64 val; + xxh_u64 val; memcpy(&val, memPtr, sizeof(val)); return val; } @@ -711,7 +708,7 @@ static U64 XXH_read64(const void* memPtr) #elif XXH_GCC_VERSION >= 403 # define XXH_swap64 __builtin_bswap64 #else -static U64 XXH_swap64 (U64 x) +static xxh_u64 XXH_swap64 (xxh_u64 x) { return ((x << 56) & 0xff00000000000000ULL) | ((x << 40) & 0x00ff000000000000ULL) | @@ -724,35 +721,35 @@ static U64 XXH_swap64 (U64 x) } #endif -XXH_FORCE_INLINE U64 XXH_readLE64(const void* ptr) +XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr) { return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); } -static U64 XXH_readBE64(const void* ptr) +static xxh_u64 XXH_readBE64(const void* ptr) { return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); } -XXH_FORCE_INLINE U64 +XXH_FORCE_INLINE xxh_u64 XXH_readLE64_align(const void* ptr, XXH_alignment align) { if (align==XXH_unaligned) return XXH_readLE64(ptr); else - return XXH_CPU_LITTLE_ENDIAN ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); + return XXH_CPU_LITTLE_ENDIAN ? 
*(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr); } /*====== xxh64 ======*/ -static const U64 PRIME64_1 = 0x9E3779B185EBCA87ULL; /* 0b1001111000110111011110011011000110000101111010111100101010000111 */ -static const U64 PRIME64_2 = 0xC2B2AE3D27D4EB4FULL; /* 0b1100001010110010101011100011110100100111110101001110101101001111 */ -static const U64 PRIME64_3 = 0x165667B19E3779F9ULL; /* 0b0001011001010110011001111011000110011110001101110111100111111001 */ -static const U64 PRIME64_4 = 0x85EBCA77C2B2AE63ULL; /* 0b1000010111101011110010100111011111000010101100101010111001100011 */ -static const U64 PRIME64_5 = 0x27D4EB2F165667C5ULL; /* 0b0010011111010100111010110010111100010110010101100110011111000101 */ +static const xxh_u64 PRIME64_1 = 0x9E3779B185EBCA87ULL; /* 0b1001111000110111011110011011000110000101111010111100101010000111 */ +static const xxh_u64 PRIME64_2 = 0xC2B2AE3D27D4EB4FULL; /* 0b1100001010110010101011100011110100100111110101001110101101001111 */ +static const xxh_u64 PRIME64_3 = 0x165667B19E3779F9ULL; /* 0b0001011001010110011001111011000110011110001101110111100111111001 */ +static const xxh_u64 PRIME64_4 = 0x85EBCA77C2B2AE63ULL; /* 0b1000010111101011110010100111011111000010101100101010111001100011 */ +static const xxh_u64 PRIME64_5 = 0x27D4EB2F165667C5ULL; /* 0b0010011111010100111010110010111100010110010101100110011111000101 */ -static U64 XXH64_round(U64 acc, U64 input) +static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input) { acc += input * PRIME64_2; acc = XXH_rotl64(acc, 31); @@ -760,7 +757,7 @@ static U64 XXH64_round(U64 acc, U64 input) return acc; } -static U64 XXH64_mergeRound(U64 acc, U64 val) +static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val) { val = XXH64_round(0, val); acc ^= val; @@ -768,7 +765,7 @@ static U64 XXH64_mergeRound(U64 acc, U64 val) return acc; } -static U64 XXH64_avalanche(U64 h64) +static xxh_u64 XXH64_avalanche(xxh_u64 h64) { h64 ^= h64 >> 33; h64 *= PRIME64_2; @@ -781,23 +778,21 @@ static U64 XXH64_avalanche(U64 h64) #define XXH_get64bits(p) XXH_readLE64_align(p, align) -static U64 -XXH64_finalize(U64 h64, const void* ptr, size_t len, XXH_alignment align) +static xxh_u64 +XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align) { - const BYTE* p = (const BYTE*)ptr; - #define PROCESS1_64 \ - h64 ^= (*p++) * PRIME64_5; \ + h64 ^= (*ptr++) * PRIME64_5; \ h64 = XXH_rotl64(h64, 11) * PRIME64_1; #define PROCESS4_64 \ - h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; \ - p+=4; \ + h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * PRIME64_1; \ + ptr+=4; \ h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; #define PROCESS8_64 { \ - U64 const k1 = XXH64_round(0, XXH_get64bits(p)); \ - p+=8; \ + xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr)); \ + ptr+=8; \ h64 ^= k1; \ h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; \ } @@ -906,33 +901,32 @@ XXH64_finalize(U64 h64, const void* ptr, size_t len, XXH_alignment align) return 0; /* unreachable, but some compilers complain without it */ } -XXH_FORCE_INLINE U64 -XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_alignment align) +XXH_FORCE_INLINE xxh_u64 +XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align) { - const BYTE* p = (const BYTE*)input; - const BYTE* bEnd = p + len; - U64 h64; + const xxh_u8* bEnd = input + len; + xxh_u64 h64; #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) - if (p==NULL) { + if (input==NULL) { len=0; - bEnd=p=(const BYTE*)(size_t)32; + bEnd=input=(const xxh_u8*)(size_t)32; } #endif if 
(len>=32) { - const BYTE* const limit = bEnd - 32; - U64 v1 = seed + PRIME64_1 + PRIME64_2; - U64 v2 = seed + PRIME64_2; - U64 v3 = seed + 0; - U64 v4 = seed - PRIME64_1; + const xxh_u8* const limit = bEnd - 32; + xxh_u64 v1 = seed + PRIME64_1 + PRIME64_2; + xxh_u64 v2 = seed + PRIME64_2; + xxh_u64 v3 = seed + 0; + xxh_u64 v4 = seed - PRIME64_1; do { - v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; - v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; - v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; - v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; - } while (p<=limit); + v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8; + v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8; + v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8; + v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8; + } while (input<=limit); h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); h64 = XXH64_mergeRound(h64, v1); @@ -944,29 +938,29 @@ XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_alignment align) h64 = seed + PRIME64_5; } - h64 += (U64) len; + h64 += (xxh_u64) len; - return XXH64_finalize(h64, p, len, align); + return XXH64_finalize(h64, input, len, align); } -XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, unsigned long long seed) +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed) { #if 0 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ XXH64_state_t state; XXH64_reset(&state, seed); - XXH64_update(&state, input, len); + XXH64_update(&state, (const xxh_u8*)input, len); return XXH64_digest(&state); #else if (XXH_FORCE_ALIGN_CHECK) { if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ - return XXH64_endian_align(input, len, seed, XXH_aligned); + return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned); } } - return XXH64_endian_align(input, len, seed, XXH_unaligned); + return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned); #endif } @@ -988,7 +982,7 @@ XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t memcpy(dstState, srcState, sizeof(*dstState)); } -XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed) { XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ memset(&state, 0, sizeof(state)); @@ -1011,19 +1005,19 @@ XXH64_update (XXH64_state_t* state, const void* input, size_t len) return XXH_ERROR; #endif - { const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; + { const xxh_u8* p = (const xxh_u8*)input; + const xxh_u8* const bEnd = p + len; state->total_len += len; if (state->memsize + len < 32) { /* fill in tmp buffer */ - XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); - state->memsize += (U32)len; + XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len); + state->memsize += (xxh_u32)len; return XXH_OK; } if (state->memsize) { /* tmp buffer is full */ - XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); + XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize); state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0)); state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1)); state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2)); @@ -1033,11 +1027,11 @@ XXH64_update 
(XXH64_state_t* state, const void* input, size_t len) } if (p+32 <= bEnd) { - const BYTE* const limit = bEnd - 32; - U64 v1 = state->v1; - U64 v2 = state->v2; - U64 v3 = state->v3; - U64 v4 = state->v4; + const xxh_u8* const limit = bEnd - 32; + xxh_u64 v1 = state->v1; + xxh_u64 v2 = state->v2; + xxh_u64 v3 = state->v3; + xxh_u64 v4 = state->v4; do { v1 = XXH64_round(v1, XXH_readLE64(p)); p+=8; @@ -1064,13 +1058,13 @@ XXH64_update (XXH64_state_t* state, const void* input, size_t len) XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* state) { - U64 h64; + xxh_u64 h64; if (state->total_len >= 32) { - U64 const v1 = state->v1; - U64 const v2 = state->v2; - U64 const v3 = state->v3; - U64 const v4 = state->v4; + xxh_u64 const v1 = state->v1; + xxh_u64 const v2 = state->v2; + xxh_u64 const v3 = state->v3; + xxh_u64 const v4 = state->v4; h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); h64 = XXH64_mergeRound(h64, v1); @@ -1081,9 +1075,9 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* state) h64 = state->v3 /*seed*/ + PRIME64_5; } - h64 += (U64) state->total_len; + h64 += (xxh_u64) state->total_len; - return XXH64_finalize(h64, state->mem64, (size_t)state->total_len, XXH_aligned); + return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned); } @@ -1112,3 +1106,5 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src #endif /* XXH_NO_LONG_LONG */ + +#endif /* XXHASH_C_01393879 */
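For reference, this is the inclusion pattern that the new XXHASH_C_01393879 guard protects: when XXH_INLINE_ALL is defined before including the header, the function bodies from xxhash.c are pulled in as well, so the guard prevents double definitions if the file is reached through more than one inclusion path. A minimal sketch of the intended usage (illustrative, not part of the patch):

```c
/* Define before the include: functions become static and inlinable,
 * and xxhash.c is included through xxhash.h. */
#define XXH_INLINE_ALL
#include "xxhash.h"

#include <string.h>

static XXH64_hash_t quick_hash(const char* msg)
{
    return XXH64(msg, strlen(msg), 0);  /* candidate for full inlining */
}
```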