Update xxHash to 1ea98d6a38300f7224869de856a876a2050cdf78

git-svn-id: https://svn.eduke32.com/eduke32@8185 1a8010ca-5511-0410-912e-c29ae57300e0
This commit is contained in:
terminx 2019-10-19 23:45:01 +00:00 committed by Christoph Oelckers
parent 462bd8d292
commit 8d74b9562f
3 changed files with 615 additions and 599 deletions

File diff suppressed because it is too large Load diff

View file

@ -178,7 +178,16 @@ XXH_PUBLIC_API unsigned XXH_versionNumber (void);
# include <stdint.h>
typedef uint32_t XXH32_hash_t;
#else
typedef unsigned int XXH32_hash_t;
# include <limits.h>
# if UINT_MAX == 0xFFFFFFFFUL
typedef unsigned int XXH32_hash_t;
# else
# if ULONG_MAX == 0xFFFFFFFFUL
typedef unsigned long XXH32_hash_t;
# else
# error "unsupported platform : need a 32-bit type"
# endif
# endif
#endif
/*! XXH32() :
@ -186,21 +195,13 @@ XXH_PUBLIC_API unsigned XXH_versionNumber (void);
The memory between input & input+length must be valid (allocated and read-accessible).
"seed" can be used to alter the result predictably.
Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
/*====== Streaming ======*/
typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */
XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed);
XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
/*
* Streaming functions generate the xxHash of an input provided in multiple segments.
* Note that, for small input, they are slower than single-call functions, due to state management.
* Streaming functions generate the xxHash value from an incremental input.
* This method is slower than single-call functions, due to state management.
* For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
*
* XXH state must first be allocated, using XXH*_createState() .
@ -214,23 +215,41 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
* This function returns the nn-bits hash as an int or long long.
*
* It's still possible to continue inserting input into the hash state after a digest,
* and generate some new hashes later on, by calling again XXH*_digest().
* and generate some new hash values later on, by invoking XXH*_digest() again.
*
* When done, free XXH state space if it was allocated dynamically.
* When done, release the state, using XXH*_freeState().
*/
typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */
XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed);
XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
/*====== Canonical representation ======*/
/* Default return values from XXH functions are basic unsigned 32 and 64 bits.
* This is the simplest and fastest format for further post-processing.
* However, this leaves open the question of what is the order of bytes,
* since little and big endian conventions will write the same number differently.
*
* The canonical representation settles this issue,
* by mandating big-endian convention,
* aka, the same convention as human-readable numbers (large digits first).
* When writing hash values to storage, sending them over a network, or printing them,
* it's highly recommended to use the canonical representation,
* to ensure portability across a wider range of systems, present and future.
*
* The following functions allow transformation of hash values into and from canonical format.
*/
typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
/* Default result type for XXH functions are primitive unsigned 32 and 64 bits.
* The canonical representation uses human-readable write convention, aka big-endian (large digits first).
* These functions allow transformation of hash result into and from its canonical format.
* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
*/
#ifndef XXH_NO_LONG_LONG
/*-**********************************************************************
@ -242,6 +261,7 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src
# include <stdint.h>
typedef uint64_t XXH64_hash_t;
#else
/* the following type must have a width of 64-bit */
typedef unsigned long long XXH64_hash_t;
#endif
@ -250,7 +270,7 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src
"seed" can be used to alter the result predictably.
This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
*/
XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, XXH64_hash_t seed);
/*====== Streaming ======*/
typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
@ -258,7 +278,7 @@ XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed);
XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, XXH64_hash_t seed);
XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
@ -336,9 +356,9 @@ struct XXH64_state_s {
*
* The XXH3 algorithm is still considered experimental.
* Produced results can still change between versions.
* For example, results produced by v0.7.1 are not comparable with results from v0.7.0 .
* Results produced by v0.7.x are not comparable with results from v0.7.y .
* It's nonetheless possible to use XXH3 for ephemeral data (local sessions),
* but avoid storing values in long-term storage for later re-use.
* but avoid storing values in long-term storage for later reads.
*
* The API supports one-shot hashing, streaming mode, and custom secrets.
*
@ -351,20 +371,39 @@ struct XXH64_state_s {
* However, at field level, they are identical on all platforms.
* The canonical representation solves the issue of identical byte-level representation across platforms,
* which is necessary for serialization.
* Would there be a better representation for a 128-bit hash result ?
* Are the names of the inner 64-bit fields important ? Should they be changed ?
* Q1 : Would there be a better representation for a 128-bit hash result ?
* Q2 : Are the names of the inner 64-bit fields important ? Should they be changed ?
*
* - Seed type for 128-bits variant : currently, it's a single 64-bit value, like the 64-bit variant.
* - Prototype XXH128() : XXH128() uses the same arguments as XXH64(), for consistency.
* It means it maps to XXH3_128bits_withSeed().
* This variant is slightly slower than XXH3_128bits(),
* because the seed is now part of the algorithm, and can't be simplified.
* Is that a good idea ?
*
* - Seed type for XXH128() : currently, it's a single 64-bit value, like the 64-bit variant.
* It could be argued that it's more logical to offer a 128-bit seed input parameter for a 128-bit hash.
* But a 128-bit seed is more difficult to use, since it requires passing a structure instead of a scalar value.
* Such a variant could either replace current one, or become an additional one.
* Farmhash, for example, offers both variants (the 128-bits seed variant is called `doubleSeed`).
* If both 64-bit and 128-bit seeds are possible, which variant should be called XXH128 ?
* Follow up question : if both 64-bit and 128-bit seeds are allowed, which variant should be called XXH128 ?
*
* - Result for len==0 : Currently, the result of hashing a zero-length input is `0`.
* It seems okay as a return value when using all "default" secret and seed (it used to be a request for XXH32/XXH64).
* - Result for len==0 : Currently, the result of hashing a zero-length input is always `0`.
* It seems okay as a return value when using "default" secret and seed.
* But is it still fine to return `0` when secret or seed are non-default ?
* Are there use cases which could depend on generating a different hash result for zero-length input when the secret is different ?
*
* - Consistency (1) : Streaming XXH128 uses an XXH3 state, which is the same state as XXH3_64bits().
* It means a 128bit streaming loop must invoke the following symbols :
* XXH3_createState(), XXH3_128bits_reset(), XXH3_128bits_update() (loop), XXH3_128bits_digest(), XXH3_freeState().
* Is that consistent enough ?
*
* - Consistency (2) : The canonical representation of `XXH3_64bits` is provided by existing functions
* XXH64_canonicalFromHash(), and reverse operation XXH64_hashFromCanonical().
* As a mirror, canonical functions for XXH128_hash_t results generated by `XXH3_128bits`
* are XXH128_canonicalFromHash() and XXH128_hashFromCanonical().
* Which means, `XXH3` doesn't appear in the names, because canonical functions operate on a type,
* independently of which algorithm was used to generate that type.
* Is that consistent enough ?
*/
#ifdef XXH_NAMESPACE
@ -427,8 +466,8 @@ typedef struct XXH3_state_s XXH3_state_t;
#define XXH3_INTERNALBUFFER_SIZE 256
struct XXH3_state_s {
XXH_ALIGN(64) XXH64_hash_t acc[8];
XXH_ALIGN(64) char customSecret[XXH3_SECRET_DEFAULT_SIZE]; /* used to store a custom secret generated from the seed. Makes state larger. Design might change */
XXH_ALIGN(64) char buffer[XXH3_INTERNALBUFFER_SIZE];
XXH_ALIGN(64) unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]; /* used to store a custom secret generated from the seed. Makes state larger. Design might change */
XXH_ALIGN(64) unsigned char buffer[XXH3_INTERNALBUFFER_SIZE];
XXH32_hash_t bufferedSize;
XXH32_hash_t nbStripesPerBlock;
XXH32_hash_t nbStripesSoFar;
@ -438,7 +477,7 @@ struct XXH3_state_s {
XXH64_hash_t totalLen;
XXH64_hash_t seed;
XXH64_hash_t reserved64;
const void* secret; /* note : there is some padding after, due to alignment on 64 bytes */
const unsigned char* secret; /* note : there is some padding after, due to alignment on 64 bytes */
}; /* typedef'd to XXH3_state_t */
/* Streaming requires state maintenance.
@ -507,7 +546,7 @@ XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH3_state_t* statePtr, const
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);
/* Note : for better performance, following functions should be inlined,
/* Note : for better performance, following functions can be inlined,
* using XXH_INLINE_ALL */
/* return : 1 if equal, 0 if different */

View file

@ -33,6 +33,12 @@
*/
/* since xxhash.c can be included (via XXH_INLINE_ALL),
* it's good practice to protect it with guard
* in case of multiple inclusions */
#ifndef XXHASH_C_01393879
#define XXHASH_C_01393879
/* *************************************
* Tuning parameters
***************************************/
@ -161,20 +167,15 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp
/* *************************************
* Basic Types
***************************************/
#ifndef MEM_MODULE
# if !defined (__VMS) \
&& (defined (__cplusplus) \
|| (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
# include <stdint.h>
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
# else
typedef unsigned char BYTE;
typedef unsigned short U16;
typedef unsigned int U32;
# endif
#if !defined (__VMS) \
&& (defined (__cplusplus) \
|| (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
# include <stdint.h>
typedef uint8_t xxh_u8;
#else
typedef unsigned char xxh_u8;
#endif
typedef XXH32_hash_t xxh_u32;
/* === Memory access === */
@ -182,23 +183,23 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
/* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }
#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
typedef union { U32 u32; } __attribute__((packed)) unalign;
static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
static xxh_u32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
#else
/* portable and safe solution. Generally efficient.
* see : http://stackoverflow.com/a/32095106/646947
*/
static U32 XXH_read32(const void* memPtr)
static xxh_u32 XXH_read32(const void* memPtr)
{
U32 val;
xxh_u32 val;
memcpy(&val, memPtr, sizeof(val));
return val;
}
@ -211,12 +212,21 @@ typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
#ifndef XXH_CPU_LITTLE_ENDIAN
# if defined(_WIN32) /* Windows is always little endian */ \
|| defined(__LITTLE_ENDIAN__) \
|| (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
# define XXH_CPU_LITTLE_ENDIAN 1
# elif defined(__BIG_ENDIAN__) \
|| (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
# define XXH_CPU_LITTLE_ENDIAN 0
# else
static int XXH_isLittleEndian(void)
{
const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 }; /* don't use static : performance detrimental */
return one.c[0];
}
# define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian()
# endif
#endif
@ -248,7 +258,7 @@ static int XXH_isLittleEndian(void)
#elif XXH_GCC_VERSION >= 403
# define XXH_swap32 __builtin_bswap32
#else
static U32 XXH_swap32 (U32 x)
static xxh_u32 XXH_swap32 (xxh_u32 x)
{
return ((x << 24) & 0xff000000 ) |
((x << 8) & 0x00ff0000 ) |
@ -263,23 +273,23 @@ static U32 XXH_swap32 (U32 x)
*****************************/
typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
XXH_FORCE_INLINE U32 XXH_readLE32(const void* ptr)
XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)
{
return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
}
static U32 XXH_readBE32(const void* ptr)
static xxh_u32 XXH_readBE32(const void* ptr)
{
return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
}
XXH_FORCE_INLINE U32
XXH_FORCE_INLINE xxh_u32
XXH_readLE32_align(const void* ptr, XXH_alignment align)
{
if (align==XXH_unaligned) {
return XXH_readLE32(ptr);
} else {
return XXH_CPU_LITTLE_ENDIAN ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr);
}
}
@ -293,13 +303,13 @@ XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
/* *******************************************************************
* 32-bit hash functions
*********************************************************************/
static const U32 PRIME32_1 = 0x9E3779B1U; /* 0b10011110001101110111100110110001 */
static const U32 PRIME32_2 = 0x85EBCA77U; /* 0b10000101111010111100101001110111 */
static const U32 PRIME32_3 = 0xC2B2AE3DU; /* 0b11000010101100101010111000111101 */
static const U32 PRIME32_4 = 0x27D4EB2FU; /* 0b00100111110101001110101100101111 */
static const U32 PRIME32_5 = 0x165667B1U; /* 0b00010110010101100110011110110001 */
static const xxh_u32 PRIME32_1 = 0x9E3779B1U; /* 0b10011110001101110111100110110001 */
static const xxh_u32 PRIME32_2 = 0x85EBCA77U; /* 0b10000101111010111100101001110111 */
static const xxh_u32 PRIME32_3 = 0xC2B2AE3DU; /* 0b11000010101100101010111000111101 */
static const xxh_u32 PRIME32_4 = 0x27D4EB2FU; /* 0b00100111110101001110101100101111 */
static const xxh_u32 PRIME32_5 = 0x165667B1U; /* 0b00010110010101100110011110110001 */
static U32 XXH32_round(U32 acc, U32 input)
static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
{
acc += input * PRIME32_2;
acc = XXH_rotl32(acc, 13);
@ -352,7 +362,7 @@ static U32 XXH32_round(U32 acc, U32 input)
}
/* mix all bits */
static U32 XXH32_avalanche(U32 h32)
static xxh_u32 XXH32_avalanche(xxh_u32 h32)
{
h32 ^= h32 >> 15;
h32 *= PRIME32_2;
@ -364,18 +374,16 @@ static U32 XXH32_avalanche(U32 h32)
#define XXH_get32bits(p) XXH_readLE32_align(p, align)
static U32
XXH32_finalize(U32 h32, const void* ptr, size_t len, XXH_alignment align)
static xxh_u32
XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
{
const BYTE* p = (const BYTE*)ptr;
#define PROCESS1 \
h32 += (*p++) * PRIME32_5; \
h32 += (*ptr++) * PRIME32_5; \
h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
#define PROCESS4 \
h32 += XXH_get32bits(p) * PRIME32_3; \
p+=4; \
h32 += XXH_get32bits(ptr) * PRIME32_3; \
ptr+=4; \
h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
/* Compact rerolled version */
@ -435,33 +443,32 @@ XXH32_finalize(U32 h32, const void* ptr, size_t len, XXH_alignment align)
}
}
XXH_FORCE_INLINE U32
XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_alignment align)
XXH_FORCE_INLINE xxh_u32
XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
{
const BYTE* p = (const BYTE*)input;
const BYTE* bEnd = p + len;
U32 h32;
const xxh_u8* bEnd = input + len;
xxh_u32 h32;
#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
if (p==NULL) {
if (input==NULL) {
len=0;
bEnd=p=(const BYTE*)(size_t)16;
bEnd=input=(const xxh_u8*)(size_t)16;
}
#endif
if (len>=16) {
const BYTE* const limit = bEnd - 15;
U32 v1 = seed + PRIME32_1 + PRIME32_2;
U32 v2 = seed + PRIME32_2;
U32 v3 = seed + 0;
U32 v4 = seed - PRIME32_1;
const xxh_u8* const limit = bEnd - 15;
xxh_u32 v1 = seed + PRIME32_1 + PRIME32_2;
xxh_u32 v2 = seed + PRIME32_2;
xxh_u32 v3 = seed + 0;
xxh_u32 v4 = seed - PRIME32_1;
do {
v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4;
v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4;
v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4;
v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4;
} while (p < limit);
v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4;
v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4;
v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4;
v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4;
} while (input < limit);
h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7)
+ XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
@ -469,29 +476,29 @@ XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_alignment align)
h32 = seed + PRIME32_5;
}
h32 += (U32)len;
h32 += (xxh_u32)len;
return XXH32_finalize(h32, p, len&15, align);
return XXH32_finalize(h32, input, len&15, align);
}
XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, unsigned int seed)
XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
{
#if 0
/* Simple version, good for code maintenance, but unfortunately slow for small inputs */
XXH32_state_t state;
XXH32_reset(&state, seed);
XXH32_update(&state, input, len);
XXH32_update(&state, (const xxh_u8*)input, len);
return XXH32_digest(&state);
#else
if (XXH_FORCE_ALIGN_CHECK) {
if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */
return XXH32_endian_align(input, len, seed, XXH_aligned);
return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
} }
return XXH32_endian_align(input, len, seed, XXH_unaligned);
return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
#endif
}
@ -514,7 +521,7 @@ XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t
memcpy(dstState, srcState, sizeof(*dstState));
}
XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
{
XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
memset(&state, 0, sizeof(state));
@ -538,21 +545,21 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len)
return XXH_ERROR;
#endif
{ const BYTE* p = (const BYTE*)input;
const BYTE* const bEnd = p + len;
{ const xxh_u8* p = (const xxh_u8*)input;
const xxh_u8* const bEnd = p + len;
state->total_len_32 += (XXH32_hash_t)len;
state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));
if (state->memsize + len < 16) { /* fill in tmp buffer */
XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len);
state->memsize += (XXH32_hash_t)len;
return XXH_OK;
}
if (state->memsize) { /* some data left from previous update */
XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
{ const U32* p32 = state->mem32;
XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
{ const xxh_u32* p32 = state->mem32;
state->v1 = XXH32_round(state->v1, XXH_readLE32(p32)); p32++;
state->v2 = XXH32_round(state->v2, XXH_readLE32(p32)); p32++;
state->v3 = XXH32_round(state->v3, XXH_readLE32(p32)); p32++;
@ -563,11 +570,11 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len)
}
if (p <= bEnd-16) {
const BYTE* const limit = bEnd - 16;
U32 v1 = state->v1;
U32 v2 = state->v2;
U32 v3 = state->v3;
U32 v4 = state->v4;
const xxh_u8* const limit = bEnd - 16;
xxh_u32 v1 = state->v1;
xxh_u32 v2 = state->v2;
xxh_u32 v3 = state->v3;
xxh_u32 v4 = state->v4;
do {
v1 = XXH32_round(v1, XXH_readLE32(p)); p+=4;
@ -594,7 +601,7 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len)
XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* state)
{
U32 h32;
xxh_u32 h32;
if (state->large_len) {
h32 = XXH_rotl32(state->v1, 1)
@ -607,7 +614,7 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* state)
h32 += state->total_len_32;
return XXH32_finalize(h32, state->mem32, state->memsize, XXH_aligned);
return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
}
@ -640,18 +647,8 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src
/*====== Memory access ======*/
#ifndef MEM_MODULE
# define MEM_MODULE
# if !defined (__VMS) \
&& (defined (__cplusplus) \
|| (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
# include <stdint.h>
typedef uint64_t U64;
# else
/* if compiler doesn't support unsigned long long, replace by another 64-bit type */
typedef unsigned long long U64;
# endif
#endif
typedef XXH64_hash_t xxh_u64;
/*! XXH_REROLL_XXH64:
* Whether to reroll the XXH64_finalize() loop.
@ -682,14 +679,14 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
/* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }
static xxh_u64 XXH_read64(const void* memPtr) { return *(const xxh_u64*) memPtr; }
#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign64;
static U64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; }
typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
static xxh_u64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; }
#else
@ -697,9 +694,9 @@ static U64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; }
* see : http://stackoverflow.com/a/32095106/646947
*/
static U64 XXH_read64(const void* memPtr)
static xxh_u64 XXH_read64(const void* memPtr)
{
U64 val;
xxh_u64 val;
memcpy(&val, memPtr, sizeof(val));
return val;
}
@ -711,7 +708,7 @@ static U64 XXH_read64(const void* memPtr)
#elif XXH_GCC_VERSION >= 403
# define XXH_swap64 __builtin_bswap64
#else
static U64 XXH_swap64 (U64 x)
static xxh_u64 XXH_swap64 (xxh_u64 x)
{
return ((x << 56) & 0xff00000000000000ULL) |
((x << 40) & 0x00ff000000000000ULL) |
@ -724,35 +721,35 @@ static U64 XXH_swap64 (U64 x)
}
#endif
XXH_FORCE_INLINE U64 XXH_readLE64(const void* ptr)
XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
{
return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
}
static U64 XXH_readBE64(const void* ptr)
static xxh_u64 XXH_readBE64(const void* ptr)
{
return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
}
XXH_FORCE_INLINE U64
XXH_FORCE_INLINE xxh_u64
XXH_readLE64_align(const void* ptr, XXH_alignment align)
{
if (align==XXH_unaligned)
return XXH_readLE64(ptr);
else
return XXH_CPU_LITTLE_ENDIAN ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr);
}
/*====== xxh64 ======*/
static const U64 PRIME64_1 = 0x9E3779B185EBCA87ULL; /* 0b1001111000110111011110011011000110000101111010111100101010000111 */
static const U64 PRIME64_2 = 0xC2B2AE3D27D4EB4FULL; /* 0b1100001010110010101011100011110100100111110101001110101101001111 */
static const U64 PRIME64_3 = 0x165667B19E3779F9ULL; /* 0b0001011001010110011001111011000110011110001101110111100111111001 */
static const U64 PRIME64_4 = 0x85EBCA77C2B2AE63ULL; /* 0b1000010111101011110010100111011111000010101100101010111001100011 */
static const U64 PRIME64_5 = 0x27D4EB2F165667C5ULL; /* 0b0010011111010100111010110010111100010110010101100110011111000101 */
static const xxh_u64 PRIME64_1 = 0x9E3779B185EBCA87ULL; /* 0b1001111000110111011110011011000110000101111010111100101010000111 */
static const xxh_u64 PRIME64_2 = 0xC2B2AE3D27D4EB4FULL; /* 0b1100001010110010101011100011110100100111110101001110101101001111 */
static const xxh_u64 PRIME64_3 = 0x165667B19E3779F9ULL; /* 0b0001011001010110011001111011000110011110001101110111100111111001 */
static const xxh_u64 PRIME64_4 = 0x85EBCA77C2B2AE63ULL; /* 0b1000010111101011110010100111011111000010101100101010111001100011 */
static const xxh_u64 PRIME64_5 = 0x27D4EB2F165667C5ULL; /* 0b0010011111010100111010110010111100010110010101100110011111000101 */
static U64 XXH64_round(U64 acc, U64 input)
static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
{
acc += input * PRIME64_2;
acc = XXH_rotl64(acc, 31);
@ -760,7 +757,7 @@ static U64 XXH64_round(U64 acc, U64 input)
return acc;
}
static U64 XXH64_mergeRound(U64 acc, U64 val)
static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
{
val = XXH64_round(0, val);
acc ^= val;
@ -768,7 +765,7 @@ static U64 XXH64_mergeRound(U64 acc, U64 val)
return acc;
}
static U64 XXH64_avalanche(U64 h64)
static xxh_u64 XXH64_avalanche(xxh_u64 h64)
{
h64 ^= h64 >> 33;
h64 *= PRIME64_2;
@ -781,23 +778,21 @@ static U64 XXH64_avalanche(U64 h64)
#define XXH_get64bits(p) XXH_readLE64_align(p, align)
static U64
XXH64_finalize(U64 h64, const void* ptr, size_t len, XXH_alignment align)
static xxh_u64
XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
{
const BYTE* p = (const BYTE*)ptr;
#define PROCESS1_64 \
h64 ^= (*p++) * PRIME64_5; \
h64 ^= (*ptr++) * PRIME64_5; \
h64 = XXH_rotl64(h64, 11) * PRIME64_1;
#define PROCESS4_64 \
h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; \
p+=4; \
h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * PRIME64_1; \
ptr+=4; \
h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
#define PROCESS8_64 { \
U64 const k1 = XXH64_round(0, XXH_get64bits(p)); \
p+=8; \
xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr)); \
ptr+=8; \
h64 ^= k1; \
h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; \
}
@ -906,33 +901,32 @@ XXH64_finalize(U64 h64, const void* ptr, size_t len, XXH_alignment align)
return 0; /* unreachable, but some compilers complain without it */
}
XXH_FORCE_INLINE U64
XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_alignment align)
XXH_FORCE_INLINE xxh_u64
XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
{
const BYTE* p = (const BYTE*)input;
const BYTE* bEnd = p + len;
U64 h64;
const xxh_u8* bEnd = input + len;
xxh_u64 h64;
#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
if (p==NULL) {
if (input==NULL) {
len=0;
bEnd=p=(const BYTE*)(size_t)32;
bEnd=input=(const xxh_u8*)(size_t)32;
}
#endif
if (len>=32) {
const BYTE* const limit = bEnd - 32;
U64 v1 = seed + PRIME64_1 + PRIME64_2;
U64 v2 = seed + PRIME64_2;
U64 v3 = seed + 0;
U64 v4 = seed - PRIME64_1;
const xxh_u8* const limit = bEnd - 32;
xxh_u64 v1 = seed + PRIME64_1 + PRIME64_2;
xxh_u64 v2 = seed + PRIME64_2;
xxh_u64 v3 = seed + 0;
xxh_u64 v4 = seed - PRIME64_1;
do {
v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8;
v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8;
v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8;
v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8;
} while (p<=limit);
v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8;
v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
} while (input<=limit);
h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
h64 = XXH64_mergeRound(h64, v1);
@ -944,29 +938,29 @@ XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_alignment align)
h64 = seed + PRIME64_5;
}
h64 += (U64) len;
h64 += (xxh_u64) len;
return XXH64_finalize(h64, p, len, align);
return XXH64_finalize(h64, input, len, align);
}
XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, unsigned long long seed)
XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed)
{
#if 0
/* Simple version, good for code maintenance, but unfortunately slow for small inputs */
XXH64_state_t state;
XXH64_reset(&state, seed);
XXH64_update(&state, input, len);
XXH64_update(&state, (const xxh_u8*)input, len);
return XXH64_digest(&state);
#else
if (XXH_FORCE_ALIGN_CHECK) {
if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */
return XXH64_endian_align(input, len, seed, XXH_aligned);
return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
} }
return XXH64_endian_align(input, len, seed, XXH_unaligned);
return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
#endif
}
@ -988,7 +982,7 @@ XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t
memcpy(dstState, srcState, sizeof(*dstState));
}
XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
{
XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
memset(&state, 0, sizeof(state));
@ -1011,19 +1005,19 @@ XXH64_update (XXH64_state_t* state, const void* input, size_t len)
return XXH_ERROR;
#endif
{ const BYTE* p = (const BYTE*)input;
const BYTE* const bEnd = p + len;
{ const xxh_u8* p = (const xxh_u8*)input;
const xxh_u8* const bEnd = p + len;
state->total_len += len;
if (state->memsize + len < 32) { /* fill in tmp buffer */
XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
state->memsize += (U32)len;
XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len);
state->memsize += (xxh_u32)len;
return XXH_OK;
}
if (state->memsize) { /* tmp buffer is full */
XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0));
state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1));
state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2));
@ -1033,11 +1027,11 @@ XXH64_update (XXH64_state_t* state, const void* input, size_t len)
}
if (p+32 <= bEnd) {
const BYTE* const limit = bEnd - 32;
U64 v1 = state->v1;
U64 v2 = state->v2;
U64 v3 = state->v3;
U64 v4 = state->v4;
const xxh_u8* const limit = bEnd - 32;
xxh_u64 v1 = state->v1;
xxh_u64 v2 = state->v2;
xxh_u64 v3 = state->v3;
xxh_u64 v4 = state->v4;
do {
v1 = XXH64_round(v1, XXH_readLE64(p)); p+=8;
@ -1064,13 +1058,13 @@ XXH64_update (XXH64_state_t* state, const void* input, size_t len)
XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* state)
{
U64 h64;
xxh_u64 h64;
if (state->total_len >= 32) {
U64 const v1 = state->v1;
U64 const v2 = state->v2;
U64 const v3 = state->v3;
U64 const v4 = state->v4;
xxh_u64 const v1 = state->v1;
xxh_u64 const v2 = state->v2;
xxh_u64 const v3 = state->v3;
xxh_u64 const v4 = state->v4;
h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
h64 = XXH64_mergeRound(h64, v1);
@ -1081,9 +1075,9 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* state)
h64 = state->v3 /*seed*/ + PRIME64_5;
}
h64 += (U64) state->total_len;
h64 += (xxh_u64) state->total_len;
return XXH64_finalize(h64, state->mem64, (size_t)state->total_len, XXH_aligned);
return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
}
@ -1112,3 +1106,5 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
#endif /* XXH_NO_LONG_LONG */
#endif /* XXHASH_C_01393879 */