From 514f556a32a28f88f6a8115ae370c3c867659627 Mon Sep 17 00:00:00 2001 From: terminx Date: Tue, 30 Sep 2014 04:04:12 +0000 Subject: [PATCH] Update xxhash to r36 git-svn-id: https://svn.eduke32.com/eduke32@4599 1a8010ca-5511-0410-912e-c29ae57300e0 --- polymer/eduke32/build/include/xxhash.h | 94 +++--- polymer/eduke32/build/src/xxhash.c | 393 +++++++++++++++++++++++-- 2 files changed, 411 insertions(+), 76 deletions(-) diff --git a/polymer/eduke32/build/include/xxhash.h b/polymer/eduke32/build/include/xxhash.h index a319bcc9b..431148553 100644 --- a/polymer/eduke32/build/include/xxhash.h +++ b/polymer/eduke32/build/include/xxhash.h @@ -1,5 +1,5 @@ /* - xxHash - Fast Hash algorithm + xxHash - Extremely Fast Hash algorithm Header File Copyright (C) 2012-2014, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -7,14 +7,14 @@ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - + * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -52,8 +52,8 @@ CRC32 0.43 GB/s 9 MD5-32 0.33 GB/s 10 Ronald L. Rivest SHA1-32 0.28 GB/s 10 -Q.Score is a measure of quality of the hash function. -It depends on successfully passing SMHasher test set. +Q.Score is a measure of quality of the hash function. +It depends on successfully passing SMHasher test set. 10 is a perfect score. */ @@ -64,18 +64,19 @@ extern "C" { #endif -//**************************** -// Type -//**************************** +/***************************** + Type +*****************************/ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; -//**************************** -// Simple Hash Functions -//**************************** +/***************************** + Simple Hash Functions +*****************************/ -unsigned int XXH32 (const void* input, int len, unsigned int seed); +unsigned int XXH32 (const void* input, unsigned int len, unsigned int seed); +unsigned long long XXH64 (const void* input, unsigned int len, unsigned long long seed); /* XXH32() : @@ -86,79 +87,82 @@ XXH32() : Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s Note that "len" is type "int", which means it is limited to 2^31-1. If your data is larger, use the advanced functions below. +XXH64() : + Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". */ -//**************************** -// Advanced Hash Functions -//**************************** +/***************************** + Advanced Hash Functions +*****************************/ void* XXH32_init (unsigned int seed); -XXH_errorcode XXH32_update (void* state, const void* input, int len); +XXH_errorcode XXH32_update (void* state, const void* input, unsigned int len); unsigned int XXH32_digest (void* state); +void* XXH64_init (unsigned long long seed); +XXH_errorcode XXH64_update (void* state, const void* input, unsigned int len); +unsigned long long XXH64_digest (void* state); + /* These functions calculate the xxhash of an input provided in several small packets, as opposed to an input provided as a single block. It must be started with : -void* XXH32_init() +void* XXHnn_init() The function returns a pointer which holds the state of calculation. +If the pointer is NULL, allocation has failed, so no state can be tracked. -This pointer must be provided as "void* state" parameter for XXH32_update(). -XXH32_update() can be called as many times as necessary. +The state pointer must be provided as "void* state" parameter for XXHnn_update(). +XXHnn_update() can be called as many times as necessary. The user must provide a valid (allocated) input. The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. -Note that "len" is type "int", which means it is limited to 2^31-1. -If your data is larger, it is recommended to chunk your data into blocks +Note that "len" is type "int", which means it is limited to 2^31-1. +If your data is larger, it is recommended to chunk your data into blocks of size for example 2^30 (1GB) to avoid any "int" overflow issue. -Finally, you can end the calculation anytime, by using XXH32_digest(). -This function returns the final 32-bits hash. -You must provide the same "void* state" parameter created by XXH32_init(). -Memory will be freed by XXH32_digest(). +Finally, you can end the calculation anytime, by using XXHnn_digest(). +This function returns the final nn-bits hash. +You must provide the same "void* state" parameter created by XXHnn_init(). +Memory will be freed by XXHnn_digest(). */ -int XXH32_sizeofState(); +int XXH32_sizeofState(void); XXH_errorcode XXH32_resetState(void* state, unsigned int seed); #define XXH32_SIZEOFSTATE 48 typedef struct { long long ll[(XXH32_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH32_stateSpace_t; + +int XXH64_sizeofState(void); +XXH_errorcode XXH64_resetState(void* state, unsigned long long seed); + +#define XXH64_SIZEOFSTATE 88 +typedef struct { long long ll[(XXH64_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH64_stateSpace_t; + /* These functions allow user application to make its own allocation for state. -XXH32_sizeofState() is used to know how much space must be allocated for the xxHash 32-bits state. +XXHnn_sizeofState() is used to know how much space must be allocated for the xxHash nn-bits state. Note that the state must be aligned to access 'long long' fields. Memory must be allocated and referenced by a pointer. -This pointer must then be provided as 'state' into XXH32_resetState(), which initializes the state. +This pointer must then be provided as 'state' into XXHnn_resetState(), which initializes the state. For static allocation purposes (such as allocation on stack, or freestanding systems without malloc()), -use the structure XXH32_stateSpace_t, which will ensure that memory space is large enough and correctly aligned to access 'long long' fields. +use the structure XXHnn_stateSpace_t, which will ensure that memory space is large enough and correctly aligned to access 'long long' fields. */ -unsigned int XXH32_intermediateDigest (void* state); +unsigned int XXH32_intermediateDigest (void* state); +unsigned long long XXH64_intermediateDigest (void* state); /* -This function does the same as XXH32_digest(), generating a 32-bit hash, +These functions do the same as XXHnn_digest(), generating a nn-bit hash, but preserve memory context. -This way, it becomes possible to generate intermediate hashes, and then continue feeding data with XXH32_update(). -To free memory context, use XXH32_digest(), or free(). +This way, it becomes possible to generate intermediate hashes, and then continue feeding data with XXHnn_update(). +To free memory context, use XXHnn_digest(), or free(). */ - -//**************************** -// Deprecated function names -//**************************** -// The following translations are provided to ease code transition -// You are encouraged to no longer this function names -#define XXH32_feed XXH32_update -#define XXH32_result XXH32_digest -#define XXH32_getIntermediateResult XXH32_intermediateDigest - - - #if defined (__cplusplus) } #endif diff --git a/polymer/eduke32/build/src/xxhash.c b/polymer/eduke32/build/src/xxhash.c index 8e453a82e..54896975b 100644 --- a/polymer/eduke32/build/src/xxhash.c +++ b/polymer/eduke32/build/src/xxhash.c @@ -47,7 +47,7 @@ You can contact the author at : // When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. // This option has a very small performance cost (only measurable on small inputs). // By default, this option is disabled. To enable it, uncomment below define : -//#define XXH_ACCEPT_NULL_INPUT_POINTER 1 +// #define XXH_ACCEPT_NULL_INPUT_POINTER 1 // XXH_FORCE_NATIVE_FORMAT : // By default, xxHash library provides endian-independant Hash values, based on little-endian convention. @@ -58,7 +58,6 @@ You can contact the author at : // This option has no impact on Little_Endian CPU. #define XXH_FORCE_NATIVE_FORMAT 0 - //************************************** // Compiler Specific Options //************************************** @@ -69,7 +68,7 @@ You can contact the author at : #ifdef _MSC_VER // Visual Studio # define FORCE_INLINE static __forceinline -#else +#else # ifdef __GNUC__ # define FORCE_INLINE static inline __attribute__((always_inline)) # else @@ -77,7 +76,6 @@ You can contact the author at : # endif #endif - //************************************** // Includes & Memory related functions //************************************** @@ -126,12 +124,14 @@ FORCE_INLINE void* XXH_memcpy(void* dest, const void* src, size_t size) { return #endif typedef struct _U32_S { U32 v; } _PACKED U32_S; +typedef struct _U64_S { U64 v; } _PACKED U64_S; #if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) # pragma pack(pop) #endif #define A32(x) (((U32_S *)(x))->v) +#define A64(x) (((U64_S *)(x))->v) //*************************************** @@ -142,20 +142,33 @@ typedef struct _U32_S { U32 v; } _PACKED U32_S; // Note : although _rotl exists for minGW (GCC under windows), performance seems poor #if defined(_MSC_VER) # define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) #else # define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) #endif #if defined(_MSC_VER) // Visual Studio # define XXH_swap32 _byteswap_ulong +# define XXH_swap64 _byteswap_uint64 #elif GCC_VERSION >= 403 # define XXH_swap32 __builtin_bswap32 +# define XXH_swap64 __builtin_bswap64 #else static inline U32 XXH_swap32 (U32 x) { return ((x << 24) & 0xff000000 ) | - ((x << 8) & 0x00ff0000 ) | - ((x >> 8) & 0x0000ff00 ) | - ((x >> 24) & 0x000000ff );} + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff );} +static inline U64 XXH_swap64 (U64 x) { + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL);} #endif @@ -168,6 +181,11 @@ static inline U32 XXH_swap32 (U32 x) { #define PRIME32_4 668265263U #define PRIME32_5 374761393U +#define PRIME64_1 11400714785074694791ULL +#define PRIME64_2 14029467366897019727ULL +#define PRIME64_3 1609587929392839161ULL +#define PRIME64_4 9650029242287828579ULL +#define PRIME64_5 2870177450012600261ULL //************************************** // Architecture Macros @@ -182,7 +200,7 @@ typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; //************************************** // Macros //************************************** -#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(!!(c)) }; } // use only *after* variable declarations +#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } // use only *after* variable declarations //**************************** @@ -191,27 +209,38 @@ typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; FORCE_INLINE U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align) -{ +{ if (align==XXH_unaligned) - return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr)); + return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr)); else - return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr); + return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr); } FORCE_INLINE U32 XXH_readLE32(const U32* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); } +FORCE_INLINE U64 XXH_readLE64_align(const U64* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? A64(ptr) : XXH_swap64(A64(ptr)); + else + return endian==XXH_littleEndian ? *ptr : XXH_swap64(*ptr); +} + +FORCE_INLINE U64 XXH_readLE64(const U64* ptr, XXH_endianess endian) { return XXH_readLE64_align(ptr, endian, XXH_unaligned); } + //**************************** // Simple Hash Functions //**************************** -FORCE_INLINE U32 XXH32_endian_align(const void* input, int len, U32 seed, XXH_endianess endian, XXH_alignment align) +FORCE_INLINE U32 XXH32_endian_align(const void* input, unsigned int len, U32 seed, XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; + const BYTE* bEnd = p + len; U32 h32; +#define XXH_get32bits(p) XXH_readLE32_align((const U32*)p, endian, align) #ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (p==NULL) { len=0; p=(const BYTE*)(size_t)16; } + if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)16; } #endif if (len>=16) @@ -224,10 +253,10 @@ FORCE_INLINE U32 XXH32_endian_align(const void* input, int len, U32 seed, XXH_en do { - v1 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; - v2 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; - v3 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; - v4 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; + v1 += XXH_get32bits(p) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; + v2 += XXH_get32bits(p) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; + v3 += XXH_get32bits(p) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; + v4 += XXH_get32bits(p) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; } while (p<=limit); h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); @@ -239,9 +268,9 @@ FORCE_INLINE U32 XXH32_endian_align(const void* input, int len, U32 seed, XXH_en h32 += (U32) len; - while (p<=bEnd-4) + while (p+4<=bEnd) { - h32 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_3; + h32 += XXH_get32bits(p) * PRIME32_3; h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; p+=4; } @@ -263,7 +292,7 @@ FORCE_INLINE U32 XXH32_endian_align(const void* input, int len, U32 seed, XXH_en } -U32 XXH32(const void* input, int len, U32 seed) +U32 XXH32(const void* input, unsigned int len, U32 seed) { #if 0 // Simple version, good for code maintenance, but unfortunately slow for small inputs @@ -274,7 +303,7 @@ U32 XXH32(const void* input, int len, U32 seed) XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; # if !defined(XXH_USE_UNALIGNED_ACCESS) - if ((((size_t)input) & 3)) // Input is aligned, let's leverage the speed advantage + if ((((size_t)input) & 3) == 0) // Input is aligned, let's leverage the speed advantage { if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); @@ -290,6 +319,105 @@ U32 XXH32(const void* input, int len, U32 seed) #endif } +FORCE_INLINE U64 XXH64_endian_align(const void* input, unsigned int len, U64 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U64 h64; +#define XXH_get64bits(p) XXH_readLE64_align((const U64*)p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)32; } +#endif + + if (len>=32) + { + const BYTE* const limit = bEnd - 32; + U64 v1 = seed + PRIME64_1 + PRIME64_2; + U64 v2 = seed + PRIME64_2; + U64 v3 = seed + 0; + U64 v4 = seed - PRIME64_1; + + do + { + v1 += XXH_get64bits(p) * PRIME64_2; p+=8; v1 = XXH_rotl64(v1, 31); v1 *= PRIME64_1; + v2 += XXH_get64bits(p) * PRIME64_2; p+=8; v2 = XXH_rotl64(v2, 31); v2 *= PRIME64_1; + v3 += XXH_get64bits(p) * PRIME64_2; p+=8; v3 = XXH_rotl64(v3, 31); v3 *= PRIME64_1; + v4 += XXH_get64bits(p) * PRIME64_2; p+=8; v4 = XXH_rotl64(v4, 31); v4 *= PRIME64_1; + } while (p<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + + v1 *= PRIME64_2; v1 = XXH_rotl64(v1, 31); v1 *= PRIME64_1; h64 ^= v1; + h64 = h64 * PRIME64_1 + PRIME64_4; + + v2 *= PRIME64_2; v2 = XXH_rotl64(v2, 31); v2 *= PRIME64_1; h64 ^= v2; + h64 = h64 * PRIME64_1 + PRIME64_4; + + v3 *= PRIME64_2; v3 = XXH_rotl64(v3, 31); v3 *= PRIME64_1; h64 ^= v3; + h64 = h64 * PRIME64_1 + PRIME64_4; + + v4 *= PRIME64_2; v4 = XXH_rotl64(v4, 31); v4 *= PRIME64_1; h64 ^= v4; + h64 = h64 * PRIME64_1 + PRIME64_4; + } + else + { + h64 = seed + PRIME64_5; + } + + h64 += (U64) len; + + while (p+8<=bEnd) + { + U64 k1 = XXH_get64bits(p); + k1 *= PRIME64_2; k1 = XXH_rotl64(k1,31); k1 *= PRIME64_1; h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) + { + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +unsigned long long XXH64(const void* input, unsigned int len, unsigned long long seed) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + +# if !defined(XXH_USE_UNALIGNED_ACCESS) + if ((((size_t)input) & 7)==0) // Input is aligned, let's leverage the speed advantage + { + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } +# endif + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +} //**************************** // Advanced Hash Functions @@ -307,16 +435,34 @@ struct XXH_state32_t char memory[16]; }; +struct XXH_state64_t +{ + U64 total_len; + U64 seed; + U64 v1; + U64 v2; + U64 v3; + U64 v4; + int memsize; + char memory[32]; +}; -int XXH32_sizeofState() + +int XXH32_sizeofState(void) { XXH_STATIC_ASSERT(XXH32_SIZEOFSTATE >= sizeof(struct XXH_state32_t)); // A compilation error here means XXH32_SIZEOFSTATE is not large enough - return sizeof(struct XXH_state32_t); + return sizeof(struct XXH_state32_t); +} + +int XXH64_sizeofState(void) +{ + XXH_STATIC_ASSERT(XXH64_SIZEOFSTATE >= sizeof(struct XXH_state64_t)); // A compilation error here means XXH64_SIZEOFSTATE is not large enough + return sizeof(struct XXH_state64_t); } XXH_errorcode XXH32_resetState(void* state_in, U32 seed) -{ +{ struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; state->seed = seed; state->v1 = seed + PRIME32_1 + PRIME32_2; @@ -328,11 +474,31 @@ XXH_errorcode XXH32_resetState(void* state_in, U32 seed) return XXH_OK; } +XXH_errorcode XXH64_resetState(void* state_in, unsigned long long seed) +{ + struct XXH_state64_t * state = (struct XXH_state64_t *) state_in; + state->seed = seed; + state->v1 = seed + PRIME64_1 + PRIME64_2; + state->v2 = seed + PRIME64_2; + state->v3 = seed + 0; + state->v4 = seed - PRIME64_1; + state->total_len = 0; + state->memsize = 0; + return XXH_OK; +} + void* XXH32_init (U32 seed) { void* state = XXH_malloc (sizeof(struct XXH_state32_t)); - XXH32_resetState(state, seed); + if (state != NULL) XXH32_resetState(state, seed); + return state; +} + +void* XXH64_init (unsigned long long seed) +{ + void* state = XXH_malloc (sizeof(struct XXH_state64_t)); + if (state != NULL) XXH64_resetState(state, seed); return state; } @@ -362,7 +528,7 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* inpu { const U32* p32 = (const U32*)state->memory; state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++; - state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++; + state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++; state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++; state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++; } @@ -401,10 +567,10 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* inpu return XXH_OK; } -XXH_errorcode XXH32_update (void* state_in, const void* input, int len) +XXH_errorcode XXH32_update (void* state_in, const void* input, unsigned int len) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH32_update_endian(state_in, input, len, XXH_littleEndian); else @@ -431,7 +597,7 @@ FORCE_INLINE U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess h32 += (U32) state->total_len; - while (p<=bEnd-4) + while (p+4<=bEnd) { h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3; h32 = XXH_rotl32(h32, 17) * PRIME32_4; @@ -458,7 +624,7 @@ FORCE_INLINE U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess U32 XXH32_intermediateDigest (void* state_in) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH32_intermediateDigest_endian(state_in, XXH_littleEndian); else @@ -474,3 +640,168 @@ U32 XXH32_digest (void* state_in) return h32; } + + +FORCE_INLINE XXH_errorcode XXH64_update_endian (void* state_in, const void* input, int len, XXH_endianess endian) +{ + struct XXH_state64_t * state = (struct XXH_state64_t *) state_in; + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len += len; + + if (state->memsize + len < 32) // fill in tmp buffer + { + XXH_memcpy(state->memory + state->memsize, input, len); + state->memsize += len; + return XXH_OK; + } + + if (state->memsize) // some data left from previous update + { + XXH_memcpy(state->memory + state->memsize, input, 32-state->memsize); + { + const U64* p64 = (const U64*)state->memory; + state->v1 += XXH_readLE64(p64, endian) * PRIME64_2; state->v1 = XXH_rotl64(state->v1, 31); state->v1 *= PRIME64_1; p64++; + state->v2 += XXH_readLE64(p64, endian) * PRIME64_2; state->v2 = XXH_rotl64(state->v2, 31); state->v2 *= PRIME64_1; p64++; + state->v3 += XXH_readLE64(p64, endian) * PRIME64_2; state->v3 = XXH_rotl64(state->v3, 31); state->v3 *= PRIME64_1; p64++; + state->v4 += XXH_readLE64(p64, endian) * PRIME64_2; state->v4 = XXH_rotl64(state->v4, 31); state->v4 *= PRIME64_1; p64++; + } + p += 32-state->memsize; + state->memsize = 0; + } + + if (p+32 <= bEnd) + { + const BYTE* const limit = bEnd - 32; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + do + { + v1 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; v1 = XXH_rotl64(v1, 31); v1 *= PRIME64_1; p+=8; + v2 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; v2 = XXH_rotl64(v2, 31); v2 *= PRIME64_1; p+=8; + v3 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; v3 = XXH_rotl64(v3, 31); v3 *= PRIME64_1; p+=8; + v4 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; v4 = XXH_rotl64(v4, 31); v4 *= PRIME64_1; p+=8; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) + { + XXH_memcpy(state->memory, p, bEnd-p); + state->memsize = (int)(bEnd-p); + } + + return XXH_OK; +} + +XXH_errorcode XXH64_update (void* state_in, const void* input, unsigned int len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH64_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE U64 XXH64_intermediateDigest_endian (void* state_in, XXH_endianess endian) +{ + struct XXH_state64_t * state = (struct XXH_state64_t *) state_in; + const BYTE * p = (const BYTE*)state->memory; + BYTE* bEnd = (BYTE*)state->memory + state->memsize; + U64 h64; + + if (state->total_len >= 32) + { + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + + v1 *= PRIME64_2; v1 = XXH_rotl64(v1, 31); v1 *= PRIME64_1; h64 ^= v1; + h64 = h64*PRIME64_1 + PRIME64_4; + + v2 *= PRIME64_2; v2 = XXH_rotl64(v2, 31); v2 *= PRIME64_1; h64 ^= v2; + h64 = h64*PRIME64_1 + PRIME64_4; + + v3 *= PRIME64_2; v3 = XXH_rotl64(v3, 31); v3 *= PRIME64_1; h64 ^= v3; + h64 = h64*PRIME64_1 + PRIME64_4; + + v4 *= PRIME64_2; v4 = XXH_rotl64(v4, 31); v4 *= PRIME64_1; h64 ^= v4; + h64 = h64*PRIME64_1 + PRIME64_4; + } + else + { + h64 = state->seed + PRIME64_5; + } + + h64 += (U64) state->total_len; + + while (p+8<=bEnd) + { + U64 k1 = XXH_readLE64((const U64*)p, endian); + k1 *= PRIME64_2; k1 = XXH_rotl64(k1,31); k1 *= PRIME64_1; h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) + { + h64 ^= (U64)(XXH_readLE32((const U32*)p, endian)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +unsigned long long XXH64_intermediateDigest (void* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_intermediateDigest_endian(state_in, XXH_littleEndian); + else + return XXH64_intermediateDigest_endian(state_in, XXH_bigEndian); +} + + +unsigned long long XXH64_digest (void* state_in) +{ + U64 h64 = XXH64_intermediateDigest(state_in); + + XXH_free(state_in); + + return h64; +} +