From df32b0b788046949ec49728b847ce35b09988b61 Mon Sep 17 00:00:00 2001 From: terminx Date: Mon, 11 Jan 2016 05:05:47 +0000 Subject: [PATCH] Update to the newest version of xxHash. git-svn-id: https://svn.eduke32.com/eduke32@5542 1a8010ca-5511-0410-912e-c29ae57300e0 --- polymer/eduke32/build/include/xxhash.h | 213 ++++--- polymer/eduke32/build/src/osd.c | 3 +- polymer/eduke32/build/src/xxhash.c | 794 +++++++++++++++---------- 3 files changed, 615 insertions(+), 395 deletions(-) diff --git a/polymer/eduke32/build/include/xxhash.h b/polymer/eduke32/build/include/xxhash.h index 431148553..c287866eb 100644 --- a/polymer/eduke32/build/include/xxhash.h +++ b/polymer/eduke32/build/include/xxhash.h @@ -1,7 +1,8 @@ /* xxHash - Extremely Fast Hash algorithm Header File - Copyright (C) 2012-2014, Yann Collet. + Copyright (C) 2012-2016, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) Redistribution and use in source and binary forms, with or without @@ -28,7 +29,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : - - xxHash source repository : http://code.google.com/p/xxhash/ + - xxHash source repository : https://github.com/Cyan4973/xxHash */ /* Notice extracted from xxHash homepage : @@ -55,114 +56,174 @@ SHA1-32 0.28 GB/s 10 Q.Score is a measure of quality of the hash function. It depends on successfully passing SMHasher test set. 10 is a perfect score. + +A 64-bits version, named XXH64, is available since r35. +It offers much better speed, but for 64-bits applications only. +Name Speed on 64 bits Speed on 32 bits +XXH64 13.8 GB/s 1.9 GB/s +XXH32 6.8 GB/s 6.0 GB/s */ -#pragma once +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 #if defined (__cplusplus) extern "C" { #endif -/***************************** - Type -*****************************/ +/* **************************** +* Definitions +******************************/ +#include /* size_t */ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; +/* **************************** +* API modifier +******************************/ +/*!XXH_PRIVATE_API +* Transforms all publics symbols within `xxhash.c` into private ones. +* Methodology : +* instead of : #include "xxhash.h" +* do : +* #define XXH_PRIVATE_API +* #include "xxhash.c" // note the .c , instead of .h +* also : don't compile and link xxhash.c separately +*/ +#ifdef XXH_PRIVATE_API +# if defined(__GNUC__) +# define XXH_PUBLIC_API static __attribute__((unused)) +# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define XXH_PUBLIC_API static inline +# elif defined(_MSC_VER) +# define XXH_PUBLIC_API static __inline +# else +# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ +# endif +#else +# define XXH_PUBLIC_API /* do nothing */ +#endif -/***************************** - Simple Hash Functions -*****************************/ +/*!XXH_NAMESPACE, aka Namespace Emulation : -unsigned int XXH32 (const void* input, unsigned int len, unsigned int seed); -unsigned long long XXH64 (const void* input, unsigned int len, unsigned long long seed); +If you want to include _and expose_ xxHash functions from within your own library, +but also want to avoid symbol collisions with another library which also includes xxHash, -/* +you can use XXH_NAMESPACE, to automatically prefix any public symbol from `xxhash.c` +with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). + +Note that no change is required within the calling program as long as it also includes `xxhash.h` : +regular symbol name will be automatically translated by this header. +*/ +#ifdef XXH_NAMESPACE +# define XXH_CAT(A,B) A##B +# define XXH_NAME2(A,B) XXH_CAT(A,B) +# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) +# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) +# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) +# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) +# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) +# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) +# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) +# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) +# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) +# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) +# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) +# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) +# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) +#endif + + +/* ************************************* +* Version +***************************************/ +#define XXH_VERSION_MAJOR 0 +#define XXH_VERSION_MINOR 5 +#define XXH_VERSION_RELEASE 0 +#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) +XXH_PUBLIC_API unsigned XXH_versionNumber (void); + + +/* **************************** +* Simple Hash Functions +******************************/ + +XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t length, unsigned int seed); +XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t length, unsigned long long seed); + +/*! XXH32() : - Calculate the 32-bits hash of sequence of length "len" stored at memory address "input". - The memory between input & input+len must be valid (allocated and read-accessible). + Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". + The memory between input & input+length must be valid (allocated and read-accessible). "seed" can be used to alter the result predictably. - This function successfully passes all SMHasher tests. Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s - Note that "len" is type "int", which means it is limited to 2^31-1. - If your data is larger, use the advanced functions below. XXH64() : Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". + "seed" can be used to alter the result predictably. + This function runs faster on 64-bits systems, but slower on 32-bits systems (see benchmark). */ +/* **************************** +* Advanced Hash Functions +******************************/ +typedef struct XXH32_state_s XXH32_state_t; /* incomplete */ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete */ -/***************************** - Advanced Hash Functions -*****************************/ -void* XXH32_init (unsigned int seed); -XXH_errorcode XXH32_update (void* state, const void* input, unsigned int len); -unsigned int XXH32_digest (void* state); +/*!Static allocation + For static linking only, do not use in the context of DLL ! */ +typedef struct { long long ll[ 6]; } XXH32_stateBody_t; +typedef struct { long long ll[11]; } XXH64_stateBody_t; -void* XXH64_init (unsigned long long seed); -XXH_errorcode XXH64_update (void* state, const void* input, unsigned int len); -unsigned long long XXH64_digest (void* state); +#define XXH32_CREATESTATE_STATIC(name) XXH32_stateBody_t name##xxhbody; void* name##xxhvoid = &(name##xxhbody); XXH32_state_t* name = (XXH32_state_t*)(name##xxhvoid) /* no final ; */ +#define XXH64_CREATESTATE_STATIC(name) XXH64_stateBody_t name##xxhbody; void* name##xxhvoid = &(name##xxhbody); XXH64_state_t* name = (XXH64_state_t*)(name##xxhvoid) /* no final ; */ -/* -These functions calculate the xxhash of an input provided in several small packets, -as opposed to an input provided as a single block. -It must be started with : -void* XXHnn_init() -The function returns a pointer which holds the state of calculation. -If the pointer is NULL, allocation has failed, so no state can be tracked. +/*!Dynamic allocation + To be preferred in the context of DLL */ -The state pointer must be provided as "void* state" parameter for XXHnn_update(). -XXHnn_update() can be called as many times as necessary. -The user must provide a valid (allocated) input. +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); + + +/* hash streaming */ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* statePtr); + +/*! +These functions generate the xxHash of an input provided in multiple segments, +as opposed to provided as a single block. + +XXH state must first be allocated, using either static or dynamic method provided above. + +Start a new hash by initializing state with a seed, using XXHnn_reset(). + +Then, feed the hash state by calling XXHnn_update() as many times as necessary. +Obviously, input must be valid, hence allocated and read accessible. The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. -Note that "len" is type "int", which means it is limited to 2^31-1. -If your data is larger, it is recommended to chunk your data into blocks -of size for example 2^30 (1GB) to avoid any "int" overflow issue. -Finally, you can end the calculation anytime, by using XXHnn_digest(). -This function returns the final nn-bits hash. -You must provide the same "void* state" parameter created by XXHnn_init(). -Memory will be freed by XXHnn_digest(). -*/ +Finally, a hash value can be produced anytime, by using XXHnn_digest(). +This function returns the nn-bits hash. +It's nonetheless possible to continue inserting input into the hash state +and later on generate some new hashes, by calling again XXHnn_digest(). - -int XXH32_sizeofState(void); -XXH_errorcode XXH32_resetState(void* state, unsigned int seed); - -#define XXH32_SIZEOFSTATE 48 -typedef struct { long long ll[(XXH32_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH32_stateSpace_t; - -int XXH64_sizeofState(void); -XXH_errorcode XXH64_resetState(void* state, unsigned long long seed); - -#define XXH64_SIZEOFSTATE 88 -typedef struct { long long ll[(XXH64_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH64_stateSpace_t; - -/* -These functions allow user application to make its own allocation for state. - -XXHnn_sizeofState() is used to know how much space must be allocated for the xxHash nn-bits state. -Note that the state must be aligned to access 'long long' fields. Memory must be allocated and referenced by a pointer. -This pointer must then be provided as 'state' into XXHnn_resetState(), which initializes the state. - -For static allocation purposes (such as allocation on stack, or freestanding systems without malloc()), -use the structure XXHnn_stateSpace_t, which will ensure that memory space is large enough and correctly aligned to access 'long long' fields. -*/ - - -unsigned int XXH32_intermediateDigest (void* state); -unsigned long long XXH64_intermediateDigest (void* state); -/* -These functions do the same as XXHnn_digest(), generating a nn-bit hash, -but preserve memory context. -This way, it becomes possible to generate intermediate hashes, and then continue feeding data with XXHnn_update(). -To free memory context, use XXHnn_digest(), or free(). +When done, free XXH state space if it was allocated dynamically. */ #if defined (__cplusplus) } #endif + +#endif /* XXHASH_H_5627135585666179 */ diff --git a/polymer/eduke32/build/src/osd.c b/polymer/eduke32/build/src/osd.c index a8b7bd2dc..d23ae8937 100644 --- a/polymer/eduke32/build/src/osd.c +++ b/polymer/eduke32/build/src/osd.c @@ -353,7 +353,8 @@ static int32_t _internal_osdfunc_fileinfo(const osdfuncparm_t *parm) klseek(i, 0, BSEEK_SET); int32_t xxhtime = getticks(); - void *xxh = XXH32_init(0x1337); + XXH32_CREATESTATE_STATIC(xxh); + XXH32_reset(xxh, 0x1337); do { j = kread(i, buf, 256); diff --git a/polymer/eduke32/build/src/xxhash.c b/polymer/eduke32/build/src/xxhash.c index 54896975b..d33113fe2 100644 --- a/polymer/eduke32/build/src/xxhash.c +++ b/polymer/eduke32/build/src/xxhash.c @@ -1,6 +1,7 @@ /* xxHash - Fast Hash algorithm -Copyright (C) 2012-2014, Yann Collet. +Copyright (C) 2012-2016, Yann Collet + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) Redistribution and use in source and binary forms, with or without @@ -27,119 +28,162 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : -- xxHash source repository : http://code.google.com/p/xxhash/ +- xxHash source repository : https://github.com/Cyan4973/xxHash */ -//************************************** -// Tuning parameters -//************************************** -// Unaligned memory access is automatically enabled for "common" CPU, such as x86. -// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected. -// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance. -// You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32). -#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) -# define XXH_USE_UNALIGNED_ACCESS 1 +/* ************************************* +* Tuning parameters +***************************************/ +/*!XXH_FORCE_MEMORY_ACCESS + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. + * It can generate buggy code on targets which do not support unaligned memory accesses. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://stackoverflow.com/a/32095106/646947 for details. + * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define XXH_FORCE_MEMORY_ACCESS 2 +# elif defined(__INTEL_COMPILER) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) +# define XXH_FORCE_MEMORY_ACCESS 1 +# endif #endif -// XXH_ACCEPT_NULL_INPUT_POINTER : -// If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. -// When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. -// This option has a very small performance cost (only measurable on small inputs). -// By default, this option is disabled. To enable it, uncomment below define : -// #define XXH_ACCEPT_NULL_INPUT_POINTER 1 +/*!XXH_ACCEPT_NULL_INPUT_POINTER : + * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. + * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. + * By default, this option is disabled. To enable it, uncomment below define : + */ +/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ -// XXH_FORCE_NATIVE_FORMAT : -// By default, xxHash library provides endian-independant Hash values, based on little-endian convention. -// Results are therefore identical for little-endian and big-endian CPU. -// This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. -// Should endian-independance be of no importance for your application, you may set the #define below to 1. -// It will improve speed for Big-endian CPU. -// This option has no impact on Little_Endian CPU. +/*!XXH_FORCE_NATIVE_FORMAT : + * By default, xxHash library provides endian-independant Hash values, based on little-endian convention. + * Results are therefore identical for little-endian and big-endian CPU. + * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. + * Should endian-independance be of no importance for your application, you may set the #define below to 1, + * to improve speed for Big-endian CPU. + * This option has no impact on Little_Endian CPU. + */ #define XXH_FORCE_NATIVE_FORMAT 0 -//************************************** -// Compiler Specific Options -//************************************** -// Disable some Visual warning messages -#ifdef _MSC_VER // Visual Studio -# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant +/*!XXH_USELESS_ALIGN_BRANCH : + * This is a minor performance trick, only useful with lots of very small keys. + * It means : don't check for aligned/unaligned input, because performance will be the same. + * It saves one initial branch per hash. + */ +#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_USELESS_ALIGN_BRANCH 1 #endif -#ifdef _MSC_VER // Visual Studio + +/* ************************************* +* Compiler Specific Options +***************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ # define FORCE_INLINE static __forceinline #else -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) +# if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif # else -# define FORCE_INLINE static inline -# endif +# define FORCE_INLINE static +# endif /* __STDC_VERSION__ */ #endif -//************************************** -// Includes & Memory related functions -//************************************** -#include "compat.h" -#include "xxhash.h" -// Modify the local functions below should you wish to use some other memory related routines -// for malloc(), free() + +/* ************************************* +* Includes & Memory related functions +***************************************/ +/* Modify the local functions below should you wish to use some other memory routines */ +/* for malloc(), free() */ #include -FORCE_INLINE void* XXH_malloc(size_t s) { return Bmalloc(s); } -FORCE_INLINE void XXH_free (void* p) { Bfree(p); } -// for memcpy() +static void* XXH_malloc(size_t s) { return malloc(s); } +static void XXH_free (void* p) { free(p); } +/* for memcpy() */ #include -FORCE_INLINE void* XXH_memcpy(void* dest, const void* src, size_t size) { return Bmemcpy(dest,src,size); } +static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } + +#include "xxhash.h" -//************************************** -// Basic Types -//************************************** -#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; -#else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; -#endif - -#if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS) -# define _PACKED __attribute__ ((packed)) -#else -# define _PACKED -#endif - -#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) -# ifdef __IBMC__ -# pragma pack(1) +/* ************************************* +* Basic Types +***************************************/ +#ifndef MEM_MODULE +# define MEM_MODULE +# if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; # else -# pragma pack(push, 1) + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; # endif #endif -typedef struct _U32_S { U32 v; } _PACKED U32_S; -typedef struct _U64_S { U64 v; } _PACKED U64_S; -#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) -# pragma pack(pop) -#endif +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) -#define A32(x) (((U32_S *)(x))->v) -#define A64(x) (((U64_S *)(x))->v) +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } +static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign; + +static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + +#else + +/* portable and safe solution. Generally efficient. + * see : http://stackoverflow.com/a/32095106/646947 + */ + +static U32 XXH_read32(const void* memPtr) +{ + U32 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +static U64 XXH_read64(const void* memPtr) +{ + U64 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif // XXH_FORCE_DIRECT_MEMORY_ACCESS -//*************************************** -// Compiler-specific Functions and Macros -//*************************************** +/* **************************************** +* Compiler-specific Functions and Macros +******************************************/ #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) -// Note : although _rotl exists for minGW (GCC under windows), performance seems poor +/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ #if defined(_MSC_VER) # define XXH_rotl32(x,r) _rotl(x,r) # define XXH_rotl64(x,r) _rotl64(x,r) @@ -148,19 +192,22 @@ typedef struct _U64_S { U64 v; } _PACKED U64_S; # define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) #endif -#if defined(_MSC_VER) // Visual Studio +#if defined(_MSC_VER) /* Visual Studio */ # define XXH_swap32 _byteswap_ulong # define XXH_swap64 _byteswap_uint64 #elif GCC_VERSION >= 403 # define XXH_swap32 __builtin_bswap32 # define XXH_swap64 __builtin_bswap64 #else -static inline U32 XXH_swap32 (U32 x) { +static U32 XXH_swap32 (U32 x) +{ return ((x << 24) & 0xff000000 ) | - ((x << 8) & 0x00ff0000 ) | - ((x >> 8) & 0x0000ff00 ) | - ((x >> 24) & 0x000000ff );} -static inline U64 XXH_swap64 (U64 x) { + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +static U64 XXH_swap64 (U64 x) +{ return ((x << 56) & 0xff00000000000000ULL) | ((x << 40) & 0x00ff000000000000ULL) | ((x << 24) & 0x0000ff0000000000ULL) | @@ -168,13 +215,64 @@ static inline U64 XXH_swap64 (U64 x) { ((x >> 8) & 0x00000000ff000000ULL) | ((x >> 24) & 0x0000000000ff0000ULL) | ((x >> 40) & 0x000000000000ff00ULL) | - ((x >> 56) & 0x00000000000000ffULL);} + ((x >> 56) & 0x00000000000000ffULL); +} #endif -//************************************** -// Constants -//************************************** +/* ************************************* +* Architecture Macros +***************************************/ +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; + +/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ +#ifndef XXH_CPU_LITTLE_ENDIAN + static const int g_one = 1; +# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one)) +#endif + + +/* *************************** +* Memory reads +*****************************/ +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); + else + return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); +} + +FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE32_align(ptr, endian, XXH_unaligned); +} + +FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); + else + return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); +} + +FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE64_align(ptr, endian, XXH_unaligned); +} + + +/* ************************************* +* Macros +***************************************/ +#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/* ************************************* +* Constants +***************************************/ #define PRIME32_1 2654435761U #define PRIME32_2 2246822519U #define PRIME32_3 3266489917U @@ -187,60 +285,25 @@ static inline U64 XXH_swap64 (U64 x) { #define PRIME64_4 9650029242287828579ULL #define PRIME64_5 2870177450012600261ULL -//************************************** -// Architecture Macros -//************************************** -typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; -#ifndef XXH_CPU_LITTLE_ENDIAN // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch - static const int one = 1; -# define XXH_CPU_LITTLE_ENDIAN (*(char*)(&one)) -#endif +XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } -//************************************** -// Macros -//************************************** -#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } // use only *after* variable declarations - - -//**************************** -// Memory reads -//**************************** -typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; - -FORCE_INLINE U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align) -{ - if (align==XXH_unaligned) - return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr)); - else - return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr); -} - -FORCE_INLINE U32 XXH_readLE32(const U32* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); } - -FORCE_INLINE U64 XXH_readLE64_align(const U64* ptr, XXH_endianess endian, XXH_alignment align) -{ - if (align==XXH_unaligned) - return endian==XXH_littleEndian ? A64(ptr) : XXH_swap64(A64(ptr)); - else - return endian==XXH_littleEndian ? *ptr : XXH_swap64(*ptr); -} - -FORCE_INLINE U64 XXH_readLE64(const U64* ptr, XXH_endianess endian) { return XXH_readLE64_align(ptr, endian, XXH_unaligned); } - - -//**************************** -// Simple Hash Functions -//**************************** -FORCE_INLINE U32 XXH32_endian_align(const void* input, unsigned int len, U32 seed, XXH_endianess endian, XXH_alignment align) +/* *************************** +* Simple Hash Functions +*****************************/ +FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)input; const BYTE* bEnd = p + len; U32 h32; -#define XXH_get32bits(p) XXH_readLE32_align((const U32*)p, endian, align) +#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) #ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)16; } + if (p==NULL) + { + len=0; + bEnd=p=(const BYTE*)(size_t)16; + } #endif if (len>=16) @@ -253,11 +316,24 @@ FORCE_INLINE U32 XXH32_endian_align(const void* input, unsigned int len, U32 see do { - v1 += XXH_get32bits(p) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; - v2 += XXH_get32bits(p) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; - v3 += XXH_get32bits(p) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; - v4 += XXH_get32bits(p) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; - } while (p<=limit); + v1 += XXH_get32bits(p) * PRIME32_2; + v1 = XXH_rotl32(v1, 13); + v1 *= PRIME32_1; + p+=4; + v2 += XXH_get32bits(p) * PRIME32_2; + v2 = XXH_rotl32(v2, 13); + v2 *= PRIME32_1; + p+=4; + v3 += XXH_get32bits(p) * PRIME32_2; + v3 = XXH_rotl32(v3, 13); + v3 *= PRIME32_1; + p+=4; + v4 += XXH_get32bits(p) * PRIME32_2; + v4 = XXH_rotl32(v4, 13); + v4 *= PRIME32_1; + p+=4; + } + while (p<=limit); h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); } @@ -292,18 +368,19 @@ FORCE_INLINE U32 XXH32_endian_align(const void* input, unsigned int len, U32 see } -U32 XXH32(const void* input, unsigned int len, U32 seed) +XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) { #if 0 - // Simple version, good for code maintenance, but unfortunately slow for small inputs - void* state = XXH32_init(seed); - XXH32_update(state, input, len); - return XXH32_digest(state); + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH32_state_t state; + XXH32_reset(&state, seed); + XXH32_update(&state, input, len); + return XXH32_digest(&state); #else XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; -# if !defined(XXH_USE_UNALIGNED_ACCESS) - if ((((size_t)input) & 3) == 0) // Input is aligned, let's leverage the speed advantage +# if !defined(XXH_USELESS_ALIGN_BRANCH) + if ((((size_t)input) & 3) == 0) /* Input is 4-bytes aligned, leverage the speed benefit */ { if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); @@ -319,15 +396,19 @@ U32 XXH32(const void* input, unsigned int len, U32 seed) #endif } -FORCE_INLINE U64 XXH64_endian_align(const void* input, unsigned int len, U64 seed, XXH_endianess endian, XXH_alignment align) +FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)input; const BYTE* bEnd = p + len; U64 h64; -#define XXH_get64bits(p) XXH_readLE64_align((const U64*)p, endian, align) +#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) #ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)32; } + if (p==NULL) + { + len=0; + bEnd=p=(const BYTE*)(size_t)32; + } #endif if (len>=32) @@ -340,25 +421,50 @@ FORCE_INLINE U64 XXH64_endian_align(const void* input, unsigned int len, U64 see do { - v1 += XXH_get64bits(p) * PRIME64_2; p+=8; v1 = XXH_rotl64(v1, 31); v1 *= PRIME64_1; - v2 += XXH_get64bits(p) * PRIME64_2; p+=8; v2 = XXH_rotl64(v2, 31); v2 *= PRIME64_1; - v3 += XXH_get64bits(p) * PRIME64_2; p+=8; v3 = XXH_rotl64(v3, 31); v3 *= PRIME64_1; - v4 += XXH_get64bits(p) * PRIME64_2; p+=8; v4 = XXH_rotl64(v4, 31); v4 *= PRIME64_1; - } while (p<=limit); + v1 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + v2 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + v3 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + v4 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + } + while (p<=limit); h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); - v1 *= PRIME64_2; v1 = XXH_rotl64(v1, 31); v1 *= PRIME64_1; h64 ^= v1; - h64 = h64 * PRIME64_1 + PRIME64_4; + v1 *= PRIME64_2; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + h64 ^= v1; + h64 = h64 * PRIME64_1 + PRIME64_4; - v2 *= PRIME64_2; v2 = XXH_rotl64(v2, 31); v2 *= PRIME64_1; h64 ^= v2; - h64 = h64 * PRIME64_1 + PRIME64_4; + v2 *= PRIME64_2; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + h64 ^= v2; + h64 = h64 * PRIME64_1 + PRIME64_4; - v3 *= PRIME64_2; v3 = XXH_rotl64(v3, 31); v3 *= PRIME64_1; h64 ^= v3; - h64 = h64 * PRIME64_1 + PRIME64_4; + v3 *= PRIME64_2; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + h64 ^= v3; + h64 = h64 * PRIME64_1 + PRIME64_4; - v4 *= PRIME64_2; v4 = XXH_rotl64(v4, 31); v4 *= PRIME64_1; h64 ^= v4; - h64 = h64 * PRIME64_1 + PRIME64_4; + v4 *= PRIME64_2; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + h64 ^= v4; + h64 = h64 * PRIME64_1 + PRIME64_4; } else { @@ -369,22 +475,25 @@ FORCE_INLINE U64 XXH64_endian_align(const void* input, unsigned int len, U64 see while (p+8<=bEnd) { - U64 k1 = XXH_get64bits(p); - k1 *= PRIME64_2; k1 = XXH_rotl64(k1,31); k1 *= PRIME64_1; h64 ^= k1; - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; - p+=8; + U64 k1 = XXH_get64bits(p); + k1 *= PRIME64_2; + k1 = XXH_rotl64(k1,31); + k1 *= PRIME64_1; + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; } if (p+4<=bEnd) { - h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; p+=4; } while (p= sizeof(struct XXH_state32_t)); // A compilation error here means XXH32_SIZEOFSTATE is not large enough - return sizeof(struct XXH_state32_t); + XXH_STATIC_ASSERT(sizeof(XXH32_stateBody_t) >= sizeof(XXH32_state_t)); /* A compilation error here means XXH32_state_t is not large enough */ + return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); } - -int XXH64_sizeofState(void) +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) { - XXH_STATIC_ASSERT(XXH64_SIZEOFSTATE >= sizeof(struct XXH_state64_t)); // A compilation error here means XXH64_SIZEOFSTATE is not large enough - return sizeof(struct XXH_state64_t); -} - - -XXH_errorcode XXH32_resetState(void* state_in, U32 seed) -{ - struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; - state->seed = seed; - state->v1 = seed + PRIME32_1 + PRIME32_2; - state->v2 = seed + PRIME32_2; - state->v3 = seed + 0; - state->v4 = seed - PRIME32_1; - state->total_len = 0; - state->memsize = 0; + XXH_free(statePtr); return XXH_OK; } -XXH_errorcode XXH64_resetState(void* state_in, unsigned long long seed) +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) { - struct XXH_state64_t * state = (struct XXH_state64_t *) state_in; - state->seed = seed; - state->v1 = seed + PRIME64_1 + PRIME64_2; - state->v2 = seed + PRIME64_2; - state->v3 = seed + 0; - state->v4 = seed - PRIME64_1; - state->total_len = 0; - state->memsize = 0; + XXH_STATIC_ASSERT(sizeof(XXH64_stateBody_t) >= sizeof(XXH64_state_t)); /* A compilation error here means XXH64_state_t is not large enough */ + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + XXH_free(statePtr); return XXH_OK; } -void* XXH32_init (U32 seed) -{ - void* state = XXH_malloc (sizeof(struct XXH_state32_t)); - if (state != NULL) XXH32_resetState(state, seed); - return state; -} +/*** Hash feed ***/ -void* XXH64_init (unsigned long long seed) +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) { - void* state = XXH_malloc (sizeof(struct XXH_state64_t)); - if (state != NULL) XXH64_resetState(state, seed); - return state; + XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)); + state.seed = seed; + state.v1 = seed + PRIME32_1 + PRIME32_2; + state.v2 = seed + PRIME32_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME32_1; + memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; } -FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* input, int len, XXH_endianess endian) +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) +{ + XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)); + state.seed = seed; + state.v1 = seed + PRIME64_1 + PRIME64_2; + state.v2 = seed + PRIME64_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME64_1; + memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; +} + + +FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) { - struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; const BYTE* p = (const BYTE*)input; const BYTE* const bEnd = p + len; @@ -515,22 +630,34 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* inpu state->total_len += len; - if (state->memsize + len < 16) // fill in tmp buffer + if (state->memsize + len < 16) /* fill in tmp buffer */ { - XXH_memcpy(state->memory + state->memsize, input, len); - state->memsize += len; + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); + state->memsize += (U32)len; return XXH_OK; } - if (state->memsize) // some data left from previous update + if (state->memsize) /* some data left from previous update */ { - XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize); + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); { - const U32* p32 = (const U32*)state->memory; - state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++; - state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++; - state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++; - state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++; + const U32* p32 = state->mem32; + state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; + state->v1 = XXH_rotl32(state->v1, 13); + state->v1 *= PRIME32_1; + p32++; + state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; + state->v2 = XXH_rotl32(state->v2, 13); + state->v2 *= PRIME32_1; + p32++; + state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; + state->v3 = XXH_rotl32(state->v3, 13); + state->v3 *= PRIME32_1; + p32++; + state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; + state->v4 = XXH_rotl32(state->v4, 13); + state->v4 *= PRIME32_1; + p32++; } p += 16-state->memsize; state->memsize = 0; @@ -546,11 +673,24 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* inpu do { - v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; - v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; - v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; - v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; - } while (p<=limit); + v1 += XXH_readLE32(p, endian) * PRIME32_2; + v1 = XXH_rotl32(v1, 13); + v1 *= PRIME32_1; + p+=4; + v2 += XXH_readLE32(p, endian) * PRIME32_2; + v2 = XXH_rotl32(v2, 13); + v2 *= PRIME32_1; + p+=4; + v3 += XXH_readLE32(p, endian) * PRIME32_2; + v3 = XXH_rotl32(v3, 13); + v3 *= PRIME32_1; + p+=4; + v4 += XXH_readLE32(p, endian) * PRIME32_2; + v4 = XXH_rotl32(v4, 13); + v4 *= PRIME32_1; + p+=4; + } + while (p<=limit); state->v1 = v1; state->v2 = v2; @@ -560,14 +700,14 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* inpu if (p < bEnd) { - XXH_memcpy(state->memory, p, bEnd-p); + XXH_memcpy(state->mem32, p, bEnd-p); state->memsize = (int)(bEnd-p); } return XXH_OK; } -XXH_errorcode XXH32_update (void* state_in, const void* input, unsigned int len) +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; @@ -579,11 +719,10 @@ XXH_errorcode XXH32_update (void* state_in, const void* input, unsigned int len) -FORCE_INLINE U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess endian) +FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) { - struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; - const BYTE * p = (const BYTE*)state->memory; - BYTE* bEnd = (BYTE*)state->memory + state->memsize; + const BYTE * p = (const BYTE*)state->mem32; + const BYTE* bEnd = (const BYTE*)(state->mem32) + state->memsize; U32 h32; if (state->total_len >= 16) @@ -599,7 +738,7 @@ FORCE_INLINE U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess while (p+4<=bEnd) { - h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3; + h32 += XXH_readLE32(p, endian) * PRIME32_3; h32 = XXH_rotl32(h32, 17) * PRIME32_4; p+=4; } @@ -621,30 +760,19 @@ FORCE_INLINE U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess } -U32 XXH32_intermediateDigest (void* state_in) +XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_intermediateDigest_endian(state_in, XXH_littleEndian); + return XXH32_digest_endian(state_in, XXH_littleEndian); else - return XXH32_intermediateDigest_endian(state_in, XXH_bigEndian); + return XXH32_digest_endian(state_in, XXH_bigEndian); } -U32 XXH32_digest (void* state_in) +FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) { - U32 h32 = XXH32_intermediateDigest(state_in); - - XXH_free(state_in); - - return h32; -} - - -FORCE_INLINE XXH_errorcode XXH64_update_endian (void* state_in, const void* input, int len, XXH_endianess endian) -{ - struct XXH_state64_t * state = (struct XXH_state64_t *) state_in; const BYTE* p = (const BYTE*)input; const BYTE* const bEnd = p + len; @@ -654,22 +782,34 @@ FORCE_INLINE XXH_errorcode XXH64_update_endian (void* state_in, const void* inpu state->total_len += len; - if (state->memsize + len < 32) // fill in tmp buffer + if (state->memsize + len < 32) /* fill in tmp buffer */ { - XXH_memcpy(state->memory + state->memsize, input, len); - state->memsize += len; + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); + state->memsize += (U32)len; return XXH_OK; } - if (state->memsize) // some data left from previous update + if (state->memsize) /* some data left from previous update */ { - XXH_memcpy(state->memory + state->memsize, input, 32-state->memsize); + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); { - const U64* p64 = (const U64*)state->memory; - state->v1 += XXH_readLE64(p64, endian) * PRIME64_2; state->v1 = XXH_rotl64(state->v1, 31); state->v1 *= PRIME64_1; p64++; - state->v2 += XXH_readLE64(p64, endian) * PRIME64_2; state->v2 = XXH_rotl64(state->v2, 31); state->v2 *= PRIME64_1; p64++; - state->v3 += XXH_readLE64(p64, endian) * PRIME64_2; state->v3 = XXH_rotl64(state->v3, 31); state->v3 *= PRIME64_1; p64++; - state->v4 += XXH_readLE64(p64, endian) * PRIME64_2; state->v4 = XXH_rotl64(state->v4, 31); state->v4 *= PRIME64_1; p64++; + const U64* p64 = state->mem64; + state->v1 += XXH_readLE64(p64, endian) * PRIME64_2; + state->v1 = XXH_rotl64(state->v1, 31); + state->v1 *= PRIME64_1; + p64++; + state->v2 += XXH_readLE64(p64, endian) * PRIME64_2; + state->v2 = XXH_rotl64(state->v2, 31); + state->v2 *= PRIME64_1; + p64++; + state->v3 += XXH_readLE64(p64, endian) * PRIME64_2; + state->v3 = XXH_rotl64(state->v3, 31); + state->v3 *= PRIME64_1; + p64++; + state->v4 += XXH_readLE64(p64, endian) * PRIME64_2; + state->v4 = XXH_rotl64(state->v4, 31); + state->v4 *= PRIME64_1; + p64++; } p += 32-state->memsize; state->memsize = 0; @@ -685,11 +825,24 @@ FORCE_INLINE XXH_errorcode XXH64_update_endian (void* state_in, const void* inpu do { - v1 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; v1 = XXH_rotl64(v1, 31); v1 *= PRIME64_1; p+=8; - v2 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; v2 = XXH_rotl64(v2, 31); v2 *= PRIME64_1; p+=8; - v3 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; v3 = XXH_rotl64(v3, 31); v3 *= PRIME64_1; p+=8; - v4 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; v4 = XXH_rotl64(v4, 31); v4 *= PRIME64_1; p+=8; - } while (p<=limit); + v1 += XXH_readLE64(p, endian) * PRIME64_2; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + p+=8; + v2 += XXH_readLE64(p, endian) * PRIME64_2; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + p+=8; + v3 += XXH_readLE64(p, endian) * PRIME64_2; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + p+=8; + v4 += XXH_readLE64(p, endian) * PRIME64_2; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + p+=8; + } + while (p<=limit); state->v1 = v1; state->v2 = v2; @@ -699,14 +852,14 @@ FORCE_INLINE XXH_errorcode XXH64_update_endian (void* state_in, const void* inpu if (p < bEnd) { - XXH_memcpy(state->memory, p, bEnd-p); + XXH_memcpy(state->mem64, p, bEnd-p); state->memsize = (int)(bEnd-p); } return XXH_OK; } -XXH_errorcode XXH64_update (void* state_in, const void* input, unsigned int len) +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; @@ -718,33 +871,44 @@ XXH_errorcode XXH64_update (void* state_in, const void* input, unsigned int len) -FORCE_INLINE U64 XXH64_intermediateDigest_endian (void* state_in, XXH_endianess endian) +FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) { - struct XXH_state64_t * state = (struct XXH_state64_t *) state_in; - const BYTE * p = (const BYTE*)state->memory; - BYTE* bEnd = (BYTE*)state->memory + state->memsize; + const BYTE * p = (const BYTE*)state->mem64; + const BYTE* bEnd = (const BYTE*)state->mem64 + state->memsize; U64 h64; if (state->total_len >= 32) { - U64 v1 = state->v1; - U64 v2 = state->v2; - U64 v3 = state->v3; - U64 v4 = state->v4; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); - v1 *= PRIME64_2; v1 = XXH_rotl64(v1, 31); v1 *= PRIME64_1; h64 ^= v1; - h64 = h64*PRIME64_1 + PRIME64_4; + v1 *= PRIME64_2; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + h64 ^= v1; + h64 = h64*PRIME64_1 + PRIME64_4; - v2 *= PRIME64_2; v2 = XXH_rotl64(v2, 31); v2 *= PRIME64_1; h64 ^= v2; - h64 = h64*PRIME64_1 + PRIME64_4; + v2 *= PRIME64_2; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + h64 ^= v2; + h64 = h64*PRIME64_1 + PRIME64_4; - v3 *= PRIME64_2; v3 = XXH_rotl64(v3, 31); v3 *= PRIME64_1; h64 ^= v3; - h64 = h64*PRIME64_1 + PRIME64_4; + v3 *= PRIME64_2; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + h64 ^= v3; + h64 = h64*PRIME64_1 + PRIME64_4; - v4 *= PRIME64_2; v4 = XXH_rotl64(v4, 31); v4 *= PRIME64_1; h64 ^= v4; - h64 = h64*PRIME64_1 + PRIME64_4; + v4 *= PRIME64_2; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + h64 ^= v4; + h64 = h64*PRIME64_1 + PRIME64_4; } else { @@ -755,22 +919,25 @@ FORCE_INLINE U64 XXH64_intermediateDigest_endian (void* state_in, XXH_endianess while (p+8<=bEnd) { - U64 k1 = XXH_readLE64((const U64*)p, endian); - k1 *= PRIME64_2; k1 = XXH_rotl64(k1,31); k1 *= PRIME64_1; h64 ^= k1; - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; - p+=8; + U64 k1 = XXH_readLE64(p, endian); + k1 *= PRIME64_2; + k1 = XXH_rotl64(k1,31); + k1 *= PRIME64_1; + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; } if (p+4<=bEnd) { - h64 ^= (U64)(XXH_readLE32((const U32*)p, endian)) * PRIME64_1; + h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; p+=4; } while (p