From 217759161974c4e5a2723bb5f1f44b89a8a3c56c Mon Sep 17 00:00:00 2001 From: terminx Date: Sat, 22 Mar 2014 09:26:39 +0000 Subject: [PATCH] Add xxHash (https://code.google.com/p/xxhash/), a faster alternative to CRC32, and implement it in a few places. This is around 20-30% faster than CRC32 for me (it's also implemented into the "fileinfo" console command, so you can test for yourselves). I didn't have time to gather up all of the files supported by the startup window so this isn't used there yet. Additionally, this is by the same author as the LZ4 compression library we already use. git-svn-id: https://svn.eduke32.com/eduke32@4387 1a8010ca-5511-0410-912e-c29ae57300e0 --- polymer/eduke32/Android.mk | 3 +- polymer/eduke32/build/Makefile | 3 +- polymer/eduke32/build/Makefile.deps | 1 + polymer/eduke32/build/Makefile.msvc | 3 +- polymer/eduke32/build/include/polymer.h | 4 +- polymer/eduke32/build/include/xxhash.h | 164 ++++++++ polymer/eduke32/build/src/engine.c | 7 +- polymer/eduke32/build/src/osd.c | 26 +- polymer/eduke32/build/src/polymer.c | 10 +- polymer/eduke32/build/src/texcache.c | 8 +- polymer/eduke32/build/src/xxhash.c | 476 ++++++++++++++++++++++++ polymer/eduke32/eduke32.vcxproj | 2 + polymer/eduke32/eduke32.vcxproj.filters | 6 + polymer/eduke32/source/astub.c | 20 +- polymer/eduke32/source/menus.c | 12 +- polymer/eduke32/source/rev.h | 2 +- 16 files changed, 710 insertions(+), 37 deletions(-) create mode 100644 polymer/eduke32/build/include/xxhash.h create mode 100644 polymer/eduke32/build/src/xxhash.c diff --git a/polymer/eduke32/Android.mk b/polymer/eduke32/Android.mk index 56152406e..952993454 100644 --- a/polymer/eduke32/Android.mk +++ b/polymer/eduke32/Android.mk @@ -60,7 +60,8 @@ BUILD_SRC = \ build/src/osd.c \ build/src/pragmas.c \ build/src/scriptfile.c \ - build/src/mutex.c + build/src/mutex.c \ + build/src/xxhash.c GL_SRC = \ build/src/mdsprite.c \ diff --git a/polymer/eduke32/build/Makefile b/polymer/eduke32/build/Makefile index 
1404f5e74..54ffa5803 100644 --- a/polymer/eduke32/build/Makefile +++ b/polymer/eduke32/build/Makefile @@ -77,7 +77,8 @@ ENGINEOBJS+= \ $(OBJ)/osd.$o \ $(OBJ)/pragmas.$o \ $(OBJ)/scriptfile.$o \ - $(OBJ)/mutex.$o + $(OBJ)/mutex.$o \ + $(OBJ)/xxhash.$o ifeq (1,$(USE_OPENGL)) ENGINEOBJS+= $(OBJ)/mdsprite.$o diff --git a/polymer/eduke32/build/Makefile.deps b/polymer/eduke32/build/Makefile.deps index 9e312f30c..6d854f31e 100644 --- a/polymer/eduke32/build/Makefile.deps +++ b/polymer/eduke32/build/Makefile.deps @@ -32,6 +32,7 @@ $(OBJ)/polymer.$o: $(SRC)/polymer.c $(INC)/polymer.h $(INC)/compat.h $(INC)/buil $(OBJ)/mutex.$o: $(SRC)/mutex.c $(INC)/mutex.h $(OBJ)/rawinput.$o: $(SRC)/rawinput.c $(INC)/rawinput.h $(OBJ)/winbits.$o: $(SRC)/winbits.c $(INC)/winbits.h +$(OBJ)/xxhash.$o: $(SRC)/xxhash.c $(INC)/xxhash.h $(OBJ)/lunatic.$o: $(SRC)/lunatic.c $(INC)/lunatic.h $(INC)/cache1d.h $(INC)/osd.h diff --git a/polymer/eduke32/build/Makefile.msvc b/polymer/eduke32/build/Makefile.msvc index bcb7293f2..f9566a5f7 100644 --- a/polymer/eduke32/build/Makefile.msvc +++ b/polymer/eduke32/build/Makefile.msvc @@ -95,7 +95,8 @@ ENGINEOBJS= \ $(OBJ)\scriptfile.$o \ $(OBJ)\polymer.$o \ $(OBJ)\mutex.$o \ - $(OBJ)\winbits.$o + $(OBJ)\winbits.$o \ + $(OBJ)\xxhash.$o EDITOROBJS=$(OBJ)\build.$o \ $(OBJ)\startwin.editor.$o \ diff --git a/polymer/eduke32/build/include/polymer.h b/polymer/eduke32/build/include/polymer.h index 86c928ed7..b6f51534c 100644 --- a/polymer/eduke32/build/include/polymer.h +++ b/polymer/eduke32/build/include/polymer.h @@ -286,7 +286,7 @@ typedef struct s_prwall { typedef struct s_prsprite { _prplane plane; - uint32_t crc; + uint32_t hash; } _prsprite; typedef struct s_prmirror { @@ -337,7 +337,7 @@ static inline void polymer_invalidatesprite(int32_t i) extern _prsprite *prsprites[MAXSPRITES]; if (prsprites[i]) - prsprites[i]->crc = 0xDEADBEEF; + prsprites[i]->hash = 0xDEADBEEF; } static inline void polymer_invalidateartmap(int32_t tilenum) diff --git 
a/polymer/eduke32/build/include/xxhash.h b/polymer/eduke32/build/include/xxhash.h new file mode 100644 index 000000000..a319bcc9b --- /dev/null +++ b/polymer/eduke32/build/include/xxhash.h @@ -0,0 +1,164 @@ +/* + xxHash - Fast Hash algorithm + Header File + Copyright (C) 2012-2014, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - xxHash source repository : http://code.google.com/p/xxhash/ +*/ + +/* Notice extracted from xxHash homepage : + +xxHash is an extremely fast Hash algorithm, running at RAM speed limits. +It also successfully passes all tests from the SMHasher suite. 
+ +Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) + +Name Speed Q.Score Author +xxHash 5.4 GB/s 10 +CrapWow 3.2 GB/s 2 Andrew +MumurHash 3a 2.7 GB/s 10 Austin Appleby +SpookyHash 2.0 GB/s 10 Bob Jenkins +SBox 1.4 GB/s 9 Bret Mulvey +Lookup3 1.2 GB/s 9 Bob Jenkins +SuperFastHash 1.2 GB/s 1 Paul Hsieh +CityHash64 1.05 GB/s 10 Pike & Alakuijala +FNV 0.55 GB/s 5 Fowler, Noll, Vo +CRC32 0.43 GB/s 9 +MD5-32 0.33 GB/s 10 Ronald L. Rivest +SHA1-32 0.28 GB/s 10 + +Q.Score is a measure of quality of the hash function. +It depends on successfully passing SMHasher test set. +10 is a perfect score. +*/ + +#pragma once + +#if defined (__cplusplus) +extern "C" { +#endif + + +//**************************** +// Type +//**************************** +typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; + + + +//**************************** +// Simple Hash Functions +//**************************** + +unsigned int XXH32 (const void* input, int len, unsigned int seed); + +/* +XXH32() : + Calculate the 32-bits hash of sequence of length "len" stored at memory address "input". + The memory between input & input+len must be valid (allocated and read-accessible). + "seed" can be used to alter the result predictably. + This function successfully passes all SMHasher tests. + Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s + Note that "len" is type "int", which means it is limited to 2^31-1. + If your data is larger, use the advanced functions below. +*/ + + + +//**************************** +// Advanced Hash Functions +//**************************** + +void* XXH32_init (unsigned int seed); +XXH_errorcode XXH32_update (void* state, const void* input, int len); +unsigned int XXH32_digest (void* state); + +/* +These functions calculate the xxhash of an input provided in several small packets, +as opposed to an input provided as a single block. 
+ +It must be started with : +void* XXH32_init() +The function returns a pointer which holds the state of calculation. + +This pointer must be provided as "void* state" parameter for XXH32_update(). +XXH32_update() can be called as many times as necessary. +The user must provide a valid (allocated) input. +The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. +Note that "len" is type "int", which means it is limited to 2^31-1. +If your data is larger, it is recommended to chunk your data into blocks +of size for example 2^30 (1GB) to avoid any "int" overflow issue. + +Finally, you can end the calculation anytime, by using XXH32_digest(). +This function returns the final 32-bits hash. +You must provide the same "void* state" parameter created by XXH32_init(). +Memory will be freed by XXH32_digest(). +*/ + + +int XXH32_sizeofState(); +XXH_errorcode XXH32_resetState(void* state, unsigned int seed); + +#define XXH32_SIZEOFSTATE 48 +typedef struct { long long ll[(XXH32_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH32_stateSpace_t; +/* +These functions allow user application to make its own allocation for state. + +XXH32_sizeofState() is used to know how much space must be allocated for the xxHash 32-bits state. +Note that the state must be aligned to access 'long long' fields. Memory must be allocated and referenced by a pointer. +This pointer must then be provided as 'state' into XXH32_resetState(), which initializes the state. + +For static allocation purposes (such as allocation on stack, or freestanding systems without malloc()), +use the structure XXH32_stateSpace_t, which will ensure that memory space is large enough and correctly aligned to access 'long long' fields. +*/ + + +unsigned int XXH32_intermediateDigest (void* state); +/* +This function does the same as XXH32_digest(), generating a 32-bit hash, +but preserve memory context. 
+This way, it becomes possible to generate intermediate hashes, and then continue feeding data with XXH32_update(). +To free memory context, use XXH32_digest(), or free(). +*/ + + + +//**************************** +// Deprecated function names +//**************************** +// The following translations are provided to ease code transition +// You are encouraged to no longer this function names +#define XXH32_feed XXH32_update +#define XXH32_result XXH32_digest +#define XXH32_getIntermediateResult XXH32_intermediateDigest + + + +#if defined (__cplusplus) +} +#endif diff --git a/polymer/eduke32/build/src/engine.c b/polymer/eduke32/build/src/engine.c index 807484260..4ed92f8f6 100644 --- a/polymer/eduke32/build/src/engine.c +++ b/polymer/eduke32/build/src/engine.c @@ -18,6 +18,7 @@ #include "a.h" #include "osd.h" #include "crc32.h" +#include "xxhash.h" #include "lz4.h" #include "baselayer.h" @@ -15019,7 +15020,7 @@ void setbrightness(char dabrightness, uint8_t dapalid, uint8_t flags) { static uint32_t lastpalettesum=0; - uint32_t newpalettesum = crc32once((uint8_t *)curpalettefaded, sizeof(curpalettefaded)); + uint32_t newpalettesum = XXH32((uint8_t *)curpalettefaded, sizeof(curpalettefaded), sizeof(curpalettefaded)); palsumdidchange = (newpalettesum != lastpalettesum); @@ -15116,12 +15117,10 @@ void setpalettefade(char r, char g, char b, char offset) { static uint32_t lastpalettesum=0; - uint32_t newpalettesum = crc32once((uint8_t *)curpalettefaded, sizeof(curpalettefaded)); + uint32_t newpalettesum = XXH32((uint8_t *)curpalettefaded, sizeof(curpalettefaded), sizeof(curpalettefaded)); if (newpalettesum != lastpalettesum || newpalettesum != g_lastpalettesum) - { setpalette(0,256); - } g_lastpalettesum = lastpalettesum = newpalettesum; } diff --git a/polymer/eduke32/build/src/osd.c b/polymer/eduke32/build/src/osd.c index 63dc6cb85..cda41bf3a 100644 --- a/polymer/eduke32/build/src/osd.c +++ b/polymer/eduke32/build/src/osd.c @@ -10,6 +10,7 @@ #include "pragmas.h" 
#include "scancodes.h" #include "crc32.h" +#include "xxhash.h" typedef struct _symbol { @@ -391,6 +392,9 @@ static int32_t _internal_osdfunc_fileinfo(const osdfuncparm_t *parm) uint32_t crc, length; int32_t i,j; char buf[256]; + void *xxh; + uint32_t xxhash; + int32_t crctime, xxhtime; if (parm->numparms != 1) return OSDCMD_SHOWHELP; @@ -402,6 +406,7 @@ static int32_t _internal_osdfunc_fileinfo(const osdfuncparm_t *parm) length = kfilelength(i); + crctime = getticks(); crc32init(&crc); do { @@ -410,13 +415,30 @@ static int32_t _internal_osdfunc_fileinfo(const osdfuncparm_t *parm) } while (j == 256); crc32finish(&crc); + crctime = getticks() - crctime; + + klseek(i, 0, BSEEK_SET); + + xxhtime = getticks(); + xxh = XXH32_init(0x1337); + do + { + j = kread(i, buf, 256); + XXH32_update(xxh, (uint8_t *) buf, j); + } + while (j == 256); + xxhash = XXH32_digest(xxh); + xxhtime = getticks() - xxhtime; kclose(i); OSD_Printf("fileinfo: %s\n" " File size: %d\n" - " CRC-32: %08X\n", - parm->parms[0], length, crc); + " CRC-32: %08X (%g sec)\n" + " xxHash: %08X (%g sec)\n", + parm->parms[0], length, + crc, (double)crctime/gettimerfreq(), + xxhash, (double)xxhtime/gettimerfreq()); return OSDCMD_OK; } diff --git a/polymer/eduke32/build/src/polymer.c b/polymer/eduke32/build/src/polymer.c index 493a63271..8aa74e501 100644 --- a/polymer/eduke32/build/src/polymer.c +++ b/polymer/eduke32/build/src/polymer.c @@ -7,7 +7,7 @@ #define POLYMER_C #include "polymer.h" #include "engine_priv.h" -#include "crc32.h" +#include "xxhash.h" #include "texcache.h" // CVARS @@ -3610,10 +3610,10 @@ void polymer_updatesprite(int32_t snum) if (tspr->cstat & 48 && searchit != 2) { - uint32_t crc = crc32once((uint8_t *)tspr, offsetof(spritetype, owner)); + uint32_t xxhash = XXH32((uint8_t *)tspr, offsetof(spritetype, owner), 0xDEADBEEF); - if (crc == s->crc && tspr->picnum == curpicnum) return; - s->crc = crc; + if (xxhash == s->hash && tspr->picnum == curpicnum) return; + s->hash = xxhash; } 
polymer_getbuildmaterial(&s->plane.material, curpicnum, tspr->pal, tspr->shade, @@ -3636,7 +3636,7 @@ void polymer_updatesprite(int32_t snum) s->plane.material.diffusemodulation[1] = ((GLubyte *)(&tspr->owner))[0]; s->plane.material.diffusemodulation[2] = ((GLubyte *)(&tspr->owner))[1]; s->plane.material.diffusemodulation[3] = 0xFF; - s->crc = 0xdeadbeef; + s->hash = 0xDEADBEEF; } curpicnum = tspr->picnum; diff --git a/polymer/eduke32/build/src/texcache.c b/polymer/eduke32/build/src/texcache.c index 8a9ef2e68..af702b345 100644 --- a/polymer/eduke32/build/src/texcache.c +++ b/polymer/eduke32/build/src/texcache.c @@ -8,7 +8,7 @@ #include "texcache.h" #include "dxtfilter.h" #include "scriptfile.h" -#include "crc32.h" +#include "xxhash.h" #define CLEAR_GL_ERRORS() while(bglGetError() != GL_NO_ERROR) { } #define REALLOC_OR_FAIL(ptr, size, type) { ptr = (type *)Brealloc(ptr, size); if (!ptr) goto failure; } @@ -415,9 +415,9 @@ static const char * texcache_calcid(char *cachefn, const char *fn, const int32_t Bstrcat(id.name, fn); Bsprintf(cachefn, "%08x%08x%08x", - crc32once((uint8_t *)fn, Bstrlen(fn)), - crc32once((uint8_t *)id.name, Bstrlen(id.name)), - crc32once((uint8_t *)&id, sizeof(struct texcacheid_t))); + XXH32((uint8_t *)fn, Bstrlen(fn), TEXCACHEMAGIC[3]), + XXH32((uint8_t *)id.name, Bstrlen(id.name), TEXCACHEMAGIC[3]), + XXH32((uint8_t *)&id, sizeof(struct texcacheid_t), TEXCACHEMAGIC[3])); return cachefn; } diff --git a/polymer/eduke32/build/src/xxhash.c b/polymer/eduke32/build/src/xxhash.c new file mode 100644 index 000000000..8e453a82e --- /dev/null +++ b/polymer/eduke32/build/src/xxhash.c @@ -0,0 +1,476 @@ +/* +xxHash - Fast Hash algorithm +Copyright (C) 2012-2014, Yann Collet. 
+BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +You can contact the author at : +- xxHash source repository : http://code.google.com/p/xxhash/ +*/ + + +//************************************** +// Tuning parameters +//************************************** +// Unaligned memory access is automatically enabled for "common" CPU, such as x86. +// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected. +// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance. +// You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32). 
+#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_USE_UNALIGNED_ACCESS 1 +#endif + +// XXH_ACCEPT_NULL_INPUT_POINTER : +// If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. +// When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. +// This option has a very small performance cost (only measurable on small inputs). +// By default, this option is disabled. To enable it, uncomment the define below : +//#define XXH_ACCEPT_NULL_INPUT_POINTER 1 + +// XXH_FORCE_NATIVE_FORMAT : +// By default, xxHash library provides endian-independent Hash values, based on little-endian convention. +// Results are therefore identical for little-endian and big-endian CPU. +// This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. +// Should endian-independence be of no importance for your application, you may set the #define below to 1. +// It will improve speed for Big-endian CPU. +// This option has no impact on Little_Endian CPU.
+#define XXH_FORCE_NATIVE_FORMAT 0 + + +//************************************** +// Compiler Specific Options +//************************************** +// Disable some Visual warning messages +#ifdef _MSC_VER // Visual Studio +# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant +#endif + +#ifdef _MSC_VER // Visual Studio +# define FORCE_INLINE static __forceinline +#else +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + + +//************************************** +// Includes & Memory related functions +//************************************** +#include "compat.h" +#include "xxhash.h" +// Modify the local functions below should you wish to use some other memory related routines +// for malloc(), free() +#include +FORCE_INLINE void* XXH_malloc(size_t s) { return Bmalloc(s); } +FORCE_INLINE void XXH_free (void* p) { Bfree(p); } +// for memcpy() +#include +FORCE_INLINE void* XXH_memcpy(void* dest, const void* src, size_t size) { return Bmemcpy(dest,src,size); } + + +//************************************** +// Basic Types +//************************************** +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; +#endif + +#if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS) +# define _PACKED __attribute__ ((packed)) +#else +# define _PACKED +#endif + +#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# ifdef __IBMC__ +# pragma pack(1) +# else +# pragma pack(push, 1) +# endif +#endif + +typedef struct _U32_S { U32 v; } _PACKED U32_S; + +#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# pragma pack(pop) 
+#endif + +#define A32(x) (((U32_S *)(x))->v) + + +//*************************************** +// Compiler-specific Functions and Macros +//*************************************** +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +// Note : although _rotl exists for minGW (GCC under windows), performance seems poor +#if defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +#else +# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +#endif + +#if defined(_MSC_VER) // Visual Studio +# define XXH_swap32 _byteswap_ulong +#elif GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +#else +static inline U32 XXH_swap32 (U32 x) { + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff );} +#endif + + +//************************************** +// Constants +//************************************** +#define PRIME32_1 2654435761U +#define PRIME32_2 2246822519U +#define PRIME32_3 3266489917U +#define PRIME32_4 668265263U +#define PRIME32_5 374761393U + + +//************************************** +// Architecture Macros +//************************************** +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; +#ifndef XXH_CPU_LITTLE_ENDIAN // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch + static const int one = 1; +# define XXH_CPU_LITTLE_ENDIAN (*(char*)(&one)) +#endif + + +//************************************** +// Macros +//************************************** +#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(!!(c)) }; } // use only *after* variable declarations + + +//**************************** +// Memory reads +//**************************** +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +FORCE_INLINE U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? 
A32(ptr) : XXH_swap32(A32(ptr)); + else + return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr); +} + +FORCE_INLINE U32 XXH_readLE32(const U32* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); } + + +//**************************** +// Simple Hash Functions +//**************************** +FORCE_INLINE U32 XXH32_endian_align(const void* input, int len, U32 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + U32 h32; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { len=0; p=(const BYTE*)(size_t)16; } +#endif + + if (len>=16) + { + const BYTE* const limit = bEnd - 16; + U32 v1 = seed + PRIME32_1 + PRIME32_2; + U32 v2 = seed + PRIME32_2; + U32 v3 = seed + 0; + U32 v4 = seed - PRIME32_1; + + do + { + v1 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; + v2 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; + v3 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; + v4 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; + } while (p<=limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } + else + { + h32 = seed + PRIME32_5; + } + + h32 += (U32) len; + + while (p<=bEnd-4) + { + h32 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + p+=4; + } + + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +U32 XXH32(const void* input, int len, U32 seed) +{ +#if 0 + // Simple version, good for code maintenance, but unfortunately slow for small inputs + void* state = XXH32_init(seed); + XXH32_update(state, input, len); + return XXH32_digest(state); 
+#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + +# if !defined(XXH_USE_UNALIGNED_ACCESS) + if ((((size_t)input) & 3) == 0) // Input is 4-byte aligned, so direct U32 loads are safe: leverage the speed advantage + { + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } +# endif + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + +//**************************** +// Advanced Hash Functions +//**************************** + +struct XXH_state32_t +{ + U64 total_len; + U32 seed; + U32 v1; + U32 v2; + U32 v3; + U32 v4; + int memsize; + char memory[16]; +}; + + +int XXH32_sizeofState() +{ + XXH_STATIC_ASSERT(XXH32_SIZEOFSTATE >= sizeof(struct XXH_state32_t)); // A compilation error here means XXH32_SIZEOFSTATE is not large enough + return sizeof(struct XXH_state32_t); +} + + +XXH_errorcode XXH32_resetState(void* state_in, U32 seed) +{ + struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; + state->seed = seed; + state->v1 = seed + PRIME32_1 + PRIME32_2; + state->v2 = seed + PRIME32_2; + state->v3 = seed + 0; + state->v4 = seed - PRIME32_1; + state->total_len = 0; + state->memsize = 0; + return XXH_OK; +} + + +void* XXH32_init (U32 seed) +{ + void* state = XXH_malloc (sizeof(struct XXH_state32_t)); + XXH32_resetState(state, seed); + return state; +} + + +FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* input, int len, XXH_endianess endian) +{ + struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif +
+ state->total_len += len; + + if (state->memsize + len < 16) // fill in tmp buffer + { + XXH_memcpy(state->memory + state->memsize, input, len); + state->memsize += len; + return XXH_OK; + } + + if (state->memsize) // some data left from previous update + { + XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize); + { + const U32* p32 = (const U32*)state->memory; + state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++; + state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++; + state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++; + state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++; + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) + { + const BYTE* const limit = bEnd - 16; + U32 v1 = state->v1; + U32 v2 = state->v2; + U32 v3 = state->v3; + U32 v4 = state->v4; + + do + { + v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; + v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; + v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; + v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) + { + XXH_memcpy(state->memory, p, bEnd-p); + state->memsize = (int)(bEnd-p); + } + + return XXH_OK; +} + +XXH_errorcode XXH32_update (void* state_in, const void* input, int len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_update_endian(state_in, 
input, len, XXH_littleEndian); + else + return XXH32_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess endian) +{ + struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; + const BYTE * p = (const BYTE*)state->memory; + BYTE* bEnd = (BYTE*)state->memory + state->memsize; + U32 h32; + + if (state->total_len >= 16) + { + h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); + } + else + { + h32 = state->seed + PRIME32_5; + } + + h32 += (U32) state->total_len; + + while (p<=bEnd-4) + { + h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4; + p+=4; + } + + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +U32 XXH32_intermediateDigest (void* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_intermediateDigest_endian(state_in, XXH_littleEndian); + else + return XXH32_intermediateDigest_endian(state_in, XXH_bigEndian); +} + + +U32 XXH32_digest (void* state_in) +{ + U32 h32 = XXH32_intermediateDigest(state_in); + + XXH_free(state_in); + + return h32; +} diff --git a/polymer/eduke32/eduke32.vcxproj b/polymer/eduke32/eduke32.vcxproj index fbb293de1..ee5e3869f 100644 --- a/polymer/eduke32/eduke32.vcxproj +++ b/polymer/eduke32/eduke32.vcxproj @@ -279,6 +279,7 @@ + @@ -392,6 +393,7 @@ + diff --git a/polymer/eduke32/eduke32.vcxproj.filters b/polymer/eduke32/eduke32.vcxproj.filters index b40b9531f..646f6cf7e 100644 --- a/polymer/eduke32/eduke32.vcxproj.filters +++ b/polymer/eduke32/eduke32.vcxproj.filters @@ -429,6 +429,9 @@ eduke32\headers + + build\headers + @@ -752,6 +755,9 @@ eduke32\source\android + + build\source + diff --git a/polymer/eduke32/source/astub.c 
b/polymer/eduke32/source/astub.c index 9c1c66b8d..fdc01e25e 100644 --- a/polymer/eduke32/source/astub.c +++ b/polymer/eduke32/source/astub.c @@ -41,7 +41,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. #include "keyboard.h" #include "scriptfile.h" -#include "crc32.h" +#include "xxhash.h" #include "sounds_mapster32.h" #include "fx_man.h" @@ -495,24 +495,24 @@ void create_map_snapshot(void) if (numsectors) { int32_t j; - uint32_t tempcrc = crc32once((uint8_t *)sector, numsectors*sizeof(sectortype)); + uint32_t temphash = XXH32((uint8_t *)sector, numsectors*sizeof(sectortype), numsectors*sizeof(sectortype)); - if (!try_match_with_prev(0, numsectors, tempcrc)) - create_compressed_block(0, sector, numsectors*sizeof(sectortype), tempcrc); + if (!try_match_with_prev(0, numsectors, temphash)) + create_compressed_block(0, sector, numsectors*sizeof(sectortype), temphash); if (numwalls) { - tempcrc = crc32once((uint8_t *)wall, numwalls*sizeof(walltype)); + temphash = XXH32((uint8_t *)wall, numwalls*sizeof(walltype), numwalls*sizeof(walltype)); - if (!try_match_with_prev(1, numwalls, tempcrc)) - create_compressed_block(1, wall, numwalls*sizeof(walltype), tempcrc); + if (!try_match_with_prev(1, numwalls, temphash)) + create_compressed_block(1, wall, numwalls*sizeof(walltype), temphash); } if (Numsprites) { - tempcrc = crc32once((uint8_t *)sprite, MAXSPRITES*sizeof(spritetype)); + temphash = XXH32((uint8_t *)sprite, MAXSPRITES*sizeof(spritetype), MAXSPRITES*sizeof(spritetype)); - if (!try_match_with_prev(2, Numsprites, tempcrc)) + if (!try_match_with_prev(2, Numsprites, temphash)) { int32_t i = 0; spritetype *const tspri = (spritetype *)Bmalloc(Numsprites*sizeof(spritetype) + 4); @@ -527,7 +527,7 @@ void create_map_snapshot(void) i++; } - create_compressed_block(2, tspri, Numsprites*sizeof(spritetype), tempcrc); + create_compressed_block(2, tspri, Numsprites*sizeof(spritetype), temphash); Bfree(tspri); } } diff --git 
a/polymer/eduke32/source/menus.c b/polymer/eduke32/source/menus.c index 35c2e8376..65684df28 100644 --- a/polymer/eduke32/source/menus.c +++ b/polymer/eduke32/source/menus.c @@ -34,7 +34,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. #include "savegame.h" #include "premap.h" #include "demo.h" -#include "crc32.h" +#include "xxhash.h" #include "common.h" #include "common_game.h" #include "input.h" @@ -4754,9 +4754,9 @@ cheat_for_port_credits2: if (g_currentMenu >= 360 && g_currentMenu <= 369) { - static uint32_t crc = 0; + static uint32_t xxh = 0; - if (!crc) crc = crc32once((uint8_t *)&ud.savegame[g_currentMenu-360][0], 19); + if (!xxh) xxh = XXH32((uint8_t *)&ud.savegame[g_currentMenu-360][0], 19, 0xDEADBEEF); Bsprintf(tempbuf,"Players: %-2d ",ud.multimode); mgametext(160,156,tempbuf,0,2+8+16); @@ -4769,7 +4769,7 @@ cheat_for_port_credits2: if (x == -1) { - crc = 0; + xxh = 0; ReadSaveGameHeaders(); M_ChangeMenu(351); goto DISPLAYNAMES; @@ -4779,7 +4779,7 @@ cheat_for_port_credits2: { // dirty hack... char 127 in last position indicates an auto-filled name if (ud.savegame[g_currentMenu-360][0] == 0 || (ud.savegame[g_currentMenu-360][20] == 127 && - crc == crc32once((uint8_t *)&ud.savegame[g_currentMenu-360][0], 19))) + xxh == XXH32((uint8_t *)&ud.savegame[g_currentMenu-360][0], 19, 0xDEADBEEF))) { Bstrncpy(&ud.savegame[g_currentMenu-360][0], MapInfo[ud.volume_number * MAXLEVELS + ud.level_number].name, 19); ud.savegame[g_currentMenu-360][20] = 127; @@ -4795,7 +4795,7 @@ cheat_for_port_credits2: ready2send = 1; totalclock = ototalclock; } - crc = 0; + xxh = 0; } rotatesprite_fs(101<<16,97<<16,65536>>1,512,TILE_SAVESHOT,-32,0,2+4+8+64); diff --git a/polymer/eduke32/source/rev.h b/polymer/eduke32/source/rev.h index 14419fe9b..c701af12f 100644 --- a/polymer/eduke32/source/rev.h +++ b/polymer/eduke32/source/rev.h @@ -1 +1 @@ -s_buildRev = "r4381"; +s_buildRev = "r4383";