From cc87536fda104250a6d258e2d0ce5aa155b555d7 Mon Sep 17 00:00:00 2001 From: rfm Date: Tue, 12 Mar 2013 16:11:10 +0000 Subject: [PATCH] experiment with slower but better hashes git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@36344 72102866-910b-0410-8b05-ffd578937521 --- ChangeLog | 14 +- Source/GNUmakefile | 1 + Source/GSPrivate.h | 51 +++--- Source/GSPrivateHash.m | 338 ++++++++++++++++++++++++++++++++++++++ Source/GSString.m | 82 +++++---- Source/NSSortDescriptor.m | 4 +- Source/NSString.m | 18 +- 7 files changed, 429 insertions(+), 79 deletions(-) create mode 100644 Source/GSPrivateHash.m diff --git a/ChangeLog b/ChangeLog index 1ddc90db3..b3676dd52 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,9 +1,21 @@ +2013-03-12 Richard Frith-Macdonald + + * Source/GSPrivate.h: Change hash function args and add new functions + for incremental hash. + * Source/NSString.m: Update to use the private hash function introduced + for NSSortDescriptor. + * Source/GSString.m: ditto. + * Source/NSSortDescriptor.m: Update for new function arguments. + * Source/GSPrivateHash.m: Add alternative version of hash functions + (old code plus new public domain murmurhash3 code). + * Source/GNUmakefile: Build alternative hash functions. + build with OLDHASH defined to 0 to get new alternatives. + 2012-03-10 Richard Frith-Macdonald * Source/NSFileManager.m: Fix some autorelease pool management issues. 
-======= 2013-03-09 Sebastian Reitenbach * Tests/base/NSFileManager/general.m remove accidently snucked in header diff --git a/Source/GNUmakefile b/Source/GNUmakefile index eb4b03f65..8c9f08791 100644 --- a/Source/GNUmakefile +++ b/Source/GNUmakefile @@ -163,6 +163,7 @@ GSFormat.m \ GSHTTPAuthentication.m \ GSHTTPURLHandle.m \ GSICUString.m \ +GSPrivateHash.m \ GSQuickSort.m \ GSRunLoopWatcher.m \ GSSet.m \ diff --git a/Source/GSPrivate.h b/Source/GSPrivate.h index 46b1c4a51..58be10d82 100644 --- a/Source/GSPrivate.h +++ b/Source/GSPrivate.h @@ -376,34 +376,6 @@ GSPrivateIsByteEncoding(NSStringEncoding encoding) GS_ATTRIB_PRIVATE; BOOL GSPrivateIsEncodingSupported(NSStringEncoding encoding) GS_ATTRIB_PRIVATE; -/* Hash function to hash up to limit bytes from data of specified length. - * If the flag is NO then a result of 0 is mapped to 0xffffffff. - * This is a pretty useful general purpose hash function. - */ -static inline unsigned -GSPrivateHash(const void *data, unsigned length, unsigned limit, BOOL zero) - __attribute__((unused)); -static inline unsigned -GSPrivateHash(const void *data, unsigned length, unsigned limit, BOOL zero) -{ - unsigned ret = length; - unsigned l = length; - - if (limit < length) - { - l = limit; - } - while (l-- > 0) - { - ret = (ret << 5) + ret + ((const unsigned char*)data)[l]; - } - if (ret == 0 && zero == NO) - { - ret = 0xffffffff; - } - return ret; -} - /* load a module into the runtime */ long @@ -543,5 +515,28 @@ GSPrivateIsCollectable(const void *ptr) GS_ATTRIB_PRIVATE; NSZone* GSAtomicMallocZone (void); +/* Generate a 32bit hash from supplied byte data. + */ +uint32_t +GSPrivateHash(uint32_t seed, const void *bytes, int length) + GS_ATTRIB_PRIVATE; + +/* Incorporate 'l' bytes of data from the buffer pointed to by 'b' into + * the hash state information pointed to by p0 and p1. 
+ * The hash state variables should have been initialised to zero before + * the first call to this function, and the result should be produced + * by calling the GSPrivateFinishHash() function. + */ +void +GSPrivateIncrementalHash(uint32_t *p0, uint32_t *p1, const void *b, int l) + GS_ATTRIB_PRIVATE; + +/* Generate a 32bit hash from supplied state variables resulting from + * calls to the GSPrivateIncrementalHash() function. + */ +uint32_t +GSPrivateFinishHash(uint32_t s0, uint32_t s1, uint32_t totalLength) + GS_ATTRIB_PRIVATE; + #endif /* _GSPrivate_h_ */ diff --git a/Source/GSPrivateHash.m b/Source/GSPrivateHash.m new file mode 100644 index 000000000..4a08590cd --- /dev/null +++ b/Source/GSPrivateHash.m @@ -0,0 +1,338 @@ +/* GSPrivateHash.m + Copyright (C) 2013 Free Software Foundation, Inc. + + Written by: Richard Frith-Macdonald + + This file is part of the GNUstep Base Library. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02111 USA. +*/ + +#import "GSPrivate.h" + +uint32_t +GSPrivateHash(uint32_t seed, const void *bytes, int length) +{ + uint32_t carry = 0; + + GSPrivateIncrementalHash(&seed, &carry, bytes, length); + return GSPrivateFinishHash(seed, carry, length); +} + +#ifndef OLDHASH +#define OLDHASH 1 +#endif + +#if OLDHASH + +/* Very fast, simple hash. Poor distribution properties though. 
+ */ +void +GSPrivateIncrementalHash(uint32_t *p0, uint32_t *p1, const void *b, int l) +{ + unsigned i; + + for (i = 0; i < l; i++) + { + *p0 = (*p0 << 5) + *p0 + ((const uint8_t*)b)[i]; + } +} + +uint32_t +GSPrivateFinishHash(uint32_t s0, uint32_t s1, uint32_t totalLength) +{ + return s0; +} + +#else /* OLDHASH */ + +/*----------------------------------------------------------------------------- + * MurmurHash3 was written by Austin Appleby, and is placed in the public + * domain. + * + * This implementation was written by Shane Day, and is also public domain. + * + * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A) + * with support for progressive processing. + */ + +/*----------------------------------------------------------------------------- + +If you want to understand the MurmurHash algorithm you would be much better +off reading the original source. Just point your browser at: +http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp + + +What this version provides? + +1. Progressive data feeding. Useful when the entire payload to be hashed +does not fit in memory or when the data is streamed through the application. +Also useful when hashing a number of strings with a common prefix. A partial +hash of a prefix string can be generated and reused for each suffix string. + +2. Portability. Plain old C so that it should compile on any old compiler. +Both CPU endian and access-alignment neutral, but avoiding inefficient code +when possible depending on CPU capabilities. + +3. Drop in. I personally like nice self contained public domain code, making it +easy to pilfer without loads of refactoring to work properly in the existing +application code & makefile structure and mucking around with licence files. +Just copy PMurHash.h and PMurHash.c and you're ready to go. + + +How does it work? + +We can only process entire 32 bit chunks of input, except for the very end +that may be shorter. 
So along with the partial hash we need to give back to +the caller a carry containing up to 3 bytes that we were unable to process. +This carry also needs to record the number of bytes the carry holds. I use +the low 2 bits as a count (0..3) and the carry bytes are shifted into the +high byte in stream order. + +To handle endianess I simply use a macro that reads a uint32_t and define +that macro to be a direct read on little endian machines, a read and swap +on big endian machines, or a byte-by-byte read if the endianess is unknown. + +-----------------------------------------------------------------------------*/ + + +/* MSVC warnings we choose to ignore */ +#if defined(_MSC_VER) + #pragma warning(disable: 4127) /* conditional expression is constant */ +#endif + +/*----------------------------------------------------------------------------- + * Endianess, misalignment capabilities and util macros + * + * The following 3 macros are defined in this section. The other macros defined + * are only needed to help derive these 3. 
+ * + * READ_UINT32(x) Read a little endian unsigned 32-bit int + * UNALIGNED_SAFE Defined if READ_UINT32 works on non-word boundaries + * ROTL32(x,r) Rotate x left by r bits + */ + +/* Convention is to define __BYTE_ORDER == to one of these values */ +#if !defined(__BIG_ENDIAN) + #define __BIG_ENDIAN 4321 +#endif +#if !defined(__LITTLE_ENDIAN) + #define __LITTLE_ENDIAN 1234 +#endif + +/* I386 */ +#if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(i386) + #define __BYTE_ORDER __LITTLE_ENDIAN + #define UNALIGNED_SAFE +#endif + +/* gcc 'may' define __LITTLE_ENDIAN__ or __BIG_ENDIAN__ to 1 (Note the trailing __), + * or even _LITTLE_ENDIAN or _BIG_ENDIAN (Note the single _ prefix) */ +#if !defined(__BYTE_ORDER) + #if defined(__LITTLE_ENDIAN__) && __LITTLE_ENDIAN__==1 || defined(_LITTLE_ENDIAN) && _LITTLE_ENDIAN==1 + #define __BYTE_ORDER __LITTLE_ENDIAN + #elif defined(__BIG_ENDIAN__) && __BIG_ENDIAN__==1 || defined(_BIG_ENDIAN) && _BIG_ENDIAN==1 + #define __BYTE_ORDER __BIG_ENDIAN + #endif +#endif + +/* gcc (usually) defines xEL/EB macros for ARM and MIPS endianess */ +#if !defined(__BYTE_ORDER) + #if defined(__ARMEL__) || defined(__MIPSEL__) + #define __BYTE_ORDER __LITTLE_ENDIAN + #endif + #if defined(__ARMEB__) || defined(__MIPSEB__) + #define __BYTE_ORDER __BIG_ENDIAN + #endif +#endif + +/* Now find best way we can to READ_UINT32 */ +#if __BYTE_ORDER==__LITTLE_ENDIAN + /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */ + #define READ_UINT32(ptr) (*((uint32_t*)(ptr))) +#elif __BYTE_ORDER==__BIG_ENDIAN + /* TODO: Add additional cases below where a compiler provided bswap32 is available */ + #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3)) + #define READ_UINT32(ptr) (__builtin_bswap32(*((uint32_t*)(ptr)))) + #else + /* Without a known fast bswap32 we're just as well off doing this */ + #define READ_UINT32(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24) + #define UNALIGNED_SAFE + #endif +#else + /* 
Unknown endianess so last resort is to read individual bytes */ + #define READ_UINT32(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24) + + /* Since we're not doing word-reads we can skip the messing about with realignment */ + #define UNALIGNED_SAFE +#endif + +/* Find best way to ROTL32 */ +#if defined(_MSC_VER) + #include <stdlib.h> /* Microsoft put _rotl declaration in here */ + #define ROTL32(x,r) _rotl(x,r) +#else + /* gcc recognises this code and generates a rotate instruction for CPUs with one */ + #define ROTL32(x,r) (((uint32_t)x << r) | ((uint32_t)x >> (32 - r))) +#endif + + +/*----------------------------------------------------------------------------- + * Core murmurhash algorithm macros */ + +#define C1 (0xcc9e2d51) +#define C2 (0x1b873593) + +/* This is the main processing body of the algorithm. It operates + * on each full 32-bits of input. */ +#define DOBLOCK(h1, k1) do{ \ + k1 *= C1; \ + k1 = ROTL32(k1,15); \ + k1 *= C2; \ + \ + h1 ^= k1; \ + h1 = ROTL32(h1,13); \ + h1 = h1*5+0xe6546b64; \ + }while(0) + + +/* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */ +/* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload */ +#define DOBYTES(cnt, h1, c, n, ptr, len) do{ \ + int _i = cnt; \ + while(_i--) { \ + c = c>>8 | *ptr++<<24; \ + n++; len--; \ + if(n==4) { \ + DOBLOCK(h1, c); \ + n = 0; \ + } \ + } }while(0) + +/*---------------------------------------------------------------------------*/ + +/* Main hashing function. Initialise carry to 0 and h1 to 0 or an initial seed + * if wanted. Both ph1 and pcarry are required arguments. 
*/ +void +GSPrivateIncrementalHash( + uint32_t *ph1, uint32_t *pcarry, const void *key, int len) +{ + uint32_t h1 = *ph1; + uint32_t c = *pcarry; + + const uint8_t *ptr = (uint8_t*)key; + const uint8_t *end; + + /* Extract carry count from low 2 bits of c value */ + int n = c & 3; + +#if defined(UNALIGNED_SAFE) + /* This CPU handles unaligned word access */ + + /* Consume any carry bytes */ + int i = (4-n) & 3; + if(i && i <= len) { + DOBYTES(i, h1, c, n, ptr, len); + } + + /* Process 32-bit chunks */ + end = ptr + len/4*4; + for( ; ptr < end ; ptr+=4) { + uint32_t k1 = READ_UINT32(ptr); + DOBLOCK(h1, k1); + } + +#else /*UNALIGNED_SAFE*/ + /* This CPU does not handle unaligned word access */ + + /* Consume enough so that the next data byte is word aligned */ + int i = -(long)ptr & 3; + if(i && i <= len) { + DOBYTES(i, h1, c, n, ptr, len); + } + + /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */ + end = ptr + len/4*4; + switch(n) { /* how many bytes in c */ + case 0: /* c=[----] w=[3210] b=[3210]=w c'=[----] */ + for( ; ptr < end ; ptr+=4) { + uint32_t k1 = READ_UINT32(ptr); + DOBLOCK(h1, k1); + } + break; + case 1: /* c=[0---] w=[4321] b=[3210]=c>>24|w<<8 c'=[4---] */ + for( ; ptr < end ; ptr+=4) { + uint32_t k1 = c>>24; + c = READ_UINT32(ptr); + k1 |= c<<8; + DOBLOCK(h1, k1); + } + break; + case 2: /* c=[10--] w=[5432] b=[3210]=c>>16|w<<16 c'=[54--] */ + for( ; ptr < end ; ptr+=4) { + uint32_t k1 = c>>16; + c = READ_UINT32(ptr); + k1 |= c<<16; + DOBLOCK(h1, k1); + } + break; + case 3: /* c=[210-] w=[6543] b=[3210]=c>>8|w<<24 c'=[654-] */ + for( ; ptr < end ; ptr+=4) { + uint32_t k1 = c>>8; + c = READ_UINT32(ptr); + k1 |= c<<24; + DOBLOCK(h1, k1); + } + } +#endif /*UNALIGNED_SAFE*/ + + /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */ + len -= len/4*4; + + /* Append any remaining bytes into carry */ + DOBYTES(len, h1, c, n, ptr, len); + + /* Copy out new running hash and carry */ + *ph1 = h1; + *pcarry = 
(c & ~0xff) | n; +} + +/*---------------------------------------------------------------------------*/ + +/* Finalize a hash. To match the original Murmur3A the total_length must be provided */ +uint32_t +GSPrivateFinishHash(uint32_t h, uint32_t carry, uint32_t total_length) +{ + uint32_t k1; + int n = carry & 3; + if(n) { + k1 = carry >> (4-n)*8; + k1 *= C1; k1 = ROTL32(k1,15); k1 *= C2; h ^= k1; + } + h ^= total_length; + + /* fmix */ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + +#endif /* OLDHASH */ + diff --git a/Source/GSString.m b/Source/GSString.m index c828d9dbf..07cf5c1a6 100644 --- a/Source/GSString.m +++ b/Source/GSString.m @@ -3363,50 +3363,48 @@ transmute(GSStr self, NSString *aString) { if (self->_flags.hash == 0) { - unsigned ret = 0; - unsigned len = self->_count; + uint32_t ret = 0; + int len = (int)self->_count; if (len > 0) { - register unsigned index = 0; - if (self->_flags.wide) { - register const unichar *p = self->_contents.u; + const unichar *p = self->_contents.u; - while (index < len) - { - ret = (ret << 5) + ret + p[index++]; - } + ret = GSPrivateHash(0, p, len * sizeof(unichar)); } - else + else if (len > 64) + { + return (self->_flags.hash = [super hash]); + } + else { - register const unsigned char *p = self->_contents.c; + unichar buf[64]; + unsigned index; + const unsigned char *p = self->_contents.c; if (internalEncoding == NSISOLatin1StringEncoding) { - while (index < len) - { - ret = (ret << 5) + ret + p[index++]; - } - } + for (index = 0; index < len; index++) + { + buf[index] = p[index]; + } + } else { - while (index < len) - { - unichar u = p[index++]; + for (index = 0; index < len; index++) + { + unichar u = p[index]; - if (u > 127) - { - unsigned char c = (unsigned char)u; - unsigned int s = 1; - unichar *d = &u; - - GSToUnicode(&d, &s, &c, 1, internalEncoding, 0, 0); - } - ret = (ret << 5) + ret + u; - } + if (u > 127) + { + return (self->_flags.hash = [super 
hash]); + } + buf[index] = u; + } } + ret = GSPrivateHash(0, buf, len * sizeof(unichar)); } /* @@ -5530,25 +5528,41 @@ literalIsEqual(NXConstantString *self, id anObject) } /* Must match the implementation in NSString + * To avoid allocating memory, we build the hash incrementally. */ - (NSUInteger) hash { if (nxcslen > 0) { - unsigned ret = 0; + uint32_t s0 = 0; + uint32_t s1 = 0; + unichar chunk[64]; + uint32_t ret; unichar n = 0; unsigned i = 0; - unichar c; + int l = 0; + uint32_t t = 0; while (i < nxcslen) { - c = nextUTF8((const uint8_t *)nxcsptr, nxcslen, &i, &n); - ret = (ret << 5) + ret + c; + chunk[l++] = nextUTF8((const uint8_t *)nxcsptr, nxcslen, &i, &n); + if (64 == l) + { + GSPrivateIncrementalHash(&s0, &s1, chunk, l * sizeof(unichar)); + t += l; + l = 0; + } } if (0 != n) { - ret = (ret << 5) + ret + n; // Add final character + chunk[l++] = n; // Add final character } + if (l > 0) + { + GSPrivateIncrementalHash(&s0, &s1, chunk, l * sizeof(unichar)); + t += l; + } + ret = GSPrivateFinishHash(s0, s1, t * sizeof(unichar)); ret &= 0x0fffffff; if (ret == 0) { diff --git a/Source/NSSortDescriptor.m b/Source/NSSortDescriptor.m index 45455fa07..e01ba7168 100644 --- a/Source/NSSortDescriptor.m +++ b/Source/NSSortDescriptor.m @@ -38,7 +38,9 @@ static BOOL initialized = NO; +#ifdef __clang__ #pragma clang diagnostic ignored "-Wreceiver-forward-class" +#endif #if GS_USE_TIMSORT @class GSTimSortDescriptor; @@ -117,7 +119,7 @@ static BOOL initialized = NO; { const char *sel = sel_getName(_selector); - return _ascending + GSPrivateHash(sel, strlen(sel), 16, YES) + [_key hash]; + return _ascending + GSPrivateHash(0, sel, strlen(sel)) + [_key hash]; } - (id) initWithKey: (NSString *) key ascending: (BOOL) ascending diff --git a/Source/NSString.m b/Source/NSString.m index aeb654725..6a6a7f8b0 100644 --- a/Source/NSString.m +++ b/Source/NSString.m @@ -2430,29 +2430,17 @@ static UCollator *GSICUCollatorOpen(NSStringCompareOptions mask, NSLocale *local */ - (NSUInteger) 
hash { - unsigned ret = 0; - unsigned len = [self length]; + uint32_t ret = 0; + int len = (int)[self length]; if (len > 0) { unichar buf[64]; unichar *ptr = (len <= 64) ? buf : NSZoneMalloc(NSDefaultMallocZone(), len * sizeof(unichar)); - unichar *p; - unsigned char_count = 0; [self getCharacters: ptr range: NSMakeRange(0,len)]; - - p = ptr; - - while (char_count++ < len) - { - unichar c = *p++; - - // FIXME ... should normalize composed character sequences. - ret = (ret << 5) + ret + c; - } - + ret = GSPrivateHash(0, (const void*)ptr, len * sizeof(unichar)); if (ptr != buf) { NSZoneFree(NSDefaultMallocZone(), ptr);