From cc87536fda104250a6d258e2d0ce5aa155b555d7 Mon Sep 17 00:00:00 2001
From: rfm <rfm@72102866-910b-0410-8b05-ffd578937521>
Date: Tue, 12 Mar 2013 16:11:10 +0000
Subject: [PATCH] experiment with slower but better hashes

git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@36344 72102866-910b-0410-8b05-ffd578937521
---
 ChangeLog                 |  14 +-
 Source/GNUmakefile        |   1 +
 Source/GSPrivate.h        |  51 +++---
 Source/GSPrivateHash.m    | 338 ++++++++++++++++++++++++++++++++++++++
 Source/GSString.m         |  82 +++++----
 Source/NSSortDescriptor.m |   4 +-
 Source/NSString.m         |  18 +-
 7 files changed, 429 insertions(+), 79 deletions(-)
 create mode 100644 Source/GSPrivateHash.m

diff --git a/ChangeLog b/ChangeLog
index 1ddc90db3..b3676dd52 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,9 +1,21 @@
+2013-03-12  Richard Frith-Macdonald <rfm@gnu.org>
+
+        * Source/GSPrivate.h: Change hash function args and add new functions
+        for incremental hash.
+        * Source/NSString.m: Update to use the private hash function introduced
+        for NSSortDescriptor.
+        * Source/GSString.m: ditto.
+        * Source/NSSortDescriptor.m: Update for new function arguments.
+        * Source/GSPrivateHash.m: Add alternative version of hash functions
+        (old code plus new public domain murmurhash3 code).
+        * Source/GNUmakefile: Build alternative hash functions.
+        Build with OLDHASH defined to 0 to get new alternatives.
+
 2012-03-10  Richard Frith-Macdonald <rfm@gnu.org>
 
         * Source/NSFileManager.m:
         Fix some autorelease pool management issues.
 
-=======
 2013-03-09 Sebastian Reitenbach <sebastia@l00-bugdead-prods.de>
 	* Tests/base/NSFileManager/general.m
 	  remove accidently snucked in header
diff --git a/Source/GNUmakefile b/Source/GNUmakefile
index eb4b03f65..8c9f08791 100644
--- a/Source/GNUmakefile
+++ b/Source/GNUmakefile
@@ -163,6 +163,7 @@ GSFormat.m \
 GSHTTPAuthentication.m \
 GSHTTPURLHandle.m \
 GSICUString.m \
+GSPrivateHash.m \
 GSQuickSort.m \
 GSRunLoopWatcher.m \
 GSSet.m \
diff --git a/Source/GSPrivate.h b/Source/GSPrivate.h
index 46b1c4a51..58be10d82 100644
--- a/Source/GSPrivate.h
+++ b/Source/GSPrivate.h
@@ -376,34 +376,6 @@ GSPrivateIsByteEncoding(NSStringEncoding encoding) GS_ATTRIB_PRIVATE;
 BOOL
 GSPrivateIsEncodingSupported(NSStringEncoding encoding) GS_ATTRIB_PRIVATE;
 
-/* Hash function to hash up to limit bytes from data of specified length.
- * If the flag is NO then a result of 0 is mapped to 0xffffffff.
- * This is a pretty useful general purpose hash function.
- */
-static inline unsigned
-GSPrivateHash(const void *data, unsigned length, unsigned limit, BOOL zero)
-  __attribute__((unused));
-static inline unsigned
-GSPrivateHash(const void *data, unsigned length, unsigned limit, BOOL zero)
-{
-  unsigned	ret = length;
-  unsigned	l = length;
-
-  if (limit < length)
-    {
-      l = limit;
-    }
-  while (l-- > 0)
-    {
-      ret = (ret << 5) + ret + ((const unsigned char*)data)[l];
-    }
-  if (ret == 0 && zero == NO)
-    {
-       ret = 0xffffffff;
-    }
-  return ret;
-}
-
 /* load a module into the runtime
  */
 long
@@ -543,5 +515,28 @@ GSPrivateIsCollectable(const void *ptr) GS_ATTRIB_PRIVATE;
 NSZone*
 GSAtomicMallocZone (void);
 
+/* Generate a 32bit hash of 'length' bytes at 'bytes', starting from 'seed'.
+ */
+uint32_t
+GSPrivateHash(uint32_t seed, const void *bytes, int length)
+  GS_ATTRIB_PRIVATE;
+
+/* Incorporate 'l' bytes of data from the buffer pointed to by 'b' into
+ * the hash state information pointed to by p0 and p1.
+ * Before the first call *p0 should have been initialised to the seed
+ * (zero when no seed is wanted) and *p1 to zero; the result should be
+ * produced by calling the GSPrivateFinishHash() function.
+ */
+void
+GSPrivateIncrementalHash(uint32_t *p0, uint32_t *p1, const void *b, int l)
+  GS_ATTRIB_PRIVATE;
+
+/* Generate a 32bit hash from the state variables (s0, s1) left by calls to
+ * GSPrivateIncrementalHash(); totalLength is the number of bytes hashed.
+ */
+uint32_t
+GSPrivateFinishHash(uint32_t s0, uint32_t s1, uint32_t totalLength)
+  GS_ATTRIB_PRIVATE;
+
 #endif /* _GSPrivate_h_ */
 
diff --git a/Source/GSPrivateHash.m b/Source/GSPrivateHash.m
new file mode 100644
index 000000000..4a08590cd
--- /dev/null
+++ b/Source/GSPrivateHash.m
@@ -0,0 +1,338 @@
+/* GSPrivateHash.m
+   Copyright (C) 2013 Free Software Foundation, Inc.
+
+   Written by:  Richard Frith-Macdonald <rfm@gnu.org>
+   
+   This file is part of the GNUstep Base Library.
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2 of the License, or (at your option) any later version.
+   
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+   
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02111 USA.
+*/ 
+
+#import "GSPrivate.h"
+
+/* One-shot hash: feed the whole buffer through the incremental API. */
+uint32_t
+GSPrivateHash(uint32_t seed, const void *bytes, int length)
+{
+  uint32_t      state = seed, pending = 0;
+  GSPrivateIncrementalHash(&state, &pending, bytes, length);
+  return GSPrivateFinishHash(state, pending, length);
+}
+ 
+#ifndef OLDHASH
+#define OLDHASH     1
+#endif
+
+#if     OLDHASH
+
+/* Very fast, simple hash (h = h * 33 + byte).  Poor distribution though.
+ * Only *p0 is updated; p1 is unused.  A length l <= 0 leaves *p0 unchanged.
+ */
+void
+GSPrivateIncrementalHash(uint32_t *p0, uint32_t *p1, const void *b, int l)
+{
+  const uint8_t *bytes = (const uint8_t*)b;
+  int           i;  /* signed: an unsigned index would wrap a negative l */
+
+  for (i = 0; i < l; i++)
+    *p0 = (*p0 << 5) + *p0 + bytes[i];
+}
+
+uint32_t
+GSPrivateFinishHash(uint32_t s0, uint32_t s1, uint32_t totalLength)
+{
+  return s0;    /* the simple hash ignores both s1 and totalLength */
+}
+
+#else   /* OLDHASH */
+
+/*-----------------------------------------------------------------------------
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * domain.
+ *
+ * This implementation was written by Shane Day, and is also public domain.
+ *
+ * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A)
+ * with support for progressive processing.
+ */
+
+/*-----------------------------------------------------------------------------
+ 
+If you want to understand the MurmurHash algorithm you would be much better
+off reading the original source. Just point your browser at:
+http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
+
+
+What does this version provide?
+
+1. Progressive data feeding. Useful when the entire payload to be hashed
+does not fit in memory or when the data is streamed through the application.
+Also useful when hashing a number of strings with a common prefix. A partial
+hash of a prefix string can be generated and reused for each suffix string.
+
+2. Portability. Plain old C so that it should compile on any old compiler.
+Both CPU endian and access-alignment neutral, but avoiding inefficient code
+when possible depending on CPU capabilities.
+
+3. Drop in. I personally like nice self contained public domain code, making it
+easy to pilfer without loads of refactoring to work properly in the existing
+application code & makefile structure and mucking around with licence files.
+Just copy PMurHash.h and PMurHash.c and you're ready to go.
+
+
+How does it work?
+
+We can only process entire 32 bit chunks of input, except for the very end
+that may be shorter. So along with the partial hash we need to give back to
+the caller a carry containing up to 3 bytes that we were unable to process.
+This carry also needs to record the number of bytes the carry holds. I use
+the low 2 bits as a count (0..3) and the carry bytes are shifted into the
+high byte in stream order.
+
+To handle endianness I simply use a macro that reads a uint32_t and define
+that macro to be a direct read on little endian machines, a read and swap
+on big endian machines, or a byte-by-byte read if the endianness is unknown.
+
+-----------------------------------------------------------------------------*/
+
+
+/* MSVC warnings we choose to ignore */
+#if defined(_MSC_VER)
+  #pragma warning(disable: 4127) /* conditional expression is constant */
+#endif
+
+/*-----------------------------------------------------------------------------
+ * Endianness, misalignment capabilities and util macros
+ *
+ * The following 3 macros are defined in this section. The other macros defined
+ * are only needed to help derive these 3.
+ *
+ * READ_UINT32(x)   Read a little endian unsigned 32-bit int
+ * UNALIGNED_SAFE   Defined if READ_UINT32 works on non-word boundaries
+ * ROTL32(x,r)      Rotate x left by r bits
+ */
+
+/* Convention is to define __BYTE_ORDER equal to one of these values */
+#if !defined(__BIG_ENDIAN)
+  #define __BIG_ENDIAN 4321
+#endif
+#if !defined(__LITTLE_ENDIAN)
+  #define __LITTLE_ENDIAN 1234
+#endif
+
+/* I386: little endian, and treated as safe for unaligned word reads */
+#if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(i386)
+  #define __BYTE_ORDER __LITTLE_ENDIAN
+  #define UNALIGNED_SAFE
+#endif
+
+/* gcc 'may' define __LITTLE_ENDIAN__ or __BIG_ENDIAN__ to 1 (Note the trailing __),
+ * or even _LITTLE_ENDIAN or _BIG_ENDIAN (Note the single _ prefix) */
+#if !defined(__BYTE_ORDER)
+  #if defined(__LITTLE_ENDIAN__) && __LITTLE_ENDIAN__==1 || defined(_LITTLE_ENDIAN) && _LITTLE_ENDIAN==1
+    #define __BYTE_ORDER __LITTLE_ENDIAN
+  #elif defined(__BIG_ENDIAN__) && __BIG_ENDIAN__==1 || defined(_BIG_ENDIAN) && _BIG_ENDIAN==1
+    #define __BYTE_ORDER __BIG_ENDIAN
+  #endif
+#endif
+
+/* gcc (usually) defines xEL/EB macros for ARM and MIPS endianness */
+#if !defined(__BYTE_ORDER)
+  #if defined(__ARMEL__) || defined(__MIPSEL__)
+    #define __BYTE_ORDER __LITTLE_ENDIAN
+  #endif
+  #if defined(__ARMEB__) || defined(__MIPSEB__)
+    #define __BYTE_ORDER __BIG_ENDIAN
+  #endif
+#endif
+
+/* Now find best way we can to READ_UINT32.  The byte-by-byte variants cast
+ * each byte to uint32_t before shifting: a uint8_t promotes to (signed) int,
+ * and shifting a value >= 0x80 left by 24 would overflow it. */
+#if __BYTE_ORDER==__LITTLE_ENDIAN
+  /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */
+  #define READ_UINT32(ptr)   (*((uint32_t*)(ptr)))
+#elif __BYTE_ORDER==__BIG_ENDIAN
+  /* TODO: Add additional cases below where a compiler provided bswap32 is available */
+  #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3))
+    #define READ_UINT32(ptr)   (__builtin_bswap32(*((uint32_t*)(ptr))))
+  #else
+    /* Without a known fast bswap32 we're just as well off doing this */
+    #define READ_UINT32(ptr)   ((uint32_t)(ptr)[0]|(uint32_t)(ptr)[1]<<8|(uint32_t)(ptr)[2]<<16|(uint32_t)(ptr)[3]<<24)
+    #define UNALIGNED_SAFE
+  #endif
+#else
+  /* Unknown endianness: read individual bytes, so no realignment is needed */
+  #define READ_UINT32(ptr)   ((uint32_t)(ptr)[0]|(uint32_t)(ptr)[1]<<8|(uint32_t)(ptr)[2]<<16|(uint32_t)(ptr)[3]<<24)
+  #define UNALIGNED_SAFE
+#endif
+
+/* Find best way to ROTL32 (rotate the 32-bit value x left by r bits, 0<r<32) */
+#if defined(_MSC_VER)
+  #include <stdlib.h>  /* Microsoft put _rotl declaration in here */
+  #define ROTL32(x,r)  _rotl(x,r)
+#else
+  /* gcc turns this into a rotate instruction; args are parenthesised for safety */
+  #define ROTL32(x,r)  (((uint32_t)(x) << (r)) | ((uint32_t)(x) >> (32 - (r))))
+#endif
+
+
+/*-----------------------------------------------------------------------------
+ * Core murmurhash algorithm macros */
+
+#define C1  (0xcc9e2d51)
+#define C2  (0x1b873593)
+
+/* Main mixing step, run once per full 32 bits of input.  Both arguments
+ * must be lvalues: k1 is scrambled in place and then folded into h1. */
+#define DOBLOCK(h1, k1) do{ \
+        k1 *= C1; \
+        k1 = ROTL32(k1,15); \
+        k1 *= C2; \
+        \
+        h1 ^= k1; \
+        h1 = ROTL32(h1,13); \
+        h1 = h1*5+0xe6546b64; \
+    }while(0)
+
+
+/* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes.
+ * cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c,
+ * ptr/len=payload (ptr is advanced and len decremented per byte).
+ * Each byte is widened to uint32_t before <<24 so the shift cannot
+ * overflow the (signed) int that a uint8_t would otherwise promote to. */
+#define DOBYTES(cnt, h1, c, n, ptr, len) do{ \
+    int _i = cnt; \
+    while(_i--) { \
+        c = c>>8 | (uint32_t)*ptr++<<24; \
+        n++; len--; \
+        if(n==4) { DOBLOCK(h1, c); n = 0; } \
+    } }while(0)
+
+/*---------------------------------------------------------------------------*/
+
+/* Main hashing function.  *ph1 holds the running hash (initialise it to 0
+ * or to a seed) and *pcarry the carry (initialise it to 0); both required. */
+void
+GSPrivateIncrementalHash(
+  uint32_t *ph1, uint32_t *pcarry, const void *key, int len)
+{
+  uint32_t h1 = *ph1;
+  uint32_t c = *pcarry;
+
+  const uint8_t *ptr = (uint8_t*)key;
+  const uint8_t *end;
+
+  /* Extract carry count from low 2 bits of c value */
+  int n = c & 3;
+
+#if defined(UNALIGNED_SAFE)
+  /* Unaligned word access is fine here (or READ_UINT32 reads byte by byte) */
+
+  /* Top the carry up to a full word, if there is enough input to do so */
+  int i = (4-n) & 3;
+  if(i && i <= len) {
+    DOBYTES(i, h1, c, n, ptr, len);
+  }
+
+  /* Process 32-bit chunks */
+  end = ptr + len/4*4;
+  for( ; ptr < end ; ptr+=4) {
+    uint32_t k1 = READ_UINT32(ptr);
+    DOBLOCK(h1, k1);
+  }
+
+#else /*UNALIGNED_SAFE*/
+  /* This CPU does not handle unaligned word access */
+
+  /* Consume enough so that the next data byte is word aligned */
+  int i = -(long)ptr & 3;
+  if(i && i <= len) {
+      DOBYTES(i, h1, c, n, ptr, len);
+  }
+
+  /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */
+  end = ptr + len/4*4;
+  switch(n) { /* how many bytes in c */
+  case 0: /* c=[----]  w=[3210]  b=[3210]=w            c'=[----] */
+    for( ; ptr < end ; ptr+=4) {
+      uint32_t k1 = READ_UINT32(ptr);
+      DOBLOCK(h1, k1);
+    }
+    break;
+  case 1: /* c=[0---]  w=[4321]  b=[3210]=c>>24|w<<8   c'=[4---] */
+    for( ; ptr < end ; ptr+=4) {
+      uint32_t k1 = c>>24;
+      c = READ_UINT32(ptr);
+      k1 |= c<<8;
+      DOBLOCK(h1, k1);
+    }
+    break;
+  case 2: /* c=[10--]  w=[5432]  b=[3210]=c>>16|w<<16  c'=[54--] */
+    for( ; ptr < end ; ptr+=4) {
+      uint32_t k1 = c>>16;
+      c = READ_UINT32(ptr);
+      k1 |= c<<16;
+      DOBLOCK(h1, k1);
+    }
+    break;
+  case 3: /* c=[210-]  w=[6543]  b=[3210]=c>>8|w<<24   c'=[654-] */
+    for( ; ptr < end ; ptr+=4) {
+      uint32_t k1 = c>>8;
+      c = READ_UINT32(ptr);
+      k1 |= c<<24;
+      DOBLOCK(h1, k1);
+    }
+  }
+#endif /*UNALIGNED_SAFE*/
+
+  /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */
+  len -= len/4*4;  /* NOTE(review): a negative len is not guarded - confirm callers */
+
+  /* Append any remaining bytes into carry */
+  DOBYTES(len, h1, c, n, ptr, len);
+
+  /* Copy out new running hash and carry (count n goes in the cleared low byte) */
+  *ph1 = h1;
+  *pcarry = (c & ~0xff) | n;
+} 
+
+/*---------------------------------------------------------------------------*/
+
+/* Finish a hash: fold in the 1..3 bytes still held in carry (if any), then
+ * the total byte count (as in the original Murmur3A), then avalanche. */
+uint32_t
+GSPrivateFinishHash(uint32_t h, uint32_t carry, uint32_t total_length)
+{
+  const int pending = carry & 3;      /* number of unprocessed tail bytes */
+
+  if (pending != 0) {
+    uint32_t tail = carry >> (4 - pending) * 8;
+    tail *= C1;
+    tail = ROTL32(tail, 15);
+    tail *= C2;
+    h ^= tail;
+  }
+  h ^= total_length;
+
+  /* fmix: final avalanche of the 32-bit state */
+  h ^= h >> 16; h *= 0x85ebca6b;
+  h ^= h >> 13; h *= 0xc2b2ae35;
+  return h ^ (h >> 16);
+}
+
+#endif  /* OLDHASH */
+
diff --git a/Source/GSString.m b/Source/GSString.m
index c828d9dbf..07cf5c1a6 100644
--- a/Source/GSString.m
+++ b/Source/GSString.m
@@ -3363,50 +3363,48 @@ transmute(GSStr self, NSString *aString)
 {
   if (self->_flags.hash == 0)
     {
-      unsigned	ret = 0;
-      unsigned	len = self->_count;
+      uint32_t	ret = 0;
+      int	len = (int)self->_count;
 
       if (len > 0)
 	{
-	  register unsigned	index = 0;
-
 	  if (self->_flags.wide)
 	    {
-	      register const unichar	*p = self->_contents.u;
+	      const unichar	*p = self->_contents.u;
 
-	      while (index < len)
-		{
-		  ret = (ret << 5) + ret + p[index++];
-		}
+              ret = GSPrivateHash(0, p, len * sizeof(unichar));
 	    }
-	  else
+	  else if (len > 64)
+            {
+              return (self->_flags.hash = [super hash]);
+            }
+          else
 	    {
-	      register const unsigned char	*p = self->_contents.c;
+              unichar                   buf[64];
+              unsigned	                index;
+	      const unsigned char	*p = self->_contents.c;
 
 	      if (internalEncoding == NSISOLatin1StringEncoding)
 		{
-		  while (index < len)
-		    {
-		      ret = (ret << 5) + ret + p[index++];
-		    }
-		}
+                  for (index = 0; index < len; index++)
+                    {
+                      buf[index] = p[index];
+                    }
+                }
 	      else
 		{
-		  while (index < len)
-		    {
-		      unichar	u = p[index++];
+                  for (index = 0; index < len; index++)
+                    {
+		      unichar	u = p[index];
 
-		      if (u > 127)
-			{
-			  unsigned char	c = (unsigned char)u;
-			  unsigned int	s = 1;
-			  unichar	*d = &u;
-
-			  GSToUnicode(&d, &s, &c, 1, internalEncoding, 0, 0);
-			}
-		      ret = (ret << 5) + ret + u;
-		    }
+                      if (u > 127)
+                        {
+                          return (self->_flags.hash = [super hash]);
+                        }
+                      buf[index] = u;
+                    }
 		}
+              ret = GSPrivateHash(0, buf, len * sizeof(unichar));
 	    }
 
 	  /*
@@ -5530,25 +5528,41 @@ literalIsEqual(NXConstantString *self, id anObject)
 }
 
 /* Must match the implementation in NSString
+ * To avoid allocating memory, we build the hash incrementally.
  */
 - (NSUInteger) hash
 {
   if (nxcslen > 0)
     {
-      unsigned	ret = 0;
+      uint32_t  s0 = 0;
+      uint32_t  s1 = 0;
+      unichar   chunk[64];
+      uint32_t	ret;
       unichar	n = 0;
       unsigned	i = 0;
-      unichar	c;
+      int       l = 0;
+      uint32_t  t = 0;
 
       while (i < nxcslen)
 	{
-	  c = nextUTF8((const uint8_t *)nxcsptr, nxcslen, &i, &n);
-	  ret = (ret << 5) + ret + c;
+	  chunk[l++] = nextUTF8((const uint8_t *)nxcsptr, nxcslen, &i, &n);
+	  if (64 == l)
+            {
+              GSPrivateIncrementalHash(&s0, &s1, chunk, l * sizeof(unichar));
+              t += l;
+              l = 0;
+            }
 	}
       if (0 != n)
 	{
-	  ret = (ret << 5) + ret + n;	// Add final character
+	  chunk[l++] = n;	// Add final character
 	}
+      if (l > 0)
+        {
+          GSPrivateIncrementalHash(&s0, &s1, chunk, l * sizeof(unichar));
+          t += l;
+        }
+      ret = GSPrivateFinishHash(s0, s1, t * sizeof(unichar));
       ret &= 0x0fffffff;
       if (ret == 0)
 	{
diff --git a/Source/NSSortDescriptor.m b/Source/NSSortDescriptor.m
index 45455fa07..e01ba7168 100644
--- a/Source/NSSortDescriptor.m
+++ b/Source/NSSortDescriptor.m
@@ -38,7 +38,9 @@
 
 static BOOL     initialized = NO;
 
+#ifdef  __clang__
 #pragma clang diagnostic ignored "-Wreceiver-forward-class"
+#endif
 
 #if     GS_USE_TIMSORT
 @class  GSTimSortDescriptor;
@@ -117,7 +119,7 @@ static BOOL     initialized = NO;
 {
   const char	*sel = sel_getName(_selector);
 
-  return _ascending + GSPrivateHash(sel, strlen(sel), 16, YES) + [_key hash];
+  return _ascending + GSPrivateHash(0, sel, strlen(sel)) + [_key hash];
 }
 
 - (id) initWithKey: (NSString *) key ascending: (BOOL) ascending
diff --git a/Source/NSString.m b/Source/NSString.m
index aeb654725..6a6a7f8b0 100644
--- a/Source/NSString.m
+++ b/Source/NSString.m
@@ -2430,29 +2430,17 @@ static UCollator *GSICUCollatorOpen(NSStringCompareOptions mask, NSLocale *local
  */
 - (NSUInteger) hash
 {
-  unsigned	ret = 0;
-  unsigned	len = [self length];
+  uint32_t	ret = 0;
+  int   	len = (int)[self length];
 
   if (len > 0)
     {
       unichar		buf[64];
       unichar		*ptr = (len <= 64) ? buf :
 	NSZoneMalloc(NSDefaultMallocZone(), len * sizeof(unichar));
-      unichar		*p;
-      unsigned		char_count = 0;
 
       [self getCharacters: ptr range: NSMakeRange(0,len)];
-
-      p = ptr;
-
-      while (char_count++ < len)
-	{
-	  unichar	c = *p++;
-
-	  // FIXME ... should normalize composed character sequences.
-	  ret = (ret << 5) + ret + c;
-	}
-
+      ret = GSPrivateHash(0, (const void*)ptr, len * sizeof(unichar));
       if (ptr != buf)
 	{
 	  NSZoneFree(NSDefaultMallocZone(), ptr);