mirror of
https://github.com/gnustep/libs-base.git
synced 2025-05-31 16:50:58 +00:00
experiment with slower but better hashes
git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@36344 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
parent
4de78bda70
commit
cc87536fda
7 changed files with 429 additions and 79 deletions
14
ChangeLog
14
ChangeLog
|
@ -1,9 +1,21 @@
|
||||||
|
2012-03-12 Richard Frith-Macdonald <rfm@gnu.org>
|
||||||
|
|
||||||
|
* Source/GSPrivate.h: Change hash function args and add new functions
|
||||||
|
* for incremental hash.
|
||||||
|
* Source/NSString.m: Update to use the private hash function introduced
|
||||||
|
for NSSortdescriptor.
|
||||||
|
* Source/GSString.m: ditto.
|
||||||
|
* Source/NSSortDescriptor.m: Update for new function arguments.
|
||||||
|
* Source/GSPrivateHash.m: Add alternative version of hash functions
|
||||||
|
(old code plus new public domain murmurhash3 code).
|
||||||
|
* Source/GNUmakefile: Build alternative hash functions.
|
||||||
|
build with OLDHASH defined to 0 to get new alternatives.
|
||||||
|
|
||||||
2012-03-10 Richard Frith-Macdonald <rfm@gnu.org>
|
2012-03-10 Richard Frith-Macdonald <rfm@gnu.org>
|
||||||
|
|
||||||
* Source/NSFileManager.m:
|
* Source/NSFileManager.m:
|
||||||
Fix some autorelease pool management issues.
|
Fix some autorelease pool management issues.
|
||||||
|
|
||||||
=======
|
|
||||||
2013-03-09 Sebastian Reitenbach <sebastia@l00-bugdead-prods.de>
|
2013-03-09 Sebastian Reitenbach <sebastia@l00-bugdead-prods.de>
|
||||||
* Tests/base/NSFileManager/general.m
|
* Tests/base/NSFileManager/general.m
|
||||||
remove accidently snucked in header
|
remove accidently snucked in header
|
||||||
|
|
|
@ -163,6 +163,7 @@ GSFormat.m \
|
||||||
GSHTTPAuthentication.m \
|
GSHTTPAuthentication.m \
|
||||||
GSHTTPURLHandle.m \
|
GSHTTPURLHandle.m \
|
||||||
GSICUString.m \
|
GSICUString.m \
|
||||||
|
GSPrivateHash.m \
|
||||||
GSQuickSort.m \
|
GSQuickSort.m \
|
||||||
GSRunLoopWatcher.m \
|
GSRunLoopWatcher.m \
|
||||||
GSSet.m \
|
GSSet.m \
|
||||||
|
|
|
@ -376,34 +376,6 @@ GSPrivateIsByteEncoding(NSStringEncoding encoding) GS_ATTRIB_PRIVATE;
|
||||||
BOOL
|
BOOL
|
||||||
GSPrivateIsEncodingSupported(NSStringEncoding encoding) GS_ATTRIB_PRIVATE;
|
GSPrivateIsEncodingSupported(NSStringEncoding encoding) GS_ATTRIB_PRIVATE;
|
||||||
|
|
||||||
/* Hash function to hash up to limit bytes from data of specified length.
|
|
||||||
* If the flag is NO then a result of 0 is mapped to 0xffffffff.
|
|
||||||
* This is a pretty useful general purpose hash function.
|
|
||||||
*/
|
|
||||||
static inline unsigned
|
|
||||||
GSPrivateHash(const void *data, unsigned length, unsigned limit, BOOL zero)
|
|
||||||
__attribute__((unused));
|
|
||||||
static inline unsigned
|
|
||||||
GSPrivateHash(const void *data, unsigned length, unsigned limit, BOOL zero)
|
|
||||||
{
|
|
||||||
unsigned ret = length;
|
|
||||||
unsigned l = length;
|
|
||||||
|
|
||||||
if (limit < length)
|
|
||||||
{
|
|
||||||
l = limit;
|
|
||||||
}
|
|
||||||
while (l-- > 0)
|
|
||||||
{
|
|
||||||
ret = (ret << 5) + ret + ((const unsigned char*)data)[l];
|
|
||||||
}
|
|
||||||
if (ret == 0 && zero == NO)
|
|
||||||
{
|
|
||||||
ret = 0xffffffff;
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* load a module into the runtime
|
/* load a module into the runtime
|
||||||
*/
|
*/
|
||||||
long
|
long
|
||||||
|
@ -543,5 +515,28 @@ GSPrivateIsCollectable(const void *ptr) GS_ATTRIB_PRIVATE;
|
||||||
NSZone*
|
NSZone*
|
||||||
GSAtomicMallocZone (void);
|
GSAtomicMallocZone (void);
|
||||||
|
|
||||||
|
/* Generate a 32bit hash from supplied byte data.
|
||||||
|
*/
|
||||||
|
uint32_t
|
||||||
|
GSPrivateHash(uint32_t seed, const void *bytes, int length)
|
||||||
|
GS_ATTRIB_PRIVATE;
|
||||||
|
|
||||||
|
/* Incorporate 'l' bytes of data from the buffer pointed to by 'b' into
|
||||||
|
* the hash state information pointed to by p0 and p1.
|
||||||
|
* The hash state variables should have been initialised to zero before
|
||||||
|
* the first call to this function, and the result should be produced
|
||||||
|
* by calling the GSPrivateFinishHash() function.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
GSPrivateIncrementalHash(uint32_t *p0, uint32_t *p1, const void *b, int l)
|
||||||
|
GS_ATTRIB_PRIVATE;
|
||||||
|
|
||||||
|
/* Generate a 32bit hash from supplied state variables resulting from
|
||||||
|
* calls to the GSPrivateIncrementalHash() function.
|
||||||
|
*/
|
||||||
|
uint32_t
|
||||||
|
GSPrivateFinishHash(uint32_t s0, uint32_t s1, uint32_t totalLength)
|
||||||
|
GS_ATTRIB_PRIVATE;
|
||||||
|
|
||||||
#endif /* _GSPrivate_h_ */
|
#endif /* _GSPrivate_h_ */
|
||||||
|
|
||||||
|
|
338
Source/GSPrivateHash.m
Normal file
338
Source/GSPrivateHash.m
Normal file
|
@ -0,0 +1,338 @@
|
||||||
|
/* GSPrivateHash.m
|
||||||
|
Copyright (C) 2013 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
Written by: Richard Frith-Macdonald <rfm@gnu.org>
|
||||||
|
|
||||||
|
This file is part of the GNUstep Base Library.
|
||||||
|
|
||||||
|
This library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Library General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with this library; if not, write to the Free
|
||||||
|
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||||
|
MA 02111 USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#import "GSPrivate.h"
|
||||||
|
|
||||||
|
uint32_t
|
||||||
|
GSPrivateHash(uint32_t seed, const void *bytes, int length)
|
||||||
|
{
|
||||||
|
uint32_t carry = 0;
|
||||||
|
|
||||||
|
GSPrivateIncrementalHash(&seed, &carry, bytes, length);
|
||||||
|
return GSPrivateFinishHash(seed, carry, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifndef OLDHASH
|
||||||
|
#define OLDHASH 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if OLDHASH
|
||||||
|
|
||||||
|
/* Very fast, simple hash. Poor distribution properties though.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
GSPrivateIncrementalHash(uint32_t *p0, uint32_t *p1, const void *b, int l)
|
||||||
|
{
|
||||||
|
unsigned i;
|
||||||
|
|
||||||
|
for (i = 0; i < l; i++)
|
||||||
|
{
|
||||||
|
*p0 = (*p0 << 5) + *p0 + ((const uint8_t*)b)[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t
|
||||||
|
GSPrivateFinishHash(uint32_t s0, uint32_t s1, uint32_t totalLength)
|
||||||
|
{
|
||||||
|
return s0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else /* OLDHASH */
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------------------
|
||||||
|
* MurmurHash3 was written by Austin Appleby, and is placed in the public
|
||||||
|
* domain.
|
||||||
|
*
|
||||||
|
* This implementation was written by Shane Day, and is also public domain.
|
||||||
|
*
|
||||||
|
* This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A)
|
||||||
|
* with support for progressive processing.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
If you want to understand the MurmurHash algorithm you would be much better
|
||||||
|
off reading the original source. Just point your browser at:
|
||||||
|
http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
|
||||||
|
|
||||||
|
|
||||||
|
What this version provides?
|
||||||
|
|
||||||
|
1. Progressive data feeding. Useful when the entire payload to be hashed
|
||||||
|
does not fit in memory or when the data is streamed through the application.
|
||||||
|
Also useful when hashing a number of strings with a common prefix. A partial
|
||||||
|
hash of a prefix string can be generated and reused for each suffix string.
|
||||||
|
|
||||||
|
2. Portability. Plain old C so that it should compile on any old compiler.
|
||||||
|
Both CPU endian and access-alignment neutral, but avoiding inefficient code
|
||||||
|
when possible depending on CPU capabilities.
|
||||||
|
|
||||||
|
3. Drop in. I personally like nice self contained public domain code, making it
|
||||||
|
easy to pilfer without loads of refactoring to work properly in the existing
|
||||||
|
application code & makefile structure and mucking around with licence files.
|
||||||
|
Just copy PMurHash.h and PMurHash.c and you're ready to go.
|
||||||
|
|
||||||
|
|
||||||
|
How does it work?
|
||||||
|
|
||||||
|
We can only process entire 32 bit chunks of input, except for the very end
|
||||||
|
that may be shorter. So along with the partial hash we need to give back to
|
||||||
|
the caller a carry containing up to 3 bytes that we were unable to process.
|
||||||
|
This carry also needs to record the number of bytes the carry holds. I use
|
||||||
|
the low 2 bits as a count (0..3) and the carry bytes are shifted into the
|
||||||
|
high byte in stream order.
|
||||||
|
|
||||||
|
To handle endianess I simply use a macro that reads a uint32_t and define
|
||||||
|
that macro to be a direct read on little endian machines, a read and swap
|
||||||
|
on big endian machines, or a byte-by-byte read if the endianess is unknown.
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
|
||||||
|
/* MSVC warnings we choose to ignore */
|
||||||
|
#if defined(_MSC_VER)
|
||||||
|
#pragma warning(disable: 4127) /* conditional expression is constant */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------------------
|
||||||
|
* Endianess, misalignment capabilities and util macros
|
||||||
|
*
|
||||||
|
* The following 3 macros are defined in this section. The other macros defined
|
||||||
|
* are only needed to help derive these 3.
|
||||||
|
*
|
||||||
|
* READ_UINT32(x) Read a little endian unsigned 32-bit int
|
||||||
|
* UNALIGNED_SAFE Defined if READ_UINT32 works on non-word boundaries
|
||||||
|
* ROTL32(x,r) Rotate x left by r bits
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Convention is to define __BYTE_ORDER == to one of these values */
|
||||||
|
#if !defined(__BIG_ENDIAN)
|
||||||
|
#define __BIG_ENDIAN 4321
|
||||||
|
#endif
|
||||||
|
#if !defined(__LITTLE_ENDIAN)
|
||||||
|
#define __LITTLE_ENDIAN 1234
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* I386 */
|
||||||
|
#if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(i386)
|
||||||
|
#define __BYTE_ORDER __LITTLE_ENDIAN
|
||||||
|
#define UNALIGNED_SAFE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* gcc 'may' define __LITTLE_ENDIAN__ or __BIG_ENDIAN__ to 1 (Note the trailing __),
|
||||||
|
* or even _LITTLE_ENDIAN or _BIG_ENDIAN (Note the single _ prefix) */
|
||||||
|
#if !defined(__BYTE_ORDER)
|
||||||
|
#if defined(__LITTLE_ENDIAN__) && __LITTLE_ENDIAN__==1 || defined(_LITTLE_ENDIAN) && _LITTLE_ENDIAN==1
|
||||||
|
#define __BYTE_ORDER __LITTLE_ENDIAN
|
||||||
|
#elif defined(__BIG_ENDIAN__) && __BIG_ENDIAN__==1 || defined(_BIG_ENDIAN) && _BIG_ENDIAN==1
|
||||||
|
#define __BYTE_ORDER __BIG_ENDIAN
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* gcc (usually) defines xEL/EB macros for ARM and MIPS endianess */
|
||||||
|
#if !defined(__BYTE_ORDER)
|
||||||
|
#if defined(__ARMEL__) || defined(__MIPSEL__)
|
||||||
|
#define __BYTE_ORDER __LITTLE_ENDIAN
|
||||||
|
#endif
|
||||||
|
#if defined(__ARMEB__) || defined(__MIPSEB__)
|
||||||
|
#define __BYTE_ORDER __BIG_ENDIAN
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Now find best way we can to READ_UINT32 */
|
||||||
|
#if __BYTE_ORDER==__LITTLE_ENDIAN
|
||||||
|
/* CPU endian matches murmurhash algorithm, so read 32-bit word directly */
|
||||||
|
#define READ_UINT32(ptr) (*((uint32_t*)(ptr)))
|
||||||
|
#elif __BYTE_ORDER==__BIG_ENDIAN
|
||||||
|
/* TODO: Add additional cases below where a compiler provided bswap32 is available */
|
||||||
|
#if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3))
|
||||||
|
#define READ_UINT32(ptr) (__builtin_bswap32(*((uint32_t*)(ptr))))
|
||||||
|
#else
|
||||||
|
/* Without a known fast bswap32 we're just as well off doing this */
|
||||||
|
#define READ_UINT32(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24)
|
||||||
|
#define UNALIGNED_SAFE
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
/* Unknown endianess so last resort is to read individual bytes */
|
||||||
|
#define READ_UINT32(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24)
|
||||||
|
|
||||||
|
/* Since we're not doing word-reads we can skip the messing about with realignment */
|
||||||
|
#define UNALIGNED_SAFE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Find best way to ROTL32 */
|
||||||
|
#if defined(_MSC_VER)
|
||||||
|
#include <stdlib.h> /* Microsoft put _rotl declaration in here */
|
||||||
|
#define ROTL32(x,r) _rotl(x,r)
|
||||||
|
#else
|
||||||
|
/* gcc recognises this code and generates a rotate instruction for CPUs with one */
|
||||||
|
#define ROTL32(x,r) (((uint32_t)x << r) | ((uint32_t)x >> (32 - r)))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/*-----------------------------------------------------------------------------
|
||||||
|
* Core murmurhash algorithm macros */
|
||||||
|
|
||||||
|
#define C1 (0xcc9e2d51)
|
||||||
|
#define C2 (0x1b873593)
|
||||||
|
|
||||||
|
/* This is the main processing body of the algorithm. It operates
|
||||||
|
* on each full 32-bits of input. */
|
||||||
|
#define DOBLOCK(h1, k1) do{ \
|
||||||
|
k1 *= C1; \
|
||||||
|
k1 = ROTL32(k1,15); \
|
||||||
|
k1 *= C2; \
|
||||||
|
\
|
||||||
|
h1 ^= k1; \
|
||||||
|
h1 = ROTL32(h1,13); \
|
||||||
|
h1 = h1*5+0xe6546b64; \
|
||||||
|
}while(0)
|
||||||
|
|
||||||
|
|
||||||
|
/* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */
|
||||||
|
/* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload */
|
||||||
|
#define DOBYTES(cnt, h1, c, n, ptr, len) do{ \
|
||||||
|
int _i = cnt; \
|
||||||
|
while(_i--) { \
|
||||||
|
c = c>>8 | *ptr++<<24; \
|
||||||
|
n++; len--; \
|
||||||
|
if(n==4) { \
|
||||||
|
DOBLOCK(h1, c); \
|
||||||
|
n = 0; \
|
||||||
|
} \
|
||||||
|
} }while(0)
|
||||||
|
|
||||||
|
/*---------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
/* Main hashing function. Initialise carry to 0 and h1 to 0 or an initial seed
|
||||||
|
* if wanted. Both ph1 and pcarry are required arguments. */
|
||||||
|
void
|
||||||
|
GSPrivateIncrementalHash(
|
||||||
|
uint32_t *ph1, uint32_t *pcarry, const void *key, int len)
|
||||||
|
{
|
||||||
|
uint32_t h1 = *ph1;
|
||||||
|
uint32_t c = *pcarry;
|
||||||
|
|
||||||
|
const uint8_t *ptr = (uint8_t*)key;
|
||||||
|
const uint8_t *end;
|
||||||
|
|
||||||
|
/* Extract carry count from low 2 bits of c value */
|
||||||
|
int n = c & 3;
|
||||||
|
|
||||||
|
#if defined(UNALIGNED_SAFE)
|
||||||
|
/* This CPU handles unaligned word access */
|
||||||
|
|
||||||
|
/* Consume any carry bytes */
|
||||||
|
int i = (4-n) & 3;
|
||||||
|
if(i && i <= len) {
|
||||||
|
DOBYTES(i, h1, c, n, ptr, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Process 32-bit chunks */
|
||||||
|
end = ptr + len/4*4;
|
||||||
|
for( ; ptr < end ; ptr+=4) {
|
||||||
|
uint32_t k1 = READ_UINT32(ptr);
|
||||||
|
DOBLOCK(h1, k1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else /*UNALIGNED_SAFE*/
|
||||||
|
/* This CPU does not handle unaligned word access */
|
||||||
|
|
||||||
|
/* Consume enough so that the next data byte is word aligned */
|
||||||
|
int i = -(long)ptr & 3;
|
||||||
|
if(i && i <= len) {
|
||||||
|
DOBYTES(i, h1, c, n, ptr, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */
|
||||||
|
end = ptr + len/4*4;
|
||||||
|
switch(n) { /* how many bytes in c */
|
||||||
|
case 0: /* c=[----] w=[3210] b=[3210]=w c'=[----] */
|
||||||
|
for( ; ptr < end ; ptr+=4) {
|
||||||
|
uint32_t k1 = READ_UINT32(ptr);
|
||||||
|
DOBLOCK(h1, k1);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 1: /* c=[0---] w=[4321] b=[3210]=c>>24|w<<8 c'=[4---] */
|
||||||
|
for( ; ptr < end ; ptr+=4) {
|
||||||
|
uint32_t k1 = c>>24;
|
||||||
|
c = READ_UINT32(ptr);
|
||||||
|
k1 |= c<<8;
|
||||||
|
DOBLOCK(h1, k1);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 2: /* c=[10--] w=[5432] b=[3210]=c>>16|w<<16 c'=[54--] */
|
||||||
|
for( ; ptr < end ; ptr+=4) {
|
||||||
|
uint32_t k1 = c>>16;
|
||||||
|
c = READ_UINT32(ptr);
|
||||||
|
k1 |= c<<16;
|
||||||
|
DOBLOCK(h1, k1);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 3: /* c=[210-] w=[6543] b=[3210]=c>>8|w<<24 c'=[654-] */
|
||||||
|
for( ; ptr < end ; ptr+=4) {
|
||||||
|
uint32_t k1 = c>>8;
|
||||||
|
c = READ_UINT32(ptr);
|
||||||
|
k1 |= c<<24;
|
||||||
|
DOBLOCK(h1, k1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /*UNALIGNED_SAFE*/
|
||||||
|
|
||||||
|
/* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */
|
||||||
|
len -= len/4*4;
|
||||||
|
|
||||||
|
/* Append any remaining bytes into carry */
|
||||||
|
DOBYTES(len, h1, c, n, ptr, len);
|
||||||
|
|
||||||
|
/* Copy out new running hash and carry */
|
||||||
|
*ph1 = h1;
|
||||||
|
*pcarry = (c & ~0xff) | n;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*---------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
/* Finalize a hash. To match the original Murmur3A the total_length must be provided */
|
||||||
|
uint32_t
|
||||||
|
GSPrivateFinishHash(uint32_t h, uint32_t carry, uint32_t total_length)
|
||||||
|
{
|
||||||
|
uint32_t k1;
|
||||||
|
int n = carry & 3;
|
||||||
|
if(n) {
|
||||||
|
k1 = carry >> (4-n)*8;
|
||||||
|
k1 *= C1; k1 = ROTL32(k1,15); k1 *= C2; h ^= k1;
|
||||||
|
}
|
||||||
|
h ^= total_length;
|
||||||
|
|
||||||
|
/* fmix */
|
||||||
|
h ^= h >> 16;
|
||||||
|
h *= 0x85ebca6b;
|
||||||
|
h ^= h >> 13;
|
||||||
|
h *= 0xc2b2ae35;
|
||||||
|
h ^= h >> 16;
|
||||||
|
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* OLDHASH */
|
||||||
|
|
|
@ -3363,50 +3363,48 @@ transmute(GSStr self, NSString *aString)
|
||||||
{
|
{
|
||||||
if (self->_flags.hash == 0)
|
if (self->_flags.hash == 0)
|
||||||
{
|
{
|
||||||
unsigned ret = 0;
|
uint32_t ret = 0;
|
||||||
unsigned len = self->_count;
|
int len = (int)self->_count;
|
||||||
|
|
||||||
if (len > 0)
|
if (len > 0)
|
||||||
{
|
{
|
||||||
register unsigned index = 0;
|
|
||||||
|
|
||||||
if (self->_flags.wide)
|
if (self->_flags.wide)
|
||||||
{
|
{
|
||||||
register const unichar *p = self->_contents.u;
|
const unichar *p = self->_contents.u;
|
||||||
|
|
||||||
while (index < len)
|
ret = GSPrivateHash(0, p, len * sizeof(unichar));
|
||||||
{
|
|
||||||
ret = (ret << 5) + ret + p[index++];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else if (len > 64)
|
||||||
|
{
|
||||||
|
return (self->_flags.hash = [super hash]);
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
register const unsigned char *p = self->_contents.c;
|
unichar buf[64];
|
||||||
|
unsigned index;
|
||||||
|
const unsigned char *p = self->_contents.c;
|
||||||
|
|
||||||
if (internalEncoding == NSISOLatin1StringEncoding)
|
if (internalEncoding == NSISOLatin1StringEncoding)
|
||||||
{
|
{
|
||||||
while (index < len)
|
for (index = 0; index < len; index++)
|
||||||
{
|
{
|
||||||
ret = (ret << 5) + ret + p[index++];
|
buf[index] = p[index];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
while (index < len)
|
for (index = 0; index < len; index++)
|
||||||
{
|
{
|
||||||
unichar u = p[index++];
|
unichar u = p[index];
|
||||||
|
|
||||||
if (u > 127)
|
if (u > 127)
|
||||||
{
|
{
|
||||||
unsigned char c = (unsigned char)u;
|
return (self->_flags.hash = [super hash]);
|
||||||
unsigned int s = 1;
|
}
|
||||||
unichar *d = &u;
|
buf[index] = u;
|
||||||
|
}
|
||||||
GSToUnicode(&d, &s, &c, 1, internalEncoding, 0, 0);
|
|
||||||
}
|
|
||||||
ret = (ret << 5) + ret + u;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
ret = GSPrivateHash(0, buf, len * sizeof(unichar));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -5530,25 +5528,41 @@ literalIsEqual(NXConstantString *self, id anObject)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Must match the implementation in NSString
|
/* Must match the implementation in NSString
|
||||||
|
* To avoid allocating memory, we build the hash incrementally.
|
||||||
*/
|
*/
|
||||||
- (NSUInteger) hash
|
- (NSUInteger) hash
|
||||||
{
|
{
|
||||||
if (nxcslen > 0)
|
if (nxcslen > 0)
|
||||||
{
|
{
|
||||||
unsigned ret = 0;
|
uint32_t s0 = 0;
|
||||||
|
uint32_t s1 = 0;
|
||||||
|
unichar chunk[64];
|
||||||
|
uint32_t ret;
|
||||||
unichar n = 0;
|
unichar n = 0;
|
||||||
unsigned i = 0;
|
unsigned i = 0;
|
||||||
unichar c;
|
int l = 0;
|
||||||
|
uint32_t t = 0;
|
||||||
|
|
||||||
while (i < nxcslen)
|
while (i < nxcslen)
|
||||||
{
|
{
|
||||||
c = nextUTF8((const uint8_t *)nxcsptr, nxcslen, &i, &n);
|
chunk[l++] = nextUTF8((const uint8_t *)nxcsptr, nxcslen, &i, &n);
|
||||||
ret = (ret << 5) + ret + c;
|
if (64 == l)
|
||||||
|
{
|
||||||
|
GSPrivateIncrementalHash(&s0, &s1, chunk, l * sizeof(unichar));
|
||||||
|
t += l;
|
||||||
|
l = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (0 != n)
|
if (0 != n)
|
||||||
{
|
{
|
||||||
ret = (ret << 5) + ret + n; // Add final character
|
chunk[l++] = n; // Add final character
|
||||||
}
|
}
|
||||||
|
if (l > 0)
|
||||||
|
{
|
||||||
|
GSPrivateIncrementalHash(&s0, &s1, chunk, l * sizeof(unichar));
|
||||||
|
t += l;
|
||||||
|
}
|
||||||
|
ret = GSPrivateFinishHash(s0, s1, t * sizeof(unichar));
|
||||||
ret &= 0x0fffffff;
|
ret &= 0x0fffffff;
|
||||||
if (ret == 0)
|
if (ret == 0)
|
||||||
{
|
{
|
||||||
|
|
|
@ -38,7 +38,9 @@
|
||||||
|
|
||||||
static BOOL initialized = NO;
|
static BOOL initialized = NO;
|
||||||
|
|
||||||
|
#ifdef __clang__
|
||||||
#pragma clang diagnostic ignored "-Wreceiver-forward-class"
|
#pragma clang diagnostic ignored "-Wreceiver-forward-class"
|
||||||
|
#endif
|
||||||
|
|
||||||
#if GS_USE_TIMSORT
|
#if GS_USE_TIMSORT
|
||||||
@class GSTimSortDescriptor;
|
@class GSTimSortDescriptor;
|
||||||
|
@ -117,7 +119,7 @@ static BOOL initialized = NO;
|
||||||
{
|
{
|
||||||
const char *sel = sel_getName(_selector);
|
const char *sel = sel_getName(_selector);
|
||||||
|
|
||||||
return _ascending + GSPrivateHash(sel, strlen(sel), 16, YES) + [_key hash];
|
return _ascending + GSPrivateHash(0, sel, strlen(sel)) + [_key hash];
|
||||||
}
|
}
|
||||||
|
|
||||||
- (id) initWithKey: (NSString *) key ascending: (BOOL) ascending
|
- (id) initWithKey: (NSString *) key ascending: (BOOL) ascending
|
||||||
|
|
|
@ -2430,29 +2430,17 @@ static UCollator *GSICUCollatorOpen(NSStringCompareOptions mask, NSLocale *local
|
||||||
*/
|
*/
|
||||||
- (NSUInteger) hash
|
- (NSUInteger) hash
|
||||||
{
|
{
|
||||||
unsigned ret = 0;
|
uint32_t ret = 0;
|
||||||
unsigned len = [self length];
|
int len = (int)[self length];
|
||||||
|
|
||||||
if (len > 0)
|
if (len > 0)
|
||||||
{
|
{
|
||||||
unichar buf[64];
|
unichar buf[64];
|
||||||
unichar *ptr = (len <= 64) ? buf :
|
unichar *ptr = (len <= 64) ? buf :
|
||||||
NSZoneMalloc(NSDefaultMallocZone(), len * sizeof(unichar));
|
NSZoneMalloc(NSDefaultMallocZone(), len * sizeof(unichar));
|
||||||
unichar *p;
|
|
||||||
unsigned char_count = 0;
|
|
||||||
|
|
||||||
[self getCharacters: ptr range: NSMakeRange(0,len)];
|
[self getCharacters: ptr range: NSMakeRange(0,len)];
|
||||||
|
ret = GSPrivateHash(0, (const void*)ptr, len * sizeof(unichar));
|
||||||
p = ptr;
|
|
||||||
|
|
||||||
while (char_count++ < len)
|
|
||||||
{
|
|
||||||
unichar c = *p++;
|
|
||||||
|
|
||||||
// FIXME ... should normalize composed character sequences.
|
|
||||||
ret = (ret << 5) + ret + c;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ptr != buf)
|
if (ptr != buf)
|
||||||
{
|
{
|
||||||
NSZoneFree(NSDefaultMallocZone(), ptr);
|
NSZoneFree(NSDefaultMallocZone(), ptr);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue