2006-02-10 22:01:20 +00:00
|
|
|
/*
|
|
|
|
Copyright (C) 2001-2006, William Joseph.
|
|
|
|
All Rights Reserved.
|
|
|
|
|
|
|
|
This file is part of GtkRadiant.
|
|
|
|
|
|
|
|
GtkRadiant is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
GtkRadiant is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with GtkRadiant; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
*/
|
|
|
|
|
|
|
|
#if !defined(INCLUDED_CONTAINER_HASHFUNC_H)
|
|
|
|
#define INCLUDED_CONTAINER_HASHFUNC_H
|
|
|
|
|
|
|
|
#include <cctype>
|
|
|
|
#include "string/string.h"
|
|
|
|
#include "container/array.h"
|
|
|
|
typedef unsigned long int ub4; /* unsigned 4-byte quantities */
|
|
|
|
typedef unsigned char ub1;
|
|
|
|
|
|
|
|
inline ub1 ub1_as_ub1_nocase(ub1 byte)
|
|
|
|
{
|
|
|
|
return std::tolower(byte);
|
|
|
|
}
|
|
|
|
|
|
|
|
inline ub4 ub1x4_as_ub4_nocase(const ub1 bytes[4])
|
|
|
|
{
|
|
|
|
ub4 result;
|
|
|
|
reinterpret_cast<ub1*>(&result)[0] = ub1_as_ub1_nocase(bytes[0]);
|
|
|
|
reinterpret_cast<ub1*>(&result)[1] = ub1_as_ub1_nocase(bytes[1]);
|
|
|
|
reinterpret_cast<ub1*>(&result)[2] = ub1_as_ub1_nocase(bytes[2]);
|
|
|
|
reinterpret_cast<ub1*>(&result)[3] = ub1_as_ub1_nocase(bytes[3]);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
class ub1_default_traits
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
static ub1 as_ub1(ub1 byte)
|
|
|
|
{
|
|
|
|
return byte;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
class ub1_nocase_traits
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
static ub1 as_ub1(ub1 byte)
|
|
|
|
{
|
|
|
|
return ub1_as_ub1_nocase(byte);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
class ub1x4_default_traits
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
static ub4 as_ub4(const ub1 bytes[4])
|
|
|
|
{
|
|
|
|
return *reinterpret_cast<const ub4*>(bytes);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
class ub1x4_nocase_traits
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
static ub4 as_ub4(const ub1 bytes[4])
|
|
|
|
{
|
|
|
|
return ub1x4_as_ub4_nocase(bytes);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
class ub4_default_traits
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
static ub4 as_ub4(ub4 i)
|
|
|
|
{
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
class ub4_nocase_traits
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
static ub4 as_ub4(ub4 i)
|
|
|
|
{
|
|
|
|
return ub1x4_as_ub4_nocase(reinterpret_cast<const ub1*>(&i));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
// lookup2.c
|
|
|
|
// By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
|
|
|
|
// code any way you wish, private, educational, or commercial. It's free.
|
|
|
|
|
|
|
|
#define hashsize(n) ((ub4)1<<(n))
|
|
|
|
#define hashmask(n) (hashsize(n)-1)
|
|
|
|
|
|
|
|
/*
|
|
|
|
--------------------------------------------------------------------
|
|
|
|
mix -- mix 3 32-bit values reversibly.
|
|
|
|
For every delta with one or two bit set, and the deltas of all three
|
|
|
|
high bits or all three low bits, whether the original value of a,b,c
|
|
|
|
is almost all zero or is uniformly distributed,
|
|
|
|
* If mix() is run forward or backward, at least 32 bits in a,b,c
|
|
|
|
have at least 1/4 probability of changing.
|
|
|
|
* If mix() is run forward, every bit of c will change between 1/3 and
|
|
|
|
2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
|
|
|
|
mix() was built out of 36 single-cycle latency instructions in a
|
|
|
|
structure that could supported 2x parallelism, like so:
|
|
|
|
a -= b;
|
|
|
|
a -= c; x = (c>>13);
|
|
|
|
b -= c; a ^= x;
|
|
|
|
b -= a; x = (a<<8);
|
|
|
|
c -= a; b ^= x;
|
|
|
|
c -= b; x = (b>>13);
|
|
|
|
...
|
|
|
|
Unfortunately, superscalar Pentiums and Sparcs can't take advantage
|
|
|
|
of that parallelism. They've also turned some of those single-cycle
|
|
|
|
latency instructions into multi-cycle latency instructions. Still,
|
|
|
|
this is the fastest good hash I could find. There were about 2^^68
|
|
|
|
to choose from. I only looked at a billion or so.
|
|
|
|
--------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
#define mix(a,b,c) \
|
|
|
|
{ \
|
|
|
|
a -= b; a -= c; a ^= (c>>13); \
|
|
|
|
b -= c; b -= a; b ^= (a<<8); \
|
|
|
|
c -= a; c -= b; c ^= (b>>13); \
|
|
|
|
a -= b; a -= c; a ^= (c>>12); \
|
|
|
|
b -= c; b -= a; b ^= (a<<16); \
|
|
|
|
c -= a; c -= b; c ^= (b>>5); \
|
|
|
|
a -= b; a -= c; a ^= (c>>3); \
|
|
|
|
b -= c; b -= a; b ^= (a<<10); \
|
|
|
|
c -= a; c -= b; c ^= (b>>15); \
|
|
|
|
}
|
|
|
|
|
|
|
|
/* same, but slower, works on systems that might have 8 byte ub4's */
|
|
|
|
#define mix2(a,b,c) \
|
|
|
|
{ \
|
|
|
|
a -= b; a -= c; a ^= (c>>13); \
|
|
|
|
b -= c; b -= a; b ^= (a<< 8); \
|
|
|
|
c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
|
|
|
|
a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
|
|
|
|
b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
|
|
|
|
c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
|
|
|
|
a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
|
|
|
|
b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
|
|
|
|
c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
--------------------------------------------------------------------
|
|
|
|
hash() -- hash a variable-length key into a 32-bit value
|
|
|
|
k : the key (the unaligned variable-length array of bytes)
|
|
|
|
len : the length of the key, counting by bytes
|
|
|
|
level : can be any 4-byte value
|
|
|
|
Returns a 32-bit value. Every bit of the key affects every bit of
|
|
|
|
the return value. Every 1-bit and 2-bit delta achieves avalanche.
|
|
|
|
About 36+6len instructions.
|
|
|
|
|
|
|
|
The best hash table sizes are powers of 2. There is no need to do
|
|
|
|
mod a prime (mod is sooo slow!). If you need less than 32 bits,
|
|
|
|
use a bitmask. For example, if you need only 10 bits, do
|
|
|
|
h = (h & hashmask(10));
|
|
|
|
In which case, the hash table should have hashsize(10) elements.
|
|
|
|
|
|
|
|
If you are hashing n strings (ub1 **)k, do it like this:
|
|
|
|
for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
|
|
|
|
|
|
|
|
See http://burlteburtle.net/bob/hash/evahash.html
|
|
|
|
Use for hash table lookup, or anything where one collision in 2^32 is
|
|
|
|
acceptable. Do NOT use for cryptographic purposes.
|
|
|
|
--------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
|
|
|
template<typename UB1Traits, typename UB4x1Traits>
|
|
|
|
inline ub4 hash(
|
|
|
|
const ub1 *k, /* the key */
|
|
|
|
ub4 length, /* the length of the key */
|
|
|
|
ub4 initval, /* the previous hash, or an arbitrary value */
|
|
|
|
const UB1Traits& ub1traits,
|
|
|
|
const UB4x1Traits& ub4x1traits
|
|
|
|
)
|
|
|
|
{
|
|
|
|
register ub4 a,b,c,len;
|
|
|
|
|
|
|
|
/* Set up the internal state */
|
|
|
|
len = length;
|
|
|
|
a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
|
|
|
|
c = initval; /* the previous hash value */
|
|
|
|
|
|
|
|
/*---------------------------------------- handle most of the key */
|
|
|
|
while (len >= 12)
|
|
|
|
{
|
|
|
|
a += (k[0] +((ub4)UB1Traits::as_ub1(k[1])<<8) +((ub4)UB1Traits::as_ub1(k[2])<<16) +((ub4)UB1Traits::as_ub1(k[3])<<24));
|
|
|
|
b += (k[4] +((ub4)UB1Traits::as_ub1(k[5])<<8) +((ub4)UB1Traits::as_ub1(k[6])<<16) +((ub4)UB1Traits::as_ub1(k[7])<<24));
|
|
|
|
c += (k[8] +((ub4)UB1Traits::as_ub1(k[9])<<8) +((ub4)UB1Traits::as_ub1(k[10])<<16)+((ub4)UB1Traits::as_ub1(k[11])<<24));
|
|
|
|
mix(a,b,c);
|
|
|
|
k += 12; len -= 12;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*------------------------------------- handle the last 11 bytes */
|
|
|
|
c += length;
|
|
|
|
switch(len) /* all the case statements fall through */
|
|
|
|
{
|
|
|
|
case 11: c += ((ub4)UB1Traits::as_ub1(k[10]) << 24);
|
|
|
|
case 10: c += ((ub4)UB1Traits::as_ub1(k[9]) << 16);
|
|
|
|
case 9 : c += ((ub4)UB1Traits::as_ub1(k[8]) << 8);
|
|
|
|
/* the first byte of c is reserved for the length */
|
|
|
|
case 8 : b += ((ub4)UB1Traits::as_ub1(k[7]) << 24);
|
|
|
|
case 7 : b += ((ub4)UB1Traits::as_ub1(k[6]) << 16);
|
|
|
|
case 6 : b += ((ub4)UB1Traits::as_ub1(k[5]) << 8);
|
|
|
|
case 5 : b += UB1Traits::as_ub1(k[4]);
|
|
|
|
case 4 : a += ((ub4)UB1Traits::as_ub1(k[3]) << 24);
|
|
|
|
case 3 : a += ((ub4)UB1Traits::as_ub1(k[2]) << 16);
|
|
|
|
case 2 : a += ((ub4)UB1Traits::as_ub1(k[1]) << 8);
|
|
|
|
case 1 : a += UB1Traits::as_ub1(k[0]);
|
|
|
|
/* case 0: nothing left to add */
|
|
|
|
}
|
|
|
|
mix(a,b,c);
|
|
|
|
/*-------------------------------------------- report the result */
|
|
|
|
return c;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
--------------------------------------------------------------------
|
|
|
|
This works on all machines. hash2() is identical to hash() on
|
|
|
|
little-endian machines, except that the length has to be measured
|
|
|
|
in ub4s instead of bytes. It is much faster than hash(). It
|
|
|
|
requires
|
|
|
|
-- that the key be an array of ub4's, and
|
|
|
|
-- that all your machines have the same endianness, and
|
|
|
|
-- that the length be the number of ub4's in the key
|
|
|
|
--------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
template<typename UB4Traits>
|
|
|
|
inline ub4 hash2(
|
|
|
|
const ub4 *k, /* the key */
|
|
|
|
ub4 length, /* the length of the key, in ub4s */
|
|
|
|
ub4 initval, /* the previous hash, or an arbitrary value */
|
|
|
|
const UB4Traits& ub4traits
|
|
|
|
)
|
|
|
|
{
|
|
|
|
register ub4 a,b,c,len;
|
|
|
|
|
|
|
|
/* Set up the internal state */
|
|
|
|
len = length;
|
|
|
|
a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
|
|
|
|
c = initval; /* the previous hash value */
|
|
|
|
|
|
|
|
/*---------------------------------------- handle most of the key */
|
|
|
|
while (len >= 3)
|
|
|
|
{
|
|
|
|
a += UB4Traits::as_ub4(k[0]);
|
|
|
|
b += UB4Traits::as_ub4(k[1]);
|
|
|
|
c += UB4Traits::as_ub4(k[2]);
|
|
|
|
mix(a,b,c);
|
|
|
|
k += 3; len -= 3;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*-------------------------------------- handle the last 2 ub4's */
|
|
|
|
c += length;
|
|
|
|
switch(len) /* all the case statements fall through */
|
|
|
|
{
|
|
|
|
/* c is reserved for the length */
|
|
|
|
case 2 : b += UB4Traits::as_ub4(k[1]);
|
|
|
|
case 1 : a += UB4Traits::as_ub4(k[0]);
|
|
|
|
/* case 0: nothing left to add */
|
|
|
|
}
|
|
|
|
mix(a,b,c);
|
|
|
|
/*-------------------------------------------- report the result */
|
|
|
|
return c;
|
|
|
|
}
|
|
|
|
|
|
|
|
typedef ub4 hash_t;
|
|
|
|
|
|
|
|
inline hash_t hash_ub1(const ub1* key, std::size_t len, hash_t previous = 0)
|
|
|
|
{
|
|
|
|
return hash(key, ub4(len), previous, ub1_default_traits(), ub1x4_default_traits());
|
|
|
|
}
|
|
|
|
|
|
|
|
inline hash_t hash_ub1_nocase(const ub1* key, std::size_t len, hash_t previous = 0)
|
|
|
|
{
|
|
|
|
return hash(key, ub4(len), previous, ub1_nocase_traits(), ub1x4_nocase_traits());
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename UB4Traits>
|
|
|
|
inline hash_t hash_ub4(const ub4* key, std::size_t len, const UB4Traits& traits, hash_t previous = 0)
|
|
|
|
{
|
|
|
|
return hash2(key,ub4(len), previous, traits);
|
|
|
|
}
|
|
|
|
|
|
|
|
inline ub4 hash_combine(ub4 left, ub4 right)
|
|
|
|
{
|
|
|
|
return hash_ub1(reinterpret_cast<const ub1*>(&left), 4, right);
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename POD>
|
|
|
|
inline hash_t pod_hash(const POD& pod)
|
|
|
|
{
|
|
|
|
return hash_ub1(reinterpret_cast<const ub1*>(&pod), sizeof(POD));
|
|
|
|
}
|
|
|
|
|
|
|
|
inline hash_t string_hash(const char* string, hash_t previous = 0)
|
|
|
|
{
|
|
|
|
return hash_ub1(reinterpret_cast<const ub1*>(string), string_length(string), previous);
|
|
|
|
}
|
|
|
|
|
|
|
|
inline hash_t string_hash_nocase(const char* string, hash_t previous = 0)
|
|
|
|
{
|
|
|
|
return hash_ub1_nocase(reinterpret_cast<const ub1*>(string), string_length(string), previous);
|
|
|
|
}
|
|
|
|
|
2006-02-26 22:27:38 +00:00
|
|
|
struct RawStringHash
|
|
|
|
{
|
|
|
|
typedef hash_t hash_type;
|
|
|
|
hash_type operator()(const char* string) const
|
|
|
|
{
|
|
|
|
return string_hash(string);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2006-02-10 22:01:20 +00:00
|
|
|
struct HashString
|
|
|
|
{
|
|
|
|
typedef hash_t hash_type;
|
|
|
|
hash_type operator()(const CopiedString& string) const
|
|
|
|
{
|
|
|
|
return string_hash(string.c_str());
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
struct HashStringNoCase
|
|
|
|
{
|
|
|
|
typedef hash_t hash_type;
|
|
|
|
hash_type operator()(const CopiedString& string) const
|
|
|
|
{
|
|
|
|
return string_hash_nocase(string.c_str());
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/// \brief Length of a string in ub4.
|
|
|
|
/// "wibble" (6) gives 2,
|
|
|
|
/// "and" (3) gives 1,
|
|
|
|
/// "bleh" (4) gives 2
|
|
|
|
inline std::size_t string_length_ub4(const char* string)
|
|
|
|
{
|
|
|
|
return ((string_length(string)>>2)+1)<<2;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Hashable key type that stores a string as an array of ub4 - making hashing faster.
|
|
|
|
/// Also caches the 32-bit result of the hash to speed up comparison of keys.
|
|
|
|
template<typename UB4Traits = ub4_default_traits>
|
|
|
|
class HashKey
|
|
|
|
{
|
|
|
|
Array<ub4> m_key;
|
|
|
|
hash_t m_hash;
|
|
|
|
|
|
|
|
void copy(const HashKey& other)
|
|
|
|
{
|
|
|
|
std::copy(other.m_key.begin(), other.m_key.end(), m_key.begin());
|
|
|
|
m_hash = other.m_hash;
|
|
|
|
}
|
|
|
|
void copy(const char* string)
|
|
|
|
{
|
|
|
|
strncpy(reinterpret_cast<char*>(m_key.data()), string, m_key.size());
|
|
|
|
for(Array<ub4>::iterator i = m_key.begin(); i != m_key.end(); ++i)
|
|
|
|
{
|
|
|
|
*i = UB4Traits::as_ub4(*i);
|
|
|
|
}
|
|
|
|
m_hash = hash_ub4(m_key.data(), m_key.size(), ub4_default_traits());
|
|
|
|
}
|
|
|
|
bool equal(const HashKey& other) const
|
|
|
|
{
|
|
|
|
return m_hash == other.m_hash && m_key.size() == other.m_key.size()
|
|
|
|
&& std::equal(m_key.begin(), m_key.end(), other.m_key.begin());
|
|
|
|
}
|
|
|
|
|
|
|
|
public:
|
|
|
|
HashKey(const HashKey& other) : m_key(other.m_key.size())
|
|
|
|
{
|
|
|
|
copy(other);
|
|
|
|
}
|
|
|
|
HashKey(const char* string) : m_key(string_length_ub4(string))
|
|
|
|
{
|
|
|
|
copy(string);
|
|
|
|
}
|
|
|
|
HashKey& operator=(const char* string)
|
|
|
|
{
|
|
|
|
m_key.resize(string_length_ub4(string));
|
|
|
|
copy(string);
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
bool operator==(const HashKey& other) const
|
|
|
|
{
|
|
|
|
return equal(other);
|
|
|
|
}
|
|
|
|
bool operator!=(const HashKey& other) const
|
|
|
|
{
|
|
|
|
return !equal(other);
|
|
|
|
}
|
|
|
|
hash_t hash() const
|
|
|
|
{
|
|
|
|
return m_hash;
|
|
|
|
}
|
|
|
|
#if 0
|
|
|
|
const char* c_str() const
|
|
|
|
{
|
|
|
|
return reinterpret_cast<const char*>(m_key.data());
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
};
|
|
|
|
|
|
|
|
/// \brief Hash function to use with HashKey.
|
|
|
|
struct HashKeyHasher
|
|
|
|
{
|
|
|
|
typedef hash_t hash_type;
|
|
|
|
hash_type operator()(const HashKey<ub4_default_traits>& key) const
|
|
|
|
{
|
|
|
|
return key.hash();
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#endif
|