diff --git a/hash.c b/hash.c index 9aba831..e11f28e 100644 --- a/hash.c +++ b/hash.c @@ -20,317 +20,16 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "gmqcc.h" -#include - -#if defined(_MSC_VER) -# define HASH_ROTL32(X, Y) _rotl((X), (Y)) -#elif defined (__GNUC__) && (defined(__i386__) || defined(__amd64__)) - static GMQCC_FORCEINLINE uint32_t hash_rotl32(volatile uint32_t x, int8_t r) { - __asm__ __volatile__ ("roll %1,%0" : "+r"(x) : "c"(r)); - return x; +#include +size_t hash(const char *string) { + size_t hash = 0; + for(; *string; ++string) { + hash += *string; + hash += (hash << 10); + hash ^= (hash >> 6); } -# define HASH_ROTL32(X, Y) hash_rotl32((volatile uint32_t)(X), (Y)) -#else -# define HASH_ROTL32(X, Y) (((X) << (Y)) | ((X) >> (32 - (Y)))) -#endif - -/* - * This is a version of the Murmur3 hashing function optimized for various - * compilers/architectures. It uses the traditional Murmur2 mix staging - * but fixes the mix staging inner loops. - * - * Murmur 2 contains an inner loop such as: - * while (l >= 4) { - * u32 k = *(u32*)d; - * k *= m; - * k ^= k >> r; - * k *= m; - * - * h *= m; - * h ^= k; - * d += 4; - * l -= 4; - * } - * - * The two u32s that form the key are the same value for x - * this premix stage will perform the same results for both values. Unrolled - * this produces just: - * x *= m; - * x ^= x >> r; - * x *= m; - * - * h *= m; - * h ^= x; - * h *= m; - * h ^= x; - * - * This appears to be fine, except what happens when m == 1? well x - * cancels out entierly, leaving just: - * x ^= x >> r; - * h ^= x; - * h ^= x; - * - * So all keys hash to the same value, but how often does m == 1? - * well, it turns out testing x for all possible values yeilds only - * 172,013,942 unique results instead of 2^32. So nearly ~4.6 bits - * are cancelled out on average! - * - * This means we have a 14.5% higher chance of collision. This is where - * Murmur3 comes in to save the day. - */ -static GMQCC_FORCEINLINE uint32_t hash_murmur_mix32(uint32_t hash) { - hash ^= hash >> 16; - hash *= 0x85EBCA6B; - hash ^= hash >> 13; - hash *= 0xC2B2AE35; - hash ^= hash >> 16; + hash += hash << 3; + hash ^= hash >> 11; + hash += hash << 15; return hash; } - -/* - * These constants were calculated with SMHasher to determine the best - * case senario for Murmur3: - * http://code.google.com/p/smhasher/ - */ -#define HASH_MURMUR_MASK1 0xCC9E2D51 -#define HASH_MURMUR_MASK2 0x1B873593 -#define HASH_MURMUR_SEED 0x9747B28C - -#if PLATFORM_BYTE_ORDER == GMQCC_BYTE_ORDER_LITTLE -# define HASH_MURMUR_SAFEREAD(PTR) (*((uint32_t*)(PTR))) -#elif PLATFORM_BYTE_ORDER == GMQCC_BYTE_ORDER_BIG -# if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR >= 3)) -# define HASH_MURMUR_SAFEREAD(PTR) (__builtin_bswap32(*((uint32_t*)(PTR)))) -# endif -#endif -/* Process individual bytes at this point since the endianess isn't known. */ -#ifndef HASH_MURMUR_SAFEREAD -# define HASH_MURMUR_SAFEREAD(PTR) ((PTR)[0] | (PTR)[1] << 8 | (PTR)[2] << 16 | (PTR)[3] << 24) -#endif - -#define HASH_MURMUR_BLOCK(H, K) \ - do { \ - K *= HASH_MURMUR_MASK1; \ - K = HASH_ROTL32(K, 15); \ - K *= HASH_MURMUR_MASK2; \ - H ^= K; \ - H = HASH_ROTL32(H, 13); \ - H = H * 5 + 0xE6546B64; \ - } while (0) -#define HASH_MURMUR_BYTES(COUNT, H, C, N, PTR, LENGTH) \ - do { \ - int i = COUNT; \ - while (i--) { \ - C = C >> 8 | *PTR++ << 24; \ - N++; \ - LENGTH--; \ - if (N == 4) { \ - HASH_MURMUR_BLOCK(H, C); \ - N = 0; \ - } \ - } \ - } while (0) -#define HASH_MURMUR_TAIL(P, Z, H, C, N, PTR, LEN) \ - do { \ - LEN -= LEN/4*4; \ - HASH_MURMUR_BYTES(LEN, H, C, N, PTR, LEN); \ - *P = H; \ - *Z = ((C) & ~0xFF) | (N); \ - } while (0) - -#if PLATFORM_BYTE_ORDER == GMQCC_BYTE_ORDER_LITTLE -static GMQCC_FORCEINLINE void hash_murmur_process(uint32_t *ph1, uint32_t *carry, const void *key, int length) { - uint32_t h1 = *ph1; - uint32_t c = *carry; - - const uint8_t *ptr = (uint8_t*)key; - const uint8_t *end; - - int n = c & 3; - int it = (4 - n) & 3; - if (it && it <= length) - HASH_MURMUR_BYTES(it, h1, c, n, ptr, length); - - end = ptr + length/4*4; - for (; ptr < end; ptr += 4) { - uint32_t k1 = HASH_MURMUR_SAFEREAD(ptr); - HASH_MURMUR_BLOCK(h1, k1); - } - HASH_MURMUR_TAIL(ph1, carry, h1, c, n, ptr, length); -} -#else -static GMQCC_FORCEINLINE void hash_murmur_process(uint32_t *ph1, uint32_t *carry, const void *key, int length) { - uint32_t k1; - uint32_t h1 = *ph1; - uint32_t c = *carry; - - const uint8_t *ptr = (uint8_t*)key; - const uint8_t *end; - - int n = c & 3; - int it = -(long)ptr & 3; - if (it && it <= length) - HASH_MURMUR_BYTES(it, h1, c, n, ptr, length); - - end = ptr + length / 4 * 4; - switch (n) { - case 0: - for (; ptr < end; ptr += 4) { - k1 = HASH_MURMUR_SAFEREAD(ptr); - HASH_MURMUR_BLOCK(h1, k1); - } - break; - -# define NEXT(N, RIGHT, LEFT) \ - case N: \ - for (; ptr < end; ptr += 4) { \ - k1 = c >> RIGHT; \ - c = HASH_MURMUR_SAFEREAD(ptr); \ - k1 |= c << LEFT; \ - HASH_MURMUR_BLOCK(h1, k1); \ - } \ - break - NEXT(1, 24, 8); - NEXT(2, 16, 16); - NEXT(3, 8, 24); - #undef NEXT - } - HASH_MURMUR_TAIL(ph1, carry, h1, c, n, ptr, length); -} -#endif - -static GMQCC_FORCEINLINE uint32_t hash_murmur_result(uint32_t hash, uint32_t carry, size_t length) { - uint32_t k1; - int n = carry & 3; - if (GMQCC_LIKELY(n)) { - k1 = carry >> (4 - n) * 8; - k1 *= HASH_MURMUR_MASK1; - k1 = HASH_ROTL32(k1, 15); - k1 *= HASH_MURMUR_MASK2; - hash ^= k1; - } - hash ^= length; - hash = hash_murmur_mix32(hash); - - return hash; -} - -static GMQCC_FORCEINLINE uint32_t hash_murmur(const void *GMQCC_RESTRICT key, size_t length) { - uint32_t hash = HASH_MURMUR_SEED; - uint32_t carry = 0; - hash_murmur_process(&hash, &carry, key, length); - return hash_murmur_result(hash, carry, length); -} - -/* - * The following hash function implements it's own strlen to avoid using libc's - * which isn't always slow but isn't always fastest either. - * - * Some things to note about this strlen that are otherwise confusing to grasp - * at first is that it does intentionally depend on undefined behavior. - * - * The first step to the strlen is to ensure alignment before checking words, - * without this step we risk crossing a page boundry with the word check and - * that would cause a crash. - * - * The second step to the strlen contains intentional undefined behavior. When - * accessing a word of any size, the first byte of that word is accessible if - * and only if the whole word is accessible because words are aligned. This is - * indicated by the fact that size / alignment always divides the page size. - * One could argue that an architecture exists where size_t and alignment are - * different, if that were the case, the alignment will always assume to be the - * size of the type (size_t). So it's always safe in that regard. - * - * In other words, an aligned 2^n load cannot cross a page boundry unless - * n > log2(PAGE_SIZE). There are no known architectures which support such - * a wide load larger than PAGE_SIZE. - * - * Valgrind and address sanatizer may choke on this because they're strictly - * trying to find bugs, it's a false positive to assume this is a bug when it's - * intentional. To prevent these false positives, both things need to be taught - * about the intentional behavior; for address sanatizer this can be done with - * a compiler attribute, effectively preventing the function from being - * instrumented. Valgrind requires a little more work as there is no way to - * downright prevent a function from being instrumented, instead we can mark - * + sizeof(size_t) bytes ahead of each byte we're reading as we calculate - * the length of the string, then we can make that additional + sizeof(size_t) - * on the end undefined after the length has been calculated. - * - * If the compiler doesn't have the attribute to disable address sanatizer - * instrumentation we fall back to using libc's strlen instead. This isn't the - * best solution. On windows we can assume this method always because neither - * address sanatizer or valgrind exist. - */ - -/* Some compilers expose this */ -#if defined(__has_feature) -# if __has_feature(address_sanitizer) -# define ASAN_DISABLE __attribute__((no_sanitize_address)) -# define HAS_ASAN_DISABLE -# endif -#endif - -/* If they don't try to find by version the attriubte was introduces */ -#if defined(__GNUC__) && __GNUC__ >= 4 -# define ASAN_DISABLE __attribute__((no_sanitize_address)) -# define HAS_ASAN_DISABLE -#elif defined(__clang__) && __clang_major__ >= 3 -# define ASAN_DISABLE __attribute__((no_sanatize_address)) -# define HAS_ASAN_DISABLE -/* On windows asan doesn't exist */ -#elif defined(_WIN32) -# define ASAN_DISABLE /* nothing */ -# define HAS_ASAN_DISABLE -#endif - -#ifndef HAS_ASAN_DISABLE -# include -#endif - -#ifndef NVALGRIND -# include -# include -#else -# define VALGRIND_MAKE_MEM_DEFINED(PTR, REDZONE_SIZE) -# define VALGRIND_MAKE_MEM_NOACCESS(PTR, REDZONE_SIZE) -#endif - -#ifdef HAS_ASAN_DISABLE -#define STRLEN_ALIGN (sizeof(size_t)) -#define STRLEN_ONES ((size_t)-1/UCHAR_MAX) -#define STRLEN_HIGHS (STRLEN_ONES * (UCHAR_MAX/2+1)) -#define STRLEN_HASZERO(X) (((X)-STRLEN_ONES) & ~(X) & STRLEN_HIGHS) - -static ASAN_DISABLE size_t hash_strlen(const char *key) { - const char *s = key; - const char *a = s; - const size_t *w; - - for (; (uintptr_t)s % STRLEN_ALIGN; s++) { - if (*s) - continue; - return s-a; - } - - VALGRIND_MAKE_MEM_DEFINED(s, STRLEN_ALIGN); - for (w = (const size_t *)s; !STRLEN_HASZERO(*w); w++) { - /* Make the next word legal to access */ - VALGRIND_MAKE_MEM_DEFINED(w + STRLEN_ALIGN, STRLEN_ALIGN); - } - - for (s = (const char *)w; *s; s++); - - /* It's not legal to access this area anymore */ - VALGRIND_MAKE_MEM_NOACCESS(s + 1, STRLEN_ALIGN); - return s-a; -} -#else -static GMQCC_INLINE size_t hash_strlen(const char *key) { - return strlen(key); -} -#endif - -size_t hash(const char *key) { - return hash_murmur((const void *)key, hash_strlen(key)); -}