Murmurhash is generally superior to the Jenkins lookup3 hash according to the available figures. Perhaps we should generally replace our current hashes by murmurhash.
For now, I'm introducing a parallel implementation to allow it to be used in cases where an incremental one-word-at-a-time hash is desirable. The first user will be added in an upcoming commit. Signed-off-by: Ben Pfaff <b...@nicira.com> --- This is meant for the "classifier-opt" series, just before the final patch. (If you look at that final patch as I sent it out, you can see a couple of comments * XXX murmurhash would be much better for this than hash_int(). */ but I forgot to do that before I sent them out. lib/hash.c | 15 ++++++++++++ lib/hash.h | 42 +++++++++++++++++++++++++++++++++++ tests/test-hash.c | 62 ++++++++++++++++++++++++++++++++-------------------- 3 files changed, 95 insertions(+), 24 deletions(-) diff --git a/lib/hash.c b/lib/hash.c index f7aa916..41ad1bf 100644 --- a/lib/hash.c +++ b/lib/hash.c @@ -102,3 +102,18 @@ hash_bytes(const void *p_, size_t n, uint32_t basis) return c; } + +/* Returns the hash of the 'n' 32-bit words at 'p', starting from 'basis'. + * 'p' must be properly aligned. */ +uint32_t +mhash_words(const uint32_t p[], size_t n_words, uint32_t basis) +{ + uint32_t hash; + size_t i; + + hash = basis; + for (i = 0; i < n_words; i++) { + hash = mhash_add(hash, p[i]); + } + return mhash_finish(hash, n_words); +} diff --git a/lib/hash.h b/lib/hash.h index ac6a63c..701e686 100644 --- a/lib/hash.h +++ b/lib/hash.h @@ -118,6 +118,48 @@ static inline uint32_t hash_pointer(const void *p, uint32_t basis) return hash_int((uint32_t) (uintptr_t) p, basis); } +/* Murmurhash by Austin Appleby, + * from http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp. + * + * The upstream license there says: + * + * // MurmurHash3 was written by Austin Appleby, and is placed in the public + * // domain. The author hereby disclaims copyright to this source code. + * + * Murmurhash is faster and higher-quality than the Jenkins lookup3 hash. When + * we have a little more familiarity with it, it's probably a good idea to + * switch all of OVS to it. + * + * For now, we have this implementation here for use by code that needs a hash + * that is convenient for use one word at a time, since the Jenkins lookup3 + * hash works three words at a time. + * + * See mhash_words() for sample usage. */ + +uint32_t mhash_words(const uint32_t data[], size_t n_words, uint32_t basis); + +static inline uint32_t mhash_add(uint32_t hash, uint32_t data) +{ + data *= 0xcc9e2d51; + data = hash_rot(data, 15); + data *= 0x1b873593; + + hash ^= data; + hash = hash_rot(hash, 13); + return hash * 5 + 0xe6546b64; +} + +static inline uint32_t mhash_finish(uint32_t hash, size_t n) +{ + hash ^= n * 4; + hash ^= hash_rot(hash, 16); + hash *= 0x85ebca6b; + hash ^= hash_rot(hash, 13); + hash *= 0xc2b2ae35; + hash ^= hash_rot(hash, 16); + return hash; +} + #ifdef __cplusplus } #endif diff --git a/tests/test-hash.c b/tests/test-hash.c index bdf1435..d53ba2e 100644 --- a/tests/test-hash.c +++ b/tests/test-hash.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009 Nicira, Inc. + * Copyright (c) 2009, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,6 +41,12 @@ hash_words_cb(uint32_t input) } static uint32_t +mhash_words_cb(uint32_t input) +{ + return mhash_words(&input, 1, 0); +} + +static uint32_t hash_int_cb(uint32_t input) { return hash_int(input, 0); @@ -76,11 +82,37 @@ check_word_hash(uint32_t (*hash)(uint32_t), const char *name, } } -int -main(void) +static void +check_3word_hash(uint32_t (*hash)(const uint32_t[], size_t, uint32_t), + const char *name) { int i, j; + for (i = 0; i <= 96; i++) { + for (j = i + 1; j <= 96; j++) { + uint32_t in1[3], in2[3]; + uint32_t out1, out2; + const int min_unique = 12; + const uint32_t unique_mask = (UINT32_C(1) << min_unique) - 1; + + set_bit(in1, i); + set_bit(in2, j); + out1 = hash(in1, 3, 0); + out2 = hash(in2, 3, 0); + if ((out1 & unique_mask) == (out2 & unique_mask)) { + printf("%s has a partial collision:\n", name); + printf("hash(1 << %d) == %08"PRIx32"\n", i, out1); + printf("hash(1 << %d) == %08"PRIx32"\n", j, out2); + printf("The low-order %d bits of output are both " + "0x%"PRIx32"\n", min_unique, out1 & unique_mask); + } + } + } +} + +int +main(void) +{ /* Check that all hashes computed with hash_words with one 1-bit (or no * 1-bits) set within a single 32-bit word have different values in all * 11-bit consecutive runs. @@ -98,6 +130,7 @@ main(void) * independence must be a bad assumption :-) */ check_word_hash(hash_words_cb, "hash_words", 11); + check_word_hash(mhash_words_cb, "mhash_words", 11); /* Check that all hash functions of with one 1-bit (or no 1-bits) set * within three 32-bit words have different values in their lowest 12 @@ -112,27 +145,8 @@ main(void) * * so we are doing pretty well to not have any collisions in 12 bits. */ - for (i = 0; i <= 96; i++) { - for (j = i + 1; j <= 96; j++) { - uint32_t in1[3], in2[3]; - uint32_t out1, out2; - const int min_unique = 12; - const uint32_t unique_mask = (UINT32_C(1) << min_unique) - 1; - - set_bit(in1, i); - set_bit(in2, j); - out1 = hash_words(in1, 3, 0); - out2 = hash_words(in2, 3, 0); - if ((out1 & unique_mask) == (out2 & unique_mask)) { - printf("Partial collision:\n"); - printf("hash(1 << %d) == %08"PRIx32"\n", i, out1); - printf("hash(1 << %d) == %08"PRIx32"\n", j, out2); - printf("The low-order %d bits of output are both " - "0x%"PRIx32"\n", min_unique, out1 & unique_mask); - exit(1); - } - } - } + check_3word_hash(hash_words, "hash_words"); + check_3word_hash(mhash_words, "mhash_words"); /* Check that all hashes computed with hash_int with one 1-bit (or no * 1-bits) set within a single 32-bit word have different values in all -- 1.7.2.5 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev