I verified that the code you've written matches up with: http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
That code is, of course, total magic, so I'm glad there are unit tests for it. Because you're only hashing words you leave out the following section of code: ------------------------------------------ //---------- // tail const uint8_t * tail = (const uint8_t*)(data + nblocks*4); uint32_t k1 = 0; switch(len & 3) { case 3: k1 ^= tail[2] << 16; case 2: k1 ^= tail[1] << 8; case 1: k1 ^= tail[0]; k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; }; ----------------------------------- I would like to see an XXX comment noting it's absence so that we remember to fold it in when/if we replace hash_bytes() with murmur. I want to replace the implementation of flow_hash_symmetric_l4() with this because it's in the fast path for bundle actions, and it's current implementation is particularly inefficient. Any reason I shouldn't? Also, I'm going to start doing Acked-bys Acked-by: Ethan Jackson <et...@nicira.com> On Mon, Aug 20, 2012 at 2:27 PM, Ben Pfaff <b...@nicira.com> wrote: > Murmurhash is generally superior to the Jenkins lookup3 hash according to > the available figures. Perhaps we should generally replace our current > hashes by murmurhash. > > For now, I'm introducing a parallel implementation to allow it to be used > in cases where an incremental one-word-at-a-time hash is desirable. The > first user will be added in an upcoming commit. > > Signed-off-by: Ben Pfaff <b...@nicira.com> > --- > This is meant for the "classifier-opt" series, just before the > final patch. (If you look at that final patch as I sent it out, > you can see a couple of comments > * XXX murmurhash would be much better for this than hash_int(). */ > but I forgot to do that before I sent them out. > > lib/hash.c | 15 ++++++++++++ > lib/hash.h | 42 +++++++++++++++++++++++++++++++++++ > tests/test-hash.c | 62 ++++++++++++++++++++++++++++++++-------------------- > 3 files changed, 95 insertions(+), 24 deletions(-) > > diff --git a/lib/hash.c b/lib/hash.c > index f7aa916..41ad1bf 100644 > --- a/lib/hash.c > +++ b/lib/hash.c > @@ -102,3 +102,18 @@ hash_bytes(const void *p_, size_t n, uint32_t basis) > > return c; > } > + > +/* Returns the hash of the 'n' 32-bit words at 'p', starting from 'basis'. > + * 'p' must be properly aligned. */ > +uint32_t > +mhash_words(const uint32_t p[], size_t n_words, uint32_t basis) > +{ > + uint32_t hash; > + size_t i; > + > + hash = basis; > + for (i = 0; i < n_words; i++) { > + hash = mhash_add(hash, p[i]); > + } > + return mhash_finish(hash, n_words); > +} > diff --git a/lib/hash.h b/lib/hash.h > index ac6a63c..701e686 100644 > --- a/lib/hash.h > +++ b/lib/hash.h > @@ -118,6 +118,48 @@ static inline uint32_t hash_pointer(const void *p, > uint32_t basis) > return hash_int((uint32_t) (uintptr_t) p, basis); > } > > +/* Murmurhash by Austin Appleby, > + * from > http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp. > + * > + * The upstream license there says: > + * > + * // MurmurHash3 was written by Austin Appleby, and is placed in the public > + * // domain. The author hereby disclaims copyright to this source code. > + * > + * Murmurhash is faster and higher-quality than the Jenkins lookup3 hash. > When > + * we have a little more familiarity with it, it's probably a good idea to > + * switch all of OVS to it. > + * > + * For now, we have this implementation here for use by code that needs a > hash > + * that is convenient for use one word at a time, since the Jenkins lookup3 > + * hash works three words at a time. > + * > + * See mhash_words() for sample usage. */ > + > +uint32_t mhash_words(const uint32_t data[], size_t n_words, uint32_t basis); > + > +static inline uint32_t mhash_add(uint32_t hash, uint32_t data) > +{ > + data *= 0xcc9e2d51; > + data = hash_rot(data, 15); > + data *= 0x1b873593; > + > + hash ^= data; > + hash = hash_rot(hash, 13); > + return hash * 5 + 0xe6546b64; > +} > + > +static inline uint32_t mhash_finish(uint32_t hash, size_t n) > +{ > + hash ^= n * 4; > + hash ^= hash_rot(hash, 16); > + hash *= 0x85ebca6b; > + hash ^= hash_rot(hash, 13); > + hash *= 0xc2b2ae35; > + hash ^= hash_rot(hash, 16); > + return hash; > +} > + > #ifdef __cplusplus > } > #endif > diff --git a/tests/test-hash.c b/tests/test-hash.c > index bdf1435..d53ba2e 100644 > --- a/tests/test-hash.c > +++ b/tests/test-hash.c > @@ -1,5 +1,5 @@ > /* > - * Copyright (c) 2009 Nicira, Inc. > + * Copyright (c) 2009, 2012 Nicira, Inc. > * > * Licensed under the Apache License, Version 2.0 (the "License"); > * you may not use this file except in compliance with the License. > @@ -41,6 +41,12 @@ hash_words_cb(uint32_t input) > } > > static uint32_t > +mhash_words_cb(uint32_t input) > +{ > + return mhash_words(&input, 1, 0); > +} > + > +static uint32_t > hash_int_cb(uint32_t input) > { > return hash_int(input, 0); > @@ -76,11 +82,37 @@ check_word_hash(uint32_t (*hash)(uint32_t), const char > *name, > } > } > > -int > -main(void) > +static void > +check_3word_hash(uint32_t (*hash)(const uint32_t[], size_t, uint32_t), > + const char *name) > { > int i, j; > > + for (i = 0; i <= 96; i++) { > + for (j = i + 1; j <= 96; j++) { > + uint32_t in1[3], in2[3]; > + uint32_t out1, out2; > + const int min_unique = 12; > + const uint32_t unique_mask = (UINT32_C(1) << min_unique) - 1; > + > + set_bit(in1, i); > + set_bit(in2, j); > + out1 = hash(in1, 3, 0); > + out2 = hash(in2, 3, 0); > + if ((out1 & unique_mask) == (out2 & unique_mask)) { > + printf("%s has a partial collision:\n", name); > + printf("hash(1 << %d) == %08"PRIx32"\n", i, out1); > + printf("hash(1 << %d) == %08"PRIx32"\n", j, out2); > + printf("The low-order %d bits of output are both " > + "0x%"PRIx32"\n", min_unique, out1 & unique_mask); > + } > + } > + } > +} > + > +int > +main(void) > +{ > /* Check that all hashes computed with hash_words with one 1-bit (or no > * 1-bits) set within a single 32-bit word have different values in all > * 11-bit consecutive runs. > @@ -98,6 +130,7 @@ main(void) > * independence must be a bad assumption :-) > */ > check_word_hash(hash_words_cb, "hash_words", 11); > + check_word_hash(mhash_words_cb, "mhash_words", 11); > > /* Check that all hash functions of with one 1-bit (or no 1-bits) set > * within three 32-bit words have different values in their lowest 12 > @@ -112,27 +145,8 @@ main(void) > * > * so we are doing pretty well to not have any collisions in 12 bits. > */ > - for (i = 0; i <= 96; i++) { > - for (j = i + 1; j <= 96; j++) { > - uint32_t in1[3], in2[3]; > - uint32_t out1, out2; > - const int min_unique = 12; > - const uint32_t unique_mask = (UINT32_C(1) << min_unique) - 1; > - > - set_bit(in1, i); > - set_bit(in2, j); > - out1 = hash_words(in1, 3, 0); > - out2 = hash_words(in2, 3, 0); > - if ((out1 & unique_mask) == (out2 & unique_mask)) { > - printf("Partial collision:\n"); > - printf("hash(1 << %d) == %08"PRIx32"\n", i, out1); > - printf("hash(1 << %d) == %08"PRIx32"\n", j, out2); > - printf("The low-order %d bits of output are both " > - "0x%"PRIx32"\n", min_unique, out1 & unique_mask); > - exit(1); > - } > - } > - } > + check_3word_hash(hash_words, "hash_words"); > + check_3word_hash(mhash_words, "mhash_words"); > > /* Check that all hashes computed with hash_int with one 1-bit (or no > * 1-bits) set within a single 32-bit word have different values in all > -- > 1.7.2.5 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev