Use CRC32 intrinsics for hash computations when building for X86_64 with SSE4_2.
Add a new hash_words64() and change hash_words() to be inlined when 'n_words' is a compile-time constant. Signed-off-by: Jarno Rajahalme <jrajaha...@nicira.com> --- v2: Changed hash_words to be inlined only when 'n_words' is known to be a compile time constant. lib/hash.c | 27 ++++--- lib/hash.h | 208 +++++++++++++++++++++++++++++++++++++++++++++++++---- tests/test-hash.c | 11 ++- 3 files changed, 216 insertions(+), 30 deletions(-) diff --git a/lib/hash.c b/lib/hash.c index 349f54a..71cd74c 100644 --- a/lib/hash.c +++ b/lib/hash.c @@ -50,21 +50,6 @@ hash_bytes(const void *p_, size_t n, uint32_t basis) return hash_finish(hash, orig_n); } -/* Returns the hash of the 'n' 32-bit words at 'p', starting from 'basis'. - * 'p' must be properly aligned. */ -uint32_t -hash_words(const uint32_t p[], size_t n_words, uint32_t basis) -{ - uint32_t hash; - size_t i; - - hash = basis; - for (i = 0; i < n_words; i++) { - hash = hash_add(hash, p[i]); - } - return hash_finish(hash, n_words * 4); -} - uint32_t hash_double(double x, uint32_t basis) { @@ -74,3 +59,15 @@ hash_double(double x, uint32_t basis) memcpy(value, &x, sizeof value); return hash_3words(value[0], value[1], basis); } + +uint32_t +hash_words__(const uint32_t p[], size_t n_words, uint32_t basis) +{ + return hash_words_inline(p, n_words, basis); +} + +uint32_t +hash_words64__(const uint64_t p[], size_t n_words, uint64_t basis) +{ + return hash_words64_inline(p, n_words, basis); +} diff --git a/lib/hash.h b/lib/hash.h index 1e19f45..f8bbada 100644 --- a/lib/hash.h +++ b/lib/hash.h @@ -32,7 +32,6 @@ hash_rot(uint32_t x, int k) return (x << k) | (x >> (32 - k)); } -uint32_t hash_words(const uint32_t data[], size_t n_words, uint32_t basis); uint32_t hash_bytes(const void *, size_t n_bytes, uint32_t basis); static inline uint32_t hash_int(uint32_t x, uint32_t basis); @@ -83,6 +82,9 @@ static inline uint32_t mhash_finish(uint32_t hash, uint32_t n_bytes) return hash; } +#if !(defined(__SSE4_2__) && defined(__x86_64__)) +/* Mhash-based implemantation. */ + static inline uint32_t hash_add(uint32_t hash, uint32_t data) { return mhash_add(hash, data); @@ -93,23 +95,29 @@ static inline uint32_t hash_finish(uint32_t hash, uint32_t final) return mhash_finish(hash, final); } -static inline uint32_t hash_string(const char *s, uint32_t basis) +/* Returns the hash of the 'n' 32-bit words at 'p', starting from 'basis'. + * 'p' must be properly aligned. + * + * This is inlined for the compiler to have access to the 'n_words', which + * in many cases is a constant. */ +static inline uint32_t +hash_words_inline(const uint32_t p[], size_t n_words, uint32_t basis) { - return hash_bytes(s, strlen(s), basis); -} + uint32_t hash; + size_t i; -static inline uint32_t hash_int(uint32_t x, uint32_t basis) -{ - return hash_2words(x, basis); + hash = basis; + for (i = 0; i < n_words; i++) { + hash = hash_add(hash, p[i]); + } + return hash_finish(hash, n_words * 4); } -/* An attempt at a useful 1-bit hash function. Has not been analyzed for - * quality. */ -static inline uint32_t hash_boolean(bool x, uint32_t basis) +static inline uint32_t +hash_words64_inline(const uint64_t p[], size_t n_words, uint64_t basis) { - const uint32_t P0 = 0xc2b73583; /* This is hash_int(1, 0). */ - const uint32_t P1 = 0xe90f1258; /* This is hash_int(2, 0). */ - return (x ? P0 : P1) ^ hash_rot(basis, 1); + return hash_words_inline((uint32_t *)p, n_words * 2, + (uint32_t)basis ^ basis >> 32); } static inline uint32_t hash_pointer(const void *p, uint32_t basis) @@ -140,6 +148,180 @@ static inline uint32_t hash_uint64_basis(const uint64_t x, { return hash_3words((uint32_t)(x >> 32), (uint32_t)x, basis); } + +#else /* __SSE4_2__ && __x86_64__ */ +#include <smmintrin.h> + +static inline uint32_t hash_add(uint32_t hash, uint32_t data) +{ + return _mm_crc32_u32(hash, data); +} + +static inline uint32_t hash_finish(uint64_t hash, uint64_t final) +{ + /* The finishing multiplier 0x805204f3 has been experimentally + * derived to pass the testsuite hash tests. */ + hash = _mm_crc32_u64(hash, final) * 0x805204f3; + return hash ^ (uint32_t)hash >> 16; /* Increase entropy in LSBs. */ +} + +/* Returns the hash of the 'n' 32-bit words at 'p_', starting from 'basis'. + * We access 'p_' as a uint64_t pointer, which is fine for __SSE_4_2__. + * + * This is inlined for the compiler to have access to the 'n_words', which + * in many cases is a constant. */ +static inline uint32_t +hash_words_inline(const uint32_t p_[], size_t n_words, uint32_t basis) +{ + const uint64_t *p = (const void *)p_; + uint64_t hash1 = basis; + uint64_t hash2 = 0; + uint64_t hash3 = n_words; + const uint32_t *endp = (const uint32_t *)p + n_words; + const uint64_t *limit = p + n_words / 2 - 3; + + while (p <= limit) { + hash1 = _mm_crc32_u64(hash1, p[0]); + hash2 = _mm_crc32_u64(hash2, p[1]); + hash3 = _mm_crc32_u64(hash3, p[2]); + p += 3; + } + switch (endp - (const uint32_t *)p) { + case 1: + hash1 = _mm_crc32_u32(hash1, *(const uint32_t *)&p[0]); + break; + case 2: + hash1 = _mm_crc32_u64(hash1, p[0]); + break; + case 3: + hash1 = _mm_crc32_u64(hash1, p[0]); + hash2 = _mm_crc32_u32(hash2, *(const uint32_t *)&p[1]); + break; + case 4: + hash1 = _mm_crc32_u64(hash1, p[0]); + hash2 = _mm_crc32_u64(hash2, p[1]); + break; + case 5: + hash1 = _mm_crc32_u64(hash1, p[0]); + hash2 = _mm_crc32_u64(hash2, p[1]); + hash3 = _mm_crc32_u32(hash3, *(const uint32_t *)&p[2]); + break; + } + return hash_finish(hash1, hash2 << 32 | hash3); +} + +/* A simpler version for 64-bit data. + * 'n_words' is the count of 64-bit words, basis is 64 bits. */ +static inline uint32_t +hash_words64_inline(const uint64_t p[], size_t n_words, uint64_t basis) +{ + uint64_t hash1 = (uint32_t)basis; + uint64_t hash2 = basis >> 32; + uint64_t hash3 = n_words; + const uint64_t *endp = p + n_words; + const uint64_t *limit = endp - 3; + + while (p <= limit) { + hash1 = _mm_crc32_u64(hash1, p[0]); + hash2 = _mm_crc32_u64(hash2, p[1]); + hash3 = _mm_crc32_u64(hash3, p[2]); + p += 3; + } + switch (endp - p) { + case 1: + hash1 = _mm_crc32_u64(hash1, p[0]); + break; + case 2: + hash1 = _mm_crc32_u64(hash1, p[0]); + hash2 = _mm_crc32_u64(hash2, p[1]); + break; + } + return hash_finish(hash1, hash2 << 32 | hash3); +} + +static inline uint32_t hash_uint64_basis(const uint64_t x, + const uint32_t basis) +{ + /* '23' chosen to mix bits enough for the test-hash to pass. */ + return hash_finish(_mm_crc32_u64(basis, x), 23); +} + +static inline uint32_t hash_uint64(const uint64_t x) +{ + return hash_uint64_basis(x, 0); +} + +static inline uint32_t hash_2words(uint32_t x, uint32_t y) +{ + return hash_uint64((uint64_t)y << 32 | x); +} + +static inline uint32_t hash_pointer(const void *p, uint32_t basis) +{ + return hash_uint64_basis((uint64_t) (uintptr_t) p, basis); +} +#endif + +uint32_t hash_words__(const uint32_t p[], size_t n_words, uint32_t basis); +uint32_t hash_words64__(const uint64_t p[], size_t n_words, uint64_t basis); + +/* Inline the larger hash functions only when 'n_words' is known to be + * compile-time constant. */ +#if __GNUC__ >= 4 +static inline uint32_t +hash_words(const uint32_t p[], size_t n_words, uint32_t basis) +{ + if (__builtin_constant_p(n_words)) { + return hash_words_inline(p, n_words, basis); + } else { + return hash_words__(p, n_words, basis); + } +} + +static inline uint32_t +hash_words64(const uint64_t p[], size_t n_words, uint64_t basis) +{ + if (__builtin_constant_p(n_words)) { + return hash_words64_inline(p, n_words, basis); + } else { + return hash_words64__(p, n_words, basis); + } +} + +#else + +static inline uint32_t +hash_words(const uint32_t p[], size_t n_words, uint32_t basis) +{ + return hash_words__(p, n_words, basis); +} + +static inline uint32_t +hash_words64(const uint64_t p[], size_t n_words, uint64_t basis) +{ + return hash_words64__(p, n_words, basis); +} +#endif + +static inline uint32_t hash_string(const char *s, uint32_t basis) +{ + return hash_bytes(s, strlen(s), basis); +} + +static inline uint32_t hash_int(uint32_t x, uint32_t basis) +{ + return hash_2words(x, basis); +} + +/* An attempt at a useful 1-bit hash function. Has not been analyzed for + * quality. */ +static inline uint32_t hash_boolean(bool x, uint32_t basis) +{ + const uint32_t P0 = 0xc2b73583; /* This is hash_int(1, 0). */ + const uint32_t P1 = 0xe90f1258; /* This is hash_int(2, 0). */ + return (x ? P0 : P1) ^ hash_rot(basis, 1); +} + #ifdef __cplusplus } #endif diff --git a/tests/test-hash.c b/tests/test-hash.c index 081e723..f410c16 100644 --- a/tests/test-hash.c +++ b/tests/test-hash.c @@ -92,15 +92,22 @@ check_3word_hash(uint32_t (*hash)(const uint32_t[], size_t, uint32_t), for (i = 0; i <= 96; i++) { for (j = i + 1; j <= 96; j++) { - uint32_t in1[3], in2[3]; - uint32_t out1, out2; + uint32_t in0[3], in1[3], in2[3]; + uint32_t out0,out1, out2; const int min_unique = 12; const uint32_t unique_mask = (UINT32_C(1) << min_unique) - 1; + set_bit(in0, i); set_bit(in1, i); set_bit(in2, j); + out0 = hash(in0, 3, 0); out1 = hash(in1, 3, 0); out2 = hash(in2, 3, 0); + + if (out0 != out1) { + printf("%s hash not the same for non-64 aligned data " + "%08"PRIx32" != %08"PRIx32"\n", name, out0, out1); + } if ((out1 & unique_mask) == (out2 & unique_mask)) { printf("%s has a partial collision:\n", name); printf("hash(1 << %d) == %08"PRIx32"\n", i, out1); -- 1.7.10.4 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev