This commit adds a new test for hash_bytes128() using single 128-bit word. The test shows that there is no collision in all 17 consecutive bits checks, which indicates the hash function is good.
Signed-off-by: Alex Wang <al...@nicira.com> --- tests/test-hash.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 88 insertions(+), 12 deletions(-) diff --git a/tests/test-hash.c b/tests/test-hash.c index 5032d32..4eda745 100644 --- a/tests/test-hash.c +++ b/tests/test-hash.c @@ -35,28 +35,35 @@ set_bit(uint32_t array[3], int bit) } } +/* When bit == n_bits, the function just 0 sets the 'values'. */ static void -set_bit128(ovs_u128 array[16], int bit) +set_bit128(ovs_u128 *values, int bit, int n_bits) { assert(bit >= 0 && bit <= 2048); - memset(array, 0, sizeof(ovs_u128) * 16); - if (bit < 2048) { + memset(values, 0, n_bits/8); + if (bit < n_bits) { int b = bit % 128; if (b < 64) { - array[bit / 128].u64.lo = UINT64_C(1) << (b % 64); + values[bit / 128].u64.lo = UINT64_C(1) << (b % 64); } else { - array[bit / 128].u64.hi = UINT64_C(1) << (b % 64); + values[bit / 128].u64.hi = UINT64_C(1) << (b % 64); } } } +static uint64_t +get_range128(ovs_u128 *value, int ofs, uint64_t mask) +{ + return ((ofs < 64 ? (value->u64.lo >> ofs) : 0) & mask) + | ((ofs <= 64 ? (value->u64.hi << (64 - ofs)) : (value->u64.hi >> (ofs - 64)) & mask)); +} + static uint32_t hash_words_cb(uint32_t input) { return hash_words(&input, 1, 0); } - static uint32_t jhash_words_cb(uint32_t input) { @@ -135,11 +142,63 @@ check_3word_hash(uint32_t (*hash)(const uint32_t[], size_t, uint32_t), } static void +check_hash_bytes128(void (*hash)(const void *, size_t, uint32_t, ovs_u128 *), + const char *name, const int min_unique) +{ + const uint64_t unique_mask = (UINT64_C(1) << min_unique) - 1; + const int n_bits = sizeof(ovs_u128) * 8; + int i, j; + + for (i = 0; i <= n_bits; i++) { + OVS_PACKED(struct offset_ovs_u128 { + uint32_t a; + ovs_u128 b; + }) in0_data; + ovs_u128 *in0, in1; + ovs_u128 out0, out1; + + in0 = &in0_data.b; + set_bit128(in0, i, n_bits); + set_bit128(&in1, i, n_bits); + hash(in0, sizeof(ovs_u128), 0, &out0); + hash(&in1, sizeof(ovs_u128), 0, &out1); + if (!ovs_u128_equal(&out0, &out1)) { + printf("%s hash not the same for non-64 aligned data " + "%016"PRIx64"%016"PRIx64" != %016"PRIx64"%016"PRIx64"\n", + name, out0.u64.lo, out0.u64.hi, out1.u64.lo, out1.u64.hi); + } + + for (j = i + 1; j <= n_bits; j++) { + ovs_u128 in2; + ovs_u128 out2; + int ofs; + + set_bit128(&in2, j, n_bits); + hash(&in2, sizeof(ovs_u128), 0, &out2); + for (ofs = 0; ofs < 128 - min_unique; ofs++) { + uint64_t bits1 = get_range128(&out1, ofs, unique_mask); + uint64_t bits2 = get_range128(&out2, ofs, unique_mask); + + if (bits1 == bits2) { + printf("%s has a partial collision:\n", name); + printf("hash(1 << %d) == %016"PRIx64"%016"PRIx64"\n", + i, out1.u64.hi, out1.u64.lo); + printf("hash(1 << %d) == %016"PRIx64"%016"PRIx64"\n", + j, out2.u64.hi, out2.u64.lo); + printf("%d bits of output starting at bit %d " + "are both 0x%016"PRIx64"\n", min_unique, ofs, bits1); + } + } + } + } +} + +static void check_256byte_hash(void (*hash)(const void *, size_t, uint32_t, ovs_u128 *), const char *name, const int min_unique) { const uint64_t unique_mask = (UINT64_C(1) << min_unique) - 1; - const int n_bits = 256 * 8; + const int n_bits = sizeof(ovs_u128) * 8 * 16; int i, j; for (i = 0; i <= n_bits; i++) { @@ -151,10 +210,8 @@ check_256byte_hash(void (*hash)(const void *, size_t, uint32_t, ovs_u128 *), ovs_u128 out0, out1; in0 = in0_data.b; - /* When, i or j == n_bits, the set_bit128() will just - * zero set the input variables. */ - set_bit128(in0, i); - set_bit128(in1, i); + set_bit128(in0, i, n_bits); + set_bit128(in1, i, n_bits); hash(in0, sizeof(ovs_u128) * 16, 0, &out0); hash(in1, sizeof(ovs_u128) * 16, 0, &out1); if (!ovs_u128_equal(&out0, &out1)) { @@ -167,7 +224,7 @@ check_256byte_hash(void (*hash)(const void *, size_t, uint32_t, ovs_u128 *), ovs_u128 in2[16]; ovs_u128 out2; - set_bit128(in2, j); + set_bit128(in2, j, n_bits); hash(in2, sizeof(ovs_u128) * 16, 0, &out2); if ((out1.u64.lo & unique_mask) == (out2.u64.lo & unique_mask)) { printf("%s has a partial collision:\n", name); @@ -241,6 +298,25 @@ test_hash_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) */ check_word_hash(hash_int_cb, "hash_int", 12); + /* Check that all hashes computed with hash_bytes128 with one 1-bit (or no + * 1-bits) set within a single 128-bit word have different values in all + * 17-bit consecutive runs. + * + * Given a random distribution, the probability of at least one collision + * in any set of 17 bits is approximately + * + * 1 - ((2**17 - 1)/2**17)**C(129,2) + * == 1 - (131,071/131,072)**8256 + * =~ 0.061 + * + * There are 111 ways to pick 17 consecutive bits in a 128-bit word, so if + * we assumed independence then the chance of having no collisions in any of + * those 17-bit runs would be (1-0.061)**111 =~ 0.00092. This refutes our + * assumption of independence, which makes it seem like a good hash + * function. + */ + check_hash_bytes128(hash_bytes128, "hash_bytes128", 17); + /* Check that all hashes computed with hash_bytes128 with 1-bit (or no * 1-bits) set within 16 128-bit words have different values in their * lowest 23 bits. -- 1.7.9.5 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev