libihash uses open addressing. Previously, quadratic probing in both directions was used to resolve collisions. Quadratic probing might result in a less efficient use of caches.
Also, prime numbers of the form 4 * i + 3 were used as array sizes. This was used in combination with the integer modulo operation for hashing. It has been known for some time that libihash suffers from collisions, so a integer hash function is now applied to the keys to derive the index. Use linear probing instead. Also, use powers of two for the array sizes, so a bit mask can be used for the modulo operation. * libihash/ihash.c (ihash_sizes, ihash_nsizes): Remove. (find_index): Use linear probing and fast modulo operation. (add_one): Likewise. * libihash/ihash.h (HURD_IHASH_MIN_SIZE): New macro. --- libihash/ihash.c | 121 ++++++------------------------------------------------- libihash/ihash.h | 4 ++ 2 files changed, 17 insertions(+), 108 deletions(-) diff --git a/libihash/ihash.c b/libihash/ihash.c index 1de4c35..e74a2c5 100644 --- a/libihash/ihash.c +++ b/libihash/ihash.c @@ -31,55 +31,6 @@ #include <assert.h> #include "ihash.h" - - -/* The prime numbers of the form 4 * i + 3 for some i, all greater - than twice the previous one and smaller than 2^40 (for now). */ -static const uint64_t ihash_sizes[] = -{ - 3, - 7, - 19, - 43, - 103, - 211, - 431, - 863, - 1747, - 3499, - 7019, - 14051, - 28111, - 56239, - 112507, - 225023, - 450067, - 900139, - 1800311, - 3600659, - 7201351, - 14402743, - 28805519, - 57611039, - 115222091, - 230444239, - 460888499, - 921777067, - 1843554151, - UINT64_C (3687108307), - UINT64_C (7374216631), - UINT64_C (14748433279), - UINT64_C (29496866579), - UINT64_C (58993733159), - UINT64_C (117987466379), - UINT64_C (235974932759), - UINT64_C (471949865531), - UINT64_C (943899731087) -}; - -static const unsigned int ihash_nsizes = (sizeof ihash_sizes - / sizeof ihash_sizes[0]); - /* Integer hashing follows Thomas Wang's paper about his 32/64-bits mix functions : @@ -126,40 +77,24 @@ static inline int find_index (hurd_ihash_t ht, hurd_ihash_key_t key) { unsigned int idx; - unsigned int i; unsigned int up_idx; - unsigned int down_idx; + unsigned int mask = ht->size - 1; - idx = hash_int32 (key, 32) % ht->size; + idx = hash_int32 (key, 32) & mask; if (ht->items[idx].value == _HURD_IHASH_EMPTY || ht->items[idx].key == key) return idx; - /* Instead of calculating idx + 1, idx + 4, idx + 9, ..., idx + i^2, - we add 1, 3, 5, 7, etc to the previous index. We do this in both - directions separately. */ - i = 1; up_idx = idx; - down_idx = idx; do { - up_idx = (up_idx + i) % ht->size; + up_idx = (up_idx + 1) & mask; if (ht->items[up_idx].value == _HURD_IHASH_EMPTY || ht->items[up_idx].key == key) return up_idx; - - if (down_idx < i) - down_idx += ht->size; - down_idx = (down_idx - i) % ht->size; - if (ht->items[down_idx].value == _HURD_IHASH_EMPTY - || ht->items[down_idx].key == key) - return down_idx; - - /* After (ht->size - 1) / 2 iterations, this will be 0. */ - i = (i + 2) % ht->size; } - while (i); + while (up_idx != idx); /* If we end up here, the item could not be found. Return any invalid index. */ @@ -282,53 +217,26 @@ add_one (hurd_ihash_t ht, hurd_ihash_key_t key, hurd_ihash_value_t value) { unsigned int idx; unsigned int first_free; + unsigned int mask = ht->size - 1; - idx = hash_int32 (key, 32) % ht->size; + idx = hash_int32 (key, 32) & mask; first_free = idx; if (ht->items[idx].value != _HURD_IHASH_EMPTY && ht->items[idx].key != key) { - /* Instead of calculating idx + 1, idx + 4, idx + 9, ..., idx + - i^2, we add 1, 3, 5, 7, ... 2 * i - 1 to the previous index. - We do this in both directions separately. */ - unsigned int i = 1; unsigned int up_idx = idx; - unsigned int down_idx = idx; do { - up_idx = (up_idx + i) % ht->size; + up_idx = (up_idx + 1) & mask; if (ht->items[up_idx].value == _HURD_IHASH_EMPTY || ht->items[up_idx].key == key) { idx = up_idx; break; } - if (first_free == idx - && ht->items[up_idx].value == _HURD_IHASH_DELETED) - first_free = up_idx; - - if (down_idx < i) - down_idx += ht->size; - down_idx = (down_idx - i) % ht->size; - if (down_idx < 0) - down_idx += ht->size; - else - down_idx %= ht->size; - if (ht->items[down_idx].value == _HURD_IHASH_EMPTY - || ht->items[down_idx].key == key) - { - idx = down_idx; - break; - } - if (first_free == idx - && ht->items[down_idx].value == _HURD_IHASH_DELETED) - first_free = down_idx; - - /* After (ht->size - 1) / 2 iterations, this will be 0. */ - i = (i + 2) % ht->size; } - while (i); + while (up_idx != idx); } /* Remove the old entry for this key if necessary. */ @@ -377,15 +285,12 @@ hurd_ihash_add (hurd_ihash_t ht, hurd_ihash_key_t key, hurd_ihash_value_t item) } /* The hash table is too small, and we have to increase it. */ - for (i = 0; i < ihash_nsizes; i++) - if (ihash_sizes[i] > old_ht.size) - break; - if (i == ihash_nsizes - || ihash_sizes[i] > SIZE_MAX / sizeof (struct _hurd_ihash_item)) - return ENOMEM; /* Surely will be true momentarily. */ - ht->nr_items = 0; - ht->size = ihash_sizes[i]; + if (ht->size == 0) + ht->size = HURD_IHASH_MIN_SIZE; + else + ht->size <<= 1; + /* calloc() will initialize all values to _HURD_IHASH_EMPTY implicitely. */ ht->items = calloc (ht->size, sizeof (struct _hurd_ihash_item)); diff --git a/libihash/ihash.h b/libihash/ihash.h index 6bdc925..057babc 100644 --- a/libihash/ihash.h +++ b/libihash/ihash.h @@ -93,6 +93,10 @@ typedef struct hurd_ihash *hurd_ihash_t; /* Construction and destruction of hash tables. */ +/* The size of the initial allocation in number of items. This must + be a power of two. */ +#define HURD_IHASH_MIN_SIZE 32 + /* The default value for the maximum load factor in percent. */ #define HURD_IHASH_MAX_LOAD_DEFAULT 75 -- 2.0.0.rc0