On Mon, Apr 13, 2015 at 3:22 PM, Thomas Helland <thomashellan...@gmail.com> wrote: > > On 14 Apr 2015 00:05, "Jason Ekstrand" <ja...@jlekstrand.net> wrote: >> >> On Sat, Apr 11, 2015 at 4:25 PM, Thomas Helland >> <thomashellan...@gmail.com> wrote: >> > The same rationale applies here as for the hash table. >> > Power of two size should give better performance, >> > and using the algorithm hash = sh + i/2 + i*i/2 >> > should result in only distinct hash values when hitting collisions. >> > >> > Difference at 95.0% confidence >> > -7.9505 +/- 2.44011 >> > -5.04357% +/- 1.54794% >> > >> > V3: Feedback from Eric Anholt >> > - Don't change load factor and starting size. >> > >> > V2: Feedback from Connor Abbott >> > - Don't set initial hash address before potential rehash >> > - Remove hash_sizes table >> > - Correct the quadratic hashing algorithm >> > - Use correct comment style >> > >> > Feedback from Jason Ekstrand >> > - Use unreachable() to detect if we fail to insert >> > >> > Signed-off-by: Thomas Helland <thomashellan...@gmail.com> >> > --- >> > src/util/set.c | 118 >> > ++++++++++++++++++++------------------------------------- >> > src/util/set.h | 3 +- >> > 2 files changed, 43 insertions(+), 78 deletions(-) >> > >> > diff --git a/src/util/set.c b/src/util/set.c >> > index f01f869..7ff9520 100644 >> > --- a/src/util/set.c >> > +++ b/src/util/set.c >> > @@ -32,6 +32,17 @@ >> > * Keith Packard <kei...@keithp.com> >> > */ >> > >> > +/** >> > + * Implements an open-addressing, quadratic probing hash-set. >> > + * >> > + * We choose set sizes that's a power of two. >> > + * This is computationally less expensive than primes. >> > + * As a bonus the size and free space can be calculated instead of >> > looked up. >> > + * FNV-1a has good avalanche properties, so collision is not an issue. >> > + * These sets are sized to have an extra 10% free to avoid >> > + * exponential performance degradation as the set fills. >> > + */ >> > + >> > #include <stdlib.h> >> > #include <assert.h> >> > >> > @@ -39,51 +50,9 @@ >> > #include "ralloc.h" >> > #include "set.h" >> > >> > -/* >> > - * From Knuth -- a good choice for hash/rehash values is p, p-2 where >> > - * p and p-2 are both prime. These tables are sized to have an extra >> > 10% >> > - * free to avoid exponential performance degradation as the hash table >> > fills >> > - */ >> > - >> > uint32_t deleted_key_value; >> > const void *deleted_key = &deleted_key_value; >> > >> > -static const struct { >> > - uint32_t max_entries, size, rehash; >> > -} hash_sizes[] = { >> > - { 2, 5, 3 }, >> > - { 4, 7, 5 }, >> > - { 8, 13, 11 }, >> > - { 16, 19, 17 }, >> > - { 32, 43, 41 }, >> > - { 64, 73, 71 }, >> > - { 128, 151, 149 }, >> > - { 256, 283, 281 }, >> > - { 512, 571, 569 }, >> > - { 1024, 1153, 1151 }, >> > - { 2048, 2269, 2267 }, >> > - { 4096, 4519, 4517 }, >> > - { 8192, 9013, 9011 }, >> > - { 16384, 18043, 18041 }, >> > - { 32768, 36109, 36107 }, >> > - { 65536, 72091, 72089 }, >> > - { 131072, 144409, 144407 }, >> > - { 262144, 288361, 288359 }, >> > - { 524288, 576883, 576881 }, >> > - { 1048576, 1153459, 1153457 }, >> > - { 2097152, 2307163, 2307161 }, >> > - { 4194304, 4613893, 4613891 }, >> > - { 8388608, 9227641, 9227639 }, >> > - { 16777216, 18455029, 18455027 }, >> > - { 33554432, 36911011, 36911009 }, >> > - { 67108864, 73819861, 73819859 }, >> > - { 134217728, 147639589, 147639587 }, >> > - { 268435456, 295279081, 295279079 }, >> > - { 536870912, 590559793, 590559791 }, >> > - { 1073741824, 1181116273, 1181116271 }, >> > - { 2147483648ul, 2362232233ul, 2362232231ul } >> > -}; >> > - >> > static int >> > entry_is_free(struct set_entry *entry) >> > { >> > @@ -114,10 +83,9 @@ _mesa_set_create(void *mem_ctx, >> > if (ht == NULL) >> > return NULL; >> > >> > - ht->size_index = 0; >> > - ht->size = hash_sizes[ht->size_index].size; >> > - ht->rehash = hash_sizes[ht->size_index].rehash; >> > - ht->max_entries = hash_sizes[ht->size_index].max_entries; >> > + ht->size_iteration = 2; >> > + ht->size = 1 << ht->size_iteration; >> > + ht->max_entries = ht->size * 0.9; >> > ht->key_hash_function = key_hash_function; >> > ht->key_equals_function = key_equals_function; >> > ht->table = rzalloc_array(ht, struct set_entry, ht->size); >> > @@ -163,12 +131,11 @@ _mesa_set_destroy(struct set *ht, void >> > (*delete_function)(struct set_entry *entr >> > static struct set_entry * >> > set_search(const struct set *ht, uint32_t hash, const void *key) >> > { >> > - uint32_t hash_address; >> > + uint32_t start_hash_address = hash & (ht->size - 1); >> > + uint32_t hash_address = start_hash_address; >> > + uint32_t quad_hash = 1; >> > >> > - hash_address = hash % ht->size; >> > do { >> > - uint32_t double_hash; >> > - >> > struct set_entry *entry = ht->table + hash_address; >> > >> > if (entry_is_free(entry)) { >> > @@ -179,10 +146,10 @@ set_search(const struct set *ht, uint32_t hash, >> > const void *key) >> > } >> > } >> > >> > - double_hash = 1 + hash % ht->rehash; >> > - >> > - hash_address = (hash_address + double_hash) % ht->size; >> > - } while (hash_address != hash % ht->size); >> > + hash_address = (start_hash_address + >> > + (quad_hash + (quad_hash * quad_hash)) / 2) & (ht->size >> > - 1); >> > + quad_hash++; >> > + } while (hash_address != start_hash_address); >> > >> > return NULL; >> > } >> > @@ -207,35 +174,30 @@ static struct set_entry * >> > set_add(struct set *ht, uint32_t hash, const void *key); >> > >> > static void >> > -set_rehash(struct set *ht, unsigned new_size_index) >> > +set_rehash(struct set *ht, uint32_t new_size_iteration) >> > { >> > struct set old_ht; >> > struct set_entry *table, *entry; >> > >> > - if (new_size_index >= ARRAY_SIZE(hash_sizes)) >> > + if (new_size_iteration >= 31) >> > return; >> > >> > table = rzalloc_array(ht, struct set_entry, >> > - hash_sizes[new_size_index].size); >> > + 1 << new_size_iteration); >> > if (table == NULL) >> > return; >> > >> > old_ht = *ht; >> > >> > ht->table = table; >> > - ht->size_index = new_size_index; >> > - ht->size = hash_sizes[ht->size_index].size; >> > - ht->rehash = hash_sizes[ht->size_index].rehash; >> > - ht->max_entries = hash_sizes[ht->size_index].max_entries; >> > + ht->size_iteration = new_size_iteration; >> > + ht->size = 1 << new_size_iteration; >> > + ht->max_entries = ht->size * 0.7; >> > ht->entries = 0; >> > ht->deleted_entries = 0; >> > >> > - for (entry = old_ht.table; >> > - entry != old_ht.table + old_ht.size; >> > - entry++) { >> > - if (entry_is_present(entry)) { >> > - set_add(ht, entry->hash, entry->key); >> > - } >> > + set_foreach(&old_ht, entry) { >> > + set_add(ht, entry->hash, entry->key); >> > } >> >> This hunk, while I think it's correct, is unrelated. Please drop it. >> I think I'm ok with the cleanup, but it should be a separate patch if >> you want to do that. >> --Jason >> > > Yup, unrelated change it is. > I'll split it in two and post both in one of the coming days. > > (This is how it is done in the hash table in current master. > Following the same pattern in the set seemed desirable.)
Yup. Sounds good to me. Go ahead and put my r-b on the split-out hunk assuming it's the same as above --Jason > - Thomas > >> > ralloc_free(old_ht.table); >> > @@ -250,19 +212,21 @@ set_rehash(struct set *ht, unsigned >> > new_size_index) >> > static struct set_entry * >> > set_add(struct set *ht, uint32_t hash, const void *key) >> > { >> > - uint32_t hash_address; >> > + uint32_t start_hash_address, hash_address; >> > + uint32_t quad_hash = 1; >> > struct set_entry *available_entry = NULL; >> > >> > if (ht->entries >= ht->max_entries) { >> > - set_rehash(ht, ht->size_index + 1); >> > + set_rehash(ht, ht->size_iteration + 1); >> > } else if (ht->deleted_entries + ht->entries >= ht->max_entries) { >> > - set_rehash(ht, ht->size_index); >> > + set_rehash(ht, ht->size_iteration); >> > } >> > >> > - hash_address = hash % ht->size; >> > + start_hash_address = hash & (ht->size - 1); >> > + hash_address = start_hash_address; >> > + >> > do { >> > struct set_entry *entry = ht->table + hash_address; >> > - uint32_t double_hash; >> > >> > if (!entry_is_present(entry)) { >> > /* Stash the first available entry we find */ >> > @@ -288,10 +252,11 @@ set_add(struct set *ht, uint32_t hash, const void >> > *key) >> > return entry; >> > } >> > >> > - double_hash = 1 + hash % ht->rehash; >> > + hash_address = (start_hash_address + >> > + (quad_hash + (quad_hash * quad_hash)) / 2) & (ht->size >> > - 1); >> > + quad_hash++; >> > + } while (hash_address != start_hash_address); >> > >> > - hash_address = (hash_address + double_hash) % ht->size; >> > - } while (hash_address != hash % ht->size); >> > >> > if (available_entry) { >> > if (entry_is_deleted(available_entry)) >> > @@ -305,6 +270,7 @@ set_add(struct set *ht, uint32_t hash, const void >> > *key) >> > /* We could hit here if a required resize failed. An >> > unchecked-malloc >> > * application could ignore this result. >> > */ >> > + unreachable("Failed to insert entry in hash set"); >> > return NULL; >> > } >> > >> > @@ -368,7 +334,7 @@ _mesa_set_random_entry(struct set *ht, >> > int (*predicate)(struct set_entry *entry)) >> > { >> > struct set_entry *entry; >> > - uint32_t i = rand() % ht->size; >> > + uint32_t i = rand() & (ht->size - 1); >> > >> > if (ht->entries == 0) >> > return NULL; >> > diff --git a/src/util/set.h b/src/util/set.h >> > index 9acd2c2..fa17d3a 100644 >> > --- a/src/util/set.h >> > +++ b/src/util/set.h >> > @@ -46,9 +46,8 @@ struct set { >> > uint32_t (*key_hash_function)(const void *key); >> > bool (*key_equals_function)(const void *a, const void *b); >> > uint32_t size; >> > - uint32_t rehash; >> > uint32_t max_entries; >> > - uint32_t size_index; >> > + uint32_t size_iteration; >> > uint32_t entries; >> > uint32_t deleted_entries; >> > }; >> > -- >> > 2.3.4 >> > >> > _______________________________________________ >> > mesa-dev mailing list >> > mesa-dev@lists.freedesktop.org >> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev