Changeset: 5d22cee12337 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=5d22cee12337 Modified Files: clients/Tests/exports.stable.out gdk/gdk_batop.c gdk/gdk_hash.c gdk/gdk_hash.h monetdb5/mal/mal_builder.c sql/storage/bat/bat_storage.c Branch: default Log Message:
Merge with linear-hashing branch. diffs (245 lines): diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -297,7 +297,6 @@ void HASHdestroy(BAT *b); gdk_return HASHgrowbucket(BAT *b); BUN HASHlist(Hash *h, BUN i); BUN HASHprobe(const Hash *h, const void *v); -gdk_return HASHupgradehashheap(BAT *b, BUN cap); void HEAP_free(Heap *heap, var_t block); void HEAP_initialize(Heap *heap, size_t nbytes, size_t nprivate, int alignment); var_t HEAP_malloc(Heap *heap, size_t nbytes); diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -302,7 +302,6 @@ insert_string_bat(BAT *b, BAT *n, BAT *s b->theap.free += sizeof(var_t); break; } - b->theap.dirty = true; } b->tvarsized = true; b->ttype = TYPE_str; diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c --- a/gdk/gdk_hash.c +++ b/gdk/gdk_hash.c @@ -207,17 +207,16 @@ HASHcollisions(BAT *b, Hash *h, const ch entries == 0 ? 0 : total / entries); } -gdk_return -HASHupgradehashheap(BAT *b, BUN cap) +static gdk_return +HASHupgradehashheap(BAT *b) { Hash *h = b->thash; - int nwidth; + int nwidth = h->width << 1; BUN i; - if (h == NULL) - return GDK_SUCCEED; - if ((nwidth = HASHwidth(cap)) <= h->width) - return GDK_SUCCEED; + assert(nwidth <= SIZEOF_BUN); + assert((nwidth & (nwidth - 1)) == 0); + if (HEAPextend(&h->heaplink, h->heaplink.size * nwidth / h->width, true) != GDK_SUCCEED || HEAPextend(&h->heapbckt, (h->heapbckt.size - HASH_HEADER_SIZE * SIZEOF_SIZE_T) * nwidth / h->width + HASH_HEADER_SIZE * SIZEOF_SIZE_T, true) != GDK_SUCCEED) { b->thash = NULL; @@ -246,6 +245,7 @@ HASHupgradehashheap(BAT *b, BUN cap) } break; } + h->nil = BUN4_NONE; break; #ifdef BUN8 case BUN8: @@ -283,6 +283,7 @@ HASHupgradehashheap(BAT *b, BUN cap) } break; } + h->nil = BUN8_NONE; break; #endif } @@ -308,6 +309,12 @@ HASHgrowbucket(BAT *b) lng t0 = 0; ACCELDEBUG t0 = GDKusec(); + + /* only needed to fix hash tables built before this fix was + * introduced */ + if (h->nil <= h->mask2 && HASHupgradehashheap(b) != GDK_SUCCEED) + return GDK_FAIL; + h->heapbckt.dirty = true; h->heaplink.dirty = true; if (((size_t *) h->heapbckt.base)[0] & (size_t) 1 << 24) { @@ -338,6 +345,11 @@ HASHgrowbucket(BAT *b) if (h->nbucket == h->mask2) { h->mask1 = h->mask2; h->mask2 = h->mask2 << 1 | 1; + if (h->nil == h->mask2) { + /* time to widen the hash table */ + if (HASHupgradehashheap(b) != GDK_SUCCEED) + return GDK_FAIL; + } } h->nbucket++; h->heapbckt.free += h->width; @@ -448,54 +460,75 @@ BATcheckhash(BAT *b) fstat(fd, &st) == 0 && st.st_size > 0 && st.st_size >= (off_t) (h->heaplink.size = h->heaplink.free = hdata[1] * h->width) && - HEAPload(&h->heaplink, nme, "thashl", false) == GDK_SUCCEED && - HEAPload(&h->heapbckt, nme, "thashb", false) == GDK_SUCCEED) { - if (h->nbucket & (h->nbucket - 1)) { - h->mask2 = hashmask(h->nbucket); - h->mask1 = h->mask2 >> 1; - } else { - h->mask1 = h->nbucket - 1; - h->mask2 = h->mask1 << 1 | 1; - } - h->nunique = hdata[5]; - h->nheads = hdata[6]; - h->type = ATOMtype(b->ttype); - switch (h->width) { - case BUN2: - h->nil = (BUN) BUN2_NONE; - break; - case BUN4: - h->nil = (BUN) BUN4_NONE; - break; + HEAPload(&h->heaplink, nme, "thashl", false) == GDK_SUCCEED) { + if (HEAPload(&h->heapbckt, nme, "thashb", false) == GDK_SUCCEED) { + if (h->nbucket & (h->nbucket - 1)) { + h->mask2 = hashmask(h->nbucket); + h->mask1 = h->mask2 >> 1; + } else { + h->mask1 = h->nbucket - 1; + h->mask2 = h->mask1 << 1 | 1; + } + h->nunique = hdata[5]; + h->nheads = hdata[6]; + h->type = ATOMtype(b->ttype); + switch (h->width) { + case BUN2: + h->nil = (BUN) BUN2_NONE; + break; + case BUN4: + h->nil = (BUN) BUN4_NONE; + break; #ifdef BUN8 - case BUN8: - h->nil = (BUN) BUN8_NONE; - break; + case BUN8: + h->nil = (BUN) BUN8_NONE; + break; #endif - default: - assert(0); + default: + assert(0); + } + if (h->nil > h->nbucket) { + close(fd); + h->Link = h->heaplink.base; + h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T; + h->heaplink.parentid = b->batCacheid; + h->heapbckt.parentid = b->batCacheid; + h->heaplink.dirty = false; + h->heapbckt.dirty = false; + BATsetprop_nolock( + b, + GDK_HASH_BUCKETS, + TYPE_oid, + &(oid){NHASHBUCKETS(h)}); + BATsetprop_nolock( + b, + GDK_NUNIQUE, + TYPE_oid, + &(oid){h->nunique}); + b->thash = h; + ACCELDEBUG fprintf(stderr, "#%s: BATcheckhash(" ALGOBATFMT "): reusing persisted hash\n", MT_thread_getname(), ALGOBATPAR(b)); + MT_lock_unset(&b->batIdxLock); + return true; + } + /* if h->nil==h->nbucket + * (was + * possible in + * previous + * iterations + * of the + * code), then + * we can't + * use the + * hash since + * we can't + * distinguish + * between + * end-of-list + * and a valid + * link */ + HEAPfree(&h->heapbckt, false); } - close(fd); - h->Link = h->heaplink.base; - h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T; - h->heaplink.parentid = b->batCacheid; - h->heapbckt.parentid = b->batCacheid; - h->heaplink.dirty = false; - h->heapbckt.dirty = false; - BATsetprop_nolock( - b, - GDK_HASH_BUCKETS, - TYPE_oid, - &(oid){NHASHBUCKETS(h)}); - BATsetprop_nolock( - b, - GDK_NUNIQUE, - TYPE_oid, - &(oid){h->nunique}); - b->thash = h; - ACCELDEBUG fprintf(stderr, "#%s: BATcheckhash(" ALGOBATFMT "): reusing persisted hash\n", MT_thread_getname(), ALGOBATPAR(b)); - MT_lock_unset(&b->batIdxLock); - return true; + HEAPfree(&h->heaplink, false); } close(fd); /* unlink unusable file */ diff --git a/gdk/gdk_hash.h b/gdk/gdk_hash.h --- a/gdk/gdk_hash.h +++ b/gdk/gdk_hash.h @@ -35,7 +35,6 @@ gdk_export void HASHdestroy(BAT *b); gdk_export BUN HASHprobe(const Hash *h, const void *v); gdk_export BUN HASHlist(Hash *h, BUN i); gdk_export gdk_return HASHgrowbucket(BAT *b); -gdk_export gdk_return HASHupgradehashheap(BAT *b, BUN cap); #define HASHnil(H) (H)->nil @@ -295,8 +294,7 @@ HASHgetlink(Hash *h, BUN i) (((i) + 1) * _h->width > _h->heaplink.size && \ HEAPextend(&_h->heaplink, \ (i) * _h->width + GDK_mmap_pagesize, \ - true) != GDK_SUCCEED) || \ - HASHupgradehashheap(b, i) != GDK_SUCCEED) { \ + true) != GDK_SUCCEED)) { \ MT_lock_unset(&(b)->batIdxLock); \ HASHdestroy(b); \ } else { \ diff --git a/sql/storage/bat/bat_storage.c b/sql/storage/bat/bat_storage.c --- a/sql/storage/bat/bat_storage.c +++ b/sql/storage/bat/bat_storage.c @@ -13,8 +13,7 @@ #include "algebra.h" #include "gdk_atoms.h" -//#define SNAPSHOT_MINSIZE ((BUN) 1024*128) -#define SNAPSHOT_MINSIZE ((BUN) 1024*2) +#define SNAPSHOT_MINSIZE ((BUN) 1024*128) static MT_Lock destroy_lock = MT_LOCK_INITIALIZER("destroy_lock"); sql_dbat *tobe_destroyed_dbat = NULL; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list