Changeset: 3cd4a5ef0276 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=3cd4a5ef0276
Modified Files:
	gdk/gdk.h
	gdk/gdk_bat.c
	gdk/gdk_batop.c
	gdk/gdk_bbp.c
	gdk/gdk_group.c
	gdk/gdk_hash.c
	gdk/gdk_hash.h
	gdk/gdk_join.c
	gdk/gdk_private.h
	gdk/gdk_select.c
	gdk/gdk_unique.c
	monetdb5/mal/mal_resource.h
	monetdb5/modules/kernel/bat5.c
	monetdb5/modules/kernel/status.c
	sql/backends/monet5/sql.c
Branch: linear-hashing
Log Message:
Split hash file into two files. diffs (truncated from 714 to 300 lines): diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -599,11 +599,11 @@ typedef struct { int type; /* type of index entity */ int width; /* width of hash entries */ BUN nil; /* nil representation */ - BUN lim; /* collision list size */ BUN mask; /* number of hash buckets-1 (power of 2) */ void *Hash; /* hash table */ void *Link; /* collision list */ - Heap heap; /* heap where the hash is stored */ + Heap heaplink; /* heap where the hash links are stored */ + Heap heapbckt; /* heap where the hash buckets are stored */ } Hash; typedef struct Imprints Imprints; diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c --- a/gdk/gdk_bat.c +++ b/gdk/gdk_bat.c @@ -2351,7 +2351,6 @@ BATassertProps(BAT *b) const char *nme = BBP_physical(b->batCacheid); Hash *hs = NULL; BUN mask; - int len; if ((hs = GDKzalloc(sizeof(Hash))) == NULL) { fprintf(stderr, @@ -2359,8 +2358,8 @@ BATassertProps(BAT *b) "hash table\n"); goto abort_check; } - len = snprintf(hs->heap.filename, sizeof(hs->heap.filename), "%s.hash%d", nme, THRgettid()); - if (len == -1 || len > (int) sizeof(hs->heap.filename)) { + if (snprintf(hs->heaplink.filename, sizeof(hs->heaplink.filename), "%s.thshprpl%x", nme, THRgettid()) >= (int) sizeof(hs->heaplink.filename) || + snprintf(hs->heapbckt.filename, sizeof(hs->heapbckt.filename), "%s.thshprpb%x", nme, THRgettid()) >= (int) sizeof(hs->heapbckt.filename)) { GDKfree(hs); fprintf(stderr, "#BATassertProps: heap filename " @@ -2373,10 +2372,12 @@ BATassertProps(BAT *b) mask = (BUN) 1 << 16; else mask = HASHmask(b->batCount); - if ((hs->heap.farmid = BBPselectfarm(TRANSIENT, b->ttype, - hashheap)) < 0 || + if ((hs->heaplink.farmid = BBPselectfarm( + TRANSIENT, b->ttype, hashheap)) < 0 || + (hs->heapbckt.farmid = BBPselectfarm( + TRANSIENT, b->ttype, hashheap)) < 0 || HASHnew(hs, b->ttype, BUNlast(b), - mask, BUN_NONE) != GDK_SUCCEED) { + mask, BUN_NONE, false) != GDK_SUCCEED) { GDKfree(hs); 
fprintf(stderr, "#BATassertProps: cannot allocate " @@ -2409,7 +2410,8 @@ BATassertProps(BAT *b) assert(!b->tnonil || !isnil); seennil |= isnil; } - HEAPfree(&hs->heap, true); + HEAPfree(&hs->heaplink, true); + HEAPfree(&hs->heapbckt, true); GDKfree(hs); } abort_check: diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -568,7 +568,7 @@ BATappend(BAT *b, BAT *n, BAT *s, bool f } while (prop); #endif if (b->thash == (Hash *) 1 || BATcount(b) == 0 || - (b->thash && ((size_t *) b->thash->heap.base)[0] & (1 << 24))) { + (b->thash && ((size_t *) b->thash->heapbckt.base)[0] & (1 << 24))) { /* don't bother first loading the hash to then change * it, or updating the hash if we replace the heap, * also, we cannot maintain persistent hashes */ @@ -1385,7 +1385,6 @@ BATkeyed(BAT *b) const char *nme; BUN prb; BUN mask; - int len; GDKclrerr(); /* not interested in BAThash errors */ nme = BBP_physical(b->batCacheid); @@ -1402,9 +1401,9 @@ BATkeyed(BAT *b) } if ((hs = GDKzalloc(sizeof(Hash))) == NULL) goto doreturn; - len = snprintf(hs->heap.filename, sizeof(hs->heap.filename), "%s.hash%d", nme, THRgettid()); - if (len == -1 || len >= (int) sizeof(hs->heap.filename) || - HASHnew(hs, b->ttype, BUNlast(b), mask, BUN_NONE) != GDK_SUCCEED) { + if (snprintf(hs->heaplink.filename, sizeof(hs->heaplink.filename), "%s.thshkeyl%x", nme, THRgettid()) >= (int) sizeof(hs->heaplink.filename) || + snprintf(hs->heapbckt.filename, sizeof(hs->heapbckt.filename), "%s.thshkeyb%x", nme, THRgettid()) >= (int) sizeof(hs->heapbckt.filename) || + HASHnew(hs, b->ttype, BUNlast(b), mask, BUN_NONE, false) != GDK_SUCCEED) { GDKfree(hs); /* err on the side of caution: not keyed */ goto doreturn; @@ -1428,7 +1427,8 @@ BATkeyed(BAT *b) HASHput(hs, prb, p); } doreturn_free: - HEAPfree(&hs->heap, true); + HEAPfree(&hs->heaplink, true); + HEAPfree(&hs->heapbckt, true); GDKfree(hs); if (p == q) { /* we completed the complete scan: no diff --git a/gdk/gdk_bbp.c 
b/gdk/gdk_bbp.c --- a/gdk/gdk_bbp.c +++ b/gdk/gdk_bbp.c @@ -1972,16 +1972,15 @@ BBPdump(void) } } if (b->thash && b->thash != (Hash *) 1) { - fprintf(stderr, - " Thash=[%zu,%zu]", - HEAPmemsize(&b->thash->heap), - HEAPvmsize(&b->thash->heap)); + size_t m = HEAPmemsize(&b->thash->heaplink) + HEAPmemsize(&b->thash->heapbckt); + size_t v = HEAPvmsize(&b->thash->heaplink) + HEAPvmsize(&b->thash->heapbckt); + fprintf(stderr, " Thash=[%zu,%zu]", m, v); if (BBP_logical(i) && BBP_logical(i)[0] == '.') { - cmem += HEAPmemsize(&b->thash->heap); - cvm += HEAPvmsize(&b->thash->heap); + cmem += m; + cvm += v; } else { - mem += HEAPmemsize(&b->thash->heap); - vm += HEAPvmsize(&b->thash->heap); + mem += m; + vm += v; } } fprintf(stderr, " role: %s, persistence: %s\n", @@ -3794,7 +3793,8 @@ BBPdiskscan(const char *parent, size_t b } else if (strncmp(p + 1, "theap", 5) == 0) { BAT *b = getdesc(bid); delete = (b == NULL || !b->tvheap || !b->batCopiedtodisk); - } else if (strncmp(p + 1, "thash", 5) == 0) { + } else if (strncmp(p + 1, "thashl", 6) == 0 || + strncmp(p + 1, "thashb", 6) == 0) { #ifdef PERSISTENTHASH BAT *b = getdesc(bid); delete = b == NULL; @@ -3803,6 +3803,10 @@ BBPdiskscan(const char *parent, size_t b #else delete = true; #endif + } else if (strncmp(p + 1, "thash", 5) == 0) { + /* older versions used .thash which we + * can simply ignore */ + delete = true; } else if (strncmp(p + 1, "timprints", 9) == 0) { BAT *b = getdesc(bid); delete = b == NULL; diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c --- a/gdk/gdk_group.c +++ b/gdk/gdk_group.c @@ -1047,7 +1047,7 @@ BATgroup_internal(BAT **groups, BAT **ex bool gc = g != NULL && (BATordered(g) || BATordered_rev(g)); const char *nme; BUN prb; - int bits, len; + int bits; BUN mask; oid grp; @@ -1075,15 +1075,16 @@ BATgroup_internal(BAT **groups, BAT **ex * which power of two */ bits = 8 * SIZEOF_OID - pop(mask - 1); if ((hs = GDKzalloc(sizeof(Hash))) == NULL || - (hs->heap.farmid = BBPselectfarm(TRANSIENT, b->ttype, 
hashheap)) < 0) { + (hs->heaplink.farmid = BBPselectfarm(TRANSIENT, b->ttype, hashheap)) < 0 || + (hs->heapbckt.farmid = BBPselectfarm(TRANSIENT, b->ttype, hashheap)) < 0) { GDKfree(hs); hs = NULL; GDKerror("BATgroup: cannot allocate hash table\n"); goto error; } - len = snprintf(hs->heap.filename, sizeof(hs->heap.filename), "%s.hash%d", nme, THRgettid()); - if (len < 0 || len >= (int) sizeof(hs->heap.filename) || - HASHnew(hs, b->ttype, BUNlast(b), mask, BUN_NONE) != GDK_SUCCEED) { + if (snprintf(hs->heaplink.filename, sizeof(hs->heaplink.filename), "%s.thshgrpl%x", nme, THRgettid()) >= (int) sizeof(hs->heaplink.filename) || + snprintf(hs->heapbckt.filename, sizeof(hs->heapbckt.filename), "%s.thshgrpb%x", nme, THRgettid()) >= (int) sizeof(hs->heapbckt.filename) || + HASHnew(hs, b->ttype, BUNlast(b), mask, BUN_NONE, false) != GDK_SUCCEED) { GDKfree(hs); hs = NULL; GDKerror("BATgroup: cannot allocate hash table\n"); @@ -1173,7 +1174,8 @@ BATgroup_internal(BAT **groups, BAT **ex GRP_create_partial_hash_table_any(); } - HEAPfree(&hs->heap, true); + HEAPfree(&hs->heapbckt, true); + HEAPfree(&hs->heaplink, true); GDKfree(hs); } if (extents) { @@ -1210,7 +1212,8 @@ BATgroup_internal(BAT **groups, BAT **ex return GDK_SUCCEED; error: if (hs != NULL && hs != b->thash) { - HEAPfree(&hs->heap, true); + HEAPfree(&hs->heaplink, true); + HEAPfree(&hs->heapbckt, true); GDKfree(hs); } if (gn) diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c --- a/gdk/gdk_hash.c +++ b/gdk/gdk_hash.c @@ -91,18 +91,22 @@ HASHclear(Hash *h) #define HASH_HEADER_SIZE 6 /* nr of size_t fields in header */ gdk_return -HASHnew(Hash *h, int tpe, BUN size, BUN mask, BUN count) +HASHnew(Hash *h, int tpe, BUN size, BUN mask, BUN count, bool bcktonly) { - Heap *hp = &h->heap; - int width = HASHwidth(size); + if (h->width == 0) + h->width = HASHwidth(size); - if (HEAPalloc(hp, mask + size + HASH_HEADER_SIZE * SIZEOF_SIZE_T / width, width) != GDK_SUCCEED) + if (!bcktonly) { + if (HEAPalloc(&h->heaplink, size, 
h->width) != GDK_SUCCEED) + return GDK_FAIL; + h->heaplink.free = size * h->width; + h->Link = h->heaplink.base; + } + if (HEAPalloc(&h->heapbckt, mask + HASH_HEADER_SIZE * SIZEOF_SIZE_T / h->width, h->width) != GDK_SUCCEED) return GDK_FAIL; - h->heap.free = (mask + size) * width + HASH_HEADER_SIZE * SIZEOF_SIZE_T; - h->lim = size; + h->heapbckt.free = mask * h->width + HASH_HEADER_SIZE * SIZEOF_SIZE_T; h->mask = mask - 1; - h->width = width; - switch (width) { + switch (h->width) { case BUN2: h->nil = (BUN) BUN2_NONE; break; @@ -117,17 +121,16 @@ HASHnew(Hash *h, int tpe, BUN size, BUN default: assert(0); } - h->Link = h->heap.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T; - h->Hash = (void *) ((char *) h->Link + h->lim * width); + h->Hash = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T; h->type = tpe; HASHclear(h); /* zero the mask */ - ((size_t *) h->heap.base)[0] = HASH_VERSION; - ((size_t *) h->heap.base)[1] = size; - ((size_t *) h->heap.base)[2] = mask; - ((size_t *) h->heap.base)[3] = width; - ((size_t *) h->heap.base)[4] = count; - ((size_t *) h->heap.base)[5] = 0; /* # filled slots (chain heads) */ - ACCELDEBUG fprintf(stderr, "#HASHnew: create hash(size " BUNFMT ", mask " BUNFMT ", width %d, total " BUNFMT " bytes);\n", size, mask, width, (size + mask) * width); + ((size_t *) h->heapbckt.base)[0] = HASH_VERSION; + ((size_t *) h->heapbckt.base)[1] = size; + ((size_t *) h->heapbckt.base)[2] = mask; + ((size_t *) h->heapbckt.base)[3] = h->width; + ((size_t *) h->heapbckt.base)[4] = count; + ((size_t *) h->heapbckt.base)[5] = 0; /* # filled slots (chain heads) */ + ACCELDEBUG fprintf(stderr, "#HASHnew: create hash(size " BUNFMT ", mask " BUNFMT ", width %d, total " BUNFMT " bytes);\n", size, mask, h->width, (size + mask) * h->width); return GDK_SUCCEED; } @@ -183,14 +186,18 @@ BATcheckhash(BAT *b) assert(!GDKinmemory()); b->thash = NULL; if ((h = GDKzalloc(sizeof(*h))) != NULL && - (h->heap.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) >= 0) { + 
(h->heaplink.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) >= 0 && + (h->heapbckt.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) >= 0) { const char *nme = BBP_physical(b->batCacheid); - strconcat_len(h->heap.filename, - sizeof(h->heap.filename), - nme, ".thash", NULL); + strconcat_len(h->heaplink.filename, + sizeof(h->heaplink.filename), + nme, ".thashl", NULL); + strconcat_len(h->heapbckt.filename, + sizeof(h->heapbckt.filename), + nme, ".thashb", NULL); /* check whether a persisted hash can be found */ - if ((fd = GDKfdlocate(h->heap.farmid, nme, "rb+", "thash")) >= 0) { + if ((fd = GDKfdlocate(h->heapbckt.farmid, nme, "rb+", "thashb")) >= 0) { size_t hdata[HASH_HEADER_SIZE]; struct stat st; @@ -202,9 +209,13 @@ BATcheckhash(BAT *b) HASH_VERSION) && hdata[4] == (size_t) BATcount(b) && fstat(fd, &st) == 0 && - st.st_size >= (off_t) (h->heap.size = h->heap.free = (hdata[1] + hdata[2]) * hdata[3] + HASH_HEADER_SIZE * SIZEOF_SIZE_T) && - HEAPload(&h->heap, nme, "thash", false) == GDK_SUCCEED) { - h->lim = (BUN) hdata[1]; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list