Changeset: 3cd4a5ef0276 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=3cd4a5ef0276
Modified Files:
        gdk/gdk.h
        gdk/gdk_bat.c
        gdk/gdk_batop.c
        gdk/gdk_bbp.c
        gdk/gdk_group.c
        gdk/gdk_hash.c
        gdk/gdk_hash.h
        gdk/gdk_join.c
        gdk/gdk_private.h
        gdk/gdk_select.c
        gdk/gdk_unique.c
        monetdb5/mal/mal_resource.h
        monetdb5/modules/kernel/bat5.c
        monetdb5/modules/kernel/status.c
        sql/backends/monet5/sql.c
Branch: linear-hashing
Log Message:

Split hash file into two files.


diffs (truncated from 714 to 300 lines):

diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -599,11 +599,11 @@ typedef struct {
        int type;               /* type of index entity */
        int width;              /* width of hash entries */
        BUN nil;                /* nil representation */
-       BUN lim;                /* collision list size */
        BUN mask;               /* number of hash buckets-1 (power of 2) */
        void *Hash;             /* hash table */
        void *Link;             /* collision list */
-       Heap heap;              /* heap where the hash is stored */
+       Heap heaplink;          /* heap where the hash links are stored */
+       Heap heapbckt;          /* heap where the hash buckets are stored */
 } Hash;
 
 typedef struct Imprints Imprints;
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -2351,7 +2351,6 @@ BATassertProps(BAT *b)
                        const char *nme = BBP_physical(b->batCacheid);
                        Hash *hs = NULL;
                        BUN mask;
-                       int len;
 
                        if ((hs = GDKzalloc(sizeof(Hash))) == NULL) {
                                fprintf(stderr,
@@ -2359,8 +2358,8 @@ BATassertProps(BAT *b)
                                        "hash table\n");
                                goto abort_check;
                        }
-                       len = snprintf(hs->heap.filename, sizeof(hs->heap.filename), "%s.hash%d", nme, THRgettid());
-                       if (len == -1 || len > (int) sizeof(hs->heap.filename)) {
+                       if (snprintf(hs->heaplink.filename, sizeof(hs->heaplink.filename), "%s.thshprpl%x", nme, THRgettid()) >= (int) sizeof(hs->heaplink.filename) ||
+                           snprintf(hs->heapbckt.filename, sizeof(hs->heapbckt.filename), "%s.thshprpb%x", nme, THRgettid()) >= (int) sizeof(hs->heapbckt.filename)) {
                                GDKfree(hs);
                                fprintf(stderr,
                                        "#BATassertProps: heap filename "
@@ -2373,10 +2372,12 @@ BATassertProps(BAT *b)
                                mask = (BUN) 1 << 16;
                        else
                                mask = HASHmask(b->batCount);
-                       if ((hs->heap.farmid = BBPselectfarm(TRANSIENT, b->ttype,
-                                                       hashheap)) < 0 ||
+                       if ((hs->heaplink.farmid = BBPselectfarm(
+                                    TRANSIENT, b->ttype, hashheap)) < 0 ||
+                           (hs->heapbckt.farmid = BBPselectfarm(
+                                   TRANSIENT, b->ttype, hashheap)) < 0 ||
                            HASHnew(hs, b->ttype, BUNlast(b),
-                                   mask, BUN_NONE) != GDK_SUCCEED) {
+                                   mask, BUN_NONE, false) != GDK_SUCCEED) {
                                GDKfree(hs);
                                fprintf(stderr,
                                        "#BATassertProps: cannot allocate "
@@ -2409,7 +2410,8 @@ BATassertProps(BAT *b)
                                assert(!b->tnonil || !isnil);
                                seennil |= isnil;
                        }
-                       HEAPfree(&hs->heap, true);
+                       HEAPfree(&hs->heaplink, true);
+                       HEAPfree(&hs->heapbckt, true);
                        GDKfree(hs);
                }
          abort_check:
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -568,7 +568,7 @@ BATappend(BAT *b, BAT *n, BAT *s, bool f
        } while (prop);
 #endif
        if (b->thash == (Hash *) 1 || BATcount(b) == 0 ||
-           (b->thash && ((size_t *) b->thash->heap.base)[0] & (1 << 24))) {
+           (b->thash && ((size_t *) b->thash->heapbckt.base)[0] & (1 << 24))) {
                /* don't bother first loading the hash to then change
                 * it, or updating the hash if we replace the heap,
                 * also, we cannot maintain persistent hashes */
@@ -1385,7 +1385,6 @@ BATkeyed(BAT *b)
                        const char *nme;
                        BUN prb;
                        BUN mask;
-                       int len;
 
                        GDKclrerr(); /* not interested in BAThash errors */
                        nme = BBP_physical(b->batCacheid);
@@ -1402,9 +1401,9 @@ BATkeyed(BAT *b)
                        }
                        if ((hs = GDKzalloc(sizeof(Hash))) == NULL)
                                goto doreturn;
-                       len = snprintf(hs->heap.filename, sizeof(hs->heap.filename), "%s.hash%d", nme, THRgettid());
-                       if (len == -1 || len >= (int) sizeof(hs->heap.filename) ||
-                           HASHnew(hs, b->ttype, BUNlast(b), mask, BUN_NONE) != GDK_SUCCEED) {
+                       if (snprintf(hs->heaplink.filename, sizeof(hs->heaplink.filename), "%s.thshkeyl%x", nme, THRgettid()) >= (int) sizeof(hs->heaplink.filename) ||
+                           snprintf(hs->heapbckt.filename, sizeof(hs->heapbckt.filename), "%s.thshkeyb%x", nme, THRgettid()) >= (int) sizeof(hs->heapbckt.filename) ||
+                           HASHnew(hs, b->ttype, BUNlast(b), mask, BUN_NONE, false) != GDK_SUCCEED) {
                                GDKfree(hs);
                                /* err on the side of caution: not keyed */
                                goto doreturn;
@@ -1428,7 +1427,8 @@ BATkeyed(BAT *b)
                                HASHput(hs, prb, p);
                        }
                  doreturn_free:
-                       HEAPfree(&hs->heap, true);
+                       HEAPfree(&hs->heaplink, true);
+                       HEAPfree(&hs->heapbckt, true);
                        GDKfree(hs);
                        if (p == q) {
                                /* we completed the complete scan: no
diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c
--- a/gdk/gdk_bbp.c
+++ b/gdk/gdk_bbp.c
@@ -1972,16 +1972,15 @@ BBPdump(void)
                        }
                }
                if (b->thash && b->thash != (Hash *) 1) {
-                       fprintf(stderr,
-                               " Thash=[%zu,%zu]",
-                               HEAPmemsize(&b->thash->heap),
-                               HEAPvmsize(&b->thash->heap));
+                       size_t m = HEAPmemsize(&b->thash->heaplink) + HEAPmemsize(&b->thash->heapbckt);
+                       size_t v = HEAPvmsize(&b->thash->heaplink) + HEAPvmsize(&b->thash->heapbckt);
+                       fprintf(stderr, " Thash=[%zu,%zu]", m, v);
                        if (BBP_logical(i) && BBP_logical(i)[0] == '.') {
-                               cmem += HEAPmemsize(&b->thash->heap);
-                               cvm += HEAPvmsize(&b->thash->heap);
+                               cmem += m;
+                               cvm += v;
                        } else {
-                               mem += HEAPmemsize(&b->thash->heap);
-                               vm += HEAPvmsize(&b->thash->heap);
+                               mem += m;
+                               vm += v;
                        }
                }
                fprintf(stderr, " role: %s, persistence: %s\n",
@@ -3794,7 +3793,8 @@ BBPdiskscan(const char *parent, size_t b
                        } else if (strncmp(p + 1, "theap", 5) == 0) {
                                BAT *b = getdesc(bid);
                                delete = (b == NULL || !b->tvheap || !b->batCopiedtodisk);
-                       } else if (strncmp(p + 1, "thash", 5) == 0) {
+                       } else if (strncmp(p + 1, "thashl", 6) == 0 ||
+                                  strncmp(p + 1, "thashb", 6) == 0) {
 #ifdef PERSISTENTHASH
                                BAT *b = getdesc(bid);
                                delete = b == NULL;
@@ -3803,6 +3803,10 @@ BBPdiskscan(const char *parent, size_t b
 #else
                                delete = true;
 #endif
+                       } else if (strncmp(p + 1, "thash", 5) == 0) {
+                               /* older versions used .thash which we
+                                * can simply ignore */
+                               delete = true;
                        } else if (strncmp(p + 1, "timprints", 9) == 0) {
                                BAT *b = getdesc(bid);
                                delete = b == NULL;
diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -1047,7 +1047,7 @@ BATgroup_internal(BAT **groups, BAT **ex
                bool gc = g != NULL && (BATordered(g) || BATordered_rev(g));
                const char *nme;
                BUN prb;
-               int bits, len;
+               int bits;
                BUN mask;
                oid grp;
 
@@ -1075,15 +1075,16 @@ BATgroup_internal(BAT **groups, BAT **ex
                 * which power of two */
                bits = 8 * SIZEOF_OID - pop(mask - 1);
                if ((hs = GDKzalloc(sizeof(Hash))) == NULL ||
-                   (hs->heap.farmid = BBPselectfarm(TRANSIENT, b->ttype, hashheap)) < 0) {
+                   (hs->heaplink.farmid = BBPselectfarm(TRANSIENT, b->ttype, hashheap)) < 0 ||
+                   (hs->heapbckt.farmid = BBPselectfarm(TRANSIENT, b->ttype, hashheap)) < 0) {
                        GDKfree(hs);
                        hs = NULL;
                        GDKerror("BATgroup: cannot allocate hash table\n");
                        goto error;
                }
-               len = snprintf(hs->heap.filename, sizeof(hs->heap.filename), "%s.hash%d", nme, THRgettid());
-               if (len < 0 || len >= (int) sizeof(hs->heap.filename) ||
-                   HASHnew(hs, b->ttype, BUNlast(b), mask, BUN_NONE) != GDK_SUCCEED) {
+               if (snprintf(hs->heaplink.filename, sizeof(hs->heaplink.filename), "%s.thshgrpl%x", nme, THRgettid()) >= (int) sizeof(hs->heaplink.filename) ||
+                   snprintf(hs->heapbckt.filename, sizeof(hs->heapbckt.filename), "%s.thshgrpb%x", nme, THRgettid()) >= (int) sizeof(hs->heapbckt.filename) ||
+                   HASHnew(hs, b->ttype, BUNlast(b), mask, BUN_NONE, false) != GDK_SUCCEED) {
                        GDKfree(hs);
                        hs = NULL;
                        GDKerror("BATgroup: cannot allocate hash table\n");
@@ -1173,7 +1174,8 @@ BATgroup_internal(BAT **groups, BAT **ex
                        GRP_create_partial_hash_table_any();
                }
 
-               HEAPfree(&hs->heap, true);
+               HEAPfree(&hs->heapbckt, true);
+               HEAPfree(&hs->heaplink, true);
                GDKfree(hs);
        }
        if (extents) {
@@ -1210,7 +1212,8 @@ BATgroup_internal(BAT **groups, BAT **ex
        return GDK_SUCCEED;
   error:
        if (hs != NULL && hs != b->thash) {
-               HEAPfree(&hs->heap, true);
+               HEAPfree(&hs->heaplink, true);
+               HEAPfree(&hs->heapbckt, true);
                GDKfree(hs);
        }
        if (gn)
diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c
--- a/gdk/gdk_hash.c
+++ b/gdk/gdk_hash.c
@@ -91,18 +91,22 @@ HASHclear(Hash *h)
 #define HASH_HEADER_SIZE       6 /* nr of size_t fields in header */
 
 gdk_return
-HASHnew(Hash *h, int tpe, BUN size, BUN mask, BUN count)
+HASHnew(Hash *h, int tpe, BUN size, BUN mask, BUN count, bool bcktonly)
 {
-       Heap *hp = &h->heap;
-       int width = HASHwidth(size);
+       if (h->width == 0)
+               h->width = HASHwidth(size);
 
-       if (HEAPalloc(hp, mask + size + HASH_HEADER_SIZE * SIZEOF_SIZE_T / width, width) != GDK_SUCCEED)
+       if (!bcktonly) {
+               if (HEAPalloc(&h->heaplink, size, h->width) != GDK_SUCCEED)
+                       return GDK_FAIL;
+               h->heaplink.free = size * h->width;
+               h->Link = h->heaplink.base;
+       }
+       if (HEAPalloc(&h->heapbckt, mask + HASH_HEADER_SIZE * SIZEOF_SIZE_T / h->width, h->width) != GDK_SUCCEED)
                return GDK_FAIL;
-       h->heap.free = (mask + size) * width + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
-       h->lim = size;
+       h->heapbckt.free = mask * h->width + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
        h->mask = mask - 1;
-       h->width = width;
-       switch (width) {
+       switch (h->width) {
        case BUN2:
                h->nil = (BUN) BUN2_NONE;
                break;
@@ -117,17 +121,16 @@ HASHnew(Hash *h, int tpe, BUN size, BUN 
        default:
                assert(0);
        }
-       h->Link = h->heap.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
-       h->Hash = (void *) ((char *) h->Link + h->lim * width);
+       h->Hash = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
        h->type = tpe;
        HASHclear(h);           /* zero the mask */
-       ((size_t *) h->heap.base)[0] = HASH_VERSION;
-       ((size_t *) h->heap.base)[1] = size;
-       ((size_t *) h->heap.base)[2] = mask;
-       ((size_t *) h->heap.base)[3] = width;
-       ((size_t *) h->heap.base)[4] = count;
-       ((size_t *) h->heap.base)[5] = 0; /* # filled slots (chain heads) */
-       ACCELDEBUG fprintf(stderr, "#HASHnew: create hash(size " BUNFMT ", mask " BUNFMT ", width %d, total " BUNFMT " bytes);\n", size, mask, width, (size + mask) * width);
+       ((size_t *) h->heapbckt.base)[0] = HASH_VERSION;
+       ((size_t *) h->heapbckt.base)[1] = size;
+       ((size_t *) h->heapbckt.base)[2] = mask;
+       ((size_t *) h->heapbckt.base)[3] = h->width;
+       ((size_t *) h->heapbckt.base)[4] = count;
+       ((size_t *) h->heapbckt.base)[5] = 0; /* # filled slots (chain heads) */
+       ACCELDEBUG fprintf(stderr, "#HASHnew: create hash(size " BUNFMT ", mask " BUNFMT ", width %d, total " BUNFMT " bytes);\n", size, mask, h->width, (size + mask) * h->width);
        return GDK_SUCCEED;
 }
 
@@ -183,14 +186,18 @@ BATcheckhash(BAT *b)
                        assert(!GDKinmemory());
                        b->thash = NULL;
                        if ((h = GDKzalloc(sizeof(*h))) != NULL &&
-                           (h->heap.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) >= 0) {
+                           (h->heaplink.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) >= 0 &&
+                           (h->heapbckt.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) >= 0) {
                                const char *nme = BBP_physical(b->batCacheid);
-                               strconcat_len(h->heap.filename,
-                                             sizeof(h->heap.filename),
-                                             nme, ".thash", NULL);
+                               strconcat_len(h->heaplink.filename,
+                                             sizeof(h->heaplink.filename),
+                                             nme, ".thashl", NULL);
+                               strconcat_len(h->heapbckt.filename,
+                                             sizeof(h->heapbckt.filename),
+                                             nme, ".thashb", NULL);
 
                                /* check whether a persisted hash can be found */
-                               if ((fd = GDKfdlocate(h->heap.farmid, nme, "rb+", "thash")) >= 0) {
+                               if ((fd = GDKfdlocate(h->heapbckt.farmid, nme, "rb+", "thashb")) >= 0) {
                                        size_t hdata[HASH_HEADER_SIZE];
                                        struct stat st;
 
@@ -202,9 +209,13 @@ BATcheckhash(BAT *b)
                                                    HASH_VERSION) &&
                                            hdata[4] == (size_t) BATcount(b) &&
                                            fstat(fd, &st) == 0 &&
-                                           st.st_size >= (off_t) (h->heap.size = h->heap.free = (hdata[1] + hdata[2]) * hdata[3] + HASH_HEADER_SIZE * SIZEOF_SIZE_T) &&
-                                           HEAPload(&h->heap, nme, "thash", false) == GDK_SUCCEED) {
-                                               h->lim = (BUN) hdata[1];
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to