Changeset: 41f2c8acb614 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/41f2c8acb614
Modified Files:
        gdk/gdk.h
        gdk/gdk_bat.c
        gdk/gdk_batop.c
        gdk/gdk_bbp.c
        gdk/gdk_hash.h
        gdk/gdk_heap.c
        gdk/gdk_private.h
        gdk/gdk_string.c
        monetdb5/modules/mal/mkey.c
        sql/backends/monet5/sql.c
        sql/storage/bat/bat_logger.c
Branch: default
Log Message:

Removed the hashash bit from the heap: string heaps can no longer store a
hash value alongside each string.


diffs (truncated from 819 to 300 lines):

diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -557,8 +557,7 @@ typedef struct {
 
        ATOMIC_TYPE refs;       /* reference count for this heap */
        bte farmid;             /* id of farm where heap is located */
-       bool hashash:1,         /* the string heap contains hash values */
-               cleanhash:1,    /* string heaps must clean hash */
+       bool cleanhash:1,       /* string heaps must clean hash */
                dirty:1,        /* specific heap dirty marker */
                remove:1;       /* remove storage file when freeing */
        storage_t storage;      /* storage mode (mmap/malloc). */
@@ -742,9 +741,10 @@ typedef struct {
 #define assert_shift_width(shift,width) assert(((shift) == 0 && (width) == 0) 
|| ((unsigned)1<<(shift)) == (unsigned)(width))
 
 #define GDKLIBRARY_MINMAX_POS  061042U /* first in Nov2019: no min/max 
position; no BBPinfo value */
-#define GDKLIBRARY_TAILN       061043U /* first after Oct2020: str offset 
heaps names don't take width into account */
+#define GDKLIBRARY_TAILN       061043U /* first in Jul2021: str offset heaps 
names don't take width into account */
+#define GDKLIBRARY_HASHASH     061044U /* first in Jul2021: hashash bit in 
string heaps */
 /* if the version number is updated, also fix snapshot_bats() in bat_logger.c 
*/
-#define GDKLIBRARY             061044U /* first after Oct2020 */
+#define GDKLIBRARY             061045U /* first after Jul2021 */
 
 typedef struct BAT {
        /* static bat properties */
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -748,7 +748,6 @@ heapmove(Heap *dst, Heap *src)
        dst->size = src->size;
        dst->base = src->base;
        dst->farmid = src->farmid;
-       dst->hashash = src->hashash;
        dst->cleanhash = src->cleanhash;
        dst->storage = src->storage;
        dst->newstorage = src->newstorage;
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -82,8 +82,7 @@ insert_string_bat(BAT *b, BAT *n, struct
        ni = bat_iterator(n);
        tp = NULL;
        if (oldcnt == 0 || (!GDK_ELIMDOUBLES(b->tvheap) &&
-                           !GDK_ELIMDOUBLES(n->tvheap) &&
-                           b->tvheap->hashash == n->tvheap->hashash)) {
+                           !GDK_ELIMDOUBLES(n->tvheap))) {
                if (b->batRole == TRANSIENT || b->tvheap == n->tvheap) {
                        /* If b is in the transient farm (i.e. b will
                         * never become persistent), we try some
@@ -185,15 +184,13 @@ insert_string_bat(BAT *b, BAT *n, struct
                         * individually, but reusing the string in b's
                         * string heap. */
                        int match = 0, i;
-                       size_t len = b->tvheap->hashash ? 1024 * EXTRALEN : 0;
+                       size_t len = 0;
                        for (i = 0; i < 1024; i++) {
                                p = (BUN) (((double) rand() / RAND_MAX) * (cnt 
- 1));
                                p = canditer_idx(ci, p) - n->hseqbase;
                                off = BUNtvaroff(ni, p);
                                if (off < b->tvheap->free &&
-                                   strcmp(b->tvheap->base + off, 
n->tvheap->base + off) == 0 &&
-                                   (!b->tvheap->hashash ||
-                                    ((BUN *) (b->tvheap->base + off))[-1] == 
(n->tvheap->hashash ? ((BUN *) (n->tvheap->base + off))[-1] : 
strHash(n->tvheap->base + off))))
+                                   strcmp(b->tvheap->base + off, 
n->tvheap->base + off) == 0)
                                        match++;
                                len += (strlen(n->tvheap->base + off) + 8) & ~7;
                        }
@@ -358,9 +355,7 @@ insert_string_bat(BAT *b, BAT *n, struct
                        off = BUNtvaroff(ni, p); /* the offset */
                        tp = n->tvheap->base + off; /* the string */
                        if (off < b->tvheap->free &&
-                           strcmp(b->tvheap->base + off, tp) == 0 &&
-                           (!b->tvheap->hashash ||
-                            ((BUN *) (b->tvheap->base + off))[-1] == 
(n->tvheap->hashash ? ((BUN *) tp)[-1] : strHash(tp)))) {
+                           strcmp(b->tvheap->base + off, tp) == 0) {
                                /* we found the string at the same
                                 * offset in b's string heap as it was
                                 * in n's string heap, so we don't
diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c
--- a/gdk/gdk_bbp.c
+++ b/gdk/gdk_bbp.c
@@ -404,7 +404,11 @@ static gdk_return BBPrecover_subdir(void
 static bool BBPdiskscan(const char *, size_t);
 
 static int
-heapinit(BAT *b, const char *buf, int *hashash, unsigned bbpversion, bat bid, 
const char *filename, int lineno)
+heapinit(BAT *b, const char *buf,
+#ifdef GDKLIBRARY_HASHASH
+        int *hashash,
+#endif
+        unsigned bbpversion, bat bid, const char *filename, int lineno)
 {
        int t;
        char type[33];
@@ -452,7 +456,9 @@ heapinit(BAT *b, const char *buf, int *h
                TRC_CRITICAL(GDK, "unknown properties are set: incompatible 
database on line %d of BBP.dir\n", lineno);
                return -1;
        }
+#ifdef GDKLIBRARY_HASHASH
        *hashash = var & 2;
+#endif
        var &= ~2;
 #ifdef HAVE_HGE
        if (strcmp(type, "hge") == 0)
@@ -514,7 +520,7 @@ heapinit(BAT *b, const char *buf, int *h
 }
 
 static int
-vheapinit(BAT *b, const char *buf, int hashash, bat bid, const char *filename, 
int lineno)
+vheapinit(BAT *b, const char *buf, bat bid, const char *filename, int lineno)
 {
        int n = 0;
        uint64_t free, size;
@@ -539,7 +545,6 @@ vheapinit(BAT *b, const char *buf, int h
                strconcat_len(b->tvheap->filename, sizeof(b->tvheap->filename),
                              filename, ".theap", NULL);
                b->tvheap->storage = (storage_t) storage;
-               b->tvheap->hashash = hashash != 0;
                b->tvheap->cleanhash = true;
                b->tvheap->newstorage = (storage_t) storage;
                b->tvheap->dirty = false;
@@ -555,10 +560,18 @@ vheapinit(BAT *b, const char *buf, int h
 }
 
 static gdk_return
-BBPreadEntries(FILE *fp, unsigned bbpversion, int lineno)
+BBPreadEntries(FILE *fp, unsigned bbpversion, int lineno
+#ifdef GDKLIBRARY_HASHASH
+              , bat **hashbats, bat *nhashbats
+#endif
+       )
 {
        bat bid = 0;
        char buf[4096];
+#ifdef GDKLIBRARY_HASHASH
+       bat *hbats = NULL;
+       bat nhbats = 0;
+#endif
 
        /* read the BBP.dir and insert the BATs into the BBP */
        while (fgets(buf, sizeof(buf), fp) != NULL) {
@@ -572,7 +585,9 @@ BBPreadEntries(FILE *fp, unsigned bbpver
                char *s, *options = NULL;
                char logical[1024];
                uint64_t count, capacity, base = 0;
+#ifdef GDKLIBRARY_HASHASH
                int Thashash;
+#endif
 
                lineno++;
                if ((s = strchr(buf, '\r')) != NULL) {
@@ -659,18 +674,35 @@ BBPreadEntries(FILE *fp, unsigned bbpver
                        return GDK_FAIL;
                }
                bn->hseqbase = (oid) base;
-               n = heapinit(bn, buf + nread, &Thashash, bbpversion, bid, 
filename, lineno);
+               n = heapinit(bn, buf + nread,
+#ifdef GDKLIBRARY_HASHASH
+                            &Thashash,
+#endif
+                            bbpversion, bid, filename, lineno);
                if (n < 0) {
                        BATdestroy(bn);
                        return GDK_FAIL;
                }
                nread += n;
-               n = vheapinit(bn, buf + nread, Thashash, bid, filename, lineno);
+               n = vheapinit(bn, buf + nread, bid, filename, lineno);
                if (n < 0) {
                        BATdestroy(bn);
                        return GDK_FAIL;
                }
                nread += n;
+#ifdef GDKLIBRARY_HASHASH
+               if (Thashash) {
+                       assert(bbpversion <= GDKLIBRARY_HASHASH);
+                       bat *sb = GDKrealloc(hbats, ++nhbats * sizeof(bat));
+                       if (sb == NULL) {
+                               GDKfree(hbats);
+                               BATdestroy(bn);
+                               return GDK_FAIL;
+                       }
+                       hbats = sb;
+                       hbats[nhbats - 1] = bn->batCacheid;
+               }
+#endif
 
                if (buf[nread] != '\n' && buf[nread] != ' ') {
                        BATdestroy(bn);
@@ -724,6 +756,10 @@ BBPreadEntries(FILE *fp, unsigned bbpver
                BBP_desc(bid) = bn;
                BBP_status_set(bid, BBPEXISTING);       /* do we need other 
status bits? */
        }
+#ifdef GDKLIBRARY_HASHASH
+       *hashbats = hbats;
+       *nhashbats = nhbats;
+#endif
        return GDK_SUCCEED;
 }
 
@@ -830,6 +866,7 @@ BBPheader(FILE *fp, int *lineno)
                return 0;
        }
        if (bbpversion != GDKLIBRARY &&
+           bbpversion != GDKLIBRARY_HASHASH &&
            bbpversion != GDKLIBRARY_TAILN &&
            bbpversion != GDKLIBRARY_MINMAX_POS) {
                TRC_CRITICAL(GDK, "incompatible BBP version: expected 0%o, got 
0%o. "
@@ -984,6 +1021,230 @@ BBPaddfarm(const char *dirname, uint32_t
        return GDK_FAIL;
 }
 
+#ifdef GDKLIBRARY_HASHASH
+static gdk_return
+fixhashashbat(BAT *b)
+{
+       const char *nme = BBP_physical(b->batCacheid);
+       char *srcdir = GDKfilepath(NOFARM, BATDIR, nme, NULL);
+       if (srcdir == NULL) {
+               TRC_CRITICAL(GDK, "GDKfilepath failed\n");
+               return GDK_FAIL;
+       }
+       char *s;
+       if ((s = strrchr(srcdir, DIR_SEP)) != NULL)
+               *s = 0;
+       const char *bnme;
+       if ((bnme = strrchr(nme, DIR_SEP)) != NULL)
+               bnme++;
+       else
+               bnme = nme;
+       long_str filename;
+       snprintf(filename, sizeof(filename), "BACKUP%c%s", DIR_SEP, bnme);
+
+       /* we don't maintain index structures */
+       HASHdestroy(b);
+       IMPSdestroy(b);
+       OIDXdestroy(b);
+       PROPdestroy(b);
+
+       /* make backup of heaps */
+       const char *t;
+       if (GDKmove(b->theap->farmid, srcdir, bnme, "tail1",
+                   BAKDIR, bnme, "tail1", false) == GDK_SUCCEED)
+               t = "tail1";
+       else if (GDKmove(b->theap->farmid, srcdir, bnme, "tail2",
+                        BAKDIR, bnme, "tail2", false) == GDK_SUCCEED)
+               t = "tail2";
+#if SIZEOF_VAR_T == 8
+       else if (GDKmove(b->theap->farmid, srcdir, bnme, "tail4",
+                        BAKDIR, bnme, "tail4", false) == GDK_SUCCEED)
+               t = "tail4";
+#endif
+       else if (GDKmove(b->theap->farmid, srcdir, bnme, "tail",
+                        BAKDIR, bnme, "tail", true) == GDK_SUCCEED)
+               t = "tail";
+       else {
+               GDKfree(srcdir);
+               TRC_CRITICAL(GDK, "cannot make backup of %s.tail\n", nme);
+               return GDK_FAIL;
+       }
+       GDKclrerr();
+       if (GDKmove(b->theap->farmid, srcdir, bnme, "theap",
+                   BAKDIR, bnme, "theap", true) != GDK_SUCCEED) {
+               GDKfree(srcdir);
+               TRC_CRITICAL(GDK, "cannot make backup of %s.theap\n", nme);
+               return GDK_FAIL;
+       }
+       /* load old heaps */
+       Heap h1 = *b->theap;    /* old heap */
+       h1.base = NULL;
+       h1.dirty = false;
+       strconcat_len(h1.filename, sizeof(h1.filename), filename, ".", t, NULL);
+       if (HEAPload(&h1, filename, t, false) != GDK_SUCCEED) {
+               GDKfree(srcdir);
+               TRC_CRITICAL(GDK, "loading old tail heap "
+                            "for BAT %d failed\n", b->batCacheid);
+               return GDK_FAIL;
+       }
+       Heap vh1 = *b->tvheap;  /* old heap */
+       vh1.base = NULL;
+       vh1.dirty = false;
+       strconcat_len(vh1.filename, sizeof(vh1.filename), filename, ".theap", 
NULL);
+       if (HEAPload(&vh1, filename, "theap", false) != GDK_SUCCEED) {
+               GDKfree(srcdir);
+               HEAPfree(&h1, false);
+               TRC_CRITICAL(GDK, "loading old string heap "
+                            "for BAT %d failed\n", b->batCacheid);
+               return GDK_FAIL;
+       }
+
+       /* create new heaps */
+       Heap *h2 = GDKmalloc(sizeof(Heap));
+       Heap *vh2 = GDKmalloc(sizeof(Heap));
+       if (h2 == NULL || vh2 == NULL) {
+               GDKfree(h2);
+               GDKfree(vh2);
+               GDKfree(srcdir);
+               HEAPfree(&h1, false);
+               HEAPfree(&vh1, false);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to