Changeset: 41f2c8acb614 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/41f2c8acb614
Modified Files:
	gdk/gdk.h
	gdk/gdk_bat.c
	gdk/gdk_batop.c
	gdk/gdk_bbp.c
	gdk/gdk_hash.h
	gdk/gdk_heap.c
	gdk/gdk_private.h
	gdk/gdk_string.c
	monetdb5/modules/mal/mkey.c
	sql/backends/monet5/sql.c
	sql/storage/bat/bat_logger.c
Branch: default
Log Message:
Removed hashash bit from heap: string heaps no longer optionally contain the hash value. diffs (truncated from 819 to 300 lines): diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -557,8 +557,7 @@ typedef struct { ATOMIC_TYPE refs; /* reference count for this heap */ bte farmid; /* id of farm where heap is located */ - bool hashash:1, /* the string heap contains hash values */ - cleanhash:1, /* string heaps must clean hash */ + bool cleanhash:1, /* string heaps must clean hash */ dirty:1, /* specific heap dirty marker */ remove:1; /* remove storage file when freeing */ storage_t storage; /* storage mode (mmap/malloc). */ @@ -742,9 +741,10 @@ typedef struct { #define assert_shift_width(shift,width) assert(((shift) == 0 && (width) == 0) || ((unsigned)1<<(shift)) == (unsigned)(width)) #define GDKLIBRARY_MINMAX_POS 061042U /* first in Nov2019: no min/max position; no BBPinfo value */ -#define GDKLIBRARY_TAILN 061043U /* first after Oct2020: str offset heaps names don't take width into account */ +#define GDKLIBRARY_TAILN 061043U /* first in Jul2021: str offset heaps names don't take width into account */ +#define GDKLIBRARY_HASHASH 061044U /* first in Jul2021: hashash bit in string heaps */ /* if the version number is updated, also fix snapshot_bats() in bat_logger.c */ -#define GDKLIBRARY 061044U /* first after Oct2020 */ +#define GDKLIBRARY 061045U /* first after Jul2021 */ typedef struct BAT { /* static bat properties */ diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c --- a/gdk/gdk_bat.c +++ b/gdk/gdk_bat.c @@ -748,7 +748,6 @@ heapmove(Heap *dst, Heap *src) dst->size = src->size; dst->base = src->base; dst->farmid = src->farmid; - dst->hashash = src->hashash; dst->cleanhash = src->cleanhash; dst->storage = src->storage; dst->newstorage = src->newstorage; diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -82,8 +82,7 @@ insert_string_bat(BAT *b, BAT *n, struct ni = bat_iterator(n); tp = NULL; if (oldcnt == 0 
|| (!GDK_ELIMDOUBLES(b->tvheap) && - !GDK_ELIMDOUBLES(n->tvheap) && - b->tvheap->hashash == n->tvheap->hashash)) { + !GDK_ELIMDOUBLES(n->tvheap))) { if (b->batRole == TRANSIENT || b->tvheap == n->tvheap) { /* If b is in the transient farm (i.e. b will * never become persistent), we try some @@ -185,15 +184,13 @@ insert_string_bat(BAT *b, BAT *n, struct * individually, but reusing the string in b's * string heap. */ int match = 0, i; - size_t len = b->tvheap->hashash ? 1024 * EXTRALEN : 0; + size_t len = 0; for (i = 0; i < 1024; i++) { p = (BUN) (((double) rand() / RAND_MAX) * (cnt - 1)); p = canditer_idx(ci, p) - n->hseqbase; off = BUNtvaroff(ni, p); if (off < b->tvheap->free && - strcmp(b->tvheap->base + off, n->tvheap->base + off) == 0 && - (!b->tvheap->hashash || - ((BUN *) (b->tvheap->base + off))[-1] == (n->tvheap->hashash ? ((BUN *) (n->tvheap->base + off))[-1] : strHash(n->tvheap->base + off)))) + strcmp(b->tvheap->base + off, n->tvheap->base + off) == 0) match++; len += (strlen(n->tvheap->base + off) + 8) & ~7; } @@ -358,9 +355,7 @@ insert_string_bat(BAT *b, BAT *n, struct off = BUNtvaroff(ni, p); /* the offset */ tp = n->tvheap->base + off; /* the string */ if (off < b->tvheap->free && - strcmp(b->tvheap->base + off, tp) == 0 && - (!b->tvheap->hashash || - ((BUN *) (b->tvheap->base + off))[-1] == (n->tvheap->hashash ? 
((BUN *) tp)[-1] : strHash(tp)))) { + strcmp(b->tvheap->base + off, tp) == 0) { /* we found the string at the same * offset in b's string heap as it was * in n's string heap, so we don't diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c --- a/gdk/gdk_bbp.c +++ b/gdk/gdk_bbp.c @@ -404,7 +404,11 @@ static gdk_return BBPrecover_subdir(void static bool BBPdiskscan(const char *, size_t); static int -heapinit(BAT *b, const char *buf, int *hashash, unsigned bbpversion, bat bid, const char *filename, int lineno) +heapinit(BAT *b, const char *buf, +#ifdef GDKLIBRARY_HASHASH + int *hashash, +#endif + unsigned bbpversion, bat bid, const char *filename, int lineno) { int t; char type[33]; @@ -452,7 +456,9 @@ heapinit(BAT *b, const char *buf, int *h TRC_CRITICAL(GDK, "unknown properties are set: incompatible database on line %d of BBP.dir\n", lineno); return -1; } +#ifdef GDKLIBRARY_HASHASH *hashash = var & 2; +#endif var &= ~2; #ifdef HAVE_HGE if (strcmp(type, "hge") == 0) @@ -514,7 +520,7 @@ heapinit(BAT *b, const char *buf, int *h } static int -vheapinit(BAT *b, const char *buf, int hashash, bat bid, const char *filename, int lineno) +vheapinit(BAT *b, const char *buf, bat bid, const char *filename, int lineno) { int n = 0; uint64_t free, size; @@ -539,7 +545,6 @@ vheapinit(BAT *b, const char *buf, int h strconcat_len(b->tvheap->filename, sizeof(b->tvheap->filename), filename, ".theap", NULL); b->tvheap->storage = (storage_t) storage; - b->tvheap->hashash = hashash != 0; b->tvheap->cleanhash = true; b->tvheap->newstorage = (storage_t) storage; b->tvheap->dirty = false; @@ -555,10 +560,18 @@ vheapinit(BAT *b, const char *buf, int h } static gdk_return -BBPreadEntries(FILE *fp, unsigned bbpversion, int lineno) +BBPreadEntries(FILE *fp, unsigned bbpversion, int lineno +#ifdef GDKLIBRARY_HASHASH + , bat **hashbats, bat *nhashbats +#endif + ) { bat bid = 0; char buf[4096]; +#ifdef GDKLIBRARY_HASHASH + bat *hbats = NULL; + bat nhbats = 0; +#endif /* read the BBP.dir and insert the BATs 
into the BBP */ while (fgets(buf, sizeof(buf), fp) != NULL) { @@ -572,7 +585,9 @@ BBPreadEntries(FILE *fp, unsigned bbpver char *s, *options = NULL; char logical[1024]; uint64_t count, capacity, base = 0; +#ifdef GDKLIBRARY_HASHASH int Thashash; +#endif lineno++; if ((s = strchr(buf, '\r')) != NULL) { @@ -659,18 +674,35 @@ BBPreadEntries(FILE *fp, unsigned bbpver return GDK_FAIL; } bn->hseqbase = (oid) base; - n = heapinit(bn, buf + nread, &Thashash, bbpversion, bid, filename, lineno); + n = heapinit(bn, buf + nread, +#ifdef GDKLIBRARY_HASHASH + &Thashash, +#endif + bbpversion, bid, filename, lineno); if (n < 0) { BATdestroy(bn); return GDK_FAIL; } nread += n; - n = vheapinit(bn, buf + nread, Thashash, bid, filename, lineno); + n = vheapinit(bn, buf + nread, bid, filename, lineno); if (n < 0) { BATdestroy(bn); return GDK_FAIL; } nread += n; +#ifdef GDKLIBRARY_HASHASH + if (Thashash) { + assert(bbpversion <= GDKLIBRARY_HASHASH); + bat *sb = GDKrealloc(hbats, ++nhbats * sizeof(bat)); + if (sb == NULL) { + GDKfree(hbats); + BATdestroy(bn); + return GDK_FAIL; + } + hbats = sb; + hbats[nhbats - 1] = bn->batCacheid; + } +#endif if (buf[nread] != '\n' && buf[nread] != ' ') { BATdestroy(bn); @@ -724,6 +756,10 @@ BBPreadEntries(FILE *fp, unsigned bbpver BBP_desc(bid) = bn; BBP_status_set(bid, BBPEXISTING); /* do we need other status bits? */ } +#ifdef GDKLIBRARY_HASHASH + *hashbats = hbats; + *nhashbats = nhbats; +#endif return GDK_SUCCEED; } @@ -830,6 +866,7 @@ BBPheader(FILE *fp, int *lineno) return 0; } if (bbpversion != GDKLIBRARY && + bbpversion != GDKLIBRARY_HASHASH && bbpversion != GDKLIBRARY_TAILN && bbpversion != GDKLIBRARY_MINMAX_POS) { TRC_CRITICAL(GDK, "incompatible BBP version: expected 0%o, got 0%o. 
" @@ -984,6 +1021,230 @@ BBPaddfarm(const char *dirname, uint32_t return GDK_FAIL; } +#ifdef GDKLIBRARY_HASHASH +static gdk_return +fixhashashbat(BAT *b) +{ + const char *nme = BBP_physical(b->batCacheid); + char *srcdir = GDKfilepath(NOFARM, BATDIR, nme, NULL); + if (srcdir == NULL) { + TRC_CRITICAL(GDK, "GDKfilepath failed\n"); + return GDK_FAIL; + } + char *s; + if ((s = strrchr(srcdir, DIR_SEP)) != NULL) + *s = 0; + const char *bnme; + if ((bnme = strrchr(nme, DIR_SEP)) != NULL) + bnme++; + else + bnme = nme; + long_str filename; + snprintf(filename, sizeof(filename), "BACKUP%c%s", DIR_SEP, bnme); + + /* we don't maintain index structures */ + HASHdestroy(b); + IMPSdestroy(b); + OIDXdestroy(b); + PROPdestroy(b); + + /* make backup of heaps */ + const char *t; + if (GDKmove(b->theap->farmid, srcdir, bnme, "tail1", + BAKDIR, bnme, "tail1", false) == GDK_SUCCEED) + t = "tail1"; + else if (GDKmove(b->theap->farmid, srcdir, bnme, "tail2", + BAKDIR, bnme, "tail2", false) == GDK_SUCCEED) + t = "tail2"; +#if SIZEOF_VAR_T == 8 + else if (GDKmove(b->theap->farmid, srcdir, bnme, "tail4", + BAKDIR, bnme, "tail4", false) == GDK_SUCCEED) + t = "tail4"; +#endif + else if (GDKmove(b->theap->farmid, srcdir, bnme, "tail", + BAKDIR, bnme, "tail", true) == GDK_SUCCEED) + t = "tail"; + else { + GDKfree(srcdir); + TRC_CRITICAL(GDK, "cannot make backup of %s.tail\n", nme); + return GDK_FAIL; + } + GDKclrerr(); + if (GDKmove(b->theap->farmid, srcdir, bnme, "theap", + BAKDIR, bnme, "theap", true) != GDK_SUCCEED) { + GDKfree(srcdir); + TRC_CRITICAL(GDK, "cannot make backup of %s.theap\n", nme); + return GDK_FAIL; + } + /* load old heaps */ + Heap h1 = *b->theap; /* old heap */ + h1.base = NULL; + h1.dirty = false; + strconcat_len(h1.filename, sizeof(h1.filename), filename, ".", t, NULL); + if (HEAPload(&h1, filename, t, false) != GDK_SUCCEED) { + GDKfree(srcdir); + TRC_CRITICAL(GDK, "loading old tail heap " + "for BAT %d failed\n", b->batCacheid); + return GDK_FAIL; + } + Heap vh1 = 
*b->tvheap; /* old heap */ + vh1.base = NULL; + vh1.dirty = false; + strconcat_len(vh1.filename, sizeof(vh1.filename), filename, ".theap", NULL); + if (HEAPload(&vh1, filename, "theap", false) != GDK_SUCCEED) { + GDKfree(srcdir); + HEAPfree(&h1, false); + TRC_CRITICAL(GDK, "loading old string heap " + "for BAT %d failed\n", b->batCacheid); + return GDK_FAIL; + } + + /* create new heaps */ + Heap *h2 = GDKmalloc(sizeof(Heap)); + Heap *vh2 = GDKmalloc(sizeof(Heap)); + if (h2 == NULL || vh2 == NULL) { + GDKfree(h2); + GDKfree(vh2); + GDKfree(srcdir); + HEAPfree(&h1, false); + HEAPfree(&vh1, false); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list