Changeset: 64682e00f4ab for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=64682e00f4ab Modified Files: gdk/gdk.h gdk/gdk_bbp.c gdk/gdk_logger.c gdk/gdk_logger.h Branch: default Log Message:
Implemented database upgrade code for converting old floating point NIL to NaN. diffs (truncated from 432 to 300 lines): diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -859,7 +859,8 @@ typedef struct { #define GDKLIBRARY_NOKEY 061034 /* nokey values can't be trusted */ #define GDKLIBRARY_BADEMPTY 061035 /* possibility of duplicate empty str */ #define GDKLIBRARY_TALIGN 061036 /* talign field in BBP.dir */ -#define GDKLIBRARY 061037 +#define GDKLIBRARY_NIL_NAN 061037 /* flt/dbl NIL not represented by NaN */ +#define GDKLIBRARY 061040 typedef struct BAT { /* static bat properties */ diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c --- a/gdk/gdk_bbp.c +++ b/gdk/gdk_bbp.c @@ -922,6 +922,179 @@ fixstrbats(void) } #endif +#ifdef GDKLIBRARY_NIL_NAN +static void +fixfltheap(BAT *b) +{ + long_str filename; + Heap h1; /* old heap */ + Heap h2; /* new heap */ + const char *nme, *bnme; + char *srcdir; + BUN i; + int nofix = 1; + + nme = BBP_physical(b->batCacheid); + srcdir = GDKfilepath(NOFARM, BATDIR, nme, NULL); + if (srcdir == NULL) + GDKfatal("fixfltheap: GDKmalloc failed\n"); + *strrchr(srcdir, DIR_SEP) = 0; + + if ((bnme = strrchr(nme, DIR_SEP)) != NULL) + bnme++; + else + bnme = nme; + sprintf(filename, "BACKUP%c%s", DIR_SEP, bnme); + + /* make backup of heap */ + if (GDKmove(b->theap.farmid, srcdir, bnme, "tail", BAKDIR, bnme, "tail") != GDK_SUCCEED) + GDKfatal("fixfltheap: cannot make backup of %s.tail\n", nme); + /* load old heap */ + h1 = b->theap; + h1.filename = NULL; + h1.base = NULL; + h1.dirty = 0; + if (HEAPload(&h1, filename, "tail", 0) != GDK_SUCCEED) + GDKfatal("fixfltheap: loading old tail heap " + "for BAT %d failed\n", b->batCacheid); + + /* create new heap */ + h2 = b->theap; + h2.filename = GDKfilepath(NOFARM, NULL, nme, "tail"); + if (h2.filename == NULL) + GDKfatal("fixfltheap: GDKmalloc failed\n"); + if (HEAPalloc(&h2, b->batCapacity, b->twidth) != GDK_SUCCEED) + GDKfatal("fixfltheap: allocating new tail heap " + "for BAT %d 
failed\n", b->batCacheid); + h2.dirty = TRUE; + h2.free = h1.free; + + switch (b->ttype) { + case TYPE_flt: { + const flt *restrict o = (const flt *) h1.base; + flt *restrict n = (flt *) h2.base; + + for (i = 0; i < b->batCount; i++) { + if (o[i] == GDK_flt_min) { + b->tnil = 1; + n[i] = flt_nil; + nofix = 0; + } else { + n[i] = o[i]; + } + } + break; + } + case TYPE_dbl: { + const dbl *restrict o = (const dbl *) h1.base; + dbl *restrict n = (dbl *) h2.base; + + for (i = 0; i < b->batCount; i++) { + if (o[i] == GDK_dbl_min) { + b->tnil = 1; + n[i] = dbl_nil; + nofix = 0; + } else { + n[i] = o[i]; + } + } + break; + } + default: { + struct mbr { + float xmin, ymin, xmax, ymax; + }; + const struct mbr *restrict o = (const struct mbr *) h1.base; + struct mbr *restrict n = (struct mbr *) h2.base; + + assert(strcmp(ATOMunknown_name(b->ttype), "mbr") == 0); + assert(b->twidth == 4 * sizeof(flt)); + + for (i = 0; i < b->batCount; i++) { + if (o[i].xmin == GDK_flt_min || + o[i].xmax == GDK_flt_min || + o[i].ymin == GDK_flt_min || + o[i].ymax == GDK_flt_min) { + b->tnil = 1; + n[i].xmin = n[i].xmax = n[i].ymin = n[i].ymax = flt_nil; + nofix = 0; + } else { + n[i] = o[i]; + } + } + break; + } + } + + /* cleanup */ + HEAPfree(&h1, 0); + if (nofix) { + /* didn't fix anything, move backup back */ + HEAPfree(&h2, 1); + if (GDKmove(b->theap.farmid, BAKDIR, bnme, "tail", srcdir, bnme, "tail") != GDK_SUCCEED) + GDKfatal("fixfltheap: cannot restore backup of %s.tail\n", nme); + } else { + /* heap was fixed */ + b->batDirtydesc = 1; + if (HEAPsave(&h2, nme, "tail") != GDK_SUCCEED) + GDKfatal("fixfltheap: saving heap failed\n"); + HEAPfree(&h2, 0); + b->theap = h2; + } + GDKfree(srcdir); +} + +static void +fixfloatbats(void) +{ + bat bid; + BAT *b; + char filename[PATHLENGTH]; + FILE *fp; + size_t len; + + for (bid = 1; bid < (bat) ATOMIC_GET(BBPsize, BBPsizeLock); bid++) { + if ((b = BBP_desc(bid)) == NULL) { + /* not a valid BAT */ + continue; + } + if (BBP_logical(bid) && + (len = 
strlen(BBP_logical(bid))) > 12 && + strcmp(BBP_logical(bid) + len - 12, "_catalog_nme") == 0) { + /* this is one of the files used by the + * logger. We need to communicate to the + * logger that it also needs to do a + * conversion. That is done by creating a + * file here based on the name of this BAT. */ + snprintf(filename, sizeof(filename), + "%s/%.*s_nil-nan-convert", + BBPfarms[0].dirname, + (int) (len - 12), BBP_logical(bid)); + fp = fopen(filename, "w"); + if (fp == NULL) + GDKfatal("fixfloatbats: cannot create file %s\n", + filename); + fclose(fp); + } + if (b->batCount == 0 || b->tnonil) { + /* no NILs to convert */ + continue; + } + if (b->ttype < 0) { + const char *anme; + + /* as yet unknown tail column type */ + anme = ATOMunknown_name(b->ttype); + /* known string types */ + if (strcmp(anme, "mbr") != 0) + continue; + } else if (b->ttype != TYPE_flt && b->ttype != TYPE_dbl) + continue; + fixfltheap(b); + } +} +#endif + /* * A read only BAT can be shared in a file system by reading its * descriptor separately. 
The default src=0 is to read the full @@ -1263,7 +1436,8 @@ BBPheader(FILE *fp) bbpversion != GDKLIBRARY_OLDWKB && bbpversion != GDKLIBRARY_INSERTED && bbpversion != GDKLIBRARY_HEADED && - bbpversion != GDKLIBRARY_TALIGN) { + bbpversion != GDKLIBRARY_TALIGN && + bbpversion != GDKLIBRARY_NIL_NAN) { GDKfatal("BBPinit: incompatible BBP version: expected 0%o, got 0%o.\n" "This database was probably created by %s version of MonetDB.", GDKLIBRARY, bbpversion, @@ -1473,6 +1647,10 @@ BBPinit(void) if (bbpversion <= GDKLIBRARY_BADEMPTY) fixstrbats(); #endif +#ifdef GDKLIBRARY_NIL_NAN + if (bbpversion <= GDKLIBRARY_NIL_NAN) + fixfloatbats(); +#endif if (bbpversion < GDKLIBRARY) TMcommit(); GDKfree(bbpdirstr); diff --git a/gdk/gdk_logger.c b/gdk/gdk_logger.c --- a/gdk/gdk_logger.c +++ b/gdk/gdk_logger.c @@ -308,6 +308,45 @@ log_read_seq(logger *lg, logformat *l) return LOG_OK; } +#ifdef GDKLIBRARY_NIL_NAN +static void * +fltRead(void *dst, stream *s, size_t cnt) +{ + flt *ptr; + size_t i; + + if ((ptr = BATatoms[TYPE_flt].atomRead(dst, s, cnt)) == NULL) + return NULL; + for (i = 0; i < cnt; i++) + if (ptr[i] == GDK_flt_min) + ptr[i] = flt_nil; + return ptr; +} + +static void * +dblRead(void *dst, stream *s, size_t cnt) +{ + dbl *ptr; + size_t i; + + if ((ptr = BATatoms[TYPE_dbl].atomRead(dst, s, cnt)) == NULL) + return NULL; + for (i = 0; i < cnt; i++) + if (ptr[i] == GDK_dbl_min) + ptr[i] = dbl_nil; + return ptr; +} + +static void * +mbrRead(void *dst, stream *s, size_t cnt) +{ + /* an MBR consists of 4 flt values; here we don't care about + * anything else, we just need to convert the old NIL to NaN + * for all those values */ + return fltRead(dst, s, cnt * 4); +} +#endif + static log_return log_read_updates(logger *lg, trans *tr, logformat *l, char *name) { @@ -352,6 +391,16 @@ log_read_updates(logger *lg, trans *tr, if (ATOMstorage(tt) < TYPE_str) tv = lg->buf; +#ifdef GDKLIBRARY_NIL_NAN + if (lg->convert_nil_nan) { + if (tt == TYPE_flt) + rt = fltRead; + else if (tt == 
TYPE_dbl) + rt = dblRead; + else if (tt > TYPE_str && strcmp(BATatoms[tt].name, "mbr") == 0) + rt = mbrRead; + } +#endif assert(l->nr <= (lng) BUN_MAX); if (l->flag == LOG_UPDATE) { @@ -1782,15 +1831,124 @@ logger_load(int debug, const char *fn, c } if (fp != NULL) { +#ifdef GDKLIBRARY_NIL_NAN + char cvfile[PATHLENGTH]; +#endif + if (check_version(lg, fp) != GDK_SUCCEED) { goto error; } +#ifdef GDKLIBRARY_NIL_NAN + /* When a file *_nil-nan-convert exists in the + * database, it was left there by the BBP + * initialization code when it did a conversion of old + * style NILs to NaNs. If the file exists, we first _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list