Changeset: 6e85b1747188 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6e85b1747188
Modified Files:
	clients/mapiclient/tomograph.c
	gdk/gdk_atoms.c
	gdk/gdk_bbp.c
	gdk/gdk_heap.c
	monetdb5/mal/mal_factory.c
	monetdb5/mal/mal_instruction.h
	monetdb5/mal/mal_profiler.c
	monetdb5/mal/mal_readline.c
	monetdb5/modules/mal/tablet.c
	monetdb5/optimizer/opt_qep.c
Branch: default
Log Message:

Merge with Feb2013 branch. diffs (truncated from 382 to 300 lines): diff --git a/clients/mapiclient/tomograph.c b/clients/mapiclient/tomograph.c --- a/clients/mapiclient/tomograph.c +++ b/clients/mapiclient/tomograph.c @@ -2157,7 +2157,7 @@ doProfile(void *d) for (i = 0; i < MAXTHREADS; i++) threads[i] = topbox++; - /* sent single query */ + /* send single query */ if (sqlstatement) { doRequest(dbhsql, sqlstatement); } diff --git a/gdk/gdk_atoms.c b/gdk/gdk_atoms.c --- a/gdk/gdk_atoms.c +++ b/gdk/gdk_atoms.c @@ -963,6 +963,9 @@ strHash(const char *s) return res; } +/* if at least (2*SIZEOF_BUN), also store length (heaps are then + * incompatible) */ +#define EXTRALEN ((SIZEOF_BUN + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1)) void strCleanHash(Heap *h, int rebuild) @@ -971,6 +974,50 @@ strCleanHash(Heap *h, int rebuild) if (!GDK_ELIMDOUBLES(h)) { /* flush hash table for security */ memset(h->base, 0, GDK_STRHASHSIZE); + } else { + /* rebuild hash table for double elimination + * + * If appending strings to the BAT was aborted, if the + * heap was memory mapped, the hash in the string heap + * may well be incorrect. Therefore we don't trust it + * when we read in a string heap and we rebuild the + * complete table (it is small, so this won't take any + * time at all). */ + size_t pad, pos; + const size_t extralen = h->hashash ? 
EXTRALEN : 0; + stridx_t *bucket; + BUN off, strhash; + const char *s; + + memset(h->base, 0, GDK_STRHASHSIZE); + pos = GDK_STRHASHSIZE; + while (pos < h->free) { + pad = GDK_VARALIGN - (pos & (GDK_VARALIGN - 1)); + if (pad < sizeof(stridx_t)) + pad += GDK_VARALIGN; + pos += pad + extralen; + s = h->base + pos; + if (h->hashash) + strhash = ((const BUN *) s)[-1]; + else + GDK_STRHASH(s, strhash); + off = strhash & GDK_STRHASHMASK; + bucket = ((stridx_t *) h->base) + off; + *bucket = pos - extralen - sizeof(stridx_t); + pos += GDK_STRLEN(s); + } +#ifndef NDEBUG + pos = GDK_STRHASHSIZE; + while (pos < h->free) { + pad = GDK_VARALIGN - (pos & (GDK_VARALIGN - 1)); + if (pad < sizeof(stridx_t)) + pad += GDK_VARALIGN; + pos += pad + extralen; + s = h->base + pos; + assert(strLocate(h, s) != 0); + pos += GDK_STRLEN(s); + } +#endif } } @@ -979,15 +1026,11 @@ strCleanHash(Heap *h, int rebuild) * the location of a string in the heap if it exists. Otherwise it * returns zero. */ -/* if at least (2*SIZEOF_BUN), also store length (heaps are then - * incompatible) */ -#define EXTRALEN ((SIZEOF_BUN + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1)) - var_t strLocate(Heap *h, const char *v) { stridx_t *ref, *next; - size_t extralen = h->hashash ? EXTRALEN : 0; + const size_t extralen = h->hashash ? EXTRALEN : 0; /* search hash-table, if double-elimination is still in place */ BUN off; @@ -1012,7 +1055,7 @@ strPut(Heap *h, var_t *dst, const char * size_t elimbase = GDK_ELIMBASE(h->free); size_t pad = GDK_VARALIGN - (h->free & (GDK_VARALIGN - 1)); size_t pos, len = GDK_STRLEN(v); - size_t extralen = h->hashash ? EXTRALEN : 0; + const size_t extralen = h->hashash ? 
EXTRALEN : 0; stridx_t *bucket, *ref, *next; BUN off, strhash; @@ -1021,24 +1064,30 @@ strPut(Heap *h, var_t *dst, const char * off &= GDK_STRHASHMASK; bucket = ((stridx_t *) h->base) + off; - /* search hash-table, if double-elimination is still in place */ - if (elimbase == 0) { /* small string heap (<64KB) -- fully double eliminated */ - for (ref = bucket; *ref; ref = next) { /* search the linked list */ + /* if double-elimination is still in place, search hash-table */ + if (elimbase == 0) { + /* small string heap (<64KB) -- fully double eliminated */ + for (ref = bucket; *ref; ref = next) { + /* search the linked list */ next = (stridx_t *) (h->base + *ref); - if (GDK_STRCMP(v, (str) (next + 1) + extralen) == 0) { /* found */ + if (GDK_STRCMP(v, (str) (next + 1) + extralen) == 0) { + /* found */ pos = sizeof(stridx_t) + *ref + extralen; return *dst = (var_t) (pos >> GDK_VARSHIFT); } } /* is there room for the next pointer in the padding space? */ - if (pad < sizeof(stridx_t)) - pad += GDK_VARALIGN; /* if not, pad more */ + if (pad < sizeof(stridx_t)) { + /* if not, pad more */ + pad += GDK_VARALIGN; + } } else if (*bucket) { /* large string heap (>=64KB) -- * opportunistic/probabilistic double elimination */ pos = elimbase + *bucket + extralen; if (GDK_STRCMP(v, h->base + pos) == 0) { - return *dst = (var_t) (pos >> GDK_VARSHIFT); /* already in heap; do not insert! */ + /* already in heap; do not insert! 
*/ + return *dst = (var_t) (pos >> GDK_VARSHIFT); } #if SIZEOF_VAR_T >= SIZEOF_VOID_P /* in fact SIZEOF_VAR_T == SIZEOF_VOID_P */ if (extralen == 0) diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c --- a/gdk/gdk_bbp.c +++ b/gdk/gdk_bbp.c @@ -3165,8 +3165,14 @@ do_backup(const char *srcdir, const char int ret = 0; /* direct mmap is unprotected (readonly usage, or has WAL - * protection) */ - if (h->storage != STORE_MMAP) { + * protection); however, if we're backing up for subcommit + * and a backup already exists in the main backup directory + * (see GDKupgradevarheap), move the file */ + if (subcommit && file_exists(BAKDIR, nme, extbase)) { + assert(h->storage == STORE_MMAP); + if (file_move(BAKDIR, SUBDIR, nme, extbase)) + return -1; + } else if (h->storage != STORE_MMAP) { /* STORE_PRIV saves into X.new files. Two cases could * happen. The first is when a valid X.new exists * because of an access change or a previous diff --git a/gdk/gdk_heap.c b/gdk/gdk_heap.c --- a/gdk/gdk_heap.c +++ b/gdk/gdk_heap.c @@ -390,6 +390,20 @@ HEAPshrink(Heap *h, size_t size) return -1; } +/* returns 1 if the file exists */ +static int +file_exists(const char *dir, const char *name, const char *ext) +{ + long_str path; + struct stat st; + int ret; + + GDKfilepath(path, dir, name, ext); + ret = stat(path, &st); + IODEBUG THRprintf(GDKstdout, "#stat(%s) = %d\n", path, ret); + return (ret == 0); +} + int GDKupgradevarheap(COLrec *c, var_t v, int copyall) { @@ -403,6 +417,7 @@ GDKupgradevarheap(COLrec *c, var_t v, in #endif size_t i, n; size_t savefree; + const char *filename; assert(c->heap.parentid == 0); assert(width != 0); @@ -414,11 +429,65 @@ GDKupgradevarheap(COLrec *c, var_t v, in } assert(c->width < width); assert(c->shift < shift); + /* if copyall is set, we need to convert the whole heap, since * we may be in the middle of an insert loop that adjusts the * free value at the end; otherwise only copy the area * indicated by the "free" pointer */ n = (copyall ? 
c->heap.size : c->heap.free) >> c->shift; + + /* for memory mapped files, create a backup copy before widening + * + * this solves a problem that we don't control what's in the + * actual file until the next commit happens, so a crash might + * otherwise leave the file (and the database) in an + * inconsistent state + * + * also see do_backup in gdk_bbp.c */ + filename = strrchr(c->heap.filename, DIR_SEP); + if (filename == NULL) + filename = c->heap.filename; + else + filename++; + if (c->heap.storage == STORE_MMAP && !file_exists(BAKDIR, filename, NULL)) { + int fd; + ssize_t ret = 0; + size_t size = n << c->shift; + const char *base = c->heap.base; + + /* first save heap in file with extra .tmp extension */ + if ((fd = GDKfdlocate(c->heap.filename, "wb", "tmp")) < 0) + return GDK_FAIL; + while (size > 0) { + ret = write(fd, base, (unsigned) MIN(1 << 30, size)); + if (ret < 0) + size = 0; + size -= ret; + base += ret; + } + if (ret < 0 || +#if defined(NATIVE_WIN32) + _commit(fd) < 0 || +#elif defined(HAVE_FDATASYNC) + fdatasync(fd) < 0 || +#elif defined(HAVE_FSYNC) + fsync(fd) < 0 || +#endif + close(fd) < 0) { + /* something went wrong: abandon ship */ + close(fd); + GDKunlink(BATDIR, c->heap.filename, "tmp"); + return GDK_FAIL; + } + /* move tmp file to backup directory (without .tmp + * extension) */ + if (GDKmove(BATDIR, c->heap.filename, "tmp", BAKDIR, filename, NULL) < 0) { + /* backup failed */ + GDKunlink(BATDIR, c->heap.filename, "tmp"); + return GDK_FAIL; + } + } + savefree = c->heap.free; if (copyall) c->heap.free = c->heap.size; diff --git a/monetdb5/mal/mal_factory.c b/monetdb5/mal/mal_factory.c --- a/monetdb5/mal/mal_factory.c +++ b/monetdb5/mal/mal_factory.c @@ -122,7 +122,7 @@ * end random; * @end example * - * The operators to built client aware factories are, + * The operators to build client aware factories are, * @sc{factories.getCaller()}, which returns a client * index, @sc{factories.getModule()} and @sc{factories.getFunction()}, * which 
returns the identity of scope enclosed. diff --git a/monetdb5/mal/mal_instruction.h b/monetdb5/mal/mal_instruction.h --- a/monetdb5/mal/mal_instruction.h +++ b/monetdb5/mal/mal_instruction.h @@ -137,7 +137,7 @@ typedef struct MALBLK { ProfPtr profiler; struct MALBLK *history; /* of optimizer actions */ short keephistory; /* do we need the history at all */ - short dotfile; /* sent dot file to stethoscope? */ + short dotfile; /* send dot file to stethoscope? */ str marker; /* history points are marked for backtracking */ int maxarg; /* keep track on the maximal arguments used */ ptr replica; /* for the replicator tests */ diff --git a/monetdb5/mal/mal_profiler.c b/monetdb5/mal/mal_profiler.c --- a/monetdb5/mal/mal_profiler.c +++ b/monetdb5/mal/mal_profiler.c @@ -203,7 +203,7 @@ deactivateCounter(str name) * Offline processing * The offline processing structure is the easiest. We merely have to * produce a correct tuple format for the front-end. - * To avoid unnecessary locks we first built the event as a string + * To avoid unnecessary locks we first build the event as a string * It uses a local logbuffer[LOGLEN] and logbase, logtop, loglen */ #define LOGLEN 8192 diff --git a/monetdb5/mal/mal_readline.c b/monetdb5/mal/mal_readline.c --- a/monetdb5/mal/mal_readline.c +++ b/monetdb5/mal/mal_readline.c @@ -61,7 +61,7 @@ * using the commands @sc{manual.help}. * Keyword based lookup is supported by the operation @sc{manual.search}; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list