Changeset: 6e85b1747188 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6e85b1747188
Modified Files:
        clients/mapiclient/tomograph.c
        gdk/gdk_atoms.c
        gdk/gdk_bbp.c
        gdk/gdk_heap.c
        monetdb5/mal/mal_factory.c
        monetdb5/mal/mal_instruction.h
        monetdb5/mal/mal_profiler.c
        monetdb5/mal/mal_readline.c
        monetdb5/modules/mal/tablet.c
        monetdb5/optimizer/opt_qep.c
Branch: default
Log Message:

Merge with Feb2013 branch.


diffs (truncated from 382 to 300 lines):

diff --git a/clients/mapiclient/tomograph.c b/clients/mapiclient/tomograph.c
--- a/clients/mapiclient/tomograph.c
+++ b/clients/mapiclient/tomograph.c
@@ -2157,7 +2157,7 @@ doProfile(void *d)
        for (i = 0; i < MAXTHREADS; i++)
                threads[i] = topbox++;
 
-       /* sent single query */
+       /* send single query */
        if (sqlstatement) {
                doRequest(dbhsql, sqlstatement);
        }
diff --git a/gdk/gdk_atoms.c b/gdk/gdk_atoms.c
--- a/gdk/gdk_atoms.c
+++ b/gdk/gdk_atoms.c
@@ -963,6 +963,9 @@ strHash(const char *s)
        return res;
 }
 
+/* if at least (2*SIZEOF_BUN), also store length (heaps are then
+ * incompatible) */
+#define EXTRALEN ((SIZEOF_BUN + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1))
 
 void
 strCleanHash(Heap *h, int rebuild)
@@ -971,6 +974,50 @@ strCleanHash(Heap *h, int rebuild)
        if (!GDK_ELIMDOUBLES(h)) {
                /* flush hash table for security */
                memset(h->base, 0, GDK_STRHASHSIZE);
+       } else {
+               /* rebuild hash table for double elimination
+                *
+                * If appending strings to the BAT was aborted, if the
+                * heap was memory mapped, the hash in the string heap
+                * may well be incorrect.  Therefore we don't trust it
+                * when we read in a string heap and we rebuild the
+                * complete table (it is small, so this won't take any
+                * time at all). */
+               size_t pad, pos;
+               const size_t extralen = h->hashash ? EXTRALEN : 0;
+               stridx_t *bucket;
+               BUN off, strhash;
+               const char *s;
+
+               memset(h->base, 0, GDK_STRHASHSIZE);
+               pos = GDK_STRHASHSIZE;
+               while (pos < h->free) {
+                       pad = GDK_VARALIGN - (pos & (GDK_VARALIGN - 1));
+                       if (pad < sizeof(stridx_t))
+                               pad += GDK_VARALIGN;
+                       pos += pad + extralen;
+                       s = h->base + pos;
+                       if (h->hashash)
+                               strhash = ((const BUN *) s)[-1];
+                       else
+                               GDK_STRHASH(s, strhash);
+                       off = strhash & GDK_STRHASHMASK;
+                       bucket = ((stridx_t *) h->base) + off;
+                       *bucket = pos - extralen - sizeof(stridx_t);
+                       pos += GDK_STRLEN(s);
+               }
+#ifndef NDEBUG
+               pos = GDK_STRHASHSIZE;
+               while (pos < h->free) {
+                       pad = GDK_VARALIGN - (pos & (GDK_VARALIGN - 1));
+                       if (pad < sizeof(stridx_t))
+                               pad += GDK_VARALIGN;
+                       pos += pad + extralen;
+                       s = h->base + pos;
+                       assert(strLocate(h, s) != 0);
+                       pos += GDK_STRLEN(s);
+               }
+#endif
        }
 }
 
@@ -979,15 +1026,11 @@ strCleanHash(Heap *h, int rebuild)
  * the location of a string in the heap if it exists. Otherwise it
  * returns zero.
  */
-/* if at least (2*SIZEOF_BUN), also store length (heaps are then
- * incompatible) */
-#define EXTRALEN ((SIZEOF_BUN + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1))
-
 var_t
 strLocate(Heap *h, const char *v)
 {
        stridx_t *ref, *next;
-       size_t extralen = h->hashash ? EXTRALEN : 0;
+       const size_t extralen = h->hashash ? EXTRALEN : 0;
 
        /* search hash-table, if double-elimination is still in place */
        BUN off;
@@ -1012,7 +1055,7 @@ strPut(Heap *h, var_t *dst, const char *
        size_t elimbase = GDK_ELIMBASE(h->free);
        size_t pad = GDK_VARALIGN - (h->free & (GDK_VARALIGN - 1));
        size_t pos, len = GDK_STRLEN(v);
-       size_t extralen = h->hashash ? EXTRALEN : 0;
+       const size_t extralen = h->hashash ? EXTRALEN : 0;
        stridx_t *bucket, *ref, *next;
        BUN off, strhash;
 
@@ -1021,24 +1064,30 @@ strPut(Heap *h, var_t *dst, const char *
        off &= GDK_STRHASHMASK;
        bucket = ((stridx_t *) h->base) + off;
 
-       /* search hash-table, if double-elimination is still in place */
-       if (elimbase == 0) {    /* small string heap (<64KB) -- fully double eliminated */
-               for (ref = bucket; *ref; ref = next) {  /* search the linked list */
+       /* if double-elimination is still in place, search hash-table */
+       if (elimbase == 0) {
+               /* small string heap (<64KB) -- fully double eliminated */
+               for (ref = bucket; *ref; ref = next) {
+                       /* search the linked list */
                        next = (stridx_t *) (h->base + *ref);
-                       if (GDK_STRCMP(v, (str) (next + 1) + extralen) == 0) {  /* found */
+                       if (GDK_STRCMP(v, (str) (next + 1) + extralen) == 0) {
+                               /* found */
                                pos = sizeof(stridx_t) + *ref + extralen;
                                return *dst = (var_t) (pos >> GDK_VARSHIFT);
                        }
                }
                /* is there room for the next pointer in the padding space? */
-               if (pad < sizeof(stridx_t))
-                       pad += GDK_VARALIGN;    /* if not, pad more */
+               if (pad < sizeof(stridx_t)) {
+                       /* if not, pad more */
+                       pad += GDK_VARALIGN;
+               }
        } else if (*bucket) {
                /* large string heap (>=64KB) --
                 * opportunistic/probabilistic double elimination */
                pos = elimbase + *bucket + extralen;
                if (GDK_STRCMP(v, h->base + pos) == 0) {
-                       return *dst = (var_t) (pos >> GDK_VARSHIFT);    /* already in heap; do not insert! */
+                       /* already in heap; do not insert! */
+                       return *dst = (var_t) (pos >> GDK_VARSHIFT);
                }
 #if SIZEOF_VAR_T >= SIZEOF_VOID_P /* in fact SIZEOF_VAR_T == SIZEOF_VOID_P */
                if (extralen == 0)
diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c
--- a/gdk/gdk_bbp.c
+++ b/gdk/gdk_bbp.c
@@ -3165,8 +3165,14 @@ do_backup(const char *srcdir, const char
        int ret = 0;
 
         /* direct mmap is unprotected (readonly usage, or has WAL
-         * protection)  */
-       if (h->storage != STORE_MMAP) {
+         * protection); however, if we're backing up for subcommit
+         * and a backup already exists in the main backup directory
+         * (see GDKupgradevarheap), move the file */
+       if (subcommit && file_exists(BAKDIR, nme, extbase)) {
+               assert(h->storage == STORE_MMAP);
+               if (file_move(BAKDIR, SUBDIR, nme, extbase))
+                       return -1;
+       } else if (h->storage != STORE_MMAP) {
                /* STORE_PRIV saves into X.new files. Two cases could
                 * happen. The first is when a valid X.new exists
                 * because of an access change or a previous
diff --git a/gdk/gdk_heap.c b/gdk/gdk_heap.c
--- a/gdk/gdk_heap.c
+++ b/gdk/gdk_heap.c
@@ -390,6 +390,20 @@ HEAPshrink(Heap *h, size_t size)
        return -1;
 }
 
+/* returns 1 if the file exists */
+static int
+file_exists(const char *dir, const char *name, const char *ext)
+{
+       long_str path;
+       struct stat st;
+       int ret;
+
+       GDKfilepath(path, dir, name, ext);
+       ret = stat(path, &st);
+       IODEBUG THRprintf(GDKstdout, "#stat(%s) = %d\n", path, ret);
+       return (ret == 0);
+}
+
 int
 GDKupgradevarheap(COLrec *c, var_t v, int copyall)
 {
@@ -403,6 +417,7 @@ GDKupgradevarheap(COLrec *c, var_t v, in
 #endif
        size_t i, n;
        size_t savefree;
+       const char *filename;
 
        assert(c->heap.parentid == 0);
        assert(width != 0);
@@ -414,11 +429,65 @@ GDKupgradevarheap(COLrec *c, var_t v, in
        }
        assert(c->width < width);
        assert(c->shift < shift);
+
        /* if copyall is set, we need to convert the whole heap, since
         * we may be in the middle of an insert loop that adjusts the
         * free value at the end; otherwise only copy the area
         * indicated by the "free" pointer */
        n = (copyall ? c->heap.size : c->heap.free) >> c->shift;
+
+       /* for memory mapped files, create a backup copy before widening
+        *
+        * this solves a problem that we don't control what's in the
+        * actual file until the next commit happens, so a crash might
+        * otherwise leave the file (and the database) in an
+        * inconsistent state
+        *
+        * also see do_backup in gdk_bbp.c */
+       filename = strrchr(c->heap.filename, DIR_SEP);
+       if (filename == NULL)
+               filename = c->heap.filename;
+       else
+               filename++;
+       if (c->heap.storage == STORE_MMAP && !file_exists(BAKDIR, filename, NULL)) {
+               int fd;
+               ssize_t ret = 0;
+               size_t size = n << c->shift;
+               const char *base = c->heap.base;
+
+               /* first save heap in file with extra .tmp extension */
+               if ((fd = GDKfdlocate(c->heap.filename, "wb", "tmp")) < 0)
+                       return GDK_FAIL;
+               while (size > 0) {
+                       ret = write(fd, base, (unsigned) MIN(1 << 30, size));
+                       if (ret < 0)
+                               size = 0;
+                       size -= ret;
+                       base += ret;
+               }
+               if (ret < 0 ||
+#if defined(NATIVE_WIN32)
+                   _commit(fd) < 0 ||
+#elif defined(HAVE_FDATASYNC)
+                   fdatasync(fd) < 0 ||
+#elif defined(HAVE_FSYNC)
+                   fsync(fd) < 0 ||
+#endif
+                   close(fd) < 0) {
+                       /* something went wrong: abandon ship */
+                       close(fd);
+                       GDKunlink(BATDIR, c->heap.filename, "tmp");
+                       return GDK_FAIL;
+               }
+               /* move tmp file to backup directory (without .tmp
+                * extension) */
+               if (GDKmove(BATDIR, c->heap.filename, "tmp", BAKDIR, filename, NULL) < 0) {
+                       /* backup failed */
+                       GDKunlink(BATDIR, c->heap.filename, "tmp");
+                       return GDK_FAIL;
+               }
+       }
+
        savefree = c->heap.free;
        if (copyall)
                c->heap.free = c->heap.size;
diff --git a/monetdb5/mal/mal_factory.c b/monetdb5/mal/mal_factory.c
--- a/monetdb5/mal/mal_factory.c
+++ b/monetdb5/mal/mal_factory.c
@@ -122,7 +122,7 @@
  * end random;
  * @end example
  *
- * The operators to built client aware factories are,
+ * The operators to build client aware factories are,
  * @sc{factories.getCaller()}, which returns a client
  * index, @sc{factories.getModule()} and @sc{factories.getFunction()},
  * which returns the identity of scope enclosed.
diff --git a/monetdb5/mal/mal_instruction.h b/monetdb5/mal/mal_instruction.h
--- a/monetdb5/mal/mal_instruction.h
+++ b/monetdb5/mal/mal_instruction.h
@@ -137,7 +137,7 @@ typedef struct MALBLK {
        ProfPtr profiler;
        struct MALBLK *history;         /* of optimizer actions */
        short keephistory;                      /* do we need the history at all */
-       short dotfile;                          /* sent dot file to stethoscope? */
+       short dotfile;                          /* send dot file to stethoscope? */
        str marker;                                     /* history points are marked for backtracking */
        int maxarg;                                     /* keep track on the maximal arguments used */
        ptr replica;                            /* for the replicator tests */
diff --git a/monetdb5/mal/mal_profiler.c b/monetdb5/mal/mal_profiler.c
--- a/monetdb5/mal/mal_profiler.c
+++ b/monetdb5/mal/mal_profiler.c
@@ -203,7 +203,7 @@ deactivateCounter(str name)
  * Offline processing
  * The offline processing structure is the easiest. We merely have to
  * produce a correct tuple format for the front-end.
- * To avoid unnecessary locks we first built the event as a string
+ * To avoid unnecessary locks we first build the event as a string
  * It uses a local logbuffer[LOGLEN] and logbase, logtop, loglen
  */
 #define LOGLEN 8192
diff --git a/monetdb5/mal/mal_readline.c b/monetdb5/mal/mal_readline.c
--- a/monetdb5/mal/mal_readline.c
+++ b/monetdb5/mal/mal_readline.c
@@ -61,7 +61,7 @@
  * using the commands @sc{manual.help}.
  * Keyword based lookup is supported by the operation @sc{manual.search};
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to