Changeset: 5d22cee12337 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=5d22cee12337
Modified Files:
        clients/Tests/exports.stable.out
        gdk/gdk_batop.c
        gdk/gdk_hash.c
        gdk/gdk_hash.h
        monetdb5/mal/mal_builder.c
        sql/storage/bat/bat_storage.c
Branch: default
Log Message:

Merge with linear-hashing branch.


diffs (245 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -297,7 +297,6 @@ void HASHdestroy(BAT *b);
 gdk_return HASHgrowbucket(BAT *b);
 BUN HASHlist(Hash *h, BUN i);
 BUN HASHprobe(const Hash *h, const void *v);
-gdk_return HASHupgradehashheap(BAT *b, BUN cap);
 void HEAP_free(Heap *heap, var_t block);
 void HEAP_initialize(Heap *heap, size_t nbytes, size_t nprivate, int 
alignment);
 var_t HEAP_malloc(Heap *heap, size_t nbytes);
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -302,7 +302,6 @@ insert_string_bat(BAT *b, BAT *n, BAT *s
                                b->theap.free += sizeof(var_t);
                                break;
                        }
-                       b->theap.dirty = true;
                }
                b->tvarsized = true;
                b->ttype = TYPE_str;
diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c
--- a/gdk/gdk_hash.c
+++ b/gdk/gdk_hash.c
@@ -207,17 +207,16 @@ HASHcollisions(BAT *b, Hash *h, const ch
                entries == 0 ? 0 : total / entries);
 }
 
-gdk_return
-HASHupgradehashheap(BAT *b, BUN cap)
+static gdk_return
+HASHupgradehashheap(BAT *b)
 {
        Hash *h = b->thash;
-       int nwidth;
+       int nwidth = h->width << 1;
        BUN i;
 
-       if (h == NULL)
-               return GDK_SUCCEED;
-       if ((nwidth = HASHwidth(cap)) <= h->width)
-               return GDK_SUCCEED;
+       assert(nwidth <= SIZEOF_BUN);
+       assert((nwidth & (nwidth - 1)) == 0);
+
        if (HEAPextend(&h->heaplink, h->heaplink.size * nwidth / h->width, 
true) != GDK_SUCCEED ||
            HEAPextend(&h->heapbckt, (h->heapbckt.size - HASH_HEADER_SIZE * 
SIZEOF_SIZE_T) * nwidth / h->width + HASH_HEADER_SIZE * SIZEOF_SIZE_T, true) != 
GDK_SUCCEED) {
                b->thash = NULL;
@@ -246,6 +245,7 @@ HASHupgradehashheap(BAT *b, BUN cap)
                        }
                        break;
                }
+               h->nil = BUN4_NONE;
                break;
 #ifdef BUN8
        case BUN8:
@@ -283,6 +283,7 @@ HASHupgradehashheap(BAT *b, BUN cap)
                        }
                        break;
                }
+               h->nil = BUN8_NONE;
                break;
 #endif
        }
@@ -308,6 +309,12 @@ HASHgrowbucket(BAT *b)
        lng t0 = 0;
 
        ACCELDEBUG t0 = GDKusec();
+
+       /* only needed to fix hash tables built before this fix was
+        * introduced */
+       if (h->nil <= h->mask2 && HASHupgradehashheap(b) != GDK_SUCCEED)
+               return GDK_FAIL;
+
        h->heapbckt.dirty = true;
        h->heaplink.dirty = true;
        if (((size_t *) h->heapbckt.base)[0] & (size_t) 1 << 24) {
@@ -338,6 +345,11 @@ HASHgrowbucket(BAT *b)
                if (h->nbucket == h->mask2) {
                        h->mask1 = h->mask2;
                        h->mask2 = h->mask2 << 1 | 1;
+                       if (h->nil == h->mask2) {
+                               /* time to widen the hash table */
+                               if (HASHupgradehashheap(b) != GDK_SUCCEED)
+                                       return GDK_FAIL;
+                       }
                }
                h->nbucket++;
                h->heapbckt.free += h->width;
@@ -448,54 +460,75 @@ BATcheckhash(BAT *b)
                                            fstat(fd, &st) == 0 &&
                                            st.st_size > 0 &&
                                            st.st_size >= (off_t) 
(h->heaplink.size = h->heaplink.free = hdata[1] * h->width) &&
-                                           HEAPload(&h->heaplink, nme, 
"thashl", false) == GDK_SUCCEED &&
-                                           HEAPload(&h->heapbckt, nme, 
"thashb", false) == GDK_SUCCEED) {
-                                               if (h->nbucket & (h->nbucket - 
1)) {
-                                                       h->mask2 = 
hashmask(h->nbucket);
-                                                       h->mask1 = h->mask2 >> 
1;
-                                               } else {
-                                                       h->mask1 = h->nbucket - 
1;
-                                                       h->mask2 = h->mask1 << 
1 | 1;
-                                               }
-                                               h->nunique = hdata[5];
-                                               h->nheads = hdata[6];
-                                               h->type = ATOMtype(b->ttype);
-                                               switch (h->width) {
-                                               case BUN2:
-                                                       h->nil = (BUN) 
BUN2_NONE;
-                                                       break;
-                                               case BUN4:
-                                                       h->nil = (BUN) 
BUN4_NONE;
-                                                       break;
+                                           HEAPload(&h->heaplink, nme, 
"thashl", false) == GDK_SUCCEED) {
+                                               if (HEAPload(&h->heapbckt, nme, 
"thashb", false) == GDK_SUCCEED) {
+                                                       if (h->nbucket & 
(h->nbucket - 1)) {
+                                                               h->mask2 = 
hashmask(h->nbucket);
+                                                               h->mask1 = 
h->mask2 >> 1;
+                                                       } else {
+                                                               h->mask1 = 
h->nbucket - 1;
+                                                               h->mask2 = 
h->mask1 << 1 | 1;
+                                                       }
+                                                       h->nunique = hdata[5];
+                                                       h->nheads = hdata[6];
+                                                       h->type = 
ATOMtype(b->ttype);
+                                                       switch (h->width) {
+                                                       case BUN2:
+                                                               h->nil = (BUN) 
BUN2_NONE;
+                                                               break;
+                                                       case BUN4:
+                                                               h->nil = (BUN) 
BUN4_NONE;
+                                                               break;
 #ifdef BUN8
-                                               case BUN8:
-                                                       h->nil = (BUN) 
BUN8_NONE;
-                                                       break;
+                                                       case BUN8:
+                                                               h->nil = (BUN) 
BUN8_NONE;
+                                                               break;
 #endif
-                                               default:
-                                                       assert(0);
+                                                       default:
+                                                               assert(0);
+                                                       }
+                                                       if (h->nil > 
h->nbucket) {
+                                                               close(fd);
+                                                               h->Link = 
h->heaplink.base;
+                                                               h->Bckt = 
h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
+                                                               
h->heaplink.parentid = b->batCacheid;
+                                                               
h->heapbckt.parentid = b->batCacheid;
+                                                               
h->heaplink.dirty = false;
+                                                               
h->heapbckt.dirty = false;
+                                                               
BATsetprop_nolock(
+                                                                       b,
+                                                                       
GDK_HASH_BUCKETS,
+                                                                       
TYPE_oid,
+                                                                       
&(oid){NHASHBUCKETS(h)});
+                                                               
BATsetprop_nolock(
+                                                                       b,
+                                                                       
GDK_NUNIQUE,
+                                                                       
TYPE_oid,
+                                                                       
&(oid){h->nunique});
+                                                               b->thash = h;
+                                                               ACCELDEBUG 
fprintf(stderr, "#%s: BATcheckhash(" ALGOBATFMT "): reusing persisted hash\n", 
MT_thread_getname(), ALGOBATPAR(b));
+                                                               
MT_lock_unset(&b->batIdxLock);
+                                                               return true;
+                                                       }
+                                                       /* if h->nil==h->nbucket
+                                                        * (was
+                                                        * possible in
+                                                        * previous
+                                                        * iterations
+                                                        * of the
+                                                        * code), then
+                                                        * we can't
+                                                        * use the
+                                                        * hash since
+                                                        * we can't
+                                                        * distinguish
+                                                        * between
+                                                        * end-of-list
+                                                        * and a valid
+                                                        * link */
+                                                       HEAPfree(&h->heapbckt, 
false);
                                                }
-                                               close(fd);
-                                               h->Link = h->heaplink.base;
-                                               h->Bckt = h->heapbckt.base + 
HASH_HEADER_SIZE * SIZEOF_SIZE_T;
-                                               h->heaplink.parentid = 
b->batCacheid;
-                                               h->heapbckt.parentid = 
b->batCacheid;
-                                               h->heaplink.dirty = false;
-                                               h->heapbckt.dirty = false;
-                                               BATsetprop_nolock(
-                                                       b,
-                                                       GDK_HASH_BUCKETS,
-                                                       TYPE_oid,
-                                                       
&(oid){NHASHBUCKETS(h)});
-                                               BATsetprop_nolock(
-                                                       b,
-                                                       GDK_NUNIQUE,
-                                                       TYPE_oid,
-                                                       &(oid){h->nunique});
-                                               b->thash = h;
-                                               ACCELDEBUG fprintf(stderr, 
"#%s: BATcheckhash(" ALGOBATFMT "): reusing persisted hash\n", 
MT_thread_getname(), ALGOBATPAR(b));
-                                               MT_lock_unset(&b->batIdxLock);
-                                               return true;
+                                               HEAPfree(&h->heaplink, false);
                                        }
                                        close(fd);
                                        /* unlink unusable file */
diff --git a/gdk/gdk_hash.h b/gdk/gdk_hash.h
--- a/gdk/gdk_hash.h
+++ b/gdk/gdk_hash.h
@@ -35,7 +35,6 @@ gdk_export void HASHdestroy(BAT *b);
 gdk_export BUN HASHprobe(const Hash *h, const void *v);
 gdk_export BUN HASHlist(Hash *h, BUN i);
 gdk_export gdk_return HASHgrowbucket(BAT *b);
-gdk_export gdk_return HASHupgradehashheap(BAT *b, BUN cap);
 
 #define HASHnil(H)     (H)->nil
 
@@ -295,8 +294,7 @@ HASHgetlink(Hash *h, BUN i)
                            (((i) + 1) * _h->width > _h->heaplink.size && \
                             HEAPextend(&_h->heaplink,                  \
                                        (i) * _h->width + GDK_mmap_pagesize, \
-                                       true) != GDK_SUCCEED) || \
-                           HASHupgradehashheap(b, i) != GDK_SUCCEED) { \
+                                       true) != GDK_SUCCEED)) {        \
                                MT_lock_unset(&(b)->batIdxLock);        \
                                HASHdestroy(b);                         \
                        } else {                                        \
diff --git a/sql/storage/bat/bat_storage.c b/sql/storage/bat/bat_storage.c
--- a/sql/storage/bat/bat_storage.c
+++ b/sql/storage/bat/bat_storage.c
@@ -13,8 +13,7 @@
 #include "algebra.h"
 #include "gdk_atoms.h"
 
-//#define SNAPSHOT_MINSIZE ((BUN) 1024*128)
-#define SNAPSHOT_MINSIZE ((BUN) 1024*2)
+#define SNAPSHOT_MINSIZE ((BUN) 1024*128)
 
 static MT_Lock destroy_lock = MT_LOCK_INITIALIZER("destroy_lock");
 sql_dbat *tobe_destroyed_dbat = NULL;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to