Changeset: a9338d708269 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a9338d708269
Modified Files:
        gdk/gdk_bbp.c
        gdk/gdk_join.c
        gdk/gdk_private.h
        gdk/gdk_search.c
        gdk/gdk_select.c
        gdk/gdk_unique.c
        monetdb5/extras/rdf/rdftypes.c
Branch: rdf
Log Message:

Disable parent hash, persistent hash


diffs (truncated from 330 to 300 lines):

diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c
--- a/gdk/gdk_bbp.c
+++ b/gdk/gdk_bbp.c
@@ -3839,11 +3839,23 @@ BBPdiskscan(const char *parent)
                        BAT *b = getdesc(bid);
                        delete = (b == NULL || !b->T->vheap || b->batCopiedtodisk == 0);
                } else if (strncmp(p + 1, "hhash", 5) == 0) {
+#ifdef PERSISTENTHASH                  
                        BAT *b = getdesc(bid);
                        delete = b == NULL;
+                       if (!delete)
+                               b->H->hash = (Hash *) 1;
+#else
+                       delete = TRUE;
+#endif
                } else if (strncmp(p + 1, "thash", 5) == 0) {
+#ifdef PERSISTENTHASH                  
                        BAT *b = getdesc(bid);
                        delete = b == NULL;
+                       if (!delete)
+                               b->T->hash = (Hash *) 1;
+#else
+                       delete = TRUE;
+#endif
                } else if (strncmp(p + 1, "himprints", 9) == 0) {
                        BAT *b = getdesc(bid);
                        delete = b == NULL;
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -1843,19 +1843,36 @@ hashjoin(BAT *r1, BAT *r2, BAT *l, BAT *
                return GDK_SUCCEED;
        }
 
+       rl = BUNfirst(r);
+#ifndef DISABLE_PARENT_HASH
        if (VIEWtparent(r)) {
                BAT *b = BBPdescriptor(-VIEWtparent(r));
-               rl = (BUN) ((r->T->heap.base - b->T->heap.base) >> r->T->shift) + BUNfirst(r);
-               r = b;
-       } else {
-               rl = BUNfirst(r);
+               if (b->batPersistence == PERSISTENT || BATcheckhash(b)) {
+                       /* only use parent's hash if it is persistent
+                        * or already has a hash */
+                       ALGODEBUG
+                               fprintf(stderr, "#hashjoin(%s#"BUNFMT"): "
+                                       "using parent(%s#"BUNFMT") for hash\n",
+                                       BATgetId(r), BATcount(r),
+                                       BATgetId(b), BATcount(b));
+                       rl = (BUN) ((r->T->heap.base - b->T->heap.base) >> r->T->shift) + BUNfirst(r);
+                       r = b;
+               } else {
+                       ALGODEBUG
+                               fprintf(stderr, "#hashjoin(%s#"BUNFMT"): not "
+                                       "using parent(%s#"BUNFMT") for hash\n",
+                                       BATgetId(r), BATcount(r),
+                                       BATgetId(b), BATcount(b));
+               }
        }
+#endif
        rh = rl + rend;
        rl += rstart;
        rseq += rstart;
 
        if (BAThash(r, 0) == GDK_FAIL)
                goto bailout;
+
        ri = bat_iterator(r);
        nrcand = (BUN) (rcandend - rcand);
 
@@ -2871,7 +2888,9 @@ BATsubjoin(BAT **r1p, BAT **r2p, BAT *l,
        BUN lcount, rcount, lpcount, rpcount;
        BUN lsize, rsize;
        int lhash, rhash;
+#ifndef DISABLE_PARENT_HASH    
        bat lparent, rparent;
+#endif 
        int swap;
        size_t mem_size;
 
@@ -2912,19 +2931,26 @@ BATsubjoin(BAT **r1p, BAT **r2p, BAT *l,
        rsize = (BUN) (BATcount(r) * (Tsize(r) + (r->T->vheap ? r->T->vheap->size : 0) + 2 * sizeof(BUN)));
        mem_size = GDK_mem_maxsize / (GDKnr_threads ? GDKnr_threads : 1);
 
+#ifndef DISABLE_PARENT_HASH    
        lparent = VIEWtparent(l);
-       rparent = VIEWtparent(r);
        if (lparent) {
                lpcount = BATcount(BBPdescriptor(lparent));
                lhash = BATcheckhash(l) || BATcheckhash(BBPdescriptor(-lparent));
-       } else {
+       } else 
+#endif         
+       {
                lpcount = BATcount(l);
                lhash = BATcheckhash(l);
        }
+
+#ifndef DISABLE_PARENT_HASH    
+       rparent = VIEWtparent(r);
        if (rparent) {
                rpcount = BATcount(BBPdescriptor(rparent));
                rhash = BATcheckhash(r) || BATcheckhash(BBPdescriptor(-rparent));
-       } else {
+       } else 
+#endif         
+       {
                rpcount = BATcount(r);
                rhash = BATcheckhash(r);
        }
@@ -2963,21 +2989,33 @@ BATsubjoin(BAT **r1p, BAT **r2p, BAT *l,
                 * large (i.e. prefer hash over binary search, but
                 * only if the hash table doesn't cause thrashing) */
                return mergejoin(r1, r2, l, r, sl, sr, nil_matches, 0, 0, 0);
-       } else if ((l->batPersistence == PERSISTENT ||
-                   (lparent != 0 &&
-                    BBPquickdesc(abs(lparent), 0)->batPersistence == PERSISTENT)) &&
-                  !(r->batPersistence == PERSISTENT ||
-                    (rparent != 0 &&
-                     BBPquickdesc(abs(rparent), 0)->batPersistence == PERSISTENT))) {
+       } else if ((l->batPersistence == PERSISTENT 
+#ifndef DISABLE_PARENT_HASH                            
+                       || (lparent != 0 &&
+                    BBPquickdesc(abs(lparent), 0)->batPersistence == PERSISTENT)
+#endif                 
+                       ) &&
+                  !(r->batPersistence == PERSISTENT 
+#ifndef DISABLE_PARENT_HASH                       
+                          || (rparent != 0 &&
+                     BBPquickdesc(abs(rparent), 0)->batPersistence == PERSISTENT)
+#endif                    
+                          )) {
                /* l (or its parent) is persistent and r is not,
                 * create hash on l since it may be reused */
                swap = 1;
-       } else if (!(l->batPersistence == PERSISTENT ||
-                   (lparent != 0 &&
-                    BBPquickdesc(abs(lparent), 0)->batPersistence == PERSISTENT)) &&
-                  (r->batPersistence == PERSISTENT ||
-                    (rparent != 0 &&
-                     BBPquickdesc(abs(rparent), 0)->batPersistence == PERSISTENT))) {
+       } else if (!(l->batPersistence == PERSISTENT 
+#ifndef DISABLE_PARENT_HASH
+               || (lparent != 0 &&
+                    BBPquickdesc(abs(lparent), 0)->batPersistence == PERSISTENT)
+#endif         
+               ) &&
+                  (r->batPersistence == PERSISTENT 
+#ifndef DISABLE_PARENT_HASH                
+                   || (rparent != 0 &&
+                     BBPquickdesc(abs(rparent), 0)->batPersistence == PERSISTENT)
+#endif             
+                   )) {
                /* l (and its parent) is not persistent but r (or its
                 * parent) is, create hash on r since it may be
                 * reused */
@@ -2986,6 +3024,7 @@ BATsubjoin(BAT **r1p, BAT **r2p, BAT *l,
                /* no hashes, not sorted, create hash on smallest BAT */
                swap = 1;
        }
+
        if (swap) {
                return hashjoin(r2, r1, r, l, sr, sl, nil_matches, 0, 0, 0);
        } else {
diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h
--- a/gdk/gdk_private.h
+++ b/gdk/gdk_private.h
@@ -12,6 +12,8 @@
 #error this file should not be included outside its source directory
 #endif
 
+#define DISABLE_PARENT_HASH 1
+
 #include "gdk_system_private.h"
 
 enum heaptype {
diff --git a/gdk/gdk_search.c b/gdk/gdk_search.c
--- a/gdk/gdk_search.c
+++ b/gdk/gdk_search.c
@@ -243,7 +243,11 @@ BATcheckhash(BAT *b)
 
                                if ((h = GDKmalloc(sizeof(*h))) != NULL &&
                                    read(fd, hdata, sizeof(hdata)) == sizeof(hdata) &&
-                                   hdata[0] == (((size_t) 1 << 24) | HASH_VERSION) &&
+                                   hdata[0] == (
+#ifdef PERSISTENTHASH                                      
+                                           ((size_t) 1 << 24) | 
+#endif                                     
+                                           HASH_VERSION) &&
                                    hdata[4] == (size_t) BATcount(b) &&
                                    fstat(fd, &st) == 0 &&
                                    st.st_size >= (off_t) (hp->size = hp->free = (hdata[1] + hdata[2]) * hdata[3] + HASH_HEADER_SIZE * SIZEOF_SIZE_T) &&
@@ -320,6 +324,8 @@ BAThash(BAT *b, BUN masksize)
                }
                return GDK_SUCCEED;
        }
+
+
        MT_lock_set(&GDKhashLock(abs(b->batCacheid)), "BAThash");
        if (b->T->hash == NULL) {
                unsigned int tpe = ATOMbasetype(b->ttype);
@@ -331,7 +337,9 @@ BAThash(BAT *b, BUN masksize)
                const char *nme = BBP_physical(b->batCacheid);
                const char *ext = b->batCacheid > 0 ? "thash" : "hhash";
                BATiter bi = bat_iterator(b);
+#ifdef PERSISTENTHASH          
                int fd;
+#endif         
 
                ALGODEBUG fprintf(stderr, "#BAThash: create hash(" BUNFMT ");\n", BATcount(b));
                if ((hp = GDKzalloc(sizeof(*hp))) == NULL ||
@@ -503,6 +511,7 @@ BAThash(BAT *b, BUN masksize)
                        }
                        break;
                }
+#ifdef PERSISTENTHASH          
                if ((BBP_status(b->batCacheid) & BBPEXISTING) &&
                    HEAPsave(hp, nme, ext) == 0 &&
                    (fd = GDKfdlocate(hp->farmid, nme, "rb+", ext)) >= 0) {
@@ -522,6 +531,7 @@ BAThash(BAT *b, BUN masksize)
                        close(fd);
                } else
                        ALGODEBUG fprintf(stderr, "#BAThash: NOT persisting hash %d\n", b->batCacheid);
+#endif         
                b->T->hash = h;
                t1 = GDKusec();
                ALGODEBUG fprintf(stderr, "#BAThash: hash construction " LLFMT " usec\n", t1 - t0);
diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c
--- a/gdk/gdk_select.c
+++ b/gdk/gdk_select.c
@@ -1599,18 +1599,27 @@ BATsubselect(BAT *b, BAT *s, const void 
         * persistent and the total size wouldn't be too large; check
         * for existence of hash last since that may involve I/O */
        hash = equi &&
-               (((b->batPersistence == PERSISTENT ||
-                 (parent != 0 &&
-                  BBPquickdesc(abs(parent),0)->batPersistence == PERSISTENT)) &&
+               (((b->batPersistence == PERSISTENT 
+#ifndef DISABLE_PARENT_HASH               
+                  || (parent != 0 &&
+                  BBPquickdesc(abs(parent),0)->batPersistence == PERSISTENT)
+#endif            
+                  ) &&
                 (size_t) ATOMsize(b->ttype) >= sizeof(BUN) / 4 &&
                  BATcount(b) * (ATOMsize(b->ttype) + 2 * sizeof(BUN)) < GDK_mem_maxsize / 2) ||
-                (BATcheckhash(b) ||
-                 (parent != 0 &&
-                  BATcheckhash(BBPdescriptor(-parent)))));
+                (BATcheckhash(b) 
+#ifndef DISABLE_PARENT_HASH              
+                 || (parent != 0 &&
+                  BATcheckhash(BBPdescriptor(-parent)))
+#endif           
+                 ));
        if (hash &&
            estimate == BUN_NONE &&
-           !BATcheckhash(b) &&
-           (parent == 0 || !BATcheckhash(BBPdescriptor(-parent)))) {
+           !BATcheckhash(b) 
+#ifndef DISABLE_PARENT_HASH        
+           && (parent == 0 || !BATcheckhash(BBPdescriptor(-parent)))
+#endif     
+           ) {
                /* no exact result size, but we need estimate to choose
                 * between hash- & scan-select
                 * (if we already have a hash, it's a no-brainer: we
diff --git a/gdk/gdk_unique.c b/gdk/gdk_unique.c
--- a/gdk/gdk_unique.c
+++ b/gdk/gdk_unique.c
@@ -41,7 +41,9 @@ BATsubunique(BAT *b, BAT *s)
        BUN hb;
        BATiter bi;
        int (*cmp)(const void *, const void *);
+#ifndef DISABLE_PARENT_HASH    
        bat parent;
+#endif 
 
        BATcheck(b, "BATsubunique", NULL);
        if (b->tkey || BATcount(b) <= 1 || BATtdense(b)) {
@@ -243,9 +245,12 @@ BATsubunique(BAT *b, BAT *s)
                seen = NULL;
        } else if (BATcheckhash(b) ||
                   (b->batPersistence == PERSISTENT &&
-                   BAThash(b, 0) == GDK_SUCCEED) ||
-                  ((parent = VIEWtparent(b)) != 0 &&
-                   BATcheckhash(BBPdescriptor(-parent)))) {
+                   BAThash(b, 0) == GDK_SUCCEED) 
+#ifndef DISABLE_PARENT_HASH               
+                  || ((parent = VIEWtparent(b)) != 0 &&
+                   BATcheckhash(BBPdescriptor(-parent)))
+#endif            
+                  ) {
                BUN lo;
                oid seq;
 
@@ -257,12 +262,15 @@ BATsubunique(BAT *b, BAT *s)
                                  s ? BATgetId(s) : "NULL",
                                  s ? BATcount(s) : 0);
                seq = b->hseqbase;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to