Changeset: a9338d708269 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a9338d708269
Modified Files:
	gdk/gdk_bbp.c
	gdk/gdk_join.c
	gdk/gdk_private.h
	gdk/gdk_search.c
	gdk/gdk_select.c
	gdk/gdk_unique.c
	monetdb5/extras/rdf/rdftypes.c
Branch: rdf
Log Message:
Disable parent hash, persistent hash diffs (truncated from 330 to 300 lines): diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c --- a/gdk/gdk_bbp.c +++ b/gdk/gdk_bbp.c @@ -3839,11 +3839,23 @@ BBPdiskscan(const char *parent) BAT *b = getdesc(bid); delete = (b == NULL || !b->T->vheap || b->batCopiedtodisk == 0); } else if (strncmp(p + 1, "hhash", 5) == 0) { +#ifdef PERSISTENTHASH BAT *b = getdesc(bid); delete = b == NULL; + if (!delete) + b->H->hash = (Hash *) 1; +#else + delete = TRUE; +#endif } else if (strncmp(p + 1, "thash", 5) == 0) { +#ifdef PERSISTENTHASH BAT *b = getdesc(bid); delete = b == NULL; + if (!delete) + b->T->hash = (Hash *) 1; +#else + delete = TRUE; +#endif } else if (strncmp(p + 1, "himprints", 9) == 0) { BAT *b = getdesc(bid); delete = b == NULL; diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c --- a/gdk/gdk_join.c +++ b/gdk/gdk_join.c @@ -1843,19 +1843,36 @@ hashjoin(BAT *r1, BAT *r2, BAT *l, BAT * return GDK_SUCCEED; } + rl = BUNfirst(r); +#ifndef DISABLE_PARENT_HASH if (VIEWtparent(r)) { BAT *b = BBPdescriptor(-VIEWtparent(r)); - rl = (BUN) ((r->T->heap.base - b->T->heap.base) >> r->T->shift) + BUNfirst(r); - r = b; - } else { - rl = BUNfirst(r); + if (b->batPersistence == PERSISTENT || BATcheckhash(b)) { + /* only use parent's hash if it is persistent + * or already has a hash */ + ALGODEBUG + fprintf(stderr, "#hashjoin(%s#"BUNFMT"): " + "using parent(%s#"BUNFMT") for hash\n", + BATgetId(r), BATcount(r), + BATgetId(b), BATcount(b)); + rl = (BUN) ((r->T->heap.base - b->T->heap.base) >> r->T->shift) + BUNfirst(r); + r = b; + } else { + ALGODEBUG + fprintf(stderr, "#hashjoin(%s#"BUNFMT"): not " + "using parent(%s#"BUNFMT") for hash\n", + BATgetId(r), BATcount(r), + BATgetId(b), BATcount(b)); + } } +#endif rh = rl + rend; rl += rstart; rseq += rstart; if (BAThash(r, 0) == GDK_FAIL) goto bailout; + ri = bat_iterator(r); nrcand = (BUN) (rcandend - rcand); @@ -2871,7 +2888,9 @@ BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BUN lcount, rcount, lpcount, rpcount; BUN 
lsize, rsize; int lhash, rhash; +#ifndef DISABLE_PARENT_HASH bat lparent, rparent; +#endif int swap; size_t mem_size; @@ -2912,19 +2931,26 @@ BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, rsize = (BUN) (BATcount(r) * (Tsize(r) + (r->T->vheap ? r->T->vheap->size : 0) + 2 * sizeof(BUN))); mem_size = GDK_mem_maxsize / (GDKnr_threads ? GDKnr_threads : 1); +#ifndef DISABLE_PARENT_HASH lparent = VIEWtparent(l); - rparent = VIEWtparent(r); if (lparent) { lpcount = BATcount(BBPdescriptor(lparent)); lhash = BATcheckhash(l) || BATcheckhash(BBPdescriptor(-lparent)); - } else { + } else +#endif + { lpcount = BATcount(l); lhash = BATcheckhash(l); } + +#ifndef DISABLE_PARENT_HASH + rparent = VIEWtparent(r); if (rparent) { rpcount = BATcount(BBPdescriptor(rparent)); rhash = BATcheckhash(r) || BATcheckhash(BBPdescriptor(-rparent)); - } else { + } else +#endif + { rpcount = BATcount(r); rhash = BATcheckhash(r); } @@ -2963,21 +2989,33 @@ BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, * large (i.e. prefer hash over binary search, but * only if the hash table doesn't cause thrashing) */ return mergejoin(r1, r2, l, r, sl, sr, nil_matches, 0, 0, 0); - } else if ((l->batPersistence == PERSISTENT || - (lparent != 0 && - BBPquickdesc(abs(lparent), 0)->batPersistence == PERSISTENT)) && - !(r->batPersistence == PERSISTENT || - (rparent != 0 && - BBPquickdesc(abs(rparent), 0)->batPersistence == PERSISTENT))) { + } else if ((l->batPersistence == PERSISTENT +#ifndef DISABLE_PARENT_HASH + || (lparent != 0 && + BBPquickdesc(abs(lparent), 0)->batPersistence == PERSISTENT) +#endif + ) && + !(r->batPersistence == PERSISTENT +#ifndef DISABLE_PARENT_HASH + || (rparent != 0 && + BBPquickdesc(abs(rparent), 0)->batPersistence == PERSISTENT) +#endif + )) { /* l (or its parent) is persistent and r is not, * create hash on l since it may be reused */ swap = 1; - } else if (!(l->batPersistence == PERSISTENT || - (lparent != 0 && - BBPquickdesc(abs(lparent), 0)->batPersistence == PERSISTENT)) && - (r->batPersistence == 
PERSISTENT || - (rparent != 0 && - BBPquickdesc(abs(rparent), 0)->batPersistence == PERSISTENT))) { + } else if (!(l->batPersistence == PERSISTENT +#ifndef DISABLE_PARENT_HASH + || (lparent != 0 && + BBPquickdesc(abs(lparent), 0)->batPersistence == PERSISTENT) +#endif + ) && + (r->batPersistence == PERSISTENT +#ifndef DISABLE_PARENT_HASH + || (rparent != 0 && + BBPquickdesc(abs(rparent), 0)->batPersistence == PERSISTENT) +#endif + )) { /* l (and its parent) is not persistent but r (or its * parent) is, create hash on r since it may be * reused */ @@ -2986,6 +3024,7 @@ BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, /* no hashes, not sorted, create hash on smallest BAT */ swap = 1; } + if (swap) { return hashjoin(r2, r1, r, l, sr, sl, nil_matches, 0, 0, 0); } else { diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h --- a/gdk/gdk_private.h +++ b/gdk/gdk_private.h @@ -12,6 +12,8 @@ #error this file should not be included outside its source directory #endif +#define DISABLE_PARENT_HASH 1 + #include "gdk_system_private.h" enum heaptype { diff --git a/gdk/gdk_search.c b/gdk/gdk_search.c --- a/gdk/gdk_search.c +++ b/gdk/gdk_search.c @@ -243,7 +243,11 @@ BATcheckhash(BAT *b) if ((h = GDKmalloc(sizeof(*h))) != NULL && read(fd, hdata, sizeof(hdata)) == sizeof(hdata) && - hdata[0] == (((size_t) 1 << 24) | HASH_VERSION) && + hdata[0] == ( +#ifdef PERSISTENTHASH + ((size_t) 1 << 24) | +#endif + HASH_VERSION) && hdata[4] == (size_t) BATcount(b) && fstat(fd, &st) == 0 && st.st_size >= (off_t) (hp->size = hp->free = (hdata[1] + hdata[2]) * hdata[3] + HASH_HEADER_SIZE * SIZEOF_SIZE_T) && @@ -320,6 +324,8 @@ BAThash(BAT *b, BUN masksize) } return GDK_SUCCEED; } + + MT_lock_set(&GDKhashLock(abs(b->batCacheid)), "BAThash"); if (b->T->hash == NULL) { unsigned int tpe = ATOMbasetype(b->ttype); @@ -331,7 +337,9 @@ BAThash(BAT *b, BUN masksize) const char *nme = BBP_physical(b->batCacheid); const char *ext = b->batCacheid > 0 ? 
"thash" : "hhash"; BATiter bi = bat_iterator(b); +#ifdef PERSISTENTHASH int fd; +#endif ALGODEBUG fprintf(stderr, "#BAThash: create hash(" BUNFMT ");\n", BATcount(b)); if ((hp = GDKzalloc(sizeof(*hp))) == NULL || @@ -503,6 +511,7 @@ BAThash(BAT *b, BUN masksize) } break; } +#ifdef PERSISTENTHASH if ((BBP_status(b->batCacheid) & BBPEXISTING) && HEAPsave(hp, nme, ext) == 0 && (fd = GDKfdlocate(hp->farmid, nme, "rb+", ext)) >= 0) { @@ -522,6 +531,7 @@ BAThash(BAT *b, BUN masksize) close(fd); } else ALGODEBUG fprintf(stderr, "#BAThash: NOT persisting hash %d\n", b->batCacheid); +#endif b->T->hash = h; t1 = GDKusec(); ALGODEBUG fprintf(stderr, "#BAThash: hash construction " LLFMT " usec\n", t1 - t0); diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c --- a/gdk/gdk_select.c +++ b/gdk/gdk_select.c @@ -1599,18 +1599,27 @@ BATsubselect(BAT *b, BAT *s, const void * persistent and the total size wouldn't be too large; check * for existence of hash last since that may involve I/O */ hash = equi && - (((b->batPersistence == PERSISTENT || - (parent != 0 && - BBPquickdesc(abs(parent),0)->batPersistence == PERSISTENT)) && + (((b->batPersistence == PERSISTENT +#ifndef DISABLE_PARENT_HASH + || (parent != 0 && + BBPquickdesc(abs(parent),0)->batPersistence == PERSISTENT) +#endif + ) && (size_t) ATOMsize(b->ttype) >= sizeof(BUN) / 4 && BATcount(b) * (ATOMsize(b->ttype) + 2 * sizeof(BUN)) < GDK_mem_maxsize / 2) || - (BATcheckhash(b) || - (parent != 0 && - BATcheckhash(BBPdescriptor(-parent))))); + (BATcheckhash(b) +#ifndef DISABLE_PARENT_HASH + || (parent != 0 && + BATcheckhash(BBPdescriptor(-parent))) +#endif + )); if (hash && estimate == BUN_NONE && - !BATcheckhash(b) && - (parent == 0 || !BATcheckhash(BBPdescriptor(-parent)))) { + !BATcheckhash(b) +#ifndef DISABLE_PARENT_HASH + && (parent == 0 || !BATcheckhash(BBPdescriptor(-parent))) +#endif + ) { /* no exact result size, but we need estimate to choose * between hash- & scan-select * (if we already have a hash, it's a no-brainer: we 
diff --git a/gdk/gdk_unique.c b/gdk/gdk_unique.c --- a/gdk/gdk_unique.c +++ b/gdk/gdk_unique.c @@ -41,7 +41,9 @@ BATsubunique(BAT *b, BAT *s) BUN hb; BATiter bi; int (*cmp)(const void *, const void *); +#ifndef DISABLE_PARENT_HASH bat parent; +#endif BATcheck(b, "BATsubunique", NULL); if (b->tkey || BATcount(b) <= 1 || BATtdense(b)) { @@ -243,9 +245,12 @@ BATsubunique(BAT *b, BAT *s) seen = NULL; } else if (BATcheckhash(b) || (b->batPersistence == PERSISTENT && - BAThash(b, 0) == GDK_SUCCEED) || - ((parent = VIEWtparent(b)) != 0 && - BATcheckhash(BBPdescriptor(-parent)))) { + BAThash(b, 0) == GDK_SUCCEED) +#ifndef DISABLE_PARENT_HASH + || ((parent = VIEWtparent(b)) != 0 && + BATcheckhash(BBPdescriptor(-parent))) +#endif + ) { BUN lo; oid seq; @@ -257,12 +262,15 @@ BATsubunique(BAT *b, BAT *s) s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0); seq = b->hseqbase; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list