Changeset: f98643db9b52 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f98643db9b52 Branch: nospare Log Message:
merged with default diffs (truncated from 1553 to 300 lines): diff --git a/MonetDB.spec b/MonetDB.spec --- a/MonetDB.spec +++ b/MonetDB.spec @@ -2698,7 +2698,7 @@ sed -i 's|/var/run|/run|' \ are equal to 1.1. (The old code returned 33554432 instead of 1.1e8.) * Sun Nov 5 2017 Sjoerd Mullender <sjo...@acm.org> - 11.27.9-20171105 -- BZ#6460 - selinux doen't allow mmap +- BZ#6460: selinux doen't allow mmap * Mon Oct 23 2017 Sjoerd Mullender <sjo...@acm.org> - 11.27.9-20171023 - Rebuilt. diff --git a/debian/changelog b/debian/changelog --- a/debian/changelog +++ b/debian/changelog @@ -2469,6 +2469,12 @@ monetdb (11.27.11) unstable; urgency=low monetdb (11.27.9) unstable; urgency=low + * BZ#6460: selinux doen't allow mmap + + -- Sjoerd Mullender <sjo...@acm.org> Sun, 5 Nov 2017 09:56:39 +0100 + +monetdb (11.27.9) unstable; urgency=low + * Rebuilt. * BZ#6207: identifier ambiguous when grouping and selecting the same column twice diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c --- a/gdk/gdk_join.c +++ b/gdk/gdk_join.c @@ -2500,7 +2500,8 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B struct canditer *restrict lci, struct canditer *restrict rci, bool nil_matches, bool nil_on_miss, bool semi, bool only_misses, bool not_in, bool max_one, - BUN estimate, lng t0, bool swapped, bool hash, bool phash, + BUN estimate, lng t0, bool swapped, + bool hash, bool phash, bool hash_cand, const char *reason) { oid lo, ro; @@ -2519,7 +2520,6 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B const char *v = (const char *) &lval; bool lskipped = false; /* whether we skipped values in l */ Hash *restrict hsh = NULL; - bool hash_cand = false; assert(!BATtvoid(r)); assert(ATOMtype(l->ttype) == ATOMtype(r->ttype)); @@ -2555,7 +2555,24 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B rl = rci->seq - r->hseqbase; rh = canditer_last(rci) + 1 - r->hseqbase; - if (phash) { + if (hash_cand) { + /* we need to create a hash on r specific for the + * candidate list */ + char ext[32]; + assert(rci->s); + MT_thread_setalgorithm(swapped ? "hashjoin using candidate hash (swapped)" : "hashjoin using candidate hash"); + TRC_DEBUG(ALGO, ALGOBATFMT ": creating " + "hash for candidate list " ALGOBATFMT "%s%s\n", + ALGOBATPAR(r), ALGOBATPAR(rci->s), + r->thash ? " ignoring existing hash" : "", + swapped ? " (swapped)" : ""); + if (snprintf(ext, sizeof(ext), "thshjn%x", + (unsigned) rci->s->batCacheid) >= (int) sizeof(ext)) + goto bailout; + if ((hsh = BAThash_impl(r, rci, ext)) == NULL) { + goto bailout; + } + } else if (phash) { /* there is a hash on the parent which we should use */ MT_thread_setalgorithm(swapped ? "hashjoin using parent hash (swapped)" : "hashjoin using parent hash"); BAT *b = BBPdescriptor(VIEWtparent(r)); @@ -2577,24 +2594,6 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B "existing hash%s\n", ALGOBATPAR(r), swapped ? " (swapped)" : ""); - } else if (rci->tpe != cand_dense || rci->ncand != BATcount(r)) { - /* we need to create a hash on r specific for the - * candidate list */ - char ext[32]; - assert(rci->s); - MT_thread_setalgorithm(swapped ? "hashjoin using candidate hash (swapped)" : "hashjoin using candidate hash"); - TRC_DEBUG(ALGO, ALGOBATFMT ": creating " - "hash for candidate list " ALGOBATFMT "%s%s\n", - ALGOBATPAR(r), ALGOBATPAR(rci->s), - r->thash ? " ignoring existing hash" : "", - swapped ? " (swapped)" : ""); - if (snprintf(ext, sizeof(ext), "thshjn%x", - (unsigned) rci->s->batCacheid) >= (int) sizeof(ext)) - goto bailout; - if ((hsh = BAThash_impl(r, rci, ext)) == NULL) { - goto bailout; - } - hash_cand = true; } else { /* we need to create a hash on r */ MT_thread_setalgorithm(swapped ? "hashjoin using new hash (swapped)" : "hashjoin using new hash"); @@ -3071,6 +3070,79 @@ guess_uniques(BAT *b, struct canditer *c return B; } +/* estimate the cost of doing a hashjoin with a hash on r; return value + * is the estimated cost, the last three arguments receive some extra + * information */ +static double +joincost(BAT *r, struct canditer *lci, struct canditer *rci, + bool *hash, bool *phash, bool *cand) +{ + bool rhash = BATcheckhash(r); + bool prhash = false; + bool rcand = false; + double rcost = 1; + bat parent; + BAT *b; + + if (rci->noids > 0) { + /* if we need to do binary search on candidate + * list, take that into account */ + rcost += log2((double) rci->noids); + } + rcost *= lci->ncand; + if (rhash) { + /* average chain length */ + rcost *= (double) BATcount(r) / r->thash->nheads; + } else if ((parent = VIEWtparent(r)) != 0 && + (b = BBPdescriptor(parent)) != NULL && + BATcheckhash(b)) { + rhash = prhash = true; + /* average chain length */ + rcost *= (double) BATcount(b) / b->thash->nheads; + } else { + PROPrec *prop = BATgetprop(r, GDK_NUNIQUE); + if (prop) { + /* we know number of unique values, assume some + * collisions */ + rcost *= 1.1 * ((double) BATcount(r) / prop->v.val.oval); + } else { + /* guess number of unique value and work with that */ + rcost *= 1.1 * ((double) BATcount(r) / guess_uniques(r, &(struct canditer){.tpe=cand_dense, .ncand=BATcount(r)})); + } +#ifdef PERSISTENTHASH + /* only count the cost of creating the hash for + * non-persistent bats */ + if (!(BBP_status(r->batCacheid) & BBPEXISTING) || r->theap.dirty || GDKinmemory(r->theap.farmid)) +#endif + rcost += BATcount(r) * 2.0; + } + if (rci->ncand != BATcount(r)) { + /* instead of using the hash on r (cost in rcost), we + * can build a new hash on r taking the candidate list + * into account */ + double rccost; + PROPrec *prop = BATgetprop(r, GDK_NUNIQUE); + if (prop) { + /* we know number of unique values, assume some + * chains */ + rccost = 1.1 * ((double) BATcount(r) / prop->v.val.oval); + } else { + /* guess number of unique value and work with that */ + rccost = 1.1 * ((double) BATcount(r) / guess_uniques(r, rci)); + } + rccost *= lci->ncand; + rccost += rci->ncand * 2.0; /* cost of building the hash */ + if (rccost < rcost) { + rcost = rccost; + rcand = true; + } + } + *hash = rhash; + *phash = prhash; + *cand = rcand; + return rcost; +} + #define MASK_EQ 1 #define MASK_LT 2 #define MASK_GT 4 @@ -3412,7 +3484,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B { BUN lcnt, rcnt; struct canditer lci, rci; - bool rhash, prhash = false; + bool rhash, prhash, rcand; bat parent; MT_thread_setalgorithm(__func__); @@ -3502,115 +3574,17 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B nil_matches, nil_on_miss, semi, only_misses, not_in, max_one, estimate, t0, false, func); } - rhash = BATcheckhash(r); - double rcost = 0; - if (rhash) { - /* average chain length */ - rcost = (double) BATcount(r) / r->thash->nheads; - } else if ((parent = VIEWtparent(r)) != 0) { - BAT *b = BBPdescriptor(parent); - rhash = prhash = BATcheckhash(b); - if (prhash) { - /* average chain length */ - rcost = (double) BATcount(b) / b->thash->nheads; - } - } - if (!rhash) { - /* no hash table, so cost includes time to build the - * hash table (single scan) plus the time to do the - * lookups (also single scan, we assume some chains) */ - PROPrec *prop = BATgetprop(r, GDK_NUNIQUE); - if (prop) { - /* we know number of unique values, assume some - * chains */ - rcost = lci.ncand * 1.1 * ((double) BATcount(r) / prop->v.val.oval); - } else { - /* guess number of unique value and work with that */ - rcost = lci.ncand * 1.1 * ((double) BATcount(r) / guess_uniques(r, &rci)); - } -#ifdef PERSISTENTHASH - /* only count the cost of creating the hash for - * non-persistent bats */ - if (rci.ncand != BATcount(r) || !(BBP_status(r->batCacheid) & BBPEXISTING) || r->theap.dirty || GDKinmemory(r->theap.farmid)) -#endif - rcost += rci.ncand * 2.0; - } else { - if (rci.noids > 0) { - /* if we need to do binary search on candidate - * list, take that into account */ - rcost *= log2((double) rci.noids) + 1; - } - /* all of this so far for each lookup of which we have - * rci.ncand */ - rcost *= lci.ncand; - if (rci.ncand < BATcount(r) && - rci.ncand * 2 + lci.ncand * 1.1 < rcost) { - /* it's cheaper to rebuild the hash table for - * just the candidates (this saves on the - * binary searches), again, assume some - * chains */ - rhash = prhash = false; - rcost = rci.ncand * 2 + lci.ncand * 1.1; - } - } + double rcost; + rcost = joincost(r, &lci, &rci, &rhash, &prhash, &rcand); if (!nil_on_miss && !only_misses && !not_in && !max_one) { /* maybe do a hash join on the swapped operands; if we * do, we need to sort the output, so we take that into * account as well */ - bool lhash = BATcheckhash(l); - bool plhash = false; - double lcost = 0; - if (lhash) { - /* average chain length */ - lcost = (double) BATcount(l) / l->thash->nheads; - } else if ((parent = VIEWtparent(l)) != 0) { - BAT *b = BBPdescriptor(parent); - lhash = plhash = BATcheckhash(b); - if (plhash) { - /* average chain length */ - lcost = (double) BATcount(b) / b->thash->nheads; - } - } - if (!lhash) { - /* no hash table, so cost includes time to build the - * hash table (single scan) plus the time to do the - * lookups (also single scan, we assume some chains) */ - PROPrec *prop = BATgetprop(l, GDK_NUNIQUE); - if (prop) { - /* we know number of unique values, assume some - * chains */ - lcost = rci.ncand * 1.1 * ((double) BATcount(l) / prop->v.val.oval); - } else { - /* guess number of unique value and work - * with that */ - lcost = rci.ncand * 1.1 * ((double) BATcount(l) / guess_uniques(l, &lci)); - } -#ifdef PERSISTENTHASH - /* only count the cost of creating the hash - * for non-persistent bats */ - if (lci.ncand != BATcount(l) || !(BBP_status(l->batCacheid) & BBPEXISTING) || l->theap.dirty || GDKinmemory(l->theap.farmid)) -#endif - lcost += lci.ncand * 2.0; - } else { - if (lci.noids > 0) { - /* if we need to do binary search on candidate - * list, take that into account */ - lcost *= log2((double) lci.noids) + 1; - } - /* all of this so far for each lookup of which we have - * rci.ncand */ - lcost *= rci.ncand; - if (lci.ncand < BATcount(l) && - lci.ncand * 2 + rci.ncand * 1.1 < lcost) { - /* it's cheaper to rebuild the hash table for - * just the candidates (this saves on the - * binary searches), again, assume some - * chains */ _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list