Changeset: b37341fd3a3d for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=b37341fd3a3d Modified Files: gdk/gdk_join.c Branch: pushdown Log Message:
Don't delegate to leftjoin. diffs (152 lines): diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c --- a/gdk/gdk_join.c +++ b/gdk/gdk_join.c @@ -2543,9 +2543,10 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B /* there is a hash on the parent which we should use */ BAT *b = BBPdescriptor(VIEWtparent(r)); TRC_DEBUG(ALGO, "%s(%s): using " - "parent(" ALGOBATFMT ") for hash\n", + "parent(" ALGOBATFMT ") for hash%s\n", __func__, - BATgetId(r), ALGOBATPAR(b)); + BATgetId(r), ALGOBATPAR(b), + swapped ? " (swapped)" : ""); hsh = b->thash; roff = (BUN) ((r->theap.base - b->theap.base) >> r->tshift); rl += roff; @@ -2554,15 +2555,20 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B } else if (hash) { /* there is a hash on r which we should use */ hsh = r->thash; + TRC_DEBUG(ALGO, ALGOBATFMT ": using " + "existing hash%s\n", + ALGOBATPAR(r), + swapped ? " (swapped)" : ""); } else if (rci->tpe != cand_dense || rci->ncand != BATcount(r)) { /* we need to create a hash on r specific for the * candidate list */ char ext[32]; assert(rci->s); - TRC_DEBUG(ALGO, "%s(%s): creating " - "hash for candidate list\n", - __func__, - BATgetId(r)); + TRC_DEBUG(ALGO, ALGOBATFMT ": creating " + "hash for candidate list " ALGOBATFMT "%s%s\n", + ALGOBATPAR(r), ALGOBATPAR(rci->s), + r->thash ? " ignoring existing hash" : "", + swapped ? " (swapped)" : ""); if (snprintf(ext, sizeof(ext), "thshjn%x", rci->s->batCacheid) >= (int) sizeof(ext)) goto bailout; @@ -2572,6 +2578,9 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B hash_cand = true; } else { /* we need to create a hash on r */ + TRC_DEBUG(ALGO, ALGOBATFMT "): creating hash%s\n", + ALGOBATPAR(r), + swapped ? " (swapped)" : ""); if (BAThash(r) != GDK_SUCCEED) goto bailout; hsh = r->thash; @@ -3763,16 +3772,12 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA bool plhash = false, prhash = false; bool swap; bat parent; + gdk_return rc; lng t0 = 0; - const char *reason = ""; + BAT *r2 = NULL; TRC_DEBUG_IF(ALGO) t0 = GDKusec(); - if (r2p == NULL) - return leftjoin(r1p, NULL, l, r, sl, sr, nil_matches, - false, false, false, false, estimate, - __func__, t0); - if ((parent = VIEWtparent(l)) != 0) { BAT *b = BBPdescriptor(parent); if (l->hseqbase == b->hseqbase && @@ -3790,7 +3795,8 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA canditer_init(&rci, r, sr); *r1p = NULL; - *r2p = NULL; + if (r2p) + *r2p = NULL; if (joinparamcheck(l, r, NULL, sl, sr, __func__) != GDK_SUCCEED) return GDK_FAIL; @@ -3814,16 +3820,22 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA nil_matches, t0, false, __func__); } else if (rci.ncand == 1 || (BATordered(r) && BATordered_rev(r)) || (r->ttype == TYPE_void && is_oid_nil(r->tseqbase))) { /* single value to join, use select */ - return selectjoin(r2p, r1p, r, l, &rci, &lci, - nil_matches, t0, true, __func__); + rc = selectjoin(r2p ? r2p : &r2, r1p, r, l, &rci, &lci, + nil_matches, t0, true, __func__); + if (rc == GDK_SUCCEED && r2p == NULL) + BBPunfix(r2->batCacheid); + return rc; } else if (BATtdense(r) && rci.tpe == cand_dense) { /* use special implementation for dense right-hand side */ return mergejoin_void(r1p, r2p, l, r, &lci, &rci, false, false, t0, false, __func__); } else if (BATtdense(l) && lci.tpe == cand_dense) { /* use special implementation for dense right-hand side */ - return mergejoin_void(r2p, r1p, r, l, &rci, &lci, - false, false, t0, true, __func__); + rc = mergejoin_void(r2p ? r2p : &r2, r1p, r, l, &rci, &lci, + false, false, t0, true, __func__); + if (rc == GDK_SUCCEED && r2p == NULL) + BBPunfix(r2->batCacheid); + return rc; } else if ((BATordered(l) || BATordered_rev(l)) && (BATordered(r) || BATordered_rev(r))) { /* both sorted */ @@ -3913,8 +3925,9 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA } /* if the cost of doing searches on l is lower than the cost - * to do searches on r, we swap (i.e., lookups on right) */ - swap = (lcost < rcost); + * to do searches on r, we swap (i.e., lookups on right), but + * add a cost */ + swap = (1.2 * lcost < rcost); if ((BATordered(r) || BATordered_rev(r)) && (lci.ncand * (log2(rci.ncand) + 1) < (swap ? lcost : rcost))) { @@ -3928,19 +3941,25 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA (rci.ncand * (log2(lci.ncand) + 1) < (swap ? lcost : rcost))) { /* l is sorted and it is cheaper to do multiple binary * searches than it is to use a hash */ - return mergejoin(r2p, r1p, r, l, &rci, &lci, - nil_matches, false, false, false, false, - estimate, t0, true, __func__); + rc = mergejoin(r2p ? r2p : &r2, r1p, r, l, &rci, &lci, + nil_matches, false, false, false, false, + estimate, t0, true, __func__); + if (rc == GDK_SUCCEED && r2p == NULL) + BBPunfix(r2->batCacheid); + return rc; } if (swap) { - return hashjoin(r2p, r1p, r, l, &rci, &lci, - nil_matches, false, false, false, false, - estimate, t0, true, lhash, plhash, reason); + rc = hashjoin(r2p ? r2p : &r2, r1p, r, l, &rci, &lci, + nil_matches, false, false, false, false, + estimate, t0, true, lhash, plhash, __func__); + if (rc == GDK_SUCCEED && r2p == NULL) + BBPunfix(r2->batCacheid); + return rc; } else { return hashjoin(r1p, r2p, l, r, &lci, &rci, nil_matches, false, false, false, false, - estimate, t0, false, rhash, prhash, reason); + estimate, t0, false, rhash, prhash, __func__); } } _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list