Changeset: 8403f283f10f for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=8403f283f10f Modified Files: clients/Tests/exports.stable.out gdk/gdk_cand.c gdk/gdk_cand.h gdk/gdk_join.c Branch: candidate-exceptions Log Message:
Use candidate iterators for join. diffs (truncated from 2842 to 300 lines): diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -398,8 +398,12 @@ const bte bte_nil; oid canditer_idx(struct canditer *ci, BUN p); BUN canditer_init(struct canditer *ci, BAT *b, BAT *s); oid canditer_last(struct canditer *ci); +oid canditer_peek(struct canditer *ci); +oid canditer_peekprev(struct canditer *ci); +oid canditer_prev(struct canditer *ci); void canditer_reset(struct canditer *ci); BUN canditer_search(struct canditer *ci, oid o, bool next); +void canditer_setidx(struct canditer *ci, BUN p); BAT *canditer_slice(struct canditer *ci, BUN lo, BUN hi); BAT *canditer_slice2(struct canditer *ci, BUN lo1, BUN hi1, BUN lo2, BUN hi2); int closedir(DIR *dir); diff --git a/gdk/gdk_cand.c b/gdk/gdk_cand.c --- a/gdk/gdk_cand.c +++ b/gdk/gdk_cand.c @@ -613,6 +613,72 @@ canditer_init(struct canditer *ci, BAT * } oid +canditer_peek(struct canditer *ci) +{ + if (ci->next == ci->ncand) + return oid_nil; + switch (ci->tpe) { + case cand_dense: + return ci->seq + ci->next; + case cand_materialized: + assert(ci->next < ci->noids); + return ci->oids[ci->next]; + case cand_except: + /* work around compiler error: control reaches end of + * non-void function */ + break; + } + oid o = ci->seq + ci->add + ci->next; + while (ci->add < ci->noids && o == ci->oids[ci->add]) { + ci->add++; + o++; + } + return o; +} + +oid +canditer_prev(struct canditer *ci) +{ + if (ci->next == 0) + return oid_nil; + switch (ci->tpe) { + case cand_dense: + return ci->seq + --ci->next; + case cand_materialized: + return ci->oids[--ci->next]; + case cand_except: + break; + } + oid o = ci->seq + ci->add + --ci->next; + while (ci->add > 0 && o == ci->oids[ci->add - 1]) { + ci->add--; + o--; + } + return o; +} + +oid +canditer_peekprev(struct canditer *ci) +{ + if (ci->next == 0) + return oid_nil; + switch (ci->tpe) { + case cand_dense: + return ci->seq + ci->next - 1; + case cand_materialized: + return ci->oids[ci->next - 1]; + case cand_except: + break; + } + oid o = ci->seq + ci->add + ci->next - 1; + while (ci->add > 0 && o == ci->oids[ci->add - 1]) { + ci->add--; + o--; + } + return o; +} + +oid canditer_last(struct canditer *ci) { if (ci->ncand == 0) @@ -665,6 +731,22 @@ canditer_idx(struct canditer *ci, BUN p) } void +canditer_setidx(struct canditer *ci, BUN p) +{ + if (p != ci->next) { + if (p >= ci->ncand) { + ci->next = ci->ncand; + if (ci->tpe == cand_dense) + ci->add = ci->noids; + } else { + ci->next = p; + if (ci->tpe == cand_dense) + ci->add = canditer_idx(ci, p) - ci->seq - p; + } + } +} + +void canditer_reset(struct canditer *ci) { ci->next = 0; diff --git a/gdk/gdk_cand.h b/gdk/gdk_cand.h --- a/gdk/gdk_cand.h +++ b/gdk/gdk_cand.h @@ -100,8 +100,12 @@ canditer_next(struct canditer *ci) } gdk_export BUN canditer_init(struct canditer *ci, BAT *b, BAT *s); +gdk_export oid canditer_peek(struct canditer *ci); gdk_export oid canditer_last(struct canditer *ci); +gdk_export oid canditer_prev(struct canditer *ci); +gdk_export oid canditer_peekprev(struct canditer *ci); gdk_export oid canditer_idx(struct canditer *ci, BUN p); +gdk_export void canditer_setidx(struct canditer *ci, BUN p); gdk_export void canditer_reset(struct canditer *ci); gdk_export BUN canditer_search(struct canditer *ci, oid o, bool next); gdk_export BAT *canditer_slice(struct canditer *ci, BUN lo, BUN hi); diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c --- a/gdk/gdk_join.c +++ b/gdk/gdk_join.c @@ -212,7 +212,7 @@ joininitresults(BAT **r1p, BAT **r2p, BU #define APPEND(b, o) (((oid *) b->theap.base)[b->batCount++] = (o)) -#define MAYBEEXTEND_PROGRESS(CNT, PROGRESS) \ +#define MAYBEEXTEND_PROGRESS(CNT, LCUR, LCNT) \ do { \ BUN N = (CNT); \ if (BATcount(r1) + N > BATcapacity(r1)) { \ @@ -221,7 +221,7 @@ joininitresults(BAT **r1p, BAT **r2p, BU /* so far is used as the fraction of the */ \ /* expected result size we've produced so */ \ /* far) */ \ - BUN newcap = (BUN) ((double) lcnt / (lcnt - (PROGRESS)) * (BATcount(r1) + N) * 1.5); \ + BUN newcap = (BUN) ((double) (LCNT) / (LCUR) * (BATcount(r1) + N) * 1.5); \ if (newcap < N + BATcount(r1)) \ newcap = N + BATcount(r1) + 1024; \ if (newcap > maxsize) \ @@ -240,8 +240,8 @@ joininitresults(BAT **r1p, BAT **r2p, BU } \ } while (0) -#define MAYBEEXTEND(CNT) MAYBEEXTEND_PROGRESS(CNT, lcand ? (BUN) (lcandend - lcand) : (lend - lstart)) -#define MAYBEEXTEND_NO_CAND(CNT) MAYBEEXTEND_PROGRESS(CNT, lend - lstart) +#define MAYBEEXTEND(CNT, CI) MAYBEEXTEND_PROGRESS(CNT, (CI)->next, (CI)->ncand) +#define MAYBEEXTEND_NO_CAND(CNT) MAYBEEXTEND_PROGRESS(CNT, lstart, lend) /* Return BATs through r1p and r2p for the case that there is no * match between l and r, taking all flags into consideration. @@ -251,14 +251,12 @@ joininitresults(BAT **r1p, BAT **r2p, BU * values of l, and *r2p (if r2p is not NULL) is all nil. If neither * of those flags is set, the result is two empty BATs. */ static gdk_return -nomatch(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BUN lstart, BUN lend, - const oid *lcand, const oid *lcandend, +nomatch(BAT **r1p, BAT **r2p, BAT *l, BAT *r, struct canditer *lci, bool nil_on_miss, bool only_misses, const char *func, lng t0) { - BUN cnt; BAT *r1, *r2 = NULL; - if (lstart == lend || !(nil_on_miss | only_misses)) { + if (lci->ncand == 0 || !(nil_on_miss | only_misses)) { /* return empty BATs */ if ((r1 = BATdense(0, 0, 0)) == NULL) return GDK_FAIL; @@ -278,34 +276,9 @@ nomatch(BAT **r1p, BAT **r2p, BAT *l, BA return GDK_SUCCEED; } - if (lcand) { - cnt = (BUN) (lcandend - lcand); - r1 = COLnew(0, TYPE_oid, cnt, TRANSIENT); - if (r1 == NULL) - return GDK_FAIL; - memcpy(Tloc(r1, 0), lcand, cnt * SIZEOF_OID); - r1->tkey = true; - r1->tnokey[0] = r1->tnokey[1] = 0; - r1->tsorted = true; - r1->tnosorted = 0; - if (cnt == 1) { - r1->trevsorted = true; - r1->tnorevsorted = 0; - } else { - r1->trevsorted = false; - r1->tnorevsorted = 1; - } - r1->tseqbase = oid_nil; - r1->tnil = false; - r1->tnonil = true; - BATsetcount(r1, cnt); - } else { - cnt = lend - lstart; - if ((r1 = BATdense(0, l->hseqbase + lstart, cnt)) == NULL) - return GDK_FAIL; - } + r1 = canditer_slice(lci, 0, lci->ncand); if (r2p) { - if ((r2 = BATconstant(0, TYPE_void, &oid_nil, cnt, TRANSIENT)) == NULL) { + if ((r2 = BATconstant(0, TYPE_void, &oid_nil, lci->ncand, TRANSIENT)) == NULL) { BBPreclaim(r1); return GDK_FAIL; } @@ -325,9 +298,7 @@ nomatch(BAT **r1p, BAT **r2p, BAT *l, BA * point select to find matches in the right column. */ static gdk_return selectjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, - BUN lstart, BUN lend, BUN lcnt, - const oid *restrict lcand, const oid *lcandend, - bool nil_matches, lng t0, bool swapped) + struct canditer *lci, bool nil_matches, lng t0, bool swapped) { BATiter li = bat_iterator(l); const void *v; @@ -340,20 +311,16 @@ selectjoin(BAT **r1p, BAT **r2p, BAT *l, nil_matches, swapped ? " swapped" : ""); - assert(lcnt > 0); - assert(lcnt == 1 || (l->tsorted && l->trevsorted)); + assert(lci->ncand > 0); + assert(lci->ncand == 1 || (l->tsorted && l->trevsorted)); - if (lcand) { - v = BUNtail(li, *lcand - l->hseqbase); - } else { - v = BUNtail(li, lstart); - } + oid o = canditer_next(lci); + v = BUNtail(li, o - l->hseqbase); if (!nil_matches && (*ATOMcompare(l->ttype))(v, ATOMnilptr(l->ttype)) == 0) { /* NIL doesn't match anything */ - return nomatch(r1p, r2p, l, r, lstart, lend, - lcand, lcandend, false, false, + return nomatch(r1p, r2p, l, r, lci, false, false, "selectjoin", t0); } @@ -363,12 +330,11 @@ selectjoin(BAT **r1p, BAT **r2p, BAT *l, } if (BATcount(bn) == 0) { BBPunfix(bn->batCacheid); - return nomatch(r1p, r2p, l, r, lstart, lend, - lcand, lcandend, false, false, + return nomatch(r1p, r2p, l, r, lci, false, false, "selectjoin", t0); } - BAT *r1 = COLnew(0, TYPE_oid, lcnt * BATcount(bn), TRANSIENT); - BAT *r2 = COLnew(0, TYPE_oid, lcnt * BATcount(bn), TRANSIENT); + BAT *r1 = COLnew(0, TYPE_oid, lci->ncand * BATcount(bn), TRANSIENT); + BAT *r2 = COLnew(0, TYPE_oid, lci->ncand * BATcount(bn), TRANSIENT); if (r1 == NULL || r2 == NULL) { BBPunfix(bn->batCacheid); BBPreclaim(r1); @@ -377,15 +343,15 @@ selectjoin(BAT **r1p, BAT **r2p, BAT *l, } r1->tsorted = true; - r1->trevsorted = lcnt == 1; - r1->tseqbase = BATcount(bn) == 1 && lcand == NULL ? l->hseqbase + lstart : oid_nil; + r1->trevsorted = lci->ncand == 1; + r1->tseqbase = BATcount(bn) == 1 && lci->tpe == cand_dense ? o : oid_nil; r1->tkey = BATcount(bn) == 1; r1->tnil = false; r1->tnonil = true; - r2->tsorted = lcnt == 1 || BATcount(bn) == 1; + r2->tsorted = lci->ncand == 1 || BATcount(bn) == 1; r2->trevsorted = BATcount(bn) == 1; - r2->tseqbase = lcnt == 1 && BATtdense(bn) ? bn->tseqbase : oid_nil; - r2->tkey = lcnt == 1; + r2->tseqbase = lci->ncand == 1 && BATtdense(bn) ? bn->tseqbase : oid_nil; + r2->tkey = lci->ncand == 1; r2->tnil = false; r2->tnonil = true; if (BATtdense(bn)) { @@ -394,49 +360,29 @@ selectjoin(BAT **r1p, BAT **r2p, BAT *l, oid bno = bn->tseqbase; BUN q = BATcount(bn); - if (lcand) { - while (lcand < lcandend) { - for (BUN p = 0; p < q; p++) { - *o1p++ = *lcand; - *o2p++ = bno + p; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list