Changeset: b1c0560e32fa for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=b1c0560e32fa Modified Files: gdk/gdk_select.c Branch: default Log Message:
A first take on using ordered index for range joins. diffs (184 lines): diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c --- a/gdk/gdk_select.c +++ b/gdk/gdk_select.c @@ -1955,6 +1955,8 @@ rangejoin(BAT *r1, BAT *r2, BAT *l, BAT oid rlval = oid_nil, rhval = oid_nil; int sorted = 0; /* which column is sorted */ BAT *tmp; + int use_orderidx = 0; + oid ll, lh; assert(BAThdense(l)); assert(BAThdense(rl)); @@ -1971,12 +1973,13 @@ rangejoin(BAT *r1, BAT *r2, BAT *l, BAT assert(r2->htype == TYPE_void); assert(r2->ttype == TYPE_oid); - ALGODEBUG fprintf(stderr, "#rangejoin(l=%s#" BUNFMT "[%s]%s%s," + ALGODEBUG fprintf(stderr, "#rangejoin(l=%s#" BUNFMT "[%s]%s%s%s," "rl=%s#" BUNFMT "[%s]%s%s,rh=%s#" BUNFMT "[%s]%s%s," "sl=%s#" BUNFMT "%s%s,sr=%s#" BUNFMT "%s%s)\n", BATgetId(l), BATcount(l), ATOMname(l->ttype), l->tsorted ? "-sorted" : "", l->trevsorted ? "-revsorted" : "", + BATcheckorderidx(l) ? "-orderedidx" : "", BATgetId(rl), BATcount(rl), ATOMname(rl->ttype), rl->tsorted ? "-sorted" : "", rl->trevsorted ? "-revsorted" : "", @@ -2033,7 +2036,17 @@ rangejoin(BAT *r1, BAT *r2, BAT *l, BAT lvars = rlvars = rhvars = NULL; } - if (BATordered(l) || BATordered_rev(l)) { + ll = l->hseqbase; + lh = ll + l->batCount; + if ((!sl || (sl && BATtdense(sl))) && + (BATcheckorderidx(l) || (VIEWtparent(l) && BATcheckorderidx(BBPquickdesc(abs(VIEWtparent(l)), 0))))) { + use_orderidx = 1; + if (VIEWtparent(l) && !BATcheckorderidx(l)) { + l = BBPdescriptor(abs(VIEWtparent(l))); + } + } + + if (BATordered(l) || BATordered_rev(l) || use_orderidx) { /* left column is sorted, use binary search */ const oid *sval = sl ? (const oid *) Tloc(sl, BUNfirst(sl)) : NULL; @@ -2080,18 +2093,19 @@ rangejoin(BAT *r1, BAT *r2, BAT *l, BAT } if (cmp(vrl, nil) == 0 || cmp(vrh, nil) == 0) continue; - if (l->tsorted) { + if (l->tsorted || use_orderidx) { if (li) - low = SORTfndfirst(l, vrl); + low = use_orderidx? ORDERfndfirst(l, vrl): SORTfndfirst(l, vrl); else - low = SORTfndlast(l, vrl); + low = use_orderidx? ORDERfndlast(l, vrl): SORTfndlast(l, vrl); low -= BUNfirst(l); if (hi) - high = SORTfndlast(l, vrh); + high = use_orderidx? ORDERfndlast(l, vrh): SORTfndlast(l, vrh); else - high = SORTfndfirst(l, vrh); + high = use_orderidx? ORDERfndfirst(l, vrh): SORTfndfirst(l, vrh); high -= BUNfirst(l); } else { + assert(l->trevsorted); if (li) low = SORTfndlast(l, vrh); else @@ -2107,14 +2121,20 @@ rangejoin(BAT *r1, BAT *r2, BAT *l, BAT continue; low += l->hseqbase; high += l->hseqbase; - if (sl) { + if (use_orderidx) { + const oid *ord; oid o; + ord = (const oid *) l->torderidx->base + ORDERIDXOFF; - o = (oid) low; - low = SORTfndfirst(sl, &o) - BUNfirst(sl); - o = (oid) high; - high = SORTfndfirst(sl, &o) - BUNfirst(sl); - assert(high >= low); + if (sl) { + assert(BATtdense(sl)); + o = (oid) ((*(ord+low))&BUN_UNMSK); + ll = SORTfndfirst(sl, &o) - BUNfirst(sl); + o = (oid) ((*(ord+high))&BUN_UNMSK); + lh = SORTfndfirst(sl, &o) - BUNfirst(sl); + } + assert(lh >= ll); + if (BATcapacity(r1) < BUNlast(r1) + high - low) { cnt = BUNlast(r1) + high - low + 1024; if (cnt > maxsize) @@ -2128,30 +2148,64 @@ rangejoin(BAT *r1, BAT *r2, BAT *l, BAT dst1 = (oid *) Tloc(r1, BUNfirst(r1)); dst2 = (oid *) Tloc(r2, BUNfirst(r2)); } + + ord += low; while (low < high) { - dst1[r1->batCount++] = sval[low]; - dst2[r2->batCount++] = ro; - low++; + if (ll <= ((*ord)&BUN_UNMSK) && ((*ord)&BUN_UNMSK) < lh) { + dst1[r1->batCount++] = ((*ord)&BUN_UNMSK); + dst2[r2->batCount++] = ro; + low++; + ord++; + } } } else { - /* [low..high) */ - if (BATcapacity(r1) < BUNlast(r1) + high - low) { - cnt = BUNlast(r1) + high - low + 1024; - if (cnt > maxsize) - cnt = maxsize; - BATsetcount(r1, BATcount(r1)); - BATsetcount(r2, BATcount(r2)); - if (BATextend(r1, cnt) != GDK_SUCCEED || - BATextend(r2, cnt) != GDK_SUCCEED) - goto bailout; - assert(BATcapacity(r1) == BATcapacity(r2)); - dst1 = (oid *) Tloc(r1, BUNfirst(r1)); - dst2 = (oid *) Tloc(r2, BUNfirst(r2)); - } - while (low < high) { - dst1[r1->batCount++] = low; - dst2[r2->batCount++] = ro; - low++; + if (sl) { + oid o; + + o = (oid) low; + low = SORTfndfirst(sl, &o) - BUNfirst(sl); + o = (oid) high; + high = SORTfndfirst(sl, &o) - BUNfirst(sl); + assert(high >= low); + + if (BATcapacity(r1) < BUNlast(r1) + high - low) { + cnt = BUNlast(r1) + high - low + 1024; + if (cnt > maxsize) + cnt = maxsize; + BATsetcount(r1, BATcount(r1)); + BATsetcount(r2, BATcount(r2)); + if (BATextend(r1, cnt) != GDK_SUCCEED || + BATextend(r2, cnt) != GDK_SUCCEED) + goto bailout; + assert(BATcapacity(r1) == BATcapacity(r2)); + dst1 = (oid *) Tloc(r1, BUNfirst(r1)); + dst2 = (oid *) Tloc(r2, BUNfirst(r2)); + } + while (low < high) { + dst1[r1->batCount++] = sval[low]; + dst2[r2->batCount++] = ro; + low++; + } + } else { + /* [low..high) */ + if (BATcapacity(r1) < BUNlast(r1) + high - low) { + cnt = BUNlast(r1) + high - low + 1024; + if (cnt > maxsize) + cnt = maxsize; + BATsetcount(r1, BATcount(r1)); + BATsetcount(r2, BATcount(r2)); + if (BATextend(r1, cnt) != GDK_SUCCEED || + BATextend(r2, cnt) != GDK_SUCCEED) + goto bailout; + assert(BATcapacity(r1) == BATcapacity(r2)); + dst1 = (oid *) Tloc(r1, BUNfirst(r1)); + dst2 = (oid *) Tloc(r2, BUNfirst(r2)); + } + while (low < high) { + dst1[r1->batCount++] = low; + dst2[r2->batCount++] = ro; + low++; + } } } } _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list