Changeset: 769197807284 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=769197807284 Modified Files: clients/Tests/MAL-signatures.stable.out clients/Tests/MAL-signatures.stable.out.int128 clients/Tests/exports.stable.out gdk/ChangeLog gdk/gdk.h gdk/gdk_join.c gdk/gdk_private.h gdk/gdk_select.c monetdb5/ChangeLog monetdb5/modules/kernel/algebra.c monetdb5/modules/kernel/algebra.h monetdb5/modules/kernel/algebra.mal sql/backends/monet5/sql_statement.c Branch: default Log Message:
Implemented anti and symmetric flags for rangejoin. diffs (truncated from 607 to 300 lines): diff --git a/clients/Tests/MAL-signatures.stable.out b/clients/Tests/MAL-signatures.stable.out --- a/clients/Tests/MAL-signatures.stable.out +++ b/clients/Tests/MAL-signatures.stable.out @@ -577,7 +577,7 @@ stdout of test 'MAL-signatures` in direc [ "algebra", "project", "pattern algebra.project(b:bat[:any_1], v:any_3):bat[:any_3] ", "ALGprojecttail;", "Fill the tail with a constant" ] [ "algebra", "projection", "command algebra.projection(left:bat[:oid], right:bat[:any_3]):bat[:any_3] ", "ALGprojection;", "Project left input onto right input." ] [ "algebra", "projectionpath", "pattern algebra.projectionpath(l:bat[:any]...):bat[:any] ", "ALGprojectionpath;", "Routine to handle join paths. The type analysis is rather tricky." ] -[ "algebra", "rangejoin", "command algebra.rangejoin(l:bat[:any_1], r1:bat[:any_1], r2:bat[:any_1], sl:bat[:oid], sr:bat[:oid], li:bit, hi:bit, estimate:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGrangejoin;", "Range join: values in l and r1/r2 match if r1 <[=] l <[=] r2" ] +[ "algebra", "rangejoin", "command algebra.rangejoin(l:bat[:any_1], r1:bat[:any_1], r2:bat[:any_1], sl:bat[:oid], sr:bat[:oid], li:bit, hi:bit, anti:bit, symmetric:bit, estimate:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGrangejoin;", "Range join: values in l and r1/r2 match if r1 <[=] l <[=] r2" ] [ "algebra", "reuse", "command algebra.reuse(b:bat[:any_1]):bat[:any_1] ", "ALGreuse;", "Reuse a temporary BAT if you can. Otherwise,\n\tallocate enough storage to accept result of an\n \toperation (not involving the heap)" ] [ "algebra", "select", "command algebra.select(b:bat[:any_1], low:any_1, high:any_1, li:bit, hi:bit, anti:bit):bat[:oid] ", "ALGselect1;", "Select all head values for which the tail value is in range.\n\tInput is a dense-headed BAT, output is a dense-headed BAT with in\n\tthe tail the head value of the input BAT for which the tail value\n\tis between the values low and high (inclusive if li respectively\n\thi is set). The output BAT is sorted on the tail value. If low\n\tor high is nil, the boundary is not considered (effectively - and\n\t+ infinity). If anti is set, the result is the complement. Nil\n\tvalues in the tail are never matched, unless low=nil, high=nil,\n\tli=1, hi=1, anti=0. All non-nil values are returned if low=nil,\n\thigh=nil, and li, hi are not both 1, or anti=1.\n\tNote that the output is suitable as second input for the other\n\tversion of this function." ] [ "algebra", "select", "command algebra.select(b:bat[:any_1], low:any_1, high:any_1, li:bit, hi:bit, anti:bit, unknown:bit):bat[:oid] ", "ALGselect1nil;", "With unknow set, each nil != nil" ] diff --git a/clients/Tests/MAL-signatures.stable.out.int128 b/clients/Tests/MAL-signatures.stable.out.int128 --- a/clients/Tests/MAL-signatures.stable.out.int128 +++ b/clients/Tests/MAL-signatures.stable.out.int128 @@ -681,7 +681,7 @@ stdout of test 'MAL-signatures` in direc [ "algebra", "project", "pattern algebra.project(b:bat[:any_1], v:any_3):bat[:any_3] ", "ALGprojecttail;", "Fill the tail with a constant" ] [ "algebra", "projection", "command algebra.projection(left:bat[:oid], right:bat[:any_3]):bat[:any_3] ", "ALGprojection;", "Project left input onto right input." ] [ "algebra", "projectionpath", "pattern algebra.projectionpath(l:bat[:any]...):bat[:any] ", "ALGprojectionpath;", "Routine to handle join paths. The type analysis is rather tricky." ] -[ "algebra", "rangejoin", "command algebra.rangejoin(l:bat[:any_1], r1:bat[:any_1], r2:bat[:any_1], sl:bat[:oid], sr:bat[:oid], li:bit, hi:bit, estimate:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGrangejoin;", "Range join: values in l and r1/r2 match if r1 <[=] l <[=] r2" ] +[ "algebra", "rangejoin", "command algebra.rangejoin(l:bat[:any_1], r1:bat[:any_1], r2:bat[:any_1], sl:bat[:oid], sr:bat[:oid], li:bit, hi:bit, anti:bit, symmetric:bit, estimate:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGrangejoin;", "Range join: values in l and r1/r2 match if r1 <[=] l <[=] r2" ] [ "algebra", "reuse", "command algebra.reuse(b:bat[:any_1]):bat[:any_1] ", "ALGreuse;", "Reuse a temporary BAT if you can. Otherwise,\n\tallocate enough storage to accept result of an\n \toperation (not involving the heap)" ] [ "algebra", "select", "command algebra.select(b:bat[:any_1], low:any_1, high:any_1, li:bit, hi:bit, anti:bit):bat[:oid] ", "ALGselect1;", "Select all head values for which the tail value is in range.\n\tInput is a dense-headed BAT, output is a dense-headed BAT with in\n\tthe tail the head value of the input BAT for which the tail value\n\tis between the values low and high (inclusive if li respectively\n\thi is set). The output BAT is sorted on the tail value. If low\n\tor high is nil, the boundary is not considered (effectively - and\n\t+ infinity). If anti is set, the result is the complement. Nil\n\tvalues in the tail are never matched, unless low=nil, high=nil,\n\tli=1, hi=1, anti=0. All non-nil values are returned if low=nil,\n\thigh=nil, and li, hi are not both 1, or anti=1.\n\tNote that the output is suitable as second input for the other\n\tversion of this function." ] [ "algebra", "select", "command algebra.select(b:bat[:any_1], low:any_1, high:any_1, li:bit, hi:bit, anti:bit, unknown:bit):bat[:oid] ", "ALGselect1nil;", "With unknow set, each nil != nil" ] diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -165,7 +165,7 @@ gdk_return BATprintcolumns(stream *s, in gdk_return BATprod(void *res, int tp, BAT *b, BAT *s, bool skip_nils, bool abort_on_error, bool nil_if_empty); BAT *BATproject(BAT *l, BAT *r); BAT *BATprojectchain(BAT **bats); -gdk_return BATrangejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *rl, BAT *rh, BAT *sl, BAT *sr, bool li, bool hi, BUN estimate) __attribute__((__warn_unused_result__)); +gdk_return BATrangejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *rl, BAT *rh, BAT *sl, BAT *sr, bool li, bool hi, bool anti, bool symmetric, BUN estimate) __attribute__((__warn_unused_result__)); gdk_return BATreplace(BAT *b, BAT *p, BAT *n, bool force) __attribute__((__warn_unused_result__)); gdk_return BATroles(BAT *b, const char *tnme); BAT *BATsample(BAT *b, BUN n); @@ -739,7 +739,7 @@ str ALGouterjoin(bat *r1, bat *r2, const str ALGprojection(bat *result, const bat *lid, const bat *rid); str ALGprojectionpath(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); str ALGprojecttail(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); -str ALGrangejoin(bat *r1, bat *r2, const bat *lid, const bat *rlid, const bat *rhid, const bat *slid, const bat *srid, const bit *li, const bit *hi, const lng *estimate); +str ALGrangejoin(bat *r1, bat *r2, const bat *lid, const bat *rlid, const bat *rhid, const bat *slid, const bat *srid, const bit *li, const bit *hi, const bit *anti, const bit *symmetric, const lng *estimate); str ALGreuse(bat *ret, const bat *bid); str ALGselect1(bat *result, const bat *bid, const void *low, const void *high, const bit *li, const bit *hi, const bit *anti); str ALGselect1nil(bat *result, const bat *bid, const void *low, const void *high, const bit *li, const bit *hi, const bit *anti, const bit *unknown); diff --git a/gdk/ChangeLog b/gdk/ChangeLog --- a/gdk/ChangeLog +++ b/gdk/ChangeLog @@ -1,3 +1,7 @@ # ChangeLog file for MonetDB # This file is updated with Maddlog +* Fri Nov 22 2019 Sjoerd Mullender <sjo...@acm.org> +- BATrangeselect now has two extra arguments: anti and symmetric + (both bool). + diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -2753,7 +2753,7 @@ gdk_export gdk_return BATjoin(BAT **r1p, __attribute__((__warn_unused_result__)); gdk_export gdk_return BATbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, const void *c1, const void *c2, bool li, bool hi, BUN estimate) __attribute__((__warn_unused_result__)); -gdk_export gdk_return BATrangejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *rl, BAT *rh, BAT *sl, BAT *sr, bool li, bool hi, BUN estimate) +gdk_export gdk_return BATrangejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *rl, BAT *rh, BAT *sl, BAT *sr, bool li, bool hi, bool anti, bool symmetric, BUN estimate) __attribute__((__warn_unused_result__)); gdk_export BAT *BATproject(BAT *l, BAT *r); gdk_export BAT *BATprojectchain(BAT **bats); diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c --- a/gdk/gdk_join.c +++ b/gdk/gdk_join.c @@ -3758,7 +3758,7 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA gdk_return BATbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, - const void *c1, const void *c2, bool li, bool hi, BUN estimate) + const void *c1, const void *c2, bool li, bool hi, BUN estimate) { lng t0 = 0; @@ -3781,17 +3781,43 @@ BATbandjoin(BAT **r1p, BAT **r2p, BAT *l gdk_return BATrangejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *rl, BAT *rh, - BAT *sl, BAT *sr, bool li, bool hi, BUN estimate) + BAT *sl, BAT *sr, bool li, bool hi, bool anti, bool symmetric, + BUN estimate) { + struct canditer lci, rci; BAT *r1, *r2; BUN maxsize; + lng t0; + ALGODEBUG t0 = GDKusec(); *r1p = NULL; if (r2p) { *r2p = NULL; } if (joinparamcheck(l, rl, rh, sl, sr, "BATrangejoin") != GDK_SUCCEED) return GDK_FAIL; + if (canditer_init(&lci, l, sl) == 0 || + canditer_init(&rci, rl, sr) == 0 || + (l->ttype == TYPE_void && is_oid_nil(l->tseqbase)) || + ((rl->ttype == TYPE_void && is_oid_nil(rl->tseqbase)) && + (rh->ttype == TYPE_void && is_oid_nil(rh->tseqbase)))) { + /* trivial: empty input */ + return nomatch(r1p, r2p, l, rl, &lci, false, false, + __func__, t0); + } + if (rl->ttype == TYPE_void && is_oid_nil(rl->tseqbase)) { + if (!anti) + return nomatch(r1p, r2p, l, rl, &lci, false, false, + __func__, t0); + return thetajoin(r1p, r2p, l, rh, sl, sr, MASK_GT, estimate, t0); + } + if (rh->ttype == TYPE_void && is_oid_nil(rh->tseqbase)) { + if (!anti) + return nomatch(r1p, r2p, l, rl, &lci, false, false, + __func__, t0); + return thetajoin(r1p, r2p, l, rl, sl, sr, MASK_LT, estimate, t0); + } + if ((maxsize = joininitresults(&r1, r2p ? &r2 : NULL, sl ? BATcount(sl) : BATcount(l), sr ? BATcount(sr) : BATcount(rl), false, false, false, false, false, estimate)) == BUN_NONE) return GDK_FAIL; *r1p = r1; @@ -3803,5 +3829,5 @@ BATrangejoin(BAT **r1p, BAT **r2p, BAT * /* note, the rangejoin implementation is in gdk_select.c since * it uses the imprints code there */ - return rangejoin(r1, r2, l, rl, rh, sl, sr, li, hi, maxsize); + return rangejoin(r1, r2, l, rl, rh, &lci, &rci, li, hi, anti, symmetric, maxsize); } diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h --- a/gdk/gdk_private.h +++ b/gdk/gdk_private.h @@ -222,7 +222,7 @@ void IMPSprint(BAT *b) /* never called: __attribute__((__visibility__("hidden"))); __hidden void PROPdestroy(BAT *b) __attribute__((__visibility__("hidden"))); -__hidden gdk_return rangejoin(BAT *r1, BAT *r2, BAT *l, BAT *rl, BAT *rh, BAT *sl, BAT *sr, bool li, bool hi, BUN maxsize) +__hidden gdk_return rangejoin(BAT *r1, BAT *r2, BAT *l, BAT *rl, BAT *rh, struct canditer *lci, struct canditer *rci, bool li, bool hi, bool anti, bool symmetric, BUN maxsize) __attribute__((__warn_unused_result__)) __attribute__((__visibility__("hidden"))); __hidden void strCleanHash(Heap *hp, bool rebuild) diff --git a/gdk/gdk_select.c b/gdk/gdk_select.c --- a/gdk/gdk_select.c +++ b/gdk/gdk_select.c @@ -1699,10 +1699,37 @@ BATthetaselect(BAT *b, BAT *s, const voi s##vals + ((x) * s##width)) #define FVALUE(s, x) (s##vals + ((x) * s##width)) +#define LTany(a,b) ((*cmp)(a, b) < 0) +#define EQany(a,b) ((*cmp)(a, b) == 0) +#define is_any_nil(v) ((v) == NULL || (*cmp)((v), nil) == 0) + +#define less3(a,b,i,t) (is_##t##_nil(a) || is_##t##_nil(b) ? bit_nil : LT##t(a, b) || (i && EQ##t(a, b))) +#define grtr3(a,b,i,t) (is_##t##_nil(a) || is_##t##_nil(b) ? bit_nil : LT##t(b, a) || (i && EQ##t(a, b))) +#define or3(a,b) ((a) == 1 || (b) == 1 ? 1 : is_bit_nil(a) || is_bit_nil(b) ? bit_nil : 0) +#define and3(a,b) ((a) == 0 || (b) == 0 ? 0 : is_bit_nil(a) || is_bit_nil(b) ? bit_nil : 1) +#define not3(a) (is_bit_nil(a) ? bit_nil : !(a)) + +#define between3(v, lo, linc, hi, hinc, TYPE) \ + and3(grtr3(v, lo, linc, TYPE), less3(v, hi, hinc, TYPE)) + +#define BETWEEN(v, lo, linc, hi, hinc, TYPE) \ + (is_##TYPE##_nil(v) \ + ? bit_nil \ + : (bit) (anti \ + ? (symmetric \ + ? not3(or3(between3(v, lo, linc, hi, hinc, TYPE), \ + between3(v, hi, hinc, lo, linc, TYPE))) \ + : not3(between3(v, lo, linc, hi, hinc, TYPE))) \ + : (symmetric \ + ? or3(between3(v, lo, linc, hi, hinc, TYPE), \ + between3(v, hi, hinc, lo, linc, TYPE)) \ + : between3(v, lo, linc, hi, hinc, TYPE)))) + gdk_return -rangejoin(BAT *r1, BAT *r2, BAT *l, BAT *rl, BAT *rh, BAT *sl, BAT *sr, bool li, bool hi, BUN maxsize) +rangejoin(BAT *r1, BAT *r2, BAT *l, BAT *rl, BAT *rh, + struct canditer *lci, struct canditer *rci, + bool li, bool hi, bool anti, bool symmetric, BUN maxsize) { - struct canditer lci, rci; const char *rlvals, *rhvals; const char *lvars, *rlvars, *rhvars; int rlwidth, rhwidth; @@ -1715,7 +1742,7 @@ rangejoin(BAT *r1, BAT *r2, BAT *l, BAT const void *vrl, *vrh; oid ro; oid rlval = oid_nil, rhval = oid_nil; - int sorted = 0; /* which column is sorted */ + int sorted = 0; /* which output column is sorted */ BAT *tmp; bool use_orderidx = false; const char *algo = NULL; @@ -1727,29 +1754,22 @@ rangejoin(BAT *r1, BAT *r2, BAT *l, BAT assert(r1->ttype == TYPE_oid); assert(r2 == NULL || r2->ttype == TYPE_oid); assert(r2 == NULL || BATcount(r1) == BATcount(r2)); + assert(l->ttype != TYPE_void || !is_oid_nil(l->tseqbase)); + assert(rl->ttype != TYPE_void || !is_oid_nil(rl->tseqbase)); + assert(rh->ttype != TYPE_void || !is_oid_nil(rh->tseqbase)); ALGODEBUG fprintf(stderr, "#%s: %s(l=" ALGOBATFMT "," "rl=" ALGOBATFMT ",rh=" ALGOBATFMT "," - "sl=" ALGOOPTBATFMT ",sr=" ALGOOPTBATFMT ")\n", + "sl=" ALGOOPTBATFMT ",sr=" ALGOOPTBATFMT "," + "anti=%s,symmetric=%s)\n", MT_thread_getname(), __func__, ALGOBATPAR(l), ALGOBATPAR(rl), ALGOBATPAR(rh), - ALGOOPTBATPAR(sl), - ALGOOPTBATPAR(sr)); - - if ((l->ttype == TYPE_void && is_oid_nil(l->tseqbase)) || - (rl->ttype == TYPE_void && is_oid_nil(rl->tseqbase)) || - (rh->ttype == TYPE_void && is_oid_nil(rh->tseqbase))) { - /* trivial: nils don't match anything */ - return GDK_SUCCEED; - } - - if (canditer_init(&lci, l, sl) == 0 || - canditer_init(&rci, rl, sr) == 0) { - /* trivial: empty input */ - return GDK_SUCCEED; - } + ALGOOPTBATPAR(lci->s), + ALGOOPTBATPAR(rci->s), + anti ? "true" : "false", + symmetric ? "true" : "false"); rlvals = rl->ttype == TYPE_void ? NULL : (const char *) Tloc(rl, 0); rhvals = rh->ttype == TYPE_void ? NULL : (const char *) Tloc(rh, 0); @@ -1784,13 +1804,13 @@ rangejoin(BAT *r1, BAT *r2, BAT *l, BAT vrl = &rlval; vrh = &rhval; - if (BATordered(l) || BATordered_rev(l) || use_orderidx) { + if (!anti && !symmetric && (BATordered(l) || BATordered_rev(l) || use_orderidx)) { /* left column is sorted, use binary search */ sorted = 2; - for (BUN i = 0; i < rci.ncand; i++) { + for (BUN i = 0; i < rci->ncand; i++) { BUN low, high; - ro = canditer_next(&rci); + ro = canditer_next(rci); if (rlvals) { vrl = VALUE(rl, ro - rl->hseqbase); } else { @@ -1837,8 +1857,8 @@ rangejoin(BAT *r1, BAT *r2, BAT *l, BAT if (high <= low) continue; if (l->tsorted || l->trevsorted) { - low = canditer_search(&lci, low + l->hseqbase, true); - high = canditer_search(&lci, high + l->hseqbase, true); + low = canditer_search(lci, low + l->hseqbase, true); + high = canditer_search(lci, high + l->hseqbase, true); assert(high >= low); if (BATcapacity(r1) < BUNlast(r1) + high - low) { @@ -1857,9 +1877,9 @@ rangejoin(BAT *r1, BAT *r2, BAT *l, BAT dst2 = (oid *) Tloc(r2, 0); } } - canditer_setidx(&lci, low); + canditer_setidx(lci, low); while (low < high) { - dst1[r1->batCount++] = canditer_next(&lci); + dst1[r1->batCount++] = canditer_next(lci); if (r2) { dst2[r2->batCount++] = ro; } @@ -1889,7 +1909,7 @@ rangejoin(BAT *r1, BAT *r2, BAT *l, BAT } while (low < high) { - if (canditer_search(&lci, ord[low], false) != BUN_NONE) { + if (canditer_search(lci, ord[low], false) != BUN_NONE) { dst1[r1->batCount++] = ord[low]; if (r2) { dst2[r2->batCount++] = ro; @@ -1901,7 +1921,8 @@ rangejoin(BAT *r1, BAT *r2, BAT *l, BAT } cnt = BATcount(r1); assert(r2 == NULL || BATcount(r1) == BATcount(r2)); - } else if ((BATcount(rl) > 2 || + } else if (!anti && !symmetric && + (BATcount(rl) > 2 || !l->batTransient || (VIEWtparent(l) != 0 && (tmp = BBPquickdesc(VIEWtparent(l), false)) != NULL && @@ -1918,9 +1939,9 @@ rangejoin(BAT *r1, BAT *r2, BAT *l, BAT sorted = 2; cnt = 0; - maximum = lci.ncand; - for (BUN i = 0; i < rci.ncand; i++) { _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list