Changeset: 9e6df0555030 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=9e6df0555030
Modified Files:
	clients/Tests/MAL-signatures.stable.out
	clients/Tests/MAL-signatures.stable.out.int128
	clients/Tests/exports.stable.out
	gdk/gdk.h
	gdk/gdk_batop.c
	gdk/gdk_join.c
	gdk/gdk_logger.c
	monetdb5/mal/mal_authorize.c
	monetdb5/modules/kernel/algebra.c
	monetdb5/modules/kernel/algebra.h
	monetdb5/modules/kernel/algebra.mal
	sql/backends/monet5/sql.c
	sql/backends/monet5/sql_statement.c
	sql/storage/bat/bat_table.c
	sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-explain-1join-query.stable.out
	sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-explain-2join-query.stable.out
Branch: subquery
Log Message:
Implemented an extra :bit argument to algebra.difference for NOT IN queries. diffs (truncated from 713 to 300 lines): diff --git a/clients/Tests/MAL-signatures.stable.out b/clients/Tests/MAL-signatures.stable.out --- a/clients/Tests/MAL-signatures.stable.out +++ b/clients/Tests/MAL-signatures.stable.out @@ -524,7 +524,7 @@ Ready. [ "algebra", "bandjoin", "command algebra.bandjoin(l:bat[:any_1], r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], c1:any_1, c2:any_1, li:bit, hi:bit, estimate:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGbandjoin;", "Band join: values in l and r match if r - c1 <[=] l <[=] r + c2" ] [ "algebra", "copy", "command algebra.copy(b:bat[:any_1]):bat[:any_1] ", "ALGcopy;", "Returns physical copy of a BAT." ] [ "algebra", "crossproduct", "command algebra.crossproduct(left:bat[:any_1], right:bat[:any_2]) (l:bat[:oid], r:bat[:oid]) ", "ALGcrossproduct2;", "Returns 2 columns with all BUNs, consisting of the head-oids\n\t from 'left' and 'right' for which there are BUNs in 'left'\n\t and 'right' with equal tails" ] -[ "algebra", "difference", "command algebra.difference(l:bat[:any_1], r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, estimate:lng):bat[:oid] ", "ALGdifference;", "Difference of l and r with candidate lists" ] +[ "algebra", "difference", "command algebra.difference(l:bat[:any_1], r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, nil_clears:bit, estimate:lng):bat[:oid] ", "ALGdifference;", "Difference of l and r with candidate lists" ] [ "algebra", "exist", "command algebra.exist(b:bat[:any_1], val:any_1):bit ", "ALGexist;", "Returns whether 'val' occurs in b." ] [ "algebra", "fetch", "command algebra.fetch(b:bat[:any_1], x:oid):any_1 ", "ALGfetchoid;", "Returns the value of the BUN at x-th position with 0 <= x < b.count" ] [ "algebra", "find", "command algebra.find(b:bat[:any_1], t:any_1):oid ", "ALGfind;", "Returns the index position of a value. If no such BUN exists return OID-nil." 
] diff --git a/clients/Tests/MAL-signatures.stable.out.int128 b/clients/Tests/MAL-signatures.stable.out.int128 --- a/clients/Tests/MAL-signatures.stable.out.int128 +++ b/clients/Tests/MAL-signatures.stable.out.int128 @@ -628,7 +628,7 @@ Ready. [ "algebra", "bandjoin", "command algebra.bandjoin(l:bat[:any_1], r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], c1:any_1, c2:any_1, li:bit, hi:bit, estimate:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGbandjoin;", "Band join: values in l and r match if r - c1 <[=] l <[=] r + c2" ] [ "algebra", "copy", "command algebra.copy(b:bat[:any_1]):bat[:any_1] ", "ALGcopy;", "Returns physical copy of a BAT." ] [ "algebra", "crossproduct", "command algebra.crossproduct(left:bat[:any_1], right:bat[:any_2]) (l:bat[:oid], r:bat[:oid]) ", "ALGcrossproduct2;", "Returns 2 columns with all BUNs, consisting of the head-oids\n\t from 'left' and 'right' for which there are BUNs in 'left'\n\t and 'right' with equal tails" ] -[ "algebra", "difference", "command algebra.difference(l:bat[:any_1], r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, estimate:lng):bat[:oid] ", "ALGdifference;", "Difference of l and r with candidate lists" ] +[ "algebra", "difference", "command algebra.difference(l:bat[:any_1], r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, nil_clears:bit, estimate:lng):bat[:oid] ", "ALGdifference;", "Difference of l and r with candidate lists" ] [ "algebra", "exist", "command algebra.exist(b:bat[:any_1], val:any_1):bit ", "ALGexist;", "Returns whether 'val' occurs in b." ] [ "algebra", "fetch", "command algebra.fetch(b:bat[:any_1], x:oid):any_1 ", "ALGfetchoid;", "Returns the value of the BUN at x-th position with 0 <= x < b.count" ] [ "algebra", "find", "command algebra.find(b:bat[:any_1], t:any_1):oid ", "ALGfind;", "Returns the index position of a value. If no such BUN exists return OID-nil." 
] diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -113,7 +113,7 @@ BAT *BATconvert(BAT *b, BAT *s, int tp, BUN BATcount_no_nil(BAT *b); gdk_return BATdel(BAT *b, BAT *d) __attribute__((__warn_unused_result__)); BAT *BATdense(oid hseq, oid tseq, BUN cnt) __attribute__((__warn_unused_result__)); -BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate); +BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool nil_clears, BUN estimate); gdk_return BATextend(BAT *b, BUN newcap) __attribute__((__warn_unused_result__)); void BATfakeCommit(BAT *b); gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, BAT *grps, BUN n, bool asc, bool nilslast, bool distinct) __attribute__((__warn_unused_result__)); @@ -703,7 +703,7 @@ str ALGcount_bat(lng *result, const bat str ALGcount_nil(lng *result, const bat *bid, const bit *ignore_nils); str ALGcount_no_nil(lng *result, const bat *bid); str ALGcrossproduct2(bat *l, bat *r, const bat *lid, const bat *rid); -str ALGdifference(bat *r1, const bat *lid, const bat *rid, const bat *slid, const bat *srid, const bit *nil_matches, const lng *estimate); +str ALGdifference(bat *r1, const bat *lid, const bat *rid, const bat *slid, const bat *srid, const bit *nil_matches, const bit *nil_clears, const lng *estimate); str ALGexist(bit *ret, const bat *bid, const void *val); str ALGfetchoid(ptr ret, const bat *bid, const oid *pos); str ALGfind(oid *ret, const bat *bid, ptr val); diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -2719,7 +2719,7 @@ gdk_export gdk_return BATthetajoin(BAT * gdk_export gdk_return BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate) __attribute__((__warn_unused_result__)); gdk_export BAT *BATintersect(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate); -gdk_export BAT *BATdiff(BAT 
*l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate); +gdk_export BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool not_in, BUN estimate); gdk_export gdk_return BATjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate) __attribute__((__warn_unused_result__)); gdk_export gdk_return BATbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, const void *c1, const void *c2, bool li, bool hi, BUN estimate) diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -558,7 +558,7 @@ BATappend(BAT *b, BAT *n, BAT *s, bool f * this */ BAT *d; - d = BATdiff(n, b, s, NULL, true, BUN_NONE); + d = BATdiff(n, b, s, NULL, true, false, BUN_NONE); if (d == NULL) return GDK_FAIL; s = BATunique(n, d); diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c --- a/gdk/gdk_join.c +++ b/gdk/gdk_join.c @@ -868,8 +868,8 @@ mergejoin_int(BAT **r1p, BAT **r2p, BAT BUN i; ALGODEBUG fprintf(stderr, "#mergejoin_int(l=" ALGOBATFMT "," - "r=" ALGOBATFMT ")%s\n", - ALGOBATPAR(l), ALGOBATPAR(r), + "r=" ALGOBATFMT ",nil_matches=%d)%s\n", + ALGOBATPAR(l), ALGOBATPAR(r), nil_matches, swapped ? " swapped" : ""); assert(ATOMtype(l->ttype) == ATOMtype(r->ttype)); @@ -1145,8 +1145,8 @@ mergejoin_lng(BAT **r1p, BAT **r2p, BAT BUN i; ALGODEBUG fprintf(stderr, "#mergejoin_lng(l=" ALGOBATFMT "," - "r=" ALGOBATFMT ")%s\n", - ALGOBATPAR(l), ALGOBATPAR(r), + "r=" ALGOBATFMT ",nil_matches=%d)%s\n", + ALGOBATPAR(l), ALGOBATPAR(r), nil_matches, swapped ? 
" swapped" : ""); assert(ATOMtype(l->ttype) == ATOMtype(r->ttype)); @@ -1428,7 +1428,7 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l, BUN rstart, BUN rend, BUN rcnt, const oid *restrict rcand, const oid *rcandend, bool nil_matches, bool nil_on_miss, bool semi, bool only_misses, - BUN estimate, lng t0, bool swapped) + bool not_in, BUN estimate, lng t0, bool swapped) { BUN rstartorig; const oid *rcandorig; @@ -1460,7 +1460,8 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l, oid lval = oid_nil, rval = oid_nil; /* temporary space to point v to */ if (sl == NULL && sr == NULL && !nil_on_miss && - !semi && !only_misses && l->tsorted && r->tsorted && r2p != NULL) { + !semi && !only_misses && !not_in && + l->tsorted && r->tsorted && r2p != NULL) { /* special cases with far fewer options */ switch (ATOMbasetype(l->ttype)) { case TYPE_int: @@ -1475,9 +1476,10 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l, ALGODEBUG fprintf(stderr, "#mergejoin(l=" ALGOBATFMT "," "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT "," "sr=" ALGOOPTBATFMT ",nil_matches=%d," - "nil_on_miss=%d,semi=%d)%s\n", + "nil_on_miss=%d,semi=%d,only_misses=%d," + "not_in=%d)%s\n", ALGOBATPAR(l), ALGOBATPAR(r), ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr), - nil_matches, nil_on_miss, semi, + nil_matches, nil_on_miss, semi, only_misses, not_in, swapped ? " swapped" : ""); assert(ATOMtype(l->ttype) == ATOMtype(r->ttype)); @@ -1501,6 +1503,12 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l, /* basic properties will be adjusted if necessary later on, * they were initially set by joininitresults() */ + if (not_in && rstart < rend && !r->tnonil && + ((BATtvoid(r) && r->tseqbase == oid_nil) || + (rvals && cmp(nil, VALUE(r, r->tsorted ? rcand ? rcand[0] : rstart : rcand ? 
rcandend[-1] : rend -1)) == 0))) + return nomatch(r1p, r2p, l, r, 0, 0, NULL, NULL, false, false, + "mergejoin", t0); + if (lstart == lend || rstart == rend || (!nil_matches && @@ -2463,6 +2471,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B BUN rstart, BUN rend, BUN rcnt, const oid *restrict rcand, const oid *rcandend, bool nil_matches, bool nil_on_miss, bool semi, bool only_misses, + bool not_in, BUN estimate, lng t0, bool swapped, bool phash, const char *reason) { oid lo, ro; @@ -2486,10 +2495,11 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B ALGODEBUG fprintf(stderr, "#hashjoin(l=" ALGOBATFMT "," "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT "," "sr=" ALGOOPTBATFMT ",nil_matches=%d," - "nil_on_miss=%d,semi=%d,only_misses=%d)%s%s%s\n", + "nil_on_miss=%d,semi=%d,only_misses=%d," + "not_in=%d)%s%s%s\n", ALGOBATPAR(l), ALGOBATPAR(r), ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr), - nil_matches, nil_on_miss, semi, only_misses, + nil_matches, nil_on_miss, semi, only_misses, not_in, swapped ? " swapped" : "", *reason ? " " : "", reason); @@ -2519,23 +2529,6 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B only_misses, estimate); if (maxsize == BUN_NONE) return GDK_FAIL; - BAT *r1 = *r1p; - BAT *r2 = r2p ? 
*r2p : NULL; - - /* basic properties will be adjusted if necessary later on, - * they were initially set by joininitresults() */ - - if (r2) { - r2->tkey = l->tkey; - /* r2 is not likely to be sorted (although it is - * certainly possible) */ - r2->tsorted = false; - r2->trevsorted = false; - r2->tseqbase = oid_nil; - } - - if (sl && !BATtdense(sl)) - r1->tseqbase = oid_nil; rl = 0; if (phash) { @@ -2568,16 +2561,47 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B BATgetId(r)); snprintf(ext, sizeof(ext), "thash%x", sr->batCacheid); if ((hsh = BAThash_impl(r, sr, ext)) == NULL) - goto bailout; + return GDK_FAIL; } } else { if (BAThash(r) != GDK_SUCCEED) - goto bailout; + return GDK_FAIL; hsh = r->thash; } ri = bat_iterator(r); t = ATOMbasetype(r->ttype); + if (not_in && !r->tnonil) { + for (rb = HASHget(hsh, HASHprobe(hsh, nil)); + rb != HASHnil(hsh); + rb = HASHgetlink(hsh, rb)) { + ro = BUNtoid(sr, rb); + if ((*cmp)(nil, BUNtail(ri, ro - r->hseqbase)) == 0) { + return nomatch(r1p, r2p, l, r, 0, 0, NULL, NULL, + false, false, "hashjoin", t0); + } + } + + } + + BAT *r1 = *r1p; + BAT *r2 = r2p ? 
*r2p : NULL; + + /* basic properties will be adjusted if necessary later on, + * they were initially set by joininitresults() */ + + if (r2) { + r2->tkey = l->tkey; + /* r2 is not likely to be sorted (although it is + * certainly possible) */ + r2->tsorted = false; + r2->trevsorted = false; + r2->tseqbase = oid_nil; + } + + if (sl && !BATtdense(sl)) + r1->tseqbase = oid_nil; + if (lcand) { while (lcand < lcandend) { lo = *lcand++; @@ -3549,7 +3573,7 @@ fetchjoin(BAT **r1p, BAT **r2p, BAT *l, static gdk_return leftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool nil_on_miss, bool semi, bool only_misses, - BUN estimate, const char *func, lng t0) + bool not_in, BUN estimate, const char *func, lng t0) { BUN lstart, lend, lcnt; const oid *lcand, *lcandend; @@ -3563,6 +3587,8 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B assert(r2p != NULL || (semi | only_misses)); /* if nil_on_miss is set, we really need a right output */ assert(!nil_on_miss || r2p != NULL); + /* if not_in is set, then so is only_misses */ + assert(!not_in || only_misses); *r1p = NULL; if (r2p) *r2p = NULL; @@ -3578,16 +3604,18 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B ALGODEBUG fprintf(stderr, "#%s(l=" ALGOBATFMT "," "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT "," "sr=" ALGOOPTBATFMT ",nil_matches=%d," - "nil_on_miss=%d,semi=%d,only_misses=%d)\n", + "nil_on_miss=%d,semi=%d,only_misses=%d," + "not_in=%d)\n", func, ALGOBATPAR(l), ALGOBATPAR(r), ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr), - nil_matches, nil_on_miss, semi, only_misses); + nil_matches, nil_on_miss, semi, only_misses, + not_in); return nomatch(r1p, r2p, l, r, lstart, lend, lcand, lcandend, nil_on_miss, only_misses, func, t0); } - if (!nil_on_miss && !semi && !only_misses && + if (!nil_on_miss && !semi && !only_misses && !not_in && (lcnt == 1 || (BATordered(l) && BATordered_rev(l)))) { /* single value to join, use select */ return selectjoin(r1p, r2p, l, r, sl, sr, @@ -3605,6 +3633,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT 
*l, B && !semi && !nil_matches && !only_misses + && !not_in /* && (rcnt * 1024) < lcnt */ && (BATordered(r) || BATordered_rev(r))) { assert(ATOMtype(l->ttype) == TYPE_oid); /* tdense */ @@ -3620,7 +3649,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B lstart, lend, lcnt, lcand, lcandend, rstart, rend, rcnt, rcand, rcandend, nil_matches, nil_on_miss, semi, only_misses, - estimate, t0, false); + not_in, estimate, t0, false); } phash = sr == NULL && VIEWtparent(r) != 0 && @@ -3629,7 +3658,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B lstart, lend, lcnt, lcand, lcandend, rstart, rend, rcnt, rcand, rcandend, nil_matches, nil_on_miss, semi, only_misses, - estimate, t0, false, phash, func); + not_in, estimate, t0, false, phash, func); } _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list