Changeset: 9e6df0555030 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=9e6df0555030
Modified Files:
        clients/Tests/MAL-signatures.stable.out
        clients/Tests/MAL-signatures.stable.out.int128
        clients/Tests/exports.stable.out
        gdk/gdk.h
        gdk/gdk_batop.c
        gdk/gdk_join.c
        gdk/gdk_logger.c
        monetdb5/mal/mal_authorize.c
        monetdb5/modules/kernel/algebra.c
        monetdb5/modules/kernel/algebra.h
        monetdb5/modules/kernel/algebra.mal
        sql/backends/monet5/sql.c
        sql/backends/monet5/sql_statement.c
        sql/storage/bat/bat_table.c
        sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-explain-1join-query.stable.out
        sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-explain-2join-query.stable.out
Branch: subquery
Log Message:

Implemented an extra :bit argument to algebra.difference for NOT IN queries.


diffs (truncated from 713 to 300 lines):

diff --git a/clients/Tests/MAL-signatures.stable.out b/clients/Tests/MAL-signatures.stable.out
--- a/clients/Tests/MAL-signatures.stable.out
+++ b/clients/Tests/MAL-signatures.stable.out
@@ -524,7 +524,7 @@ Ready.
 [ "algebra",   "bandjoin",     "command algebra.bandjoin(l:bat[:any_1], 
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], c1:any_1, c2:any_1, li:bit, hi:bit, 
estimate:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGbandjoin;", "Band join: 
values in l and r match if r - c1 <[=] l <[=] r + c2"       ]
 [ "algebra",   "copy", "command algebra.copy(b:bat[:any_1]):bat[:any_1] ",     
"ALGcopy;",     "Returns physical copy of a BAT."       ]
 [ "algebra",   "crossproduct", "command algebra.crossproduct(left:bat[:any_1], 
right:bat[:any_2]) (l:bat[:oid], r:bat[:oid]) ",        "ALGcrossproduct2;",    
"Returns 2 columns with all BUNs, consisting of the head-oids\n\t  from 'left' 
and 'right' for which there are BUNs in 'left'\n\t  and 'right' with equal 
tails"        ]
-[ "algebra",   "difference",   "command algebra.difference(l:bat[:any_1], 
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, 
estimate:lng):bat[:oid] ",       "ALGdifference;",       "Difference of l and r 
with candidate lists"    ]
+[ "algebra",   "difference",   "command algebra.difference(l:bat[:any_1], 
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, nil_clears:bit, 
estimate:lng):bat[:oid] ",       "ALGdifference;",       "Difference of l and r 
with candidate lists"    ]
 [ "algebra",   "exist",        "command algebra.exist(b:bat[:any_1], 
val:any_1):bit ", "ALGexist;",    "Returns whether 'val' occurs in b."    ]
 [ "algebra",   "fetch",        "command algebra.fetch(b:bat[:any_1], 
x:oid):any_1 ",   "ALGfetchoid;", "Returns the value of the BUN at x-th 
position with 0 <= x < b.count"   ]
 [ "algebra",   "find", "command algebra.find(b:bat[:any_1], t:any_1):oid ",    
"ALGfind;",     "Returns the index position of a value.  If no such BUN exists 
return OID-nil." ]
diff --git a/clients/Tests/MAL-signatures.stable.out.int128 b/clients/Tests/MAL-signatures.stable.out.int128
--- a/clients/Tests/MAL-signatures.stable.out.int128
+++ b/clients/Tests/MAL-signatures.stable.out.int128
@@ -628,7 +628,7 @@ Ready.
 [ "algebra",   "bandjoin",     "command algebra.bandjoin(l:bat[:any_1], 
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], c1:any_1, c2:any_1, li:bit, hi:bit, 
estimate:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGbandjoin;", "Band join: 
values in l and r match if r - c1 <[=] l <[=] r + c2"       ]
 [ "algebra",   "copy", "command algebra.copy(b:bat[:any_1]):bat[:any_1] ",     
"ALGcopy;",     "Returns physical copy of a BAT."       ]
 [ "algebra",   "crossproduct", "command algebra.crossproduct(left:bat[:any_1], 
right:bat[:any_2]) (l:bat[:oid], r:bat[:oid]) ",        "ALGcrossproduct2;",    
"Returns 2 columns with all BUNs, consisting of the head-oids\n\t  from 'left' 
and 'right' for which there are BUNs in 'left'\n\t  and 'right' with equal 
tails"        ]
-[ "algebra",   "difference",   "command algebra.difference(l:bat[:any_1], 
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, 
estimate:lng):bat[:oid] ",       "ALGdifference;",       "Difference of l and r 
with candidate lists"    ]
+[ "algebra",   "difference",   "command algebra.difference(l:bat[:any_1], 
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, nil_clears:bit, 
estimate:lng):bat[:oid] ",       "ALGdifference;",       "Difference of l and r 
with candidate lists"    ]
 [ "algebra",   "exist",        "command algebra.exist(b:bat[:any_1], 
val:any_1):bit ", "ALGexist;",    "Returns whether 'val' occurs in b."    ]
 [ "algebra",   "fetch",        "command algebra.fetch(b:bat[:any_1], 
x:oid):any_1 ",   "ALGfetchoid;", "Returns the value of the BUN at x-th 
position with 0 <= x < b.count"   ]
 [ "algebra",   "find", "command algebra.find(b:bat[:any_1], t:any_1):oid ",    
"ALGfind;",     "Returns the index position of a value.  If no such BUN exists 
return OID-nil." ]
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -113,7 +113,7 @@ BAT *BATconvert(BAT *b, BAT *s, int tp, 
 BUN BATcount_no_nil(BAT *b);
 gdk_return BATdel(BAT *b, BAT *d) __attribute__((__warn_unused_result__));
 BAT *BATdense(oid hseq, oid tseq, BUN cnt) 
__attribute__((__warn_unused_result__));
-BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate);
+BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool 
nil_clears, BUN estimate);
 gdk_return BATextend(BAT *b, BUN newcap) 
__attribute__((__warn_unused_result__));
 void BATfakeCommit(BAT *b);
 gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, BAT *grps, 
BUN n, bool asc, bool nilslast, bool distinct) 
__attribute__((__warn_unused_result__));
@@ -703,7 +703,7 @@ str ALGcount_bat(lng *result, const bat 
 str ALGcount_nil(lng *result, const bat *bid, const bit *ignore_nils);
 str ALGcount_no_nil(lng *result, const bat *bid);
 str ALGcrossproduct2(bat *l, bat *r, const bat *lid, const bat *rid);
-str ALGdifference(bat *r1, const bat *lid, const bat *rid, const bat *slid, 
const bat *srid, const bit *nil_matches, const lng *estimate);
+str ALGdifference(bat *r1, const bat *lid, const bat *rid, const bat *slid, 
const bat *srid, const bit *nil_matches, const bit *nil_clears, const lng 
*estimate);
 str ALGexist(bit *ret, const bat *bid, const void *val);
 str ALGfetchoid(ptr ret, const bat *bid, const oid *pos);
 str ALGfind(oid *ret, const bat *bid, ptr val);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -2719,7 +2719,7 @@ gdk_export gdk_return BATthetajoin(BAT *
 gdk_export gdk_return BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, bool nil_matches, BUN estimate)
        __attribute__((__warn_unused_result__));
 gdk_export BAT *BATintersect(BAT *l, BAT *r, BAT *sl, BAT *sr, bool 
nil_matches, BUN estimate);
-gdk_export BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, 
BUN estimate);
+gdk_export BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, 
bool not_in, BUN estimate);
 gdk_export gdk_return BATjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, 
BAT *sr, bool nil_matches, BUN estimate)
        __attribute__((__warn_unused_result__));
 gdk_export gdk_return BATbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, const void *c1, const void *c2, bool li, bool hi, BUN estimate)
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -558,7 +558,7 @@ BATappend(BAT *b, BAT *n, BAT *s, bool f
                 * this */
                BAT *d;
 
-               d = BATdiff(n, b, s, NULL, true, BUN_NONE);
+               d = BATdiff(n, b, s, NULL, true, false, BUN_NONE);
                if (d == NULL)
                        return GDK_FAIL;
                s = BATunique(n, d);
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -868,8 +868,8 @@ mergejoin_int(BAT **r1p, BAT **r2p, BAT 
        BUN i;
 
        ALGODEBUG fprintf(stderr, "#mergejoin_int(l=" ALGOBATFMT ","
-                         "r=" ALGOBATFMT ")%s\n",
-                         ALGOBATPAR(l), ALGOBATPAR(r),
+                         "r=" ALGOBATFMT ",nil_matches=%d)%s\n",
+                         ALGOBATPAR(l), ALGOBATPAR(r), nil_matches,
                          swapped ? " swapped" : "");
 
        assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
@@ -1145,8 +1145,8 @@ mergejoin_lng(BAT **r1p, BAT **r2p, BAT 
        BUN i;
 
        ALGODEBUG fprintf(stderr, "#mergejoin_lng(l=" ALGOBATFMT ","
-                         "r=" ALGOBATFMT ")%s\n",
-                         ALGOBATPAR(l), ALGOBATPAR(r),
+                         "r=" ALGOBATFMT ",nil_matches=%d)%s\n",
+                         ALGOBATPAR(l), ALGOBATPAR(r), nil_matches,
                          swapped ? " swapped" : "");
 
        assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
@@ -1428,7 +1428,7 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l, 
          BUN rstart, BUN rend, BUN rcnt,
          const oid *restrict rcand, const oid *rcandend,
          bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
-         BUN estimate, lng t0, bool swapped)
+         bool not_in, BUN estimate, lng t0, bool swapped)
 {
        BUN rstartorig;
        const oid *rcandorig;
@@ -1460,7 +1460,8 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l, 
        oid lval = oid_nil, rval = oid_nil; /* temporary space to point v to */
 
        if (sl == NULL && sr == NULL && !nil_on_miss &&
-           !semi && !only_misses && l->tsorted && r->tsorted && r2p != NULL) {
+           !semi && !only_misses && !not_in &&
+           l->tsorted && r->tsorted && r2p != NULL) {
                /* special cases with far fewer options */
                switch (ATOMbasetype(l->ttype)) {
                case TYPE_int:
@@ -1475,9 +1476,10 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l, 
        ALGODEBUG fprintf(stderr, "#mergejoin(l=" ALGOBATFMT ","
                          "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
                          "sr=" ALGOOPTBATFMT ",nil_matches=%d,"
-                         "nil_on_miss=%d,semi=%d)%s\n",
+                         "nil_on_miss=%d,semi=%d,only_misses=%d,"
+                         "not_in=%d)%s\n",
                          ALGOBATPAR(l), ALGOBATPAR(r), ALGOOPTBATPAR(sl), 
ALGOOPTBATPAR(sr),
-                         nil_matches, nil_on_miss, semi,
+                         nil_matches, nil_on_miss, semi, only_misses, not_in,
                          swapped ? " swapped" : "");
 
        assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
@@ -1501,6 +1503,12 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l, 
        /* basic properties will be adjusted if necessary later on,
         * they were initially set by joininitresults() */
 
+       if (not_in && rstart < rend && !r->tnonil &&
+           ((BATtvoid(r) && r->tseqbase == oid_nil) ||
+            (rvals && cmp(nil, VALUE(r, r->tsorted ? rcand ? rcand[0] : rstart 
: rcand ? rcandend[-1] : rend -1)) == 0)))
+               return nomatch(r1p, r2p, l, r, 0, 0, NULL, NULL, false, false,
+                              "mergejoin", t0);
+
        if (lstart == lend ||
            rstart == rend ||
            (!nil_matches &&
@@ -2463,6 +2471,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
         BUN rstart, BUN rend, BUN rcnt,
         const oid *restrict rcand, const oid *rcandend,
         bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
+        bool not_in,
         BUN estimate, lng t0, bool swapped, bool phash, const char *reason)
 {
        oid lo, ro;
@@ -2486,10 +2495,11 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
        ALGODEBUG fprintf(stderr, "#hashjoin(l=" ALGOBATFMT ","
                          "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
                          "sr=" ALGOOPTBATFMT ",nil_matches=%d,"
-                         "nil_on_miss=%d,semi=%d,only_misses=%d)%s%s%s\n",
+                         "nil_on_miss=%d,semi=%d,only_misses=%d,"
+                         "not_in=%d)%s%s%s\n",
                          ALGOBATPAR(l), ALGOBATPAR(r),
                          ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
-                         nil_matches, nil_on_miss, semi, only_misses,
+                         nil_matches, nil_on_miss, semi, only_misses, not_in,
                          swapped ? " swapped" : "",
                          *reason ? " " : "", reason);
 
@@ -2519,23 +2529,6 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
                                      only_misses, estimate);
        if (maxsize == BUN_NONE)
                return GDK_FAIL;
-       BAT *r1 = *r1p;
-       BAT *r2 = r2p ? *r2p : NULL;
-
-       /* basic properties will be adjusted if necessary later on,
-        * they were initially set by joininitresults() */
-
-       if (r2) {
-               r2->tkey = l->tkey;
-               /* r2 is not likely to be sorted (although it is
-                * certainly possible) */
-               r2->tsorted = false;
-               r2->trevsorted = false;
-               r2->tseqbase = oid_nil;
-       }
-
-       if (sl && !BATtdense(sl))
-               r1->tseqbase = oid_nil;
 
        rl = 0;
        if (phash) {
@@ -2568,16 +2561,47 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
                                          BATgetId(r));
                        snprintf(ext, sizeof(ext), "thash%x", sr->batCacheid);
                        if ((hsh = BAThash_impl(r, sr, ext)) == NULL)
-                               goto bailout;
+                               return GDK_FAIL;
                }
        } else {
                if (BAThash(r) != GDK_SUCCEED)
-                       goto bailout;
+                       return GDK_FAIL;
                hsh = r->thash;
        }
        ri = bat_iterator(r);
        t = ATOMbasetype(r->ttype);
 
+       if (not_in && !r->tnonil) {
+               for (rb = HASHget(hsh, HASHprobe(hsh, nil));
+                    rb != HASHnil(hsh);
+                    rb = HASHgetlink(hsh, rb)) {
+                       ro = BUNtoid(sr, rb);
+                       if ((*cmp)(nil, BUNtail(ri, ro - r->hseqbase)) == 0) {
+                               return nomatch(r1p, r2p, l, r, 0, 0, NULL, NULL,
+                                              false, false, "hashjoin", t0);
+                       }
+               }
+               
+       }
+
+       BAT *r1 = *r1p;
+       BAT *r2 = r2p ? *r2p : NULL;
+
+       /* basic properties will be adjusted if necessary later on,
+        * they were initially set by joininitresults() */
+
+       if (r2) {
+               r2->tkey = l->tkey;
+               /* r2 is not likely to be sorted (although it is
+                * certainly possible) */
+               r2->tsorted = false;
+               r2->trevsorted = false;
+               r2->tseqbase = oid_nil;
+       }
+
+       if (sl && !BATtdense(sl))
+               r1->tseqbase = oid_nil;
+
        if (lcand) {
                while (lcand < lcandend) {
                        lo = *lcand++;
@@ -3549,7 +3573,7 @@ fetchjoin(BAT **r1p, BAT **r2p, BAT *l, 
 static gdk_return
 leftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
         bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
-        BUN estimate, const char *func, lng t0)
+        bool not_in, BUN estimate, const char *func, lng t0)
 {
        BUN lstart, lend, lcnt;
        const oid *lcand, *lcandend;
@@ -3563,6 +3587,8 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
        assert(r2p != NULL || (semi | only_misses));
        /* if nil_on_miss is set, we really need a right output */
        assert(!nil_on_miss || r2p != NULL);
+       /* if not_in is set, then so is only_misses */
+       assert(!not_in || only_misses);
        *r1p = NULL;
        if (r2p)
                *r2p = NULL;
@@ -3578,16 +3604,18 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
                ALGODEBUG fprintf(stderr, "#%s(l=" ALGOBATFMT ","
                                  "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
                                  "sr=" ALGOOPTBATFMT ",nil_matches=%d,"
-                                 "nil_on_miss=%d,semi=%d,only_misses=%d)\n",
+                                 "nil_on_miss=%d,semi=%d,only_misses=%d,"
+                                 "not_in=%d)\n",
                                  func,
                                  ALGOBATPAR(l), ALGOBATPAR(r),
                                  ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
-                                 nil_matches, nil_on_miss, semi, only_misses);
+                                 nil_matches, nil_on_miss, semi, only_misses,
+                                 not_in);
                return nomatch(r1p, r2p, l, r, lstart, lend, lcand, lcandend,
                               nil_on_miss, only_misses, func, t0);
        }
 
-       if (!nil_on_miss && !semi && !only_misses &&
+       if (!nil_on_miss && !semi && !only_misses && !not_in &&
            (lcnt == 1 || (BATordered(l) && BATordered_rev(l)))) {
                /* single value to join, use select */
                return selectjoin(r1p, r2p, l, r, sl, sr,
@@ -3605,6 +3633,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
                   && !semi
                   && !nil_matches
                   && !only_misses
+                  && !not_in
                   /* && (rcnt * 1024) < lcnt */
                   && (BATordered(r) || BATordered_rev(r))) {
                assert(ATOMtype(l->ttype) == TYPE_oid); /* tdense */
@@ -3620,7 +3649,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
                                 lstart, lend, lcnt, lcand, lcandend,
                                 rstart, rend, rcnt, rcand, rcandend,
                                 nil_matches, nil_on_miss, semi, only_misses,
-                                estimate, t0, false);
+                                not_in, estimate, t0, false);
        }
        phash = sr == NULL &&
                VIEWtparent(r) != 0 &&
@@ -3629,7 +3658,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
                        lstart, lend, lcnt, lcand, lcandend,
                        rstart, rend, rcnt, rcand, rcandend,
                        nil_matches, nil_on_miss, semi, only_misses,
-                       estimate, t0, false, phash, func);
+                       not_in, estimate, t0, false, phash, func);
 }
 
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to