Changeset: 43e157bb2be3 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/43e157bb2be3 Modified Files: gdk/gdk_join.c sql/test/SQLancer/Tests/sqlancer22.test sql/test/SQLancer/Tests/sqlancer23.test sql/test/subquery/Tests/subquery3.test sql/test/subquery/Tests/subquery5.test Branch: Jun2023 Log Message:
Extend selectjoin implementation to also do semi, min_one, max_one options. diffs (266 lines): diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c --- a/gdk/gdk_join.c +++ b/gdk/gdk_join.c @@ -301,11 +301,13 @@ nomatch(BAT **r1p, BAT **r2p, BAT *l, BA static gdk_return selectjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, struct canditer *lci, struct canditer *rci, - bool nil_matches, lng t0, bool swapped, const char *reason) + bool nil_matches, bool semi, bool max_one, bool min_one, + lng t0, bool swapped, const char *reason) { BATiter li = bat_iterator(l); const void *v; BAT *bn = NULL; + BUN bncount; assert(lci->ncand > 0); assert(lci->ncand == 1 || (li.sorted && li.revsorted)); @@ -334,32 +336,46 @@ selectjoin(BAT **r1p, BAT **r2p, BAT *l, if (bn == NULL) { return GDK_FAIL; } - if (BATcount(bn) == 0) { + bncount = BATcount(bn); + if (bncount == 0) { BBPunfix(bn->batCacheid); + if (min_one) { + GDKerror("not enough matches"); + return GDK_FAIL; + } return nomatch(r1p, r2p, l, r, lci, false, false, reason, t0); } - BAT *r1 = COLnew(0, TYPE_oid, lci->ncand * BATcount(bn), TRANSIENT); + if (bncount > 1) { + if (semi) + bncount = 1; + if (max_one) { + BBPunfix(bn->batCacheid); + GDKerror("more than one match"); + return GDK_FAIL; + } + } + BAT *r1 = COLnew(0, TYPE_oid, lci->ncand * bncount, TRANSIENT); if (r1 == NULL) { BBPunfix(bn->batCacheid); return GDK_FAIL; } r1->tsorted = true; r1->trevsorted = lci->ncand == 1; - r1->tseqbase = BATcount(bn) == 1 && lci->tpe == cand_dense ? o : oid_nil; - r1->tkey = BATcount(bn) == 1; + r1->tseqbase = bncount == 1 && lci->tpe == cand_dense ? o : oid_nil; + r1->tkey = bncount == 1; r1->tnil = false; r1->tnonil = true; BAT *r2 = NULL; if (r2p) { - r2 = COLnew(0, TYPE_oid, lci->ncand * BATcount(bn), TRANSIENT); + r2 = COLnew(0, TYPE_oid, lci->ncand * bncount, TRANSIENT); if (r2 == NULL) { BBPunfix(bn->batCacheid); BBPreclaim(r1); return GDK_FAIL; } - r2->tsorted = lci->ncand == 1 || BATcount(bn) == 1; - r2->trevsorted = BATcount(bn) == 1; + r2->tsorted = lci->ncand == 1 || bncount == 1; + r2->trevsorted = bncount == 1; r2->tseqbase = lci->ncand == 1 && BATtdense(bn) ? bn->tseqbase : oid_nil; r2->tkey = lci->ncand == 1; r2->tnil = false; @@ -369,7 +385,7 @@ selectjoin(BAT **r1p, BAT **r2p, BAT *l, oid *o1p = (oid *) Tloc(r1, 0); oid *o2p = r2 ? (oid *) Tloc(r2, 0) : NULL; oid bno = bn->tseqbase; - BUN p, q = BATcount(bn); + BUN p, q = bncount; do { GDK_CHECK_TIMEOUT(timeoffset, counter, @@ -388,7 +404,7 @@ selectjoin(BAT **r1p, BAT **r2p, BAT *l, oid *o1p = (oid *) Tloc(r1, 0); oid *o2p = r2 ? (oid *) Tloc(r2, 0) : NULL; const oid *bnp = (const oid *) Tloc(bn, 0); - BUN p, q = BATcount(bn); + BUN p, q = bncount; do { GDK_CHECK_TIMEOUT(timeoffset, counter, @@ -404,10 +420,10 @@ selectjoin(BAT **r1p, BAT **r2p, BAT *l, o = canditer_next(lci); } while (!is_oid_nil(o)); } - BATsetcount(r1, lci->ncand * BATcount(bn)); + BATsetcount(r1, lci->ncand * bncount); *r1p = r1; if (r2p) { - BATsetcount(r2, lci->ncand * BATcount(bn)); + BATsetcount(r2, lci->ncand * bncount); *r2p = r2; } BBPunfix(bn->batCacheid); @@ -3865,12 +3881,13 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B goto doreturn; } - if (!nil_on_miss && !semi && !max_one && !min_one && !only_misses && !not_in && + if (!nil_on_miss && !only_misses && !not_in && (lci.ncand == 1 || (BATordered(l) && BATordered_rev(l)) || (l->ttype == TYPE_void && is_oid_nil(l->tseqbase)))) { /* single value to join, use select */ rc = selectjoin(r1p, r2p, l, r, &lci, &rci, - nil_matches, t0, false, func); + nil_matches, semi, max_one, min_one, + t0, false, func); goto doreturn; } else if (BATtdense(r) && rci.tpe == cand_dense) { /* use special implementation for dense right-hand side */ @@ -4239,12 +4256,14 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA if (lci.ncand == 1 || (BATordered(l) && BATordered_rev(l)) || (l->ttype == TYPE_void && is_oid_nil(l->tseqbase))) { /* single value to join, use select */ rc = selectjoin(r1p, r2p, l, r, &lci, &rci, - nil_matches, t0, false, __func__); + nil_matches, false, false, false, + t0, false, __func__); goto doreturn; } else if (rci.ncand == 1 || (BATordered(r) && BATordered_rev(r)) || (r->ttype == TYPE_void && is_oid_nil(r->tseqbase))) { /* single value to join, use select */ rc = selectjoin(r2p ? r2p : &r2, r1p, r, l, &rci, &lci, - nil_matches, t0, true, __func__); + nil_matches, false, false, false, + t0, true, __func__); if (rc == GDK_SUCCEED && r2p == NULL) BBPunfix(r2->batCacheid); goto doreturn; diff --git a/sql/test/SQLancer/Tests/sqlancer22.test b/sql/test/SQLancer/Tests/sqlancer22.test --- a/sql/test/SQLancer/Tests/sqlancer22.test +++ b/sql/test/SQLancer/Tests/sqlancer22.test @@ -101,23 +101,23 @@ INSERT INTO mct20 VALUES (INTERVAL '1' D statement error GDK reported error: BATsubcross: more than one match select 1 from mct20 where ((select true) union all (select true)) -statement error GDK reported error: mergejoin: more than one match +statement error GDK reported error: selectjoin: more than one match select 1 from mct20 inner join (select x.x from (values (true)) x(x)) as nort0(norc0) on true and greatest(((select nort0.norc0) union all (select true)), true) -statement error GDK reported error: mergejoin: more than one match +statement error GDK reported error: selectjoin: more than one match select 1 from mct20 inner join (select x.x from (values (true)) x(x)) as nort0(norc0) on ((select nort0.norc0) union all (select true)) -statement error GDK reported error: mergejoin: more than one match +statement error GDK reported error: selectjoin: more than one match select 1 from mct20 inner join (select x.x from (values (true)) x(x)) as nort0(norc0) on true where ((select nort0.norc0) union all (select true)) -statement error GDK reported error: mergejoin: more than one match +statement error GDK reported error: selectjoin: more than one match select rank() over (order by ((select mct20.c0) union all (select interval '1' day))) from mct20 ---- -statement error GDK reported error: mergejoin: more than one match +statement error GDK reported error: selectjoin: more than one match select sum(((select mct20.c0) union all (select interval '1' day))) over () from mct20 ---- @@ -125,7 +125,7 @@ statement error GDK reported error: BATs select sum(((select interval '2' day) union all (select interval '1' day))) over () from mct20 ---- -statement error GDK reported error: mergejoin: more than one match +statement error GDK reported error: selectjoin: more than one match select max(((select mct20.c0) union all (select interval '1' day))) over () from mct20 ---- @@ -519,7 +519,7 @@ statement ok rowcount 4 INSERT INTO rt3 VALUES (DATE '4153-10-28'),(DATE '2471-08-17'),(DATE '2022-02-08'),(DATE '1970-01-20') query T nosort -SELECT (SELECT DATE '2011-10-01' FROM rt3 y RIGHT OUTER JOIN (VALUES (DATE '2010-10-01'), (z.c0)) AS x(x) ON +SELECT (SELECT DATE '2011-10-01' FROM rt3 y RIGHT OUTER JOIN (VALUES (DATE '2010-10-01'), (z.c0)) AS x(x) ON length('a') > 0 WHERE rt3.c0 = DATE '2012-10-01') FROM rt3, rt3 z ---- NULL diff --git a/sql/test/SQLancer/Tests/sqlancer23.test b/sql/test/SQLancer/Tests/sqlancer23.test --- a/sql/test/SQLancer/Tests/sqlancer23.test +++ b/sql/test/SQLancer/Tests/sqlancer23.test @@ -168,7 +168,7 @@ SELECT 1 FROM t0 GROUP BY t0.c0 HAVING m ---- # Postgres doesn't give an error here, but we are confident it must -statement error GDK reported error: mergejoin: more than one match +statement error GDK reported error: selectjoin: more than one match SELECT 1 FROM t0 CROSS JOIN LATERAL (SELECT (VALUES (y.y), (y.y)) FROM (SELECT 1) y(y) WHERE t0.c0 = 2) x(x) statement ok @@ -195,10 +195,10 @@ SELECT 1 FROM (SELECT DISTINCT 3) vx(vx) statement error 21000!Cardinality violation, scalar value expected SELECT 1 FROM (SELECT DISTINCT 3) vx(vx) WHERE 1 <> (SELECT 2 UNION ALL SELECT vx.vx) -statement error GDK reported error: mergejoin: more than one match +statement error GDK reported error: selectjoin: more than one match SELECT 1 FROM (SELECT 3) vx(vx) WHERE 1 <> (VALUES (2), (vx.vx)) -statement error GDK reported error: mergejoin: more than one match +statement error GDK reported error: selectjoin: more than one match SELECT 1 FROM (SELECT 3) vx(vx) WHERE 1 <> (SELECT 2 UNION ALL SELECT vx.vx) statement error 42000!SELECT: aggregate functions not allowed in JOIN conditions diff --git a/sql/test/subquery/Tests/subquery3.test b/sql/test/subquery/Tests/subquery3.test --- a/sql/test/subquery/Tests/subquery3.test +++ b/sql/test/subquery/Tests/subquery3.test @@ -555,7 +555,7 @@ SELECT (SELECT 1 FROM integers i2 INNER JOIN integers i3 on MAX(i3.i) = MIN(i2.i)) FROM integers i1 -statement error GDK reported error: mergejoin: more than one match +statement error GDK reported error: selectjoin: more than one match SELECT (SELECT SUM(SUM(i1.i) + i2.i) FROM integers i2 GROUP BY i2.i) FROM integers i1 @@ -717,7 +717,7 @@ 4 4 4 -statement error GDK reported error: mergejoin: more than one match +statement error GDK reported error: selectjoin: more than one match SELECT (SELECT outt FROM evilfunction((SELECT MAX(t1.col1) FROM tbl_ProductSales))) FROM another_T t1 @@ -741,7 +741,7 @@ SELECT * FROM evilfunction(1) 1 1 -statement error GDK reported error: mergejoin: more than one match +statement error GDK reported error: selectjoin: more than one match SELECT (SELECT outt FROM evilfunction((SELECT MIN(col1)))) FROM another_T @@ -751,7 +751,7 @@ SELECT (SELECT outt FROM evilfunction((SELECT MAX(ColID) FROM tbl_ProductSales))) FROM another_T -statement error GDK reported error: mergejoin: more than one match +statement error GDK reported error: selectjoin: more than one match SELECT (SELECT outt FROM evilfunction((SELECT MAX(t1.col1) FROM tbl_ProductSales))) FROM another_T t1 diff --git a/sql/test/subquery/Tests/subquery5.test b/sql/test/subquery/Tests/subquery5.test --- a/sql/test/subquery/Tests/subquery5.test +++ b/sql/test/subquery/Tests/subquery5.test @@ -441,10 +441,10 @@ query I rowsort SELECT MAX(i1.i) FROM integers i1 HAVING (VALUES((AVG(i1.i))) EXCEPT VALUES(AVG(i1.i))) <> 0 ---- -statement error GDK reported error: mergejoin: more than one match +statement error GDK reported error: selectjoin: more than one match SELECT (VALUES(SUM(i1.i)) UNION VALUES(AVG(i1.i))) FROM integers i1 -statement error GDK reported error: mergejoin: more than one match +statement error GDK reported error: selectjoin: more than one match SELECT ((SELECT SUM(i1.i)) UNION ALL (SELECT AVG(i1.i))) FROM integers i1 statement error 42000!SELECT: cannot use non GROUP BY column 'i1.i' in query results without an aggregate function _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org