Changeset: 89b4bf8a943d for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/89b4bf8a943d
Added Files:
        sql/test/BugTracker-2024/Tests/7045-do-not-push-down-converts.test
Modified Files:
        sql/backends/monet5/rel_bin.c
        sql/backends/monet5/sql_statement.c
        sql/server/rel_exp.c
        sql/server/rel_exp.h
        sql/server/rel_optimize_others.c
        sql/server/rel_optimize_proj.c
        sql/server/rel_optimize_sel.c
        sql/server/rel_rel.c
        sql/server/rel_rel.h
        sql/server/rel_statistics.c
        sql/server/rel_unnest.c
        sql/test/BugTracker-2024/Tests/All
        sql/test/BugTracker-2024/Tests/atom_cmp-Bug-7477.test
        sql/test/SQLancer/Tests/sqlancer17.test
        sql/test/miscellaneous/Tests/select_groupby.stable.err
        sql/test/miscellaneous/Tests/select_groupby.stable.out
Branch: Aug2024
Log Message:

improved split of join and select expressions (fixes #7553)
improved (not) push down of expressions (converts mostly) when could potentialy 
lead to incorrect errors (fixes #7045).


diffs (truncated from 620 to 300 lines):

diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c
--- a/sql/backends/monet5/rel_bin.c
+++ b/sql/backends/monet5/rel_bin.c
@@ -1605,6 +1605,15 @@ exp_bin(backend *be, sql_exp *e, stmt *l
                if (from->type->eclass == EC_SEC && to->type->eclass == EC_SEC) 
{
                        // trivial conversion because EC_SEC is always in 
milliseconds
                        s = l;
+               } else if (depth && sel && l->nrcols == 0 && left && 
left->nrcols && exp_unsafe(e, false, true)) {
+                       stmt *rows = bin_find_smallest_column(be, left);
+                       l = stmt_const(be, rows, l);
+                       s = stmt_convert(be, l, sel, from, to);
+               } else if (depth && l->nrcols == 0 && left && left->nrcols && 
from->type->localtype > to->type->localtype &&
+                               exp_unsafe(e, false, true)) {
+                       stmt *rows = bin_find_smallest_column(be, left);
+                       l = stmt_const(be, rows, l);
+                       s = stmt_convert(be, l, sel, from, to);
                } else {
                        s = stmt_convert(be, l, (!push&&l->nrcols==0)?NULL:sel, 
from, to);
                }
@@ -2829,12 +2838,12 @@ can_join_exp(sql_rel *rel, sql_exp *e, b
                        sql_exp *l = e->l, *r = e->r, *f = e->f;
 
                        if (f) {
-                               int ll = rel_find_exp(rel->l, l) != NULL;
-                               int rl = rel_find_exp(rel->r, l) != NULL;
-                               int lr = rel_find_exp(rel->l, r) != NULL;
-                               int rr = rel_find_exp(rel->r, r) != NULL;
-                               int lf = rel_find_exp(rel->l, f) != NULL;
-                               int rf = rel_find_exp(rel->r, f) != NULL;
+                               int ll = rel_has_exp(rel->l, l, true) == 0;
+                               int rl = rel_has_exp(rel->r, l, true) == 0;
+                               int lr = rel_has_exp(rel->l, r, true) == 0;
+                               int rr = rel_has_exp(rel->r, r, true) == 0;
+                               int lf = rel_has_exp(rel->l, f, true) == 0;
+                               int rf = rel_has_exp(rel->r, f, true) == 0;
                                int nrcr1 = 0, nrcr2 = 0, nrcl1 = 0, nrcl2 = 0;
 
                                if ((ll && !rl &&
@@ -2848,15 +2857,15 @@ can_join_exp(sql_rel *rel, sql_exp *e, b
                        } else {
                                int ll = 0, lr = 0, rl = 0, rr = 0, cst = 0;
                                if (l->card != CARD_ATOM || !exp_is_atom(l)) {
-                                       ll = rel_find_exp(rel->l, l) != NULL;
-                                       rl = rel_find_exp(rel->r, l) != NULL;
+                                       ll = rel_has_exp(rel->l, l, true) == 0;
+                                       rl = rel_has_exp(rel->r, l, true) == 0;
                                } else if (anti) {
                                        ll = 1;
                                        cst = 1;
                                }
                                if (r->card != CARD_ATOM || !exp_is_atom(r)) {
-                                       lr = rel_find_exp(rel->l, r) != NULL;
-                                       rr = rel_find_exp(rel->r, r) != NULL;
+                                       lr = rel_has_exp(rel->l, r, true) == 0;
+                                       rr = rel_has_exp(rel->r, r, true) == 0;
                                } else if (anti) {
                                        rr = cst?0:1;
                                }
@@ -2871,16 +2880,16 @@ can_join_exp(sql_rel *rel, sql_exp *e, b
                                sql_exp *ee = n->data;
 
                                if (ee->card != CARD_ATOM || !exp_is_atom(ee)) {
-                                       ll |= rel_find_exp(rel->l, ee) != NULL;
-                                       rl |= rel_find_exp(rel->r, ee) != NULL;
+                                       ll |= rel_has_exp(rel->l, ee, true) == 
0;
+                                       rl |= rel_has_exp(rel->r, ee, true) == 
0;
                                }
                        }
                        for (node *n = r->h ; n ; n = n->next) {
                                sql_exp *ee = n->data;
 
                                if (ee->card != CARD_ATOM || !exp_is_atom(ee)) {
-                                       lr |= rel_find_exp(rel->l, ee) != NULL;
-                                       rr |= rel_find_exp(rel->r, ee) != NULL;
+                                       lr |= rel_has_exp(rel->l, ee, true) == 
0;
+                                       rr |= rel_has_exp(rel->r, ee, true) == 
0;
                                }
                        }
                        if ((ll && !lr && !rl && rr) || (!ll && lr && rl && 
!rr))
diff --git a/sql/backends/monet5/sql_statement.c 
b/sql/backends/monet5/sql_statement.c
--- a/sql/backends/monet5/sql_statement.c
+++ b/sql/backends/monet5/sql_statement.c
@@ -3785,7 +3785,7 @@ temporal_convert(backend *be, stmt *v, s
        MalBlkPtr mb = be->mb;
        InstrPtr q = NULL;
        const char *convert = t->type->impl, *mod = mtimeRef;
-       bool add_tz = false, pushed = (v->cand && v->cand == sel);
+       bool add_tz = false, pushed = (v->cand && v->cand == sel), cand = 0;
 
        if (before) {
                if (f->type->eclass == EC_TIMESTAMP_TZ && (t->type->eclass == 
EC_TIMESTAMP || t->type->eclass == EC_TIME)) {
@@ -3816,6 +3816,7 @@ temporal_convert(backend *be, stmt *v, s
                                convert = "timestamptz";
                        mod = calcRef;
                        add_tz = true;
+                       cand = 1;
                } else {
                        return v;
                }
@@ -3847,17 +3848,28 @@ temporal_convert(backend *be, stmt *v, s
        }
        q = pushArgument(mb, q, v->nr);
 
+       if (cand) {
+               if (sel && !pushed && !v->cand) {
+                       q = pushArgument(mb, q, sel->nr);
+                       pushed = 1;
+               } else if (v->nrcols > 0) {
+                       q = pushNilBat(mb, q);
+               }
+       }
+
        if (EC_VARCHAR(f->type->eclass))
                q = pushInt(mb, q, t->digits);
 
        if (add_tz)
                        q = pushLng(mb, q, be->mvc->timezone);
 
-       if (sel && !pushed && !v->cand) {
-               q = pushArgument(mb, q, sel->nr);
-               pushed = 1;
-       } else if (v->nrcols > 0) {
-               q = pushNilBat(mb, q);
+       if (!cand) {
+               if (sel && !pushed && !v->cand) {
+                       q = pushArgument(mb, q, sel->nr);
+                       pushed = 1;
+               } else if (v->nrcols > 0) {
+                       q = pushNilBat(mb, q);
+               }
        }
 
        bool enabled = be->mvc->sa->eb.enabled;
diff --git a/sql/server/rel_exp.c b/sql/server/rel_exp.c
--- a/sql/server/rel_exp.c
+++ b/sql/server/rel_exp.c
@@ -1747,6 +1747,27 @@ exps_find_prop(list *exps, rel_prop kind
        return NULL;
 }
 
+/* check is one of the exps can be found in this relation */
+static sql_exp* rel_find_exp_and_corresponding_rel_(sql_rel *rel, sql_exp *e, 
bool subexp, sql_rel **res);
+
+static bool
+rel_find_exps_and_corresponding_rel_(sql_rel *rel, list *l, bool subexp, 
sql_rel **res)
+{
+       int all = 1;
+
+       if (list_empty(l))
+               return true;
+       for(node *n = l->h; n && (subexp || all); n = n->next) {
+               sql_exp *ne = rel_find_exp_and_corresponding_rel_(rel, n->data, 
subexp, res);
+               if (subexp && ne)
+                       return true;
+               all &= (ne?1:0);
+       }
+       if (all)
+               return true;
+       return false;
+}
+
 static sql_exp *
 rel_find_exp_and_corresponding_rel_(sql_rel *rel, sql_exp *e, bool subexp, 
sql_rel **res)
 {
@@ -1773,22 +1794,28 @@ rel_find_exp_and_corresponding_rel_(sql_
                return rel_find_exp_and_corresponding_rel_(rel, e->l, subexp, 
res);
        case e_aggr:
        case e_func:
-               if (e->l) {
-                       list *l = e->l;
-                       node *n = l->h;
-
-                       ne = n->data;
-                       while ((subexp || ne != NULL) && n != NULL) {
-                               ne = rel_find_exp_and_corresponding_rel_(rel, 
n->data, subexp, res);
-                               if (subexp && ne)
-                                       break;
-                               n = n->next;
-                       }
-                       return ne;
+               if (e->l)
+                       if (rel_find_exps_and_corresponding_rel_(rel, e->l, 
subexp, res))
+                               return e;
+               return NULL;
+       case e_cmp:
+               if (!subexp)
+                       return NULL;
+
+               if (e->flag == cmp_or || e->flag == cmp_filter) {
+                       if (rel_find_exps_and_corresponding_rel_(rel, e->l, 
subexp, res) ||
+                               rel_find_exps_and_corresponding_rel_(rel, e->r, 
subexp, res))
+                               return e;
+               } else if (e->flag == cmp_in || e->flag == cmp_notin) {
+                       if (rel_find_exp_and_corresponding_rel_(rel, e->l, 
subexp, res) ||
+                               rel_find_exps_and_corresponding_rel_(rel, e->r, 
subexp, res))
+                               return e;
+               } else if (rel_find_exp_and_corresponding_rel_(rel, e->l, 
subexp, res) ||
+                           rel_find_exp_and_corresponding_rel_(rel, e->r, 
subexp, res) ||
+                           (!e->f || rel_find_exp_and_corresponding_rel_(rel, 
e->f, subexp, res))) {
+                               return e;
                }
-               break;
-               /* fall through */
-       case e_cmp:
+               return NULL;
        case e_psm:
                return NULL;
        case e_atom:
@@ -2104,10 +2131,8 @@ exp_is_null(sql_exp *e )
                return 0;
        case e_cmp:
                if (!is_semantics(e)) {
-                       if (e->flag == cmp_or) {
+                       if (e->flag == cmp_or || e->flag == cmp_filter) {
                                return (exps_have_null(e->l) && 
exps_have_null(e->r));
-                       } else if (e->flag == cmp_filter) {
-                               return (exps_have_null(e->l) || 
exps_have_null(e->r));
                        } else if (e->flag == cmp_in || e->flag == cmp_notin) {
                                return ((e->flag == cmp_in && 
exp_is_null(e->l)) ||
                                                (e->flag == cmp_notin && 
(exp_is_null(e->l) || exps_have_null(e->r))));
@@ -2712,44 +2737,53 @@ exp_has_sideeffect( sql_exp *e )
        return 0;
 }
 
-int
-exps_have_unsafe(list *exps, int allow_identity)
+bool
+exps_have_unsafe(list *exps, bool allow_identity, bool card)
 {
        int unsafe = 0;
 
        if (list_empty(exps))
                return 0;
        for (node *n = exps->h; n && !unsafe; n = n->next)
-               unsafe |= exp_unsafe(n->data, allow_identity);
+               unsafe |= exp_unsafe(n->data, allow_identity, card);
        return unsafe;
 }
 
-int
-exp_unsafe(sql_exp *e, int allow_identity)
+bool
+exp_unsafe(sql_exp *e, bool allow_identity, bool card)
 {
        switch (e->type) {
        case e_convert:
-               return exp_unsafe(e->l, allow_identity);
+               if (card) {
+                       sql_subtype *t = exp_totype(e);
+                       sql_subtype *f = exp_fromtype(e);
+                       if (t->type->eclass == EC_FLT && (f->type->eclass == 
EC_DEC || f->type->eclass == EC_NUM))
+                               return false;
+                       if (f->type->localtype > t->type->localtype)
+                               return true;
+                       return false;
+               }
+               return exp_unsafe(e->l, allow_identity, card);
        case e_aggr:
        case e_func: {
                sql_subfunc *f = e->f;
 
                if (IS_ANALYTIC(f->func) || !LANG_INT_OR_MAL(f->func->lang) || 
f->func->side_effect || (!allow_identity && is_identity(e, NULL)))
                        return 1;
-               return exps_have_unsafe(e->l, allow_identity);
+               return exps_have_unsafe(e->l, allow_identity, card);
        } break;
        case e_cmp: {
                if (e->flag == cmp_in || e->flag == cmp_notin) {
-                       return exp_unsafe(e->l, allow_identity) || 
exps_have_unsafe(e->r, allow_identity);
+                       return exp_unsafe(e->l, allow_identity, card) || 
exps_have_unsafe(e->r, allow_identity, card);
                } else if (e->flag == cmp_or || e->flag == cmp_filter) {
-                       return exps_have_unsafe(e->l, allow_identity) || 
exps_have_unsafe(e->r, allow_identity);
+                       return exps_have_unsafe(e->l, allow_identity, card) || 
exps_have_unsafe(e->r, allow_identity, card);
                } else {
-                       return exp_unsafe(e->l, allow_identity) || 
exp_unsafe(e->r, allow_identity) || (e->f && exp_unsafe(e->f, allow_identity));
+                       return exp_unsafe(e->l, allow_identity, card) || 
exp_unsafe(e->r, allow_identity, card) || (e->f && exp_unsafe(e->f, 
allow_identity, card));
                }
        } break;
        case e_atom: {
                if (e->f)
-                       return exps_have_unsafe(e->f, allow_identity);
+                       return exps_have_unsafe(e->f, allow_identity, card);
                return 0;
        } break;
        case e_column:
diff --git a/sql/server/rel_exp.h b/sql/server/rel_exp.h
--- a/sql/server/rel_exp.h
+++ b/sql/server/rel_exp.h
@@ -178,8 +178,9 @@ extern sql_exp *exp_rel_label(mvc *sql, 
 extern int exp_rel_depth(sql_exp *e);
 extern int exps_are_atoms(list *exps);
 extern int exp_has_func(sql_exp *e);
-extern int exps_have_unsafe(list *exps, int allow_identity);
-extern int exp_unsafe(sql_exp *e, int allow_identity);
+extern bool exps_have_unsafe(list *exps, bool allow_identity, bool card /* on 
true check for possible cardinality related
+                                                                               
                                                                  unsafeness 
(conversions for example) */);
+extern bool exp_unsafe(sql_exp *e, bool allow_identity, bool card);
 extern int exp_has_sideeffect(sql_exp *e);
 
 extern sql_exp *exps_find_prop(list *exps, rel_prop kind);
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to