MonetDB: default - Added group by x+1,(y-2)*3,2-z into group by ...

Pedro Ferreira Wed, 13 Apr 2022 06:25:39 -0700

Changeset: fb6b0c5a4d0e for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/fb6b0c5a4d0e
Modified Files:
        sql/server/rel_optimize_proj.c
        sql/test/miscellaneous/Tests/groupby_expressions.test
        sql/test/miscellaneous/Tests/simple_selects.test
Branch: default
Log Message:


Added group by x+1,(y-2)*3,2-z into group by x,y,z optimization


diffs (196 lines):

diff --git a/sql/server/rel_optimize_proj.c b/sql/server/rel_optimize_proj.c
--- a/sql/server/rel_optimize_proj.c
+++ b/sql/server/rel_optimize_proj.c
@@ -1333,6 +1333,121 @@ rel_project_cse(visitor *v, sql_rel *rel
        return rel;
 }
 
+/* optimize group by x+1,(y-2)*3,2-z into group by x,y,z */
+static inline sql_rel *
+rel_simplify_groupby_columns(visitor *v, sql_rel *rel)
+{
+       if (is_groupby(rel->op) && !list_empty(rel->r)) {
+               sql_rel *l = rel->l;
+
+               for (node *n=((list*)rel->r)->h; n ; n = n->next) {
+                       sql_exp *e = n->data;
+                       e->used = 0; /* we need to use this flag, clean it 
first */
+               }
+               for (node *n=((list*)rel->r)->h; n ; n = n->next) {
+                       sql_exp *e = n->data;
+
+                       if (e->type == e_column) {
+                               bool searching = true;
+                               sql_rel *efrel = NULL;
+                               sql_exp *exp = 
rel_find_exp_and_corresponding_rel(l, e, false, &efrel, NULL), *col = NULL;
+
+                               while (searching && !col) {
+                                       sql_exp *exp_col = exp;
+
+                                       if (exp && is_numeric_upcast(exp))
+                                               exp = exp->l;
+                                       if (exp && exp->type == e_func) {
+                                               list *el = exp->l;
+                                               sql_subfunc *sf = exp->f;
+                                               /* At the moment look only at 
injective math functions */
+                                               if (sf->func->type == F_FUNC && 
!sf->func->s &&
+                                                       
(!strcmp(sf->func->base.name, "sql_sub") || !strcmp(sf->func->base.name, 
"sql_add") || !strcmp(sf->func->base.name, "sql_mul"))) {
+                                                       sql_exp *e1 = 
(sql_exp*) el->h->data, *e2 = (sql_exp*) el->h->next->data;
+                                                       /* the optimization 
cannot be done if side-effect calls (e.g. rand()) are present */
+                                                       int e1ok = 
exp_is_atom(e1) && !exp_unsafe(e1, 1) && !exp_has_sideeffect(e1), e2ok = 
exp_is_atom(e2) && !exp_unsafe(e2, 1) && !exp_has_sideeffect(e2);
+
+                                                       if ((!e1ok && e2ok) || 
(e1ok && !e2ok)) {
+                                                               sql_exp *c = 
e1ok ? e2 : e1;
+                                                               bool done = 
false;
+
+                                                               while (!done) {
+                                                                       if 
(is_numeric_upcast(c))
+                                                                               
c = c->l;
+                                                                       if 
(c->type == e_column) {
+                                                                               
if (is_simple_project(efrel->op) || is_groupby(efrel->op)) {
+                                                                               
        /* in a simple projection, self-references may occur */
+                                                                               
        sql_exp *nc = (c->l ? exps_bind_column2(efrel->exps, c->l, c->r, NULL) 
: exps_bind_column(efrel->exps, c->r, NULL, NULL, 0));
+                                                                               
        if (nc && list_position(efrel->exps, nc) < list_position(efrel->exps, 
exp_col)) {
+                                                                               
                exp_col = c;
+                                                                               
                c = nc;
+                                                                               
                continue;
+                                                                               
        }
+                                                                               
}
+                                                                               
col = c; /* 'c' is a column reference from the left relation */
+                                                                               
done = true;
+                                                                       } else {
+                                                                               
exp = c; /* maybe a nested function call, let's continue searching */
+                                                                               
done = true;
+                                                                       }
+                                                               }
+                                                       } else {
+                                                               searching = 
false;
+                                                       }
+                                               } else {
+                                                       searching = false;
+                                               }
+                                       } else {
+                                               searching = false;
+                                       }
+                               }
+                               if (col) { /* a column reference was found */
+                                       const char *rname = exp_relname(e), 
*name = exp_name(e);
+
+                                       /* the grouping column has an alias, we 
have to keep it */
+                                       if ((rname && name && (strcmp(rname, 
e->l) != 0 || strcmp(name, e->r) != 0)) || (!rname && name && strcmp(name, 
e->r) != 0)) {
+                                               if (!has_label(e)) /* dangerous 
to merge, skip it */
+                                                       continue;
+                                               if (!is_simple_project(l->op) 
|| !list_empty(l->r) || rel_is_ref(l) || need_distinct(l) || is_single(l))
+                                                       rel->l = l = 
rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
+                                               list_append(l->exps, e);
+                                               n->data = e = exp_ref(v->sql, 
e);
+                                               list_hash_clear(rel->r);
+                                       }
+
+                                       sql_exp *f = (col->l ? 
exps_bind_column2(rel->r, col->l, col->r, NULL) : exps_bind_column(rel->r, 
col->r, NULL, NULL, 0));
+
+                                       if (f && list_position(rel->r, f) < 
list_position(rel->r, e)) { /* if already present, remove it */
+                                               e->used = 1;
+                                       } else {
+                                               /* Use an unique reference to 
the column found. If there's another grouping column label pointing into it,
+                                                  rel_groupby_cse will 
hopefully remove it */
+                                               sql_exp *ne = exp_ref(v->sql, 
col);
+                                               if (!has_label(ne))
+                                                       exp_label(v->sql->sa, 
ne, ++v->sql->label);
+
+                                               if (!is_simple_project(l->op) 
|| !list_empty(l->r) || rel_is_ref(l) || need_distinct(l) || is_single(l))
+                                                       rel->l = l = 
rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1));
+                                               list_append(l->exps, ne);
+                                               n->data = exp_ref(v->sql, ne);
+                                               list_hash_clear(rel->r);
+                                       }
+                                       v->changes++;
+                               }
+                       }
+               }
+               for (node *n=((list*)rel->r)->h; n ; ) {
+                       node *next = n->next;
+                       sql_exp *e = n->data;
+
+                       if (e->used) /* remove unecessary grouping columns */
+                               list_remove_node(rel->r, NULL, n);
+                       n = next;
+               }
+       }
+       return rel;
+}
+
 /* remove identical grouping columns */
 static inline sql_rel *
 rel_groupby_cse(visitor *v, sql_rel *rel)
@@ -2516,6 +2631,8 @@ rel_optimize_projections_(visitor *v, sq
        if (!rel || !is_groupby(rel->op))
                return rel;
 
+       if (v->value_based_opt)
+               rel = rel_simplify_groupby_columns(v, rel);
        rel = rel_groupby_cse(v, rel);
        rel = rel_push_aggr_down(v, rel);
        rel = rel_push_groupby_down(v, rel);
diff --git a/sql/test/miscellaneous/Tests/groupby_expressions.test 
b/sql/test/miscellaneous/Tests/groupby_expressions.test
--- a/sql/test/miscellaneous/Tests/groupby_expressions.test
+++ b/sql/test/miscellaneous/Tests/groupby_expressions.test
@@ -26,8 +26,8 @@ project (
 | group by (
 | | project (
 | | | table("sys"."groupings") [ "groupings"."aa" ]
-| | ) [ bigint(33)["groupings"."aa"] as "%3"."%3", "sys"."sql_add"("%3"."%3", 
bigint(33) "1") as "%1"."%1" ]
-| ) [ "%1"."%1" ] [ "%1"."%1" ]
+| | ) [ bigint(33)["groupings"."aa"] as "%3"."%3", "sys"."sql_add"("%3"."%3", 
bigint(33) "1") as "%1"."%1", "groupings"."aa" as "%4"."%4" ]
+| ) [ "%4"."%4" ] [ "%1"."%1" ]
 ) [ "%1"."%1" ]
 
 query I rowsort
diff --git a/sql/test/miscellaneous/Tests/simple_selects.test 
b/sql/test/miscellaneous/Tests/simple_selects.test
--- a/sql/test/miscellaneous/Tests/simple_selects.test
+++ b/sql/test/miscellaneous/Tests/simple_selects.test
@@ -926,6 +926,44 @@ 3
 3
 4
 
+# remove 'x1' from grouping columns list. This one is more complex
+query T nosort
+plan SELECT x AS x, x + 1 AS x1 FROM (VALUES (1),(2),(3)) x(x) GROUP BY x, x1
+----
+project (
+| group by (
+| | project (
+| | |  [  [ tinyint(2) "1", tinyint(2) "2", tinyint(2) "3" ] as "x"."x" ]
+| | ) [ "x"."x", "sys"."sql_add"("x"."x", tinyint(2) "1") as "x1" ]
+| ) [ "x"."x" ] [ "x"."x", "x1" ]
+) [ "x"."x", "x1" ]
+
+query II rowsort
+SELECT x AS x, (10 + x) * 2 - 29 AS x1 FROM (VALUES (1),(2),(3),(4),(5)) x(x) 
GROUP BY x, x1
+----
+1
+-7
+2
+-5
+3
+-3
+4
+-1
+5
+1
+
+# remove 'x1' from grouping columns list. This one is even more complex
+query T nosort
+plan SELECT x AS x, (10 + x) * 2 - 29 AS x1 FROM (VALUES (1),(2),(3),(4),(5)) 
x(x) GROUP BY x, x1
+----
+project (
+| group by (
+| | project (
+| | |  [  [ tinyint(3) "1", tinyint(3) "2", tinyint(3) "3", tinyint(3) "4", 
tinyint(3) "5" ] as "x"."x" ]
+| | ) [ "x"."x", "sys"."sql_add"("x"."x", tinyint(5) "10") as "%7"."%7", 
"sys"."sql_mul"("%7"."%7", tinyint(2) "2") as "%10"."%10", 
"sys"."sql_sub"("%10"."%10", smallint(10) "29") as "x1" ]
+| ) [ "x"."x" ] [ "x"."x", "x1" ]
+) [ "x"."x", "x1" ]
+
 statement ok
 create global temp table x(x int, y int)
 
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

MonetDB: default - Added group by x+1,(y-2)*3,2-z into group by ...

Reply via email to