Changeset: fb6b0c5a4d0e for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/fb6b0c5a4d0e Modified Files: sql/server/rel_optimize_proj.c sql/test/miscellaneous/Tests/groupby_expressions.test sql/test/miscellaneous/Tests/simple_selects.test Branch: default Log Message:
Added group by x+1,(y-2)*3,2-z into group by x,y,z optimization diffs (196 lines): diff --git a/sql/server/rel_optimize_proj.c b/sql/server/rel_optimize_proj.c --- a/sql/server/rel_optimize_proj.c +++ b/sql/server/rel_optimize_proj.c @@ -1333,6 +1333,121 @@ rel_project_cse(visitor *v, sql_rel *rel return rel; } +/* optimize group by x+1,(y-2)*3,2-z into group by x,y,z */ +static inline sql_rel * +rel_simplify_groupby_columns(visitor *v, sql_rel *rel) +{ + if (is_groupby(rel->op) && !list_empty(rel->r)) { + sql_rel *l = rel->l; + + for (node *n=((list*)rel->r)->h; n ; n = n->next) { + sql_exp *e = n->data; + e->used = 0; /* we need to use this flag, clean it first */ + } + for (node *n=((list*)rel->r)->h; n ; n = n->next) { + sql_exp *e = n->data; + + if (e->type == e_column) { + bool searching = true; + sql_rel *efrel = NULL; + sql_exp *exp = rel_find_exp_and_corresponding_rel(l, e, false, &efrel, NULL), *col = NULL; + + while (searching && !col) { + sql_exp *exp_col = exp; + + if (exp && is_numeric_upcast(exp)) + exp = exp->l; + if (exp && exp->type == e_func) { + list *el = exp->l; + sql_subfunc *sf = exp->f; + /* At the moment look only at injective math functions */ + if (sf->func->type == F_FUNC && !sf->func->s && + (!strcmp(sf->func->base.name, "sql_sub") || !strcmp(sf->func->base.name, "sql_add") || !strcmp(sf->func->base.name, "sql_mul"))) { + sql_exp *e1 = (sql_exp*) el->h->data, *e2 = (sql_exp*) el->h->next->data; + /* the optimization cannot be done if side-effect calls (e.g. rand()) are present */ + int e1ok = exp_is_atom(e1) && !exp_unsafe(e1, 1) && !exp_has_sideeffect(e1), e2ok = exp_is_atom(e2) && !exp_unsafe(e2, 1) && !exp_has_sideeffect(e2); + + if ((!e1ok && e2ok) || (e1ok && !e2ok)) { + sql_exp *c = e1ok ? e2 : e1; + bool done = false; + + while (!done) { + if (is_numeric_upcast(c)) + c = c->l; + if (c->type == e_column) { + if (is_simple_project(efrel->op) || is_groupby(efrel->op)) { + /* in a simple projection, self-references may occur */ + sql_exp *nc = (c->l ? exps_bind_column2(efrel->exps, c->l, c->r, NULL) : exps_bind_column(efrel->exps, c->r, NULL, NULL, 0)); + if (nc && list_position(efrel->exps, nc) < list_position(efrel->exps, exp_col)) { + exp_col = c; + c = nc; + continue; + } + } + col = c; /* 'c' is a column reference from the left relation */ + done = true; + } else { + exp = c; /* maybe a nested function call, let's continue searching */ + done = true; + } + } + } else { + searching = false; + } + } else { + searching = false; + } + } else { + searching = false; + } + } + if (col) { /* a column reference was found */ + const char *rname = exp_relname(e), *name = exp_name(e); + + /* the grouping column has an alias, we have to keep it */ + if ((rname && name && (strcmp(rname, e->l) != 0 || strcmp(name, e->r) != 0)) || (!rname && name && strcmp(name, e->r) != 0)) { + if (!has_label(e)) /* dangerous to merge, skip it */ + continue; + if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l) || is_single(l)) + rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1)); + list_append(l->exps, e); + n->data = e = exp_ref(v->sql, e); + list_hash_clear(rel->r); + } + + sql_exp *f = (col->l ? exps_bind_column2(rel->r, col->l, col->r, NULL) : exps_bind_column(rel->r, col->r, NULL, NULL, 0)); + + if (f && list_position(rel->r, f) < list_position(rel->r, e)) { /* if already present, remove it */ + e->used = 1; + } else { + /* Use an unique reference to the column found. If there's another grouping column label pointing into it, + rel_groupby_cse will hopefully remove it */ + sql_exp *ne = exp_ref(v->sql, col); + if (!has_label(ne)) + exp_label(v->sql->sa, ne, ++v->sql->label); + + if (!is_simple_project(l->op) || !list_empty(l->r) || rel_is_ref(l) || need_distinct(l) || is_single(l)) + rel->l = l = rel_project(v->sql->sa, l, rel_projections(v->sql, l, NULL, 1, 1)); + list_append(l->exps, ne); + n->data = exp_ref(v->sql, ne); + list_hash_clear(rel->r); + } + v->changes++; + } + } + } + for (node *n=((list*)rel->r)->h; n ; ) { + node *next = n->next; + sql_exp *e = n->data; + + if (e->used) /* remove unecessary grouping columns */ + list_remove_node(rel->r, NULL, n); + n = next; + } + } + return rel; +} + /* remove identical grouping columns */ static inline sql_rel * rel_groupby_cse(visitor *v, sql_rel *rel) @@ -2516,6 +2631,8 @@ rel_optimize_projections_(visitor *v, sq if (!rel || !is_groupby(rel->op)) return rel; + if (v->value_based_opt) + rel = rel_simplify_groupby_columns(v, rel); rel = rel_groupby_cse(v, rel); rel = rel_push_aggr_down(v, rel); rel = rel_push_groupby_down(v, rel); diff --git a/sql/test/miscellaneous/Tests/groupby_expressions.test b/sql/test/miscellaneous/Tests/groupby_expressions.test --- a/sql/test/miscellaneous/Tests/groupby_expressions.test +++ b/sql/test/miscellaneous/Tests/groupby_expressions.test @@ -26,8 +26,8 @@ project ( | group by ( | | project ( | | | table("sys"."groupings") [ "groupings"."aa" ] -| | ) [ bigint(33)["groupings"."aa"] as "%3"."%3", "sys"."sql_add"("%3"."%3", bigint(33) "1") as "%1"."%1" ] -| ) [ "%1"."%1" ] [ "%1"."%1" ] +| | ) [ bigint(33)["groupings"."aa"] as "%3"."%3", "sys"."sql_add"("%3"."%3", bigint(33) "1") as "%1"."%1", "groupings"."aa" as "%4"."%4" ] +| ) [ "%4"."%4" ] [ "%1"."%1" ] ) [ "%1"."%1" ] query I rowsort diff --git a/sql/test/miscellaneous/Tests/simple_selects.test b/sql/test/miscellaneous/Tests/simple_selects.test --- a/sql/test/miscellaneous/Tests/simple_selects.test +++ b/sql/test/miscellaneous/Tests/simple_selects.test @@ -926,6 +926,44 @@ 3 3 4 +# remove 'x1' from grouping columns list. This one is more complex +query T nosort +plan SELECT x AS x, x + 1 AS x1 FROM (VALUES (1),(2),(3)) x(x) GROUP BY x, x1 +---- +project ( +| group by ( +| | project ( +| | | [ [ tinyint(2) "1", tinyint(2) "2", tinyint(2) "3" ] as "x"."x" ] +| | ) [ "x"."x", "sys"."sql_add"("x"."x", tinyint(2) "1") as "x1" ] +| ) [ "x"."x" ] [ "x"."x", "x1" ] +) [ "x"."x", "x1" ] + +query II rowsort +SELECT x AS x, (10 + x) * 2 - 29 AS x1 FROM (VALUES (1),(2),(3),(4),(5)) x(x) GROUP BY x, x1 +---- +1 +-7 +2 +-5 +3 +-3 +4 +-1 +5 +1 + +# remove 'x1' from grouping columns list. This one is even more complex +query T nosort +plan SELECT x AS x, (10 + x) * 2 - 29 AS x1 FROM (VALUES (1),(2),(3),(4),(5)) x(x) GROUP BY x, x1 +---- +project ( +| group by ( +| | project ( +| | | [ [ tinyint(3) "1", tinyint(3) "2", tinyint(3) "3", tinyint(3) "4", tinyint(3) "5" ] as "x"."x" ] +| | ) [ "x"."x", "sys"."sql_add"("x"."x", tinyint(5) "10") as "%7"."%7", "sys"."sql_mul"("%7"."%7", tinyint(2) "2") as "%10"."%10", "sys"."sql_sub"("%10"."%10", smallint(10) "29") as "x1" ] +| ) [ "x"."x" ] [ "x"."x", "x1" ] +) [ "x"."x", "x1" ] + statement ok create global temp table x(x int, y int) _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org