Changeset: d19e20576c8b for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/d19e20576c8b Modified Files: sql/server/rel_optimizer.c sql/server/rel_rel.c sql/server/rel_rel.h sql/server/rel_unnest.c sql/test/miscellaneous/Tests/simple_plans.test Branch: default Log Message:
Be less restrict pushing down selections under projections. Push down selections before projections requiring distinct get rewritten into groupings. Small cleanup If I get a performance regression I will backout this. diffs (202 lines): diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c --- a/sql/server/rel_optimizer.c +++ b/sql/server/rel_optimizer.c @@ -25,7 +25,8 @@ typedef struct global_props { int cnt[ddl_maxops]; uint8_t - has_mergetable:1; + has_mergetable:1, + needs_distinct:1; } global_props; static int @@ -337,6 +338,7 @@ rel_properties(mvc *sql, global_props *g case op_topn: case op_sample: case op_truncate: + gp->needs_distinct |= need_distinct(rel); if (rel->l) rel_properties(sql, gp, rel->l); break; @@ -2593,7 +2595,7 @@ is_fk_column_of_pk(mvc *sql, sql_rel *re return NULL; } -static inline sql_rel * +static sql_rel * rel_distinct_project2groupby(visitor *v, sql_rel *rel) { sql_rel *l = rel->l; @@ -2894,7 +2896,7 @@ rel_merge_projects(visitor *v, sql_rel * prj && prj->op == op_project && !(rel_is_ref(prj)) && !prj->r) { int all = 1; - if (project_unsafe(rel,0,0) || project_unsafe(prj,0,0) || exps_share_expensive_exp(rel->exps, prj->exps)) + if (project_unsafe(rel,0) || project_unsafe(prj,0) || exps_share_expensive_exp(rel->exps, prj->exps)) return rel; /* here we need to fix aliases */ @@ -3396,8 +3398,8 @@ rel_merge_union(visitor *v, sql_rel *rel sql_rel *ref = NULL; if (is_union(rel->op) && - l && is_project(l->op) && !project_unsafe(l,0,0) && - r && is_project(r->op) && !project_unsafe(r,0,0) && + l && is_project(l->op) && !project_unsafe(l,0) && + r && is_project(r->op) && !project_unsafe(r,0) && (ref = rel_find_ref(l)) != NULL && ref == rel_find_ref(r)) { /* Find selects and try to merge */ sql_rel *ls = rel_find_select(l); @@ -4686,8 +4688,8 @@ rel_push_select_down(visitor *v, sql_rel if (is_select(rel->op) && r && r->op == op_project && !rel_is_ref(r) && !is_single(r)){ sql_rel *pl = r->l; - /* we cannot push through rank (row_number etc) functions or projects with distinct */ - if (pl && !project_unsafe(r, 1, 1)) { + /* we cannot push through window functions (for safety I disabled projects over DDL too) */ + if (pl && pl->op != op_ddl && !exps_have_unsafe(r->exps, 1)) { /* introduce selects under the project (if needed) */ set_processed(pl); for (n = exps->h; n;) { @@ -5552,7 +5554,7 @@ rel_push_project_down_union(visitor *v, sql_rel *ul = u->l; sql_rel *ur = u->r; - if (!u || !is_union(u->op) || need_distinct(u) || !u->exps || rel_is_ref(u) || project_unsafe(rel,0,0)) + if (!u || !is_union(u->op) || need_distinct(u) || !u->exps || rel_is_ref(u) || project_unsafe(rel,0)) return rel; /* don't push project down union of single values */ if ((is_project(ul->op) && !ul->l) || (is_project(ur->op) && !ur->l)) @@ -6360,8 +6362,8 @@ rel_push_project_up(visitor *v, sql_rel (is_left(rel->op) && (rel->flag&MERGE_LEFT) /* can't push projections above merge statments left joins */) || (is_select(rel->op) && l->op != op_project) || (is_join(rel->op) && ((l->op != op_project && r->op != op_project) || is_topn(r->op) || is_sample(r->op))) || - ((l->op == op_project && (!l->l || l->r || project_unsafe(l,is_select(rel->op),0))) || - (is_join(rel->op) && (r->op == op_project && (!r->l || r->r || project_unsafe(r,0,0)))))) + ((l->op == op_project && (!l->l || l->r || project_unsafe(l,is_select(rel->op)))) || + (is_join(rel->op) && (r->op == op_project && (!r->l || r->r || project_unsafe(r,0)))))) return rel; if (l->op == op_project && l->l) { @@ -6687,7 +6689,7 @@ rel_exps_mark_used(sql_allocator *sa, sq } /* for count/rank we need atleast one column */ if (!nr && subrel && (is_project(subrel->op) || is_base(subrel->op)) && !list_empty(subrel->exps) && - (is_simple_project(rel->op) && project_unsafe(rel, 0, 0))) { + (is_simple_project(rel->op) && project_unsafe(rel, 0))) { sql_exp *e = subrel->exps->h->data; e->used = 1; } @@ -9504,7 +9506,6 @@ static sql_rel * rel_first_level_optimizations(visitor *v, sql_rel *rel) { rel = rel_distinct_aggregate_on_unique_values(v, rel); - rel = rel_distinct_project2groupby(v, rel); /* rel_simplify_math optimizer requires to clear the hash, so make sure it runs last in this batch */ if (v->value_based_opt) rel = rel_simplify_math(v, rel); @@ -9798,6 +9799,9 @@ optimize_rel(mvc *sql, sql_rel *rel, int if (gp.cnt[op_topn] || gp.cnt[op_sample]) rel = rel_visitor_topdown(&v, rel, &rel_push_topn_and_sample_down); + if (level <= 0 && gp.needs_distinct && gp.cnt[op_project]) + rel = rel_visitor_bottomup(&v, rel, &rel_distinct_project2groupby); + if (gp.has_mergetable) rel = rel_visitor_topdown(&v, rel, &rel_merge_table_rewrite); diff --git a/sql/server/rel_rel.c b/sql/server/rel_rel.c --- a/sql/server/rel_rel.c +++ b/sql/server/rel_rel.c @@ -25,25 +25,24 @@ rel_set_exps(sql_rel *rel, list *exps) /* some projections results are order dependend (row_number etc) */ int -project_unsafe(sql_rel *rel, int allow_identity, int allow_self_reference) +project_unsafe(sql_rel *rel, int allow_identity) { sql_rel *sub = rel->l; - node *n; if (need_distinct(rel) || rel->r /* order by */) return 1; - if (!rel->exps) + if (list_empty(rel->exps)) return 0; /* projects without sub and projects around ddl's cannot be changed */ - if (!sub || (sub && sub->op == op_ddl)) + if (!sub || sub->op == op_ddl) return 1; - for(n = rel->exps->h; n; n = n->next) { + for(node *n = rel->exps->h; n; n = n->next) { sql_exp *e = n->data, *ne; /* aggr func in project ! */ if (exp_unsafe(e, allow_identity)) return 1; - if (!allow_self_reference && (ne = rel_find_exp(rel, e)) && ne != e) + if ((ne = rel_find_exp(rel, e)) && ne != e) return 1; /* no self referencing */ } return 0; diff --git a/sql/server/rel_rel.h b/sql/server/rel_rel.h --- a/sql/server/rel_rel.h +++ b/sql/server/rel_rel.h @@ -52,7 +52,7 @@ #define is_sql_merge(X) ((X & sql_merge) == sql_merge) extern void rel_set_exps(sql_rel *rel, list *exps); -extern int project_unsafe(sql_rel *rel, int allow_identity, int allow_self_reference); +extern int project_unsafe(sql_rel *rel, int allow_identity); extern const char *rel_name( sql_rel *r ); extern sql_rel *rel_distinct(sql_rel *l); diff --git a/sql/server/rel_unnest.c b/sql/server/rel_unnest.c --- a/sql/server/rel_unnest.c +++ b/sql/server/rel_unnest.c @@ -3296,7 +3296,7 @@ rewrite_remove_xp_project(visitor *v, sq if (rel->op == op_join && list_empty(rel->exps)) { sql_rel *r = rel->r; - if (is_simple_project(r->op) && r->l && !project_unsafe(r, 1, 0)) { + if (is_simple_project(r->op) && r->l && !project_unsafe(r, 1)) { sql_rel *rl = r->l; if (is_simple_project(rl->op) && !rl->l && list_length(rl->exps) == 1) { diff --git a/sql/test/miscellaneous/Tests/simple_plans.test b/sql/test/miscellaneous/Tests/simple_plans.test --- a/sql/test/miscellaneous/Tests/simple_plans.test +++ b/sql/test/miscellaneous/Tests/simple_plans.test @@ -561,6 +561,32 @@ project ( | ) [ "x"."x", "x"."y" ] ) [ "x"."x", "x"."y" ] +query T nosort +plan select x, y from (select a as b, a / 3 as c from t1 union distinct select a as z, a / 5 as d from t2) x(x,y) where x > 2 +---- +project ( +| distinct union ( +| | project ( +| | | group by ( +| | | | project ( +| | | | | select ( +| | | | | | table("sys"."t1") [ "t1"."a" ] COUNT +| | | | | ) [ "t1"."a" > int "2" ] +| | | | ) [ "t1"."a" as "b", "sys"."sql_div"("t1"."a", tinyint "3") as "c" ] +| | | ) [ "b", "c" ] [ "b", "c" ] +| | ) [ "b" as "x"."x", "c" as "x"."y" ], +| | project ( +| | | group by ( +| | | | project ( +| | | | | select ( +| | | | | | table("sys"."t2") [ "t2"."a" ] COUNT +| | | | | ) [ "t2"."a" > int "2" ] +| | | | ) [ "t2"."a" as "z", "sys"."sql_div"("t2"."a", tinyint "5") as "d" ] +| | | ) [ "z", "d" ] [ "z", "d" ] +| | ) [ "z" as "x"."x", "d" as "x"."y" ] +| ) [ "x"."x", "x"."y" ] +) [ "x"."x", "x"."y" ] + statement ok rollback _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list