Changeset: 66099b3f3afd for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=66099b3f3afd Modified Files: sql/server/rel_optimizer.c sql/server/rel_optimizer.h sql/server/rel_unnest.c sql/test/BugTracker-2015/Tests/crash.Bug-3736.stable.out sql/test/BugTracker/Tests/bug_in_selection.SF-1892413.stable.out Branch: default Log Message:
improved handling of unnesting, ie don't introduce an intermediate in the general unnesting case when left already has unique/distinct values diffs (truncated from 337 to 300 lines): diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c --- a/sql/server/rel_optimizer.c +++ b/sql/server/rel_optimizer.c @@ -41,24 +41,6 @@ static sql_rel * rel_remove_empty_select static sql_subfunc *find_func( mvc *sql, char *name, list *exps ); -static int -exps_unique( list *exps ) -{ - node *n; - - if ((n = exps->h) != NULL) { - sql_exp *e = n->data; - prop *p; - - if (e && (p = find_prop(e->p, PROP_HASHCOL)) != NULL) { - sql_ukey *k = p->value; - if (k && list_length(k->k.columns) <= 1) - return 1; - } - } - return 0; -} - /* The important task of the relational optimizer is to optimize the join order. @@ -130,10 +112,11 @@ name_find_column( sql_rel *rel, const ch case op_left: case op_right: case op_full: + /* first right (possible subquery) */ + c = name_find_column( rel->r, rname, name, pnr, bt); + /* fall through */ case op_semi: case op_anti: - /* first right (possible subquery) */ - c = name_find_column( rel->r, rname, name, pnr, bt); if (!c) c = name_find_column( rel->l, rname, name, pnr, bt); return c; @@ -2398,6 +2381,81 @@ exp_push_down_prj(mvc *sql, sql_exp *e, return NULL; } +static int +rel_is_unique( sql_rel *rel, sql_ukey *k) +{ + switch(rel->op) { + case op_left: + case op_right: + case op_full: + case op_join: + return 0; + case op_semi: + case op_anti: + return rel_is_unique(rel->l, k); + case op_table: + case op_basetable: + return 1; + default: + return 0; + } +} + +int +exps_unique(mvc *sql, sql_rel *rel, list *exps) +{ + node *n; + char *matched = NULL; + int nr = 0; + sql_ukey *k = NULL; + + if (list_empty(exps)) + return 0; + for(n = exps->h; n && !k; n = n->next) { + sql_exp *e = n->data; + prop *p; + + if (e && (p = find_prop(e->p, PROP_HASHCOL)) != NULL) + k = p->value; + } + if (!k || list_length(k->k.columns) > list_length(exps)) + return 0; + if (rel) { + matched = (char*)sa_alloc(sql->sa, list_length(k->k.columns)); + memset(matched, 0, list_length(k->k.columns)); + for(n = exps->h; n; n = n->next) { + sql_exp *e = n->data; + fcmp cmp = (fcmp)&kc_column_cmp; + sql_column *c = exp_find_column(rel, e, -2); + node *m; + + if (c && (m=list_find(k->k.columns, c, cmp)) != NULL) { + int pos = list_position(k->k.columns, m->data); + if (!matched[pos]) + nr++; + matched[pos] = 1; + } + } + if (nr == list_length(k->k.columns)) { + return rel_is_unique(rel, k); + } + } + /* + if ((n = exps->h) != NULL) { + sql_exp *e = n->data; + prop *p; + + if (e && (p = find_prop(e->p, PROP_HASHCOL)) != NULL) { + sql_ukey *k = p->value; + if (k && list_length(k->k.columns) <= 1) + return 1; + } + } + */ + return 0; +} + + static sql_rel * rel_distinct_project2groupby(int *changes, mvc *sql, sql_rel *rel) { @@ -2411,9 +2469,9 @@ rel_distinct_project2groupby(int *change } /* rewrite distinct project [ pk ] ( select ( table ) [ e op val ]) - * into project [ pk ] ( select ( table ) */ + * into project [ pk ] ( select/semijoin ( table ) */ if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) && - l->op == op_select && exps_unique(rel->exps)) + (l->op == op_select || l->op == op_semi) && exps_unique(sql, rel, rel->exps)) set_nodistinct(rel); /* rewrite distinct project [ gbe ] ( select ( groupby [ gbe ] [ gbe, e ] )[ e op val ]) * into project [ gbe ] ( select ( group etc ) */ @@ -5173,7 +5231,7 @@ static sql_rel * rel_push_project_down_union(int *changes, mvc *sql, sql_rel *rel) { /* first remove distinct if already unique */ - if (rel->op == op_project && need_distinct(rel) && rel->exps && exps_unique(rel->exps)) + if (rel->op == op_project && need_distinct(rel) && rel->exps && exps_unique(sql, rel, rel->exps)) set_nodistinct(rel); if (rel->op == op_project && rel->l && rel->exps && !rel->r) { @@ -5201,8 +5259,8 @@ rel_push_project_down_union(int *changes ur = rel_project(sql->sa, ur, rel_projections(sql, ur, NULL, 1, 1)); need_distinct = (need_distinct && - (!exps_unique(ul->exps) || - !exps_unique(ur->exps))); + (!exps_unique(sql, ul, ul->exps) || + !exps_unique(sql, ur, ur->exps))); rel_rename_exps(sql, u->exps, ul->exps); rel_rename_exps(sql, u->exps, ur->exps); @@ -5588,7 +5646,7 @@ rel_groupby_distinct(int *changes, mvc * if (exp_aggr_is_count(e) && need_distinct(e)) { /* if count over unique values (ukey/pkey) */ - if (e->l && exps_unique(e->l)) + if (e->l && exps_unique(sql, rel, e->l)) set_nodistinct(e); } } diff --git a/sql/server/rel_optimizer.h b/sql/server/rel_optimizer.h --- a/sql/server/rel_optimizer.h +++ b/sql/server/rel_optimizer.h @@ -17,6 +17,7 @@ extern sql_rel * rel_optimizer(mvc *sql, extern int exp_joins_rels(sql_exp *e, list *rels); extern void *name_find_column( sql_rel *rel, const char *rname, const char *name, int pnr, sql_rel **bt ); +extern int exps_unique(mvc *sql, sql_rel *rel, list *exps); extern sql_rel * rel_dce(mvc *sql, sql_rel *rel); diff --git a/sql/server/rel_unnest.c b/sql/server/rel_unnest.c --- a/sql/server/rel_unnest.c +++ b/sql/server/rel_unnest.c @@ -10,11 +10,22 @@ #include "monetdb_config.h" #include "rel_unnest.h" +#include "rel_optimizer.h" +#include "rel_prop.h" #include "rel_rel.h" #include "rel_exp.h" static int exps_have_freevar( list *exps ); +/* check if the set is distinct for the set of free variables */ +static int +is_distinct_set(mvc *sql, sql_rel *rel, list *ad) +{ + if (ad && exps_unique(sql, rel, ad )) + return 1; + return need_distinct(rel); +} + int exp_has_freevar( sql_exp *e) { @@ -483,19 +494,23 @@ exps_is_constant( list *exps ) } static sql_rel * -push_up_groupby(mvc *sql, sql_rel *rel) +push_up_groupby(mvc *sql, sql_rel *rel, list *ad) { /* input rel is dependent join with on the right a project */ if (rel && (is_join(rel->op) || is_semi(rel->op)) && is_dependent(rel)) { sql_rel *l = rel->l, *r = rel->r; /* left of rel should be a set */ - if (l && need_distinct(l) && r && r->op == op_groupby) { + if (l && is_distinct_set(sql, l, ad) && r && r->op == op_groupby) { list *sexps, *jexps; node *n; /* move groupby up, ie add attributes of left + the old expression list */ list *a = rel_projections(sql, rel->l, NULL, 1, 1); + assert(rel->op != op_anti); + if (rel->op == op_semi && !need_distinct(l)) + rel->op = op_join; + for (n = r->exps->h; n; n = n->next ) { sql_exp *e = n->data; @@ -611,7 +626,7 @@ push_up_join(mvc *sql, sql_rel *rel) sql_rel *d = rel->l, *j = rel->r; /* left of rel should be a set */ - if (d && need_distinct(d) && j && (is_join(j->op) || is_semi(j->op))) { + if (d && is_distinct_set(sql, d, NULL) && j && (is_join(j->op) || is_semi(j->op))) { sql_rel *jl = j->l, *jr = j->r; /* op_join if F(jl) intersect A(D) = empty -> jl join (D djoin jr) * F(jr) intersect A(D) = empty -> (D djoin jl) join jr @@ -688,7 +703,7 @@ push_up_set(mvc *sql, sql_rel *rel) sql_rel *d = rel->l, *s = rel->r; /* left of rel should be a set */ - if (d && need_distinct(d) && s && is_set(s->op)) { + if (d && is_distinct_set(sql, d, NULL) && s && is_set(s->op)) { list *sexps; node *m; sql_rel *sl = s->l, *sr = s->r, *n; @@ -721,7 +736,7 @@ push_up_table(mvc *sql, sql_rel *rel) sql_rel *d = rel->l, *tf = rel->r; /* for now just push d into function */ - if (d && need_distinct(d) && tf && is_base(tf->op)) { + if (d && is_distinct_set(sql, d, NULL) && tf && is_base(tf->op)) { if (tf->l) { sql_rel *l = tf->l; @@ -842,7 +857,7 @@ rel_unnest_dependent(mvc *sql, sql_rel * /* try to push dependent join down */ if (rel_has_freevar(r)){ - list *ad; + list *ad = rel_dependent_var(sql, rel->l, rel->r); if (r && is_simple_project(r->op)) { rel = push_up_project(sql, rel); @@ -859,22 +874,22 @@ rel_unnest_dependent(mvc *sql, sql_rel * return rel_unnest_dependent(sql, rel); } - if (r && is_groupby(r->op) && need_distinct(l)) { - rel = push_up_groupby(sql, rel); + if (r && is_groupby(r->op) && is_distinct_set(sql, l, ad)) { + rel = push_up_groupby(sql, rel, ad); return rel_unnest_dependent(sql, rel); } - if (r && (is_join(r->op) || is_semi(r->op)) && need_distinct(l)) { + if (r && (is_join(r->op) || is_semi(r->op)) && is_distinct_set(sql, l, NULL)) { rel = push_up_join(sql, rel); return rel_unnest_dependent(sql, rel); } - if (r && is_set(r->op) && need_distinct(l)) { + if (r && is_set(r->op) && is_distinct_set(sql, l, NULL)) { rel = push_up_set(sql, rel); return rel_unnest_dependent(sql, rel); } - if (r && is_base(r->op) && need_distinct(l)) { /* TODO table functions need dependent implementation */ + if (r && is_base(r->op) && is_distinct_set(sql, l, NULL)) { /* TODO table functions need dependent implementation */ rel = push_up_table(sql, rel); return rel; } diff --git a/sql/test/BugTracker-2015/Tests/crash.Bug-3736.stable.out b/sql/test/BugTracker-2015/Tests/crash.Bug-3736.stable.out --- a/sql/test/BugTracker-2015/Tests/crash.Bug-3736.stable.out +++ b/sql/test/BugTracker-2015/Tests/crash.Bug-3736.stable.out @@ -98,10 +98,10 @@ project ( | | | | | | | & REF 2 | | | | | | ) [ "b3a"."open_auction_id" NOT NULL = "o"."open_auction_id" NOT NULL ] | | | | | ) [ "o"."open_auction_id" NOT NULL ] [ sys.min no nil ("b3a"."id" NOT NULL HASHCOL ) NOT NULL as "L3"."L3", "o"."open_auction_id" NOT NULL ] -| | | | ) [ "L3"."L3" NOT NULL, "o"."open_auction_id" NOT NULL as "L25"."L25" ] _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list