Changeset: 6b9e6db9df85 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6b9e6db9df85 Modified Files: sql/include/sql_relation.h sql/server/rel_optimizer.c sql/server/rel_rel.c sql/server/rel_rel.h Branch: Oct2020 Log Message:
sometimes select expressions end up in the join expression list. Added an optimizer to split these into rel_select's. This required an new rel_rebind_exp(sql, rel, e) which checks if an expression e (with its sub parts) could be fully bound by the relation 'rel'. (cleanly implemented using exp_rel_visitor). This new rel_rebind_exp is used in order_joins now, when we search for other expressions belonging to the same (sub) join tree, when reordering joins (solving an issue with sqlancer06). diffs (207 lines): diff --git a/sql/include/sql_relation.h b/sql/include/sql_relation.h --- a/sql/include/sql_relation.h +++ b/sql/include/sql_relation.h @@ -185,6 +185,7 @@ typedef enum operator_type { #define is_left(op) (op == op_left) #define is_right(op) (op == op_right) #define is_full(op) (op == op_full) +#define is_innerjoin(op) (op == op_join) #define is_join(op) (op == op_join || is_outerjoin(op)) #define is_semi(op) (op == op_semi || op == op_anti) #define is_joinop(op) (is_join(op) || is_semi(op)) diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c --- a/sql/server/rel_optimizer.c +++ b/sql/server/rel_optimizer.c @@ -897,15 +897,23 @@ order_joins(visitor *v, list *rels, list rel_join_add_exp(v->sql->sa, top, cje); /* all other join expressions on these 2 relations */ - while((djn = list_find(exps, n_rels, (fcmp)&exp_joins_rels)) != NULL) { - sql_exp *e = djn->data; - - rel_join_add_exp(v->sql->sa, top, e); - list_remove_data(exps, e); + for (node *en = exps->h; en; ) { + node *next = en->next; + sql_exp *e = en->data; + if (rel_rebind_exp(v->sql, top, e)) { + rel_join_add_exp(v->sql->sa, top, e); + list_remove_data(exps, e); + } + en = next; } /* Remove other joins on the current 'n_rels' set in the distinct list too */ - while((djn = list_find(sdje, n_rels, (fcmp)&exp_joins_rels)) != NULL) - list_remove_data(sdje, djn->data); + for (node *en = sdje->h; en; ) { + node *next = en->next; + sql_exp *e = en->data; + if (rel_rebind_exp(v->sql, top, e)) + list_remove_data(sdje, en->data); + en = next; + } fnd = 1; } /* build join tree using the ordered list */ @@ -952,15 +960,24 @@ order_joins(visitor *v, list *rels, list rel_join_add_exp(v->sql->sa, top, cje); /* all join expressions on these tables */ - while((en = list_find(exps, n_rels, (fcmp)&exp_joins_rels)) != NULL) { + for (en = exps->h; en; ) { + node *next = en->next; sql_exp *e = en->data; - rel_join_add_exp(v->sql->sa, top, e); - list_remove_data(exps, e); + if (rel_rebind_exp(v->sql, top, e)) { + rel_join_add_exp(v->sql->sa, top, e); + list_remove_data(exps, e); + } + en = next; } /* Remove other joins on the current 'n_rels' set in the distinct list too */ - while((en = list_find(sdje, n_rels, (fcmp)&exp_joins_rels)) != NULL) - list_remove_data(sdje, en->data); + for (en = sdje->h; en; ) { + node *next = en->next; + sql_exp *e = en->data; + if (rel_rebind_exp(v->sql, top, e)) + list_remove_data(sdje, en->data); + en = next; + } fnd = 1; } } @@ -1091,6 +1108,9 @@ push_up_join_exps( mvc *sql, sql_rel *re if (l && r) { l = list_merge(l, r, (fdup)NULL); r = NULL; + } else if (!l) { + l = r; + r = NULL; } if (rel->exps) { if (l && !r) @@ -4599,7 +4619,7 @@ rel_push_select_down_join(visitor *v, sq r = rel->l; /* push select through join */ - if (is_select(rel->op) && exps && r && r->op == op_join && !(rel_is_ref(r))) { + if (/* DISABLES CODE */ (0) && is_select(rel->op) && exps && r && r->op == op_join && !(rel_is_ref(r))) { rel->exps = new_exp_list(v->sql->sa); for (n = exps->h; n; n = n->next) { sql_exp *e = n->data; @@ -4634,6 +4654,36 @@ rel_push_select_down_join(visitor *v, sq } } return rel; + /* push select exps part of the join expressions down */ + } else if (is_innerjoin(rel->op) && !list_empty(exps)) { + int can_push = 0; + for (n = exps->h; !can_push && n; n = n->next) { + sql_exp *e = n->data; + if (rel_rebind_exp(v->sql, rel->l, e) || rel_rebind_exp(v->sql, rel->r, e)) + can_push = 1; + } + if (!can_push) + return rel; + + rel->exps = sa_list(v->sql->sa); + for (n = exps->h; n; n = n->next) { + sql_exp *e = n->data; + if (rel_rebind_exp(v->sql, rel->l, e)) { + sql_rel *l = rel->l; + if (!is_select(l->op)) + rel->l = l = rel_select(v->sql->sa, rel->l, NULL); + rel_select_add_exp(v->sql->sa, rel->l, e); + v->changes++; + } else if (rel_rebind_exp(v->sql, rel->r, e)) { + sql_rel *r = rel->r; + if (!is_select(r->op)) + rel->r = r = rel_select(v->sql->sa, rel->r, NULL); + rel_select_add_exp(v->sql->sa, rel->r, e); + v->changes++; + } else { + append(rel->exps, e); + } + } } return rel; } @@ -9545,7 +9595,7 @@ optimize_rel(mvc *sql, sql_rel *rel, int if (gp.cnt[op_project]) rel = rel_exp_visitor_bottomup(&v, rel, &rel_merge_project_rse, false); - if (gp.cnt[op_select] && gp.cnt[op_join] && /* DISABLES CODE */ (0)) + if (gp.cnt[op_join]) rel = rel_visitor_topdown(&v, rel, &rel_push_select_down_join); if (gp.cnt[op_select]) diff --git a/sql/server/rel_rel.c b/sql/server/rel_rel.c --- a/sql/server/rel_rel.c +++ b/sql/server/rel_rel.c @@ -352,7 +352,7 @@ rel_bind_column2( mvc *sql, sql_rel *rel if (!list_empty(rel->exps)) { e = exps_bind_column2(rel->exps, tname, cname, &multi); if (multi) - return sql_error(sql, ERR_AMBIGUOUS, SQLSTATE(42000) "SELECT: identifier '%s.%s' ambiguous", + return sql_error(sql, ERR_AMBIGUOUS, SQLSTATE(42000) "SELECT: identifier '%s.%s' ambiguous", tname, cname); if (!e && is_groupby(rel->op) && rel->r) { e = exps_bind_alias(rel->r, tname, cname); @@ -363,7 +363,7 @@ rel_bind_column2( mvc *sql, sql_rel *rel else e = exps_bind_column(rel->exps, nname, &ambiguous, &multi, 0); if (ambiguous || multi) - return sql_error(sql, ERR_AMBIGUOUS, SQLSTATE(42000) "SELECT: identifier '%s%s%s' ambiguous", + return sql_error(sql, ERR_AMBIGUOUS, SQLSTATE(42000) "SELECT: identifier '%s%s%s' ambiguous", rname ? rname : "", rname ? "." : "", nname); if (e) return e; @@ -373,7 +373,7 @@ rel_bind_column2( mvc *sql, sql_rel *rel if (!e && (is_sql_sel(f) || is_sql_having(f) || !f) && is_groupby(rel->op) && rel->r) { e = exps_bind_column2(rel->r, tname, cname, &multi); if (multi) - return sql_error(sql, ERR_AMBIGUOUS, SQLSTATE(42000) "SELECT: identifier '%s.%s' ambiguous", + return sql_error(sql, ERR_AMBIGUOUS, SQLSTATE(42000) "SELECT: identifier '%s.%s' ambiguous", tname, cname); if (e) { e = exp_ref(sql, e); @@ -2333,3 +2333,25 @@ exps_have_analytics(mvc *sql, list *exps (void)exps_exp_visitor_topdown(&v, NULL, exps, 0, &exp_check_has_analytics, true); return v.changes; } + +static sql_exp * +_rel_rebind_exp(visitor *v, sql_rel *rel, sql_exp *e, int depth) +{ + (void)depth; + /* visitor will handle recursion, ie only need to check columns here */ + if (e->type == e_column) { + sql_exp *ne = rel_find_exp(rel, e); + if (!ne) + v->changes++; + } + return e; +} + +bool +rel_rebind_exp(mvc *sql, sql_rel *rel, sql_exp *e) +{ + visitor v = { .sql = sql }; + exp_visitor(&v, rel, e, 0, &_rel_rebind_exp, true, true); + /* problems are passed via changes */ + return (v.changes==0); +} diff --git a/sql/server/rel_rel.h b/sql/server/rel_rel.h --- a/sql/server/rel_rel.h +++ b/sql/server/rel_rel.h @@ -144,4 +144,7 @@ typedef sql_rel *(*rel_rewrite_fptr)(vis extern sql_rel *rel_visitor_topdown(visitor *v, sql_rel *rel, rel_rewrite_fptr rel_rewriter); extern sql_rel *rel_visitor_bottomup(visitor *v, sql_rel *rel, rel_rewrite_fptr rel_rewriter); +/* validate that all parts of the expression e can be bound to the relation rel (or are atoms) */ +extern bool rel_rebind_exp(mvc *sql, sql_rel *rel, sql_exp *e); + #endif /* _REL_REL_H_ */ _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list