Changeset: 77a50c739760 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/77a50c739760 Added Files: sql/test/rel-optimizers/Tests/All sql/test/rel-optimizers/Tests/groupby-cse.test sql/test/rel-optimizers/Tests/groupjoin.test sql/test/rel-optimizers/Tests/split-select.test Removed Files: sql/test/rel-optimizers/optimize-proj/Tests/All sql/test/rel-optimizers/optimize-proj/Tests/groupby-cse.test Modified Files: sql/backends/monet5/rel_bin.c sql/include/sql_relation.h sql/server/rel_dump.c sql/server/rel_exp.c sql/server/rel_exp.h sql/server/rel_optimize_exps.c sql/server/rel_optimize_others.c sql/server/rel_optimize_proj.c sql/server/rel_optimize_sel.c sql/test/BugTracker-2015/Tests/large_join.Bug-3809.test sql/test/astro/Tests/astro.test Branch: groupjoin Log Message:
cleanup multiple equal (minus is_any/is_semantics). Rewriters added for aliases to the same expression of the inner relation (first we add an expression back into the same expression list; later the users (in join/select) clean up their usage of this referred expression). Small fixes to group(mark)join with no-nil semantics implemented. Fixes to the groupjoin optimizer: it now detects group joins (with a single expression). The left group joins are temporarily switched off, because left outer handling isn't optimized yet. Fixes in the groupjoin code generation for using the correct group by column. diffs (truncated from 978 to 300 lines): diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c --- a/sql/backends/monet5/rel_bin.c +++ b/sql/backends/monet5/rel_bin.c @@ -2811,7 +2811,6 @@ rel2bin_groupjoin(backend *be, sql_rel * stmt *left = NULL, *right = NULL, *join = NULL, *jl = NULL, *jr = NULL, *m = NULL, *ls = NULL, *res; bool need_project = false, exist = true, mark = false; - assert(rel->op == op_left); if (rel->op == op_left) { /* left outer group join */ if (list_length(rel->attr) == 1) { sql_exp *e = rel->attr->h->data; @@ -2873,7 +2872,7 @@ rel2bin_groupjoin(backend *be, sql_rel * l = r; r = t; } - if (!is_semantics(e) && is_anti(e)) + if ((!is_semantics(e) && is_anti(e)) || !mark) ls = l; if (en || !mark) { /* split out (left)join vs (left)mark-join */ @@ -3006,7 +3005,7 @@ rel2bin_groupjoin(backend *be, sql_rel * assert(m); sql_exp *e = rel->attr->h->data; if (exp_is_atom(e) && need_no_nil(e)) - m = sql_Nop_(be, "ifthenelse", sql_unop_(be, "isnull", m), stmt_bool(be, !exist), m, NULL); + m = sql_Nop_(be, "ifthenelse", sql_unop_(be, "isnull", m), stmt_bool(be, false), m, NULL); if (!exist) { sql_subtype *bt = sql_bind_localtype("bit"); sql_subfunc *not = sql_bind_func(be->mvc, "sys", "not", bt, NULL, F_FUNC, true); @@ -3017,7 +3016,8 @@ rel2bin_groupjoin(backend *be, sql_rel * append(l, s); } else { /* group / aggrs */ - stmt *groupby = 
stmt_group(be, jl, NULL, NULL, NULL, true); + stmt *nls = stmt_project(be, jl, ls); + stmt *groupby = stmt_group(be, nls, NULL, NULL, NULL, true); stmt *grp = stmt_result(be, groupby, 0); stmt *ext = stmt_result(be, groupby, 1); stmt *cnt = stmt_result(be, groupby, 2); diff --git a/sql/include/sql_relation.h b/sql/include/sql_relation.h --- a/sql/include/sql_relation.h +++ b/sql/include/sql_relation.h @@ -49,6 +49,7 @@ typedef struct expression { card:2, /* card (0 truth value!) (1 atoms) (2 aggr) (3 multi value) */ freevar:4, /* free variable, ie binds to the upper dependent join */ intern:1, + selfref:1, /* set when the expression references a expression in the same projection list */ anti:1, ascending:1, /* order direction */ nulls_last:1, /* return null after all other rows */ @@ -247,6 +248,8 @@ typedef enum operator_type { #define set_symmetric(e) (e)->symmetric = 1 #define is_intern(e) ((e)->intern) #define set_intern(e) (e)->intern = 1 +#define is_selfref(e) ((e)->selfref) +#define set_selfref(e) (e)->selfref = 1 #define is_basecol(e) ((e)->base) #define set_basecol(e) (e)->base = 1 diff --git a/sql/server/rel_dump.c b/sql/server/rel_dump.c --- a/sql/server/rel_dump.c +++ b/sql/server/rel_dump.c @@ -472,8 +472,12 @@ rel_print_rel(mvc *sql, stream *fout, s r = "intersect"; else if (rel->op == op_except) r = "except"; - else if (!rel->exps && rel->op == op_join) - r = "crossproduct"; + else if (rel->op == op_join) { + if (list_empty(rel->exps)) + r = rel->attr?"group crossproduct":"crossproduct"; + else + r = rel->attr?"group join":"join"; + } if (is_dependent(rel)) mnstr_printf(fout, "dependent "); @@ -2069,34 +2073,6 @@ rel_read(mvc *sql, char *r, int *pos, li if (r[*pos] == '[' && !(rel->r = read_exps(sql, rel, nrel, NULL, r, pos, '[', 0, 1))) return NULL; break; - case 'g': - *pos += (int) strlen("group by"); - skipWS(r, pos); - - if (r[*pos] != '(') - return sql_error(sql, -1, SQLSTATE(42000) "Group by: missing '('\n"); - (*pos)++; - skipWS(r, pos); - if 
(!(nrel = rel_read(sql, r, pos, refs))) - return NULL; - skipWS(r, pos); - if (r[*pos] != ')') - return sql_error(sql, -1, SQLSTATE(42000) "Group by: missing ')'\n"); - (*pos)++; - skipWS(r, pos); - - if (!(gexps = read_exps(sql, nrel, NULL, NULL, r, pos, '[', 0, 1))) - return NULL; - skipWS(r, pos); - rel = rel_groupby(sql, nrel, gexps); - rel->exps = new_exp_list(sql->sa); /* empty projection list for now */ - set_processed(rel); /* don't search beyond the group by */ - /* first group projected expressions, then group by columns, then left relation projections */ - if (!(exps = read_exps(sql, rel, nrel, NULL, r, pos, '[', 1, 1))) - return NULL; - rel->exps = exps; - rel->nrcols = list_length(exps); - break; case 's': case 'a': if (r[*pos+1] == 'a') { @@ -2174,8 +2150,44 @@ rel_read(mvc *sql, char *r, int *pos, li set_processed(rel); } break; + case 'g': + *pos += (int) strlen("group"); + skipWS(r, pos); + + if (r[*pos] == 'b') { + *pos += (int) strlen("by"); + skipWS(r, pos); + + if (r[*pos] != '(') + return sql_error(sql, -1, SQLSTATE(42000) "Group by: missing '('\n"); + (*pos)++; + skipWS(r, pos); + if (!(nrel = rel_read(sql, r, pos, refs))) + return NULL; + skipWS(r, pos); + if (r[*pos] != ')') + return sql_error(sql, -1, SQLSTATE(42000) "Group by: missing ')'\n"); + (*pos)++; + skipWS(r, pos); + + if (!(gexps = read_exps(sql, nrel, NULL, NULL, r, pos, '[', 0, 1))) + return NULL; + skipWS(r, pos); + rel = rel_groupby(sql, nrel, gexps); + rel->exps = new_exp_list(sql->sa); /* empty projection list for now */ + set_processed(rel); /* don't search beyond the group by */ + /* first group projected expressions, then group by columns, then left relation projections */ + if (!(exps = read_exps(sql, rel, nrel, NULL, r, pos, '[', 1, 1))) + return NULL; + rel->exps = exps; + rel->nrcols = list_length(exps); + break; + } else { + groupjoin = true; + } + /* fall through */ case 'l': - if (strcmp(r+*pos, "left outer join") == 0) { + if (strncmp(r+*pos, "left outer join", 
strlen("left outer join")) == 0) { *pos += (int) strlen("left outer join"); } else { groupjoin = true; diff --git a/sql/server/rel_exp.c b/sql/server/rel_exp.c --- a/sql/server/rel_exp.c +++ b/sql/server/rel_exp.c @@ -1026,6 +1026,7 @@ exp_swap( sql_exp *e ) e->l = e->r; e->r = s; e->flag = swap_compare((comp_type)e->flag); + assert(!e->f); } sql_subtype * @@ -1354,13 +1355,13 @@ exps_equal( list *l, list *r) } int -exp_match_exp( sql_exp *e1, sql_exp *e2) +exp_match_exp_semantics( sql_exp *e1, sql_exp *e2, bool semantics) { if (exp_match(e1, e2)) return 1; if (is_ascending(e1) != is_ascending(e2) || nulls_last(e1) != nulls_last(e2) || zero_if_empty(e1) != zero_if_empty(e2) || - need_no_nil(e1) != need_no_nil(e2) || is_anti(e1) != is_anti(e2) || is_semantics(e1) != is_semantics(e2) || - is_any(e1) != is_any(e2) || + need_no_nil(e1) != need_no_nil(e2) || is_anti(e1) != is_anti(e2) || (semantics && is_semantics(e1) != is_semantics(e2)) || + (semantics && is_any(e1) != is_any(e2)) || is_symmetric(e1) != is_symmetric(e2) || is_unique(e1) != is_unique(e2) || need_distinct(e1) != need_distinct(e2)) return 0; if (e1->type == e2->type) { @@ -1425,6 +1426,12 @@ exp_match_exp( sql_exp *e1, sql_exp *e2) return 0; } +int +exp_match_exp( sql_exp *e1, sql_exp *e2) +{ + return exp_match_exp_semantics( e1, e2, true); +} + sql_exp * exps_any_match(list *l, sql_exp *e) { diff --git a/sql/server/rel_exp.h b/sql/server/rel_exp.h --- a/sql/server/rel_exp.h +++ b/sql/server/rel_exp.h @@ -145,6 +145,7 @@ extern int exp_refers( sql_exp *p, sql_e extern int exp_match( sql_exp *e1, sql_exp *e2); extern sql_exp* exps_find_exp( list *l, sql_exp *e); extern int exp_match_exp( sql_exp *e1, sql_exp *e2); +extern int exp_match_exp_semantics( sql_exp *e1, sql_exp *e2, bool semantics); extern sql_exp* exps_any_match(list *l, sql_exp *e); /* match just the column (cmp equality) expressions */ extern int exp_match_col_exps( sql_exp *e, list *l); diff --git a/sql/server/rel_optimize_exps.c 
b/sql/server/rel_optimize_exps.c --- a/sql/server/rel_optimize_exps.c +++ b/sql/server/rel_optimize_exps.c @@ -744,6 +744,74 @@ rel_simplify_predicates(visitor *v, sql_ } static inline sql_exp * +rel_remove_alias(visitor *v, sql_rel *rel, sql_exp *e) +{ + if (e->type != e_column) + return e; + if (!rel_is_ref(rel) && rel->op == op_project && rel->l && list_length(rel->exps) > 1) { + sql_rel *l = rel->l; + if (l->op == op_project) { + sql_exp *ne = rel_find_exp(l, e); + if (ne && ne->type == e_column && is_selfref(ne)) { + sql_exp *nne = NULL; + /* found ne in projection, try to find reference in the same list */ + if (ne->l) + nne = exps_bind_column2(l->exps, ne->l, ne->r, NULL); + else + nne = exps_bind_column(l->exps, ne->r, NULL, NULL, 1); + if (nne && nne != ne && list_position(l->exps, nne) < list_position(l->exps, ne)) { + e->l = (char*)exp_relname(nne); + e->r = (char*)exp_name(nne); + v->changes++; + } + } + } + } + if (!rel_is_ref(rel) && rel->op != op_project) { + bool found = false; + if ((is_select(rel->op) || is_join(rel->op)) && rel->l && list_length(rel->exps) > 1) { + sql_rel *l = rel->l; + if (l->op == op_project) { + sql_exp *ne = rel_find_exp(l, e); + found = true; + if (ne && ne->type == e_column && is_selfref(ne)) { + sql_exp *nne = NULL; + if (ne->l) + nne = exps_bind_column2(l->exps, ne->l, ne->r, NULL); + else + nne = exps_bind_column(l->exps, ne->r, NULL, NULL, 1); + if (nne && nne != ne && list_position(l->exps, nne) < list_position(l->exps, ne)) { + e->l = (char*)exp_relname(nne); + e->r = (char*)exp_name(nne); + v->changes++; + } + } + } + } + if (!found && is_join(rel->op) && rel->r && list_length(rel->exps) > 1 && !is_semi(rel->op)) { + sql_rel *l = rel->r; + if (l->op == op_project) { + sql_exp *ne = rel_find_exp(l, e); + found = true; + if (ne && ne->type == e_column && is_selfref(ne)) { + sql_exp *nne = NULL; + if (ne->l) + nne = exps_bind_column2(l->exps, ne->l, ne->r, NULL); + else + nne = exps_bind_column(l->exps, ne->r, NULL, 
NULL, 1); + if (nne && nne != ne && list_position(l->exps, nne) < list_position(l->exps, ne)) { + e->l = (char*)exp_relname(nne); + e->r = (char*)exp_name(nne); + v->changes++; + } + } + } + } + } + return e; +} + +static inline sql_exp * rel_merge_project_rse(visitor *v, sql_rel *rel, sql_exp *e) { if (is_simple_project(rel->op) && is_func(e->type) && e->l) { @@ -797,6 +865,7 @@ rel_optimize_exps_(visitor *v, sql_rel * if (v->value_based_opt) e = rel_simplify_predicates(v, rel, e); e = rel_merge_project_rse(v, rel, e); + e = rel_remove_alias(v, rel, e); return e; } diff --git a/sql/server/rel_optimize_others.c b/sql/server/rel_optimize_others.c --- a/sql/server/rel_optimize_others.c +++ b/sql/server/rel_optimize_others.c @@ -120,7 +120,7 @@ exp_push_down_prj(mvc *sql, sql_exp *e, _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org