Changeset: 77a50c739760 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/77a50c739760
Added Files:
        sql/test/rel-optimizers/Tests/All
        sql/test/rel-optimizers/Tests/groupby-cse.test
        sql/test/rel-optimizers/Tests/groupjoin.test
        sql/test/rel-optimizers/Tests/split-select.test
Removed Files:
        sql/test/rel-optimizers/optimize-proj/Tests/All
        sql/test/rel-optimizers/optimize-proj/Tests/groupby-cse.test
Modified Files:
        sql/backends/monet5/rel_bin.c
        sql/include/sql_relation.h
        sql/server/rel_dump.c
        sql/server/rel_exp.c
        sql/server/rel_exp.h
        sql/server/rel_optimize_exps.c
        sql/server/rel_optimize_others.c
        sql/server/rel_optimize_proj.c
        sql/server/rel_optimize_sel.c
        sql/test/BugTracker-2015/Tests/large_join.Bug-3809.test
        sql/test/astro/Tests/astro.test
Branch: groupjoin
Log Message:

cleanup of multiple equal expressions (ignoring the is_any/is_semantics flags)
rewriters added for aliases to the same expression of the inner relation (first 
we add an expression back into the same expression list, later the users (in 
join/select) clean up their usage of this referred expression).
small fixes to group(mark)join with no-nil semantics
implemented fixes to the groupjoin optimizer; it now detects group joins (with 
a single expression). The left group joins are temporarily switched off, because 
left outer handling isn't optimized yet.
fixes in the groupjoin code generation for using the correct group by column


diffs (truncated from 978 to 300 lines):

diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c
--- a/sql/backends/monet5/rel_bin.c
+++ b/sql/backends/monet5/rel_bin.c
@@ -2811,7 +2811,6 @@ rel2bin_groupjoin(backend *be, sql_rel *
        stmt *left = NULL, *right = NULL, *join = NULL, *jl = NULL, *jr = NULL, 
*m = NULL, *ls = NULL, *res;
        bool need_project = false, exist = true, mark = false;
 
-       assert(rel->op == op_left);
        if (rel->op == op_left) { /* left outer group join */
                if (list_length(rel->attr) == 1) {
                        sql_exp *e = rel->attr->h->data;
@@ -2873,7 +2872,7 @@ rel2bin_groupjoin(backend *be, sql_rel *
                        l = r;
                        r = t;
                }
-               if (!is_semantics(e) && is_anti(e))
+               if ((!is_semantics(e) && is_anti(e)) || !mark)
                        ls = l;
                if (en || !mark) {
                        /* split out (left)join vs (left)mark-join */
@@ -3006,7 +3005,7 @@ rel2bin_groupjoin(backend *be, sql_rel *
                                assert(m);
                                sql_exp *e = rel->attr->h->data;
                                if (exp_is_atom(e) && need_no_nil(e))
-                                       m = sql_Nop_(be, "ifthenelse", 
sql_unop_(be, "isnull", m), stmt_bool(be, !exist), m, NULL);
+                                       m = sql_Nop_(be, "ifthenelse", 
sql_unop_(be, "isnull", m), stmt_bool(be, false), m, NULL);
                                if (!exist) {
                                        sql_subtype *bt = 
sql_bind_localtype("bit");
                                        sql_subfunc *not = 
sql_bind_func(be->mvc, "sys", "not", bt, NULL, F_FUNC, true);
@@ -3017,7 +3016,8 @@ rel2bin_groupjoin(backend *be, sql_rel *
                        append(l, s);
                } else {
                        /* group / aggrs */
-                       stmt *groupby = stmt_group(be, jl, NULL, NULL, NULL, 
true);
+                       stmt *nls = stmt_project(be, jl, ls);
+                       stmt *groupby = stmt_group(be, nls, NULL, NULL, NULL, 
true);
                        stmt *grp = stmt_result(be, groupby, 0);
                        stmt *ext = stmt_result(be, groupby, 1);
                        stmt *cnt = stmt_result(be, groupby, 2);
diff --git a/sql/include/sql_relation.h b/sql/include/sql_relation.h
--- a/sql/include/sql_relation.h
+++ b/sql/include/sql_relation.h
@@ -49,6 +49,7 @@ typedef struct expression {
         card:2,        /* card (0 truth value!) (1 atoms) (2 aggr) (3 multi 
value) */
         freevar:4,     /* free variable, ie binds to the upper dependent join 
*/
         intern:1,
+        selfref:1,             /* set when the expression references a 
expression in the same projection list */
         anti:1,
         ascending:1,   /* order direction */
         nulls_last:1,  /* return null after all other rows */
@@ -247,6 +248,8 @@ typedef enum operator_type {
 #define set_symmetric(e)       (e)->symmetric = 1
 #define is_intern(e)           ((e)->intern)
 #define set_intern(e)          (e)->intern = 1
+#define is_selfref(e)          ((e)->selfref)
+#define set_selfref(e)                 (e)->selfref = 1
 #define is_basecol(e)          ((e)->base)
 #define set_basecol(e)                 (e)->base = 1
 
diff --git a/sql/server/rel_dump.c b/sql/server/rel_dump.c
--- a/sql/server/rel_dump.c
+++ b/sql/server/rel_dump.c
@@ -472,8 +472,12 @@ rel_print_rel(mvc *sql, stream  *fout, s
                        r = "intersect";
                else if (rel->op == op_except)
                        r = "except";
-               else if (!rel->exps && rel->op == op_join)
-                       r = "crossproduct";
+               else if (rel->op == op_join) {
+                       if (list_empty(rel->exps))
+                               r = rel->attr?"group 
crossproduct":"crossproduct";
+                       else
+                               r = rel->attr?"group join":"join";
+               }
 
                if (is_dependent(rel))
                        mnstr_printf(fout, "dependent ");
@@ -2069,34 +2073,6 @@ rel_read(mvc *sql, char *r, int *pos, li
                if (r[*pos] == '[' && !(rel->r = read_exps(sql, rel, nrel, 
NULL, r, pos, '[', 0, 1)))
                        return NULL;
                break;
-       case 'g':
-               *pos += (int) strlen("group by");
-               skipWS(r, pos);
-
-               if (r[*pos] != '(')
-                       return sql_error(sql, -1, SQLSTATE(42000) "Group by: 
missing '('\n");
-               (*pos)++;
-               skipWS(r, pos);
-               if (!(nrel = rel_read(sql, r, pos, refs)))
-                       return NULL;
-               skipWS(r, pos);
-               if (r[*pos] != ')')
-                       return sql_error(sql, -1, SQLSTATE(42000) "Group by: 
missing ')'\n");
-               (*pos)++;
-               skipWS(r, pos);
-
-               if (!(gexps = read_exps(sql, nrel, NULL, NULL, r, pos, '[', 0, 
1)))
-                       return NULL;
-               skipWS(r, pos);
-               rel = rel_groupby(sql, nrel, gexps);
-               rel->exps = new_exp_list(sql->sa); /* empty projection list for 
now */
-               set_processed(rel); /* don't search beyond the group by */
-               /* first group projected expressions, then group by columns, 
then left relation projections */
-               if (!(exps = read_exps(sql, rel, nrel, NULL, r, pos, '[', 1, 
1)))
-                       return NULL;
-               rel->exps = exps;
-               rel->nrcols = list_length(exps);
-               break;
        case 's':
        case 'a':
                if (r[*pos+1] == 'a') {
@@ -2174,8 +2150,44 @@ rel_read(mvc *sql, char *r, int *pos, li
                        set_processed(rel);
                }
                break;
+       case 'g':
+               *pos += (int) strlen("group");
+               skipWS(r, pos);
+
+               if (r[*pos] == 'b') {
+                       *pos += (int) strlen("by");
+                       skipWS(r, pos);
+
+                       if (r[*pos] != '(')
+                               return sql_error(sql, -1, SQLSTATE(42000) 
"Group by: missing '('\n");
+                       (*pos)++;
+                       skipWS(r, pos);
+                       if (!(nrel = rel_read(sql, r, pos, refs)))
+                               return NULL;
+                       skipWS(r, pos);
+                       if (r[*pos] != ')')
+                               return sql_error(sql, -1, SQLSTATE(42000) 
"Group by: missing ')'\n");
+                       (*pos)++;
+                       skipWS(r, pos);
+
+                       if (!(gexps = read_exps(sql, nrel, NULL, NULL, r, pos, 
'[', 0, 1)))
+                               return NULL;
+                       skipWS(r, pos);
+                       rel = rel_groupby(sql, nrel, gexps);
+                       rel->exps = new_exp_list(sql->sa); /* empty projection 
list for now */
+                       set_processed(rel); /* don't search beyond the group by 
*/
+                       /* first group projected expressions, then group by 
columns, then left relation projections */
+                       if (!(exps = read_exps(sql, rel, nrel, NULL, r, pos, 
'[', 1, 1)))
+                               return NULL;
+                       rel->exps = exps;
+                       rel->nrcols = list_length(exps);
+                       break;
+               } else {
+                       groupjoin = true;
+               }
+               /* fall through */
        case 'l':
-               if (strcmp(r+*pos, "left outer join") == 0) {
+               if (strncmp(r+*pos, "left outer join", strlen("left outer 
join")) == 0) {
                        *pos += (int) strlen("left outer join");
                } else {
                        groupjoin = true;
diff --git a/sql/server/rel_exp.c b/sql/server/rel_exp.c
--- a/sql/server/rel_exp.c
+++ b/sql/server/rel_exp.c
@@ -1026,6 +1026,7 @@ exp_swap( sql_exp *e )
        e->l = e->r;
        e->r = s;
        e->flag = swap_compare((comp_type)e->flag);
+       assert(!e->f);
 }
 
 sql_subtype *
@@ -1354,13 +1355,13 @@ exps_equal( list *l, list *r)
 }
 
 int
-exp_match_exp( sql_exp *e1, sql_exp *e2)
+exp_match_exp_semantics( sql_exp *e1, sql_exp *e2, bool semantics)
 {
        if (exp_match(e1, e2))
                return 1;
        if (is_ascending(e1) != is_ascending(e2) || nulls_last(e1) != 
nulls_last(e2) || zero_if_empty(e1) != zero_if_empty(e2) ||
-               need_no_nil(e1) != need_no_nil(e2) || is_anti(e1) != 
is_anti(e2) || is_semantics(e1) != is_semantics(e2) ||
-               is_any(e1) != is_any(e2) ||
+               need_no_nil(e1) != need_no_nil(e2) || is_anti(e1) != 
is_anti(e2) || (semantics && is_semantics(e1) != is_semantics(e2)) ||
+               (semantics && is_any(e1) != is_any(e2)) ||
                is_symmetric(e1) != is_symmetric(e2) || is_unique(e1) != 
is_unique(e2) || need_distinct(e1) != need_distinct(e2))
                return 0;
        if (e1->type == e2->type) {
@@ -1425,6 +1426,12 @@ exp_match_exp( sql_exp *e1, sql_exp *e2)
        return 0;
 }
 
+int
+exp_match_exp( sql_exp *e1, sql_exp *e2)
+{
+       return exp_match_exp_semantics( e1, e2, true);
+}
+
 sql_exp *
 exps_any_match(list *l, sql_exp *e)
 {
diff --git a/sql/server/rel_exp.h b/sql/server/rel_exp.h
--- a/sql/server/rel_exp.h
+++ b/sql/server/rel_exp.h
@@ -145,6 +145,7 @@ extern int exp_refers( sql_exp *p, sql_e
 extern int exp_match( sql_exp *e1, sql_exp *e2);
 extern sql_exp* exps_find_exp( list *l, sql_exp *e);
 extern int exp_match_exp( sql_exp *e1, sql_exp *e2);
+extern int exp_match_exp_semantics( sql_exp *e1, sql_exp *e2, bool semantics);
 extern sql_exp* exps_any_match(list *l, sql_exp *e);
 /* match just the column (cmp equality) expressions */
 extern int exp_match_col_exps( sql_exp *e, list *l);
diff --git a/sql/server/rel_optimize_exps.c b/sql/server/rel_optimize_exps.c
--- a/sql/server/rel_optimize_exps.c
+++ b/sql/server/rel_optimize_exps.c
@@ -744,6 +744,74 @@ rel_simplify_predicates(visitor *v, sql_
 }
 
 static inline sql_exp *
+rel_remove_alias(visitor *v, sql_rel *rel, sql_exp *e)
+{
+       if (e->type != e_column)
+               return e;
+       if (!rel_is_ref(rel) && rel->op == op_project && rel->l && 
list_length(rel->exps) > 1) {
+               sql_rel *l = rel->l;
+               if (l->op == op_project) {
+                       sql_exp *ne = rel_find_exp(l, e);
+                       if (ne && ne->type == e_column && is_selfref(ne)) {
+                               sql_exp *nne = NULL;
+                               /* found ne in projection, try to find 
reference in the same list */
+                               if (ne->l)
+                                       nne = exps_bind_column2(l->exps, ne->l, 
ne->r, NULL);
+                               else
+                                       nne = exps_bind_column(l->exps, ne->r, 
NULL, NULL, 1);
+                               if (nne && nne != ne && list_position(l->exps, 
nne) < list_position(l->exps, ne)) {
+                                       e->l = (char*)exp_relname(nne);
+                                       e->r = (char*)exp_name(nne);
+                                       v->changes++;
+                               }
+                       }
+               }
+       }
+       if (!rel_is_ref(rel) && rel->op != op_project) {
+               bool found = false;
+               if ((is_select(rel->op) || is_join(rel->op)) && rel->l && 
list_length(rel->exps) > 1) {
+                       sql_rel *l = rel->l;
+                       if (l->op == op_project) {
+                               sql_exp *ne = rel_find_exp(l, e);
+                               found = true;
+                               if (ne && ne->type == e_column && 
is_selfref(ne)) {
+                                       sql_exp *nne = NULL;
+                                       if (ne->l)
+                                               nne = 
exps_bind_column2(l->exps, ne->l, ne->r, NULL);
+                                       else
+                                               nne = exps_bind_column(l->exps, 
ne->r, NULL, NULL, 1);
+                                       if (nne && nne != ne && 
list_position(l->exps, nne) < list_position(l->exps, ne)) {
+                                               e->l = (char*)exp_relname(nne);
+                                               e->r = (char*)exp_name(nne);
+                                               v->changes++;
+                                       }
+                               }
+                       }
+               }
+               if (!found && is_join(rel->op) && rel->r && 
list_length(rel->exps) > 1 && !is_semi(rel->op)) {
+                       sql_rel *l = rel->r;
+                       if (l->op == op_project) {
+                               sql_exp *ne = rel_find_exp(l, e);
+                               found = true;
+                               if (ne && ne->type == e_column && 
is_selfref(ne)) {
+                                       sql_exp *nne = NULL;
+                                       if (ne->l)
+                                               nne = 
exps_bind_column2(l->exps, ne->l, ne->r, NULL);
+                                       else
+                                               nne = exps_bind_column(l->exps, 
ne->r, NULL, NULL, 1);
+                                       if (nne && nne != ne && 
list_position(l->exps, nne) < list_position(l->exps, ne)) {
+                                               e->l = (char*)exp_relname(nne);
+                                               e->r = (char*)exp_name(nne);
+                                               v->changes++;
+                                       }
+                               }
+                       }
+               }
+       }
+       return e;
+}
+
+static inline sql_exp *
 rel_merge_project_rse(visitor *v, sql_rel *rel, sql_exp *e)
 {
        if (is_simple_project(rel->op) && is_func(e->type) && e->l) {
@@ -797,6 +865,7 @@ rel_optimize_exps_(visitor *v, sql_rel *
        if (v->value_based_opt)
                e = rel_simplify_predicates(v, rel, e);
        e = rel_merge_project_rse(v, rel, e);
+       e = rel_remove_alias(v, rel, e);
        return e;
 }
 
diff --git a/sql/server/rel_optimize_others.c b/sql/server/rel_optimize_others.c
--- a/sql/server/rel_optimize_others.c
+++ b/sql/server/rel_optimize_others.c
@@ -120,7 +120,7 @@ exp_push_down_prj(mvc *sql, sql_exp *e, 
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to