Changeset: 9161d206d412 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/9161d206d412
Modified Files:
        sql/backends/monet5/rel_bin.c
        sql/server/rel_optimize_others.c
        sql/server/rel_optimize_proj.c
        sql/server/rel_optimize_sel.c
Branch: groupjoin
Log Message:

Initial groupjoin implementation, handling count/min/max/sum etc. aggregates
(the MAL level still uses the old setup).


diffs (truncated from 306 to 300 lines):

diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c
--- a/sql/backends/monet5/rel_bin.c
+++ b/sql/backends/monet5/rel_bin.c
@@ -2817,17 +2817,20 @@ rel2bin_groupjoin(backend *be, sql_rel *
        list *l;
        node *n , *en;
        stmt *left = NULL, *right = NULL, *join = NULL, *jl, *jr, *ls = NULL, 
*res;
-       bool need_project = false;
-       bool exist = true;
+       bool need_project = false, exist = true, mark = false;
 
        assert(rel->op == op_left);
        if (rel->op == op_left) { /* left outer group join */
                if (list_length(rel->attr) == 1) {
                        sql_exp *e = rel->attr->h->data;
+                       if (exp_is_atom(e))
+                               mark = true;
                        if (exp_is_atom(e) && exp_is_false(e))
                                exist = false;
                }
        }
+       if (mark)
+               printf("# mark join \n");
 
        if (rel->l) /* first construct the left sub relation */
                left = subrel_bin(be, rel->l, refs);
@@ -2844,7 +2847,7 @@ rel2bin_groupjoin(backend *be, sql_rel *
        list *jexps = get_simple_equi_joins_first(sql, rel, rel->exps, 
&equality_only);
 
        en = jexps?jexps->h:NULL;
-       if ((/*list_empty(jexps)*/ list_length(jexps) <= 1 || 
!gj_outerjoin_exp(rel, en->data)) && !(list_length(jexps) == 1 && 
is_equi_exp_((sql_exp*)en->data) && can_join_exp(rel, en->data, false))) {
+       if ((/*list_empty(jexps)*/ list_length(jexps) <= (0+mark) || 
!gj_outerjoin_exp(rel, en->data)) && !(list_length(jexps) == 1 && 
is_equi_exp_((sql_exp*)en->data) && can_join_exp(rel, en->data, false))) {
                printf("# outer cross\n");
                stmt *l = bin_find_smallest_column(be, left);
                stmt *r = bin_find_smallest_column(be, right);
@@ -2879,8 +2882,10 @@ rel2bin_groupjoin(backend *be, sql_rel *
                        r = t;
                }
                ls = l;
-               if (en) {
+               if (en || !mark) {
                        printf("# outer join\n");
+                       /* split out (left)join vs (left)mark-join */
+                       /* call 3 result version */
                        join = stmt_join_cand(be, column(be, l), column(be, r), 
left->cand, NULL/*right->cand*/, is_anti(e), (comp_type) cmp_equal/*e->flag*/, 
0, is_any(e)|is_semantics(e), false, rel->op == op_left?false:true);
                } else {
                        printf("# mark join\n");
@@ -2921,14 +2926,6 @@ rel2bin_groupjoin(backend *be, sql_rel *
                }
                left = sub = stmt_list(be, nl);
 
-               /* if any
-                *               jr==bit_nil if left == NULL else true/false?
-                * else simple mark
-                *              jr = isnull(jr) bit_nil else alse
-                *
-                *      ls == NULL -> false
-                *              m==bit_nil iff left == NULL else true/false
-                */
                if (ls) {
                        stmt *nls = stmt_project(be, jl, ls);
                        jr = sql_Nop_(be, "ifthenelse", sql_unop_(be, "isnull", 
nls), stmt_bool(be, bit_nil),
@@ -2981,28 +2978,55 @@ rel2bin_groupjoin(backend *be, sql_rel *
                s = stmt_alias(be, s, rnme, nme);
                list_append(l, s);
        }
+       if (!mark) {
+               for (n = right->op4.lval->h; n; n = n->next) {
+                       stmt *c = n->data;
+                       const char *rnme = table_name(sql->sa, c);
+                       const char *nme = column_name(sql->sa, c);
+                       stmt *s = stmt_project(be, jr, column(be, c));
+
+                       s = stmt_alias(be, s, rnme, nme);
+                       list_append(l, s);
+               }
+               left = stmt_list(be, l);
+               l = sa_list(sql->sa);
+       }
        if (rel->attr) {
                sql_exp *e = rel->attr->h->data;
                const char *rnme = exp_relname(e);
                const char *nme = exp_name(e);
 
-               if (need_project) {
-                       jr = sql_Nop_(be, "ifthenelse", sql_unop_(be, "isnull", 
jr), stmt_bool(be, !exist), stmt_bool(be, exist), NULL);
-               } else if (list_length(rel->attr) == 1) {
-                       sql_exp *e = rel->attr->h->data;
-                       if (exp_is_atom(e) && need_no_nil(e))
-                               jr = sql_Nop_(be, "ifthenelse", sql_unop_(be, 
"isnull", jr), stmt_bool(be, !exist), jr, NULL);
-                       if (!exist) {
-                               sql_subtype *bt = sql_bind_localtype("bit");
-                               sql_subfunc *not = sql_bind_func(be->mvc, 
"sys", "not", bt, NULL, F_FUNC, true);
-                               jr = stmt_unop(be, jr, NULL, not);
+               if (mark) {
+                       if (need_project) {
+                               jr = sql_Nop_(be, "ifthenelse", sql_unop_(be, 
"isnull", jr), stmt_bool(be, !exist), stmt_bool(be, exist), NULL);
+                       } else {
+                               sql_exp *e = rel->attr->h->data;
+                               if (exp_is_atom(e) && need_no_nil(e))
+                                       jr = sql_Nop_(be, "ifthenelse", 
sql_unop_(be, "isnull", jr), stmt_bool(be, !exist), jr, NULL);
+                               if (!exist) {
+                                       sql_subtype *bt = 
sql_bind_localtype("bit");
+                                       sql_subfunc *not = 
sql_bind_func(be->mvc, "sys", "not", bt, NULL, F_FUNC, true);
+                                       jr = stmt_unop(be, jr, NULL, not);
+                               }
                        }
-               }
-
-               stmt *s = stmt_alias(be, jr, rnme, nme);
-               list_append(l, s);
-       }
-
+                       stmt *s = stmt_alias(be, jr, rnme, nme);
+                       append(l, s);
+               } else {
+                       /* group / aggrs */
+                       stmt *groupby = stmt_group(be, jl, NULL, NULL, NULL, 
true);
+                       stmt *grp = stmt_result(be, groupby, 0);
+                       stmt *ext = stmt_result(be, groupby, 1);
+                       stmt *cnt = stmt_result(be, groupby, 2);
+                       for(node *n = rel->attr->h; n; n = n->next) {
+                               sql_exp *e = n->data;
+                               const char *rnme = exp_relname(e);
+                               const char *nme = exp_name(e);
+                               stmt *s = exp_bin(be, e, left, NULL, grp, ext, 
cnt, NULL, 0, 0, 0);
+                               s = stmt_alias(be, s, rnme, nme);
+                               append(l, s);
+                       }
+               }
+       }
        res = stmt_list(be, l);
        return res;
 }
diff --git a/sql/server/rel_optimize_others.c b/sql/server/rel_optimize_others.c
--- a/sql/server/rel_optimize_others.c
+++ b/sql/server/rel_optimize_others.c
@@ -386,7 +386,14 @@ rel_exps_mark_used(sql_allocator *sa, sq
                        exp_mark_used(rel, e, -1);
                }
        }
+       if (rel->attr) {
+               for (node *n = rel->attr->h; n; n = n->next) {
+                       sql_exp *e = n->data;
 
+                       if (e->used)
+                               nr += exp_mark_used(subrel, e, -2);
+               }
+       }
        if (rel->exps) {
                node *n;
                int len = list_length(rel->exps), i;
@@ -711,6 +718,31 @@ rel_remove_unused(mvc *sql, sql_rel *rel
                }
                return rel;
 
+       case op_join:
+       case op_left:
+       case op_right:
+       case op_full:
+               if (list_length(rel->attr) > 1) {
+                       for(node *n=rel->attr->h; n && !needed; n = n->next) {
+                               sql_exp *e = n->data;
+
+                               if (!e->used)
+                                       needed = 1;
+                       }
+                       if (!needed)
+                               return rel;
+
+                       for(node *n=rel->attr->h; n;) {
+                               node *next = n->next;
+                               sql_exp *e = n->data;
+
+                               if (!e->used)
+                                       list_remove_node(rel->attr, NULL, n);
+                               n = next;
+                       }
+               }
+               return rel;
+
        case op_union:
        case op_inter:
        case op_except:
@@ -723,10 +755,6 @@ rel_remove_unused(mvc *sql, sql_rel *rel
 
        case op_select:
 
-       case op_join:
-       case op_left:
-       case op_right:
-       case op_full:
        case op_semi:
        case op_anti:
                return rel;
@@ -884,6 +912,8 @@ rel_dce_down(mvc *sql, sql_rel *rel, int
                        rel->l = rel_dce_down(sql, rel->l, 0);
                if (rel->r)
                        rel->r = rel_dce_down(sql, rel->r, 0);
+               if (!skip_proj && !list_empty(rel->attr))
+                       rel_dce_sub(sql, rel);
                return rel;
 
        case op_ddl:
diff --git a/sql/server/rel_optimize_proj.c b/sql/server/rel_optimize_proj.c
--- a/sql/server/rel_optimize_proj.c
+++ b/sql/server/rel_optimize_proj.c
@@ -2879,6 +2879,48 @@ rel_simplify_count(visitor *v, sql_rel *
 }
 
 static sql_rel *
+rel_groupjoin(visitor *v, sql_rel *rel)
+{
+       if (!rel || rel_is_ref(rel) || !is_groupby(rel->op) || 
list_empty(rel->r))
+               return rel;
+
+       sql_rel *j = rel->l;
+       if (!j || rel_is_ref(j) || !is_left(j->op) || !list_empty(rel->attr))
+               return rel;
+       /* check group by exps == equi join exps */
+       list *gbes = rel->r;
+       if (list_length(gbes) != list_length(j->exps))
+               return rel;
+       int nr = 0;
+       for(node *n = gbes->h; n; n = n->next) {
+               sql_exp *gbe = n->data;
+               for(node *m = j->exps->h; m; m = m->next) {
+                       sql_exp *je = m->data;
+                       if (je->type != e_cmp || je->flag != cmp_equal)
+                               return rel;
+                       /* check if its a join exp (ie not a selection) */
+                       if (!( (!rel_has_exp(j->l, je->l, false) && 
!rel_has_exp(j->r, je->r, false)) ||
+                                  (!rel_has_exp(j->l, je->r, false) && 
!rel_has_exp(j->r, je->l, false))))
+                               return rel;
+                       if (exp_match(je->l, gbe)) {
+                               nr++;
+                       } else if (exp_match(je->r, gbe)) {
+                               nr++;
+                       }
+               }
+       }
+       if (nr == list_length(gbes)) {
+               printf("#group by converted\n");
+               j = rel_dup(j);
+               j->attr = rel->exps;
+               v->changes++;
+               rel_destroy(rel);
+               return j;
+       }
+       return rel;
+}
+
+static sql_rel *
 rel_optimize_projections_(visitor *v, sql_rel *rel)
 {
        rel = rel_project_cse(v, rel);
@@ -2902,6 +2944,8 @@ rel_optimize_projections_(visitor *v, sq
        if (v->value_based_opt) {
                rel = rel_simplify_count(v, rel);
                rel = rel_basecount(v, rel);
+
+               rel = rel_groupjoin(v, rel);
        }
        return rel;
 }
diff --git a/sql/server/rel_optimize_sel.c b/sql/server/rel_optimize_sel.c
--- a/sql/server/rel_optimize_sel.c
+++ b/sql/server/rel_optimize_sel.c
@@ -2941,7 +2941,7 @@ rel_simplify_project_fk_join(mvc *sql, s
        int fk_left = 1;
 
        /* check for foreign key join */
-       if (list_length(r->exps) != 1)
+       if (list_length(r->exps) != 1 || !list_empty(r->attr))
                return r;
        if (!(je = exps_find_prop(r->exps, PROP_JOINIDX)) || je->flag != 
cmp_equal)
                return r;
@@ -2972,11 +2972,20 @@ rel_simplify_project_fk_join(mvc *sql, s
        (*changes)++;
        /* if the foreign key column doesn't have NULL values, then return it */
        if (!has_nil(le) || is_full(r->op) || (fk_left && is_left(r->op)) || 
(!fk_left && is_right(r->op))) {
+               /* if ->attr, introduce group by on index */
                if (fk_left) {
                        nr = rel_dup(r->l);
                } else {
                        nr = rel_dup(r->r);
                }
+               if (!list_empty(r->attr)) {
+                       nr = rel_groupby(sql, nr, NULL);
+                       if (nr) {
+                               printf("introduced groupby  \n");
+                               nr->r = append(sa_list(sql->sa), le);
+                               nr->exps = r->attr;
+                       }
+               }
                return nr;
        }
 
@@ -3595,7 +3604,6 @@ rel_use_index(visitor *v, sql_rel *rel)
 static sql_rel *
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to