Changeset: 66099b3f3afd for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=66099b3f3afd
Modified Files:
        sql/server/rel_optimizer.c
        sql/server/rel_optimizer.h
        sql/server/rel_unnest.c
        sql/test/BugTracker-2015/Tests/crash.Bug-3736.stable.out
        sql/test/BugTracker/Tests/bug_in_selection.SF-1892413.stable.out
Branch: default
Log Message:

improved handling of unnesting, ie don't introduce an intermediate in the
general unnesting case when left already has unique/distinct values


diffs (truncated from 337 to 300 lines):

diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c
--- a/sql/server/rel_optimizer.c
+++ b/sql/server/rel_optimizer.c
@@ -41,24 +41,6 @@ static sql_rel * rel_remove_empty_select
 
 static sql_subfunc *find_func( mvc *sql, char *name, list *exps );
 
-static int
-exps_unique( list *exps )
-{
-       node *n;
-
-       if ((n = exps->h) != NULL) {
-               sql_exp *e = n->data;
-               prop *p;
-
-               if (e && (p = find_prop(e->p, PROP_HASHCOL)) != NULL) {
-                       sql_ukey *k = p->value;
-                       if (k && list_length(k->k.columns) <= 1)
-                               return 1;
-               }
-       }
-       return 0;
-}
-
 /* The important task of the relational optimizer is to optimize the
    join order. 
 
@@ -130,10 +112,11 @@ name_find_column( sql_rel *rel, const ch
        case op_left: 
        case op_right: 
        case op_full: 
+               /* first right (possible subquery) */
+               c = name_find_column( rel->r, rname, name, pnr, bt);
+               /* fall through */
        case op_semi: 
        case op_anti: 
-               /* first right (possible subquery) */
-               c = name_find_column( rel->r, rname, name, pnr, bt);
                if (!c) 
                        c = name_find_column( rel->l, rname, name, pnr, bt);
                return c;
@@ -2398,6 +2381,81 @@ exp_push_down_prj(mvc *sql, sql_exp *e, 
        return NULL;
 }
 
+static int
+rel_is_unique( sql_rel *rel, sql_ukey *k)
+{
+       switch(rel->op) {
+       case op_left:
+       case op_right:
+       case op_full:
+       case op_join:
+               return 0;
+       case op_semi:
+       case op_anti:
+               return rel_is_unique(rel->l, k);
+       case op_table:
+       case op_basetable:
+               return 1;
+       default:
+               return 0;
+       }
+}
+
+int
+exps_unique(mvc *sql, sql_rel *rel, list *exps)
+{
+       node *n;
+       char *matched = NULL; 
+       int nr = 0;
+       sql_ukey *k = NULL;
+
+       if (list_empty(exps))
+               return 0;
+       for(n = exps->h; n && !k; n = n->next) {
+               sql_exp *e = n->data;
+               prop *p;
+
+               if (e && (p = find_prop(e->p, PROP_HASHCOL)) != NULL)
+                       k = p->value;
+       }
+       if (!k || list_length(k->k.columns) > list_length(exps))
+               return 0;
+       if (rel) {
+               matched = (char*)sa_alloc(sql->sa, list_length(k->k.columns));
+               memset(matched, 0, list_length(k->k.columns));
+               for(n = exps->h; n; n = n->next) {
+                       sql_exp *e = n->data;
+                       fcmp cmp = (fcmp)&kc_column_cmp;
+                       sql_column *c = exp_find_column(rel, e, -2);
+                       node *m;
+       
+                       if (c && (m=list_find(k->k.columns, c, cmp)) != NULL) {
+                               int pos = list_position(k->k.columns, m->data);
+                               if (!matched[pos]) 
+                                       nr++;
+                               matched[pos] = 1;
+                       }
+               }
+               if (nr == list_length(k->k.columns)) {
+                       return rel_is_unique(rel, k);
+               }
+       }
+       /*
+       if ((n = exps->h) != NULL) {
+               sql_exp *e = n->data;
+               prop *p;
+
+               if (e && (p = find_prop(e->p, PROP_HASHCOL)) != NULL) {
+                       sql_ukey *k = p->value;
+                       if (k && list_length(k->k.columns) <= 1)
+                               return 1;
+               }
+       }
+       */
+       return 0;
+}
+
+
 static sql_rel *
 rel_distinct_project2groupby(int *changes, mvc *sql, sql_rel *rel)
 {
@@ -2411,9 +2469,9 @@ rel_distinct_project2groupby(int *change
        }
 
        /* rewrite distinct project [ pk ] ( select ( table ) [ e op val ]) 
-        * into project [ pk ] ( select ( table )  */
+        * into project [ pk ] ( select/semijoin ( table )  */
        if (rel->op == op_project && rel->l && !rel->r /* no order by */ && 
need_distinct(rel) &&
-           l->op == op_select && exps_unique(rel->exps)) 
+           (l->op == op_select || l->op == op_semi) && exps_unique(sql, rel, 
rel->exps)) 
                set_nodistinct(rel);
        /* rewrite distinct project [ gbe ] ( select ( groupby [ gbe ] [ gbe, e 
] )[ e op val ]) 
         * into project [ gbe ] ( select ( group etc ) */
@@ -5173,7 +5231,7 @@ static sql_rel *
 rel_push_project_down_union(int *changes, mvc *sql, sql_rel *rel) 
 {
        /* first remove distinct if already unique */
-       if (rel->op == op_project && need_distinct(rel) && rel->exps && 
exps_unique(rel->exps))
+       if (rel->op == op_project && need_distinct(rel) && rel->exps && 
exps_unique(sql, rel, rel->exps))
                set_nodistinct(rel);
 
        if (rel->op == op_project && rel->l && rel->exps && !rel->r) {
@@ -5201,8 +5259,8 @@ rel_push_project_down_union(int *changes
                        ur = rel_project(sql->sa, ur, 
                                rel_projections(sql, ur, NULL, 1, 1));
                need_distinct = (need_distinct && 
-                               (!exps_unique(ul->exps) ||
-                                !exps_unique(ur->exps)));
+                               (!exps_unique(sql, ul, ul->exps) ||
+                                !exps_unique(sql, ur, ur->exps)));
                rel_rename_exps(sql, u->exps, ul->exps);
                rel_rename_exps(sql, u->exps, ur->exps);
 
@@ -5588,7 +5646,7 @@ rel_groupby_distinct(int *changes, mvc *
 
                        if (exp_aggr_is_count(e) && need_distinct(e)) {
                                /* if count over unique values (ukey/pkey) */
-                               if (e->l && exps_unique(e->l))
+                               if (e->l && exps_unique(sql, rel, e->l))
                                        set_nodistinct(e);
                        }
                }
diff --git a/sql/server/rel_optimizer.h b/sql/server/rel_optimizer.h
--- a/sql/server/rel_optimizer.h
+++ b/sql/server/rel_optimizer.h
@@ -17,6 +17,7 @@ extern sql_rel * rel_optimizer(mvc *sql,
 extern int exp_joins_rels(sql_exp *e, list *rels);
 
 extern void *name_find_column( sql_rel *rel, const char *rname, const char 
*name, int pnr, sql_rel **bt );
+extern int exps_unique(mvc *sql, sql_rel *rel, list *exps);
 
 extern sql_rel * rel_dce(mvc *sql, sql_rel *rel);
 
diff --git a/sql/server/rel_unnest.c b/sql/server/rel_unnest.c
--- a/sql/server/rel_unnest.c
+++ b/sql/server/rel_unnest.c
@@ -10,11 +10,22 @@
 
 #include "monetdb_config.h"
 #include "rel_unnest.h"
+#include "rel_optimizer.h"
+#include "rel_prop.h"
 #include "rel_rel.h"
 #include "rel_exp.h"
 
 static int exps_have_freevar( list *exps );
 
+/* check if the set is distinct for the set of free variables */
+static int
+is_distinct_set(mvc *sql, sql_rel *rel, list *ad)
+{
+       if (ad && exps_unique(sql, rel, ad ))
+               return 1;
+       return need_distinct(rel);
+}      
+
 int
 exp_has_freevar( sql_exp *e)
 {
@@ -483,19 +494,23 @@ exps_is_constant( list *exps )
 }
 
 static sql_rel *
-push_up_groupby(mvc *sql, sql_rel *rel) 
+push_up_groupby(mvc *sql, sql_rel *rel, list *ad) 
 {
        /* input rel is dependent join with on the right a project */ 
        if (rel && (is_join(rel->op) || is_semi(rel->op)) && is_dependent(rel)) 
{
                sql_rel *l = rel->l, *r = rel->r;
 
                /* left of rel should be a set */ 
-               if (l && need_distinct(l) && r && r->op == op_groupby) {
+               if (l && is_distinct_set(sql, l, ad) && r && r->op == 
op_groupby) {
                        list *sexps, *jexps;
                        node *n;
                        /* move groupby up, ie add attributes of left + the old 
expression list */
                        list *a = rel_projections(sql, rel->l, NULL, 1, 1);
                
+                       assert(rel->op != op_anti);
+                       if (rel->op == op_semi && !need_distinct(l))
+                               rel->op = op_join;
+
                        for (n = r->exps->h; n; n = n->next ) {
                                sql_exp *e = n->data;
 
@@ -611,7 +626,7 @@ push_up_join(mvc *sql, sql_rel *rel)
                sql_rel *d = rel->l, *j = rel->r;
 
                /* left of rel should be a set */ 
-               if (d && need_distinct(d) && j && (is_join(j->op) || 
is_semi(j->op))) {
+               if (d && is_distinct_set(sql, d, NULL) && j && (is_join(j->op) 
|| is_semi(j->op))) {
                        sql_rel *jl = j->l, *jr = j->r;
                        /* op_join if F(jl) intersect A(D) = empty -> jl join 
(D djoin jr) 
                         *            F(jr) intersect A(D) = empty -> (D djoin 
jl) join jr
@@ -688,7 +703,7 @@ push_up_set(mvc *sql, sql_rel *rel)
                sql_rel *d = rel->l, *s = rel->r;
 
                /* left of rel should be a set */ 
-               if (d && need_distinct(d) && s && is_set(s->op)) {
+               if (d && is_distinct_set(sql, d, NULL) && s && is_set(s->op)) {
                        list *sexps;
                        node *m;
                        sql_rel *sl = s->l, *sr = s->r, *n;
@@ -721,7 +736,7 @@ push_up_table(mvc *sql, sql_rel *rel)
                sql_rel *d = rel->l, *tf = rel->r;
 
                /* for now just push d into function */
-               if (d && need_distinct(d) && tf && is_base(tf->op)) {
+               if (d && is_distinct_set(sql, d, NULL) && tf && 
is_base(tf->op)) {
                        if (tf->l) {
                                sql_rel *l = tf->l;
 
@@ -842,7 +857,7 @@ rel_unnest_dependent(mvc *sql, sql_rel *
 
                /* try to push dependent join down */
                if (rel_has_freevar(r)){
-                       list *ad;
+                       list *ad = rel_dependent_var(sql, rel->l, rel->r);
 
                        if (r && is_simple_project(r->op)) {
                                rel = push_up_project(sql, rel);
@@ -859,22 +874,22 @@ rel_unnest_dependent(mvc *sql, sql_rel *
                                return rel_unnest_dependent(sql, rel);
                        }
 
-                       if (r && is_groupby(r->op) && need_distinct(l)) { 
-                               rel = push_up_groupby(sql, rel);
+                       if (r && is_groupby(r->op) && is_distinct_set(sql, l, 
ad)) { 
+                               rel = push_up_groupby(sql, rel, ad);
                                return rel_unnest_dependent(sql, rel);
                        }
 
-                       if (r && (is_join(r->op) || is_semi(r->op)) && 
need_distinct(l)) {
+                       if (r && (is_join(r->op) || is_semi(r->op)) && 
is_distinct_set(sql, l, NULL)) {
                                rel = push_up_join(sql, rel);
                                return rel_unnest_dependent(sql, rel);
                        }
 
-                       if (r && is_set(r->op) && need_distinct(l)) {
+                       if (r && is_set(r->op) && is_distinct_set(sql, l, 
NULL)) {
                                rel = push_up_set(sql, rel);
                                return rel_unnest_dependent(sql, rel);
                        }
 
-                       if (r && is_base(r->op) && need_distinct(l)) { /* TODO 
table functions need dependent implementation */
+                       if (r && is_base(r->op) && is_distinct_set(sql, l, 
NULL)) { /* TODO table functions need dependent implementation */
                                rel = push_up_table(sql, rel);
                                return rel; 
                        }
diff --git a/sql/test/BugTracker-2015/Tests/crash.Bug-3736.stable.out 
b/sql/test/BugTracker-2015/Tests/crash.Bug-3736.stable.out
--- a/sql/test/BugTracker-2015/Tests/crash.Bug-3736.stable.out
+++ b/sql/test/BugTracker-2015/Tests/crash.Bug-3736.stable.out
@@ -98,10 +98,10 @@ project (
 | | | | | | | & REF 2  
 | | | | | | ) [ "b3a"."open_auction_id" NOT NULL = "o"."open_auction_id" NOT 
NULL ]
 | | | | | ) [ "o"."open_auction_id" NOT NULL ] [ sys.min no nil ("b3a"."id" 
NOT NULL HASHCOL ) NOT NULL as "L3"."L3", "o"."open_auction_id" NOT NULL ]
-| | | | ) [ "L3"."L3" NOT NULL, "o"."open_auction_id" NOT NULL as "L25"."L25" ]
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to