Changeset: 3da49a645bba for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/3da49a645bba
Modified Files:
        sql/server/rel_statistics.c
Branch: balanced_union
Log Message:

add statistics for munion


diffs (144 lines):

diff --git a/sql/server/rel_statistics.c b/sql/server/rel_statistics.c
--- a/sql/server/rel_statistics.c
+++ b/sql/server/rel_statistics.c
@@ -285,7 +285,7 @@ rel_setop_get_statistics(mvc *sql, sql_r
                        set_minmax_property(sql, e, PROP_MIN, lval_min);
        }
 
-       if (is_union(rel->op)) {
+       if (is_union(rel->op) || is_munion(rel->op)) {
                if (!has_nil(le) && !has_nil(re))
                        set_has_no_nil(e);
                if (need_distinct(rel) && list_length(rel->exps) == 1)
@@ -310,6 +310,42 @@ rel_setop_get_statistics(mvc *sql, sql_r
        return false;
 }
 
+
+/*
+ * Compute the statistics of 'e', the i-th output column of the n-ary union
+ * 'rel', by combining the i-th expression of every input relation in 'rels'.
+ *
+ *  sql   context handed on to the statistics helpers
+ *  rel   the relation owning 'e'; asserted to be an munion
+ *  rels  list of input relations; its head is dereferenced unconditionally,
+ *        so the list must hold at least one relation
+ *  e     output expression that receives the properties
+ *  i     position of the column in each input's expression list
+ *
+ * A min/max bound only holds for the union when every input provides one:
+ * an input without PROP_MIN/PROP_MAX may contain any value.  The bounds are
+ * therefore folded in local variables and published once, after the loop,
+ * so that an input lacking a bound can neither be skipped over nor leave a
+ * partial result behind on 'e'.
+ */
+static void
+rel_munion_get_statistics(mvc *sql, sql_rel *rel, list *rels, sql_exp *e, int i)
+{
+       assert(is_munion(rel->op));
+
+       sql_rel *l = rels->h->data;
+       sql_exp *le = list_fetch(l->exps, i);
+       atom *val_min = find_prop_and_get(le->p, PROP_MIN), *val_max = find_prop_and_get(le->p, PROP_MAX);
+       bool has_nonil = !has_nil(le);
+
+       for (node *n = rels->h->next; n; n = n->next) {
+               sql_rel *r = n->data;
+               sql_exp *re = list_fetch(r->exps, i);
+               atom *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
+
+               /* for union the new max is the max of all inputs; once an input
+                * has no max the bound is unknown and stays unknown */
+               val_max = (val_max && rval_max) ? statistics_atom_max(sql, val_max, rval_max) : NULL;
+               /* likewise, the new min is the min of all inputs */
+               val_min = (val_min && rval_min) ? statistics_atom_min(sql, val_min, rval_min) : NULL;
+               /* the result is nil-free only if every input is */
+               has_nonil &= !has_nil(re);
+       }
+
+       /* as before, min/max are only set when at least two inputs were merged */
+       if (rels->h->next) {
+               if (val_max)
+                       set_minmax_property(sql, e, PROP_MAX, val_max);
+               if (val_min)
+                       set_minmax_property(sql, e, PROP_MIN, val_min);
+       }
+
+       if (has_nonil)
+               set_has_no_nil(e);
+
+       /* a distinct union over a single column yields unique values */
+       if (need_distinct(rel) && list_length(rel->exps) == 1)
+               set_unique(e);
+}
+
+
 static sql_exp *
 rel_propagate_statistics(visitor *v, sql_rel *rel, sql_exp *e, int depth)
 {
@@ -789,9 +825,86 @@ rel_get_statistics_(visitor *v, sql_rel 
                }
                break;
        }
-       case op_munion:
-               // TODO: munion statistis
+       case op_munion: {
+               list *l = rel->l, *nrels = sa_list(v->sql->sa);
+               BUN cnt = 0;
+               bool needs_pruning = false;
+
+               for (node *n = l->h; n; n = n->next) {
+                       sql_rel *r = n->data, *pl = r;
+
+                       while (is_sample(pl->op) || is_topn(pl->op)) /* skip 
topN and sample relations in the middle */
+                                       pl = pl->l;
+                       /* if it's not a projection, then project and propagate 
statistics */
+                       if (!is_project(pl->op) && !is_base(pl->op)) {
+                               pl = rel_project(v->sql->sa, pl, 
rel_projections(v->sql, pl, NULL, 0, 1));
+                               set_count_prop(v->sql->sa, pl, 
get_rel_count(pl->l));
+                               pl->exps = exps_exp_visitor_bottomup(v, pl, 
pl->exps, 0, &rel_propagate_statistics, false);
+                       }
+                       nrels = append(nrels, pl);
+                       /* we need new munion statistics */
+                       /* propagate row count */
+                       BUN rv = need_distinct(rel) ? rel_calc_nuniques(v->sql, 
r, r->exps) : get_rel_count(r);
+                       if (!rv && can_be_pruned)
+                               needs_pruning = true;
+                       if (rv > (BUN_MAX - cnt)) /* overflow check */
+                               rv = BUN_MAX;
+                       else
+                               cnt += rv;
+               }
+               int i = 0;
+               for (node *n = rel->exps->h ; n ; n = n->next, i++)
+                       rel_munion_get_statistics(v->sql, rel, nrels, n->data, 
i);
+
+               if (needs_pruning) {
+                       v->changes++;
+                       list *nl = sa_list(l->sa);
+
+                       for (node *n = nrels->h; n; n = n->next) {
+                               sql_rel *r = n->data;
+                               BUN rv = need_distinct(rel) ? 
rel_calc_nuniques(v->sql, r, r->exps) : get_rel_count(r);
+
+                               if (!rv) { /* keep last for now */
+                                       rel_destroy(r);
+                                       continue;
+                               }
+                               nl = append(nl, r);
+                       }
+                       rel->l = nl;
+                       if (list_length(nl) == 1) {
+                               sql_rel *l = rel->l = nl->h->data; /* ugh */
+                               rel->op = op_project;
+
+                               for (node *n = rel->exps->h, *m = l->exps->h ; 
n && m ; n = n->next, m = m->next) {
+                                       sql_exp *pe = n->data, *ie = m->data;
+                                       sql_exp *ne = exp_ref(v->sql, ie);
+                                       exp_setname(v->sql->sa, ne, 
exp_relname(pe), exp_name(pe));
+                                       n->data = ne;
+                               }
+                               list_hash_clear(rel->exps);
+                       } else if (list_empty(nl)) {
+                               /* empty select (project [ nils ] ) */
+                               for (node *n = rel->exps->h ; n ; n = n->next) {
+                                       sql_exp *e = n->data, *a = 
exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL));
+                                       exp_prop_alias(v->sql->sa, a, e);
+                                       n->data = a;
+                               }
+                               list_hash_clear(rel->exps);
+                               sql_rel *l = rel_project(v->sql->sa, NULL, 
rel->exps);
+                               set_count_prop(v->sql->sa, l, 1);
+                               l = rel_select(v->sql->sa, l, 
exp_atom_bool(v->sql->sa, 0));
+                               set_count_prop(v->sql->sa, l, 0);
+                               rel->op = op_project;
+                               rel->l = l;
+                               rel->exps = rel_projections(v->sql, l, NULL, 1, 
1);
+                               set_count_prop(v->sql->sa, rel, 0);
+                               set_nodistinct(rel); /* set relations may have 
distinct flag set */
+                       }
+               } else {
+                       set_count_prop(v->sql->sa, rel, cnt);
+               }
                break;
+       }
        case op_join:
        case op_left:
        case op_right:
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to