Changeset: c47cfa18f023 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/c47cfa18f023
Modified Files:
        sql/server/rel_statistics.c
Branch: properties
Log Message:

Tunning cardinality estimation for distinct projections and single joins


diffs (155 lines):

diff --git a/sql/server/rel_statistics.c b/sql/server/rel_statistics.c
--- a/sql/server/rel_statistics.c
+++ b/sql/server/rel_statistics.c
@@ -604,12 +604,36 @@ trivial_project_exp_card(sql_exp *e)
        return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
 }
 
+static BUN
+rel_calc_nuniques(sql_rel *rel, sql_rel *l, list *exps)
+{
+       BUN nuniques = 0;
+
+       /* compute the highest number of unique values */
+       if (!list_empty(exps)) {
+               for (node *n = exps->h ; n && nuniques != BUN_NONE ; n = 
n->next) {
+                       sql_exp *e = n->data;
+                       sql_rel *bt = NULL;
+                       prop *p = NULL;
+
+                       if (e->type == e_column && is_unique(e) &&
+                               name_find_column(rel, e->l, e->r, -1, &bt) && 
bt && (p = find_prop(bt->p, PROP_COUNT))) {
+                               nuniques = MAX(nuniques, p->value.lval);
+                       } else if ((p = find_prop(e->p, PROP_NUNIQUES))) {
+                               nuniques = MAX(nuniques, (BUN) p->value.dval);
+                       } else {
+                               nuniques = BUN_NONE;
+                       }
+               }
+       }
+       return nuniques != BUN_NONE ? nuniques : get_rel_count(l);
+}
+
 static sql_rel *
 rel_get_statistics_(visitor *v, sql_rel *rel)
 {
        /* Don't prune updates as pruning will possibly result in removing the 
joins which therefore cannot be used for constraint checking */
        uint8_t has_special_modify = *(uint8_t*) v->data;
-       prop *p;
        bool can_be_pruned = !has_special_modify && v->storage_based_opt;
 
        /* Don't look at the same relation twice */
@@ -635,34 +659,33 @@ rel_get_statistics_(visitor *v, sql_rel 
        case op_except: {
                bool empty_cross = false;
                int i = 0;
-               sql_rel *l = rel->l, *r = rel->r;
+               sql_rel *l = rel->l, *pl = rel->l, *r = rel->r, *pr = rel->r;
 
-               while (is_sample(l->op) || is_topn(l->op)) /* skip topN and 
sample relations in the middle */
-                       l = l->l;
-               while (is_sample(r->op) || is_topn(r->op))
-                       r = r->l;
+               while (is_sample(pl->op) || is_topn(pl->op)) /* skip topN and 
sample relations in the middle */
+                       pl = pl->l;
+               while (is_sample(r->op) || is_topn(pr->op))
+                       pr = pr->l;
                /* if it's not a projection, then project and propagate 
statistics */
-               if (!is_project(l->op) && !is_base(l->op)) {
-                       l = rel_project(v->sql->sa, l, rel_projections(v->sql, 
l, NULL, 0, 1));
-                       set_count_prop(v->sql->sa, l, get_rel_count(l->l));
-                       l->exps = exps_exp_visitor_bottomup(v, l, l->exps, 0, 
&rel_propagate_statistics, false);
+               if (!is_project(pl->op) && !is_base(l->op)) {
+                       pl = rel_project(v->sql->sa, pl, 
rel_projections(v->sql, pl, NULL, 0, 1));
+                       set_count_prop(v->sql->sa, pl, get_rel_count(pl->l));
+                       pl->exps = exps_exp_visitor_bottomup(v, pl, pl->exps, 
0, &rel_propagate_statistics, false);
                }
-               if (!is_project(r->op) && !is_base(r->op)) {
-                       r = rel_project(v->sql->sa, r, rel_projections(v->sql, 
r, NULL, 0, 1));
-                       set_count_prop(v->sql->sa, r, get_rel_count(r->l));
-                       r->exps = exps_exp_visitor_bottomup(v, r, r->exps, 0, 
&rel_propagate_statistics, false);
+               if (!is_project(pr->op) && !is_base(pr->op)) {
+                       pr = rel_project(v->sql->sa, pr, 
rel_projections(v->sql, pr, NULL, 0, 1));
+                       set_count_prop(v->sql->sa, pr, get_rel_count(pr->l));
+                       pr->exps = exps_exp_visitor_bottomup(v, pr, pr->exps, 
0, &rel_propagate_statistics, false);
                }
 
                for (node *n = rel->exps->h ; n ; n = n->next) {
-                       empty_cross |= rel_setop_get_statistics(v->sql, rel, 
l->exps, r->exps, n->data, i);
+                       empty_cross |= rel_setop_get_statistics(v->sql, rel, 
pl->exps, pr->exps, n->data, i);
                        i++;
                }
 
-               l = rel->l;
-               r = rel->r;
                /* propagate row count */
                if (is_union(rel->op)) {
-                       BUN lv = get_rel_count(l), rv = get_rel_count(r);
+                       BUN lv = need_distinct(rel) ? rel_calc_nuniques(rel, 
pl, pl->exps) : get_rel_count(pl),
+                               rv = need_distinct(rel) ? 
rel_calc_nuniques(rel, pr, pr->exps) : get_rel_count(pr);
 
                        if (lv == 0 && rv == 0) { /* both sides empty */
                                if (can_be_pruned)
@@ -677,7 +700,8 @@ rel_get_statistics_(visitor *v, sql_rel 
                                set_count_prop(v->sql->sa, rel, (rv > (BUN_MAX 
- lv)) ? BUN_MAX : (lv + rv)); /* overflow check */
                        } 
                } else if (is_inter(rel->op) || is_except(rel->op)) {
-                       BUN lv = get_rel_count(l), rv = get_rel_count(r);
+                       BUN lv = need_distinct(rel) ? rel_calc_nuniques(rel, 
pl, pl->exps) : get_rel_count(pl),
+                               rv = need_distinct(rel) ? 
rel_calc_nuniques(rel, pr, pr->exps) : get_rel_count(pr);
 
                        if (lv == 0) { /* left side empty */
                                if (can_be_pruned)
@@ -744,7 +768,7 @@ rel_get_statistics_(visitor *v, sql_rel 
                case op_full: {
                        BUN lv = get_rel_count(l), rv = get_rel_count(r), 
uniques_estimate = BUN_MAX, join_idx_estimate = BUN_MAX;
 
-                       if (!list_empty(rel->exps)) {
+                       if (!list_empty(rel->exps) && !is_single(rel)) {
                                for (node *n = rel->exps->h ; n ; n = n->next) {
                                        sql_exp *e = n->data, *el = e->l, *er = 
e->r;
 
@@ -764,7 +788,9 @@ rel_get_statistics_(visitor *v, sql_rel 
                                        }
                                }
                        }
-                       if (join_idx_estimate != BUN_MAX) {
+                       if (is_single(rel)) {
+                               set_count_prop(v->sql->sa, rel, lv);
+                       } else if (join_idx_estimate != BUN_MAX) {
                                set_count_prop(v->sql->sa, rel, 
join_idx_estimate);
                        } else if (uniques_estimate != BUN_MAX) {
                                set_count_prop(v->sql->sa, rel, 
uniques_estimate);
@@ -803,7 +829,11 @@ rel_get_statistics_(visitor *v, sql_rel 
                } break;
                case op_project: {
                        if (l) {
-                               set_count_prop(v->sql->sa, rel, 
get_rel_count(l));
+                               if (need_distinct(rel)) {
+                                       set_count_prop(v->sql->sa, rel, 
rel_calc_nuniques(rel, l, rel->exps));
+                               } else {
+                                       set_count_prop(v->sql->sa, rel, 
get_rel_count(l));
+                               }
                        } else {
                                BUN card = 1;
 
@@ -819,18 +849,8 @@ rel_get_statistics_(visitor *v, sql_rel 
                case op_groupby: {
                        if (list_empty(rel->r)) {
                                set_count_prop(v->sql->sa, rel, 1);
-                       } else if (list_length(rel->r) == 1) {
-                               sql_exp *e = ((list*)rel->r)->h->data;
-                               sql_rel *bt = NULL;
-                               if (e->type == e_column && is_unique(e) && 
name_find_column(rel, e->l, e->r, -1, &bt) && bt && (p = find_prop(bt->p, 
PROP_COUNT))) {
-                                       set_count_prop(v->sql->sa, rel, 
p->value.lval);
-                               } else if ((p = find_prop(e->p, 
PROP_NUNIQUES))) {
-                                       set_count_prop(v->sql->sa, rel, (BUN) 
p->value.dval);
-                               } else {
-                                       set_count_prop(v->sql->sa, rel, 
get_rel_count(l));
-                               }
                        } else {
-                               set_count_prop(v->sql->sa, rel, 
get_rel_count(l));
+                               set_count_prop(v->sql->sa, rel, 
rel_calc_nuniques(rel, l, rel->r));
                        }
                } break;
                default:
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to