Changeset: 61c0e7677763 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/61c0e7677763
Modified Files:
        sql/server/rel_prop.c
        sql/server/rel_prop.h
        sql/server/rel_rewriter.c
        sql/server/rel_rewriter.h
        sql/server/rel_statistics.c
Branch: properties
Log Message:

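Make the count property an unsigned number (BUN) and fix its propagation: an unknown count is now represented by BUN_NONE and is no longer stored as a property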


diffs (295 lines):

diff --git a/sql/server/rel_prop.c b/sql/server/rel_prop.c
--- a/sql/server/rel_prop.c
+++ b/sql/server/rel_prop.c
@@ -118,7 +118,7 @@ propvalue2string(sql_allocator *sa, prop
 
        switch(p->kind) {
        case PROP_COUNT: {
-               snprintf(buf, BUFSIZ, LLFMT, p->value.lval);
+               snprintf(buf, BUFSIZ, BUNFMT, p->value.lval);
                return sa_strdup(sa, buf);
        }
        case PROP_NUNIQUES: {
diff --git a/sql/server/rel_prop.h b/sql/server/rel_prop.h
--- a/sql/server/rel_prop.h
+++ b/sql/server/rel_prop.h
@@ -25,7 +25,7 @@ typedef enum rel_prop {
 typedef struct prop {
        rel_prop kind;  /* kind of property */
        union {
-               lng lval; /* property with simple counts */
+               BUN lval; /* property with simple counts */
                dbl dval; /* property with estimate */
                void *pval; /* property value */
        } value;
diff --git a/sql/server/rel_rewriter.c b/sql/server/rel_rewriter.c
--- a/sql/server/rel_rewriter.c
+++ b/sql/server/rel_rewriter.c
@@ -499,16 +499,18 @@ exps_unique(mvc *sql, sql_rel *rel, list
        return 0;
 }
 
-lng
+BUN
 get_rel_count(sql_rel *rel)
 {
        prop *found = find_prop(rel->p, PROP_COUNT);
-       return found ? found->value.lval : -1;
+       return found ? found->value.lval : BUN_NONE;
 }
 
 void
-set_count_prop(sql_allocator *sa, sql_rel *rel, lng val)
+set_count_prop(sql_allocator *sa, sql_rel *rel, BUN val)
 {
-       prop *p = rel->p = prop_create(sa, PROP_COUNT, rel->p);
-       p->value.lval = val;
+       if (val != BUN_NONE) {
+               prop *p = rel->p = prop_create(sa, PROP_COUNT, rel->p);
+               p->value.lval = val;
+       }
 }
diff --git a/sql/server/rel_rewriter.h b/sql/server/rel_rewriter.h
--- a/sql/server/rel_rewriter.h
+++ b/sql/server/rel_rewriter.h
@@ -47,7 +47,7 @@ extern int exps_unique(mvc *sql, sql_rel
 
 extern sql_column *exp_find_column(sql_rel *rel, sql_exp *exp, int pnr);
 
-extern lng get_rel_count(sql_rel *rel);
-extern void set_count_prop(sql_allocator *sa, sql_rel *rel, lng val);
+extern BUN get_rel_count(sql_rel *rel);
+extern void set_count_prop(sql_allocator *sa, sql_rel *rel, BUN val);
 
 #endif /*_REL_REWRITER_H_*/
diff --git a/sql/server/rel_statistics.c b/sql/server/rel_statistics.c
--- a/sql/server/rel_statistics.c
+++ b/sql/server/rel_statistics.c
@@ -596,12 +596,12 @@ set_setop_side(visitor *v, sql_rel *rel,
        return side;
 }
 
-static lng
+static BUN
 trivial_project_exp_card(sql_exp *e)
 {
        if (e->type == e_convert)
                return trivial_project_exp_card(e->l);
-       return e->type == e_atom && e->f ? list_length(e->f) : 1;
+       return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1;
 }
 
 static sql_rel *
@@ -628,7 +628,8 @@ rel_get_statistics_(visitor *v, sql_rel 
                }
                /* set table row count */
                /* TODO look for remote/replica tables */
-               set_count_prop(v->sql->sa, rel, isTable(t) ? 
(lng)store->storage_api.count_col(v->sql->session->tr, 
ol_first_node(t->columns)->data, 0) : 500000);
+               if (isTable(t))
+                       set_count_prop(v->sql->sa, rel, 
(BUN)store->storage_api.count_col(v->sql->session->tr, 
ol_first_node(t->columns)->data, 0));
        } break;
        case op_union:
        case op_inter:
@@ -662,7 +663,7 @@ rel_get_statistics_(visitor *v, sql_rel 
                r = rel->r;
                /* propagate row count */
                if (is_union(rel->op)) {
-                       lng lv = get_rel_count(l), rv = get_rel_count(r);
+                       BUN lv = get_rel_count(l), rv = get_rel_count(r);
 
                        if (lv == 0 && rv == 0) { /* both sides empty */
                                if (can_be_pruned)
@@ -673,13 +674,15 @@ rel_get_statistics_(visitor *v, sql_rel 
                                rel = set_setop_side(v, rel, r);
                        } else if (can_be_pruned && rv == 0 && 
!rel_is_ref(rel)) { /* right side empty */
                                rel = set_setop_side(v, rel, l);
-                       } else if ((lv + rv) < lv) {
-                               set_count_prop(v->sql->sa, rel, MAX(lv, rv));
-                       } else {
-                               set_count_prop(v->sql->sa, rel, lv + rv);
-                       }
+                       } else if (lv != BUN_NONE && rv != BUN_NONE) {
+                               if ((lv + rv) < lv) {
+                                       set_count_prop(v->sql->sa, rel, MAX(lv, 
rv));
+                               } else {
+                                       set_count_prop(v->sql->sa, rel, lv + 
rv);
+                               }
+                       } 
                } else if (is_inter(rel->op) || is_except(rel->op)) {
-                       lng lv = get_rel_count(l), rv = get_rel_count(r);
+                       BUN lv = get_rel_count(l), rv = get_rel_count(r);
 
                        if (lv == 0) { /* left side empty */
                                if (can_be_pruned)
@@ -735,7 +738,7 @@ rel_get_statistics_(visitor *v, sql_rel 
                        rel->exps = rel_prune_predicates(v, rel);
                        if (v->changes > changes) {
                                rel = rewrite_simplify(v, 0, 
v->value_based_opt, rel);
-                               if (is_select(rel->op) && get_rel_count(rel->l) 
== -1) /* hack, set generated projection count */
+                               if (is_select(rel->op) && get_rel_count(rel->l) 
== BUN_NONE) /* hack, set generated projection count */
                                        set_count_prop(v->sql->sa, rel->l, 0);
                        }
                }
@@ -747,7 +750,7 @@ rel_get_statistics_(visitor *v, sql_rel 
                case op_left:
                case op_right:
                case op_full: {
-                       lng lv = get_rel_count(l), rv = get_rel_count(r), 
uniques_estimate = GDK_lng_max, join_idx_estimate = GDK_lng_max;
+                       BUN lv = get_rel_count(l), rv = get_rel_count(r), 
uniques_estimate = BUN_NONE, join_idx_estimate = BUN_NONE;
 
                        if (!list_empty(rel->exps)) {
                                for (node *n = rel->exps->h ; n ; n = n->next) {
@@ -759,15 +762,15 @@ rel_get_statistics_(visitor *v, sql_rel 
                                                /* if one of the sides is 
unique, the cardinality will be that exact number, but look for nulls */
                                                if (!is_semantics(e) || 
!has_nil(el) || !has_nil(er)) {
                                                        if (is_unique(el)) {
-                                                               lng ncount = 
(is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
+                                                               BUN ncount = 
(is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv;
                                                                
uniques_estimate = MIN(uniques_estimate, ncount);
                                                        } else if 
(is_unique(er)) {
-                                                               lng ncount = 
(is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
+                                                               BUN ncount = 
(is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv;
                                                                
uniques_estimate = MIN(uniques_estimate, ncount);
                                                        }
                                                }
                                                if ((p = find_prop(el->p, 
PROP_NUNIQUES)) && (p2 = find_prop(er->p, PROP_NUNIQUES))) {
-                                                       lng pv = (lng) 
p->value.dval, pv2 = (lng) p2->value.dval, mul = pv * pv2;
+                                                       BUN pv = (BUN) 
p->value.dval, pv2 = (BUN) p2->value.dval, mul = pv * pv2;
                                                        mul = mul < pv ? 
MAX(pv, pv2) : mul; /* check for overflows */
 
                                                        if (is_left(rel->op))
@@ -782,29 +785,31 @@ rel_get_statistics_(visitor *v, sql_rel 
                                        }
                                }
                        }
-                       if (join_idx_estimate != GDK_lng_max) {
+                       if (join_idx_estimate != BUN_NONE) {
                                set_count_prop(v->sql->sa, rel, 
join_idx_estimate);
-                       } else if (uniques_estimate != GDK_lng_max) {
+                       } else if (uniques_estimate != BUN_NONE) {
                                set_count_prop(v->sql->sa, rel, 
uniques_estimate);
-                       } else if (list_empty(rel->exps) && 
is_outerjoin(rel->op)) { /* outer joins without conditions, sum cardinalities 
instead of multiply */
-                               if ((lv + rv) < lv) {
+                       } else if (lv != BUN_NONE && rv != BUN_NONE) {
+                               if (list_empty(rel->exps) && 
is_outerjoin(rel->op)) { /* outer joins without conditions, sum cardinalities 
instead of multiply */
+                                       if ((lv + rv) < lv) {
+                                               set_count_prop(v->sql->sa, rel, 
MAX(lv, rv));
+                                       } else {
+                                               set_count_prop(v->sql->sa, rel, 
lv + rv);
+                                       }
+                               } else if ((lv * rv) < lv) {
                                        set_count_prop(v->sql->sa, rel, MAX(lv, 
rv));
                                } else {
-                                       set_count_prop(v->sql->sa, rel, lv + 
rv);
-                               }
-                       } else if ((lv * rv) < lv) {
-                               set_count_prop(v->sql->sa, rel, MAX(lv, rv));
-                       } else {
-                               lng mul = lv * rv;
+                                       BUN mul = lv * rv;
 
-                               if (is_left(rel->op))
-                                       set_count_prop(v->sql->sa, rel, 
MAX(mul, lv));
-                               else if (is_right(rel->op))
-                                       set_count_prop(v->sql->sa, rel, 
MAX(mul, rv));
-                               else if (is_full(rel->op))
-                                       set_count_prop(v->sql->sa, rel, 
MAX(MAX(mul, lv), rv));
-                               else
-                                       set_count_prop(v->sql->sa, rel, lv * 
rv);
+                                       if (is_left(rel->op))
+                                               set_count_prop(v->sql->sa, rel, 
MAX(mul, lv));
+                                       else if (is_right(rel->op))
+                                               set_count_prop(v->sql->sa, rel, 
MAX(mul, rv));
+                                       else if (is_full(rel->op))
+                                               set_count_prop(v->sql->sa, rel, 
MAX(MAX(mul, lv), rv));
+                                       else
+                                               set_count_prop(v->sql->sa, rel, 
lv * rv);
+                               }
                        }
                } break;
                case op_semi:
@@ -817,7 +822,7 @@ rel_get_statistics_(visitor *v, sql_rel 
                        if (l) {
                                set_count_prop(v->sql->sa, rel, 
get_rel_count(l));
                        } else {
-                               lng card = 1;
+                               BUN card = 1;
 
                                if (!list_empty(rel->exps)) {
                                        for (node *n = rel->exps->h ; n ; n = 
n->next) {
@@ -837,7 +842,7 @@ rel_get_statistics_(visitor *v, sql_rel 
                                if (e->type == e_column && is_unique(e) && 
name_find_column(rel, e->l, e->r, -1, &bt) && bt && (p = find_prop(bt->p, 
PROP_COUNT))) {
                                        set_count_prop(v->sql->sa, rel, 
p->value.lval);
                                } else if ((p = find_prop(e->p, 
PROP_NUNIQUES))) {
-                                       set_count_prop(v->sql->sa, rel, (lng) 
p->value.dval);
+                                       set_count_prop(v->sql->sa, rel, (BUN) 
p->value.dval);
                                } else {
                                        set_count_prop(v->sql->sa, rel, 
get_rel_count(l));
                                }
@@ -847,39 +852,42 @@ rel_get_statistics_(visitor *v, sql_rel 
                        break;
                }
        } break;
-       case op_table: {
-               set_count_prop(v->sql->sa, rel, 1); /* TODO later we can tune 
it */
-       } break;
        case op_topn: {
-               sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) 
> 1 ? rel->exps->h->next->data : NULL;
-               lng lv = get_rel_count(rel->l);
+               BUN lv = get_rel_count(rel->l);
 
-               if (oe && oe->l && !exp_is_null(oe)) { /* no parameters */
-                       lng offset = ((atom*)oe->l)->data.val.lval;
-                       lv = offset > lv ? 0 : lv - offset;
+               if (lv != BUN_NONE) {
+                       sql_exp *le = rel->exps->h->data, *oe = 
list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL;
+                       if (oe && oe->l && !exp_is_null(oe)) { /* no parameters 
*/
+                               BUN offset = (BUN) 
((atom*)oe->l)->data.val.lval;
+                               lv = offset > lv ? 0 : lv - offset;
+                       }
+                       if (le->l && !exp_is_null(le)) {
+                               BUN limit = (BUN) ((atom*)le->l)->data.val.lval;
+                               lv = MIN(lv, limit);
+                       }
+                       set_count_prop(v->sql->sa, rel, lv);
                }
-               if (le->l && !exp_is_null(le)) {
-                       lng limit = ((atom*)le->l)->data.val.lval;
-                       lv = MIN(lv, limit);
-               }
-               set_count_prop(v->sql->sa, rel, lv);
        } break;
        case op_sample: {
-               sql_exp *se = rel->exps->h->data;
-               sql_subtype *tp = exp_subtype(se);
-               lng lv = get_rel_count(rel->l);
+               BUN lv = get_rel_count(rel->l);
+
+               if (lv != BUN_NONE) {
+                       sql_exp *se = rel->exps->h->data;
+                       sql_subtype *tp = exp_subtype(se);
 
-               if (se->l && tp->type->eclass == EC_NUM) { /* sample is a 
number of rows */
-                       lng sample = ((atom*)se->l)->data.val.lval;
-                       lv = MIN(lv, sample);
-               } else if (se->l) { /* sample is a percentage of rows */
-                       dbl percent = ((atom*)se->l)->data.val.dval;
-                       assert(tp->type->eclass == EC_FLT);
-                       lv = (lng) ceil((dbl)lv * percent);
+                       if (se->l && tp->type->eclass == EC_NUM) { /* sample is 
a number of rows */
+                               BUN sample = (BUN) 
((atom*)se->l)->data.val.lval;
+                               lv = MIN(lv, sample);
+                       } else if (se->l) { /* sample is a percentage of rows */
+                               dbl percent = ((atom*)se->l)->data.val.dval;
+                               assert(tp->type->eclass == EC_FLT);
+                               lv = (BUN) ceil((dbl)lv * percent);
+                       }
+                       set_count_prop(v->sql->sa, rel, lv);
                }
-               set_count_prop(v->sql->sa, rel, lv);
        } break;
        /*These relations are less important for now
+       case op_table: TODO later we can tune it
        case op_insert:
        case op_update:
        case op_delete:
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to