Changeset: 61c0e7677763 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/61c0e7677763
Modified Files:
	sql/server/rel_prop.c
	sql/server/rel_prop.h
	sql/server/rel_rewriter.c
	sql/server/rel_rewriter.h
	sql/server/rel_statistics.c
Branch: properties
Log Message:
Make count property an unsigned number and fix propagation diffs (295 lines): diff --git a/sql/server/rel_prop.c b/sql/server/rel_prop.c --- a/sql/server/rel_prop.c +++ b/sql/server/rel_prop.c @@ -118,7 +118,7 @@ propvalue2string(sql_allocator *sa, prop switch(p->kind) { case PROP_COUNT: { - snprintf(buf, BUFSIZ, LLFMT, p->value.lval); + snprintf(buf, BUFSIZ, BUNFMT, p->value.lval); return sa_strdup(sa, buf); } case PROP_NUNIQUES: { diff --git a/sql/server/rel_prop.h b/sql/server/rel_prop.h --- a/sql/server/rel_prop.h +++ b/sql/server/rel_prop.h @@ -25,7 +25,7 @@ typedef enum rel_prop { typedef struct prop { rel_prop kind; /* kind of property */ union { - lng lval; /* property with simple counts */ + BUN lval; /* property with simple counts */ dbl dval; /* property with estimate */ void *pval; /* property value */ } value; diff --git a/sql/server/rel_rewriter.c b/sql/server/rel_rewriter.c --- a/sql/server/rel_rewriter.c +++ b/sql/server/rel_rewriter.c @@ -499,16 +499,18 @@ exps_unique(mvc *sql, sql_rel *rel, list return 0; } -lng +BUN get_rel_count(sql_rel *rel) { prop *found = find_prop(rel->p, PROP_COUNT); - return found ? found->value.lval : -1; + return found ? 
found->value.lval : BUN_NONE; } void -set_count_prop(sql_allocator *sa, sql_rel *rel, lng val) +set_count_prop(sql_allocator *sa, sql_rel *rel, BUN val) { - prop *p = rel->p = prop_create(sa, PROP_COUNT, rel->p); - p->value.lval = val; + if (val != BUN_NONE) { + prop *p = rel->p = prop_create(sa, PROP_COUNT, rel->p); + p->value.lval = val; + } } diff --git a/sql/server/rel_rewriter.h b/sql/server/rel_rewriter.h --- a/sql/server/rel_rewriter.h +++ b/sql/server/rel_rewriter.h @@ -47,7 +47,7 @@ extern int exps_unique(mvc *sql, sql_rel extern sql_column *exp_find_column(sql_rel *rel, sql_exp *exp, int pnr); -extern lng get_rel_count(sql_rel *rel); -extern void set_count_prop(sql_allocator *sa, sql_rel *rel, lng val); +extern BUN get_rel_count(sql_rel *rel); +extern void set_count_prop(sql_allocator *sa, sql_rel *rel, BUN val); #endif /*_REL_REWRITER_H_*/ diff --git a/sql/server/rel_statistics.c b/sql/server/rel_statistics.c --- a/sql/server/rel_statistics.c +++ b/sql/server/rel_statistics.c @@ -596,12 +596,12 @@ set_setop_side(visitor *v, sql_rel *rel, return side; } -static lng +static BUN trivial_project_exp_card(sql_exp *e) { if (e->type == e_convert) return trivial_project_exp_card(e->l); - return e->type == e_atom && e->f ? list_length(e->f) : 1; + return e->type == e_atom && e->f ? (BUN) list_length(e->f) : 1; } static sql_rel * @@ -628,7 +628,8 @@ rel_get_statistics_(visitor *v, sql_rel } /* set table row count */ /* TODO look for remote/replica tables */ - set_count_prop(v->sql->sa, rel, isTable(t) ? 
(lng)store->storage_api.count_col(v->sql->session->tr, ol_first_node(t->columns)->data, 0) : 500000); + if (isTable(t)) + set_count_prop(v->sql->sa, rel, (BUN)store->storage_api.count_col(v->sql->session->tr, ol_first_node(t->columns)->data, 0)); } break; case op_union: case op_inter: @@ -662,7 +663,7 @@ rel_get_statistics_(visitor *v, sql_rel r = rel->r; /* propagate row count */ if (is_union(rel->op)) { - lng lv = get_rel_count(l), rv = get_rel_count(r); + BUN lv = get_rel_count(l), rv = get_rel_count(r); if (lv == 0 && rv == 0) { /* both sides empty */ if (can_be_pruned) @@ -673,13 +674,15 @@ rel_get_statistics_(visitor *v, sql_rel rel = set_setop_side(v, rel, r); } else if (can_be_pruned && rv == 0 && !rel_is_ref(rel)) { /* right side empty */ rel = set_setop_side(v, rel, l); - } else if ((lv + rv) < lv) { - set_count_prop(v->sql->sa, rel, MAX(lv, rv)); - } else { - set_count_prop(v->sql->sa, rel, lv + rv); - } + } else if (lv != BUN_NONE && rv != BUN_NONE) { + if ((lv + rv) < lv) { + set_count_prop(v->sql->sa, rel, MAX(lv, rv)); + } else { + set_count_prop(v->sql->sa, rel, lv + rv); + } + } } else if (is_inter(rel->op) || is_except(rel->op)) { - lng lv = get_rel_count(l), rv = get_rel_count(r); + BUN lv = get_rel_count(l), rv = get_rel_count(r); if (lv == 0) { /* left side empty */ if (can_be_pruned) @@ -735,7 +738,7 @@ rel_get_statistics_(visitor *v, sql_rel rel->exps = rel_prune_predicates(v, rel); if (v->changes > changes) { rel = rewrite_simplify(v, 0, v->value_based_opt, rel); - if (is_select(rel->op) && get_rel_count(rel->l) == -1) /* hack, set generated projection count */ + if (is_select(rel->op) && get_rel_count(rel->l) == BUN_NONE) /* hack, set generated projection count */ set_count_prop(v->sql->sa, rel->l, 0); } } @@ -747,7 +750,7 @@ rel_get_statistics_(visitor *v, sql_rel case op_left: case op_right: case op_full: { - lng lv = get_rel_count(l), rv = get_rel_count(r), uniques_estimate = GDK_lng_max, join_idx_estimate = GDK_lng_max; + BUN lv = 
get_rel_count(l), rv = get_rel_count(r), uniques_estimate = BUN_NONE, join_idx_estimate = BUN_NONE; if (!list_empty(rel->exps)) { for (node *n = rel->exps->h ; n ; n = n->next) { @@ -759,15 +762,15 @@ rel_get_statistics_(visitor *v, sql_rel /* if one of the sides is unique, the cardinality will be that exact number, but look for nulls */ if (!is_semantics(e) || !has_nil(el) || !has_nil(er)) { if (is_unique(el)) { - lng ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv; + BUN ncount = (is_right(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : lv; uniques_estimate = MIN(uniques_estimate, ncount); } else if (is_unique(er)) { - lng ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv; + BUN ncount = (is_left(rel->op) || is_full(rel->op)) ? MAX(lv, rv) : rv; uniques_estimate = MIN(uniques_estimate, ncount); } } if ((p = find_prop(el->p, PROP_NUNIQUES)) && (p2 = find_prop(er->p, PROP_NUNIQUES))) { - lng pv = (lng) p->value.dval, pv2 = (lng) p2->value.dval, mul = pv * pv2; + BUN pv = (BUN) p->value.dval, pv2 = (BUN) p2->value.dval, mul = pv * pv2; mul = mul < pv ? 
MAX(pv, pv2) : mul; /* check for overflows */ if (is_left(rel->op)) @@ -782,29 +785,31 @@ rel_get_statistics_(visitor *v, sql_rel } } } - if (join_idx_estimate != GDK_lng_max) { + if (join_idx_estimate != BUN_NONE) { set_count_prop(v->sql->sa, rel, join_idx_estimate); - } else if (uniques_estimate != GDK_lng_max) { + } else if (uniques_estimate != BUN_NONE) { set_count_prop(v->sql->sa, rel, uniques_estimate); - } else if (list_empty(rel->exps) && is_outerjoin(rel->op)) { /* outer joins without conditions, sum cardinalities instead of multiply */ - if ((lv + rv) < lv) { + } else if (lv != BUN_NONE && rv != BUN_NONE) { + if (list_empty(rel->exps) && is_outerjoin(rel->op)) { /* outer joins without conditions, sum cardinalities instead of multiply */ + if ((lv + rv) < lv) { + set_count_prop(v->sql->sa, rel, MAX(lv, rv)); + } else { + set_count_prop(v->sql->sa, rel, lv + rv); + } + } else if ((lv * rv) < lv) { set_count_prop(v->sql->sa, rel, MAX(lv, rv)); } else { - set_count_prop(v->sql->sa, rel, lv + rv); - } - } else if ((lv * rv) < lv) { - set_count_prop(v->sql->sa, rel, MAX(lv, rv)); - } else { - lng mul = lv * rv; + BUN mul = lv * rv; - if (is_left(rel->op)) - set_count_prop(v->sql->sa, rel, MAX(mul, lv)); - else if (is_right(rel->op)) - set_count_prop(v->sql->sa, rel, MAX(mul, rv)); - else if (is_full(rel->op)) - set_count_prop(v->sql->sa, rel, MAX(MAX(mul, lv), rv)); - else - set_count_prop(v->sql->sa, rel, lv * rv); + if (is_left(rel->op)) + set_count_prop(v->sql->sa, rel, MAX(mul, lv)); + else if (is_right(rel->op)) + set_count_prop(v->sql->sa, rel, MAX(mul, rv)); + else if (is_full(rel->op)) + set_count_prop(v->sql->sa, rel, MAX(MAX(mul, lv), rv)); + else + set_count_prop(v->sql->sa, rel, lv * rv); + } } } break; case op_semi: @@ -817,7 +822,7 @@ rel_get_statistics_(visitor *v, sql_rel if (l) { set_count_prop(v->sql->sa, rel, get_rel_count(l)); } else { - lng card = 1; + BUN card = 1; if (!list_empty(rel->exps)) { for (node *n = rel->exps->h ; n ; n = 
n->next) { @@ -837,7 +842,7 @@ rel_get_statistics_(visitor *v, sql_rel if (e->type == e_column && is_unique(e) && name_find_column(rel, e->l, e->r, -1, &bt) && bt && (p = find_prop(bt->p, PROP_COUNT))) { set_count_prop(v->sql->sa, rel, p->value.lval); } else if ((p = find_prop(e->p, PROP_NUNIQUES))) { - set_count_prop(v->sql->sa, rel, (lng) p->value.dval); + set_count_prop(v->sql->sa, rel, (BUN) p->value.dval); } else { set_count_prop(v->sql->sa, rel, get_rel_count(l)); } @@ -847,39 +852,42 @@ rel_get_statistics_(visitor *v, sql_rel break; } } break; - case op_table: { - set_count_prop(v->sql->sa, rel, 1); /* TODO later we can tune it */ - } break; case op_topn: { - sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL; - lng lv = get_rel_count(rel->l); + BUN lv = get_rel_count(rel->l); - if (oe && oe->l && !exp_is_null(oe)) { /* no parameters */ - lng offset = ((atom*)oe->l)->data.val.lval; - lv = offset > lv ? 0 : lv - offset; + if (lv != BUN_NONE) { + sql_exp *le = rel->exps->h->data, *oe = list_length(rel->exps) > 1 ? rel->exps->h->next->data : NULL; + if (oe && oe->l && !exp_is_null(oe)) { /* no parameters */ + BUN offset = (BUN) ((atom*)oe->l)->data.val.lval; + lv = offset > lv ? 
0 : lv - offset; + } + if (le->l && !exp_is_null(le)) { + BUN limit = (BUN) ((atom*)le->l)->data.val.lval; + lv = MIN(lv, limit); + } + set_count_prop(v->sql->sa, rel, lv); } - if (le->l && !exp_is_null(le)) { - lng limit = ((atom*)le->l)->data.val.lval; - lv = MIN(lv, limit); - } - set_count_prop(v->sql->sa, rel, lv); } break; case op_sample: { - sql_exp *se = rel->exps->h->data; - sql_subtype *tp = exp_subtype(se); - lng lv = get_rel_count(rel->l); + BUN lv = get_rel_count(rel->l); + + if (lv != BUN_NONE) { + sql_exp *se = rel->exps->h->data; + sql_subtype *tp = exp_subtype(se); - if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */ - lng sample = ((atom*)se->l)->data.val.lval; - lv = MIN(lv, sample); - } else if (se->l) { /* sample is a percentage of rows */ - dbl percent = ((atom*)se->l)->data.val.dval; - assert(tp->type->eclass == EC_FLT); - lv = (lng) ceil((dbl)lv * percent); + if (se->l && tp->type->eclass == EC_NUM) { /* sample is a number of rows */ + BUN sample = (BUN) ((atom*)se->l)->data.val.lval; + lv = MIN(lv, sample); + } else if (se->l) { /* sample is a percentage of rows */ + dbl percent = ((atom*)se->l)->data.val.dval; + assert(tp->type->eclass == EC_FLT); + lv = (BUN) ceil((dbl)lv * percent); + } + set_count_prop(v->sql->sa, rel, lv); } - set_count_prop(v->sql->sa, rel, lv); } break; /*These relations are less important for now + case op_table: TODO later we can tune it case op_insert: case op_update: case op_delete: _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org