Changeset: 4ea9c4d4ff60 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4ea9c4d4ff60 Modified Files: sql/server/rel_statistics.c Branch: properties Log Message:
Reduce number of iterations in the AST, also prune intersect relations if that's the case diffs (truncated from 402 to 300 lines): diff --git a/sql/server/rel_statistics.c b/sql/server/rel_statistics.c --- a/sql/server/rel_statistics.c +++ b/sql/server/rel_statistics.c @@ -44,6 +44,9 @@ rel_propagate_column_ref_statistics(mvc case op_semi: { bool found_without_semantics = false, found_left = false, found_right = false; + if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data)) + return NULL; /* nothing will pass, skip */ + /* propagate from the bottom first */ if (rel_propagate_column_ref_statistics(sql, rel->l, e)) found_left = true; @@ -232,28 +235,33 @@ rel_basetable_get_statistics(visitor *v, return e; } -static void +static bool rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp *e, int i) { sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i); - atom *lval, *rval; + atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX), + *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX); - assert(le && re && e); - if ((lval = find_prop_and_get(le->p, PROP_MAX)) && (rval = find_prop_and_get(re->p, PROP_MAX))) { + if (is_inter(rel->op) && exp_is_not_null(le) && exp_is_not_null(re) && + lval_min && rval_min && lval_max && rval_max && !lval_min->isnull && !rval_min->isnull && !lval_max->isnull && !rval_max->isnull && + (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0)) + return true; + + if (lval_min && rval_min) { if (is_union(rel->op)) - set_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval, rval)); /* for union the new max will be the max of the two */ + set_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the min of the two */ else if (is_inter(rel->op)) - set_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval, rval)); /* for intersect the new max will be the min of the two */ + set_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min will be the max of the two */ else /* except */ - set_property(sql, e, PROP_MAX, lval); + set_property(sql, e, PROP_MIN, lval_min); } - if ((lval = find_prop_and_get(le->p, PROP_MIN)) && (rval = find_prop_and_get(re->p, PROP_MIN))) { + if (lval_max && rval_max) { if (is_union(rel->op)) - set_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval, rval)); /* for union the new min will be the min of the two */ + set_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the max of the two */ else if (is_inter(rel->op)) - set_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval, rval)); /* for intersect the new min will be the max of the two */ + set_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max will be the min of the two */ else /* except */ - set_property(sql, e, PROP_MIN, lval); + set_property(sql, e, PROP_MAX, lval_max); } if (is_union(rel->op)) { @@ -267,6 +275,7 @@ rel_setop_get_statistics(mvc *sql, sql_r if (!has_nil(le)) set_has_no_nil(e); } + return false; } static sql_exp * @@ -407,6 +416,130 @@ rel_propagate_statistics(visitor *v, sql return e; } +static list * /* Remove predicates always false from min/max values */ +rel_prune_predicates(visitor *v, sql_rel *rel) +{ + for (node *n = rel->exps->h ; n ; n = n->next) { + sql_exp *e = n->data; + + if (e->type == e_cmp && (is_theta_exp(e->flag) || e->f)) { + sql_exp *le = e->l, *re = e->r, *fe = e->f; + atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = find_prop_and_get(le->p, PROP_MAX), + *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX); + bool always_false = false, always_true = false; + + if (fe && !(e->flag & CMP_SYMMETRIC)) { + atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX); + comp_type lower = range2lcompare(e->flag), higher = range2rcompare(e->flag); + int not_int1 = rval_min && lval_max && !rval_min->isnull && !lval_max->isnull && /* the middle and left intervals don't overlap */ + (!e->anti && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)), + not_int2 = lval_min && fval_max && !lval_min->isnull && !fval_max->isnull && /* the middle and right intervals don't overlap */ + (!e->anti && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)), + not_int3 = rval_min && fval_max && !rval_min->isnull && !fval_max->isnull && /* the left interval is after the right one */ + (!e->anti && (atom_cmp(rval_min, fval_max) > 0)); + + always_false |= not_int1 || not_int2 || not_int3; + /* for anti the middle must be before the left or after the right or the right after the left, for the other the middle must be always between the left and right intervals */ + always_true |= exp_is_not_null(le) && exp_is_not_null(re) && exp_is_not_null(fe) && + lval_min && lval_max && rval_min && rval_max && fval_min && fval_max && !lval_min->isnull && !lval_max->isnull && !rval_min->isnull && !rval_max->isnull && !fval_min->isnull && !fval_max->isnull && + (e->anti ? ((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) : + ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0))); + } else { + switch (e->flag) { + case cmp_equal: + if (lval_min && lval_max && rval_min && rval_max && !lval_min->isnull && !lval_max->isnull && !rval_min->isnull && !rval_max->isnull) + always_false |= (!e->anti && (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0)) || (e->anti && atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= 0); + if (is_semantics(e)) + always_false |= is_semantics(e) ? + e->anti ? (exp_is_null(le) && exp_is_null(re)) || (exp_is_not_null(le) && exp_is_not_null(re)) : (exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)) : + e->anti ? exp_is_not_null(le) && exp_is_not_null(re) : (exp_is_null(le) && exp_is_null(re)) || (exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)); + break; + case cmp_gt: + if (lval_max && rval_min && !lval_max->isnull && !rval_min->isnull) + always_false |= e->anti ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0; + if (lval_min && rval_max && !lval_min->isnull && !rval_max->isnull) + always_true |= exp_is_not_null(le) && exp_is_not_null(re) && (e->anti ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0); + break; + case cmp_gte: + if (lval_max && rval_min && !lval_max->isnull && !rval_min->isnull) + always_false |= e->anti ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0; + if (lval_min && rval_max && !lval_min->isnull && !rval_max->isnull) + always_true |= exp_is_not_null(le) && exp_is_not_null(re) && (e->anti ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0); + break; + case cmp_lt: + if (lval_min && rval_max && !lval_min->isnull && !rval_max->isnull) + always_false |= e->anti ? atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0; + if (lval_max && rval_min && !lval_max->isnull && !rval_min->isnull) + always_true |= exp_is_not_null(le) && exp_is_not_null(re) && (e->anti ? atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0); + break; + case cmp_lte: + if (lval_min && rval_max && !lval_min->isnull && !rval_max->isnull) + always_false |= e->anti ? atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0; + if (lval_max && rval_min && !lval_max->isnull && !rval_min->isnull) + always_true |= exp_is_not_null(le) && exp_is_not_null(re) && (e->anti ? atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0); + break; + default: /* Maybe later I can do cmp_in and cmp_notin */ + break; + } + } + assert(!always_false || !always_true); + if (always_false || always_true) { + sql_exp *ne = exp_atom_bool(v->sql->sa, always_true); + if (exp_name(e)) + exp_prop_alias(v->sql->sa, ne, e); + n->data = ne; + v->changes++; + } + } + } + return rel->exps; +} + +static list* +rel_simplify_count(visitor *v, sql_rel *rel) +{ + mvc *sql = v->sql; + int ncountstar = 0; + + /* Convert count(no null) into count(*) */ + for (node *n = rel->exps->h; n ; n = n->next) { + sql_exp *e = n->data; + + if (exp_aggr_is_count(e) && !need_distinct(e)) { + if (list_length(e->l) == 0) { + ncountstar++; + } else if (list_length(e->l) == 1 && !has_nil((sql_exp*)((list*)e->l)->h->data)) { + sql_subfunc *cf = sql_bind_func(sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR); + sql_exp *ne = exp_aggr(sql->sa, NULL, cf, 0, 0, e->card, 0); + if (exp_name(e)) + exp_prop_alias(sql->sa, ne, e); + n->data = ne; + ncountstar++; + v->changes++; + } + } + } + /* With multiple count(*), use exp_ref to reduce the number of calls to this aggregate */ + if (ncountstar > 1) { + sql_exp *count_star = NULL; + for (node *n = rel->exps->h; n ; n = n->next) { + sql_exp *e = n->data; + + if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0) { + if (!count_star) { + count_star = e; + } else { + sql_exp *ne = exp_ref(sql, count_star); + if (exp_name(e)) + exp_prop_alias(sql->sa, ne, e); + n->data = ne; + } + } + } + } + return rel->exps; +} + static sql_rel * rel_get_statistics(visitor *v, sql_rel *rel) { @@ -417,6 +550,7 @@ rel_get_statistics(visitor *v, sql_rel * case op_union: case op_inter: case op_except: { + bool can_be_pruned = false; int i = 0; sql_rel *l = rel->l, *r = rel->r; @@ -434,10 +568,24 @@ rel_get_statistics(visitor *v, sql_rel * r->exps = exps_exp_visitor_bottomup(v, r, r->exps, 0, &rel_propagate_statistics, false); } - for (node *n = rel->exps->h ; n ; n = n->next) { - rel_setop_get_statistics(v->sql, rel, l->exps, r->exps, n->data, i); + for (node *n = rel->exps->h ; n && !can_be_pruned ; n = n->next) { + can_be_pruned |= rel_setop_get_statistics(v->sql, rel, l->exps, r->exps, n->data, i); i++; } + if (can_be_pruned) { + rel_destroy(rel->l); + rel->l = NULL; + rel_destroy(rel->r); + rel->r = NULL; + for (node *n = rel->exps->h ; n ; n = n->next) { + sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, exp_subtype(e), NULL)); + exp_prop_alias(v->sql->sa, a, e); + n->data = a; + } + rel = rel_project(v->sql->sa, NULL, rel->exps); + rel = rel_select(v->sql->sa, rel, exp_atom_bool(v->sql->sa, 0)); + rel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1)); + } } break; case op_join: case op_left: @@ -450,8 +598,13 @@ rel_get_statistics(visitor *v, sql_rel * case op_groupby: case op_ddl: rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, &rel_propagate_statistics, false); - if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r)) + if (is_simple_project(rel->op) && !list_empty(rel->r)) rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, false); + /* The following optimizations can only be applied after propagating the statistics to rel->exps */ + if (is_groupby(rel->op) && rel->exps) + rel->exps = rel_simplify_count(v, rel); + if ((is_join(rel->op) || is_select(rel->op)) && rel->exps) + rel->exps = rel_prune_predicates(v, rel); break; /*These relations are less important for now case op_table: @@ -468,131 +621,6 @@ rel_get_statistics(visitor *v, sql_rel * return rel; } -static sql_rel * -rel_simplify_count(visitor *v, sql_rel *rel) -{ - mvc *sql = v->sql; - - if (is_groupby(rel->op)) { - int ncountstar = 0; - - /* Convert count(no null) into count(*) */ - for (node *n = rel->exps->h; n ; n = n->next) { - sql_exp *e = n->data; - - if (exp_aggr_is_count(e) && !need_distinct(e)) { - if (list_length(e->l) == 0) { - ncountstar++; - } else if (list_length(e->l) == 1 && !has_nil((sql_exp*)((list*)e->l)->h->data)) { - sql_subfunc *cf = sql_bind_func(sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR); - sql_exp *ne = exp_aggr(sql->sa, NULL, cf, 0, 0, e->card, 0); - if (exp_name(e)) - exp_prop_alias(sql->sa, ne, e); - n->data = ne; - ncountstar++; - v->changes++; - } - } - } - /* With multiple count(*), use exp_ref to reduce the number of calls to this aggregate */ - if (ncountstar > 1) { - sql_exp *count_star = NULL; - for (node *n = rel->exps->h; n ; n = n->next) { - sql_exp *e = n->data; - - if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0) { - if (!count_star) { - count_star = e; - } else { - sql_exp *ne = exp_ref(sql, count_star); - if (exp_name(e)) - exp_prop_alias(sql->sa, ne, e); - n->data = ne; - } - } - } - } - } - return rel; -} - -static sql_exp * /* Remove predicates always false from min/max values */ _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list