Changeset: 8a951994a21d for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=8a951994a21d Modified Files: sql/server/rel_statistics.c sql/server/rel_statistics.h Branch: properties Log Message:
Attempting to make properties propagation more accurate. Continue looking into the join/select list of expressions for a column to continue propagating min/max values. If there is a comparison with null semantics, then the 'nonil' property won't be propagated. If the column is used in a comparison and the relation is not an outer join, then the 'nonil' property can be added. diffs (truncated from 411 to 300 lines): diff --git a/sql/server/rel_statistics.c b/sql/server/rel_statistics.c --- a/sql/server/rel_statistics.c +++ b/sql/server/rel_statistics.c @@ -12,15 +12,39 @@ #include "rel_optimizer.h" #include "rel_rewriter.h" -static bool -exps_have_or(list *exps) +static void exps_have_or_or_semantics(list *exps, bool *have_or, bool *have_semantics); + +static void +exp_have_or_or_semantics(sql_exp *e, bool *have_or, bool *have_semantics) { - for (node *n = exps->h ; n ; n = n->next) { - sql_exp *e = n->data; - if (e->type == e_cmp && e->flag == cmp_or) - return true; + if (e->type == e_cmp) { + if (e->semantics) + *have_semantics = true; + if (e->flag == cmp_in || e->flag == cmp_notin) { + exp_have_or_or_semantics(e->l, have_or, have_semantics); + exps_have_or_or_semantics(e->r, have_or, have_semantics); + } else if (e->flag == cmp_or) { + *have_or = true; + exps_have_or_or_semantics(e->l, have_or, have_semantics); + exps_have_or_or_semantics(e->r, have_or, have_semantics); + } else if (e->flag == cmp_filter) { + exps_have_or_or_semantics(e->l, have_or, have_semantics); + exps_have_or_or_semantics(e->r, have_or, have_semantics); + } else { + exp_have_or_or_semantics(e->l, have_or, have_semantics); + exp_have_or_or_semantics(e->r, have_or, have_semantics); + if (e->f) + exp_have_or_or_semantics(e->f, have_or, have_semantics); + } } - return false; +} + +static void +exps_have_or_or_semantics(list *exps, bool *have_or, bool *have_semantics) +{ + if (exps) + for (node *n = exps->h ; n ; n = n->next) + exp_have_or_or_semantics(n->data, have_or, have_semantics); } static 
sql_exp * @@ -52,115 +76,123 @@ rel_propagate_column_ref_statistics(mvc case op_select: /* case op_anti: later */ case op_semi: { - if (!list_empty(rel->exps) && !exps_have_or(rel->exps)) { /* if there's an or, the MIN and MAX get difficult to propagate */ - for (node *n = rel->exps->h ; n && !ne; n = n->next) { + bool have_or = false, have_semantics = false, found_left = false, found_right = false, found_on_exps = false; + sql_exp *ne = NULL; + + exps_have_or_or_semantics(rel->exps, &have_or, &have_semantics); + if (!list_empty(rel->exps)) { /* if there's an or, the MIN and MAX get difficult to propagate */ + for (node *n = rel->exps->h ; n ; n = n->next) { sql_exp *comp = n->data; if (comp->type == e_cmp) { switch (comp->flag) { case cmp_equal: { - sql_exp *le = comp->l, *re = comp->r, *rne = NULL; + sql_exp *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL; - if ((ne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e))) { + if ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e))) { if (is_outerjoin(rel->op)) { - if ((lval = find_prop_and_get(le ? le->p : re->p, PROP_MAX))) - set_property(sql, e, PROP_MAX, lval); - if ((lval = find_prop_and_get(le ? le->p : re->p, PROP_MIN))) - set_property(sql, e, PROP_MIN, lval); + if (!have_or && (lval = find_prop_and_get(le ? le->p : re->p, PROP_MAX))) + set_max_property(sql, e, lval); + if (!have_or && (lval = find_prop_and_get(le ? 
le->p : re->p, PROP_MIN))) + set_min_property(sql, e, lval); } else { - if ((lval = find_prop_and_get(le->p, PROP_MAX)) && (rval = find_prop_and_get(re->p, PROP_MAX))) - set_min_of_values(sql, e, PROP_MAX, lval, rval); /* for equality reduce */ - if ((lval = find_prop_and_get(le->p, PROP_MIN)) && (rval = find_prop_and_get(re->p, PROP_MIN))) - set_max_of_values(sql, e, PROP_MIN, lval, rval); + if (!have_or && (lval = find_prop_and_get(le->p, PROP_MAX)) && + (rval = find_prop_and_get(re->p, PROP_MAX))) + set_max_property(sql, e, atom_min(lval, rval)); /* for equality reduce */ + if (!have_or && (lval = find_prop_and_get(le->p, PROP_MIN)) && + (rval = find_prop_and_get(re->p, PROP_MIN))) + set_min_property(sql, e, atom_max(lval, rval)); } } - ne = ne ? ne : rne; + ne = ne ? ne : lne ? lne : rne; } break; case cmp_notequal: { - sql_exp *le = comp->l, *re = comp->r, *rne = NULL; + sql_exp *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL; - if ((ne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e))) { + if ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e))) { if (is_outerjoin(rel->op)) { - if ((lval = find_prop_and_get(le ? le->p : re->p, PROP_MAX))) - set_property(sql, e, PROP_MAX, lval); - if ((lval = find_prop_and_get(le ? le->p : re->p, PROP_MIN))) - set_property(sql, e, PROP_MIN, lval); + if (!have_or && (lval = find_prop_and_get(le ? le->p : re->p, PROP_MAX))) + set_max_property(sql, e, lval); + if (!have_or && (lval = find_prop_and_get(le ? 
le->p : re->p, PROP_MIN))) + set_min_property(sql, e, lval); } else { - if ((lval = find_prop_and_get(le->p, PROP_MAX)) && (rval = find_prop_and_get(re->p, PROP_MAX))) - set_max_of_values(sql, e, PROP_MAX, lval, rval); /* for inequality expand */ - if ((lval = find_prop_and_get(le->p, PROP_MIN)) && (rval = find_prop_and_get(re->p, PROP_MIN))) - set_min_of_values(sql, e, PROP_MIN, lval, rval); + if (!have_or && (lval = find_prop_and_get(le->p, PROP_MAX)) && + (rval = find_prop_and_get(re->p, PROP_MAX))) + set_max_property(sql, e, atom_max(lval, rval));/* for inequality expand */ + if (!have_or && (lval = find_prop_and_get(le->p, PROP_MIN)) && + (rval = find_prop_and_get(re->p, PROP_MIN))) + set_min_property(sql, e, atom_min(lval, rval)); } } - ne = ne ? ne : rne; + ne = ne ? ne : lne ? lne : rne; } break; case cmp_gt: case cmp_gte: { - sql_exp *le = comp->l, *re = comp->r, *rne = NULL; + sql_exp *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL; assert(!comp->f); - if ((ne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e))) { + if ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e))) { if (is_outerjoin(rel->op)) { - if ((lval = find_prop_and_get(le ? le->p : re->p, PROP_MAX))) - set_property(sql, e, PROP_MAX, lval); - if ((lval = find_prop_and_get(le ? le->p : re->p, PROP_MIN))) - set_property(sql, e, PROP_MIN, lval); - } else if (ne) { - if ((lval = find_prop_and_get(le->p, PROP_MAX))) - set_property(sql, e, PROP_MAX, lval); - if ((rval = find_prop_and_get(re->p, PROP_MAX))) - set_property(sql, e, PROP_MIN, rval); + if (!have_or && (lval = find_prop_and_get(le ? le->p : re->p, PROP_MAX))) + set_max_property(sql, e, lval); + if (!have_or && (lval = find_prop_and_get(le ? 
le->p : re->p, PROP_MIN))) + set_min_property(sql, e, lval); + } else if (lne) { + if (!have_or && (lval = find_prop_and_get(le->p, PROP_MAX))) + set_max_property(sql, e, lval); + if (!have_or && (rval = find_prop_and_get(re->p, PROP_MAX))) + set_min_property(sql, e, rval); } else { - if ((lval = find_prop_and_get(le->p, PROP_MIN))) - set_property(sql, e, PROP_MAX, lval); - if ((rval = find_prop_and_get(re->p, PROP_MIN))) - set_property(sql, e, PROP_MIN, rval); + if (!have_or && (lval = find_prop_and_get(le->p, PROP_MIN))) + set_max_property(sql, e, lval); + if (!have_or && (rval = find_prop_and_get(re->p, PROP_MIN))) + set_min_property(sql, e, rval); } } - ne = ne ? ne : rne; + ne = ne ? ne : lne ? lne : rne; } break; case cmp_lt: case cmp_lte: { - sql_exp *le = comp->l, *re = comp->r, *fe = comp->f, *rne = NULL, *fne = NULL; + sql_exp *le = comp->l, *lne = NULL, *re = comp->r, *rne = NULL, *fe = comp->f, *fne = NULL; - if ((ne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e)))) { + if ((lne = comparison_find_column(le, e)) || (rne = comparison_find_column(re, e)) || (fe && (fne = comparison_find_column(fe, e)))) { if (is_outerjoin(rel->op)) { - if ((lval = find_prop_and_get(le ? le->p : re ? re->p : fe->p, PROP_MAX))) - set_property(sql, e, PROP_MAX, lval); - if ((lval = find_prop_and_get(le ? le->p : re ? re->p : fe->p, PROP_MIN))) - set_property(sql, e, PROP_MIN, lval); - } else if (ne) { + if (!have_or && (lval = find_prop_and_get(le ? le->p : re ? re->p : fe->p, PROP_MAX))) + set_max_property(sql, e, lval); + if (!have_or && (lval = find_prop_and_get(le ? le->p : re ? 
re->p : fe->p, PROP_MIN))) + set_min_property(sql, e, lval); + } else if (lne) { if (fe) { /* range case */ - if ((lval = find_prop_and_get(fe->p, PROP_MIN))) - set_property(sql, e, PROP_MAX, lval); - if ((rval = find_prop_and_get(re->p, PROP_MAX))) - set_property(sql, e, PROP_MIN, rval); + if (!have_or && (lval = find_prop_and_get(fe->p, PROP_MIN))) + set_max_property(sql, e, lval); + if (!have_or && (rval = find_prop_and_get(re->p, PROP_MAX))) + set_min_property(sql, e, rval); } else { - if ((lval = find_prop_and_get(re->p, PROP_MIN))) - set_property(sql, e, PROP_MAX, lval); - if ((rval = find_prop_and_get(le->p, PROP_MIN))) - set_property(sql, e, PROP_MIN, rval); + if (!have_or && (lval = find_prop_and_get(re->p, PROP_MIN))) + set_max_property(sql, e, lval); + if (!have_or && (rval = find_prop_and_get(le->p, PROP_MIN))) + set_min_property(sql, e, rval); } } else if (rne) { if (fe) { /* range case */ - if ((lval = find_prop_and_get(re->p, PROP_MIN))) - set_property(sql, e, PROP_MAX, lval); - if ((rval = find_prop_and_get(le->p, PROP_MIN))) - set_property(sql, e, PROP_MIN, rval); + if (!have_or && (lval = find_prop_and_get(re->p, PROP_MIN))) + set_max_property(sql, e, lval); + if (!have_or && (rval = find_prop_and_get(le->p, PROP_MIN))) + set_min_property(sql, e, rval); } else { - if ((lval = find_prop_and_get(re->p, PROP_MAX))) - set_property(sql, e, PROP_MAX, lval); - if ((rval = find_prop_and_get(le->p, PROP_MAX))) - set_property(sql, e, PROP_MIN, rval); + if (!have_or && (lval = find_prop_and_get(re->p, PROP_MAX))) + set_max_property(sql, e, lval); + if (!have_or && (rval = find_prop_and_get(le->p, PROP_MAX))) + set_min_property(sql, e, rval); } } else { /* range case */ - if ((lval = find_prop_and_get(fe->p, PROP_MAX))) - set_property(sql, e, PROP_MAX, lval); - if ((rval = find_prop_and_get(le->p, PROP_MAX))) - set_property(sql, e, PROP_MIN, rval); + if (!have_or && (lval = find_prop_and_get(fe->p, PROP_MAX))) + set_max_property(sql, e, lval); + if (!have_or 
&& (rval = find_prop_and_get(le->p, PROP_MAX))) + set_min_property(sql, e, rval); } } - ne = ne ? ne : rne ? rne : fne; + ne = ne ? ne : lne ? lne : rne ? rne : fne; } break; default: /* Maybe later I can do cmp_in and cmp_notin */ break; @@ -168,22 +200,27 @@ rel_propagate_column_ref_statistics(mvc } } } - if (ne && !find_prop(e->p, PROP_MAX) && !find_prop(e->p, PROP_MIN)) /* ne was found, but the properties could not be propagated */ - ne = NULL; - if (!ne) - ne = rel_propagate_column_ref_statistics(sql, rel->l, e); - if (!ne && is_join(rel->op)) - ne = rel_propagate_column_ref_statistics(sql, rel->r, e); if (ne) - set_has_nil(e); /* TODO do this better */ + found_on_exps = true; + if (!ne && (ne = rel_propagate_column_ref_statistics(sql, rel->l, e))) + found_left = true; + if (!ne && is_join(rel->op) && (ne = rel_propagate_column_ref_statistics(sql, rel->r, e))) + found_right = true; + if (ne) { + /* if semantics flag was found, null values will pass */ + if (is_full(rel->op) || (is_left(rel->op) && found_right) || (is_right(rel->op) && found_left) || have_semantics) + set_has_nil(e); + else if (found_on_exps && (!has_nil(ne) || !is_outerjoin(rel->op))) /* at an outer join, null values pass */ + set_has_no_nil(e); + } } break; /* case op_table: later */ case op_basetable: { if (e->l && (ne = exps_bind_column2(rel->exps, e->l, e->r, NULL))) { if ((lval = find_prop_and_get(ne->p, PROP_MAX))) - set_property(sql, e, PROP_MAX, lval); + set_max_property(sql, e, lval); if ((lval = find_prop_and_get(ne->p, PROP_MIN))) - set_property(sql, e, PROP_MIN, lval); + set_min_property(sql, e, lval); if (!has_nil(ne)) set_has_no_nil(e); } @@ -196,9 +233,9 @@ rel_propagate_column_ref_statistics(mvc ne = e->l ? 
exps_bind_column2(rel->exps, e->l, e->r, NULL) : exps_bind_column(rel->exps, e->r, NULL, NULL, 1); if (ne) { if ((lval = find_prop_and_get(ne->p, PROP_MAX))) - set_property(sql, e, PROP_MAX, lval); + set_max_property(sql, e, lval); if ((lval = find_prop_and_get(ne->p, PROP_MIN))) - set_property(sql, e, PROP_MIN, lval); + set_min_property(sql, e, lval); if (!has_nil(ne)) set_has_no_nil(e); } @@ -269,19 +306,19 @@ rel_set_get_statistics(mvc *sql, sql_rel assert(le && e); if ((lval = find_prop_and_get(le->p, PROP_MAX)) && (rval = find_prop_and_get(re->p, PROP_MAX))) { if (rel->op == op_union) - set_max_of_values(sql, e, PROP_MAX, lval, rval); /* for union the new max will be the max of the two */ + set_max_property(sql, e, atom_max(lval, rval)); /* for union the new max will be the max of the two */ else if (rel->op == op_inter) - set_min_of_values(sql, e, PROP_MAX, lval, rval); /* for intersect the new max will be the min of the two */ + set_max_property(sql, e, atom_min(lval, rval)); /* for intersect the new max will be the min of the two */ _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list