Changeset: 5dd880ae4f97 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/5dd880ae4f97 Branch: default Log Message:
Merges cmp-or-patterns branch diffs (truncated from 796 to 300 lines): diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c --- a/sql/backends/monet5/rel_bin.c +++ b/sql/backends/monet5/rel_bin.c @@ -520,7 +520,7 @@ handle_in_tuple_exps(backend *be, sql_ex else s = cursel; } - if (sel && !(depth || !reduce)) + if (!depth && reduce) s = stmt_uselect(be, s->nrcols == 0?stmt_const(be, bin_find_smallest_column(be, left), s): s, stmt_bool(be, 1), cmp_equal, sel, 0, 0); diff --git a/sql/server/rel_optimize_sel.c b/sql/server/rel_optimize_sel.c --- a/sql/server/rel_optimize_sel.c +++ b/sql/server/rel_optimize_sel.c @@ -423,91 +423,366 @@ exps_cse( mvc *sql, list *oexps, list *l return res; } -static int -are_equality_exps( list *exps, sql_exp **L) +static inline int +exp_col_key(sql_exp *e) +{ + return e->nid ? e->nid : e->alias.label; +} + +static inline int +exp_cmp_eq_unique_id(sql_exp *e) +{ + return exp_col_key(e->l); +} + +static inline int +exp_multi_col_key(list *l) +{ + int k = exp_col_key(l->h->data); + for (node *n = l->h->next; n; n = n->next) { + k <<= 4; + k ^= exp_col_key(n->data); + } + return k; +} + +typedef struct exp_eq_col_values { + /* we need ->first in order to remove it from the list of cmp_eq exps + * in case that we find another occurrence (with a different value) + */ + sql_exp* first; + sql_exp* col; /* column */ + list *vs; /* list of values */ +} eq_cv; + +typedef struct exp_eq_multi_col_values { + /* we need ->first in order to remove it from the list of multi col + * cmp_eq exps in case that we find another occurrence (with different values) + */ + list *first; + list *cols; /* list of col exps */ + list *lvs; /* list of lists of values */ +} eq_mcv; + +static bool +detect_col_cmp_eqs(mvc *sql, list *eqs, sql_hash *eqh) { - sql_exp *l = *L; - - if (list_length(exps) == 1) { - sql_exp *e = exps->h->data, *le = e->l, *re = e->r; - - if (e->type == e_cmp && e->flag == cmp_equal && le->card != CARD_ATOM && re->card == CARD_ATOM && !is_semantics(e)) { - if (!l) { - *L = l = le; - if (!is_column(le->type)) - return 0; + bool col_multivalue_cmp_eq = false; + for (node *n = eqs->h; n; n = n->next ) { + sql_exp *e = n->data; + sql_exp *le = e->l, *re = e->r; + + /* find the le in the hash and append the re in the hash value (ea->list) */ + bool found = false; + + int key = eqh->key(le); + sql_hash_e *he = eqh->buckets[key&(eqh->size-1)]; + + for (;he && !found; he = he->chain) { + eq_cv *cv = he->value; + if (!exp_equal(le, cv->col)) { + cv->vs = append(cv->vs, re); + found = col_multivalue_cmp_eq = true; + /* remove this and the previous (->first) occurrence (if exists) from eqs */ + if (cv->first) { + list_remove_data(eqs, NULL, cv->first); + cv->first = NULL; + } + list_remove_node(eqs, NULL, n); } - return (exp_match(l, le)); + } + + if (!found) { + eq_cv *cv = SA_NEW(sql->sa, eq_cv); + cv->first = e; + cv->vs = sa_list(sql->sa); + cv->vs = append(cv->vs, re); + cv->col = le; + + hash_add(eqh, key, cv); } - if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) && !is_semantics(e)) - return (are_equality_exps(e->l, L) && are_equality_exps(e->r, L)); } - return 0; + return col_multivalue_cmp_eq; +} + +static bool +detect_multicol_cmp_eqs(mvc *sql, list *mce_ands, sql_hash *meqh) +{ + /* we get as input a list of AND associated expressions (hence the entries are lists themselves) + * we need to detect cmp_eq-only AND-associated expressions with the same columns so we can + * group together their values + * e.g. [[n = 1, m = 10], [m = 20, k = 100, l = 3000], [m = 20, n = 2]] has + * - (m,k,l) group with a single value (20, 100, 3000) + * - (n,k) group with two values (1, 10) and (2, 20) + * at the end we return true only if we have at least a group of columns with more than a single value + * e.g. in this example (n,k) + */ + bool multi_multivalue_cmp_eq = false; + for (node *n = mce_ands->h; n; n = n->next) { + list *l = n->data; + + /* sort the list of the cmp_eq expressions based on the col exp + * NOTE: from now on we only work with the sorted list, sl */ + list *sl = list_sort(l, (fkeyvalue)&exp_cmp_eq_unique_id, NULL); + list_append_before(mce_ands, n, sl); + list_remove_node(mce_ands, NULL, n); + + /* find the eq exp in the hash and append the values */ + bool found = false; + + int key = meqh->key(sl); + sql_hash_e *he = meqh->buckets[key&(meqh->size-1)]; + + for (;he && !found; he = he->chain) { + /* compare the values of the hash_entry with the cols under cmp_eq from the list */ + bool same_cols = true; + eq_mcv *mcv = he->value; + for (node *m = sl->h, *k = mcv->cols->h; m && k && same_cols; m = m->next, k = k->next) { + sql_exp *col_exp = ((sql_exp*)m->data)->l; + if (exp_equal(col_exp, k->data)) + same_cols = false; + } + if (same_cols) { + /* we found the same multi cmp_eq exp in mce_ands list multiple times! */ + found = multi_multivalue_cmp_eq = true; + /* gather all the values of the list and add them to the hash entry */ + list *atms = sa_list(sql->sa); + for (node *m = sl->h; m; m = m->next) + atms = append(atms, ((sql_exp*)m->data)->r); + mcv->lvs = append(mcv->lvs, atms); + /* remove this and the previous occurrence (which means that's the first time + * that we found the *same* multi cmp_eq exp) + */ + if (mcv->first) { + list_remove_data(mce_ands, NULL, mcv->first); + mcv->first = NULL; + } + list_remove_data(mce_ands, NULL, sl); + } + } + + if (!found) { + eq_mcv *mcv = SA_NEW(sql->sa, eq_mcv); + mcv->first = sl; + mcv->cols = sa_list(sql->sa); + for (node *m = sl->h; m; m = m->next) + mcv->cols = append(mcv->cols, ((sql_exp*)m->data)->l); + /* for the list of values (atoms) create a list and append it to the lvs list */ + list *atms = sa_list(sql->sa); + for (node *m = sl->h; m; m = m->next) + atms = append(atms, ((sql_exp*)m->data)->r); + mcv->lvs = sa_list(sql->sa); + mcv->lvs = append(mcv->lvs, atms); + + hash_add(meqh, key, mcv); + } + } + return multi_multivalue_cmp_eq; } static void -get_exps( list *n, list *l ) +exp_or_chain_groups(mvc *sql, list *exps, list **gen_ands, list **mce_ands, list **eqs, list **noneq) { - sql_exp *e = l->h->data, *re = e->r; - - if (e->type == e_cmp && e->flag == cmp_equal && re->card == CARD_ATOM) - list_append(n, re); - if (e->type == e_cmp && e->flag == cmp_or) { - get_exps(n, e->l); - get_exps(n, e->r); + /* identify three different groups + * 1. gen_ands: lists of generic expressions (their inner association is AND) + * 2. mce_ands: lists of multi_colum cmp_eq ONLY expressions (same^^^) + * 3. eqs: equality expressions + * 4. neq: non equality col expressions + * + * return true if there is an exp with more than one cmp_eq + */ + bool eq_only = true; + for (node *n = exps->h; n && eq_only; n = n->next) { + sql_exp *e = n->data; + sql_exp *le = e->l, *re = e->r; + eq_only &= (e->type == e_cmp && e->flag == cmp_equal && + le->card != CARD_ATOM && is_column(le->type) && + re->card == CARD_ATOM && !is_semantics(e)); + } + + if (list_length(exps) > 1) { + if (eq_only) + *mce_ands = append(*mce_ands, exps); + else + *gen_ands = append(*gen_ands, exps); + } else if (list_length(exps) == 1) { + sql_exp *se = exps->h->data; + sql_exp *le = se->l, *re = se->r; + + if (se->type == e_cmp && se->flag == cmp_or && !is_anti(se)) { + /* for a cmp_or expression go down the tree */ + exp_or_chain_groups(sql, (list*)le, gen_ands, mce_ands, eqs, noneq); + exp_or_chain_groups(sql, (list*)re, gen_ands, mce_ands, eqs, noneq); + + } else if (eq_only) { + *eqs = append(*eqs, se); + } else { + *noneq = append(*noneq, se); + } } } -static sql_exp * -equality_exps_2_in( mvc *sql, sql_exp *ce, list *l, list *r) +static list * +generate_single_col_cmp_in(mvc *sql, sql_hash *eqh) { - list *nl = new_exp_list(sql->sa); - - get_exps(nl, l); - get_exps(nl, r); - - return exp_in( sql->sa, ce, nl, cmp_in); + /* from single col cmp_eq with multiple atoms in the hash generate + * "e_col in (val0, val1, ...)" (see detect_col_cmp_eqs()) + */ + list *ins = new_exp_list(sql->sa); + for (int i = 0; i < eqh->size; i++) { + sql_hash_e *he = eqh->buckets[i]; + + while (he) { + eq_cv *cv = he->value; + /* NOTE: cmp_eq expressions with a single entry are still in eqs */ + if (list_length(cv->vs) > 1) + ins = append(ins, exp_in(sql->sa, cv->col, cv->vs, cmp_in)); + he = he->chain; + } + } + return ins; +} + +static list * +generate_multi_col_cmp_in(mvc *sql, sql_hash *meqh) +{ + /* from multivalue cmp_eq with multiple lists of atoms in the hash generate + * "(col1, col2, ...) in [(val10, val20, ...), (val11, val21, ...), ... ]" + * (see detect_multicol_cmp_eqs()) + */ + list *ins = new_exp_list(sql->sa); + for (int i = 0; i < meqh->size; i++) { + sql_hash_e *he = meqh->buckets[i]; + while (he) { + eq_mcv *mcv = he->value; + /* NOTE: multivalue cmp_eq expressions with a single entry are still in mce_ands */ + if (list_length(mcv->lvs) > 1) { + sql_exp *mc = exp_label(sql->sa, exp_values(sql->sa, mcv->cols), ++sql->label); + for (node *a = mcv->lvs->h; a; a = a->next) + a->data = exp_values(sql->sa, a->data); + ins = append(ins, exp_in(sql->sa, mc, mcv->lvs, cmp_in)); + } + he = he->chain; + } + } + return ins; } static list * merge_ors(mvc *sql, list *exps, int *changes) { - list *nexps = NULL; _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org