Changeset: 5dd880ae4f97 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/5dd880ae4f97
Branch: default
Log Message:

Merges cmp-or-patterns branch


diffs (truncated from 796 to 300 lines):

diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c
--- a/sql/backends/monet5/rel_bin.c
+++ b/sql/backends/monet5/rel_bin.c
@@ -520,7 +520,7 @@ handle_in_tuple_exps(backend *be, sql_ex
                else
                        s = cursel;
        }
-       if (sel && !(depth || !reduce))
+       if (!depth && reduce)
                s = stmt_uselect(be,
                        s->nrcols == 0?stmt_const(be, 
bin_find_smallest_column(be, left), s): s,
                        stmt_bool(be, 1), cmp_equal, sel, 0, 0);
diff --git a/sql/server/rel_optimize_sel.c b/sql/server/rel_optimize_sel.c
--- a/sql/server/rel_optimize_sel.c
+++ b/sql/server/rel_optimize_sel.c
@@ -423,91 +423,366 @@ exps_cse( mvc *sql, list *oexps, list *l
        return res;
 }
 
-static int
-are_equality_exps( list *exps, sql_exp **L)
+static inline int
+exp_col_key(sql_exp *e)
+{
+       return e->nid ? e->nid : e->alias.label;
+}
+
+static inline int
+exp_cmp_eq_unique_id(sql_exp *e)
+{
+       return exp_col_key(e->l);
+}
+
+static inline int
+exp_multi_col_key(list *l)
+{
+       int k = exp_col_key(l->h->data);
+       for (node *n = l->h->next; n; n = n->next) {
+               k <<= 4;
+               k ^= exp_col_key(n->data);
+       }
+       return k;
+}
+
+typedef struct exp_eq_col_values {
+       /* we need ->first in order to remove it from the list of cmp_eq exps
+        * in case that we find another occurrence (with a different value)
+        */
+       sql_exp* first;
+       sql_exp* col; /* column */
+       list *vs;     /* list of values */
+} eq_cv;
+
+typedef struct exp_eq_multi_col_values {
+       /* we need ->first in order to remove it from the list of multi col
+        * cmp_eq exps in case that we find another occurrence (with different 
values)
+        */
+       list *first;
+       list *cols;  /* list of col exps */
+       list *lvs;   /* list of lists of values */
+} eq_mcv;
+
+static bool
+detect_col_cmp_eqs(mvc *sql, list *eqs, sql_hash *eqh)
 {
-       sql_exp *l = *L;
-
-       if (list_length(exps) == 1) {
-               sql_exp *e = exps->h->data, *le = e->l, *re = e->r;
-
-               if (e->type == e_cmp && e->flag == cmp_equal && le->card != 
CARD_ATOM && re->card == CARD_ATOM && !is_semantics(e)) {
-                       if (!l) {
-                               *L = l = le;
-                               if (!is_column(le->type))
-                                       return 0;
+       bool col_multivalue_cmp_eq = false;
+       for (node *n = eqs->h; n; n = n->next ) {
+               sql_exp *e = n->data;
+               sql_exp *le = e->l, *re = e->r;
+
+               /* find the le in the hash and append the re in the hash value 
(ea->list) */
+               bool found = false;
+
+               int key = eqh->key(le);
+               sql_hash_e *he = eqh->buckets[key&(eqh->size-1)];
+
+               for (;he && !found; he = he->chain) {
+                       eq_cv *cv = he->value;
+                       if (!exp_equal(le, cv->col)) {
+                               cv->vs = append(cv->vs, re);
+                               found = col_multivalue_cmp_eq = true;
+                               /* remove this and the previous (->first) 
occurrence (if exists) from eqs */
+                               if (cv->first) {
+                                       list_remove_data(eqs, NULL, cv->first);
+                                       cv->first = NULL;
+                               }
+                               list_remove_node(eqs, NULL, n);
                        }
-                       return (exp_match(l, le));
+               }
+
+               if (!found) {
+                       eq_cv *cv = SA_NEW(sql->sa, eq_cv);
+                       cv->first = e;
+                       cv->vs = sa_list(sql->sa);
+                       cv->vs = append(cv->vs, re);
+                       cv->col = le;
+
+                       hash_add(eqh, key, cv);
                }
-               if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e) && 
!is_semantics(e))
-                       return (are_equality_exps(e->l, L) && 
are_equality_exps(e->r, L));
        }
-       return 0;
+       return col_multivalue_cmp_eq;
+}
+
+static bool
+detect_multicol_cmp_eqs(mvc *sql, list *mce_ands, sql_hash *meqh)
+{
+       /* we get as input a list of AND associated expressions (hence the 
entries are lists themselves)
+        * we need to detect cmp_eq-only AND-associated expressions with the 
same columns so we can
+        * group together their values
+        * e.g. [[n = 1, m = 10], [m = 20, k = 100, l = 3000], [m = 20, n = 2]] 
has
+        *      - (m,k,l) group with a single value (20, 100, 3000)
+        *      - (n,k) group with two values (1, 10) and (2, 20)
+        * at the end we return true only if we have at least a group of 
columns with more than a single value
+        * e.g. in this example (n,k)
+        */
+       bool multi_multivalue_cmp_eq = false;
+       for (node *n = mce_ands->h; n; n = n->next) {
+               list *l = n->data;
+
+               /* sort the list of the cmp_eq expressions based on the col exp
+                * NOTE: from now on we only work with the sorted list, sl */
+               list *sl = list_sort(l, (fkeyvalue)&exp_cmp_eq_unique_id, NULL);
+               list_append_before(mce_ands, n, sl);
+               list_remove_node(mce_ands, NULL, n);
+
+               /* find the eq exp in the hash and append the values */
+               bool found = false;
+
+               int key = meqh->key(sl);
+               sql_hash_e *he = meqh->buckets[key&(meqh->size-1)];
+
+               for (;he && !found; he = he->chain) {
+                       /* compare the values of the hash_entry with the cols 
under cmp_eq from the list */
+                       bool same_cols = true;
+                       eq_mcv *mcv = he->value;
+                       for (node *m = sl->h, *k = mcv->cols->h; m && k && 
same_cols; m = m->next, k = k->next) {
+                               sql_exp *col_exp = ((sql_exp*)m->data)->l;
+                               if (exp_equal(col_exp, k->data))
+                                       same_cols = false;
+                       }
+                       if (same_cols) {
+                               /* we found the same multi cmp_eq exp in 
mce_ands list multiple times! */
+                               found = multi_multivalue_cmp_eq = true;
+                               /* gather all the values of the list and add 
them to the hash entry */
+                               list *atms = sa_list(sql->sa);
+                               for (node *m = sl->h; m; m = m->next)
+                                       atms = append(atms, 
((sql_exp*)m->data)->r);
+                               mcv->lvs = append(mcv->lvs, atms);
+                               /* remove this and the previous occurrence 
(which means that's the first time
+                                * that we found the *same* multi cmp_eq exp)
+                                */
+                               if (mcv->first) {
+                                       list_remove_data(mce_ands, NULL, 
mcv->first);
+                                       mcv->first = NULL;
+                               }
+                               list_remove_data(mce_ands, NULL, sl);
+                       }
+               }
+
+               if (!found) {
+                       eq_mcv *mcv = SA_NEW(sql->sa, eq_mcv);
+                       mcv->first = sl;
+                       mcv->cols = sa_list(sql->sa);
+                       for (node *m = sl->h; m; m = m->next)
+                               mcv->cols = append(mcv->cols, 
((sql_exp*)m->data)->l);
+                       /* for the list of values (atoms) create a list and 
append it to the lvs list */
+                       list *atms = sa_list(sql->sa);
+                       for (node *m = sl->h; m; m = m->next)
+                               atms = append(atms, ((sql_exp*)m->data)->r);
+                       mcv->lvs = sa_list(sql->sa);
+                       mcv->lvs = append(mcv->lvs, atms);
+
+                       hash_add(meqh, key, mcv);
+               }
+       }
+       return multi_multivalue_cmp_eq;
 }
 
 static void
-get_exps( list *n, list *l )
+exp_or_chain_groups(mvc *sql, list *exps, list **gen_ands, list **mce_ands, 
list **eqs, list **noneq)
 {
-       sql_exp *e = l->h->data, *re = e->r;
-
-       if (e->type == e_cmp && e->flag == cmp_equal && re->card == CARD_ATOM)
-               list_append(n, re);
-       if (e->type == e_cmp && e->flag == cmp_or) {
-               get_exps(n, e->l);
-               get_exps(n, e->r);
+       /* identify three different groups
+        * 1. gen_ands: lists of generic expressions (their inner association 
is AND)
+        * 2. mce_ands: lists of multi_colum cmp_eq ONLY expressions (same^^^)
+        * 3. eqs: equality expressions
+        * 4. neq: non equality col expressions
+        *
+        * return true if there is an exp with more than one cmp_eq
+        */
+    bool eq_only = true;
+    for (node *n = exps->h; n && eq_only; n = n->next) {
+        sql_exp *e = n->data;
+        sql_exp *le = e->l, *re = e->r;
+        eq_only &= (e->type == e_cmp && e->flag == cmp_equal &&
+                    le->card != CARD_ATOM && is_column(le->type) &&
+                    re->card == CARD_ATOM && !is_semantics(e));
+    }
+
+       if (list_length(exps) > 1) {
+               if (eq_only)
+                       *mce_ands = append(*mce_ands, exps);
+               else
+                       *gen_ands = append(*gen_ands, exps);
+       } else if (list_length(exps) == 1) {
+               sql_exp *se = exps->h->data;
+               sql_exp *le = se->l, *re = se->r;
+
+               if (se->type == e_cmp && se->flag == cmp_or && !is_anti(se)) {
+                       /* for a cmp_or expression go down the tree */
+                       exp_or_chain_groups(sql, (list*)le, gen_ands, mce_ands, 
eqs, noneq);
+                       exp_or_chain_groups(sql, (list*)re, gen_ands, mce_ands, 
eqs, noneq);
+
+               } else if (eq_only) {
+                       *eqs = append(*eqs, se);
+               } else {
+                       *noneq = append(*noneq, se);
+               }
        }
 }
 
-static sql_exp *
-equality_exps_2_in( mvc *sql, sql_exp *ce, list *l, list *r)
+static list *
+generate_single_col_cmp_in(mvc *sql, sql_hash *eqh)
 {
-       list *nl = new_exp_list(sql->sa);
-
-       get_exps(nl, l);
-       get_exps(nl, r);
-
-       return exp_in( sql->sa, ce, nl, cmp_in);
+       /* from single col cmp_eq with multiple atoms in the hash generate
+        * "e_col in (val0, val1, ...)" (see detect_col_cmp_eqs())
+        */
+       list *ins = new_exp_list(sql->sa);
+       for (int i = 0; i < eqh->size; i++) {
+               sql_hash_e *he = eqh->buckets[i];
+
+               while (he) {
+                       eq_cv *cv = he->value;
+                       /* NOTE: cmp_eq expressions with a single entry are 
still in eqs */
+                       if (list_length(cv->vs) > 1)
+                               ins = append(ins, exp_in(sql->sa, cv->col, 
cv->vs, cmp_in));
+                       he = he->chain;
+               }
+       }
+       return ins;
+}
+
+static list *
+generate_multi_col_cmp_in(mvc *sql, sql_hash *meqh)
+{
+       /* from multivalue cmp_eq with multiple lists of atoms in the hash 
generate
+        * "(col1, col2, ...) in [(val10, val20, ...), (val11, val21, ...), ... 
]"
+        * (see detect_multicol_cmp_eqs())
+        */
+       list *ins = new_exp_list(sql->sa);
+       for (int i = 0; i < meqh->size; i++) {
+               sql_hash_e *he = meqh->buckets[i];
+               while (he) {
+                       eq_mcv *mcv = he->value;
+                       /* NOTE: multivalue cmp_eq expressions with a single 
entry are still in mce_ands */
+                       if (list_length(mcv->lvs) > 1) {
+                               sql_exp *mc = exp_label(sql->sa, 
exp_values(sql->sa, mcv->cols), ++sql->label);
+                               for (node *a = mcv->lvs->h; a; a = a->next)
+                                       a->data = exp_values(sql->sa, a->data);
+                               ins = append(ins, exp_in(sql->sa, mc, mcv->lvs, 
cmp_in));
+                       }
+                       he = he->chain;
+               }
+       }
+       return ins;
 }
 
 static list *
 merge_ors(mvc *sql, list *exps, int *changes)
 {
-       list *nexps = NULL;
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to