Changeset: e0131bc1dd88 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/e0131bc1dd88
Modified Files:
        sql/server/rel_optimize_sel.c
Branch: cmp-or-patterns
Log Message:

New implementation of merge_ors


diffs (183 lines):

diff --git a/sql/server/rel_optimize_sel.c b/sql/server/rel_optimize_sel.c
--- a/sql/server/rel_optimize_sel.c
+++ b/sql/server/rel_optimize_sel.c
@@ -472,9 +472,11 @@ equality_exps_2_in( mvc *sql, sql_exp *c
 static list *
 merge_ors(mvc *sql, list *exps, int *changes)
 {
+       /* n = 1 OR n = 2 ==> n in (1, 2) */
        list *nexps = NULL;
        int needed = 0;
 
+       // blindly looks for any cmp_or in the top list
        for (node *n = exps->h; n && !needed; n = n->next) {
                sql_exp *e = n->data;
 
@@ -510,6 +512,154 @@ merge_ors(mvc *sql, list *exps, int *cha
        return nexps;
 }
 
+static inline int
+exp_unique_id(sql_exp *e)
+{
+       assert(e->nid);
+       return e->nid;
+}
+
+typedef struct exp_eq_atoms {
+       sql_exp* e;
+       list *l;
+} ea;
+
+static bool
+exp_or_chain_groups(mvc *sql, list *exps, list **ands, list **noneq, sql_hash 
*eqh)
+{
+       /* identify three different groups
+        * 1. ands: lists of expressions (their inner association is AND)
+        * 2. neq: non equality col expressions
+        * 3. eqh: col = X (we store the different X values)
+        *
+        * return true if there is an exp with more than one cmp_eq
+        */
+       bool multi_atom_cmp_eq = false;
+
+       if (list_length(exps) > 1) {
+               *ands = append(*ands, exps);
+       } else if (list_length(exps) == 1) {
+               sql_exp *se = exps->h->data;
+               sql_exp *le = se->l, *re = se->r;
+
+               if (se->type == e_cmp && se->flag == cmp_or && !is_anti(se)) {
+                       /* for a cmp_or expression go down the tree */
+                       multi_atom_cmp_eq |= exp_or_chain_groups(sql, 
(list*)le, ands, noneq, eqh);
+                       multi_atom_cmp_eq |= exp_or_chain_groups(sql, 
(list*)re, ands, noneq, eqh);
+
+               } else if (se->type == e_cmp && se->flag == cmp_equal &&
+                                  le->card != CARD_ATOM && is_column(le->type) 
&&
+                                  re->card == CARD_ATOM && !is_semantics(se)) {
+                       /* find the le in the hash and append the re in the 
hash value (ea->list) */
+                       bool found = false;
+
+                       int key = eqh->key(le);
+                       sql_hash_e *he = eqh->buckets[key&(eqh->size-1)];
+
+                       for (;he && !found; he = he->chain) {
+                               ea *eas = he->value;
+                               if(!exp_equal(le, eas->e)){
+                                       eas->l = append(eas->l, re);
+                                       found = multi_atom_cmp_eq = true;
+                               }
+                       }
+
+                       if (!found) {
+                               ea *eas = SA_NEW(sql->sa, ea);
+                               eas->l = sa_list(sql->sa);
+                               eas->l = append(eas->l, re);
+                               eas->e = le;
+
+                               hash_add(eqh, key, eas);
+                       }
+               } else {
+                       *noneq = append(*noneq, se);
+               }
+       }
+       return multi_atom_cmp_eq;
+}
+
+static list *
+merge_ors_NEW(mvc *sql, list *exps, int *changes)
+{
+       sql_hash *eqh = NULL;
+       list *neq, *ands, *ins = exps;
+       for (node *n = exps->h; n; n = n->next) {
+               sql_exp *e = n->data;
+
+               if (e->type == e_cmp && e->flag == cmp_or && !is_anti(e)) {
+                       /* NOTE: ands is a list of lists since the AND 
association
+                        *       between expressions is expressed with a list
+                        *       e.g. [[e1, e2], [e3, e4, e5]] semantically 
translates
+                        *         to [(e1 AND e2), (e3 AND e4 AND e5)]
+                        *       those (internal) AND list can be then used to
+                        *       reconstructed an OR tree
+                        *       [[e1, e2], [e3, e4, e5]] =>
+                        *       (([e1, e2] OR [e3, e4, e5]) OR <whatever-else> 
)
+                        */
+                       ands = new_exp_list(sql->sa);
+                       neq = new_exp_list(sql->sa);
+                       eqh = hash_new(sql->sa, 4 /* TODO: HOW MUCH? prob. 
64*/, (fkeyvalue)&exp_unique_id);
+
+                       /* if no multi-atom cmp_eq exps bailout, we don't need 
to gen cmp_in */
+                       bool ma = false;
+                       ma |= exp_or_chain_groups(sql, e->l, &ands, &neq, eqh);
+                       ma |= exp_or_chain_groups(sql, e->r, &ands, &neq, eqh);
+                       if (!ma)
+                               continue;
+
+                       /* from equality atoms in the hash generate "e_col in 
(...)" for
+                        * entries with multiple atoms (see 
exp_or_chain_groups())
+                        * */
+                       ins = new_exp_list(sql->sa);
+                       for (int i = 0; i < eqh->size; i++) {
+                               sql_hash_e *he = eqh->buckets[i];
+
+                               while (he) {
+                                       ea *eas = he->value;
+                                       /* only if there are multiple cmp_eq 
atoms for this col turn them into a cmp_in */
+                                       if (list_length(eas->l) > 1)
+                                               ins = append(ins, 
exp_in(sql->sa, eas->e, eas->l, cmp_in));
+                                       else
+                                               ins = append(ins, 
exp_compare(sql->sa, eas->e, eas->l->h->data, cmp_equal));
+                                       he = he->chain;
+                               }
+                       }
+
+                       /* create the new OR tree */
+                       assert(list_length(ins));
+                       sql_exp *new = ins->h->data;
+                       for (node *i = ins->h->next; i; i = i->next) {
+                               list *l = new_exp_list(sql->sa);
+                               list *r = new_exp_list(sql->sa);
+                               l = append(l, new);
+                               r = append(r, (sql_exp*)i->data);
+                               new = exp_or(sql->sa, l, r, 0);
+                       }
+
+                       // TODO: we can recurse on ands
+                       for (node *a = ands->h; a; a = a->next){
+                               list *l = new_exp_list(sql->sa);
+                               l = append(l, new);
+                               new = exp_or(sql->sa, l, a->data, 0);
+                       }
+
+                       for (node *o = neq->h; o; o = o->next){
+                               list *l = new_exp_list(sql->sa);
+                               list *r = new_exp_list(sql->sa);
+                               l = append(l, new);
+                               r = append(r, (sql_exp*)o->data);
+                               new = exp_or(sql->sa, l, r, 0);
+                       }
+
+                       list_remove_node(exps, NULL, n);
+                       exps = append(exps, new);
+                       changes++;
+               }
+       }
+       return exps;
+}
+
 #define TRIVIAL_NOT_EQUAL_CMP(e) \
        ((e)->type == e_cmp && (e)->flag == cmp_notequal && !is_anti((e)) && 
!is_semantics((e)) && ((sql_exp*)(e)->l)->card != CARD_ATOM && 
((sql_exp*)(e)->r)->card == CARD_ATOM)
 
@@ -744,8 +894,11 @@ rel_select_cse(visitor *v, sql_rel *rel)
        if ((is_select(rel->op) || is_join(rel->op) || is_semi(rel->op)) && 
rel->exps) /* cleanup equal expressions */
                rel->exps = cleanup_equal_exps(v->sql, rel, rel->exps, 
&v->changes); /* (a = b) and (a += b) */
 
+       if (NULL && is_select(rel->op) && rel->exps)
+               rel->exps = merge_ors(v->sql, rel->exps, &v->changes); /* x = 1 
or x = 2 => x in (1, 2)*/
+
        if (is_select(rel->op) && rel->exps)
-               rel->exps = merge_ors(v->sql, rel->exps, &v->changes); /* x = 1 
or x = 2 => x in (1, 2)*/
+               rel->exps = merge_ors_NEW(v->sql, rel->exps, &v->changes);
 
        if (is_select(rel->op) && rel->exps)
                rel->exps = merge_notequal(v->sql, rel->exps, &v->changes); /* 
x <> 1 and x <> 2 => x not in (1, 2)*/
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to