Changeset: 8d7615bb914b for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/8d7615bb914b
Modified Files:
        sql/server/rel_optimize_sel.c
Branch: cmp-or-patterns
Log Message:

Adds multi multivalue cmp_eq exp optimizer WIP


diffs (143 lines):

diff --git a/sql/server/rel_optimize_sel.c b/sql/server/rel_optimize_sel.c
--- a/sql/server/rel_optimize_sel.c
+++ b/sql/server/rel_optimize_sel.c
@@ -518,11 +518,117 @@ exp_unique_id(sql_exp *e)
        return e->nid ? e->nid : e->alias.label;
 }
 
+/* Return the unique id (see exp_unique_id) of the left-hand side of
+ * expression e. Used as the sort key for lists of cmp_eq expressions. */
+static inline int
+exp_cmp_eq_unique_id(sql_exp *e)
+{
+       sql_exp *lhs = e->l;
+
+       return exp_unique_id(lhs);
+}
+
 typedef struct exp_eq_atoms {
        sql_exp* e;
        list *l;
 } ea;
 
+/* One group of AND-associated cmp_eq expressions over the same columns,
+ * together with every tuple of values those columns are compared to. */
+typedef struct exp_eq_multi_cols_atoms mca;
+
+struct exp_eq_multi_cols_atoms {
+       /* concatenation of the (rname, name) aliases of all columns */
+       char *cs;
+       /* the column expressions (lhs of each cmp_eq) */
+       list *ces;
+       /* list of lists of atoms: one inner list per occurrence of the group */
+       list *gvs;
+};
+
+/*
+ * Detect cmp_eq-only AND-associated expressions over the same columns and
+ * group their values together.
+ *
+ * sql:  context; sql->sa is used to allocate the new lists and hash entries
+ * ands: list of AND-associated expressions (hence the entries are lists
+ *       themselves); every qualifying entry is replaced in this list by the
+ *       list returned from list_sort(), keyed on the column expression
+ * meqh: hash receiving one mca entry per distinct sequence of columns
+ *
+ * e.g. [[n = 1, m = 10], [m = 20, k = 100, l = 3000], [m = 20, n = 2]] has
+ *      - (m,k,l) group with a single value (20, 100, 3000)
+ *      - (n,m) group with two values (1, 10) and (2, 20)
+ *
+ * Returns true only if we have at least one group of columns with more than
+ * a single value, e.g. in this example (n,m).
+ */
+static bool
+detect_multivalue_cmp_eqs(mvc *sql, list *ands, sql_hash *meqh)
+{
+       bool multi_multivalue_cmp_eq = false;
+
+       /* n is unlinked from ands inside the loop body, so its successor is
+        * fetched before that happens instead of reading n->next afterwards */
+       for (node *n = ands->h, *next = NULL; n; n = next) {
+               bool eq_only = true;
+               list *l = n->data;
+
+               next = n->next;
+
+               /* check if this list has only cmp_eq with a column lhs and an atom rhs */
+               for (node *m = l->h; m && eq_only; m = m->next) {
+                       sql_exp *se = m->data;
+                       sql_exp *le = se->l, *re = se->r;
+                       eq_only &= (se->type == e_cmp && se->flag == cmp_equal &&
+                                   le->card != CARD_ATOM && is_column(le->type) &&
+                                   re->card == CARD_ATOM && !is_semantics(se));
+               }
+
+               if (!eq_only)
+                       continue;
+
+               /* sort the list of the cmp_eq expressions based on the col exp
+                * NOTE: from now on we only work with the sorted list, sl */
+               list *sl = list_sort(l, (fkeyvalue)&exp_cmp_eq_unique_id, NULL);
+               list_append_before(ands, n, sl);
+               list_remove_node(ands, NULL, n);
+
+               /* one pass over sl: build the hash key string out of the concat
+                * of (rname1, name1, rname2, name2..) and gather the atoms
+                * NOTE(review): strconcat is not handed sql->sa, so the strings
+                * are presumably not allocated from it - confirm who frees them,
+                * and confirm alias.rname can never be NULL here */
+               char *cs = "";
+               list *atms = sa_list(sql->sa);
+               for (node *m = sl->h; m; m = m->next) {
+                       sql_exp *col_exp = ((sql_exp*)m->data)->l;
+                       sql_exp *val_exp = ((sql_exp*)m->data)->r;
+                       cs = strconcat(cs, strconcat(col_exp->alias.rname, col_exp->alias.name));
+                       atms = append(atms, (atom*)val_exp->l);
+               }
+
+               /* find the eq exp in the hash and append the values */
+               bool found = false;
+
+               int key = meqh->key(cs);
+               sql_hash_e *he = meqh->buckets[key&(meqh->size-1)];
+
+               for (; he && !found; he = he->chain) {
+                       /* compare the values of the hash_entry with the cols under cmp_eq from the list */
+                       bool same_cols = true;
+                       mca *mcas = he->value;
+                       node *m = sl->h, *k = mcas->ces->h;
+
+                       for (; m && k && same_cols; m = m->next, k = k->next) {
+                               sql_exp *col_exp = ((sql_exp*)m->data)->l;
+                               /* NOTE(review): relies on exp_equal() returning
+                                * non-zero when the expressions differ - confirm */
+                               if (exp_equal(col_exp, k->data))
+                                       same_cols = false;
+                       }
+                       /* a leftover node on either side means one column list is
+                        * merely a prefix of the other: not the same group */
+                       if (m || k)
+                               same_cols = false;
+
+                       if (same_cols) {
+                               /* we found the same multi cmp_eq exp in ands list multiple times! */
+                               found = multi_multivalue_cmp_eq = true;
+                               /* add the values of this list to the hash entry */
+                               mcas->gvs = append(mcas->gvs, atms);
+                       }
+               }
+
+               if (!found) {
+                       /* first occurrence of this column sequence: new hash entry */
+                       mca *mcas = SA_NEW(sql->sa, mca);
+                       mcas->cs = cs;
+                       mcas->ces = sa_list(sql->sa);
+                       for (node *m = sl->h; m; m = m->next) {
+                               sql_exp *col_exp = ((sql_exp*)m->data)->l;
+                               mcas->ces = append(mcas->ces, col_exp);
+                       }
+                       /* gvs is a list of (group values) lists; start it with this one */
+                       mcas->gvs = sa_list(sql->sa);
+                       mcas->gvs = append(mcas->gvs, atms);
+
+                       hash_add(meqh, key, mcas);
+               }
+
+               /* TODO: manage the entries: remove the entry of the ands list as we are going to reconstruct it later */
+       }
+       return multi_multivalue_cmp_eq;
+}
+
 static bool
 exp_or_chain_groups(mvc *sql, list *exps, list **ands, list **noneq, sql_hash 
*eqh)
 {
@@ -581,7 +687,7 @@ exp_or_chain_groups(mvc *sql, list *exps
 static list *
 merge_ors_NEW(mvc *sql, list *exps, int *changes)
 {
-       sql_hash *eqh = NULL;
+       sql_hash *eqh = NULL, *meqh = NULL;
        list *neq, *ands, *ins;
        for (node *n = exps->h; n; n = n->next) {
                sql_exp *e = n->data;
@@ -625,6 +731,12 @@ merge_ors_NEW(mvc *sql, list *exps, int 
                                }
                        }
 
+                       /* detect AND-chained cmp_eq-only exps with multiple 
values */
+                       if (list_length(ands) > 1) {
+                               meqh = hash_new(sql->sa, 4 /* TODO: HOW MUCH? 
prob. 16*/, (fkeyvalue)&hash_key);
+                               detect_multivalue_cmp_eqs(sql, ands, meqh);
+                       }
+
                        /* create the new OR tree */
                        assert(list_length(ins));
                        sql_exp *new = ins->h->data;
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to