Changeset: bec9777bc6ca for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/bec9777bc6ca
Modified Files:
        sql/include/sql_relation.h
        sql/server/rel_optimize_sel.c
Branch: default
Log Message:

implemented more efficient join order algorithm


diffs (294 lines):

diff --git a/sql/include/sql_relation.h b/sql/include/sql_relation.h
--- a/sql/include/sql_relation.h
+++ b/sql/include/sql_relation.h
@@ -46,7 +46,8 @@ typedef struct expression {
        void *l;
        void *r;
        void *f;        /* func's and aggr's, also e_cmp may have have 2 
arguments */
-       unsigned int flag; /* cmp types, PSM types/level */
+       unsigned short flag; /* cmp types, PSM types/level */
+       unsigned short tmp;
        unsigned int
         card:2,        /* card (0 truth value!) (1 atoms) (2 aggr) (3 multi 
value) */
         freevar:4,     /* free variable, ie binds to the upper dependent join 
*/
diff --git a/sql/server/rel_optimize_sel.c b/sql/server/rel_optimize_sel.c
--- a/sql/server/rel_optimize_sel.c
+++ b/sql/server/rel_optimize_sel.c
@@ -1714,12 +1714,6 @@ joinexp_col(sql_exp *e, sql_rel *r)
        return NULL;
 }
 
-static int
-rel_has_exp2(sql_rel *r, sql_exp *e)
-{
-       return rel_has_exp(r, e, false);
-}
-
 static sql_column *
 table_colexp(sql_exp *e, sql_rel *r)
 {
@@ -2033,28 +2027,101 @@ find_fk( mvc *sql, list *rels, list *exp
        return sdje;
 }
 
+static int
+rels_find_one_rel( sql_rel **rels, int nr, sql_exp *e)
+{
+       int fnd = 0;
+
+       for(int i = 1; i<=nr; i++) {
+               if (rel_has_exp(rels[i], e, false) == 0) {
+                       if (fnd)
+                               return 0;
+                       fnd = i;
+               }
+       }
+       return fnd;
+}
+
+/* TODO move popcount and popcount64 into gdk_*.h, used in gdk_cand, strimps 
and here */
+static inline int
+popcount64(uint64_t x)
+{
+#if defined(__GNUC__)
+       return (uint32_t) __builtin_popcountll(x);
+#elif defined(_MSC_VER)
+       return (uint32_t) __popcnt64(x);
+#else
+       x = (x & 0x5555555555555555ULL) + ((x >> 1) & 0x5555555555555555ULL);
+       x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL);
+       x = (x & 0x0F0F0F0F0F0F0F0FULL) + ((x >> 4) & 0x0F0F0F0F0F0F0F0FULL);
+       return (x * 0x0101010101010101ULL) >> 56;
+#endif
+}
+
 static sql_rel *
 order_joins(visitor *v, list *rels, list *exps)
 {
        sql_rel *top = NULL, *l = NULL, *r = NULL;
        sql_exp *cje;
        node *djn;
-       list *sdje, *n_rels = sa_list(v->sql->sa);
+       list *sdje, *n_rels = NULL;
        int fnd = 0;
        unsigned int rsingle;
+       int direct = 1;
 
        /* find foreign keys and reorder the expressions on reducing quality */
        sdje = find_fk(v->sql, rels, exps);
 
+       for(djn = sdje->h; djn; djn = djn->next ) {
+               sql_exp *e = djn->data;
+               list_remove_data(exps, NULL, e);
+       }
        if (list_length(rels) > 2 && mvc_debug_on(v->sql, 256)) {
-               for(djn = sdje->h; djn; djn = djn->next ) {
-                       sql_exp *e = djn->data;
-                       list_remove_data(exps, NULL, e);
-               }
                top =  rel_planner(v->sql, rels, sdje, exps);
                return top;
        }
 
+       int nr_exps = list_length(sdje), nr_rels = list_length(rels), ci = 1;
+       if (nr_rels > 64) {
+               direct = 0;
+               n_rels = sa_list(v->sql->ta);
+       }
+       sql_rel **rels_a = SA_NEW_ARRAY(v->sql->ta, sql_rel*, nr_rels+1); /* 
don't use slot 0 */
+       rels_a[0] = NULL;
+       for (node *n = rels->h; n; n = n->next, ci++) {
+               rels_a[ci] = n->data;
+       }
+       ulng *h = SA_NEW_ARRAY(v->sql->ta, ulng, nr_exps), rel_mask = 0;        
/* bit field (for > 64 its an imprint) */
+       uint16_t *r1 = SA_NEW_ARRAY(v->sql->ta, uint16_t, nr_exps);
+       uint16_t *r2 = SA_NEW_ARRAY(v->sql->ta, uint16_t, nr_exps);
+       /* change r3 into rest list's */
+       int *r3 = SA_NEW_ARRAY(v->sql->ta, int, nr_exps);
+
+       ci = 0;
+       for (node *n = sdje->h; n; n = n->next, ci++) {
+               sql_exp *cje = n->data;
+
+               h[ci] = r1[ci] = r2[ci] = 0;
+               r3[ci] = 0;
+               /* h[ci] = exp_find_rels(cje, rels) */
+               if (cje->type != e_cmp || is_complex_exp(cje->flag) || 
!find_prop(cje->p, PROP_HASHCOL) ||
+                  (cje->type == e_cmp && cje->f == NULL)) {
+                       cje->tmp = ci;
+                       r1[ci] = rels_find_one_rel(rels_a, nr_rels, cje->l);
+                       r2[ci] = rels_find_one_rel(rels_a, nr_rels, cje->r);
+                       if (r1[ci])
+                               h[ci] |= 1L<<((r1[ci]-1)%64);
+                       if (r2[ci])
+                               h[ci] |= 1L<<((r2[ci]-1)%64);
+                       if (cje->f) {
+                               r3[ci] = rels_find_one_rel(rels_a, nr_rels, 
cje->f);
+                               if (r3[ci] == r2[ci])
+                                       r3[ci] = 0;
+                               if (r3[ci])
+                                       h[ci] |= 1L<<((r3[ci]-1)%64);
+                       }
+               }
+       }
        /* open problem, some expressions use more than 2 relations */
        /* For example a.x = b.y * c.z; */
        if (list_length(rels) >= 2 && sdje->h) {
@@ -2066,22 +2133,26 @@ order_joins(visitor *v, list *rels, list
                /* complex expressions may touch multiple base tables
                 * Should be pushed up to extra selection.
                 * */
+               if (0 && popcount64(h[cje->tmp]) > 2)
+                       assert(0);
                if (cje->type != e_cmp || is_complex_exp(cje->flag) || 
!find_prop(cje->p, PROP_HASHCOL) ||
                   (cje->type == e_cmp && cje->f == NULL)) {
-                       l = find_one_rel(rels, cje->l);
-                       r = find_one_rel(rels, cje->r);
+                       l = rels_a[r1[cje->tmp]];
+                       r = rels_a[r2[cje->tmp]];
+                       rel_mask |= h[cje->tmp];
                }
-
-               if (l && r && l != r) {
+               cje->tmp = 0;
+
+               if (l && r && l != r)
                        list_remove_data(sdje, NULL, cje);
-                       list_remove_data(exps, NULL, cje);
-               }
        }
        if (l && r && l != r) {
                list_remove_data(rels, NULL, l);
                list_remove_data(rels, NULL, r);
-               list_append(n_rels, l);
-               list_append(n_rels, r);
+               if (!direct) {
+                       list_append(n_rels, l);
+                       list_append(n_rels, r);
+               }
 
                /* Create a relation between l and r. Since the calling
                   functions rewrote the join tree, into a list of expressions
@@ -2105,43 +2176,44 @@ order_joins(visitor *v, list *rels, list
                        }
                        en = next;
                }
-               /* Remove other joins on the current 'n_rels' set in the 
distinct list too */
-               for (node *en = sdje->h; en; ) {
-                       node *next = en->next;
-                       sql_exp *e = en->data;
-                       if (rel_rebind_exp(v->sql, top, e))
-                               list_remove_data(sdje, NULL, en->data);
-                       en = next;
-               }
                fnd = 1;
        }
        /* build join tree using the ordered list */
-       while(list_length(exps) && fnd) {
+       while(list_length(sdje) && fnd) {
                fnd = 0;
                /* find the first expression which could be added */
-               if (list_length(sdje) > 1)
-                       sdje = order_join_expressions(v->sql, sdje, rels);
                for(djn = sdje->h; djn && !fnd && rels->h; djn = 
(!fnd)?djn->next:NULL) {
-                       node *ln, *rn, *en;
+                       node *en;
+                       l = r = NULL;
 
                        cje = djn->data;
-                       ln = list_find(n_rels, cje->l, (fcmp)&rel_has_exp2);
-                       rn = list_find(n_rels, cje->r, (fcmp)&rel_has_exp2);
-
-                       if (ln && rn) {
+                       if ((h[cje->tmp] & rel_mask) > 0) {
+                               if (rel_mask & (1L<<((r1[cje->tmp]-1)%64)))
+                                       l = rels_a[r1[cje->tmp]];
+                               if (rel_mask & (1L<<((r2[cje->tmp]-1)%64)))
+                                       r = rels_a[r2[cje->tmp]];
+                       }
+                       if (!direct) { /* check if atleast one side in n_rels */
+                               if (l && !list_find(n_rels, l, NULL))
+                                       l = NULL;
+                               if (r && !list_find(n_rels, r, NULL))
+                                       r = NULL;
+                       }
+
+                       if (l && r) {
                                assert(0);
                                /* create a selection on the current */
-                               l = ln->data;
-                               r = rn->data;
                                rel_join_add_exp(v->sql->sa, top, cje);
                                fnd = 1;
-                       } else if (ln || rn) {
-                               if (ln) {
-                                       l = ln->data;
-                                       r = find_rel(rels, cje->r);
+                       } else if (l || r) {
+                               /* TODO: handle case for joins which need > 2 
relations, ie where the current 'top' of the
+                                * join tree needs to add more then one 
relation */
+                               rel_mask |= h[cje->tmp];
+                               if (l) {
+                                       r = rels_a[r2[cje->tmp]];
                                } else {
-                                       l = rn->data;
-                                       r = find_rel(rels, cje->l);
+                                       l = r;
+                                       r = rels_a[r1[cje->tmp]];
                                }
                                if (!r) {
                                        fnd = 1; /* not really, but this bails 
out */
@@ -2151,10 +2223,10 @@ order_joins(visitor *v, list *rels, list
 
                                /* remove the expression from the lists */
                                list_remove_data(sdje, NULL, cje);
-                               list_remove_data(exps, NULL, cje);
 
                                list_remove_data(rels, NULL, r);
-                               append(n_rels, r);
+                               if (!direct)
+                                       append(n_rels, r);
 
                                /* create a join using the current expression */
                                rsingle = is_single(r);
@@ -2179,8 +2251,11 @@ order_joins(visitor *v, list *rels, list
                                for (en = sdje->h; en; ) {
                                        node *next = en->next;
                                        sql_exp *e = en->data;
-                                       if (rel_rebind_exp(v->sql, top, e))
+                                       if ((direct && ((e->flag <= 
cmp_notequal && (h[e->tmp] & rel_mask) == h[e->tmp]) || (e->flag > cmp_notequal 
&& rel_rebind_exp(v->sql, top, e))))  ||
+                                           (!direct && rel_rebind_exp(v->sql, 
top, e))) {
+                                               rel_join_add_exp(v->sql->sa, 
top, e);
                                                list_remove_data(sdje, NULL, 
en->data);
+                                       }
                                        en = next;
                                }
                                fnd = 1;
@@ -2202,21 +2277,17 @@ order_joins(visitor *v, list *rels, list
                                top = nr;
                }
        }
+       if (list_length(sdje)) {
+               if (list_empty(exps))
+                       exps = sdje;
+               else
+                       exps = list_merge(exps, sdje, (fdup)NULL);
+       }
        if (list_length(exps)) { /* more expressions (add selects) */
                top = rel_select(v->sql->sa, top, NULL);
                for(node *n=exps->h; n; n = n->next) {
                        sql_exp *e = n->data;
 
-                       /* find the involved relations */
-
-                       /* complex expressions may touch multiple base tables
-                        * Should be push up to extra selection. */
-                       /*
-                       l = find_one_rel(rels, e->l);
-                       r = find_one_rel(rels, e->r);
-
-                       if (l && r)
-                       */
                        if (exp_is_join_exp(e) == 0) {
                                sql_rel *nr = NULL;
                                if (is_theta_exp(e->flag)) {
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to