Changeset: bec9777bc6ca for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/bec9777bc6ca Modified Files: sql/include/sql_relation.h sql/server/rel_optimize_sel.c Branch: default Log Message:
implemented more efficient join order algorithm diffs (294 lines): diff --git a/sql/include/sql_relation.h b/sql/include/sql_relation.h --- a/sql/include/sql_relation.h +++ b/sql/include/sql_relation.h @@ -46,7 +46,8 @@ typedef struct expression { void *l; void *r; void *f; /* func's and aggr's, also e_cmp may have have 2 arguments */ - unsigned int flag; /* cmp types, PSM types/level */ + unsigned short flag; /* cmp types, PSM types/level */ + unsigned short tmp; unsigned int card:2, /* card (0 truth value!) (1 atoms) (2 aggr) (3 multi value) */ freevar:4, /* free variable, ie binds to the upper dependent join */ diff --git a/sql/server/rel_optimize_sel.c b/sql/server/rel_optimize_sel.c --- a/sql/server/rel_optimize_sel.c +++ b/sql/server/rel_optimize_sel.c @@ -1714,12 +1714,6 @@ joinexp_col(sql_exp *e, sql_rel *r) return NULL; } -static int -rel_has_exp2(sql_rel *r, sql_exp *e) -{ - return rel_has_exp(r, e, false); -} - static sql_column * table_colexp(sql_exp *e, sql_rel *r) { @@ -2033,28 +2027,101 @@ find_fk( mvc *sql, list *rels, list *exp return sdje; } +static int +rels_find_one_rel( sql_rel **rels, int nr, sql_exp *e) +{ + int fnd = 0; + + for(int i = 1; i<=nr; i++) { + if (rel_has_exp(rels[i], e, false) == 0) { + if (fnd) + return 0; + fnd = i; + } + } + return fnd; +} + +/* TODO move popcount and popcount64 into gdk_*.h, used in gdk_cand, strimps and here */ +static inline int +popcount64(uint64_t x) +{ +#if defined(__GNUC__) + return (uint32_t) __builtin_popcountll(x); +#elif defined(_MSC_VER) + return (uint32_t) __popcnt64(x); +#else + x = (x & 0x5555555555555555ULL) + ((x >> 1) & 0x5555555555555555ULL); + x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); + x = (x & 0x0F0F0F0F0F0F0F0FULL) + ((x >> 4) & 0x0F0F0F0F0F0F0F0FULL); + return (x * 0x0101010101010101ULL) >> 56; +#endif +} + static sql_rel * order_joins(visitor *v, list *rels, list *exps) { sql_rel *top = NULL, *l = NULL, *r = NULL; sql_exp *cje; node *djn; - list *sdje, *n_rels = sa_list(v->sql->sa); + list *sdje, *n_rels = NULL; int fnd = 0; unsigned int rsingle; + int direct = 1; /* find foreign keys and reorder the expressions on reducing quality */ sdje = find_fk(v->sql, rels, exps); + for(djn = sdje->h; djn; djn = djn->next ) { + sql_exp *e = djn->data; + list_remove_data(exps, NULL, e); + } if (list_length(rels) > 2 && mvc_debug_on(v->sql, 256)) { - for(djn = sdje->h; djn; djn = djn->next ) { - sql_exp *e = djn->data; - list_remove_data(exps, NULL, e); - } top = rel_planner(v->sql, rels, sdje, exps); return top; } + int nr_exps = list_length(sdje), nr_rels = list_length(rels), ci = 1; + if (nr_rels > 64) { + direct = 0; + n_rels = sa_list(v->sql->ta); + } + sql_rel **rels_a = SA_NEW_ARRAY(v->sql->ta, sql_rel*, nr_rels+1); /* don't use slot 0 */ + rels_a[0] = NULL; + for (node *n = rels->h; n; n = n->next, ci++) { + rels_a[ci] = n->data; + } + ulng *h = SA_NEW_ARRAY(v->sql->ta, ulng, nr_exps), rel_mask = 0; /* bit field (for > 64 its an imprint) */ + uint16_t *r1 = SA_NEW_ARRAY(v->sql->ta, uint16_t, nr_exps); + uint16_t *r2 = SA_NEW_ARRAY(v->sql->ta, uint16_t, nr_exps); + /* change r3 into rest list's */ + int *r3 = SA_NEW_ARRAY(v->sql->ta, int, nr_exps); + + ci = 0; + for (node *n = sdje->h; n; n = n->next, ci++) { + sql_exp *cje = n->data; + + h[ci] = r1[ci] = r2[ci] = 0; + r3[ci] = 0; + /* h[ci] = exp_find_rels(cje, rels) */ + if (cje->type != e_cmp || is_complex_exp(cje->flag) || !find_prop(cje->p, PROP_HASHCOL) || + (cje->type == e_cmp && cje->f == NULL)) { + cje->tmp = ci; + r1[ci] = rels_find_one_rel(rels_a, nr_rels, cje->l); + r2[ci] = rels_find_one_rel(rels_a, nr_rels, cje->r); + if (r1[ci]) + h[ci] |= 1L<<((r1[ci]-1)%64); + if (r2[ci]) + h[ci] |= 1L<<((r2[ci]-1)%64); + if (cje->f) { + r3[ci] = rels_find_one_rel(rels_a, nr_rels, cje->f); + if (r3[ci] == r2[ci]) + r3[ci] = 0; + if (r3[ci]) + h[ci] |= 1L<<((r3[ci]-1)%64); + } + } + } /* open problem, some expressions use more than 2 relations */ /* For example a.x = b.y * c.z; */ if (list_length(rels) >= 2 && sdje->h) { @@ -2066,22 +2133,26 @@ order_joins(visitor *v, list *rels, list /* complex expressions may touch multiple base tables * Should be pushed up to extra selection. * */ + if (0 && popcount64(h[cje->tmp]) > 2) + assert(0); if (cje->type != e_cmp || is_complex_exp(cje->flag) || !find_prop(cje->p, PROP_HASHCOL) || (cje->type == e_cmp && cje->f == NULL)) { - l = find_one_rel(rels, cje->l); - r = find_one_rel(rels, cje->r); + l = rels_a[r1[cje->tmp]]; + r = rels_a[r2[cje->tmp]]; + rel_mask |= h[cje->tmp]; } - - if (l && r && l != r) { + cje->tmp = 0; + + if (l && r && l != r) list_remove_data(sdje, NULL, cje); - list_remove_data(exps, NULL, cje); - } } if (l && r && l != r) { list_remove_data(rels, NULL, l); list_remove_data(rels, NULL, r); - list_append(n_rels, l); - list_append(n_rels, r); + if (!direct) { + list_append(n_rels, l); + list_append(n_rels, r); + } /* Create a relation between l and r. Since the calling functions rewrote the join tree, into a list of expressions @@ -2105,43 +2176,44 @@ order_joins(visitor *v, list *rels, list } en = next; } - /* Remove other joins on the current 'n_rels' set in the distinct list too */ - for (node *en = sdje->h; en; ) { - node *next = en->next; - sql_exp *e = en->data; - if (rel_rebind_exp(v->sql, top, e)) - list_remove_data(sdje, NULL, en->data); - en = next; - } fnd = 1; } /* build join tree using the ordered list */ - while(list_length(exps) && fnd) { + while(list_length(sdje) && fnd) { fnd = 0; /* find the first expression which could be added */ - if (list_length(sdje) > 1) - sdje = order_join_expressions(v->sql, sdje, rels); for(djn = sdje->h; djn && !fnd && rels->h; djn = (!fnd)?djn->next:NULL) { - node *ln, *rn, *en; + node *en; + l = r = NULL; cje = djn->data; - ln = list_find(n_rels, cje->l, (fcmp)&rel_has_exp2); - rn = list_find(n_rels, cje->r, (fcmp)&rel_has_exp2); - - if (ln && rn) { + if ((h[cje->tmp] & rel_mask) > 0) { + if (rel_mask & (1L<<((r1[cje->tmp]-1)%64))) + l = rels_a[r1[cje->tmp]]; + if (rel_mask & (1L<<((r2[cje->tmp]-1)%64))) + r = rels_a[r2[cje->tmp]]; + } + if (!direct) { /* check if atleast one side in n_rels */ + if (l && !list_find(n_rels, l, NULL)) + l = NULL; + if (r && !list_find(n_rels, r, NULL)) + r = NULL; + } + + if (l && r) { assert(0); /* create a selection on the current */ - l = ln->data; - r = rn->data; rel_join_add_exp(v->sql->sa, top, cje); fnd = 1; - } else if (ln || rn) { - if (ln) { - l = ln->data; - r = find_rel(rels, cje->r); + } else if (l || r) { + /* TODO: handle case for joins which need > 2 relations, ie where the current 'top' of the + * join tree needs to add more then one relation */ + rel_mask |= h[cje->tmp]; + if (l) { + r = rels_a[r2[cje->tmp]]; } else { - l = rn->data; - r = find_rel(rels, cje->l); + l = r; + r = rels_a[r1[cje->tmp]]; } if (!r) { fnd = 1; /* not really, but this bails out */ @@ -2151,10 +2223,10 @@ order_joins(visitor *v, list *rels, list /* remove the expression from the lists */ list_remove_data(sdje, NULL, cje); - list_remove_data(exps, NULL, cje); list_remove_data(rels, NULL, r); - append(n_rels, r); + if (!direct) + append(n_rels, r); /* create a join using the current expression */ rsingle = is_single(r); @@ -2179,8 +2251,11 @@ order_joins(visitor *v, list *rels, list for (en = sdje->h; en; ) { node *next = en->next; sql_exp *e = en->data; - if (rel_rebind_exp(v->sql, top, e)) + if ((direct && ((e->flag <= cmp_notequal && (h[e->tmp] & rel_mask) == h[e->tmp]) || (e->flag > cmp_notequal && rel_rebind_exp(v->sql, top, e)))) || + (!direct && rel_rebind_exp(v->sql, top, e))) { + rel_join_add_exp(v->sql->sa, top, e); list_remove_data(sdje, NULL, en->data); + } en = next; } fnd = 1; @@ -2202,21 +2277,17 @@ order_joins(visitor *v, list *rels, list top = nr; } } + if (list_length(sdje)) { + if (list_empty(exps)) + exps = sdje; + else + exps = list_merge(exps, sdje, (fdup)NULL); + } if (list_length(exps)) { /* more expressions (add selects) */ top = rel_select(v->sql->sa, top, NULL); for(node *n=exps->h; n; n = n->next) { sql_exp *e = n->data; - /* find the involved relations */ - - /* complex expressions may touch multiple base tables - * Should be push up to extra selection. */ - /* - l = find_one_rel(rels, e->l); - r = find_one_rel(rels, e->r); - - if (l && r) - */ if (exp_is_join_exp(e) == 0) { sql_rel *nr = NULL; if (is_theta_exp(e->flag)) { _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org