Changeset: 4b473057dd0c for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4b473057dd0c Modified Files: sql/server/rel_optimizer.c sql/server/rel_select.c Branch: Jun2020 Log Message:
Performance fix for tpch queries 20 and 21. During the first run of SQL optimizers mark changes rel_push_semijoin_down_or_up so the plan can still be optimized further. Also did small cleanup for primary key checking, ie test for existance of a primary key at both sides of a equality comparison. Removed now unecessery empty selection on user provided cross-products. diffs (246 lines): diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c --- a/sql/server/rel_optimizer.c +++ b/sql/server/rel_optimizer.c @@ -2510,25 +2510,29 @@ exps_unique(mvc *sql, sql_rel *rel, list return 0; } +static sql_column * +exp_is_pkey(sql_rel *rel, sql_exp *e) +{ + if (find_prop(e->p, PROP_HASHCOL)) { /* aligned PKEY JOIN */ + fcmp cmp = (fcmp)&kc_column_cmp; + sql_column *c = exp_find_column(rel, e, -2); + + if (c && c->t->pkey && list_find(c->t->pkey->k.columns, c, cmp) != NULL) + return c; + } + return NULL; +} + static int -rel_is_join_on_pkey( sql_rel *rel ) -{ - node *n; - +rel_is_join_on_pkey(sql_rel *rel) +{ if (!rel || !rel->exps) return 0; - for (n = rel->exps->h; n; n = n->next){ + for (node *n = rel->exps->h; n; n = n->next) { sql_exp *je = n->data; - if (je->type == e_cmp && je->flag == cmp_equal && - find_prop(((sql_exp*)je->l)->p, PROP_HASHCOL)) { /* aligned PKEY JOIN */ - fcmp cmp = (fcmp)&kc_column_cmp; - sql_exp *e = je->l; - sql_column *c = exp_find_column(rel, e, -2); - - if (c && c->t->pkey && list_find(c->t->pkey->k.columns, c, cmp) != NULL) - return 1; - } + if (je->type == e_cmp && je->flag == cmp_equal && (exp_is_pkey(rel, je->l) || exp_is_pkey(rel, je->r))) + return 1; } return 0; } @@ -2577,6 +2581,75 @@ rel_distinct_aggregate_on_unique_values( return rel; } +static bool +has_no_selectivity(mvc *sql, sql_rel *rel) +{ + if (!rel) + return true; + + switch(rel->op){ + case op_basetable: + case op_truncate: + case op_table: + return true; + case op_topn: + case op_sample: + case op_project: + case op_groupby: + return has_no_selectivity(sql, rel->l); + case op_ddl: + case op_insert: + case op_update: + case op_delete: + case op_join: + case op_left: + case op_right: + case op_full: + case op_semi: + case op_anti: + case op_union: + case op_inter: + case op_except: + case op_select: + return false; + } + return rel; +} + +static sql_column * +is_fk_column_of_pk(sql_rel *rel, sql_column *pkc, sql_exp *e) /* test if e is a foreing key column for the pk on pkc */ +{ + sql_column *c = exp_find_column(rel, e, -2); + + if (c) { + sql_table *t = c->t; + + for (node *n = t->idxs.set->h; n; n = n->next) { + sql_idx *li = n->data; + + if (li->type == join_idx) { + for (node *m = li->columns->h ; m ; m = m->next) { + sql_kc *fkc = m->data; + + if (strcmp(fkc->c->base.name, c->base.name) == 0) { /* same fkey column */ + sql_key *fkey = &((sql_fkey*)li->key)->rkey->k; + + if (strcmp(fkey->t->base.name, pkc->t->base.name) == 0) { /* to same pk table */ + for (node *o = fkey->columns->h ; o ; o = n->next) { + sql_kc *kc = m->data; + + if (strcmp(kc->c->base.name, pkc->base.name) == 0) /* to same pk table column */ + return c; + } + } + } + } + } + } + } + return NULL; +} + static sql_rel * rel_distinct_project2groupby(mvc *sql, sql_rel *rel, int *changes) { @@ -2599,14 +2672,15 @@ rel_distinct_project2groupby(mvc *sql, s * into project( (semi)join(p,f) [ p.pk = f.fk ] ) [ p.pk ] */ if (rel->op == op_project && rel->l && !rel->r /* no order by */ && need_distinct(rel) && l && (is_select(l->op) || l->op == op_join) && rel_is_join_on_pkey(l) /* [ pk == fk ] */) { - sql_exp *found = NULL, *pk = NULL; + sql_exp *found = NULL, *pk = NULL, *fk = NULL; bool all_exps_atoms = true; + sql_column *pkc = NULL; for (node *m = l->exps->h ; m ; m = m->next) { /* find a primary key join */ sql_exp *je = (sql_exp *) m->data; sql_exp *le = je->l, *re = je->r; - if (find_prop(le->p, PROP_HASHCOL)) { /* le is the primary key */ + if ((pkc = exp_is_pkey(l, le))) { /* le is the primary key */ all_exps_atoms = true; for (node *n = rel->exps->h; n && all_exps_atoms; n = n->next) { @@ -2618,8 +2692,9 @@ rel_distinct_project2groupby(mvc *sql, s all_exps_atoms = false; } pk = le; - } - if (!found && find_prop(re->p, PROP_HASHCOL)) { /* re is the primary key */ + fk = re; + } + if (!found && (pkc = exp_is_pkey(l, re))) { /* re is the primary key */ all_exps_atoms = true; for (node *n = rel->exps->h; n && all_exps_atoms; n = n->next) { @@ -2631,13 +2706,25 @@ rel_distinct_project2groupby(mvc *sql, s all_exps_atoms = false; } pk = re; + fk = le; } } if (all_exps_atoms && found) { /* rel must have the same primary key on the projection list */ + /* if the foreign key has no selectivity, the join can be removed */ + if (!(rel_is_ref(l)) && ((rel_find_exp(l->l, fk) && is_fk_column_of_pk(l->l, pkc, fk) && has_no_selectivity(sql, l->l)) || + (l->r && rel_find_exp(l->r, fk) && is_fk_column_of_pk(l->r, pkc, fk) && has_no_selectivity(sql, l->r)))) { + sql_rel *side = (rel_find_exp(l->l, pk) != NULL)?l->l:l->r; + + rel->l = rel_dup(side); + rel_destroy(l); + *changes = 1; + set_nodistinct(rel); + return rel; + } /* if the join has no multiple references it can be re-written into a semijoin */ if (l->op == op_join && !(rel_is_ref(l)) && list_length(rel->exps) == 1) { /* other expressions may come from the other side */ - if (rel_find_exp(l->r, pk)) { + if (l->r && rel_find_exp(l->r, pk)) { sql_rel *temp = l->l; l->l = l->r; l->r = temp; @@ -4729,14 +4816,13 @@ rel_push_join_down(mvc *sql, sql_rel *re static sql_rel * rel_push_semijoin_down_or_up(mvc *sql, sql_rel *rel, int *changes) { - (void)*changes; - if (rel->op == op_join && rel->exps && rel->l) { sql_rel *l = rel->l, *r = rel->r; if (is_semi(l->op) && !rel_is_ref(l) && is_select(r->op) && !rel_is_ref(r)) { rel->l = l->l; l->l = rel; + (*changes)++; return l; } } @@ -4751,6 +4837,7 @@ rel_push_semijoin_down_or_up(mvc *sql, s if (is_semi(ll->op) && !rel_is_ref(ll)) { l->l = ll->l; ll->l = rel; + (*changes)++; return ll; } } @@ -4771,6 +4858,7 @@ rel_push_semijoin_down_or_up(mvc *sql, s rel_has_exp(rel->l, sje->r) >= 0) { rel->l = rel_select(sql->sa, rel->l, NULL); rel_select_add_exp(sql->sa, rel->l, sje); + (*changes)++; } else { append(nexps, sje); } @@ -4833,6 +4921,7 @@ rel_push_semijoin_down_or_up(mvc *sql, s l->exps = njexps; rel_destroy(rel); rel = l; + (*changes)++; } return rel; } @@ -9125,10 +9214,13 @@ optimize_rel(mvc *sql, sql_rel *rel, int } if (gp.cnt[op_anti] || gp.cnt[op_semi]) { + int semijoin_changes = 0; /* rewrite semijoin (A, join(A,B)) into semijoin (A,B) */ rel = rel_visitor_bottomup(sql, rel, &rel_rewrite_semijoin, &changes); /* push semijoin through join */ - rel = rel_visitor_bottomup(sql, rel, &rel_push_semijoin_down_or_up, &changes); + rel = rel_visitor_bottomup(sql, rel, &rel_push_semijoin_down_or_up, &semijoin_changes); + if (level == 0) /* for now count optimizer changes just for first iteration */ + changes += semijoin_changes; /* antijoin(a, union(b,c)) -> antijoin(antijoin(a,b), c) */ rel = rel_visitor_bottomup(sql, rel, &rel_rewrite_antijoin, &changes); if (level <= 0) diff --git a/sql/server/rel_select.c b/sql/server/rel_select.c --- a/sql/server/rel_select.c +++ b/sql/server/rel_select.c @@ -5842,7 +5842,6 @@ rel_query(sql_query *query, sql_rel *rel res = rel_crossproduct(sql->sa, res, fnd, op_join); if (lateral) set_dependent(res); - res = rel_select(sql->sa, res, NULL); } else { res = fnd; } _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list