Changeset: 0c5e51eea91d for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=0c5e51eea91d Modified Files: sql/server/rel_exp.c sql/server/rel_exp.h sql/server/rel_graph.c sql/server/rel_optimizer.c Branch: graph0 Log Message:
QRW: fix few bugs with the new join reordering algorithm for graphs diffs (269 lines): diff --git a/sql/server/rel_exp.c b/sql/server/rel_exp.c --- a/sql/server/rel_exp.c +++ b/sql/server/rel_exp.c @@ -1129,6 +1129,40 @@ rel_has_exps(sql_rel *rel, list *exps) return -1; } + +static bool +rel_has_exps2(sql_rel *rel, list* l){ + bool result = true; + + if(list_length(l) == 0) + return false; + + for(node* n = l->h; n && result; n = n->next){ + result &= rel_has_exp2(rel, n->data); + } + + return result; +} + +bool +rel_has_exp2(sql_rel *rel, sql_exp *e){ + if(!rel) return false; + if(!e) return true; + + switch(e->type){ + case e_cmp: + if (get_cmp(e) == cmp_filter) { + return rel_has_exps2(rel, e->l) && rel_has_exps2(rel, e->r); + } else { + return rel_has_exp2(rel, e->l) && rel_has_exp2(rel, e->r); + } + case e_graph: + return rel_has_exps2(rel, e->l) && rel_has_exps2(rel, e->r); + default: + return rel_find_exp(rel, e) != NULL; + } +} + sql_rel * find_rel(list *rels, sql_exp *e) { @@ -1211,8 +1245,9 @@ rel_find_exp_( sql_rel *rel, sql_exp *e) { sql_exp *ne = NULL; - if (!rel) + if (!rel) // stop the recursion return NULL; + switch(e->type) { case e_column: if (rel->exps && (is_project(rel->op) || is_base(rel->op))) { @@ -1222,6 +1257,11 @@ rel_find_exp_( sql_rel *rel, sql_exp *e) ne = exps_bind_column(rel->exps, e->r, NULL); } } + // check whether this is some kind of shortest path + else if (is_graph(rel->op) && e->l){ + sql_graph* graph_ptr = (sql_graph*) rel; + ne = exps_bind_column2(graph_ptr->spfw, e->l, e->r); + } return ne; case e_convert: return rel_find_exp_(rel, e->l); @@ -1288,13 +1328,6 @@ rel_find_exp( sql_rel *rel, sql_exp *e) break; case op_graph_join: case op_graph_select: { - assert(ne == NULL && "Expression already assigned?"); - // check whether this is some kind of shortest path - if(e->type == e_column){ - sql_graph* graph_ptr = (sql_graph*) rel; - ne = exps_bind_column2(graph_ptr->spfw, e->l, e->r); - } - // try with the lhs if(!ne) { ne = rel_find_exp(rel->l, e); } // and then with the rhs (ok for selects as well) diff --git a/sql/server/rel_exp.h b/sql/server/rel_exp.h --- a/sql/server/rel_exp.h +++ b/sql/server/rel_exp.h @@ -132,6 +132,9 @@ extern int rel_has_exp(sql_rel *rel, sql /* return 0 when the relation contain atleast one of the passed expressions else < 0 */ extern int rel_has_exps(sql_rel *rel, list *e); +// true if the dependency is satisfied, false otherwise +extern bool rel_has_exp2(sql_rel *rel, sql_exp *e); + extern sql_rel *find_rel(list *rels, sql_exp *e); extern sql_rel *find_one_rel(list *rels, sql_exp *e); diff --git a/sql/server/rel_graph.c b/sql/server/rel_graph.c --- a/sql/server/rel_graph.c +++ b/sql/server/rel_graph.c @@ -41,7 +41,7 @@ sql_graph* rel_graph_move(mvc* sql, sql_ graph_old = (sql_graph*) rel; graph_new = rel_graph_create(sql->sa); if(!graph_new) return NULL; - memcpy(graph_new + sizeof(sql_ref), graph_old + sizeof(sql_ref), sizeof(sql_graph) - sizeof(sql_ref)); + memcpy((char*) graph_new + sizeof(sql_ref), (char*) graph_old + sizeof(sql_ref), sizeof(sql_graph) - sizeof(sql_ref)); graph_rel = &(graph_new->relation); graph_rel->l = l; graph_rel->r = r; diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c --- a/sql/server/rel_optimizer.c +++ b/sql/server/rel_optimizer.c @@ -425,21 +425,29 @@ exp_count(int *cnt, sql_exp *e) return 0; *cnt -= 5*list_length(e->l); return 5*list_length(e->l); - case e_graph: - if(exp_card(e->l) == CARD_ATOM && exp_card(e->r) == CARD_ATOM){ + case e_graph: { + // I do still wonder, why the heck I am using lists for the lhs and rhs of the e_graph expression + sql_exp* el = ((list*) (e->l))->h->data; + sql_exp* er = ((list*) (e->r))->h->data; + + if(exp_card(el) == CARD_ATOM && exp_card(er) == CARD_ATOM){ *cnt += 1000; // this is going to result to a constant predicate TRUE or FALSE } else { - exp_count(cnt, e->l); - exp_count(cnt, e->r); - if(exp_card(e->l) == CARD_ATOM || exp_card(e->r) == CARD_ATOM){ + exp_count(cnt, el); + exp_count(cnt, er); + if(exp_card(el) == CARD_ATOM || exp_card(er) == CARD_ATOM){ // single shortest path *cnt += 500; } else { // this is going to be expensive, many-to-many shortest paths // do not change cnt + + // TODO: TEMPORARY ONLY + *cnt += 500; } } return 0; // the return value is ignored anyway + } break; // silent the warning case e_convert: /* functions are more expensive, depending on the number of columns involved. */ if (e->card == CARD_ATOM) @@ -1089,6 +1097,7 @@ order_joins_with_graphs(mvc *sql, list * sql_exp *e, *le, *re; bool swapped = false; node* m = NULL; + sql_rel* candidate = NULL; e = n->data; @@ -1106,27 +1115,31 @@ order_joins_with_graphs(mvc *sql, list * // can we join with one of the rhs? m = list_find(rels, re, (fcmp)&rel_has_exp); if(!m) continue; // next exp + candidate = m->data; } else if (rel_has_exp(top, re) == 0 /* 0 = true */){ swapped = true; m = list_find(rels, le, (fcmp)&rel_has_exp); if(!m) continue; // next exp + candidate = m->data; } else { // we cannot still produce a join with this exp~ continue; } fnd = true; // dependency satisfied - list_remove_data(rels, m->data); + list_remove_node(rels, m); list_remove_data(exps, e); + assert(candidate != NULL); + // create the join if(e->type != e_graph){ - top = rel_crossproduct(sql->sa, top, m->data, op_join); + top = rel_crossproduct(sql->sa, top, candidate, op_join); rel_join_add_exp(sql->sa, top, e); } else { // e_graph if(!swapped){ - top = reset_graph_join(sql, graphs, top, m->data, e); + top = reset_graph_join(sql, graphs, top, candidate, e); } else { - top = reset_graph_join(sql, graphs, m->data, top, e); + top = reset_graph_join(sql, graphs, candidate, top, e); } } } @@ -1143,15 +1156,16 @@ order_joins_with_graphs(mvc *sql, list * } // add the remaining filters - while (list_length(exps) > 1) { /* more expressions (add selects) */ + while (list_length(exps) > 0) { /* more expressions (add selects) */ node *n = NULL; sql_exp *e = NULL; for(n = exps->h; n; n = n->next){ e = n->data; - if(rel_has_exp(top, e) == 0 /* 0 == true */) break; + if(rel_has_exp2(top, e)) break; } assert(n != NULL && e != NULL); + list_remove_node(exps, n); if(e->type != e_graph) top = rel_select(sql->sa, top, e); @@ -1248,19 +1262,24 @@ push_up_join_exps(mvc *sql, sql_rel *rel switch(rel->op) { case op_graph_select: - record_graph(sql, rel, graphs); - /* fall through */ case op_select: { list *lst = NULL; - assert(rel->exps != NULL); - - if( rel_is_ref(rel->l) ){ - lst = rel->exps; - } else { + if (!rel_is_ref(rel->l)) { + + // consider this selection only as valid iff its parent is: + // a) a join + // b) another valid select operator lst = push_up_join_exps(sql, rel->l, graphs); - lst = list_merge(rel->exps, lst, NULL); - } - rel->exps = NULL; + + if(lst != NULL) { + if(rel->op == op_graph_select) + record_graph(sql, rel, graphs); + + lst = list_merge(rel->exps, lst, NULL); + rel->exps = NULL; + } + } + return lst; } break; case op_graph_join: @@ -1325,7 +1344,7 @@ reorder_join(mvc *sql, sql_rel *rel) // pda rel = rewrite_topdown(sql, rel, &rel_push_select_down, &e_changes); rel = rewrite_topdown(sql, rel, rel_graph_pda, &e_changes); - rel = rewrite(sql, rel, &rel_remove_empty_select, &e_changes); +// rel = rewrite(sql, rel, &rel_remove_empty_select, &e_changes); } else { // legacy logic get_relations(sql, rel, rels); if (list_length(rels) > 1) { @@ -9335,8 +9354,7 @@ static sql_rel * rel = rewrite(sql, rel, &rel_groupby_distinct, &changes); } - printf("[Optimizer] rel_join_order before: %s\n", dump_rel(sql, rel)); - if (gp.cnt[op_join] || gp.cnt[op_left] || gp.cnt[op_semi] || gp.cnt[op_anti]) { + if (gp.cnt[op_join] || gp.cnt[op_left] || gp.cnt[op_semi] || gp.cnt[op_anti] || gp.cnt[op_graph_join]) { rel = rel_remove_empty_join(sql, rel, &changes); if (!gp.cnt[op_update]) rel = rel_join_order(sql, rel); @@ -9344,7 +9362,6 @@ static sql_rel * /* rel_join_order may introduce empty selects */ rel = rewrite(sql, rel, &rel_remove_empty_select, &e_changes); } - printf("[Optimizer] rel_join_order after: %s\n", dump_rel(sql, rel)); if (gp.cnt[op_select] && sql->emode != m_prepare) rel = rewrite(sql, rel, &rel_simplify_like_select, &changes); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list