Changeset: a6d9d12a58e6 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a6d9d12a58e6
Modified Files:
	sql/backends/monet5/sql.c
	sql/backends/monet5/sql_rdf.h
	sql/backends/monet5/sql_rdf_jgraph.c
	sql/backends/monet5/sql_rdf_rel.c
	sql/backends/monet5/sql_rdf_rel.h
Branch: rdf
Log Message:
Optimize for the case with Optional keyword. Adding ifthenelse projection instead of using left outer join diffs (truncated from 647 to 300 lines): diff --git a/sql/backends/monet5/sql.c b/sql/backends/monet5/sql.c --- a/sql/backends/monet5/sql.c +++ b/sql/backends/monet5/sql.c @@ -133,6 +133,10 @@ sql_symbol2relation(mvc *c, symbol *sym) if (1) c->emod = mod_explain; } } + else { + + _rel_print(c,r); + } if (1){ r = rel_optimizer(c, r); diff --git a/sql/backends/monet5/sql_rdf.h b/sql/backends/monet5/sql_rdf.h --- a/sql/backends/monet5/sql_rdf.h +++ b/sql/backends/monet5/sql_rdf.h @@ -51,6 +51,9 @@ extern PropStat *global_c_propstat; extern BAT *global_mbat; extern BATiter global_mapi; -#define USINGRDFSCAN 1 +#define USINGRDFSCAN 0 + +#define APPLY_OPTIMIZATION_FOR_OPTIONAL 1 /* Instead of using left join, we use a project with ifthenelse */ + /* on the set of optional columns */ #endif /*_SQL_RDF_H */ diff --git a/sql/backends/monet5/sql_rdf_jgraph.c b/sql/backends/monet5/sql_rdf_jgraph.c --- a/sql/backends/monet5/sql_rdf_jgraph.c +++ b/sql/backends/monet5/sql_rdf_jgraph.c @@ -1197,7 +1197,7 @@ void extract_prop_and_subj_from_exps(mvc static -void tranforms_exps(mvc *c, sql_rel *r, list *trans_select_exps, list *trans_tbl_exps, str tblname, int colIdx, oid tmpPropId, str *atblname, str *asubjcolname, list *sp_prj_exps){ +void tranforms_exps(mvc *c, sql_rel *r, list *trans_select_exps, list *trans_tbl_exps, str tblname, int colIdx, oid tmpPropId, str *atblname, str *asubjcolname, list *sp_prj_exps, list *base_column_exps){ list *tmpexps = NULL; list *tmp_tbl_exps = NULL; @@ -1279,10 +1279,12 @@ void tranforms_exps(mvc *c, sql_rel *r, sql_column *tmpcol = get_rdf_column(c, tblname, origcolname); sql_exp *e = exp_alias(sa, tmpexp->rname, tmpexp->name, origtblname, origcolname, &tmpcol->type, CARD_MULTI, tmpcol->null, 0); sql_exp *proj_e = exp_alias(sa, tmpexp->rname, tmpexp->name, tmpexp->rname, tmpexp->name, &tmpcol->type, CARD_MULTI, tmpcol->null, 0); + sql_exp 
*base_col_e = exp_copy(sa, proj_e); printf("tmpcolname in rdf basetable is %s\n", tmpcolname); append(trans_tbl_exps, e); if (sp_prj_exps) append(sp_prj_exps, proj_e); + if (base_column_exps) append(base_column_exps, base_col_e); } if (strcmp(tmpexp->name, "s") == 0){ @@ -1293,7 +1295,7 @@ void tranforms_exps(mvc *c, sql_rel *r, sql_column *tmpcol = get_rdf_column(c, origtblname, origcolname); sql_exp *e = exp_alias(sa, tmpexp->rname, tmpexp->name, origtblname, origcolname, &tmpcol->type, CARD_MULTI, tmpcol->null, 0); sql_exp *proj_e = exp_alias(sa, tmpexp->rname, tmpexp->name, tmpexp->rname, tmpexp->name, &tmpcol->type, CARD_MULTI, tmpcol->null, 0); - + sql_exp *base_col_e = exp_copy(sa, proj_e); if (*atblname == NULL){ *atblname = GDKstrdup(tmpexp->rname); @@ -1301,6 +1303,7 @@ void tranforms_exps(mvc *c, sql_rel *r, } append(trans_tbl_exps, e); if (sp_prj_exps) append(sp_prj_exps, proj_e); + if (base_column_exps) append(base_column_exps, base_col_e); } } @@ -1309,7 +1312,7 @@ void tranforms_exps(mvc *c, sql_rel *r, static -void tranforms_mvprop_exps(mvc *c, sql_rel *r, mvPropRel *mvproprel, int tblId, oid tblnameoid, int colIdx, oid tmpPropId, int isMVcol, list *sp_prj_exps){ +void tranforms_mvprop_exps(mvc *c, sql_rel *r, mvPropRel *mvproprel, int tblId, oid tblnameoid, int colIdx, oid tmpPropId, int isMVcol, list *sp_prj_exps, list *base_column_exps){ list *tmpexps = NULL; list *trans_select_exps = NULL; @@ -1403,10 +1406,12 @@ void tranforms_mvprop_exps(mvc *c, sql_r sql_column *tmpcol = get_rdf_column(c, mvtblname, origcolname); sql_exp *e = exp_alias(sa, tmpexp->rname, tmpexp->name, origtblname, origcolname, &tmpcol->type, CARD_MULTI, tmpcol->null, 0); sql_exp *proj_e = exp_alias(sa, tmpexp->rname, tmpexp->name, tmpexp->rname, tmpexp->name, &tmpcol->type, CARD_MULTI, tmpcol->null, 0); + sql_exp *base_col_e = exp_copy(sa, proj_e); printf("tmpmvcolname in rdf basetable is %s\n", tmpmvcolname); append(trans_tbl_exps, e); append(sp_prj_exps, proj_e); + if 
(base_column_exps) append(base_column_exps, base_col_e); } if (strcmp(tmpexp->name, "s") == 0){ @@ -1417,9 +1422,11 @@ void tranforms_mvprop_exps(mvc *c, sql_r sql_column *tmpcol = get_rdf_column(c, origtblname, origcolname); sql_exp *e = exp_alias(sa, tmpexp->rname, tmpexp->name, origtblname, origcolname, &tmpcol->type, CARD_MULTI, tmpcol->null, 0); sql_exp *proj_e = exp_alias(sa, tmpexp->rname, tmpexp->name, tmpexp->rname, tmpexp->name, &tmpcol->type, CARD_MULTI, tmpcol->null, 0); + sql_exp *base_col_e = exp_copy(sa, proj_e); append(trans_tbl_exps, e); append(sp_prj_exps, proj_e); + if (base_column_exps) append(base_column_exps, base_col_e); mvproprel->atblname = GDKstrdup(tmpexp->rname); mvproprel->asubjcolname = GDKstrdup(tmpexp->name); @@ -1643,6 +1650,191 @@ sql_rel *connect_sp_select_and_mv_prop(m } +static +list *single_exp_list(sql_allocator *sa, sql_exp *e){ + list *lst = NULL; + lst = new_exp_list(sa); + + append(lst, e); + + return lst; +} + +/* + * Return sys.isnull(e) + * Right now, it is more like e = null + * */ +static +sql_exp* exp_isnull(sql_allocator *sa, sql_exp *e){ + sql_exp *l = NULL; + sql_exp *r = NULL; + sql_exp *isnull_exp = NULL; + + l = e; + r = exp_atom(sa, atom_general(sa, exp_subtype(l), NULL)); + + isnull_exp = exp_compare(sa, l, r, cmp_equal); + + return isnull_exp; +} + +static +sql_exp* exp_isnotnull(sql_allocator *sa, sql_exp *e){ + sql_exp *l = NULL; + sql_exp *r = NULL; + sql_exp *isnotnull_exp = NULL; + + l = e; + r = exp_atom(sa, atom_general(sa, exp_subtype(l), NULL)); + + isnotnull_exp = exp_compare(sa, l, r, cmp_notequal); + + return isnotnull_exp; +} + +/* + * Create exps for optional set of columns + * e.g., base_column_exps [s1.p1, s1.p2, s1.p3] + * Then, the opt_exps will be + * sys.ifthenelse( + * sys.ifthenelse( + * sys.isnull( + * sys.or(sys.isnull(s1.p1), + * sys.or(sys.isnull(s1.p2), sys.isnull(s1.p3)) + * )), + * boolean "false", + * sys.or(sys.isnull(s1.p1), + * sys.or(sys.isnull(s1.p2), sys.isnull(s1.p3)) + * 
) + * ), + * NULL, + * s1.p1 + * ) + * + * Look complicated :) + * + * If it is a set of required columns, then put sys.isnotnull() + * + * NOTE THAT as set of base columns can look like s1.s, s1.o, s2.s, s2.o,... + * we only put the condition on o, while keeping s as original + * + * */ + +static +list *create_optional_exps(sql_allocator *sa, list *base_column_exps, int isOptionalGroup, int contain_mv_col){ + list *opt_exps = NULL ; + list *req_exps = NULL; + sql_exp *or_exp = NULL; + node *en = NULL; + + list *only_o_exps = NULL; //keeping only o + only_o_exps = new_exp_list(sa); + + for (en = base_column_exps->h; en; en = en->next){ + sql_exp *o_exp = (sql_exp *) en->data; + assert(o_exp->type == e_column); + if (strcmp(o_exp->name, "o") == 0){ + sql_exp *tmp_o_exp = exp_copy(sa, o_exp); + append(only_o_exps, tmp_o_exp); + } + } + + if (isOptionalGroup){ + opt_exps = new_exp_list(sa); + if (contain_mv_col){ + printf("Do nothing for group of optional prop containing mv col \n"); + return base_column_exps; + } else { + node *first_node = only_o_exps->h; + sql_exp *first_exp = (sql_exp *) first_node->data; + sql_exp *first_isnull_exp = exp_isnull(sa, first_exp); + + if (first_node->next){ + for (en = first_node->next; en; en = en->next){ + sql_exp *tmpexp = (sql_exp *) en->data; + sql_exp *tmp_isnull_exp = NULL; + assert(tmpexp->type == e_column); + tmp_isnull_exp = exp_isnull(sa, tmpexp); + + if (or_exp == NULL){ + list *lst1 = single_exp_list(sa, first_isnull_exp); + list *lst2 = single_exp_list(sa, tmp_isnull_exp); + or_exp = exp_or(sa, lst1, lst2); + } else { + list *lst1 = single_exp_list(sa, or_exp); + list *lst2 = single_exp_list(sa, tmp_isnull_exp); + + or_exp = exp_or(sa, lst1, lst2); + } + } + } + + if (or_exp == NULL){ + or_exp = exp_copy(sa, first_isnull_exp); + } + + //Replace each column by ifthenelse + //e.g.: col1 = ifthenelse ( or (col1 == null, col2 == null, col3 ==null), null, col1) + for (en =base_column_exps->h; en; en = en->next){ + sql_exp 
*tmpexp = (sql_exp *) en->data; + sql_exp *if_exp = NULL; + sql_exp *exp_null = NULL; + list *lst_ifthen; + list *lst_else; + + assert(tmpexp->type == e_column); + assert(or_exp != NULL); + + if (strcmp(tmpexp->name, "o") == 0){ + + exp_null = exp_atom(sa, atom_general(sa, exp_subtype(tmpexp), NULL)); + + lst_ifthen = single_exp_list(sa, exp_null); + lst_else = single_exp_list(sa, tmpexp); + + if_exp = exp_if(sa, or_exp, lst_ifthen, lst_else); + + append(opt_exps, if_exp); + } else { + sql_exp *s_exp = NULL; + assert (strcmp(tmpexp->name, "s") == 0); + + s_exp = exp_copy(sa, tmpexp); + + append(opt_exps, s_exp); + } + + } + + } + + return opt_exps; + } else { + req_exps = new_exp_list(sa); + for (en = base_column_exps->h; en; en = en->next){ + sql_exp *tmpexp = (sql_exp *) en->data; + sql_exp *exp_notnull = NULL; + assert(tmpexp->type == e_column); + + exp_notnull = exp_isnotnull(sa, tmpexp); + + append(req_exps, exp_notnull); + } + + return req_exps; + } + +} + +static +void append_sp_rdfscan_proj_exps(list *opt_col_exps, list *sp_rdfscan_proj_exps){ + node *en; + for (en = opt_col_exps->h; en; en = en->next){ + sql_exp *tmpexp = (sql_exp *) en->data; + append(sp_rdfscan_proj_exps, tmpexp); + } +} + /* * Input: * - A sub-join graph (jsg) that all nodes are connected by using inner join @@ -1655,7 +1847,7 @@ sql_rel *connect_sp_select_and_mv_prop(m * - sp_prj_exps stores all the columns should be selected in the "original order" * */ static _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list