Changeset: 775fcc0fb37d for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=775fcc0fb37d Modified Files: sql/server/rel_optimizer.c Branch: DVframework Log Message:
On-the-fly metadata derivation into partially materialized views (pmv): the required predicates are collected and enumerated. diffs (truncated from 888 to 300 lines): diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c --- a/sql/server/rel_optimizer.c +++ b/sql/server/rel_optimizer.c @@ -30,9 +30,28 @@ #include "rel_planner.h" #include "sql_env.h" +#include "mtime.h" +#include "mal_client.h" + #define new_func_list(sa) sa_list(sa) #define new_col_list(sa) sa_list(sa) + +typedef struct table_pkeys +{ + str table_name; + struct list *pkey_column_names; +} table_pkeys; + +typedef struct sel_predicate +{ + sht cmp_type; // cmp_gt = 0, cmp_gte = 1, cmp_lte = 2, cmp_lt = 3, cmp_equal = 4, cmp_notequal = 5, cmp_filter = 6, cmp_or = 7, cmp_in = 8, cmp_notin = 9, cmp_all = 10, cmp_project = 11 (up to here taken from sql/rel.txt), cmp_range = 12, 13, 14, 15 (both not equal, first not equal second equal, first equal second not equal, both equal, respectively). + sql_column* column; // column of the selection predicate + ValRecord** values; // array of values (valrecord pointers) compared. Could be of different cardinality. E.g. 1 if cmp_equal, 2 if cmp_range, anything if cmp_in, etc. + int num_values; // length of the values array. 
+} sel_predicate; + + typedef struct global_props { int cnt[MAXOPS]; } global_props; @@ -41,6 +60,20 @@ typedef sql_rel *(*rewrite_fptr)(int *ch typedef int (*find_prop_fptr)(mvc *sql, sql_rel *rel); static sql_subfunc *find_func( mvc *sql, char *name, list *exps ); +int is_table_in_list_table_pkeys(list *l, sql_table* st); +int is_column_in_list_columns(list *l, sql_column* c); +int is_column_of_table_in_list_table_pkeys(list *l, sql_table* st, sql_column* c); +list* extract_column_names_from_list_of_columns(list* list_of_columns); +list* collect_PERPAD(mvc *sql, sql_rel *rel); +lng get_enum_step_length(sql_column* c); +sel_predicate** convert_all_into_in_clause_except_cmp_equal(list *list_of_PERPAD); +int enumerate_pkey_space(str** ret, sel_predicate** sps, int sps_enum_start, int num_PERPAD, int* is_pkey_to_be_enumerated); +int* enumerate_and_insert_into_temp_table(mvc *sql, sel_predicate** sps, int num_PERPAD); +str SQLstatementIntern(Client c, str *expr, str nme, int execute, bit output); +str VAL2str(ValRecord* valp); +void check_if_required_derived_metadata_is_already_available(list* list_of_PERPAD, int* is_pkey_to_be_enumerated, int num_pkeys_to_be_enumerated); + +list *discovered_table_pkeys; /* The important task of the relational optimizer is to optimize the join order. 
@@ -578,6 +611,792 @@ find_basetable( sql_rel *r) } } +int is_table_in_list_table_pkeys(list *l, sql_table* st) +{ + node *n = NULL; + + if (l && st) { + for (n = l->h; n; n = n->next) + { + table_pkeys *tp = n->data; + if (strcmp(tp->table_name, st->base.name) == 0) + return TRUE; + } + } + return FALSE; +} + +int is_column_in_list_columns(list *l, sql_column* c) +{ + node *n = NULL; + + if (l && c) { + for (n = l->h; n; n = n->next) + { + str s = n->data; + if (strcmp(s, c->base.name) == 0) + return TRUE; + } + } + return FALSE; +} + +int is_column_of_table_in_list_table_pkeys(list *l, sql_table* st, sql_column* c) +{ + node *n = NULL; + + if (l && st && c) { + for (n = l->h; n; n = n->next) + { + table_pkeys *tp = n->data; + if (strcmp(tp->table_name, st->base.name) == 0) + { + return is_column_in_list_columns(tp->pkey_column_names, c); + } + } + } + return FALSE; +} + + +list* extract_column_names_from_list_of_columns(list* list_of_sql_kc) +{ + node *n = NULL; + list* list_of_str = list_create(NULL); + + if (list_of_sql_kc) { + for (n = list_of_sql_kc->h; n; n = n->next) + { + sql_kc* skc = n->data; + list_append(list_of_str, GDKstrdup(skc->c->base.name)); + } + } + return list_of_str; +} + + +list* collect_PERPAD(mvc *sql, sql_rel *rel) +{ + node *n = NULL; + int i; + list *res = NULL; + list *first = NULL, *second = NULL; + + if(rel == NULL) + return NULL; + + printf("=====enter: collect_PERPAD\n"); + + switch (rel->op) + { + case op_basetable: + case op_table: + return NULL; + case op_join: + case op_left: + case op_right: + case op_full: + + case op_semi: + case op_anti: + + case op_union: + case op_inter: + case op_except: + first = collect_PERPAD(sql, rel->l); + second = collect_PERPAD(sql, rel->r); + if(first == NULL && second == NULL) + return NULL; + else if(first == NULL) + return list_merge(second, first, (fdup)NULL); + else + return list_merge(first, second, (fdup)NULL); + case op_project: + return collect_PERPAD(sql, rel->l); + case op_select: + 
printf("op_select!\n"); + if(rel->exps == NULL) + break; + + if(rel->exps->h == NULL) + break; + + + res = list_create(NULL); + + for (n = rel->exps->h, i = 0; n; n = n->next, i++) + { + sql_exp *e = n->data; + + printf("i: %d\n", i); + if(e == NULL) + continue; + + if(e->type == e_cmp) + { + sql_exp *el = (sql_exp*) e->l; + sql_exp *er = (sql_exp*) e->r; + sql_exp *ef = NULL; + sql_exp *pivot = NULL; + + if(e->f) + { + ef = (sql_exp*) e->f; + } + + if(el->type == e_column || er->type == e_column) + { + sql_column *c; + atom *a; + + if(el->type != e_column) + { + pivot = el; + el = er; + er = pivot; + } + + c = exp_find_column(rel, el, -1); + if(c == NULL) + continue; + printf("column: %s & table: %s & er_type: %d & atom: %s & atom_r: %s\n", c->base.name, c->t->base.name, er->type, er->name, er->rname); + + if(find_prop(el->p, PROP_HASHCOL)) + { + sql_ukey *su = (sql_ukey*) ((prop*)el->p)->value; + if(!is_table_in_list_table_pkeys(discovered_table_pkeys, c->t)) + { + // TODO: add the table only if it is a partially materialized view + table_pkeys *tp = (table_pkeys*) GDKmalloc(sizeof(table_pkeys)); + tp->table_name = GDKstrdup(c->t->base.name); + tp->pkey_column_names = extract_column_names_from_list_of_columns(su->k.columns); + list_append(discovered_table_pkeys, tp); + } + } + + if(is_column_of_table_in_list_table_pkeys(discovered_table_pkeys, c->t, c)) + { + + sel_predicate* sp = (sel_predicate*) GDKmalloc(sizeof(sel_predicate)); + + sp->cmp_type = e->flag; + sp->column = c; + sp->values = (ValRecord**) GDKmalloc(sizeof(ValRecord*)); + + printf("pkey!\n"); + + /* if it is a range */ + if(ef) + { + if(ef->type == e_convert) + ef = ef->l; + if (is_atom(ef->type) && (a = exp_value(ef, sql->args, sql->argc)) != NULL) + { + ValRecord** vr = NULL; + if(a->isnull) + printf("ERROR: range boundary is given as NULL\n"); + + /* TODO: how to understand which type of range it is */ + sp->cmp_type = 15; + sp->num_values = 2; + vr = (ValRecord**) GDKrealloc(sp->values, 
2*sizeof(ValRecord*)); + if(vr == NULL) + { + printf("ERROR: can not allocate memory\n"); + } + sp->values = vr; + /* TODO: check if ef is always the upper boundary of the range */ + sp->values[1] = &(a->data); + printf("atom2sql: %s\n", atom2sql(a)); + } + else + { + printf("ERROR: NOT atom or NO value in atom!\n"); + } + } + + if(er->type == e_convert) + er = er->l; + + + + if (is_atom(er->type) && (a = exp_value(er, sql->args, sql->argc)) != NULL) + { + + if(a->isnull) + printf("ERROR: primary key should not be NULL\n"); + + + switch(sp->cmp_type) + { + case 0: + case 1: + case 2: + case 3: + case 4: + case 5: + sp->num_values = 1; + sp->values[0] = &(a->data); + break; + case 6: + case 7: + /* not handled (yet) */ + printf("ERROR: case not handled yet!\n"); + break; + case 8: + case 9: + /* TODO: handle IN and NOT IN */ + printf("ERROR: case not handled yet!\n"); + break; + case 10: + case 11: + /* not handled (yet) */ + printf("ERROR: case not handled yet!\n"); + break; + case 12: _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list