Changeset: a13730e52ea3 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a13730e52ea3 Modified Files: sql/backends/monet5/sql_statistics.c sql/server/rel_optimizer.c sql/server/rel_optimizer.h sql/server/rel_planner.c sql/server/rel_planner.h sql/storage/store.c Branch: default Log Message:
improved cost based plan selection (to test use sql_debug=256) diffs (truncated from 921 to 300 lines): diff --git a/sql/backends/monet5/sql_statistics.c b/sql/backends/monet5/sql_statistics.c --- a/sql/backends/monet5/sql_statistics.c +++ b/sql/backends/monet5/sql_statistics.c @@ -40,7 +40,7 @@ sql_analyze(Client cntxt, MalBlkPtr mb, sql_trans *tr = m->session->tr; node *nsch, *ntab, *ncol; char *query, *dquery; - char *maxval, *minval; + char *maxval = NULL, *minval = NULL; str sch = 0, tbl = 0, col = 0; int sorted; lng nils = 0; @@ -58,13 +58,9 @@ sql_analyze(Client cntxt, MalBlkPtr mb, } dquery = (char *) GDKzalloc(8192); query = (char *) GDKzalloc(8192); - maxval = (char *) GDKzalloc(8192); - minval = (char *) GDKzalloc(8192); - if (!(dquery && query && maxval && minval)) { + if (!(dquery && query)) { GDKfree(dquery); GDKfree(query); - GDKfree(maxval); - GDKfree(minval); throw(SQL, "analyze", MAL_MALLOC_FAIL); } @@ -101,6 +97,9 @@ sql_analyze(Client cntxt, MalBlkPtr mb, BAT *bn = store_funcs.bind_col(tr, c, RDONLY), *br; BAT *bsample; lng sz = BATcount(bn); + int (*tostr)(str*,int*,const void*) = BATatoms[bn->ttype].atomToStr; \ + int len = 0; + void *val=0; if (col && strcmp(bc->name, col)) continue; @@ -133,55 +132,20 @@ sql_analyze(Client cntxt, MalBlkPtr mb, sorted = BATtordered(bn); // Gather the min/max value for builtin types -#define minmax(TYPE,FMT,CAST) \ -{\ - TYPE *val=0;\ - val= BATmax(bn,0);\ - if ( ATOMcmp(bn->ttype,val, ATOMnil(bn->ttype))== 0)\ - snprintf(maxval,8192,"nil");\ - else snprintf(maxval,8192,FMT,CAST *val);\ - GDKfree(val);\ - val= BATmin(bn,0);\ - if ( ATOMcmp(bn->ttype,val, ATOMnil(bn->ttype))== 0)\ - snprintf(minval,8192,"nil");\ - else snprintf(minval,8192,FMT,CAST *val);\ - GDKfree(val);\ - break;\ -} width = bn->T->width; - switch (bn->ttype) { - case TYPE_sht: - minmax(sht, "%d",); - case TYPE_int: - minmax(int, "%d",); - case TYPE_lng: - minmax(lng, LLFMT,); -#ifdef HAVE_HGE - case TYPE_hge: - minmax(hge, "%.40g", (dbl)); -#endif - case TYPE_flt: - minmax(flt, "%f",); - case TYPE_dbl: - minmax(dbl, "%f",); - case TYPE_str: - { - BUN p, q; - double sum = 0; - BATiter bi = bat_iterator(bn); - BATloop(bn, p, q) { - str s = BUNtail(bi, p); - if (s != NULL && strcmp(s, str_nil)) - sum += (int) strlen(s); - } - if (sz) - width = (int) (sum / sz); - } - /* fall through */ - default: - snprintf(maxval, 8192, "nil"); - snprintf(minval, 8192, "nil"); + if (tostr) { + val = BATmax(bn,0); len = 0; + tostr(&maxval, &len,val); + GDKfree(val); + val = BATmin(bn,0); len = 0; + tostr(&minval, &len,val); + GDKfree(val); + } else { + maxval = (char *) GDKzalloc(4); + minval = (char *) GDKzalloc(4); + snprintf(maxval, 4, "nil"); + snprintf(minval, 4, "nil"); } snprintf(query, 8192, "insert into sys.statistics values('%s','%s','%s','%s',%d,now()," LLFMT "," LLFMT "," LLFMT "," LLFMT ",'%s','%s',%s);", b->name, bt->name, bc->name, c->type.type->sqlname, width, (samplesize ? samplesize : sz), sz, uniq, nils, minval, maxval, sorted ? "true" : "false"); diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c --- a/sql/server/rel_optimizer.c +++ b/sql/server/rel_optimizer.c @@ -51,7 +51,7 @@ static sql_subfunc *find_func( mvc *sql, */ /* currently we only find simple column expressions */ -static void * +void * name_find_column( sql_rel *rel, char *rname, char *name, int pnr, sql_rel **bt ) { sql_exp *alias = NULL; @@ -772,8 +772,14 @@ order_joins(mvc *sql, list *rels, list * /* find foreign keys and reorder the expressions on reducing quality */ sdje = find_fk(sql, rels, exps); - if (list_length(rels) > 2 && mvc_debug_on(sql, 256)) - return rel_planner(sql, rels, sdje); + if (list_length(rels) > 2 && mvc_debug_on(sql, 256)) { + for(djn = sdje->h; djn; djn = djn->next ) { + sql_exp *e = djn->data; + list_remove_data(exps, e); + } + top = rel_planner(sql, rels, sdje, exps); + return top; + } /* open problem, some expressions use more than 2 relations */ /* For example a.x = b.y * c.z; */ diff --git a/sql/server/rel_optimizer.h b/sql/server/rel_optimizer.h --- a/sql/server/rel_optimizer.h +++ b/sql/server/rel_optimizer.h @@ -26,5 +26,6 @@ extern sql_rel * rel_optimizer(mvc *sql, extern int exp_joins_rels(sql_exp *e, list *rels); +extern void *name_find_column( sql_rel *rel, char *rname, char *name, int pnr, sql_rel **bt ); #endif /*_REL_OPTIMIZER_H_*/ diff --git a/sql/server/rel_planner.c b/sql/server/rel_planner.c --- a/sql/server/rel_planner.c +++ b/sql/server/rel_planner.c @@ -32,8 +32,8 @@ typedef struct memoitem { int done; int level; lng count; - dbl sel; - lng cost; + lng width; + dbl cost; void *data; } memoitem; @@ -45,7 +45,8 @@ typedef struct memojoin { memoitem *l, *r; int rules; /* handled rules */ int prop; /* pkey, fkey, ukey */ - lng cost; + dbl cost; + dbl sel; sql_exp *e; } memojoin; @@ -120,10 +121,11 @@ memoitem_create( list *memo, sql_allocat mi = SA_NEW(sa, memoitem); mi->name = sa_strdup(sa, name); mi->joins = (rname)?sa_list(sa):NULL; - mi->done = (rname == NULL); + mi->done = (rname)?0:1; mi->level = level; - mi->count = 0; + mi->count = 1; mi->cost = 0; + mi->width = 8; mi->data = NULL; mi->rels = sa_list(sa); mi->exps = sa_list(sa); @@ -134,19 +136,18 @@ memoitem_create( list *memo, sql_allocat static lng rel_getcount(mvc *sql, sql_rel *rel) { - lng cnt = 1; - if (!sql->session->tr) - return 1; + return 0; switch(rel->op) { case op_basetable: { sql_table *t = rel->l; - if (isTable(t)) - cnt = store_funcs.count_col(sql->session->tr, t->columns.set->h->data, 1); - if (cnt) - return cnt; + if (t && isTable(t)) + return store_funcs.count_col(sql->session->tr, t->columns.set->h->data, 1); + if (!t && rel->r) /* dict */ + return sql_trans_dist_count(sql->session->tr, rel->r); + return 0; } break; case op_select: case op_project: @@ -154,44 +155,233 @@ rel_getcount(mvc *sql, sql_rel *rel) return rel_getcount(sql, rel->l); return 1; default: + return 0; + } + return 0; +} + +static lng +rel_getwidth(mvc *sql, sql_rel *rel) +{ + if (!sql->session->tr) + return 0; + + switch(rel->op) { + case op_basetable: { + sql_table *t = rel->l; + + if (t && isTable(t)) + return 4*list_length(rel->exps); + return 0; + } + case op_select: + if (rel->l) + return rel_getwidth(sql, rel->l); return 1; + case op_project: + if (rel->l) + return 4*list_length(rel->exps); + return 1; + default: + return 0; } - return 1; + return 0; +} + +static lng +exp_getdcount( mvc *sql, sql_rel *r , sql_exp *e, size_t count) +{ + switch(e->type) { + case e_column: { + /* find col */ + sql_rel *bt = NULL; + sql_column *c = name_find_column(r, e->l, e->r, -1, &bt); + if (c) { + size_t dcount = sql_trans_dist_count(sql->session->tr, c); + if (dcount != 0 && dcount < count) + return dcount; + } + return count; + } + case e_cmp: + assert(0); + + + case e_convert: + if (e->l) + return exp_getdcount(sql, r, e->l, count); + case e_func: + case e_aggr: + case e_atom: + case e_psm: + return count; + } + return count; +} + +static int +exp_getranges( mvc *sql, sql_rel *r , sql_exp *e, void **min, void **max) +{ + switch(e->type) { + case e_column: { + /* find col */ + sql_rel *bt = NULL; + sql_column *c = name_find_column(r, e->l, e->r, -1, &bt); + if (c) + return sql_trans_ranges(sql->session->tr, c, min, max); + return 0; + } + case e_cmp: + assert(0); + + case e_convert: + if (e->l) + return exp_getranges(sql, r, e->l, min, max); + case e_func: + case e_aggr: + case e_atom: + case e_psm: + return 0; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list