Changeset: a13730e52ea3 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a13730e52ea3
Modified Files:
        sql/backends/monet5/sql_statistics.c
        sql/server/rel_optimizer.c
        sql/server/rel_optimizer.h
        sql/server/rel_planner.c
        sql/server/rel_planner.h
        sql/storage/store.c
Branch: default
Log Message:

improved cost based plan selection
(to test use sql_debug=256)


diffs (truncated from 921 to 300 lines):

diff --git a/sql/backends/monet5/sql_statistics.c 
b/sql/backends/monet5/sql_statistics.c
--- a/sql/backends/monet5/sql_statistics.c
+++ b/sql/backends/monet5/sql_statistics.c
@@ -40,7 +40,7 @@ sql_analyze(Client cntxt, MalBlkPtr mb, 
        sql_trans *tr = m->session->tr;
        node *nsch, *ntab, *ncol;
        char *query, *dquery;
-       char *maxval, *minval;
+       char *maxval = NULL, *minval = NULL;
        str sch = 0, tbl = 0, col = 0;
        int sorted;
        lng nils = 0;
@@ -58,13 +58,9 @@ sql_analyze(Client cntxt, MalBlkPtr mb, 
        }
        dquery = (char *) GDKzalloc(8192);
        query = (char *) GDKzalloc(8192);
-       maxval = (char *) GDKzalloc(8192);
-       minval = (char *) GDKzalloc(8192);
-       if (!(dquery && query && maxval && minval)) {
+       if (!(dquery && query)) {
                GDKfree(dquery);
                GDKfree(query);
-               GDKfree(maxval);
-               GDKfree(minval);
                throw(SQL, "analyze", MAL_MALLOC_FAIL);
        }
 
@@ -101,6 +97,9 @@ sql_analyze(Client cntxt, MalBlkPtr mb, 
                                                BAT *bn = 
store_funcs.bind_col(tr, c, RDONLY), *br;
                                                BAT *bsample;
                                                lng sz = BATcount(bn);
+                                               int (*tostr)(str*,int*,const 
void*) = BATatoms[bn->ttype].atomToStr; \
+                                               int len = 0;
+                                               void *val=0;
 
                                                if (col && strcmp(bc->name, 
col))
                                                        continue;
@@ -133,55 +132,20 @@ sql_analyze(Client cntxt, MalBlkPtr mb, 
                                                sorted = BATtordered(bn);
 
                                                // Gather the min/max value for 
builtin types
-#define minmax(TYPE,FMT,CAST) \
-{\
-       TYPE *val=0;\
-       val= BATmax(bn,0);\
-       if ( ATOMcmp(bn->ttype,val, ATOMnil(bn->ttype))== 0)\
-               snprintf(maxval,8192,"nil");\
-       else snprintf(maxval,8192,FMT,CAST *val);\
-       GDKfree(val);\
-       val= BATmin(bn,0);\
-       if ( ATOMcmp(bn->ttype,val, ATOMnil(bn->ttype))== 0)\
-               snprintf(minval,8192,"nil");\
-       else snprintf(minval,8192,FMT,CAST *val);\
-       GDKfree(val);\
-       break;\
-}
                                                width = bn->T->width;
-                                               switch (bn->ttype) {
-                                               case TYPE_sht:
-                                                       minmax(sht, "%d",);
-                                               case TYPE_int:
-                                                       minmax(int, "%d",);
-                                               case TYPE_lng:
-                                                       minmax(lng, LLFMT,);
-#ifdef HAVE_HGE
-                                               case TYPE_hge:
-                                                       minmax(hge, "%.40g", 
(dbl));
-#endif
-                                               case TYPE_flt:
-                                                       minmax(flt, "%f",);
-                                               case TYPE_dbl:
-                                                       minmax(dbl, "%f",);
-                                               case TYPE_str:
-                                               {
-                                                       BUN p, q;
-                                                       double sum = 0;
-                                                       BATiter bi = 
bat_iterator(bn);
-                                                       BATloop(bn, p, q) {
-                                                               str s = 
BUNtail(bi, p);
-                                                               if (s != NULL 
&& strcmp(s, str_nil))
-                                                                       sum += 
(int) strlen(s);
-                                                       }
-                                                       if (sz)
-                                                               width = (int) 
(sum / sz);
-                                               }
-                                                       /* fall through */
 
-                                               default:
-                                                       snprintf(maxval, 8192, 
"nil");
-                                                       snprintf(minval, 8192, 
"nil");
+                                               if (tostr) { 
+                                                       val = BATmax(bn,0); len 
= 0;
+                                                       tostr(&maxval, 
&len,val); 
+                                                       GDKfree(val);
+                                                       val = BATmin(bn,0); len 
= 0;
+                                                       tostr(&minval, 
&len,val); 
+                                                       GDKfree(val);
+                                               } else {
+                                                       maxval = (char *) 
GDKzalloc(4);
+                                                       minval = (char *) 
GDKzalloc(4);
+                                                       snprintf(maxval, 4, 
"nil");
+                                                       snprintf(minval, 4, 
"nil");
                                                }
                                                snprintf(query, 8192, "insert 
into sys.statistics values('%s','%s','%s','%s',%d,now()," LLFMT "," LLFMT "," 
LLFMT "," LLFMT ",'%s','%s',%s);", b->name, bt->name, bc->name, 
c->type.type->sqlname, width,
                                                         (samplesize ? 
samplesize : sz), sz, uniq, nils, minval, maxval, sorted ? "true" : "false");
diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c
--- a/sql/server/rel_optimizer.c
+++ b/sql/server/rel_optimizer.c
@@ -51,7 +51,7 @@ static sql_subfunc *find_func( mvc *sql,
  */
 
 /* currently we only find simple column expressions */
-static void *
+void *
 name_find_column( sql_rel *rel, char *rname, char *name, int pnr, sql_rel **bt 
) 
 {
        sql_exp *alias = NULL;
@@ -772,8 +772,14 @@ order_joins(mvc *sql, list *rels, list *
        /* find foreign keys and reorder the expressions on reducing quality */
        sdje = find_fk(sql, rels, exps);
 
-       if (list_length(rels) > 2 && mvc_debug_on(sql, 256))
-               return rel_planner(sql, rels, sdje);
+       if (list_length(rels) > 2 && mvc_debug_on(sql, 256)) {
+               for(djn = sdje->h; djn; djn = djn->next ) {
+                       sql_exp *e = djn->data;
+                       list_remove_data(exps, e);
+               }
+               top =  rel_planner(sql, rels, sdje, exps);
+               return top;
+       }
 
        /* open problem, some expressions use more than 2 relations */
        /* For example a.x = b.y * c.z; */
diff --git a/sql/server/rel_optimizer.h b/sql/server/rel_optimizer.h
--- a/sql/server/rel_optimizer.h
+++ b/sql/server/rel_optimizer.h
@@ -26,5 +26,6 @@ extern sql_rel * rel_optimizer(mvc *sql,
 
 extern int exp_joins_rels(sql_exp *e, list *rels);
 
+extern void *name_find_column( sql_rel *rel, char *rname, char *name, int pnr, 
sql_rel **bt );
 
 #endif /*_REL_OPTIMIZER_H_*/
diff --git a/sql/server/rel_planner.c b/sql/server/rel_planner.c
--- a/sql/server/rel_planner.c
+++ b/sql/server/rel_planner.c
@@ -32,8 +32,8 @@ typedef struct memoitem {
        int done;
        int level;
        lng count;
-       dbl sel;
-       lng cost;
+       lng width;
+       dbl cost;
        void *data;
 } memoitem;
 
@@ -45,7 +45,8 @@ typedef struct memojoin {
        memoitem *l, *r;
        int rules;      /* handled rules */
        int prop;       /* pkey, fkey, ukey */
-       lng cost;
+       dbl cost;
+       dbl sel;
        sql_exp *e;
 } memojoin;
 
@@ -120,10 +121,11 @@ memoitem_create( list *memo, sql_allocat
                mi = SA_NEW(sa, memoitem); 
        mi->name = sa_strdup(sa, name);
        mi->joins = (rname)?sa_list(sa):NULL;
-       mi->done = (rname == NULL);
+       mi->done = (rname)?0:1;
        mi->level = level;
-       mi->count = 0;
+       mi->count = 1;
        mi->cost = 0;
+       mi->width = 8;
        mi->data = NULL;
        mi->rels = sa_list(sa);
        mi->exps = sa_list(sa);
@@ -134,19 +136,18 @@ memoitem_create( list *memo, sql_allocat
 static lng
 rel_getcount(mvc *sql, sql_rel *rel)
 {
-       lng cnt = 1;
-
        if (!sql->session->tr)
-               return 1;
+               return 0;
 
        switch(rel->op) {
        case op_basetable: {
                sql_table *t = rel->l;
 
-               if (isTable(t)) 
-                       cnt = store_funcs.count_col(sql->session->tr, 
t->columns.set->h->data, 1);
-               if (cnt)
-                       return cnt;
+               if (t && isTable(t))
+                       return store_funcs.count_col(sql->session->tr, 
t->columns.set->h->data, 1);
+               if (!t && rel->r) /* dict */
+                       return sql_trans_dist_count(sql->session->tr, rel->r);
+               return 0;
        }       break;
        case op_select:
        case op_project:
@@ -154,44 +155,233 @@ rel_getcount(mvc *sql, sql_rel *rel)
                        return rel_getcount(sql, rel->l);
                return 1;
        default:
+               return 0;
+       }
+       return 0;
+}
+
+static lng
+rel_getwidth(mvc *sql, sql_rel *rel)
+{
+       if (!sql->session->tr)
+               return 0;
+
+       switch(rel->op) {
+       case op_basetable: {
+               sql_table *t = rel->l;
+
+               if (t && isTable(t))
+                       return 4*list_length(rel->exps);
+               return 0;
+       }       
+       case op_select:
+               if (rel->l)
+                       return rel_getwidth(sql, rel->l);
                return 1;
+       case op_project:
+               if (rel->l)
+                       return 4*list_length(rel->exps);
+               return 1;
+       default:
+               return 0;
        }
-       return 1;
+       return 0;
+}
+
+static lng
+exp_getdcount( mvc *sql, sql_rel *r , sql_exp *e, size_t count)
+{
+       switch(e->type) {
+       case e_column: {
+               /* find col */
+               sql_rel *bt = NULL;
+               sql_column *c = name_find_column(r, e->l, e->r, -1, &bt);
+               if (c) {
+                       size_t dcount = sql_trans_dist_count(sql->session->tr, 
c);
+                       if (dcount != 0 && dcount < count)
+                               return dcount;
+               }
+               return count;
+       } 
+       case e_cmp:
+               assert(0);
+       
+
+       case e_convert:
+               if (e->l)
+                       return exp_getdcount(sql, r, e->l, count);
+       case e_func:
+       case e_aggr:
+       case e_atom:
+       case e_psm:
+               return count;
+       }
+       return count;
+}
+
+static int
+exp_getranges( mvc *sql, sql_rel *r , sql_exp *e, void **min, void **max)
+{
+       switch(e->type) {
+       case e_column: {
+               /* find col */
+               sql_rel *bt = NULL;
+               sql_column *c = name_find_column(r, e->l, e->r, -1, &bt);
+               if (c) 
+                       return sql_trans_ranges(sql->session->tr, c, min, max);
+               return 0;
+       } 
+       case e_cmp:
+               assert(0);
+
+       case e_convert:
+               if (e->l)
+                       return exp_getranges(sql, r, e->l, min, max);
+       case e_func:
+       case e_aggr:
+       case e_atom:
+       case e_psm:
+               return 0;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to