Changeset: 1f9ff2c0aae3 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/1f9ff2c0aae3
Modified Files:
        monetdb5/modules/mal/calc.c
        sql/backends/monet5/rel_bin.c
        sql/common/sql_types.c
        sql/scripts/39_analytics.sql
        sql/server/rel_optimize_proj.c
Branch: new-avg
Log Message:

new (old) avg implementation


diffs (truncated from 333 to 300 lines):

diff --git a/monetdb5/modules/mal/calc.c b/monetdb5/modules/mal/calc.c
--- a/monetdb5/modules/mal/calc.c
+++ b/monetdb5/modules/mal/calc.c
@@ -127,6 +127,29 @@ CMDvarDIV(Client cntxt, MalBlkPtr mb, Ma
        return MAL_SUCCEED;
 }
 
+static str
+CMDvarDIV2(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+       (void) cntxt;
+       (void) mb;
+       ValRecord v1, v2;
+
+       v1.vtype = TYPE_lng;
+       v1.val.lval = 1;
+       v2.vtype = TYPE_lng;
+       v2.val.lval = 2;
+
+       if (VARcalcsub(&v1, &stk->stk[getArg(pci, 2)], &v1) != GDK_SUCCEED)
+               return mythrow(MAL, "calc./", OPERATION_FAILED);
+       if (VARcalcdiv(&v2, &v1, &v2) != GDK_SUCCEED)
+               return mythrow(MAL, "calc./", OPERATION_FAILED);
+       if (VARcalcadd(&stk->stk[getArg(pci, 0)], &stk->stk[getArg(pci, 1)], 
&v2) != GDK_SUCCEED)
+               return mythrow(MAL, "calc./", OPERATION_FAILED);
+       if (VARcalcdiv(&stk->stk[getArg(pci, 0)], &stk->stk[getArg(pci, 0)], 
&stk->stk[getArg(pci, 2)]) != GDK_SUCCEED)
+               return mythrow(MAL, "calc./", OPERATION_FAILED);
+       return MAL_SUCCEED;
+}
+
 
 static str
 CMDvarMOD(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
@@ -1057,6 +1080,11 @@ mel_func calc_init_funcs[] = {
  pattern("calc", "*", CMDvarMUL, false, "Return V1 * V2, signal error on 
overflow", args(1,3, arg("",hge),arg("v1",dbl),arg("v2",int))),
  pattern("calc", "*", CMDvarMUL, false, "Return V1 * V2, signal error on 
overflow", args(1,3, arg("",hge),arg("v1",dbl),arg("v2",lng))),
  pattern("calc", "*", CMDvarMUL, false, "Return V1 * V2, signal error on 
overflow", args(1,3, arg("",hge),arg("v1",dbl),arg("v2",hge))),
+ pattern("calc", "num_div", CMDvarDIV2, false, "Return (V1+(V2-1)/2) / V2, nil 
on divide by zero", args(1,3, arg("",hge),arg("v1",hge),arg("v2",lng))),
+ pattern("calc", "num_div", CMDvarDIV2, false, "Return (V1+(V2-1)/2) / V2, nil 
on divide by zero", args(1,3, arg("",lng),arg("v1",lng),arg("v2",lng))),
+ pattern("calc", "num_div", CMDvarDIV2, false, "Return (V1+(V2-1)/2) / V2, nil 
on divide by zero", args(1,3, arg("",int),arg("v1",int),arg("v2",lng))),
+ pattern("calc", "num_div", CMDvarDIV2, false, "Return (V1+(V2-1)/2) / V2, nil 
on divide by zero", args(1,3, arg("",sht),arg("v1",sht),arg("v2",lng))),
+ pattern("calc", "num_div", CMDvarDIV2, false, "Return (V1+(V2-1)/2) / V2, nil 
on divide by zero", args(1,3, arg("",bte),arg("v1",bte),arg("v2",lng))),
  pattern("calc", "/", CMDvarDIV, false, "Return V1 / V2, signal error on 
divide by zero", args(1,3, arg("",hge),arg("v1",bte),arg("v2",bte))),
  pattern("calc", "/", CMDvarDIV, false, "Return V1 / V2, signal error on 
divide by zero", args(1,3, arg("",hge),arg("v1",bte),arg("v2",sht))),
  pattern("calc", "/", CMDvarDIV, false, "Return V1 / V2, signal error on 
divide by zero", args(1,3, arg("",hge),arg("v1",bte),arg("v2",int))),
diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c
--- a/sql/backends/monet5/rel_bin.c
+++ b/sql/backends/monet5/rel_bin.c
@@ -4380,7 +4380,7 @@ sql_insert_check_null(backend *be, sql_t
 {
        mvc *sql = be->mvc;
        node *m, *n;
-       sql_subfunc *cnt = sql_bind_func(sql, "sys", "count", 
sql_bind_localtype("void"), NULL, F_AGGR, true);
+       sql_subfunc *cnt = NULL;
 
        for (n = ol_first_node(t->columns), m = inserts->h; n && m;
                n = n->next, m = m->next) {
@@ -4393,6 +4393,8 @@ sql_insert_check_null(backend *be, sql_t
 
                        if (!(s->key && s->nrcols == 0)) {
                                s = stmt_selectnil(be, column(be, i));
+                               if (!cnt)
+                                       cnt = sql_bind_func(sql, "sys", 
"count", sql_bind_localtype("void"), NULL, F_AGGR, true);
                                s = stmt_aggr(be, s, NULL, NULL, cnt, 1, 0, 1);
                        } else {
                                sql_subfunc *isnil = sql_bind_func(sql, "sys", 
"isnull", &c->type, NULL, F_FUNC, true);
diff --git a/sql/common/sql_types.c b/sql/common/sql_types.c
--- a/sql/common/sql_types.c
+++ b/sql/common/sql_types.c
@@ -1209,6 +1209,7 @@ sqltypeinit( sql_allocator *sa)
                                sql_create_func(sa, "sql_div", "calc", "/", 
FALSE, FALSE, SCALE_DIV, 0, *t, 2, *t, *u);
                        }
                }
+               sql_create_func(sa, "num_div", "calc", "num_div", FALSE, FALSE, 
SCALE_FIX, 0, *t, 2, *t, LNG);
        }
 
        /* all numericals */
diff --git a/sql/scripts/39_analytics.sql b/sql/scripts/39_analytics.sql
--- a/sql/scripts/39_analytics.sql
+++ b/sql/scripts/39_analytics.sql
@@ -336,9 +336,11 @@ GRANT EXECUTE ON AGGREGATE median_avg(IN
 create aggregate median_avg(val BIGINT) returns DOUBLE
        external name "aggr"."median_avg";
 GRANT EXECUTE ON AGGREGATE median_avg(BIGINT) TO PUBLIC;
+/* like avg we need a proper decimal returning version
 create aggregate median_avg(val DECIMAL) returns DOUBLE
        external name "aggr"."median_avg";
 GRANT EXECUTE ON AGGREGATE median_avg(DECIMAL) TO PUBLIC;
+*/
 create aggregate median_avg(val REAL) returns DOUBLE
        external name "aggr"."median_avg";
 GRANT EXECUTE ON AGGREGATE median_avg(REAL) TO PUBLIC;
diff --git a/sql/server/rel_optimize_proj.c b/sql/server/rel_optimize_proj.c
--- a/sql/server/rel_optimize_proj.c
+++ b/sql/server/rel_optimize_proj.c
@@ -2878,6 +2878,220 @@ rel_simplify_count(visitor *v, sql_rel *
        return rel;
 }
 
+static sql_subfunc *
+find_func( mvc *sql, char *name, list *exps )
+{
+       list * l = sa_list(sql->sa);
+       node *n;
+
+       for(n = exps->h; n; n = n->next)
+               append(l, exp_subtype(n->data));
+       return sql_bind_func_(sql, "sys", name, l, F_FUNC, false);
+}
+
+static sql_exp *
+rel_find_aggr_exp(mvc *sql, sql_rel *rel, list *exps, sql_exp *e, char *name)
+{
+       list *ea = e->l;
+       sql_exp *a = NULL, *eae;
+       node *n;
+
+       (void)rel;
+       if (list_length(ea) != 1)
+               return NULL;
+       eae = ea->h->data;
+       if (eae->type != e_column)
+               return NULL;
+       for( n = exps->h; n; n = n->next) {
+               a = n->data;
+
+               if (a->type == e_aggr) {
+                       sql_subfunc *af = a->f;
+                       list *aa = a->l;
+
+                       /* TODO handle distinct and no-nil etc ! */
+                       if (strcmp(af->func->base.name, name) == 0 &&
+                               /* TODO handle count (has no args!!) */
+                           aa && list_length(aa) == 1) {
+                               sql_exp *aae = aa->h->data;
+
+                               if (eae->type == e_column &&
+                                   ((!aae->l && !eae->l) ||
+                                   (aae->l && eae->l &&
+                                   strcmp(aae->l, eae->l) == 0)) &&
+                                   (aae->r && eae->r &&
+                                   strcmp(aae->r, eae->r) == 0))
+                                       return exp_ref(sql, a);
+                       }
+               }
+       }
+       return NULL;
+}
+
+/* rewrite avg into sum/count
+ *     float/double use explicit kahansum ie output (sum+rest)/count
+ *     int (return double). ((long) double)sum)/((long) double)count).
+ *     decimal (return decimal). (sum+(count/2))/count.
+ *     */
+static sql_rel *
+rel_avg_rewrite(visitor *v, sql_rel *rel)
+{
+       mvc *sql = v->sql;
+       if (is_groupby(rel->op)) {
+               list *pexps, *nexps = new_exp_list(sql->sa), *avgs = 
new_exp_list(sql->sa);
+               list *aexps = new_exp_list(sql->sa); /* alias list */
+               node *m, *n;
+
+               if (list_empty(rel->r) || mvc_debug_on(sql, 64))
+                       return rel;
+
+               /* Find all avg's */
+               for (m = rel->exps->h; m; m = m->next) {
+                       sql_exp *e = m->data;
+
+                       if (e->type == e_aggr) {
+                               sql_subfunc *a = e->f;
+
+                               if (strcmp(a->func->base.name, "avg") == 0) {
+                                       append(avgs, e);
+                                       continue;
+                               }
+                       }
+                       /* alias for local aggr exp */
+                       if (e->type == e_column &&
+                          (!list_find_exp(rel->r, e) &&
+                           !rel_find_exp(rel->l, e)))
+                               append(aexps, e);
+                       else
+                               append(nexps, e);
+               }
+               if (!list_length(avgs))
+                       return rel;
+
+               /* For each avg, find count and sum */
+               for (m = avgs->h; m; m = m->next) {
+                       list *args;
+                       sql_exp *avg = m->data, *navg, *cond, *cnt_d;
+                       sql_exp *cnt = rel_find_aggr_exp(sql, rel, nexps, avg, 
"count");
+                       sql_exp *sum = rel_find_aggr_exp(sql, rel, nexps, avg, 
"sum");
+                       sql_subfunc *div, *ifthen, *cmp;
+                       const char *rname = NULL, *name = NULL;
+                       list *l = avg->l;
+                       sql_subtype *avg_input_t = exp_subtype(l->h->data);
+
+                       rname = exp_relname(avg);
+                       name = exp_name(avg);
+
+                       /* create nsum/cnt exp */
+                       if (!cnt) {
+                               sql_subfunc *cf = sql_bind_func_(sql, "sys", 
"count", append(sa_list(sql->sa), avg_input_t), F_AGGR, false);
+                               sql_exp *e = exp_aggr(sql->sa, list_dup(avg->l, 
(fdup)NULL), cf, need_distinct(avg), need_no_nil(avg), avg->card, has_nil(avg));
+
+                               append(nexps, e);
+                               cnt = exp_ref(sql, e);
+                       }
+                       if (!sum) {
+                               sql_subfunc *sf = sql_bind_func_(sql, "sys", 
"sum", append(sa_list(sql->sa), avg_input_t), F_AGGR, false);
+                               sql_exp *e = exp_aggr(sql->sa, list_dup(avg->l, 
(fdup)NULL), sf, need_distinct(avg), need_no_nil(avg), avg->card, has_nil(avg));
+
+                               append(nexps, e);
+                               sum = exp_ref(sql, e);
+                       }
+                       cnt_d = cnt;
+
+                       sql_subtype *avg_t = exp_subtype(avg);
+                       sql_subtype *dbl_t = sql_bind_localtype("dbl");
+                       if (subtype_cmp(avg_t, dbl_t) == 0 || 
EC_INTERVAL(avg_t->type->eclass)) {
+                               /* check for count = 0 (or move into funcs) */
+                               args = new_exp_list(sql->sa);
+                               append(args, cnt);
+                               append(args, exp_atom_lng(sql->sa, 0));
+                               cmp = find_func(sql, "=", args);
+                               assert(cmp);
+                               cond = exp_op(sql->sa, args, cmp);
+
+                               args = new_exp_list(sql->sa);
+                               append(args, cond);
+                               append(args, exp_atom(sql->sa, 
atom_general(sql->sa, exp_subtype(cnt_d), NULL)));
+                               /* TODO only ifthenelse if value column may 
have nil's*/
+                               append(args, cnt_d);
+                               ifthen = find_func(sql, "ifthenelse", args);
+                               assert(ifthen);
+                               cnt_d = exp_op(sql->sa, args, ifthen);
+
+                               if (subtype_cmp(avg_t, dbl_t) == 0) {
+                                       cnt_d = exp_convert(sql->sa, cnt, 
exp_subtype(cnt), dbl_t);
+                                       sum = exp_convert(sql->sa, sum, 
exp_subtype(sum), dbl_t);
+                               }
+
+                               args = new_exp_list(sql->sa);
+
+                               sql_subtype *st = exp_subtype(sum);
+                               sql_subtype *ct = exp_subtype(cnt_d);
+                               /* convert sum flt -> dbl */
+                               if (st->type->eclass == EC_FLT && 
ct->type->eclass == EC_FLT && st->type->localtype < ct->type->localtype) {
+                                       sum = exp_convert(sql->sa, sum, st, ct);
+                               } else if (st->type->eclass == EC_FLT) {
+                                       if (ct->type != st->type) {
+                                               sql_subtype *dbl_t = 
sql_bind_localtype("dbl");
+                                               if (ct->type->eclass != EC_FLT 
|| st->type == dbl_t->type)
+                                                       cnt_d = 
exp_convert(sql->sa, cnt_d, exp_subtype(cnt_d), st);
+                                       }
+                               }
+                               append(args, sum);
+                               append(args, cnt_d);
+                               div = find_func(sql, "sql_div", args);
+                               assert(div);
+                               navg = exp_op(sql->sa, args, div);
+                       } else {
+                               args = sa_list(sql->sa);
+                               append(args, sum);
+                               append(args, cnt_d);
+                               div = find_func(sql, "num_div", args);
+                               assert(div);
+                               navg = exp_op(sql->sa, args, div);
+                       }
+
+                       if (subtype_cmp(exp_subtype(avg), exp_subtype(navg)) != 
0)
+                               navg = exp_convert(sql->sa, navg, 
exp_subtype(navg), exp_subtype(avg));
+
+                       exp_setname(sql->sa, navg, rname, name );
+                       m->data = navg;
+               }
+               pexps = new_exp_list(sql->sa);
+               for (m = rel->exps->h, n = avgs->h; m; m = m->next) {
+                       sql_exp *e = m->data;
+
+                       if (e->type == e_aggr) {
+                               sql_subfunc *a = e->f;
+
+                               if (strcmp(a->func->base.name, "avg") == 0) {
+                                       sql_exp *avg = n->data;
+
+                                       append(pexps, avg);
+                                       n = n->next;
+                                       continue;
+                               }
+                       }
+                       /* alias for local aggr exp */
+                       if (e->type == e_column && !rel_find_exp(rel->l, e))
+                               append(pexps, e);
+                       else
+                               append(pexps, exp_column(sql->sa, 
exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), 
is_unique(e), is_intern(e)));
+               }
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to