Changeset: 1f9ff2c0aae3 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/1f9ff2c0aae3 Modified Files: monetdb5/modules/mal/calc.c sql/backends/monet5/rel_bin.c sql/common/sql_types.c sql/scripts/39_analytics.sql sql/server/rel_optimize_proj.c Branch: new-avg Log Message:
new (old) avg implementation diffs (truncated from 333 to 300 lines): diff --git a/monetdb5/modules/mal/calc.c b/monetdb5/modules/mal/calc.c --- a/monetdb5/modules/mal/calc.c +++ b/monetdb5/modules/mal/calc.c @@ -127,6 +127,29 @@ CMDvarDIV(Client cntxt, MalBlkPtr mb, Ma return MAL_SUCCEED; } +static str +CMDvarDIV2(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) +{ + (void) cntxt; + (void) mb; + ValRecord v1, v2; + + v1.vtype = TYPE_lng; + v1.val.lval = 1; + v2.vtype = TYPE_lng; + v2.val.lval = 2; + + if (VARcalcsub(&v1, &stk->stk[getArg(pci, 2)], &v1) != GDK_SUCCEED) + return mythrow(MAL, "calc./", OPERATION_FAILED); + if (VARcalcdiv(&v2, &v1, &v2) != GDK_SUCCEED) + return mythrow(MAL, "calc./", OPERATION_FAILED); + if (VARcalcadd(&stk->stk[getArg(pci, 0)], &stk->stk[getArg(pci, 1)], &v2) != GDK_SUCCEED) + return mythrow(MAL, "calc./", OPERATION_FAILED); + if (VARcalcdiv(&stk->stk[getArg(pci, 0)], &stk->stk[getArg(pci, 0)], &stk->stk[getArg(pci, 2)]) != GDK_SUCCEED) + return mythrow(MAL, "calc./", OPERATION_FAILED); + return MAL_SUCCEED; +} + static str CMDvarMOD(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) @@ -1057,6 +1080,11 @@ mel_func calc_init_funcs[] = { pattern("calc", "*", CMDvarMUL, false, "Return V1 * V2, signal error on overflow", args(1,3, arg("",hge),arg("v1",dbl),arg("v2",int))), pattern("calc", "*", CMDvarMUL, false, "Return V1 * V2, signal error on overflow", args(1,3, arg("",hge),arg("v1",dbl),arg("v2",lng))), pattern("calc", "*", CMDvarMUL, false, "Return V1 * V2, signal error on overflow", args(1,3, arg("",hge),arg("v1",dbl),arg("v2",hge))), + pattern("calc", "num_div", CMDvarDIV2, false, "Return (V1+(V2-1)/2) / V2, nil on divide by zero", args(1,3, arg("",hge),arg("v1",hge),arg("v2",lng))), + pattern("calc", "num_div", CMDvarDIV2, false, "Return (V1+(V2-1)/2) / V2, nil on divide by zero", args(1,3, arg("",lng),arg("v1",lng),arg("v2",lng))), + pattern("calc", "num_div", CMDvarDIV2, false, "Return (V1+(V2-1)/2) / V2, nil on divide by zero", args(1,3, arg("",int),arg("v1",int),arg("v2",lng))), + pattern("calc", "num_div", CMDvarDIV2, false, "Return (V1+(V2-1)/2) / V2, nil on divide by zero", args(1,3, arg("",sht),arg("v1",sht),arg("v2",lng))), + pattern("calc", "num_div", CMDvarDIV2, false, "Return (V1+(V2-1)/2) / V2, nil on divide by zero", args(1,3, arg("",bte),arg("v1",bte),arg("v2",lng))), pattern("calc", "/", CMDvarDIV, false, "Return V1 / V2, signal error on divide by zero", args(1,3, arg("",hge),arg("v1",bte),arg("v2",bte))), pattern("calc", "/", CMDvarDIV, false, "Return V1 / V2, signal error on divide by zero", args(1,3, arg("",hge),arg("v1",bte),arg("v2",sht))), pattern("calc", "/", CMDvarDIV, false, "Return V1 / V2, signal error on divide by zero", args(1,3, arg("",hge),arg("v1",bte),arg("v2",int))), diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c --- a/sql/backends/monet5/rel_bin.c +++ b/sql/backends/monet5/rel_bin.c @@ -4380,7 +4380,7 @@ sql_insert_check_null(backend *be, sql_t { mvc *sql = be->mvc; node *m, *n; - sql_subfunc *cnt = sql_bind_func(sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true); + sql_subfunc *cnt = NULL; for (n = ol_first_node(t->columns), m = inserts->h; n && m; n = n->next, m = m->next) { @@ -4393,6 +4393,8 @@ sql_insert_check_null(backend *be, sql_t if (!(s->key && s->nrcols == 0)) { s = stmt_selectnil(be, column(be, i)); + if (!cnt) + cnt = sql_bind_func(sql, "sys", "count", sql_bind_localtype("void"), NULL, F_AGGR, true); s = stmt_aggr(be, s, NULL, NULL, cnt, 1, 0, 1); } else { sql_subfunc *isnil = sql_bind_func(sql, "sys", "isnull", &c->type, NULL, F_FUNC, true); diff --git a/sql/common/sql_types.c b/sql/common/sql_types.c --- a/sql/common/sql_types.c +++ b/sql/common/sql_types.c @@ -1209,6 +1209,7 @@ sqltypeinit( sql_allocator *sa) sql_create_func(sa, "sql_div", "calc", "/", FALSE, FALSE, SCALE_DIV, 0, *t, 2, *t, *u); } } + sql_create_func(sa, "num_div", "calc", "num_div", FALSE, FALSE, SCALE_FIX, 0, *t, 2, *t, LNG); } /* all numericals */ diff --git a/sql/scripts/39_analytics.sql b/sql/scripts/39_analytics.sql --- a/sql/scripts/39_analytics.sql +++ b/sql/scripts/39_analytics.sql @@ -336,9 +336,11 @@ GRANT EXECUTE ON AGGREGATE median_avg(IN create aggregate median_avg(val BIGINT) returns DOUBLE external name "aggr"."median_avg"; GRANT EXECUTE ON AGGREGATE median_avg(BIGINT) TO PUBLIC; +/* like avg we need a proper decimal returning version create aggregate median_avg(val DECIMAL) returns DOUBLE external name "aggr"."median_avg"; GRANT EXECUTE ON AGGREGATE median_avg(DECIMAL) TO PUBLIC; +*/ create aggregate median_avg(val REAL) returns DOUBLE external name "aggr"."median_avg"; GRANT EXECUTE ON AGGREGATE median_avg(REAL) TO PUBLIC; diff --git a/sql/server/rel_optimize_proj.c b/sql/server/rel_optimize_proj.c --- a/sql/server/rel_optimize_proj.c +++ b/sql/server/rel_optimize_proj.c @@ -2878,6 +2878,220 @@ rel_simplify_count(visitor *v, sql_rel * return rel; } +static sql_subfunc * +find_func( mvc *sql, char *name, list *exps ) +{ + list * l = sa_list(sql->sa); + node *n; + + for(n = exps->h; n; n = n->next) + append(l, exp_subtype(n->data)); + return sql_bind_func_(sql, "sys", name, l, F_FUNC, false); +} + +static sql_exp * +rel_find_aggr_exp(mvc *sql, sql_rel *rel, list *exps, sql_exp *e, char *name) +{ + list *ea = e->l; + sql_exp *a = NULL, *eae; + node *n; + + (void)rel; + if (list_length(ea) != 1) + return NULL; + eae = ea->h->data; + if (eae->type != e_column) + return NULL; + for( n = exps->h; n; n = n->next) { + a = n->data; + + if (a->type == e_aggr) { + sql_subfunc *af = a->f; + list *aa = a->l; + + /* TODO handle distinct and no-nil etc ! */ + if (strcmp(af->func->base.name, name) == 0 && + /* TODO handle count (has no args!!) */ + aa && list_length(aa) == 1) { + sql_exp *aae = aa->h->data; + + if (eae->type == e_column && + ((!aae->l && !eae->l) || + (aae->l && eae->l && + strcmp(aae->l, eae->l) == 0)) && + (aae->r && eae->r && + strcmp(aae->r, eae->r) == 0)) + return exp_ref(sql, a); + } + } + } + return NULL; +} + +/* rewrite avg into sum/count + * float/double use explicit kahansum ie output (sum+rest)/count + * int (return double). ((long) double)sum)/((long) double)count). + * decimal (return decimal). (sum+(count/2))/count. + * */ +static sql_rel * +rel_avg_rewrite(visitor *v, sql_rel *rel) +{ + mvc *sql = v->sql; + if (is_groupby(rel->op)) { + list *pexps, *nexps = new_exp_list(sql->sa), *avgs = new_exp_list(sql->sa); + list *aexps = new_exp_list(sql->sa); /* alias list */ + node *m, *n; + + if (list_empty(rel->r) || mvc_debug_on(sql, 64)) + return rel; + + /* Find all avg's */ + for (m = rel->exps->h; m; m = m->next) { + sql_exp *e = m->data; + + if (e->type == e_aggr) { + sql_subfunc *a = e->f; + + if (strcmp(a->func->base.name, "avg") == 0) { + append(avgs, e); + continue; + } + } + /* alias for local aggr exp */ + if (e->type == e_column && + (!list_find_exp(rel->r, e) && + !rel_find_exp(rel->l, e))) + append(aexps, e); + else + append(nexps, e); + } + if (!list_length(avgs)) + return rel; + + /* For each avg, find count and sum */ + for (m = avgs->h; m; m = m->next) { + list *args; + sql_exp *avg = m->data, *navg, *cond, *cnt_d; + sql_exp *cnt = rel_find_aggr_exp(sql, rel, nexps, avg, "count"); + sql_exp *sum = rel_find_aggr_exp(sql, rel, nexps, avg, "sum"); + sql_subfunc *div, *ifthen, *cmp; + const char *rname = NULL, *name = NULL; + list *l = avg->l; + sql_subtype *avg_input_t = exp_subtype(l->h->data); + + rname = exp_relname(avg); + name = exp_name(avg); + + /* create nsum/cnt exp */ + if (!cnt) { + sql_subfunc *cf = sql_bind_func_(sql, "sys", "count", append(sa_list(sql->sa), avg_input_t), F_AGGR, false); + sql_exp *e = exp_aggr(sql->sa, list_dup(avg->l, (fdup)NULL), cf, need_distinct(avg), need_no_nil(avg), avg->card, has_nil(avg)); + + append(nexps, e); + cnt = exp_ref(sql, e); + } + if (!sum) { + sql_subfunc *sf = sql_bind_func_(sql, "sys", "sum", append(sa_list(sql->sa), avg_input_t), F_AGGR, false); + sql_exp *e = exp_aggr(sql->sa, list_dup(avg->l, (fdup)NULL), sf, need_distinct(avg), need_no_nil(avg), avg->card, has_nil(avg)); + + append(nexps, e); + sum = exp_ref(sql, e); + } + cnt_d = cnt; + + sql_subtype *avg_t = exp_subtype(avg); + sql_subtype *dbl_t = sql_bind_localtype("dbl"); + if (subtype_cmp(avg_t, dbl_t) == 0 || EC_INTERVAL(avg_t->type->eclass)) { + /* check for count = 0 (or move into funcs) */ + args = new_exp_list(sql->sa); + append(args, cnt); + append(args, exp_atom_lng(sql->sa, 0)); + cmp = find_func(sql, "=", args); + assert(cmp); + cond = exp_op(sql->sa, args, cmp); + + args = new_exp_list(sql->sa); + append(args, cond); + append(args, exp_atom(sql->sa, atom_general(sql->sa, exp_subtype(cnt_d), NULL))); + /* TODO only ifthenelse if value column may have nil's*/ + append(args, cnt_d); + ifthen = find_func(sql, "ifthenelse", args); + assert(ifthen); + cnt_d = exp_op(sql->sa, args, ifthen); + + if (subtype_cmp(avg_t, dbl_t) == 0) { + cnt_d = exp_convert(sql->sa, cnt, exp_subtype(cnt), dbl_t); + sum = exp_convert(sql->sa, sum, exp_subtype(sum), dbl_t); + } + + args = new_exp_list(sql->sa); + + sql_subtype *st = exp_subtype(sum); + sql_subtype *ct = exp_subtype(cnt_d); + /* convert sum flt -> dbl */ + if (st->type->eclass == EC_FLT && ct->type->eclass == EC_FLT && st->type->localtype < ct->type->localtype) { + sum = exp_convert(sql->sa, sum, st, ct); + } else if (st->type->eclass == EC_FLT) { + if (ct->type != st->type) { + sql_subtype *dbl_t = sql_bind_localtype("dbl"); + if (ct->type->eclass != EC_FLT || st->type == dbl_t->type) + cnt_d = exp_convert(sql->sa, cnt_d, exp_subtype(cnt_d), st); + } + } + append(args, sum); + append(args, cnt_d); + div = find_func(sql, "sql_div", args); + assert(div); + navg = exp_op(sql->sa, args, div); + } else { + args = sa_list(sql->sa); + append(args, sum); + append(args, cnt_d); + div = find_func(sql, "num_div", args); + assert(div); + navg = exp_op(sql->sa, args, div); + } + + if (subtype_cmp(exp_subtype(avg), exp_subtype(navg)) != 0) + navg = exp_convert(sql->sa, navg, exp_subtype(navg), exp_subtype(avg)); + + exp_setname(sql->sa, navg, rname, name ); + m->data = navg; + } + pexps = new_exp_list(sql->sa); + for (m = rel->exps->h, n = avgs->h; m; m = m->next) { + sql_exp *e = m->data; + + if (e->type == e_aggr) { + sql_subfunc *a = e->f; + + if (strcmp(a->func->base.name, "avg") == 0) { + sql_exp *avg = n->data; + + append(pexps, avg); + n = n->next; + continue; + } + } + /* alias for local aggr exp */ + if (e->type == e_column && !rel_find_exp(rel->l, e)) + append(pexps, e); + else + append(pexps, exp_column(sql->sa, exp_find_rel_name(e), exp_name(e), exp_subtype(e), e->card, has_nil(e), is_unique(e), is_intern(e))); + } _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org