Changeset: 8e4fa53e2f76 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/8e4fa53e2f76
Branch: default
Log Message:
Merge ordered-set-aggregates into default. diffs (truncated from 9159 to 300 lines): diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -1035,6 +1035,7 @@ const char grant_rolesRef[]; const char groupRef[]; const char groupbyRef[]; const char groupdoneRef[]; +const char groupedfirstnRef[]; const char growRef[]; int hasSideEffects(MalBlkPtr mb, InstrPtr p, int strict); const char hgeRef[]; diff --git a/monetdb5/mal/mal_namespace.c b/monetdb5/mal/mal_namespace.c --- a/monetdb5/mal/mal_namespace.c +++ b/monetdb5/mal/mal_namespace.c @@ -197,6 +197,7 @@ const char grantRef[] = "grant"; const char grant_rolesRef[] = "grant_roles"; const char groupbyRef[] = "groupby"; const char groupdoneRef[] = "groupdone"; +const char groupedfirstnRef[] = "groupedfirstn"; const char groupRef[] = "group"; const char growRef[] = "grow"; const char hgeRef[] = "hge"; @@ -462,6 +463,7 @@ initNamespace(void) fixName(grant_rolesRef); fixName(groupbyRef); fixName(groupdoneRef); + fixName(groupedfirstnRef); fixName(groupRef); fixName(growRef); fixName(hgeRef); diff --git a/monetdb5/mal/mal_namespace.h b/monetdb5/mal/mal_namespace.h --- a/monetdb5/mal/mal_namespace.h +++ b/monetdb5/mal/mal_namespace.h @@ -123,6 +123,7 @@ mal_export const char grantRef[]; mal_export const char grant_rolesRef[]; mal_export const char groupbyRef[]; mal_export const char groupdoneRef[]; +mal_export const char groupedfirstnRef[]; mal_export const char groupRef[]; mal_export const char growRef[]; mal_export const char hgeRef[]; diff --git a/monetdb5/optimizer/opt_mergetable.c b/monetdb5/optimizer/opt_mergetable.c --- a/monetdb5/optimizer/opt_mergetable.c +++ b/monetdb5/optimizer/opt_mergetable.c @@ -2317,6 +2317,8 @@ OPTmergetableImplementation(Client cntxt } /* pack if there is a group statement following a groupdone (ie aggr(distinct)) */ + if (getModuleId(p) == algebraRef && getFunctionId(p) == 
groupedfirstnRef) + groupdone = 1; if (getModuleId(p) == groupRef && p->argc == 5 && (getFunctionId(p) == subgroupRef || getFunctionId(p) == subgroupdoneRef diff --git a/monetdb5/optimizer/opt_mitosis.c b/monetdb5/optimizer/opt_mitosis.c --- a/monetdb5/optimizer/opt_mitosis.c +++ b/monetdb5/optimizer/opt_mitosis.c @@ -67,13 +67,15 @@ OPTmitosisImplementation(Client cntxt, M nr_aggrs += (p->argc > 2 && getModuleId(p) == aggrRef); nr_maps += (isMapOp(p)); - if (p->argc > 2 && getModuleId(p) == aggrRef + if ((getModuleId(p) == algebraRef && + getFunctionId(p) == groupedfirstnRef) || + (p->argc > 2 && getModuleId(p) == aggrRef && getFunctionId(p) != subcountRef && getFunctionId(p) != subminRef && getFunctionId(p) != submaxRef && getFunctionId(p) != subavgRef && getFunctionId(p) != subsumRef && getFunctionId(p) != subprodRef && getFunctionId(p) != countRef && getFunctionId(p) != minRef && getFunctionId(p) != maxRef && getFunctionId(p) != avgRef - && getFunctionId(p) != sumRef && getFunctionId(p) != prodRef) { + && getFunctionId(p) != sumRef && getFunctionId(p) != prodRef)) { pieces = 0; goto bailout; } diff --git a/sql/backends/monet5/UDF/pyapi3/Tests/pyloader3_01.test b/sql/backends/monet5/UDF/pyapi3/Tests/pyloader3_01.test --- a/sql/backends/monet5/UDF/pyapi3/Tests/pyloader3_01.test +++ b/sql/backends/monet5/UDF/pyapi3/Tests/pyloader3_01.test @@ -86,7 +86,7 @@ DROP LOADER myfunc2 statement ok DROP LOADER myfunc3 -query ITTTIIIIIIII rowsort +query ITTTIIIIIIIII rowsort SELECT * FROM functions WHERE name='myfunc' ---- diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c --- a/sql/backends/monet5/rel_bin.c +++ b/sql/backends/monet5/rel_bin.c @@ -1686,10 +1686,17 @@ exp_bin(backend *be, sql_exp *e, stmt *l } break; case e_aggr: { list *attr = e->l; + list *r = e->r; stmt *as = NULL; sql_subfunc *a = e->f; assert(sel == NULL); + /* cases + * 0) count(*) + * 1) general aggregation + * 2) aggregation with required order (quantile etc) + * 3) aggregation 
with optional order by, group_concat, xml_agg + * */ if (attr && attr->h) { node *en; list *l = sa_list(sql->sa); @@ -1742,6 +1749,37 @@ exp_bin(backend *be, sql_exp *e, stmt *l return NULL; append(l, stmt_project(be, u, a)); } + if (r) { + list *obe = r->h->data; + if (obe && obe->h) { + stmt *orderby = NULL, *orderby_vals, *orderby_ids, *orderby_grp; + /* order by */ + if (grp) { + orderby = stmt_order(be, grp, true, true); + + orderby_vals = stmt_result(be, orderby, 0); + orderby_ids = stmt_result(be, orderby, 1); + orderby_grp = stmt_result(be, orderby, 2); + } + for (node *n = obe->h; n; n = n->next) { + sql_exp *oe = n->data; + stmt *os = exp_bin(be, oe, left, right, NULL, NULL, NULL, sel, depth+1, 0, push); + if (orderby) + orderby = stmt_reorder(be, os, is_ascending(oe), nulls_last(oe), orderby_ids, orderby_grp); + else + orderby = stmt_order(be, os, is_ascending(oe), nulls_last(oe)); + orderby_vals = stmt_result(be, orderby, 0); + orderby_ids = stmt_result(be, orderby, 1); + orderby_grp = stmt_result(be, orderby, 2); + } + /* depending on type of aggr project input or ordered column */ + stmt *h = l->h->data; + l->h->data = h = stmt_project(be, orderby_ids, h); + if (grp) + grp = stmt_project(be, orderby_ids, grp); + (void)orderby_vals; + } + } as = stmt_list(be, l); } else { /* count(*) may need the default group (relation) and @@ -4424,10 +4462,36 @@ rel2bin_project(backend *be, sql_rel *re /* distinct, topn returns at least N (unique groups) */ int distinct = need_distinct(rel); stmt *limit = NULL, *lpiv = NULL, *lgid = NULL; - - for (n=oexps->h; n; n = n->next) { + int nr_obe = list_length(oexps); + + /* check for partition columns */ + stmt *grp = NULL, *ext = NULL, *cnt = NULL; + for (n=oexps->h; n; n = n->next, nr_obe--) { + sql_exp *gbe = n->data; + bool last = (!n->next || !is_partitioning((sql_exp*)n->next->data)); + + if (!topn->grouped || !is_partitioning(gbe)) + break; + /* create group by */ + stmt *gbcol = exp_bin(be, gbe, sub, NULL, NULL, 
NULL, NULL, NULL, 0, 0, 0); + + if (!gbcol) { + assert(sql->session->status == -10); /* Stack overflow errors shouldn't terminate the server */ + return NULL; + } + if (!gbcol->nrcols) + gbcol = stmt_const(be, bin_find_smallest_column(be, sub), gbcol); + stmt *groupby = stmt_group(be, gbcol, grp, ext, cnt, last); + grp = stmt_result(be, groupby, 0); + ext = stmt_result(be, groupby, 1); + cnt = stmt_result(be, groupby, 2); + gbcol = stmt_alias(be, gbcol, gbe->alias.label, exp_find_rel_name(gbe), exp_name(gbe)); + } + + if (grp) + lgid = grp; + for (; n; n = n->next, nr_obe--) { sql_exp *orderbycole = n->data; - int last = (n->next == NULL); stmt *orderbycolstmt = exp_bin(be, orderbycole, sub, psub, NULL, NULL, NULL, NULL, 0, 0, 0); @@ -4435,18 +4499,18 @@ rel2bin_project(backend *be, sql_rel *re return NULL; /* handle constants */ - if (orderbycolstmt->nrcols == 0 && !last) /* no need to sort on constant */ + if (orderbycolstmt->nrcols == 0 && n->next) /* no need to sort on constant */ continue; orderbycolstmt = column(be, orderbycolstmt); if (!limit) { /* topn based on a single column */ - limit = stmt_limit(be, orderbycolstmt, NULL, NULL, stmt_atom_lng(be, 0), l, distinct, is_ascending(orderbycole), nulls_last(orderbycole), last, 1); + limit = stmt_limit(be, orderbycolstmt, NULL, grp, stmt_atom_lng(be, 0), l, distinct, is_ascending(orderbycole), nulls_last(orderbycole), nr_obe, 1); } else { /* topn based on 2 columns */ - limit = stmt_limit(be, orderbycolstmt, lpiv, lgid, stmt_atom_lng(be, 0), l, distinct, is_ascending(orderbycole), nulls_last(orderbycole), last, 1); + limit = stmt_limit(be, orderbycolstmt, lpiv, lgid, stmt_atom_lng(be, 0), l, distinct, is_ascending(orderbycole), nulls_last(orderbycole), nr_obe, 1); } if (!limit) return NULL; lpiv = limit; - if (!last) { + if (!grp && nr_obe > 1) { lpiv = stmt_result(be, limit, 0); lgid = stmt_result(be, limit, 1); if (lpiv == NULL || lgid == NULL) @@ -4455,6 +4519,8 @@ rel2bin_project(backend *be, sql_rel *re } 
limit = lpiv; + if (limit && grp) + limit = stmt_project(be, stmt_selectnonil(be, limit, NULL), limit); stmt *s; for (n=pl->h ; n; n = n->next) { stmt *os = n->data; @@ -4719,6 +4785,17 @@ rel2bin_groupby(backend *be, sql_rel *re return cursub; } +static bool +has_partitioning( list *exps ) +{ + for(node *n = exps->h; n; n = n->next){ + sql_exp *gbe = n->data; + if (is_partitioning(gbe)) + return true; + } + return false; +} + static stmt * rel2bin_topn(backend *be, sql_rel *rel, list *refs) { @@ -4737,6 +4814,8 @@ rel2bin_topn(backend *be, sql_rel *rel, sub = rel2bin_project(be, rl, refs, rel); } else sub = rel2bin_project(be, rl, refs, rel); + if (rel->grouped && rl->r && has_partitioning(rl->r)) + return sub; } else { sub = subrel_bin(be, rl, refs); } @@ -4774,8 +4853,9 @@ rel2bin_topn(backend *be, sql_rel *rel, if (!l || !o) return NULL; + sc = column(be, sc); - limit = stmt_limit(be, sc /*stmt_alias(be, sc, 0, tname, cname)*/, NULL, NULL, o, l, 0,0,0,0,0); + limit = stmt_limit(be, sc, NULL, NULL, o, l, 0,0,0,0,0); for ( ; n; n = n->next) { stmt *sc = n->data; diff --git a/sql/backends/monet5/rel_physical.c b/sql/backends/monet5/rel_physical.c --- a/sql/backends/monet5/rel_physical.c +++ b/sql/backends/monet5/rel_physical.c @@ -17,8 +17,9 @@ #include "rel_exp.h" #include "rel_rel.h" -#define IS_ORDER_BASED_AGGR(name) (strcmp((name), "quantile") == 0 || strcmp((name), "quantile_avg") == 0 || \ - strcmp((name), "median") == 0 || strcmp((name), "median_avg") == 0) +#define IS_ORDER_BASED_AGGR(fname, argc) (\ + (argc == 2 && (strcmp((fname), "quantile") == 0 || strcmp((fname), "quantile_avg") == 0)) || \ + (argc == 1 && (strcmp((fname), "median") == 0 || strcmp((fname), "median_avg") == 0))) static sql_rel * rel_add_orderby(visitor *v, sql_rel *rel) @@ -31,10 +32,10 @@ rel_add_orderby(visitor *v, sql_rel *rel if (is_aggr(e->type)) { sql_subfunc *af = e->f; - list *aa = e->l; + list *aa = e->l; /* for now we only handle one sort order */ - if 
(IS_ORDER_BASED_AGGR(af->func->base.name) && aa && list_length(aa) == 2) { + if (aa && IS_ORDER_BASED_AGGR(af->func->base.name, list_length(aa))) { sql_exp *nobe = aa->h->data; if (nobe && !obe) { sql_rel *l = rel->l = rel_project(v->sql->sa, rel->l, rel_projections(v->sql, rel->l, NULL, 1, 1)); diff --git a/sql/backends/monet5/sql_cat.c b/sql/backends/monet5/sql_cat.c --- a/sql/backends/monet5/sql_cat.c +++ b/sql/backends/monet5/sql_cat.c @@ -1070,7 +1070,7 @@ create_func(mvc *sql, char *sname, char sql->errstr[0] = '\0'; } } - switch (mvc_create_func(&nf, sql, NULL, s, f->base.name, f->ops, f->res, f->type, f->lang, f->mod, f->imp, f->query, f->varres, f->vararg, f->system, f->side_effect)) { + switch (mvc_create_func(&nf, sql, NULL, s, f->base.name, f->ops, f->res, f->type, f->lang, f->mod, f->imp, f->query, f->varres, f->vararg, f->system, f->side_effect, f->order_required, f->opt_order)) { case -1: throw(SQL,"sql.create_func", SQLSTATE(HY013) MAL_MALLOC_FAIL); case -2: diff --git a/sql/backends/monet5/sql_statement.c b/sql/backends/monet5/sql_statement.c --- a/sql/backends/monet5/sql_statement.c +++ b/sql/backends/monet5/sql_statement.c @@ -1204,19 +1204,18 @@ stmt_result(backend *be, stmt *s, int nr _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org