Changeset: 8e4fa53e2f76 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/8e4fa53e2f76
Branch: default
Log Message:

Merge ordered-set-aggregates into default.


diffs (truncated from 9159 to 300 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -1035,6 +1035,7 @@ const char grant_rolesRef[];
 const char groupRef[];
 const char groupbyRef[];
 const char groupdoneRef[];
+const char groupedfirstnRef[];
 const char growRef[];
 int hasSideEffects(MalBlkPtr mb, InstrPtr p, int strict);
 const char hgeRef[];
diff --git a/monetdb5/mal/mal_namespace.c b/monetdb5/mal/mal_namespace.c
--- a/monetdb5/mal/mal_namespace.c
+++ b/monetdb5/mal/mal_namespace.c
@@ -197,6 +197,7 @@ const char grantRef[] = "grant";
 const char grant_rolesRef[] = "grant_roles";
 const char groupbyRef[] = "groupby";
 const char groupdoneRef[] = "groupdone";
+const char groupedfirstnRef[] = "groupedfirstn";
 const char groupRef[] = "group";
 const char growRef[] = "grow";
 const char hgeRef[] = "hge";
@@ -462,6 +463,7 @@ initNamespace(void)
        fixName(grant_rolesRef);
        fixName(groupbyRef);
        fixName(groupdoneRef);
+       fixName(groupedfirstnRef);
        fixName(groupRef);
        fixName(growRef);
        fixName(hgeRef);
diff --git a/monetdb5/mal/mal_namespace.h b/monetdb5/mal/mal_namespace.h
--- a/monetdb5/mal/mal_namespace.h
+++ b/monetdb5/mal/mal_namespace.h
@@ -123,6 +123,7 @@ mal_export const char grantRef[];
 mal_export const char grant_rolesRef[];
 mal_export const char groupbyRef[];
 mal_export const char groupdoneRef[];
+mal_export const char groupedfirstnRef[];
 mal_export const char groupRef[];
 mal_export const char growRef[];
 mal_export const char hgeRef[];
diff --git a/monetdb5/optimizer/opt_mergetable.c b/monetdb5/optimizer/opt_mergetable.c
--- a/monetdb5/optimizer/opt_mergetable.c
+++ b/monetdb5/optimizer/opt_mergetable.c
@@ -2317,6 +2317,8 @@ OPTmergetableImplementation(Client cntxt
                }
 
                /* pack if there is a group statement following a groupdone (ie aggr(distinct)) */
+               if (getModuleId(p) == algebraRef && getFunctionId(p) == groupedfirstnRef)
+                               groupdone = 1;
                if (getModuleId(p) == groupRef && p->argc == 5
                        && (getFunctionId(p) == subgroupRef
                                || getFunctionId(p) == subgroupdoneRef
diff --git a/monetdb5/optimizer/opt_mitosis.c b/monetdb5/optimizer/opt_mitosis.c
--- a/monetdb5/optimizer/opt_mitosis.c
+++ b/monetdb5/optimizer/opt_mitosis.c
@@ -67,13 +67,15 @@ OPTmitosisImplementation(Client cntxt, M
                nr_aggrs += (p->argc > 2 && getModuleId(p) == aggrRef);
                nr_maps += (isMapOp(p));
 
-               if (p->argc > 2 && getModuleId(p) == aggrRef
+               if ((getModuleId(p) == algebraRef &&
+                   getFunctionId(p) == groupedfirstnRef) ||
+                   (p->argc > 2 && getModuleId(p) == aggrRef
                        && getFunctionId(p) != subcountRef && getFunctionId(p) != subminRef
                        && getFunctionId(p) != submaxRef && getFunctionId(p) != subavgRef
                        && getFunctionId(p) != subsumRef && getFunctionId(p) != subprodRef
                        && getFunctionId(p) != countRef && getFunctionId(p) != minRef
                        && getFunctionId(p) != maxRef && getFunctionId(p) != avgRef
-                       && getFunctionId(p) != sumRef && getFunctionId(p) != prodRef) {
+                       && getFunctionId(p) != sumRef && getFunctionId(p) != prodRef)) {
                        pieces = 0;
                        goto bailout;
                }
diff --git a/sql/backends/monet5/UDF/pyapi3/Tests/pyloader3_01.test b/sql/backends/monet5/UDF/pyapi3/Tests/pyloader3_01.test
--- a/sql/backends/monet5/UDF/pyapi3/Tests/pyloader3_01.test
+++ b/sql/backends/monet5/UDF/pyapi3/Tests/pyloader3_01.test
@@ -86,7 +86,7 @@ DROP LOADER myfunc2
 statement ok
 DROP LOADER myfunc3
 
-query ITTTIIIIIIII rowsort
+query ITTTIIIIIIIII rowsort
 SELECT * FROM functions WHERE name='myfunc'
 ----
 
diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c
--- a/sql/backends/monet5/rel_bin.c
+++ b/sql/backends/monet5/rel_bin.c
@@ -1686,10 +1686,17 @@ exp_bin(backend *be, sql_exp *e, stmt *l
        }       break;
        case e_aggr: {
                list *attr = e->l;
+               list *r = e->r;
                stmt *as = NULL;
                sql_subfunc *a = e->f;
 
                assert(sel == NULL);
+                       /* cases
+                        * 0) count(*)
+                        * 1) general aggregation
+                        * 2) aggregation with required order (quantile etc)
+                        * 3) aggregation with optional order by, group_concat, 
xml_agg
+                        * */
                if (attr && attr->h) {
                        node *en;
                        list *l = sa_list(sql->sa);
@@ -1742,6 +1749,37 @@ exp_bin(backend *be, sql_exp *e, stmt *l
                                        return NULL;
                                append(l, stmt_project(be, u, a));
                        }
+                       if (r) {
+                               list *obe = r->h->data;
+                               if (obe && obe->h) {
+                                       stmt *orderby = NULL, *orderby_vals, 
*orderby_ids, *orderby_grp;
+                                       /* order by */
+                                       if (grp) {
+                                               orderby = stmt_order(be, grp, 
true, true);
+
+                                               orderby_vals = stmt_result(be, 
orderby, 0);
+                                               orderby_ids = stmt_result(be, 
orderby, 1);
+                                               orderby_grp = stmt_result(be, 
orderby, 2);
+                                       }
+                                       for (node *n = obe->h; n; n = n->next) {
+                                               sql_exp *oe = n->data;
+                                               stmt *os = exp_bin(be, oe, 
left, right, NULL, NULL, NULL, sel, depth+1, 0, push);
+                                               if (orderby)
+                                                       orderby = 
stmt_reorder(be, os, is_ascending(oe), nulls_last(oe), orderby_ids, 
orderby_grp);
+                                               else
+                                                       orderby = 
stmt_order(be, os, is_ascending(oe), nulls_last(oe));
+                                               orderby_vals = stmt_result(be, 
orderby, 0);
+                                               orderby_ids = stmt_result(be, 
orderby, 1);
+                                               orderby_grp = stmt_result(be, 
orderby, 2);
+                                       }
+                                       /* depending on type of aggr project 
input or ordered column */
+                                       stmt *h = l->h->data;
+                                       l->h->data = h = stmt_project(be, 
orderby_ids, h);
+                                       if (grp)
+                                               grp = stmt_project(be, 
orderby_ids, grp);
+                                       (void)orderby_vals;
+                               }
+                       }
                        as = stmt_list(be, l);
                } else {
                        /* count(*) may need the default group (relation) and
@@ -4424,10 +4462,36 @@ rel2bin_project(backend *be, sql_rel *re
                /* distinct, topn returns at least N (unique groups) */
                int distinct = need_distinct(rel);
                stmt *limit = NULL, *lpiv = NULL, *lgid = NULL;
-
-               for (n=oexps->h; n; n = n->next) {
+               int nr_obe = list_length(oexps);
+
+               /* check for partition columns */
+               stmt *grp = NULL, *ext = NULL, *cnt = NULL;
+               for (n=oexps->h; n; n = n->next, nr_obe--) {
+                       sql_exp *gbe = n->data;
+                       bool last = (!n->next || 
!is_partitioning((sql_exp*)n->next->data));
+
+                       if (!topn->grouped || !is_partitioning(gbe))
+                               break;
+                       /* create group by */
+                       stmt *gbcol = exp_bin(be, gbe, sub, NULL, NULL, NULL, 
NULL, NULL, 0, 0, 0);
+
+                       if (!gbcol) {
+                               assert(sql->session->status == -10); /* Stack 
overflow errors shouldn't terminate the server */
+                               return NULL;
+                       }
+                       if (!gbcol->nrcols)
+                               gbcol = stmt_const(be, 
bin_find_smallest_column(be, sub), gbcol);
+                       stmt *groupby = stmt_group(be, gbcol, grp, ext, cnt, 
last);
+                       grp = stmt_result(be, groupby, 0);
+                       ext = stmt_result(be, groupby, 1);
+                       cnt = stmt_result(be, groupby, 2);
+                       gbcol = stmt_alias(be, gbcol, gbe->alias.label, 
exp_find_rel_name(gbe), exp_name(gbe));
+               }
+
+               if (grp)
+                       lgid = grp;
+               for (; n; n = n->next, nr_obe--) {
                        sql_exp *orderbycole = n->data;
-                       int last = (n->next == NULL);
 
                        stmt *orderbycolstmt = exp_bin(be, orderbycole, sub, 
psub, NULL, NULL, NULL, NULL, 0, 0, 0);
 
@@ -4435,18 +4499,18 @@ rel2bin_project(backend *be, sql_rel *re
                                return NULL;
 
                        /* handle constants */
-                       if (orderbycolstmt->nrcols == 0 && !last) /* no need to 
sort on constant */
+                       if (orderbycolstmt->nrcols == 0 && n->next) /* no need 
to sort on constant */
                                continue;
                        orderbycolstmt = column(be, orderbycolstmt);
                        if (!limit) {   /* topn based on a single column */
-                               limit = stmt_limit(be, orderbycolstmt, NULL, 
NULL, stmt_atom_lng(be, 0), l, distinct, is_ascending(orderbycole), 
nulls_last(orderbycole), last, 1);
+                               limit = stmt_limit(be, orderbycolstmt, NULL, 
grp, stmt_atom_lng(be, 0), l, distinct, is_ascending(orderbycole), 
nulls_last(orderbycole), nr_obe, 1);
                        } else {        /* topn based on 2 columns */
-                               limit = stmt_limit(be, orderbycolstmt, lpiv, 
lgid, stmt_atom_lng(be, 0), l, distinct, is_ascending(orderbycole), 
nulls_last(orderbycole), last, 1);
+                               limit = stmt_limit(be, orderbycolstmt, lpiv, 
lgid, stmt_atom_lng(be, 0), l, distinct, is_ascending(orderbycole), 
nulls_last(orderbycole), nr_obe, 1);
                        }
                        if (!limit)
                                return NULL;
                        lpiv = limit;
-                       if (!last) {
+                       if (!grp && nr_obe > 1) {
                                lpiv = stmt_result(be, limit, 0);
                                lgid = stmt_result(be, limit, 1);
                                if (lpiv == NULL || lgid == NULL)
@@ -4455,6 +4519,8 @@ rel2bin_project(backend *be, sql_rel *re
                }
 
                limit = lpiv;
+               if (limit && grp)
+                       limit = stmt_project(be, stmt_selectnonil(be, limit, 
NULL), limit);
                stmt *s;
                for (n=pl->h ; n; n = n->next) {
                        stmt *os = n->data;
@@ -4719,6 +4785,17 @@ rel2bin_groupby(backend *be, sql_rel *re
        return cursub;
 }
 
+static bool
+has_partitioning( list *exps )
+{
+       for(node *n = exps->h; n; n = n->next){
+               sql_exp *gbe = n->data;
+               if (is_partitioning(gbe))
+                       return true;
+       }
+       return false;
+}
+
 static stmt *
 rel2bin_topn(backend *be, sql_rel *rel, list *refs)
 {
@@ -4737,6 +4814,8 @@ rel2bin_topn(backend *be, sql_rel *rel, 
                                        sub = rel2bin_project(be, rl, refs, 
rel);
                        } else
                                sub = rel2bin_project(be, rl, refs, rel);
+                       if (rel->grouped && rl->r && has_partitioning(rl->r))
+                               return sub;
                } else {
                        sub = subrel_bin(be, rl, refs);
                }
@@ -4774,8 +4853,9 @@ rel2bin_topn(backend *be, sql_rel *rel, 
                if (!l || !o)
                        return NULL;
 
+
                sc = column(be, sc);
-               limit = stmt_limit(be, sc /*stmt_alias(be, sc, 0, tname, 
cname)*/, NULL, NULL, o, l, 0,0,0,0,0);
+               limit = stmt_limit(be, sc, NULL, NULL, o, l, 0,0,0,0,0);
 
                for ( ; n; n = n->next) {
                        stmt *sc = n->data;
diff --git a/sql/backends/monet5/rel_physical.c b/sql/backends/monet5/rel_physical.c
--- a/sql/backends/monet5/rel_physical.c
+++ b/sql/backends/monet5/rel_physical.c
@@ -17,8 +17,9 @@
 #include "rel_exp.h"
 #include "rel_rel.h"
 
-#define IS_ORDER_BASED_AGGR(name) (strcmp((name), "quantile") == 0 || strcmp((name), "quantile_avg") == 0 || \
-                                  strcmp((name), "median") == 0 || strcmp((name), "median_avg") == 0)
+#define IS_ORDER_BASED_AGGR(fname, argc) (\
+                               (argc == 2 && (strcmp((fname), "quantile") == 0 || strcmp((fname), "quantile_avg") == 0)) || \
+                               (argc == 1 && (strcmp((fname), "median") == 0 || strcmp((fname), "median_avg") == 0)))
 
 static sql_rel *
 rel_add_orderby(visitor *v, sql_rel *rel)
@@ -31,10 +32,10 @@ rel_add_orderby(visitor *v, sql_rel *rel
 
                                if (is_aggr(e->type)) {
                                        sql_subfunc *af = e->f;
-                                       list *aa = e->l;
+                                       list *aa = e->l;
 
                                        /* for now we only handle one sort 
order */
-                                       if 
(IS_ORDER_BASED_AGGR(af->func->base.name) && aa && list_length(aa) == 2) {
+                                       if (aa && 
IS_ORDER_BASED_AGGR(af->func->base.name, list_length(aa))) {
                                                sql_exp *nobe = aa->h->data;
                                                if (nobe && !obe) {
                                                        sql_rel *l = rel->l = 
rel_project(v->sql->sa, rel->l, rel_projections(v->sql, rel->l, NULL, 1, 1));
diff --git a/sql/backends/monet5/sql_cat.c b/sql/backends/monet5/sql_cat.c
--- a/sql/backends/monet5/sql_cat.c
+++ b/sql/backends/monet5/sql_cat.c
@@ -1070,7 +1070,7 @@ create_func(mvc *sql, char *sname, char 
                        sql->errstr[0] = '\0';
                }
        }
-       switch (mvc_create_func(&nf, sql, NULL, s, f->base.name, f->ops, f->res, f->type, f->lang, f->mod, f->imp, f->query, f->varres, f->vararg, f->system, f->side_effect)) {
+       switch (mvc_create_func(&nf, sql, NULL, s, f->base.name, f->ops, f->res, f->type, f->lang, f->mod, f->imp, f->query, f->varres, f->vararg, f->system, f->side_effect, f->order_required, f->opt_order)) {
                case -1:
                        throw(SQL,"sql.create_func", SQLSTATE(HY013) 
MAL_MALLOC_FAIL);
                case -2:
diff --git a/sql/backends/monet5/sql_statement.c b/sql/backends/monet5/sql_statement.c
--- a/sql/backends/monet5/sql_statement.c
+++ b/sql/backends/monet5/sql_statement.c
@@ -1204,19 +1204,18 @@ stmt_result(backend *be, stmt *s, int nr
 
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to