Changeset: c577d5c7a0c9 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/c577d5c7a0c9
Modified Files:
        sql/backends/monet5/rel_bin.c
        sql/backends/monet5/sql_cat.c
        sql/common/sql_types.c
        sql/include/sql_catalog.h
        sql/rel.txt
        sql/scripts/10_sys_schema_extension.sql
        sql/server/rel_dump.c
        sql/server/rel_optimize_others.c
        sql/server/rel_psm.c
        sql/server/rel_select.c
        sql/server/sql_env.c
        sql/server/sql_mvc.c
        sql/server/sql_mvc.h
        sql/server/sql_parser.y
        sql/server/sql_scan.c
        sql/storage/sql_storage.h
        sql/storage/store.c
Branch: ordered-set-aggregates
Log Message:

initial work on ordered-set-aggregates


diffs (truncated from 681 to 300 lines):

diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c
--- a/sql/backends/monet5/rel_bin.c
+++ b/sql/backends/monet5/rel_bin.c
@@ -1687,10 +1687,17 @@ exp_bin(backend *be, sql_exp *e, stmt *l
        }       break;
        case e_aggr: {
                list *attr = e->l;
+               list *r = e->r;
                stmt *as = NULL;
                sql_subfunc *a = e->f;
 
                assert(sel == NULL);
+                       /* cases
+                        * 0) count(*)
+                        * 1) general aggregation
+                        * 2) aggregation with required order (quantile etc)
+                        * 3) aggregation with optional order by, group_concat, 
xml_agg
+                        * */
                if (attr && attr->h) {
                        node *en;
                        list *l = sa_list(sql->sa);
@@ -1743,6 +1750,37 @@ exp_bin(backend *be, sql_exp *e, stmt *l
                                        return NULL;
                                append(l, stmt_project(be, u, a));
                        }
+                       if (r) {
+                               list *obe = r->h->data;
+                               if (obe) {
+                                       stmt *orderby = NULL, *orderby_vals, 
*orderby_ids, *orderby_grp;
+                                       /* order by */
+                                       if (grp) {
+                                               orderby = stmt_order(be, grp, 
true, true);
+
+                                               orderby_vals = stmt_result(be, 
orderby, 0);
+                                               orderby_ids = stmt_result(be, 
orderby, 1);
+                                               orderby_grp = stmt_result(be, 
orderby, 2);
+                                       }
+                                       for (node *n = obe->h; n; n = n->next) {
+                                               sql_exp *oe = n->data;
+                                               stmt *os = exp_bin(be, oe, 
left, right, NULL, NULL, NULL, sel, depth+1, 0, push);
+                                               if (orderby)
+                                                       orderby = 
stmt_reorder(be, os, is_ascending(oe), nulls_last(oe), orderby_ids, 
orderby_grp);
+                                               else
+                                                       orderby = 
stmt_order(be, os, is_ascending(oe), nulls_last(oe));
+                                               orderby_vals = stmt_result(be, 
orderby, 0);
+                                               orderby_ids = stmt_result(be, 
orderby, 1);
+                                               orderby_grp = stmt_result(be, 
orderby, 2);
+                                       }
+                                       /* depending on type of aggr project 
input or ordered column */
+                                       stmt *h = l->h->data;
+                                       l->h->data = h = stmt_project(be, 
orderby_ids, h);
+                                       if (grp)
+                                               grp = stmt_project(be, 
orderby_ids, grp);
+                                       (void)orderby_vals;
+                               }
+                       }
                        as = stmt_list(be, l);
                } else {
                        /* count(*) may need the default group (relation) and
diff --git a/sql/backends/monet5/sql_cat.c b/sql/backends/monet5/sql_cat.c
--- a/sql/backends/monet5/sql_cat.c
+++ b/sql/backends/monet5/sql_cat.c
@@ -1065,7 +1065,7 @@ create_func(mvc *sql, char *sname, char 
                        sql->errstr[0] = '\0';
                }
        }
-       switch (mvc_create_func(&nf, sql, NULL, s, f->base.name, f->ops, 
f->res, f->type, f->lang, f->mod, f->imp, f->query, f->varres, f->vararg, 
f->system, f->side_effect)) {
+       switch (mvc_create_func(&nf, sql, NULL, s, f->base.name, f->ops, 
f->res, f->type, f->lang, f->mod, f->imp, f->query, f->varres, f->vararg, 
f->system, f->side_effect, f->order_required, f->opt_order)) {
                case -1:
                        throw(SQL,"sql.create_func", SQLSTATE(HY013) 
MAL_MALLOC_FAIL);
                case -2:
diff --git a/sql/common/sql_types.c b/sql/common/sql_types.c
--- a/sql/common/sql_types.c
+++ b/sql/common/sql_types.c
@@ -873,6 +873,19 @@ sql_create_aggr(allocator *sa, const cha
 }
 
 static sql_func *
+sql_create_aggr_optorder(allocator *sa, const char *name, const char *mod, 
const char *imp, bit semantics, bit private, sql_type *fres, int nargs, ...)
+{ /* Create an F_AGGR function via sql_create_func_ and flag it opt_order. */
+       sql_func *res;
+       va_list valist; /* the variadic tail; callers in sqltypeinit pass nargs followed by that many type arguments, e.g. 2, STR, STR */
+
+       va_start(valist, nargs);
+       res = sql_create_func_(sa, name, mod, imp, F_AGGR, semantics, private, 
SCALE_NONE, 0, fres, nargs, valist); /* presumably SCALE_NONE is fix_scale and 0 is res_scale - confirm against sql_create_func_ */
+       va_end(valist);
+       res->opt_order = true; /* NOTE(review): res is dereferenced without a NULL check; whether sql_create_func_ can return NULL is not visible here */
+       return res; /* the created sql_func, with opt_order set */
+}
+
+static sql_func *
 sql_create_filter(allocator *sa, const char *name, const char *mod, const char 
*imp, bit semantics, bit private, int fix_scale,
                                unsigned int res_scale, int nargs, ...)
 {
@@ -1189,8 +1202,8 @@ sqltypeinit( allocator *sa)
        sql_create_aggr(sa, "count", "aggr", "count", TRUE, FALSE, LNG, 1, ANY);
        sql_create_func(sa, "cnt", "sql", "count", TRUE, TRUE, SCALE_FIX, 0, 
LNG, 2, STR, STR);
 
-       sql_create_aggr(sa, "listagg", "aggr", "str_group_concat", TRUE, FALSE, 
STR, 1, STR);
-       sql_create_aggr(sa, "listagg", "aggr", "str_group_concat", TRUE, FALSE, 
STR, 2, STR, STR);
+       sql_create_aggr_optorder(sa, "listagg", "aggr", "str_group_concat", 
TRUE, FALSE, STR, 1, STR);
+       sql_create_aggr_optorder(sa, "listagg", "aggr", "str_group_concat", 
TRUE, FALSE, STR, 2, STR, STR);
 
        /* order based operators */
        sql_create_analytic(sa, "diff", "sql", "diff", TRUE, BIT, 1, ANY);
diff --git a/sql/include/sql_catalog.h b/sql/include/sql_catalog.h
--- a/sql/include/sql_catalog.h
+++ b/sql/include/sql_catalog.h
@@ -507,8 +507,11 @@ typedef struct sql_func {
        vararg:1,       /* variable input arguments */
        system:1,       /* system function */
        instantiated:1, /* if the function is instantiated */
-       private:1;      /* certain functions cannot be bound from user queries 
*/
-       int fix_scale;
+       private:1,      /* certain functions cannot be bound from user queries 
*/
+       order_required:1,       /* some aggregate functions require an order */
+       opt_order:1;    /* some aggregate functions could have the inputs 
sorted */
+
+       short fix_scale;
                        /*
                           SCALE_NONE => nothing
                           SCALE_FIX => input scale fixing,
diff --git a/sql/rel.txt b/sql/rel.txt
--- a/sql/rel.txt
+++ b/sql/rel.txt
@@ -163,6 +163,8 @@ for aggr
        -> flag DISTINCT and NO_NIL could be set
 for window functions
        -> r contains a list with two values: the first is a list with the 
partition by expressions, and the second a list with order by expressions
+for aggregate functions
+       -> r contains a list with two values: the first is a list with the 
order by expressions, and the second is a list with filter expressions
 
 e_column
        -> rname alias for the relation (i.e., alias of ->l, used by higher 
expressions)
diff --git a/sql/scripts/10_sys_schema_extension.sql 
b/sql/scripts/10_sys_schema_extension.sql
--- a/sql/scripts/10_sys_schema_extension.sql
+++ b/sql/scripts/10_sys_schema_extension.sql
@@ -527,9 +527,9 @@ SELECT 'pi', pi() UNION ALL
 SELECT 'rowcnt', rowcnt;
 GRANT SELECT ON sys.var_values TO PUBLIC;
 
-CREATE AGGREGATE sys.group_concat(str string) RETURNS string EXTERNAL NAME 
"aggr"."str_group_concat";
+CREATE AGGREGATE sys.group_concat(str string) RETURNS string WITH ORDER 
EXTERNAL NAME "aggr"."str_group_concat";
 GRANT EXECUTE ON AGGREGATE sys.group_concat(string) TO PUBLIC;
-CREATE AGGREGATE sys.group_concat(str string, sep string) RETURNS string 
EXTERNAL NAME "aggr"."str_group_concat";
+CREATE AGGREGATE sys.group_concat(str string, sep string) RETURNS string WITH 
ORDER EXTERNAL NAME "aggr"."str_group_concat";
 GRANT EXECUTE ON AGGREGATE sys.group_concat(string, string) TO PUBLIC;
 
 CREATE WINDOW sys.group_concat(str string) RETURNS string EXTERNAL NAME 
"sql"."str_group_concat";
diff --git a/sql/server/rel_dump.c b/sql/server/rel_dump.c
--- a/sql/server/rel_dump.c
+++ b/sql/server/rel_dump.c
@@ -244,6 +244,15 @@ exp_print(mvc *sql, stream *fout, sql_ex
                        exps_print(sql, fout, e->l, depth, refs, 0, 1, 
decorate, 0);
                else
                        mnstr_printf(fout, "()");
+               if (e->r) { /* order by exps */
+                       list *r = e->r;
+                       list *obes = r->h->data;
+                       exps_print(sql, fout, obes, depth, refs, 0, 1, 
decorate, 0);
+                       if (r->h->next) {
+                               list *exps = r->h->next->data;
+                               exps_print(sql, fout, exps, depth, refs, 0, 1, 
decorate, 0);
+                       }
+               }
        } break;
        case e_column: {
                if (is_freevar(e))
diff --git a/sql/server/rel_optimize_others.c b/sql/server/rel_optimize_others.c
--- a/sql/server/rel_optimize_others.c
+++ b/sql/server/rel_optimize_others.c
@@ -277,7 +277,15 @@ exp_mark_used(sql_rel *subrel, sql_exp *
        case e_func: {
                if (e->l)
                        nr += exps_mark_used(subrel, e->l, local_proj);
-               assert(!e->r);
+               if (e->r) {
+                       list *r = e->r;
+                       list *obes = r->h->data;
+                       nr += exps_mark_used(subrel, obes, local_proj);
+                       if (r->h->next) {
+                               list *exps = r->h->next->data;
+                               nr += exps_mark_used(subrel, exps, local_proj);
+                       }
+               }
                break;
        }
        case e_cmp:
diff --git a/sql/server/rel_psm.c b/sql/server/rel_psm.c
--- a/sql/server/rel_psm.c
+++ b/sql/server/rel_psm.c
@@ -854,7 +854,7 @@ rel_create_function(allocator *sa, const
 }
 
 static sql_rel *
-rel_create_func(sql_query *query, dlist *qname, dlist *params, symbol *res, 
dlist *ext_name, dlist *body, sql_ftype type, sql_flang lang, int replace)
+rel_create_func(sql_query *query, dlist *qname, dlist *params, symbol *res, 
dlist *ext_name, dlist *body, sql_ftype type, sql_flang lang, int replace, int 
order_spec)
 {
        mvc *sql = query->sql;
        const char *fname = qname_schema_object(qname);
@@ -869,6 +869,8 @@ rel_create_func(sql_query *query, dlist 
        int create = (!instantiate && !deps);
        bit vararg = FALSE, union_err = 0;
        char *F = NULL, *fn = NULL, is_func, *q = QUERY(sql->scanner);
+       bit order_required = (order_spec == 2);
+       bit opt_order = (order_spec == 1);
 
        if (res && res->token == SQL_TABLE) {
                if (type == F_FUNC)
@@ -1020,7 +1022,7 @@ rel_create_func(sql_query *query, dlist 
                sql->params = NULL;
                if (create) {
                        bit side_effect = (list_empty(restype) || (!vararg && 
list_empty(l))); /* TODO make this more precise? */
-                       switch (mvc_create_func(&f, sql, sql->sa, s, fname, l, 
restype, type, lang, mod, imp, lang_body, (type == F_LOADER)?TRUE:FALSE, 
vararg, FALSE, side_effect)) {
+                       switch (mvc_create_func(&f, sql, sql->sa, s, fname, l, 
restype, type, lang, mod, imp, lang_body, (type == F_LOADER)?TRUE:FALSE, 
vararg, FALSE, side_effect, order_required, opt_order)) {
                                case -1:
                                        return sql_error(sql, 01, 
SQLSTATE(HY013) MAL_MALLOC_FAIL);
                                case -2:
@@ -1040,7 +1042,7 @@ rel_create_func(sql_query *query, dlist 
                if (create) { /* needed for recursive functions */
                        bit side_effect = list_empty(restype) == 1; /* TODO 
make this more precise? */
                        q = query_cleaned(sql->ta, q);
-                       switch (mvc_create_func(&f, sql, sql->sa, s, fname, l, 
restype, type, lang, sql_shared_module_name, NULL, q, FALSE, vararg, FALSE, 
side_effect)) {
+                       switch (mvc_create_func(&f, sql, sql->sa, s, fname, l, 
restype, type, lang, sql_shared_module_name, NULL, q, FALSE, vararg, FALSE, 
side_effect, order_required, opt_order)) {
                                case -1:
                                        return sql_error(sql, 01, 
SQLSTATE(HY013) MAL_MALLOC_FAIL);
                                case -2:
@@ -1083,7 +1085,7 @@ rel_create_func(sql_query *query, dlist 
                sql->params = NULL;
                if (create) {
                        q = query_cleaned(sql->ta, q);
-                       switch (mvc_create_func(&f, sql, sql->sa, s, fname, l, 
restype, type, lang, fmod, fnme, q, FALSE, vararg, FALSE, FALSE)) {
+                       switch (mvc_create_func(&f, sql, sql->sa, s, fname, l, 
restype, type, lang, fmod, fnme, q, FALSE, vararg, FALSE, FALSE, 
order_required, opt_order)) {
                                case -1:
                                        return sql_error(sql, 01, 
SQLSTATE(HY013) MAL_MALLOC_FAIL);
                                case -2:
@@ -1092,6 +1094,7 @@ rel_create_func(sql_query *query, dlist 
                                default:
                                        break;
                        }
+
                        /* instantiate MAL functions while being created. This 
also sets the side-effects flag */
                        bool se = f->side_effect;
                        if (!backend_resolve_function(&clientid, f, fnme, &se))
@@ -1557,8 +1560,9 @@ rel_psm(sql_query *query, symbol *s)
                sql_ftype type = (sql_ftype) 
l->h->next->next->next->next->next->data.i_val;
                sql_flang lang = (sql_flang) 
l->h->next->next->next->next->next->next->data.i_val;
                int repl = 
l->h->next->next->next->next->next->next->next->data.i_val;
+               int order_spec = 
l->h->next->next->next->next->next->next->next->next->data.i_val;
 
-               ret = rel_create_func(query, l->h->data.lval, 
l->h->next->data.lval, l->h->next->next->data.sym, 
l->h->next->next->next->data.lval, l->h->next->next->next->next->data.lval, 
type, lang, repl);
+               ret = rel_create_func(query, l->h->data.lval, 
l->h->next->data.lval, l->h->next->next->data.sym, 
l->h->next->next->next->data.lval, l->h->next->next->next->next->data.lval, 
type, lang, repl, order_spec);
                sql->type = Q_SCHEMA;
        }       break;
        case SQL_DROP_FUNC:
diff --git a/sql/server/rel_select.c b/sql/server/rel_select.c
--- a/sql/server/rel_select.c
+++ b/sql/server/rel_select.c
@@ -3442,6 +3442,8 @@ exps_valid(sql_query *query, list *exps,
        return NULL;
 }
 
+static list * rel_order_by(sql_query *query, sql_rel **R, symbol *orderby, int 
needs_distinct, int f);
+
 static sql_exp *
 _rel_aggr(sql_query *query, sql_rel **rel, int distinct, char *sname, char 
*aname, dnode *args, int f)
 {
@@ -3491,7 +3493,7 @@ static sql_exp *
                bool arguments_correlated = true, all_const = true;
 
                all_freevar = all_aggr?1:0;
-               for (i = 0; args && args->data.sym; args = args->next, i++) {
+               for (i = 0; args && args->data.sym && args->data.sym->token != 
SQL_ORDERBY; args = args->next, i++) {
                        int base = (!groupby || !is_project(groupby->op) || 
is_base(groupby->op) || is_processed(groupby));
                        sql_rel *gl = base?groupby:groupby->l, *ogl = gl; /* 
handle case of subqueries without correlation */
                        sql_exp *e = rel_value_exp(query, &gl, args->data.sym, 
(f | sql_aggr)& ~sql_farg, ek);
@@ -3714,6 +3716,17 @@ static sql_exp *
        if ((!exps || exps_card(exps) > CARD_ATOM) && (!res || !groupby))
                return NULL;
 
+       list *obe = NULL;
+       if (args && args->data.sym && args->data.sym->token != SQL_ORDERBY)
+                       return NULL;
+       if (args && args->data.sym) { /* handle order by */
+               int base = (!groupby || !is_project(groupby->op) || 
is_base(groupby->op) || is_processed(groupby));
+               sql_rel *gl = base?groupby:groupby->l;//, *ogl = gl; /* handle 
case of subqueries without correlation */
+               obe = rel_order_by(query, &gl, args->data.sym, 0, f);
+               if (!obe)
+                       return NULL;
+       }
+
        if (all_freevar) {
                query_update_outer(query, res, all_freevar-1);
        } else if (rel) {
@@ -3725,6 +3738,7 @@ static sql_exp *
        }
 
        if (!has_args) {        /* count(*) case */
+               obe = NULL; /* no errors, although the order by is useless */
                sql_exp *e;
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to