Changeset: a7d5d1ff828f for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a7d5d1ff828f
Added Files:
        sql/test/json/Tests/aggregate00.sql
        sql/test/json/Tests/aggregate00.stable.err
        sql/test/json/Tests/aggregate00.stable.out
Modified Files:
        monetdb5/modules/atoms/json.c
        monetdb5/modules/atoms/json.h
        monetdb5/modules/atoms/json.mal
        sql/scripts/40_json.sql
        sql/test/json/Tests/All
Branch: default
Log Message:

Add functions for aggregating a column into a JSON array.


diffs (truncated from 552 to 300 lines):

diff --git a/monetdb5/modules/atoms/json.c b/monetdb5/modules/atoms/json.c
--- a/monetdb5/modules/atoms/json.c
+++ b/monetdb5/modules/atoms/json.c
@@ -1598,3 +1598,329 @@ JSONtextGrouped(int *ret, int *bid, int 
        throw(MAL,"json.text","tobeimplemented");
 }
 
+/*
+ * JSONgroupStr: render all non-nil string values in the tail of BAT *bid
+ * as one JSON array literal of the form [ "a", "b" ].
+ *
+ * ret: receives the GDKmalloc'ed result buffer on success.  If the BAT
+ *      contains no non-nil value, the buffer holds str_nil.
+ * bid: identifier of the input BAT; its tail values are read as strings.
+ *
+ * Returns MAL_SUCCEED, or a MAL exception when allocation fails or the
+ * BAT descriptor cannot be obtained.
+ *
+ * NOTE(review): values are copied verbatim between double quotes; no JSON
+ * escaping is applied here -- confirm that callers only pass values that
+ * need none.
+ */
+str
+JSONgroupStr(str *ret, const bat *bid)
+{
+       BAT *b;
+       BUN p, q;
+       const char *t;
+       size_t len, size = BUFSIZ, offset = 0;
+       str buf = GDKmalloc(size);
+       str nbuf;
+       BATiter bi;
+       const char *err = NULL;
+
+       if (buf == NULL)
+               throw(MAL, "json.group",MAL_MALLOC_FAIL);
+       if ((b = BATdescriptor(*bid)) == NULL) {
+               GDKfree(buf);
+               throw(MAL, "json.agg", RUNTIME_OBJECT_MISSING);
+       }
+
+       /* result stays nil unless at least one non-nil value is appended */
+       strcpy(buf, str_nil);
+       bi = bat_iterator(b);
+       BATloop(b, p, q) {
+               int n;
+
+               t = (const char *) BUNtail(bi, p);
+
+               if (strNil(t))
+                       continue;
+               len = strlen(t);
+               /* reserve room for the worst case: separator/opening (3),
+                * the value, closing quote (1), final " ]" (2) and the
+                * terminating NUL (1), plus one byte of slack */
+               if (len + 8 > size - offset) {
+                       size += len + 128;
+                       /* keep the old pointer so it can still be freed when
+                        * the reallocation fails */
+                       nbuf = GDKrealloc(buf, size);
+                       if (nbuf == NULL) {
+                               err = MAL_MALLOC_FAIL;
+                               goto failed;
+                       }
+                       buf = nbuf;
+               }
+               if (offset == 0) {
+                       n = snprintf(buf, size, "[ \"%s\"", t);
+               } else {
+                       n = snprintf(buf + offset, size - offset, ", \"%s\"", t);
+               }
+               if (n < 0) {
+                       err = "snprintf failed";
+                       goto failed;
+               }
+               offset += n;
+       }
+       /* close the array once, after the loop, so that trailing nil values
+        * cannot leave it unterminated; space was reserved above */
+       if (offset > 0)
+               strcpy(buf + offset, " ]");
+       BBPreleaseref(b->batCacheid);
+       *ret = buf;
+       return MAL_SUCCEED;
+  failed:
+       BBPreleaseref(b->batCacheid);
+       if (buf != NULL)
+               GDKfree(buf);
+       throw(MAL, "json.agg", "%s", err);
+}
+
+/*
+ * JSONjsonaggr: aggregate the string tail of b into JSON array literals
+ * of the form [ "a", "b" ], one per group.
+ *
+ * bnp:       receives the result BAT (NULL when an error is returned).
+ * b:         input BAT; its tail type must be TYPE_str (asserted).
+ * g:         optional grouping BAT; when NULL all values form one group.
+ * e, s:      passed to BATgroupaggrinit; s, when given, is additionally
+ *            joined against b and g before aggregating.
+ * skip_nils: when non-zero nil values are ignored; otherwise a single
+ *            nil value makes the result of its group nil.
+ *
+ * A group for which no value is emitted yields nil.  Returns NULL on
+ * success or a static error message on failure.
+ *
+ * NOTE(review): values are copied verbatim between double quotes; no JSON
+ * escaping is applied here -- confirm with callers.
+ */
+static const char *
+JSONjsonaggr(BAT **bnp, BAT *b, BAT *g, BAT *e, BAT *s, int skip_nils)
+{
+       BAT *bn = NULL, *t1, *t2 = NULL;
+       BATiter bi;
+       oid min, max;
+       BUN ngrp, start, end, cnt;
+       BUN nils = 0;
+       int isnil;
+       const oid *cand = NULL, *candend = NULL;
+       const char *v;
+       const oid *grps, *map;
+       oid mapoff = 0;
+       oid prev;
+       BUN p, q;
+       int freeb = 0, freeg = 0;
+       char *buf = NULL, *nbuf;
+       size_t buflen, maxlen, len;
+       const char *err;
+       int n;
+
+       if ((err = BATgroupaggrinit(b, g, e, s, &min, &max, &ngrp, &start, &end,
+                                   &cnt, &cand, &candend)) != NULL) {
+               return err;
+       }
+       assert(b->ttype == TYPE_str);
+       if (BATcount(b) == 0 || ngrp == 0) {
+               /* nothing to aggregate: every group is nil */
+               bn = BATconstant(TYPE_str, ATOMnilptr(TYPE_str), ngrp);
+               if (bn == NULL)
+                       return MAL_MALLOC_FAIL;
+               BATseqbase(bn, ngrp == 0 ? 0 : min);
+               *bnp = bn;
+               return NULL;
+       }
+       if (s) {
+               b = BATleftjoin(s, b, BATcount(s));
+               if (b == NULL) {
+                       err = "internal leftjoin failed";
+                       goto out;
+               }
+               freeb = 1;
+               if (b->htype != TYPE_void) {
+                       t1 = BATmirror(BATmark(BATmirror(b), 0));
+                       if (t1 == NULL) {
+                               err = "internal mark failed";
+                               goto out;
+                       }
+                       BBPunfix(b->batCacheid);
+                       b = t1;
+               }
+               if (g) {
+                       g = BATleftjoin(s, g, BATcount(s));
+                       if (g == NULL) {
+                               err = "internal leftjoin failed";
+                               goto out;
+                       }
+                       freeg = 1;
+                       if (g->htype != TYPE_void) {
+                               t1 = BATmirror(BATmark(BATmirror(g), 0));
+                               if (t1 == NULL) {
+                                       err = "internal mark failed";
+                                       goto out;
+                               }
+                               BBPunfix(g->batCacheid);
+                               g = t1;
+                       }
+               }
+       }
+       if (g && BATtdense(g)) {
+               /* singleton groups: return group ID's (g's tail) and original
+                * values from b */
+               /* NOTE(review): this path returns the raw values, not JSON
+                * array literals -- confirm this is intended */
+               bn = VIEWcreate(BATmirror(g), b);
+               goto out;
+       }
+
+       maxlen = BUFSIZ;
+       if ((buf = GDKmalloc(maxlen)) == NULL) {
+               err = MAL_MALLOC_FAIL;
+               goto out;
+       }
+       /* buf holds nil until the first value of a group is written, so a
+        * group without any emitted value never appends uninitialised data */
+       strcpy(buf, str_nil);
+       buflen = 0;
+       bn = BATnew(TYPE_void, TYPE_str, ngrp);
+       if (bn == NULL) {
+               err = MAL_MALLOC_FAIL;
+               goto out;
+       }
+       bi = bat_iterator(b);
+       if (g) {
+               /* stable sort g */
+               if (BATsubsort(&t1, &t2, NULL, g, NULL, NULL, 0, 1) == GDK_FAIL){
+                       BBPreclaim(bn);
+                       bn = NULL;
+                       err = "internal sort failed";
+                       goto out;
+               }
+               if (freeg)
+                       BBPunfix(g->batCacheid);
+               g = t1;
+               freeg = 1;
+               if (t2->ttype == TYPE_void) {
+                       map = NULL;
+                       mapoff = b->tseqbase;
+               } else {
+                       map = (const oid *) Tloc(t2, BUNfirst(t2));
+               }
+               grps = (const oid *) Tloc(g, BUNfirst(g));
+               prev = grps[0];
+               isnil = 0;
+               /* p == q is one extra iteration that flushes the last group */
+               for (p = 0, q = BATcount(g); p <= q; p++) {
+                       if (p == q || grps[p] != prev) {
+                               /* group boundary: close the array if one was
+                                * started (room for " ]" is reserved when
+                                * values are appended); otherwise buf is nil */
+                               if (buflen > 0)
+                                       strcpy(buf + buflen, " ]");
+                               /* fill group ids without any row with nil */
+                               while (BATcount(bn) < prev - min) {
+                                       bunfastapp_nocheck(bn, BUNlast(bn), str_nil, Tsize(bn));
+                                       nils++;
+                               }
+                               bunfastapp_nocheck(bn, BUNlast(bn), buf, Tsize(bn));
+                               nils += strNil(buf);
+                               strcpy(buf, str_nil);
+                               buflen = 0;
+                               if (p == q)
+                                       break;
+                               prev = grps[p];
+                               isnil = 0;
+                       }
+                       if (isnil)
+                               continue;
+                       v = (const char *) BUNtail(bi, BUNfirst(b) + (map ? (BUN) map[p] : p + mapoff));
+
+                       if (strNil(v)) {
+                               if (skip_nils)
+                                       continue;
+                               /* one nil makes the whole group nil; drop
+                                * whatever was collected so far */
+                               strcpy(buf, str_nil);
+                               buflen = 0;
+                               isnil = 1;
+                       } else {
+                               len = strlen(v);
+                               /* worst case: ", \"" (3) + value + "\"" (1)
+                                * + " ]" (2) + NUL (1), plus one of slack */
+                               if (len + 8 > maxlen - buflen) {
+                                       maxlen += len + BUFSIZ;
+                                       /* keep buf valid for the cleanup
+                                        * code if reallocation fails */
+                                       nbuf = GDKrealloc(buf, maxlen);
+                                       if (nbuf == NULL) {
+                                               err = MAL_MALLOC_FAIL;
+                                               goto bunins_failed;
+                                       }
+                                       buf = nbuf;
+                               }
+                               if (buflen == 0) {
+                                       n = snprintf(buf, maxlen, "[ \"%s\"", v);
+                               } else {
+                                       n = snprintf(buf + buflen, maxlen - buflen, ", \"%s\"", v);
+                               }
+                               if (n < 0) {
+                                       err = "internal snprintf failed";
+                                       goto bunins_failed;
+                               }
+                               buflen += n;
+                       }
+               }
+               BBPunfix(t2->batCacheid);
+               t2 = NULL;
+       } else {
+               /* no grouping: all values end up in a single array */
+               for (p = BUNfirst(b), q = p + BATcount(b); p < q; p++) {
+                       v = (const char *) BUNtail(bi, p);
+                       if (strNil(v)) {
+                               if (skip_nils)
+                                       continue;
+                               /* one nil makes the whole result nil */
+                               strcpy(buf, str_nil);
+                               buflen = 0;
+                               break;
+                       }
+                       len = strlen(v);
+                       /* same worst-case reservation as in the grouped case */
+                       if (len + 8 > maxlen - buflen) {
+                               maxlen += len + BUFSIZ;
+                               nbuf = GDKrealloc(buf, maxlen);
+                               if (nbuf == NULL) {
+                                       err = MAL_MALLOC_FAIL;
+                                       goto bunins_failed;
+                               }
+                               buf = nbuf;
+                       }
+                       if (buflen == 0) {
+                               n = snprintf(buf, maxlen, "[ \"%s\"", v);
+                       } else {
+                               n = snprintf(buf + buflen, maxlen - buflen, ", \"%s\"", v);
+                       }
+                       if (n < 0) {
+                               err = "internal snprintf failed";
+                               goto bunins_failed;
+                       }
+                       buflen += n;
+               }
+               if (buflen > 0)
+                       strcpy(buf + buflen, " ]");
+               nils += strNil(buf);
+               bunfastapp_nocheck(bn, BUNlast(bn), buf, Tsize(bn));
+       }
+       BATseqbase(bn, min);
+       bn->T->nil = nils != 0;
+       bn->T->nonil = nils == 0;
+       bn->T->sorted = BATcount(bn) <= 1;
+       bn->T->revsorted = BATcount(bn) <= 1;
+       bn->T->key = BATcount(bn) <= 1;
+
+  out:
+       if (t2)
+               BBPunfix(t2->batCacheid);
+       if (freeb && b)
+               BBPunfix(b->batCacheid);
+       if (freeg && g)
+               BBPunfix(g->batCacheid);
+       if (buf)
+               GDKfree(buf);
+       *bnp = bn;
+       return err;
+
+  bunins_failed:
+       if (bn)
+               BBPreclaim(bn);
+       bn = NULL;
+       if (err == NULL)
+               err = MAL_MALLOC_FAIL;  /* insertion into result BAT failed */
+       goto out;
+}
+
+str
+JSONsubjsoncand(bat *retval, bat *bid, bat *gid, bat *eid, bat *sid, bit 
*skip_nils)
+{
+       BAT *b, *g, *e, *s, *bn = NULL;
+       const char *err;
+
+       b = BATdescriptor(*bid);
+       g = gid ? BATdescriptor(*gid) : NULL;
+       e = eid ? BATdescriptor(*eid) : NULL;
+       if (b == NULL || (gid != NULL && g == NULL) || (eid != NULL && e == 
NULL)) {
+
+               if (b)
+                       BBPreleaseref(b->batCacheid);
+               if (g)
+                       BBPreleaseref(g->batCacheid);
+               if (e)
+                       BBPreleaseref(e->batCacheid);
+               throw(MAL, "aggr.subjson", RUNTIME_OBJECT_MISSING);
+       }
+       if (sid) {
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to