Changeset: a7d5d1ff828f for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a7d5d1ff828f
Added Files:
    sql/test/json/Tests/aggregate00.sql
    sql/test/json/Tests/aggregate00.stable.err
    sql/test/json/Tests/aggregate00.stable.out
Modified Files:
    monetdb5/modules/atoms/json.c
    monetdb5/modules/atoms/json.h
    monetdb5/modules/atoms/json.mal
    sql/scripts/40_json.sql
    sql/test/json/Tests/All
Branch: default
Log Message:
functions for aggregating column as json array diffs (truncated from 552 to 300 lines): diff --git a/monetdb5/modules/atoms/json.c b/monetdb5/modules/atoms/json.c --- a/monetdb5/modules/atoms/json.c +++ b/monetdb5/modules/atoms/json.c @@ -1598,3 +1598,329 @@ JSONtextGrouped(int *ret, int *bid, int throw(MAL,"json.text","tobeimplemented"); } +str +JSONgroupStr(str *ret, const bat *bid) +{ + BAT *b; + BUN p, q; + const char *t; + size_t len, size = BUFSIZ, offset; + str buf = GDKmalloc(size); + BATiter bi; + const char *err = NULL; + + if (buf == NULL) + throw(MAL, "json.group",MAL_MALLOC_FAIL); + if ((b = BATdescriptor(*bid)) == NULL) { + GDKfree(buf); + throw(MAL, "json.agg", RUNTIME_OBJECT_MISSING); + } + + strcpy(buf, str_nil); + offset = 0; + bi = bat_iterator(b); + BATloop(b, p, q) { + int n; + + t = (const char *) BUNtail(bi, p); + + if (strNil(t)) + continue; + len = strlen(t) + 1; + if (len >= size - offset) { + size += len + 128; + buf = GDKrealloc(buf, size); + if (buf == NULL) { + err= MAL_MALLOC_FAIL; + goto failed; + } + } + if (offset == 0) { + if (BATcount(b) == 1) { + n = snprintf(buf, size, "[ \"%s\" ]", t); + } else { + n = snprintf(buf, size, "[ \"%s\"", t); + } + } else { + if (p == BUNlast(b) - 1) { + n = snprintf(buf + offset, size - offset, ", \"%s\" ]", t); + } else { + n = snprintf(buf + offset, size - offset, ", \"%s\"", t); + } + } + offset += n; + } + BBPreleaseref(b->batCacheid); + *ret = buf; + return MAL_SUCCEED; + failed: + BBPreleaseref(b->batCacheid); + if (buf != NULL) + GDKfree(buf); + throw(MAL, "json.agg", "%s", err); +} + +static const char * +JSONjsonaggr(BAT **bnp, BAT *b, BAT *g, BAT *e, BAT *s, int skip_nils) +{ + BAT *bn = NULL, *t1, *t2 = NULL; + BATiter bi; + oid min, max; + BUN ngrp, start, end, cnt; + BUN nils = 0; + int isnil; + const oid *cand = NULL, *candend = NULL; + const char *v; + const oid *grps, *map; + oid mapoff = 0; + oid prev; + BUN p, q; + int freeb = 0, freeg = 0; + char *buf = NULL; + size_t buflen, 
maxlen, len; + const char *err; + + if ((err = BATgroupaggrinit(b, g, e, s, &min, &max, &ngrp, &start, &end, + &cnt, &cand, &candend)) != NULL) { + return err; + } + assert(b->ttype == TYPE_str); + if (BATcount(b) == 0 || ngrp == 0) { + bn = BATconstant(TYPE_str, ATOMnilptr(TYPE_str), ngrp); + if (bn == NULL) + return MAL_MALLOC_FAIL; + BATseqbase(bn, ngrp == 0 ? 0 : min); + *bnp = bn; + return NULL; + } + if (s) { + b = BATleftjoin(s, b, BATcount(s)); + if (b == NULL) { + err = "internal leftjoin failed"; + goto out; + } + freeb = 1; + if (b->htype != TYPE_void) { + t1 = BATmirror(BATmark(BATmirror(b), 0)); + if (t1 == NULL) { + err = "internal mark failed"; + goto out; + } + BBPunfix(b->batCacheid); + b = t1; + } + if (g) { + g = BATleftjoin(s, g, BATcount(s)); + if (g == NULL) { + err = "internal leftjoin failed"; + goto out; + } + freeg = 1; + if (g->htype != TYPE_void) { + t1 = BATmirror(BATmark(BATmirror(g), 0)); + if (t1 == NULL) { + err = "internal mark failed"; + goto out; + } + BBPunfix(g->batCacheid); + g = t1; + } + } + } + if (g && BATtdense(g)) { + /* singleton groups: return group ID's (g's tail) and original + * values from b */ + bn = VIEWcreate(BATmirror(g), b); + goto out; + } + + maxlen = BUFSIZ; + if ((buf = GDKmalloc(maxlen)) == NULL) { + err = MAL_MALLOC_FAIL; + goto out; + } + buflen = 0; + bn = BATnew(TYPE_void, TYPE_str, ngrp); + if (bn == NULL) { + err = MAL_MALLOC_FAIL; + goto out; + } + bi = bat_iterator(b); + if (g) { + /* stable sort g */ + if (BATsubsort(&t1, &t2, NULL, g, NULL, NULL, 0, 1) == GDK_FAIL){ + BBPreclaim(bn); + bn = NULL; + err = "internal sort failed"; + goto out; + } + if (freeg) + BBPunfix(g->batCacheid); + g = t1; + freeg = 1; + if (t2->ttype == TYPE_void) { + map = NULL; + mapoff = b->tseqbase; + } else { + map = (const oid *) Tloc(t2, BUNfirst(t2)); + } + grps = (const oid *) Tloc(g, BUNfirst(g)); + prev = grps[0]; + isnil = 0; + for (p = 0, q = BATcount(g); p <= q; p++) { + if (p == q || grps[p] != prev) { + 
strncpy(buf + buflen, " ]", buflen); + buflen += 2; + while (BATcount(bn) < prev - min) { + bunfastapp_nocheck(bn, BUNlast(bn), str_nil, Tsize(bn)); + nils++; + } + bunfastapp_nocheck(bn, BUNlast(bn), buf, Tsize(b)); + nils += strNil(buf); + strncpy(buf, str_nil, maxlen); + buflen = 0; + if (p == q) + break; + prev = grps[p]; + isnil = 0; + } + if (isnil) + continue; + v = (const char *) BUNtail(bi, BUNfirst(b) + (map ? (BUN) map[p] : p + mapoff)); + + if (strNil(v)) { + if (skip_nils) + continue; + strncpy(buf, str_nil, buflen); + isnil = 1; + } else { + len = strlen(v); + if (len >= maxlen - buflen) { + maxlen += len + BUFSIZ; + buf = GDKrealloc(buf, maxlen); + if (buf == NULL) { + err = MAL_MALLOC_FAIL; + goto bunins_failed; + } + } + if (buflen == 0) { + len = snprintf(buf + buflen, maxlen - buflen, "[ \"%s\"", v); + buflen += len; + } else { + len = snprintf(buf + buflen, maxlen - buflen, ", \"%s\"", v); + buflen += len; + } + } + } + BBPunfix(t2->batCacheid); + t2 = NULL; + } else { + for (p = BUNfirst(b), q = p + BATcount(b); p < q; p++) { + v = (const char *) BUNtail(bi, p); + if (strNil(v)) { + if (skip_nils) + continue; + strncpy(buf, str_nil, buflen); + nils++; + break; + } + len = strlen(v); + if (len >= maxlen - buflen) { + maxlen += len + BUFSIZ; + buf = GDKrealloc(buf, maxlen); + if (buf == NULL) { + err = MAL_MALLOC_FAIL; + goto bunins_failed; + } + } + if (buflen == 0) { + len = snprintf(buf + buflen, maxlen - buflen, "[ \"%s\"", v); + buflen += len; + } else { + len = snprintf(buf + buflen, maxlen - buflen, ", \"%s\"", v); + buflen += len; + } + } + bunfastapp_nocheck(bn, BUNlast(bn), buf, Tsize(bn)); + } + BATseqbase(bn, min); + bn->T->nil = nils != 0; + bn->T->nonil = nils == 0; + bn->T->sorted = BATcount(bn) <= 1; + bn->T->revsorted = BATcount(bn) <= 1; + bn->T->key = BATcount(bn) <= 1; + + out: + if (t2) + BBPunfix(t2->batCacheid); + if (freeb && b) + BBPunfix(b->batCacheid); + if (freeg && g) + BBPunfix(g->batCacheid); + if (buf) + 
GDKfree(buf); + *bnp = bn; + return err; + + bunins_failed: + if (bn) + BBPreclaim(bn); + bn = NULL; + if (err == NULL) + err = MAL_MALLOC_FAIL; /* insertion into result BAT failed */ + goto out; +} + +str +JSONsubjsoncand(bat *retval, bat *bid, bat *gid, bat *eid, bat *sid, bit *skip_nils) +{ + BAT *b, *g, *e, *s, *bn = NULL; + const char *err; + + b = BATdescriptor(*bid); + g = gid ? BATdescriptor(*gid) : NULL; + e = eid ? BATdescriptor(*eid) : NULL; + if (b == NULL || (gid != NULL && g == NULL) || (eid != NULL && e == NULL)) { + + if (b) + BBPreleaseref(b->batCacheid); + if (g) + BBPreleaseref(g->batCacheid); + if (e) + BBPreleaseref(e->batCacheid); + throw(MAL, "aggr.subjson", RUNTIME_OBJECT_MISSING); + } + if (sid) { _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list