Changeset: f5e4fef2ba79 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/f5e4fef2ba79 Branch: tvtree Log Message:
merged with nested diffs (272 lines): diff --git a/sql/backends/monet5/sql_result.c b/sql/backends/monet5/sql_result.c --- a/sql/backends/monet5/sql_result.c +++ b/sql/backends/monet5/sql_result.c @@ -2194,14 +2194,23 @@ end: } #define skipspace(s) while(*s && isspace(*s)) s++; +static char * +FINDsep(char *s, char tsep, char rsep) +{ + for (; *s; s++) { + if (s[0] == tsep || s[0] == rsep) { + break; + } + } + return s; +} static str VALUEparser(char **S, Column *cols, int elm, sql_subtype *t, char tsep, char rsep) { /* handle literals */ char *s = *S; - int skip = 0; - char *ns = NULL; + char *ns = s; if (t->type->localtype == TYPE_str) { /* todo improve properly skip "" strings. */ if (*s != '"') @@ -2212,13 +2221,10 @@ VALUEparser(char **S, Column *cols, int ns++; if (*ns != '"') throw(SQL, "SQLfrom_varchar", SQLSTATE(42000) "missing \" at end of string value"); - skip++; - } else if (tsep) { - ns = strchr(s, tsep); + *ns = 0; + ns++; } - if (!ns && rsep) { - ns = strchr(s, rsep); - } + ns = FINDsep(ns, tsep, rsep); char sep = 0; if (!ns) throw(SQL, "SQLfrom_varchar", SQLSTATE(42000) "missing '%c' at end of value", rsep?rsep:tsep); @@ -2230,10 +2236,7 @@ VALUEparser(char **S, Column *cols, int if (elm >= 0 && d && BUNappend(cols[elm].c, d, false) != GDK_SUCCEED) throw(SQL, "SQLfrom_varchar", SQLSTATE(42000) "append failed"); *ns = sep; - s = ns; - if (skip) - s++; - *S = s; + *S = ns; return NULL; } diff --git a/sql/backends/monet5/vaults/json/json.c b/sql/backends/monet5/vaults/json/json.c --- a/sql/backends/monet5/vaults/json/json.c +++ b/sql/backends/monet5/vaults/json/json.c @@ -68,6 +68,7 @@ json_open(const char *fname, allocator * return res; } + static void json_close(JSONFileHandle *jfh) { @@ -93,6 +94,7 @@ read_json_file(JSONFileHandle *jfh) return content; } + static str append_terms(allocator *sa, JSON *jt, BAT *b) { @@ -191,8 +193,10 @@ json_load(void *BE, sql_subfunc *f, char return s; } + int TYPE_json; + static str JSONprelude(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) { @@ -203,6 +207,7 @@ JSONprelude(Client cntxt, MalBlkPtr mb, return MAL_SUCCEED; } + static str JSONepilogue(void *ret) { @@ -252,12 +257,79 @@ JSONread_json(Client cntxt, MalBlkPtr mb return msg; } + +static str +JSONread_nd_json(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) +{ + (void) cntxt; (void) mb; + char *msg = MAL_SUCCEED; + char *fname = *(str*)getArgReference(stk, pci, pci->retc); + allocator *sa = sa_create(NULL); + JSONFileHandle *jfh = json_open(fname, sa); + if (!jfh) { + sa_destroy(sa); + msg = createException(SQL, "json.read_nd_json", "Failed to open file %s", fname); + return msg; + } + char *content = read_json_file(jfh); + json_close(jfh); + BAT *b = COLnew(0, TYPE_json, 0, TRANSIENT); + if (content) { + if (b) { + size_t cnt = 0; + char *head = content; + char *tail = content; + while (cnt < (jfh->size + 1)) { + if (head[0] == '\n' || (head[0] == '\r' && head[1] == '\n')) { + int skip = 1; + if (head[0] == '\r' && head[1] == '\n') + skip = 2; + head[0] = '\0'; + JSON *jt = JSONparse(tail); + if (jt) { + // must be valid json obj str + if (BUNappend(b, tail, false) != GDK_SUCCEED) { + msg = createException(SQL, "json.read_nd_json", "BUNappend failed!"); + break; + } + } else { + msg = createException(SQL, "json.read_nd_json", "Invalid json object, JSONparse failed!"); + break; + } + tail = head + skip; + while (tail[0] == '\n') // multiple newlines e.g. \n\n + tail ++; + head = tail; + } + head ++; + cnt ++; + } + } else { + msg = createException(SQL, "json.read_nd_json", "Failed to allocate bat"); + } + } else { + msg = createException(SQL, "json.read_nd_json", "Failed to read file %s", fname); + } + if (msg == MAL_SUCCEED) { + bat *res = getArgReference_bat(stk, pci, 0); + *res = b->batCacheid; + BBPkeepref(b); + } else { + BBPreclaim(b); + } + + sa_destroy(sa); + return msg; +} + + #include "mel.h" static mel_func json_init_funcs[] = { pattern("json", "prelude", JSONprelude, false, "", noargs), command("json", "epilogue", JSONepilogue, false, "", noargs), - pattern("json", "read_json", JSONread_json, false, "Reads json file into a table", args(1,2, batarg("", json), arg("filename", str))), + pattern("json", "read_json", JSONread_json, false, "Reads json file", args(1,2, batarg("", json), arg("filename", str))), + pattern("json", "read_nd_json", JSONread_nd_json, false, "Reads new line delimited json objects", args(1,2, batarg("", json), arg("filename", str))), { .imp=NULL } }; diff --git a/sql/scripts/40_json.sql b/sql/scripts/40_json.sql --- a/sql/scripts/40_json.sql +++ b/sql/scripts/40_json.sql @@ -104,3 +104,9 @@ create aggregate json.tojsonarray( x str GRANT EXECUTE ON AGGREGATE json.tojsonarray( string ) TO PUBLIC; create aggregate json.tojsonarray( x double ) returns string external name aggr.jsonaggr; GRANT EXECUTE ON AGGREGATE json.tojsonarray( double ) TO PUBLIC; + + +create function sys.read_nd_json(fname string) +returns table(json JSON) +external name json.read_nd_json; +-- GRANT EXECUTE ON FUNCTION json.read_nd_json(string) TO PUBLIC; diff --git a/sql/test/nested/Tests/bluesky.json b/sql/test/nested/Tests/bluesky.json new file mode 100644 --- /dev/null +++ b/sql/test/nested/Tests/bluesky.json @@ -0,0 +1,5 @@ +{"did":"did:plc:3i4xf2v4wcnyktgv6satke64","time_us":1732206349000644,"kind":"commit","commit":{"rev":"3lbhuvzds6d2a","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhuvzdked2a","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:25:46.221Z","subject":{"cid":"bafyreidjvrcmckkm765mct5fph36x7kupkfo35rjklbf2k76xkzwyiauge","uri":"at://did:plc:azrv4rcbws6kmcga4fsbphg2/app.bsky.feed.post/3lbgjdpbiec2l"}},"cid":"bafyreia5l5vrkh5oj4cjyhcqby2dprhyvcyofo2q5562tijlae2pzih23m"}} +{"did":"did:plc:gccfnqqizz4urhchsaie6jft","time_us":1732206349001108,"kind":"commit","commit":{"rev":"3lbhuvze3gi2u","operation":"create","collection":"app.bsky.graph.follow","rkey":"3lbhuvzdtmi2u","record":{"$type":"app.bsky.graph.follow","createdAt":"2024-11-21T16:27:40.923Z","subject":"did:plc:r7cdh4sgzqbfdc6wcdxxti7c"},"cid":"bafyreiew2p6cgirfaj45qoenm4fgumib7xoloclrap3jgkz5es7g7kby3i"}} +{"did":"did:plc:msxqf3twq7abtdw7dbfskphk","time_us":1732206349001372,"kind":"commit","commit":{"rev":"3lbhueija5p22","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhueiizcx22","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:15:58.232Z","subject":{"cid":"bafyreiavpshyqzrlo5m7fqodjhs6jevweqnif4phasiwimv4a7mnsqi2fe","uri":"at://did:plc:fusulxqc52zbrc75fi6xrcof/app.bsky.feed.post/3lbhskq5zn22f"}},"cid":"bafyreidjix4dauj2afjlbzmhj3a7gwftcevvmmy6edww6vrjdbst26rkby"}} +{"did":"did:plc:cdsd346mwow7aj3tgfkwsct3","time_us":1732206349003461,"kind":"commit","commit":{"rev":"3lbhus5vior2t","operation":"create","collection":"app.bsky.feed.repost","rkey":"3lbhus5vbtz2t","record":{"$type":"app.bsky.feed.repost","createdAt":"2024-11-21T16:23:36.714Z","subject":{"cid":"bafyreieaacfiobnuqvjhhsndyi5s3fd6krbzdduxsyrzfv43kczpcmkl6y","uri":"at://did:plc:o5q6dynpme4ndolc3heztasm/app.bsky.feed.post/3lbfli3qsoc2o"}},"cid":"bafyreid5ycocp5zq2g7fcx2xxzxrbafuh7b5qhtwuwiomzo6vqila2cbpu"}} +{"did":"did:plc:hbc74dlsxhq53kp5oxges6d7","time_us":1732206349004769,"kind":"commit","commit":{"rev":"3lbhuvzedg52j","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhuvzdyof2j","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:25:46.167Z","subject":{"cid":"bafyreiaumopip75nzx2xjbugtwemdppsyx54bd2odf6q45f3o7xkocgari","uri":"at://did:plc:ig2jv6gqup4t7gdq2pmanknw/app.bsky.feed.post/3lbhuvtlaec2c"}},"cid":"bafyreidjk2svg2fdjiiwohmfmvp3hdxhpb33ycnixzbkyib5m6cocindxq"}} diff --git a/sql/test/nested/Tests/bluesky.test.in b/sql/test/nested/Tests/bluesky.test.in new file mode 100644 --- /dev/null +++ b/sql/test/nested/Tests/bluesky.test.in @@ -0,0 +1,34 @@ +statement ok +create type "subject" as ("cid" varchar, "uri" varchar) + +statement ok +create type "record" as ("$type" varchar, "createdAt" timestamp, "subject" subject) + +statement ok +create type "commit" as ("rev" varchar, "operation" varchar, "collection" varchar, "rkey" varchar, "record" "record", "cid" varchar) + +statement ok +create type event as ("did" varchar, "time_us" int, "kind" varchar, "commit" "commit") + +statement ok +create table bluesky ("event" event) + +query I +select count(*) from read_nd_json(r'$TSTSRCDIR/bluesky.json') +---- +5 + +statement ok +select cast(t.json as event) as e from (select json from read_nd_json(r'$TSTSRCDIR/bluesky.json')) t + +statement ok +drop table bluesky + +statement ok +drop type event + +statement ok +drop type "commit" + +statement ok +drop type "record" diff --git a/sql/test/nested/Tests/simple.test b/sql/test/nested/Tests/simple.test --- a/sql/test/nested/Tests/simple.test +++ b/sql/test/nested/Tests/simple.test @@ -199,3 +199,17 @@ left 1 l1 2 + +statement ok +create table tpi( p pair[2], i integer) + +statement ok +insert into tpi values (array [ ('left','right'), ('l2','r2') ], 1), (array [ ('left','right'), ('l2','r2') ], 2), (array [ ('l3', 'r3') ], 3) + +#statement ok +#insert into tpi values ('{("l4", "r4"), ("l42", "r42")}', 4), ('{("l5", "r5")}', 5); + +query I +select count(*) from tpi +---- +3 diff --git a/sql/test/nested/Tests/webclicks.test.in b/sql/test/nested/Tests/webclicks.test.in --- a/sql/test/nested/Tests/webclicks.test.in +++ b/sql/test/nested/Tests/webclicks.test.in @@ -46,7 +46,7 @@ select count(*) from r'$TSTSRCDIR/webcli 2 query TTTTTT -select e.eventid, e.sessionid, e.userid, e.event, e.timestamp, e.user_agent from ( select cast(t.json as event) as e from (select json from r'$TSTSRCDIR/webclicks.json') t); +select e.eventid, e.sessionid, e.userid, e.event, e.timestamp, e.user_agent from ( select cast(t.json as event) as e from (select json from r'$TSTSRCDIR/webclicks.json') t) ---- 996257967-103007874 47f07c1399c9c6bd1012861f9c5c958e042732e259b909e077f7e8967b650c75 _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org