Changeset: f5e4fef2ba79 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/f5e4fef2ba79
Branch: tvtree
Log Message:

merged with nested


diffs (272 lines):

diff --git a/sql/backends/monet5/sql_result.c b/sql/backends/monet5/sql_result.c
--- a/sql/backends/monet5/sql_result.c
+++ b/sql/backends/monet5/sql_result.c
@@ -2194,14 +2194,23 @@ end:
 }
 
 #define skipspace(s) while(*s && isspace(*s)) s++;
+static char *
+FINDsep(char *s, char tsep, char rsep)
+{
+       for (; *s; s++) {
+               if (s[0] == tsep || s[0] == rsep) {
+                       break;
+               }
+       }
+       return s;
+}
 
 static str
 VALUEparser(char **S, Column *cols, int elm, sql_subtype *t, char tsep, char 
rsep)
 {
        /* handle literals */
        char *s = *S;
-       int skip = 0;
-       char *ns = NULL;
+       char *ns = s;
        if (t->type->localtype == TYPE_str) {
                /* todo improve properly skip "" strings. */
                if (*s != '"')
@@ -2212,13 +2221,10 @@ VALUEparser(char **S, Column *cols, int 
                        ns++;
                if (*ns != '"')
                        throw(SQL, "SQLfrom_varchar", SQLSTATE(42000) "missing 
\" at end of string value");
-               skip++;
-       } else if (tsep) {
-               ns = strchr(s, tsep);
+               *ns = 0;
+               ns++;
        }
-       if (!ns && rsep) {
-               ns = strchr(s, rsep);
-       }
+       ns = FINDsep(ns, tsep, rsep);
        char sep = 0;
        if (!ns)
                throw(SQL, "SQLfrom_varchar", SQLSTATE(42000) "missing '%c' at 
end of value", rsep?rsep:tsep);
@@ -2230,10 +2236,7 @@ VALUEparser(char **S, Column *cols, int 
        if (elm >= 0 && d && BUNappend(cols[elm].c, d, false) != GDK_SUCCEED)
                throw(SQL, "SQLfrom_varchar", SQLSTATE(42000) "append failed");
        *ns = sep;
-       s = ns;
-       if (skip)
-               s++;
-       *S = s;
+       *S = ns;
        return NULL;
 }
 
diff --git a/sql/backends/monet5/vaults/json/json.c 
b/sql/backends/monet5/vaults/json/json.c
--- a/sql/backends/monet5/vaults/json/json.c
+++ b/sql/backends/monet5/vaults/json/json.c
@@ -68,6 +68,7 @@ json_open(const char *fname, allocator *
        return res;
 }
 
+
 static void
 json_close(JSONFileHandle *jfh)
 {
@@ -93,6 +94,7 @@ read_json_file(JSONFileHandle *jfh)
        return content;
 }
 
+
 static str
 append_terms(allocator *sa, JSON *jt, BAT *b)
 {
@@ -191,8 +193,10 @@ json_load(void *BE, sql_subfunc *f, char
        return s;
 }
 
+
 int TYPE_json;
 
+
 static str
 JSONprelude(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
 {
@@ -203,6 +207,7 @@ JSONprelude(Client cntxt, MalBlkPtr mb, 
        return MAL_SUCCEED;
 }
 
+
 static str
 JSONepilogue(void *ret)
 {
@@ -252,12 +257,79 @@ JSONread_json(Client cntxt, MalBlkPtr mb
        return msg;
 }
 
+
+static str
+JSONread_nd_json(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+       (void) cntxt; (void) mb;
+       char *msg = MAL_SUCCEED;
+       char *fname = *(str*)getArgReference(stk, pci, pci->retc);
+       allocator *sa = sa_create(NULL);
+       JSONFileHandle *jfh = json_open(fname, sa);
+       if (!jfh) {
+               sa_destroy(sa);
+               msg = createException(SQL, "json.read_nd_json", "Failed to open 
file %s", fname);
+               return msg;
+       }
+       char *content = read_json_file(jfh);
+       json_close(jfh);
+       BAT *b = COLnew(0, TYPE_json, 0, TRANSIENT);
+       if (content) {
+               if (b) {
+                       size_t cnt = 0;
+                       char *head = content;
+                       char *tail = content;
+                       while (cnt < (jfh->size + 1)) {
+                               if (head[0] == '\n' || (head[0] == '\r' && 
head[1] == '\n')) {
+                                       int skip = 1;
+                                       if (head[0] == '\r' && head[1] == '\n')
+                                               skip = 2;
+                                       head[0] = '\0';
+                                       JSON *jt = JSONparse(tail);
+                                       if (jt) {
+                                               // must be valid json obj str
+                                               if (BUNappend(b, tail, false) 
!= GDK_SUCCEED) {
+                                                       msg = 
createException(SQL, "json.read_nd_json", "BUNappend failed!");
+                                                       break;
+                                               }
+                                       } else {
+                                                       msg = 
createException(SQL, "json.read_nd_json", "Invalid json object, JSONparse 
failed!");
+                                                       break;
+                                       }
+                                       tail = head + skip;
+                                       while (tail[0] == '\n') // multiple 
newlines e.g. \n\n
+                                               tail ++;
+                                       head = tail;
+                               }
+                               head ++;
+                               cnt ++;
+                       }
+               } else {
+                       msg = createException(SQL, "json.read_nd_json", "Failed 
to allocate bat");
+               }
+       } else {
+               msg = createException(SQL, "json.read_nd_json", "Failed to read 
file %s", fname);
+       }
+       if (msg == MAL_SUCCEED) {
+               bat *res = getArgReference_bat(stk, pci, 0);
+               *res = b->batCacheid;
+               BBPkeepref(b);
+       } else {
+               BBPreclaim(b);
+       }
+
+       sa_destroy(sa);
+       return msg;
+}
+
+
 #include "mel.h"
 
 static mel_func json_init_funcs[] = {
        pattern("json", "prelude", JSONprelude, false, "", noargs),
        command("json", "epilogue", JSONepilogue, false, "", noargs),
-       pattern("json", "read_json", JSONread_json, false, "Reads json file 
into a table", args(1,2, batarg("", json), arg("filename", str))),
+       pattern("json", "read_json", JSONread_json, false, "Reads json file", 
args(1,2, batarg("", json), arg("filename", str))),
+       pattern("json", "read_nd_json", JSONread_nd_json, false, "Reads new 
line delimited json objects", args(1,2, batarg("", json), arg("filename", 
str))),
 { .imp=NULL }
 };
 
diff --git a/sql/scripts/40_json.sql b/sql/scripts/40_json.sql
--- a/sql/scripts/40_json.sql
+++ b/sql/scripts/40_json.sql
@@ -104,3 +104,9 @@ create aggregate json.tojsonarray( x str
 GRANT EXECUTE ON AGGREGATE json.tojsonarray( string ) TO PUBLIC;
 create aggregate json.tojsonarray( x double ) returns string external name 
aggr.jsonaggr;
 GRANT EXECUTE ON AGGREGATE json.tojsonarray( double ) TO PUBLIC;
+
+
+create function sys.read_nd_json(fname string)
+returns table(json JSON)
+external name json.read_nd_json;
+-- GRANT EXECUTE ON FUNCTION json.read_nd_json(string) TO PUBLIC;
diff --git a/sql/test/nested/Tests/bluesky.json 
b/sql/test/nested/Tests/bluesky.json
new file mode 100644
--- /dev/null
+++ b/sql/test/nested/Tests/bluesky.json
@@ -0,0 +1,5 @@
+{"did":"did:plc:3i4xf2v4wcnyktgv6satke64","time_us":1732206349000644,"kind":"commit","commit":{"rev":"3lbhuvzds6d2a","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhuvzdked2a","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:25:46.221Z","subject":{"cid":"bafyreidjvrcmckkm765mct5fph36x7kupkfo35rjklbf2k76xkzwyiauge","uri":"at://did:plc:azrv4rcbws6kmcga4fsbphg2/app.bsky.feed.post/3lbgjdpbiec2l"}},"cid":"bafyreia5l5vrkh5oj4cjyhcqby2dprhyvcyofo2q5562tijlae2pzih23m"}}
+{"did":"did:plc:gccfnqqizz4urhchsaie6jft","time_us":1732206349001108,"kind":"commit","commit":{"rev":"3lbhuvze3gi2u","operation":"create","collection":"app.bsky.graph.follow","rkey":"3lbhuvzdtmi2u","record":{"$type":"app.bsky.graph.follow","createdAt":"2024-11-21T16:27:40.923Z","subject":"did:plc:r7cdh4sgzqbfdc6wcdxxti7c"},"cid":"bafyreiew2p6cgirfaj45qoenm4fgumib7xoloclrap3jgkz5es7g7kby3i"}}
+{"did":"did:plc:msxqf3twq7abtdw7dbfskphk","time_us":1732206349001372,"kind":"commit","commit":{"rev":"3lbhueija5p22","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhueiizcx22","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:15:58.232Z","subject":{"cid":"bafyreiavpshyqzrlo5m7fqodjhs6jevweqnif4phasiwimv4a7mnsqi2fe","uri":"at://did:plc:fusulxqc52zbrc75fi6xrcof/app.bsky.feed.post/3lbhskq5zn22f"}},"cid":"bafyreidjix4dauj2afjlbzmhj3a7gwftcevvmmy6edww6vrjdbst26rkby"}}
+{"did":"did:plc:cdsd346mwow7aj3tgfkwsct3","time_us":1732206349003461,"kind":"commit","commit":{"rev":"3lbhus5vior2t","operation":"create","collection":"app.bsky.feed.repost","rkey":"3lbhus5vbtz2t","record":{"$type":"app.bsky.feed.repost","createdAt":"2024-11-21T16:23:36.714Z","subject":{"cid":"bafyreieaacfiobnuqvjhhsndyi5s3fd6krbzdduxsyrzfv43kczpcmkl6y","uri":"at://did:plc:o5q6dynpme4ndolc3heztasm/app.bsky.feed.post/3lbfli3qsoc2o"}},"cid":"bafyreid5ycocp5zq2g7fcx2xxzxrbafuh7b5qhtwuwiomzo6vqila2cbpu"}}
+{"did":"did:plc:hbc74dlsxhq53kp5oxges6d7","time_us":1732206349004769,"kind":"commit","commit":{"rev":"3lbhuvzedg52j","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhuvzdyof2j","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:25:46.167Z","subject":{"cid":"bafyreiaumopip75nzx2xjbugtwemdppsyx54bd2odf6q45f3o7xkocgari","uri":"at://did:plc:ig2jv6gqup4t7gdq2pmanknw/app.bsky.feed.post/3lbhuvtlaec2c"}},"cid":"bafyreidjk2svg2fdjiiwohmfmvp3hdxhpb33ycnixzbkyib5m6cocindxq"}}
diff --git a/sql/test/nested/Tests/bluesky.test.in 
b/sql/test/nested/Tests/bluesky.test.in
new file mode 100644
--- /dev/null
+++ b/sql/test/nested/Tests/bluesky.test.in
@@ -0,0 +1,34 @@
+statement ok
+create type "subject" as ("cid" varchar, "uri" varchar)
+
+statement ok
+create type "record" as ("$type" varchar, "createdAt" timestamp, "subject" 
subject)
+
+statement ok
+create type "commit" as ("rev" varchar, "operation" varchar, "collection" 
varchar, "rkey" varchar, "record" "record", "cid" varchar)
+
+statement ok
+create type event as ("did" varchar, "time_us" int, "kind" varchar, "commit" 
"commit")
+
+statement ok
+create table bluesky ("event" event)
+
+query I
+select count(*) from read_nd_json(r'$TSTSRCDIR/bluesky.json')
+----
+5
+
+statement ok
+select cast(t.json as event) as e from (select json from 
read_nd_json(r'$TSTSRCDIR/bluesky.json')) t
+
+statement ok
+drop table bluesky
+
+statement ok
+drop type event
+
+statement ok
+drop type "commit"
+
+statement ok
+drop type "record"
diff --git a/sql/test/nested/Tests/simple.test 
b/sql/test/nested/Tests/simple.test
--- a/sql/test/nested/Tests/simple.test
+++ b/sql/test/nested/Tests/simple.test
@@ -199,3 +199,17 @@ left
 1
 l1
 2
+
+statement ok
+create table tpi( p pair[2], i integer)
+
+statement ok
+insert into tpi values (array [ ('left','right'), ('l2','r2') ], 1), (array [ 
('left','right'), ('l2','r2') ], 2), (array [ ('l3', 'r3') ], 3)
+
+#statement ok
+#insert into tpi values ('{("l4", "r4"), ("l42", "r42")}', 4), ('{("l5", 
"r5")}', 5);
+
+query I
+select count(*) from tpi
+----
+3
diff --git a/sql/test/nested/Tests/webclicks.test.in 
b/sql/test/nested/Tests/webclicks.test.in
--- a/sql/test/nested/Tests/webclicks.test.in
+++ b/sql/test/nested/Tests/webclicks.test.in
@@ -46,7 +46,7 @@ select count(*) from r'$TSTSRCDIR/webcli
 2
 
 query TTTTTT
-select e.eventid, e.sessionid, e.userid, e.event, e.timestamp, e.user_agent 
from ( select cast(t.json as event) as e from (select json from 
r'$TSTSRCDIR/webclicks.json') t);
+select e.eventid, e.sessionid, e.userid, e.event, e.timestamp, e.user_agent 
from ( select cast(t.json as event) as e from (select json from 
r'$TSTSRCDIR/webclicks.json') t)
 ----
 996257967-103007874
 47f07c1399c9c6bd1012861f9c5c958e042732e259b909e077f7e8967b650c75
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to