Changeset: d9d16949e459 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/d9d16949e459
Added Files:
        sql/test/bincopy/Tests/bincopy_invalid_json.SQL.py
Modified Files:
        sql/backends/monet5/sql_bincopyfrom.c
        sql/test/bincopy/Tests/All
Branch: Jan2022
Log Message:

Do not allow invalid json in COPY BINARY INTO


diffs (149 lines):

diff --git a/sql/backends/monet5/sql_bincopyfrom.c b/sql/backends/monet5/sql_bincopyfrom.c
--- a/sql/backends/monet5/sql_bincopyfrom.c
+++ b/sql/backends/monet5/sql_bincopyfrom.c
@@ -327,7 +327,7 @@ convert_timestamp(void *dst_start, void 
 
 
 static str
-convert_and_validate(char *text)
+convert_and_validate_utf8(char *text)
 {
        unsigned char *r = (unsigned char*)text;
        unsigned char *w = r;
@@ -375,29 +375,18 @@ bad_utf8:
        return createException(SQL, "BATattach_stream", SQLSTATE(42000) "malformed utf-8 byte sequence");
 }
 
-static str
-append_text(BAT *bat, char *start)
-{
-       str msg = convert_and_validate(start);
-       if (msg != MAL_SUCCEED)
-               return msg;
-
-       if (BUNappend(bat, start, false) != GDK_SUCCEED)
-               return createException(SQL, "sql.importColumn", GDK_EXCEPTION);
-
-       return MAL_SUCCEED;
-}
-
 // Load items from the stream and put them in the BAT.
 // Because it's text read from a binary stream, we replace \r\n with \n.
-// We don't have to validate the utf-8 structure because BUNappend does that for us.
 static str
 load_zero_terminated_text(BAT *bat, stream *s, int *eof_reached)
 {
        str msg = MAL_SUCCEED;
        bstream *bs = NULL;
+       int tpe = BATttype(bat);
+       void *buffer = NULL;
+       size_t buffer_len = 0;
 
-       // convert_and_validate() above counts on the following property to hold:
+       // convert_and_validate_utf8() above counts on the following property to hold:
        assert(strNil((const char[2]){ 0x80, 0 }));
 
        bs = bstream_create(s, 1 << 20);
@@ -419,9 +408,24 @@ load_zero_terminated_text(BAT *bat, stre
                char *buf_end = &bs->buf[bs->len];
                char *start, *end;
                for (start = buf_start; (end = memchr(start, '\0', buf_end - start)) != NULL; start = end + 1) {
-                       msg = append_text(bat, start);
+                       char *value;
+                       msg = convert_and_validate_utf8(start);
                        if (msg != NULL)
                                goto end;
+                       if (tpe == TYPE_str) {
+                               value = start;
+                       } else {
+                               ssize_t n = ATOMfromstr(tpe, &buffer, &buffer_len, start, false);
+                               if (n <= 0) {
+                                       msg = createException(SQL, "sql.importColumn", GDK_EXCEPTION);
+                                       goto end;
+                               }
+                               value = buffer;
+                       }
+                       if (BUNappend(bat, value, false) != GDK_SUCCEED) {
+                               msg = createException(SQL, "sql.importColumn", GDK_EXCEPTION);
+                               goto end;
+                       }
                }
                bs->pos = start - buf_start;
        }
@@ -432,6 +436,7 @@ load_zero_terminated_text(BAT *bat, stre
 
 end:
        *eof_reached = 0;
+       GDKfree(buffer);
        if (bs != NULL) {
                *eof_reached = (int)bs->eof;
                bs->s = NULL;
diff --git a/sql/test/bincopy/Tests/All b/sql/test/bincopy/Tests/All
--- a/sql/test/bincopy/Tests/All
+++ b/sql/test/bincopy/Tests/All
@@ -41,6 +41,8 @@ bincopy_json_objects_on_server
 bincopy_uuids_on_client
 bincopy_uuids_on_server
 
+bincopy_invalid_json
+
 bincopy_little_endians_on_client
 bincopy_little_endians_on_server
 bincopy_big_endians_on_client
diff --git a/sql/test/bincopy/Tests/bincopy_invalid_json.SQL.py b/sql/test/bincopy/Tests/bincopy_invalid_json.SQL.py
new file mode 100644
--- /dev/null
+++ b/sql/test/bincopy/Tests/bincopy_invalid_json.SQL.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+
+from cmath import exp
+import os
+import pymonetdb
+
+conn = pymonetdb.connect(
+    database=os.getenv("TSTDB"),
+    port=int(os.getenv("MAPIPORT")))
+
+conn.set_autocommit(False)
+
+CONTENT = dict(
+    invalid_utf8=b'"invali\x80\x80"',
+    unterminated_object=b'{"foo": "bar',
+    valid_json=b'{"foo":42}'
+)
+
+
+class MyUploader(pymonetdb.Uploader):
+    def handle_upload(self, upload: pymonetdb.Upload, filename: str, text_mode: bool, skip_amount: int):
+        assert text_mode == False
+        assert skip_amount == 0
+        json = CONTENT[filename]
+        bw = upload.binary_writer()
+        bw.write(json + b'\x00')
+
+
+conn.set_uploader(MyUploader())
+
+def run_test(content_name, expected_exception):
+    c = conn.cursor()
+    try:
+        c.execute("DROP TABLE IF EXISTS foo")
+        c.execute("CREATE TABLE foo(j JSON)")
+        try:
+            c.execute("COPY BINARY INTO foo FROM %s ON CLIENT", [content_name])
+            if expected_exception:
+                content = CONTENT[content_name]
+                msg = f"Expected error involving '{expected_exception}' when loading {content!r}"
+                raise Exception(msg)
+        except pymonetdb.OperationalError as e:
+            if expected_exception in str(e):
+                conn.rollback()
+                return
+            raise e
+    finally:
+        c.close()
+
+run_test('valid_json', None)
+run_test('invalid_utf8', 'malformed utf')
+run_test('unterminated_object', 'JSONfromString')
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to