Changeset: 99bbc3b67902 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=99bbc3b67902
Added Files:
        sql/test/bincopy/Tests/bincopy_null_strings_on_client.SQL.py
        sql/test/bincopy/Tests/bincopy_null_strings_on_client.stable.err
        sql/test/bincopy/Tests/bincopy_null_strings_on_client.stable.out
        sql/test/bincopy/Tests/bincopy_null_strings_on_server.SQL.py
        sql/test/bincopy/Tests/bincopy_null_strings_on_server.stable.err
        sql/test/bincopy/Tests/bincopy_null_strings_on_server.stable.out
Modified Files:
        sql/backends/monet5/sql.c
        sql/test/bincopy/Tests/All
        sql/test/bincopy/Tests/bincopy_support.py
Branch: copybinary
Log Message:

Add support for NULL strings

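A NULL string is denoted by the byte sequence '\x80\0', which is deliberately
not valid UTF-8 (a lone continuation byte followed by the terminator), so it
cannot collide with any real string value.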


diffs (truncated from 315 to 300 lines):

diff --git a/sql/backends/monet5/sql.c b/sql/backends/monet5/sql.c
--- a/sql/backends/monet5/sql.c
+++ b/sql/backends/monet5/sql.c
@@ -3172,36 +3172,44 @@ BATattach_str(bstream *in, BAT *bn)
                // the middle loop looks for complete strings
                char *end;
                while ((end = memchr(&in->buf[in->pos], '\0', in->len - 
in->pos)) != NULL) {
-
-                       // the inner loop validates them and converts line 
endings.
-                       unsigned int u = 0; // utf-8 state
-                       char *r = &in->buf[in->pos];
-                       char *w = &in->buf[in->pos];
-                       while (1) {
-                               if (u > 0) {
-                                       // must be an utf-8 continuation byte.
-                                       if ((*r & 0xC0) == 0x80)    // 10xx xxxx
-                                               u--;
-                                       else
+                       unsigned char *r = (unsigned char*) &in->buf[in->pos];
+                       unsigned char *w = (unsigned char*) &in->buf[in->pos];
+                       const char *s;
+
+                       if (*r == 0x80 && *(r+1) == 0) {
+                               // technically a utf-8 violation but we treat 
it as the NULL marker
+                               s = str_nil;
+                       } else {
+                               s = &in->buf[in->pos]; // to be validated first
+
+                               // the inner loop validates them and converts 
line endings.
+                               unsigned int u = 0; // utf-8 state
+                               while (1) {
+                                       if (u > 0) {
+                                               // must be an utf-8 
continuation byte.
+                                               if ((*r & 0xC0) == 0x80)    // 
10xx xxxx
+                                                       u--;
+                                               else
+                                                       goto bad_utf8;
+                                       } else if ((*r & 0xF8) == 0xF0) // 
1111_0xxx
+                                               u = 3;
+                                       else if ((*r & 0xF0) == 0xE0)   // 
1110_xxxx
+                                               u = 2;
+                                       else if ((*r & 0xE0) == 0xC0)   // 110x 
xxxx
+                                               u = 1;
+                                       else if ((*r & 0xC0) == 0x80)   // 10xx 
xxxx
                                                goto bad_utf8;
-                               } else if ((*r & 0xF8) == 0xF0) // 1111_0xxx
-                                       u = 3;
-                               else if ((*r & 0xF0) == 0xE0)   // 1110_xxxx
-                                       u = 2;
-                               else if ((*r & 0xE0) == 0xC0)   // 110x xxxx
-                                       u = 1;
-                               else if ((*r & 0xC0) == 0x80)   // 10xx xxxx
-                                       goto bad_utf8;
-                               else if (*r == '\r' && *(r+1) == '\n') // 
convert!
-                                       r++;
-                               else if (*r == '\0') { // guaranteed to happen.
-                                       *w = '\0';
-                                       break;
+                                       else if (*r == '\r' && *(r+1) == '\n') 
// convert!
+                                               r++;
+                                       else if (*r == '\0') { // guaranteed to 
happen.
+                                               *w = '\0';
+                                               break;
+                                       }
+                                       *w++ = *r++;
                                }
-                               *w++ = *r++;
                        }
 
-                       if (BUNappend(bn, &in->buf[in->pos], false) != 
GDK_SUCCEED)
+                       if (BUNappend(bn, s, false) != GDK_SUCCEED)
                                return createException(SQL, "BATattach_stream", 
GDK_EXCEPTION);
 
                        in->pos = end - in->buf + 1;
diff --git a/sql/test/bincopy/Tests/All b/sql/test/bincopy/Tests/All
--- a/sql/test/bincopy/Tests/All
+++ b/sql/test/bincopy/Tests/All
@@ -13,3 +13,5 @@ bincopy_broken_strings_on_client
 bincopy_broken_strings_on_server
 bincopy_newlines_on_client
 bincopy_newlines_on_server
+bincopy_null_strings_on_client
+bincopy_null_strings_on_server
diff --git a/sql/test/bincopy/Tests/bincopy_null_strings_on_client.SQL.py 
b/sql/test/bincopy/Tests/bincopy_null_strings_on_client.SQL.py
new file mode 100644
--- /dev/null
+++ b/sql/test/bincopy/Tests/bincopy_null_strings_on_client.SQL.py
@@ -0,0 +1,6 @@
+#!/usr/bin/env python3
+
+from bincopy_support import run_test
+from bincopy_support import NULL_STRINGS as testcode
+
+run_test('client', testcode)
diff --git a/sql/test/bincopy/Tests/bincopy_null_strings_on_client.stable.err 
b/sql/test/bincopy/Tests/bincopy_null_strings_on_client.stable.err
new file mode 100644
--- /dev/null
+++ b/sql/test/bincopy/Tests/bincopy_null_strings_on_client.stable.err
@@ -0,0 +1,33 @@
+stderr of test 'bincopy_null_strings_on_client` in directory 
'sql/test/bincopy` itself:
+
+
+# 17:07:36 >  
+# 17:07:36 >  "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" 
"mapi_listenaddr=all" "--set" "mapi_port=36279" "--set" 
"mapi_usock=/var/tmp/mtest-7176/.s.monetdb.36279" "--forcemito" 
"--dbpath=/home/jvr/monets/copybinary/inst/var/MonetDB/mTests_sql_test_bincopy" 
"--set" "embedded_c=true"
+# 17:07:36 >  
+
+# builtin opt  gdk_dbpath = 
/home/jvr/monets/copybinary/inst/var/monetdb5/dbfarm/demo
+# builtin opt  mapi_port = 50000
+# builtin opt  sql_optimizer = default_pipe
+# builtin opt  sql_debug = 0
+# builtin opt  raw_strings = false
+# cmdline opt  gdk_nr_threads = 0
+# cmdline opt  mapi_listenaddr = all
+# cmdline opt  mapi_port = 36279
+# cmdline opt  mapi_usock = /var/tmp/mtest-7176/.s.monetdb.36279
+# cmdline opt  gdk_dbpath = 
/home/jvr/monets/copybinary/inst/var/MonetDB/mTests_sql_test_bincopy
+# cmdline opt  embedded_c = true
+
+# 17:07:37 >  
+# 17:07:37 >  "/home/jvr/lib/pyenv/versions/3.8.0/bin/python3.8" 
"bincopy_null_strings_on_client.SQL.py" "bincopy_null_strings_on_client"
+# 17:07:37 >  
+
+
+# 17:07:37 >  
+# 17:07:37 >  mclient -lsql -ftest -tnone -Eutf-8 -i -e 
--host=/var/tmp/mtest-7176 --port=36279 --database=mTests_sql_test_bincopy
+# 17:07:37 >  
+
+
+# 17:07:37 >  
+# 17:07:37 >  "Done."
+# 17:07:37 >  
+
diff --git a/sql/test/bincopy/Tests/bincopy_null_strings_on_client.stable.out 
b/sql/test/bincopy/Tests/bincopy_null_strings_on_client.stable.out
new file mode 100644
--- /dev/null
+++ b/sql/test/bincopy/Tests/bincopy_null_strings_on_client.stable.out
@@ -0,0 +1,47 @@
+stdout of test 'bincopy_null_strings_on_client` in directory 
'sql/test/bincopy` itself:
+
+
+# 17:07:36 >  
+# 17:07:36 >  "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" 
"mapi_listenaddr=all" "--set" "mapi_port=36279" "--set" 
"mapi_usock=/var/tmp/mtest-7176/.s.monetdb.36279" "--forcemito" 
"--dbpath=/home/jvr/monets/copybinary/inst/var/MonetDB/mTests_sql_test_bincopy" 
"--set" "embedded_c=true"
+# 17:07:36 >  
+
+# MonetDB 5 server v11.40.0 (hg id: e2f8b3cfc8)
+# This is an unreleased version
+# Serving database 'mTests_sql_test_bincopy', using 8 threads
+# Compiled for x86_64-pc-linux-gnu/64bit with 128bit integers
+# Found 31.139 GiB available main-memory of which we use 25.378 GiB
+# Copyright (c) 1993 - July 2008 CWI.
+# Copyright (c) August 2008 - 2020 MonetDB B.V., all rights reserved
+# Visit https://www.monetdb.org/ for further information
+# Listening for connection requests on mapi:monetdb://hank:36279/
+# Listening for UNIX domain connection requests on 
mapi:monetdb:///var/tmp/mtest-7176/.s.monetdb.36279
+# MonetDB/GIS module loaded
+# MonetDB/SQL module loaded
+
+# 17:07:37 >  
+# 17:07:37 >  "/home/jvr/lib/pyenv/versions/3.8.0/bin/python3.8" 
"bincopy_null_strings_on_client.SQL.py" "bincopy_null_strings_on_client"
+# 17:07:37 >  
+
+
+# 17:07:37 >  
+# 17:07:37 >  mclient -lsql -ftest -tnone -Eutf-8 -i -e 
--host=/var/tmp/mtest-7176 --port=36279 --database=mTests_sql_test_bincopy
+# 17:07:37 >  
+
+#START TRANSACTION;
+#CREATE TABLE foo(id INT NOT NULL, s TEXT);
+#COPY BINARY INTO foo(id, s) FROM 
R'${TSTTRGBASE}/mTests/sql/test/bincopy/bincopy_ints.bin', 
R'${TSTTRGBASE}/mTests/sql/test/bincopy/bincopy_null_strings.bin' ON CLIENT;
+[ 1000000      ]
+#SELECT COUNT(id) FROM foo
+#WHERE (id % 2 = 0 AND s IS NULL)
+#OR    (id % 2 = 1 AND s = 'banana');
+% sys.%1 # table_name
+% %1 # name
+% bigint # type
+% 7 # length
+[ 1000000      ]
+#ROLLBACK;
+
+# 17:07:37 >  
+# 17:07:37 >  "Done."
+# 17:07:37 >  
+
diff --git a/sql/test/bincopy/Tests/bincopy_null_strings_on_server.SQL.py 
b/sql/test/bincopy/Tests/bincopy_null_strings_on_server.SQL.py
new file mode 100644
--- /dev/null
+++ b/sql/test/bincopy/Tests/bincopy_null_strings_on_server.SQL.py
@@ -0,0 +1,6 @@
+#!/usr/bin/env python3
+
+from bincopy_support import run_test
+from bincopy_support import NULL_STRINGS as testcode
+
+run_test('server', testcode)
diff --git a/sql/test/bincopy/Tests/bincopy_null_strings_on_server.stable.err 
b/sql/test/bincopy/Tests/bincopy_null_strings_on_server.stable.err
new file mode 100644
--- /dev/null
+++ b/sql/test/bincopy/Tests/bincopy_null_strings_on_server.stable.err
@@ -0,0 +1,33 @@
+stderr of test 'bincopy_null_strings_on_server` in directory 
'sql/test/bincopy` itself:
+
+
+# 17:07:37 >  
+# 17:07:37 >  "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" 
"mapi_listenaddr=all" "--set" "mapi_port=36279" "--set" 
"mapi_usock=/var/tmp/mtest-7176/.s.monetdb.36279" "--forcemito" 
"--dbpath=/home/jvr/monets/copybinary/inst/var/MonetDB/mTests_sql_test_bincopy" 
"--set" "embedded_c=true"
+# 17:07:37 >  
+
+# builtin opt  gdk_dbpath = 
/home/jvr/monets/copybinary/inst/var/monetdb5/dbfarm/demo
+# builtin opt  mapi_port = 50000
+# builtin opt  sql_optimizer = default_pipe
+# builtin opt  sql_debug = 0
+# builtin opt  raw_strings = false
+# cmdline opt  gdk_nr_threads = 0
+# cmdline opt  mapi_listenaddr = all
+# cmdline opt  mapi_port = 36279
+# cmdline opt  mapi_usock = /var/tmp/mtest-7176/.s.monetdb.36279
+# cmdline opt  gdk_dbpath = 
/home/jvr/monets/copybinary/inst/var/MonetDB/mTests_sql_test_bincopy
+# cmdline opt  embedded_c = true
+
+# 17:07:37 >  
+# 17:07:37 >  "/home/jvr/lib/pyenv/versions/3.8.0/bin/python3.8" 
"bincopy_null_strings_on_server.SQL.py" "bincopy_null_strings_on_server"
+# 17:07:37 >  
+
+
+# 17:07:38 >  
+# 17:07:38 >  mclient -lsql -ftest -tnone -Eutf-8 -i -e 
--host=/var/tmp/mtest-7176 --port=36279 --database=mTests_sql_test_bincopy
+# 17:07:38 >  
+
+
+# 17:07:38 >  
+# 17:07:38 >  "Done."
+# 17:07:38 >  
+
diff --git a/sql/test/bincopy/Tests/bincopy_null_strings_on_server.stable.out 
b/sql/test/bincopy/Tests/bincopy_null_strings_on_server.stable.out
new file mode 100644
--- /dev/null
+++ b/sql/test/bincopy/Tests/bincopy_null_strings_on_server.stable.out
@@ -0,0 +1,47 @@
+stdout of test 'bincopy_null_strings_on_server` in directory 
'sql/test/bincopy` itself:
+
+
+# 17:13:13 >  
+# 17:13:13 >  "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" 
"mapi_listenaddr=all" "--set" "mapi_port=34308" "--set" 
"mapi_usock=/var/tmp/mtest-10649/.s.monetdb.34308" "--forcemito" 
"--dbpath=/home/jvr/monets/copybinary/inst/var/MonetDB/mTests_sql_test_bincopy" 
"--set" "embedded_c=true"
+# 17:13:13 >  
+
+# MonetDB 5 server v11.40.0 (hg id: e2f8b3cfc8)
+# This is an unreleased version
+# Serving database 'mTests_sql_test_bincopy', using 8 threads
+# Compiled for x86_64-pc-linux-gnu/64bit with 128bit integers
+# Found 31.139 GiB available main-memory of which we use 25.378 GiB
+# Copyright (c) 1993 - July 2008 CWI.
+# Copyright (c) August 2008 - 2020 MonetDB B.V., all rights reserved
+# Visit https://www.monetdb.org/ for further information
+# Listening for connection requests on mapi:monetdb://hank:34308/
+# Listening for UNIX domain connection requests on 
mapi:monetdb:///var/tmp/mtest-10649/.s.monetdb.34308
+# MonetDB/GIS module loaded
+# MonetDB/SQL module loaded
+
+# 17:13:13 >  
+# 17:13:13 >  "/home/jvr/lib/pyenv/versions/3.8.0/bin/python3.8" 
"bincopy_null_strings_on_server.SQL.py" "bincopy_null_strings_on_server"
+# 17:13:13 >  
+
+
+# 17:13:13 >  
+# 17:13:13 >  mclient -lsql -ftest -tnone -Eutf-8 -i -e 
--host=/var/tmp/mtest-10649 --port=34308 --database=mTests_sql_test_bincopy
+# 17:13:13 >  
+
+#START TRANSACTION;
+#CREATE TABLE foo(id INT NOT NULL, s TEXT);
+#COPY BINARY INTO foo(id, s) FROM 
R'${TSTTRGBASE}/mTests/sql/test/bincopy/bincopy_ints.bin', 
R'${TSTTRGBASE}/mTests/sql/test/bincopy/bincopy_null_strings.bin' ON SERVER;
+[ 1000000      ]
+#SELECT COUNT(id) FROM foo
+#WHERE (id % 2 = 0 AND s IS NULL)
+#OR    (id % 2 = 1 AND s = 'banana');
+% sys.%1 # table_name
+% %1 # name
+% bigint # type
+% 7 # length
+[ 1000000      ]
+#ROLLBACK;
+
+# 17:13:14 >  
+# 17:13:14 >  "Done."
+# 17:13:14 >  
+
diff --git a/sql/test/bincopy/Tests/bincopy_support.py 
b/sql/test/bincopy/Tests/bincopy_support.py
--- a/sql/test/bincopy/Tests/bincopy_support.py
+++ b/sql/test/bincopy/Tests/bincopy_support.py
@@ -98,6 +98,14 @@ def gen_newline_strings(outfile):
     for i in range(1_000_000):
         f.write(f"rn\r\nr\r{i}\0")
 
+def gen_null_strings(outfile):
+    for i in range(1_000_000):
+        if i % 2 == 0:
+            outfile.write(b"\x80\x00")
+        else:
+            outfile.write(b"banana\0")
+
+
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to