Changeset: 99bbc3b67902 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=99bbc3b67902 Added Files: sql/test/bincopy/Tests/bincopy_null_strings_on_client.SQL.py sql/test/bincopy/Tests/bincopy_null_strings_on_client.stable.err sql/test/bincopy/Tests/bincopy_null_strings_on_client.stable.out sql/test/bincopy/Tests/bincopy_null_strings_on_server.SQL.py sql/test/bincopy/Tests/bincopy_null_strings_on_server.stable.err sql/test/bincopy/Tests/bincopy_null_strings_on_server.stable.out Modified Files: sql/backends/monet5/sql.c sql/test/bincopy/Tests/All sql/test/bincopy/Tests/bincopy_support.py Branch: copybinary Log Message:
Add support for NULL strings. A NULL string is denoted by the byte sequence '\x80\0', which is not valid UTF-8. diffs (truncated from 315 to 300 lines): diff --git a/sql/backends/monet5/sql.c b/sql/backends/monet5/sql.c --- a/sql/backends/monet5/sql.c +++ b/sql/backends/monet5/sql.c @@ -3172,36 +3172,44 @@ BATattach_str(bstream *in, BAT *bn) // the middle loop looks for complete strings char *end; while ((end = memchr(&in->buf[in->pos], '\0', in->len - in->pos)) != NULL) { - - // the inner loop validates them and converts line endings. - unsigned int u = 0; // utf-8 state - char *r = &in->buf[in->pos]; - char *w = &in->buf[in->pos]; - while (1) { - if (u > 0) { - // must be an utf-8 continuation byte. - if ((*r & 0xC0) == 0x80) // 10xx xxxx - u--; - else + unsigned char *r = (unsigned char*) &in->buf[in->pos]; + unsigned char *w = (unsigned char*) &in->buf[in->pos]; + const char *s; + + if (*r == 0x80 && *(r+1) == 0) { + // technically a utf-8 violation but we treat it as the NULL marker + s = str_nil; + } else { + s = &in->buf[in->pos]; // to be validated first + + // the inner loop validates them and converts line endings. + unsigned int u = 0; // utf-8 state + while (1) { + if (u > 0) { + // must be an utf-8 continuation byte. + if ((*r & 0xC0) == 0x80) // 10xx xxxx + u--; + else + goto bad_utf8; + } else if ((*r & 0xF8) == 0xF0) // 1111_0xxx + u = 3; + else if ((*r & 0xF0) == 0xE0) // 1110_xxxx + u = 2; + else if ((*r & 0xE0) == 0xC0) // 110x xxxx + u = 1; + else if ((*r & 0xC0) == 0x80) // 10xx xxxx goto bad_utf8; - } else if ((*r & 0xF8) == 0xF0) // 1111_0xxx - u = 3; - else if ((*r & 0xF0) == 0xE0) // 1110_xxxx - u = 2; - else if ((*r & 0xE0) == 0xC0) // 110x xxxx - u = 1; - else if ((*r & 0xC0) == 0x80) // 10xx xxxx - goto bad_utf8; - else if (*r == '\r' && *(r+1) == '\n') // convert! - r++; - else if (*r == '\0') { // guaranteed to happen. - *w = '\0'; - break; + else if (*r == '\r' && *(r+1) == '\n') // convert! + r++; + else if (*r == '\0') { // guaranteed to happen. 
+ *w = '\0'; + break; + } + *w++ = *r++; } - *w++ = *r++; } - if (BUNappend(bn, &in->buf[in->pos], false) != GDK_SUCCEED) + if (BUNappend(bn, s, false) != GDK_SUCCEED) return createException(SQL, "BATattach_stream", GDK_EXCEPTION); in->pos = end - in->buf + 1; diff --git a/sql/test/bincopy/Tests/All b/sql/test/bincopy/Tests/All --- a/sql/test/bincopy/Tests/All +++ b/sql/test/bincopy/Tests/All @@ -13,3 +13,5 @@ bincopy_broken_strings_on_client bincopy_broken_strings_on_server bincopy_newlines_on_client bincopy_newlines_on_server +bincopy_null_strings_on_client +bincopy_null_strings_on_server diff --git a/sql/test/bincopy/Tests/bincopy_null_strings_on_client.SQL.py b/sql/test/bincopy/Tests/bincopy_null_strings_on_client.SQL.py new file mode 100644 --- /dev/null +++ b/sql/test/bincopy/Tests/bincopy_null_strings_on_client.SQL.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python3 + +from bincopy_support import run_test +from bincopy_support import NULL_STRINGS as testcode + +run_test('client', testcode) diff --git a/sql/test/bincopy/Tests/bincopy_null_strings_on_client.stable.err b/sql/test/bincopy/Tests/bincopy_null_strings_on_client.stable.err new file mode 100644 --- /dev/null +++ b/sql/test/bincopy/Tests/bincopy_null_strings_on_client.stable.err @@ -0,0 +1,33 @@ +stderr of test 'bincopy_null_strings_on_client` in directory 'sql/test/bincopy` itself: + + +# 17:07:36 > +# 17:07:36 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" "mapi_listenaddr=all" "--set" "mapi_port=36279" "--set" "mapi_usock=/var/tmp/mtest-7176/.s.monetdb.36279" "--forcemito" "--dbpath=/home/jvr/monets/copybinary/inst/var/MonetDB/mTests_sql_test_bincopy" "--set" "embedded_c=true" +# 17:07:36 > + +# builtin opt gdk_dbpath = /home/jvr/monets/copybinary/inst/var/monetdb5/dbfarm/demo +# builtin opt mapi_port = 50000 +# builtin opt sql_optimizer = default_pipe +# builtin opt sql_debug = 0 +# builtin opt raw_strings = false +# cmdline opt gdk_nr_threads = 0 +# cmdline opt mapi_listenaddr = all +# 
cmdline opt mapi_port = 36279 +# cmdline opt mapi_usock = /var/tmp/mtest-7176/.s.monetdb.36279 +# cmdline opt gdk_dbpath = /home/jvr/monets/copybinary/inst/var/MonetDB/mTests_sql_test_bincopy +# cmdline opt embedded_c = true + +# 17:07:37 > +# 17:07:37 > "/home/jvr/lib/pyenv/versions/3.8.0/bin/python3.8" "bincopy_null_strings_on_client.SQL.py" "bincopy_null_strings_on_client" +# 17:07:37 > + + +# 17:07:37 > +# 17:07:37 > mclient -lsql -ftest -tnone -Eutf-8 -i -e --host=/var/tmp/mtest-7176 --port=36279 --database=mTests_sql_test_bincopy +# 17:07:37 > + + +# 17:07:37 > +# 17:07:37 > "Done." +# 17:07:37 > + diff --git a/sql/test/bincopy/Tests/bincopy_null_strings_on_client.stable.out b/sql/test/bincopy/Tests/bincopy_null_strings_on_client.stable.out new file mode 100644 --- /dev/null +++ b/sql/test/bincopy/Tests/bincopy_null_strings_on_client.stable.out @@ -0,0 +1,47 @@ +stdout of test 'bincopy_null_strings_on_client` in directory 'sql/test/bincopy` itself: + + +# 17:07:36 > +# 17:07:36 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" "mapi_listenaddr=all" "--set" "mapi_port=36279" "--set" "mapi_usock=/var/tmp/mtest-7176/.s.monetdb.36279" "--forcemito" "--dbpath=/home/jvr/monets/copybinary/inst/var/MonetDB/mTests_sql_test_bincopy" "--set" "embedded_c=true" +# 17:07:36 > + +# MonetDB 5 server v11.40.0 (hg id: e2f8b3cfc8) +# This is an unreleased version +# Serving database 'mTests_sql_test_bincopy', using 8 threads +# Compiled for x86_64-pc-linux-gnu/64bit with 128bit integers +# Found 31.139 GiB available main-memory of which we use 25.378 GiB +# Copyright (c) 1993 - July 2008 CWI. 
+# Copyright (c) August 2008 - 2020 MonetDB B.V., all rights reserved +# Visit https://www.monetdb.org/ for further information +# Listening for connection requests on mapi:monetdb://hank:36279/ +# Listening for UNIX domain connection requests on mapi:monetdb:///var/tmp/mtest-7176/.s.monetdb.36279 +# MonetDB/GIS module loaded +# MonetDB/SQL module loaded + +# 17:07:37 > +# 17:07:37 > "/home/jvr/lib/pyenv/versions/3.8.0/bin/python3.8" "bincopy_null_strings_on_client.SQL.py" "bincopy_null_strings_on_client" +# 17:07:37 > + + +# 17:07:37 > +# 17:07:37 > mclient -lsql -ftest -tnone -Eutf-8 -i -e --host=/var/tmp/mtest-7176 --port=36279 --database=mTests_sql_test_bincopy +# 17:07:37 > + +#START TRANSACTION; +#CREATE TABLE foo(id INT NOT NULL, s TEXT); +#COPY BINARY INTO foo(id, s) FROM R'${TSTTRGBASE}/mTests/sql/test/bincopy/bincopy_ints.bin', R'${TSTTRGBASE}/mTests/sql/test/bincopy/bincopy_null_strings.bin' ON CLIENT; +[ 1000000 ] +#SELECT COUNT(id) FROM foo +#WHERE (id % 2 = 0 AND s IS NULL) +#OR (id % 2 = 1 AND s = 'banana'); +% sys.%1 # table_name +% %1 # name +% bigint # type +% 7 # length +[ 1000000 ] +#ROLLBACK; + +# 17:07:37 > +# 17:07:37 > "Done." 
+# 17:07:37 > + diff --git a/sql/test/bincopy/Tests/bincopy_null_strings_on_server.SQL.py b/sql/test/bincopy/Tests/bincopy_null_strings_on_server.SQL.py new file mode 100644 --- /dev/null +++ b/sql/test/bincopy/Tests/bincopy_null_strings_on_server.SQL.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python3 + +from bincopy_support import run_test +from bincopy_support import NULL_STRINGS as testcode + +run_test('server', testcode) diff --git a/sql/test/bincopy/Tests/bincopy_null_strings_on_server.stable.err b/sql/test/bincopy/Tests/bincopy_null_strings_on_server.stable.err new file mode 100644 --- /dev/null +++ b/sql/test/bincopy/Tests/bincopy_null_strings_on_server.stable.err @@ -0,0 +1,33 @@ +stderr of test 'bincopy_null_strings_on_server` in directory 'sql/test/bincopy` itself: + + +# 17:07:37 > +# 17:07:37 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" "mapi_listenaddr=all" "--set" "mapi_port=36279" "--set" "mapi_usock=/var/tmp/mtest-7176/.s.monetdb.36279" "--forcemito" "--dbpath=/home/jvr/monets/copybinary/inst/var/MonetDB/mTests_sql_test_bincopy" "--set" "embedded_c=true" +# 17:07:37 > + +# builtin opt gdk_dbpath = /home/jvr/monets/copybinary/inst/var/monetdb5/dbfarm/demo +# builtin opt mapi_port = 50000 +# builtin opt sql_optimizer = default_pipe +# builtin opt sql_debug = 0 +# builtin opt raw_strings = false +# cmdline opt gdk_nr_threads = 0 +# cmdline opt mapi_listenaddr = all +# cmdline opt mapi_port = 36279 +# cmdline opt mapi_usock = /var/tmp/mtest-7176/.s.monetdb.36279 +# cmdline opt gdk_dbpath = /home/jvr/monets/copybinary/inst/var/MonetDB/mTests_sql_test_bincopy +# cmdline opt embedded_c = true + +# 17:07:37 > +# 17:07:37 > "/home/jvr/lib/pyenv/versions/3.8.0/bin/python3.8" "bincopy_null_strings_on_server.SQL.py" "bincopy_null_strings_on_server" +# 17:07:37 > + + +# 17:07:38 > +# 17:07:38 > mclient -lsql -ftest -tnone -Eutf-8 -i -e --host=/var/tmp/mtest-7176 --port=36279 --database=mTests_sql_test_bincopy +# 17:07:38 > + + +# 17:07:38 > +# 17:07:38 > 
"Done." +# 17:07:38 > + diff --git a/sql/test/bincopy/Tests/bincopy_null_strings_on_server.stable.out b/sql/test/bincopy/Tests/bincopy_null_strings_on_server.stable.out new file mode 100644 --- /dev/null +++ b/sql/test/bincopy/Tests/bincopy_null_strings_on_server.stable.out @@ -0,0 +1,47 @@ +stdout of test 'bincopy_null_strings_on_server` in directory 'sql/test/bincopy` itself: + + +# 17:13:13 > +# 17:13:13 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" "mapi_listenaddr=all" "--set" "mapi_port=34308" "--set" "mapi_usock=/var/tmp/mtest-10649/.s.monetdb.34308" "--forcemito" "--dbpath=/home/jvr/monets/copybinary/inst/var/MonetDB/mTests_sql_test_bincopy" "--set" "embedded_c=true" +# 17:13:13 > + +# MonetDB 5 server v11.40.0 (hg id: e2f8b3cfc8) +# This is an unreleased version +# Serving database 'mTests_sql_test_bincopy', using 8 threads +# Compiled for x86_64-pc-linux-gnu/64bit with 128bit integers +# Found 31.139 GiB available main-memory of which we use 25.378 GiB +# Copyright (c) 1993 - July 2008 CWI. 
+# Copyright (c) August 2008 - 2020 MonetDB B.V., all rights reserved +# Visit https://www.monetdb.org/ for further information +# Listening for connection requests on mapi:monetdb://hank:34308/ +# Listening for UNIX domain connection requests on mapi:monetdb:///var/tmp/mtest-10649/.s.monetdb.34308 +# MonetDB/GIS module loaded +# MonetDB/SQL module loaded + +# 17:13:13 > +# 17:13:13 > "/home/jvr/lib/pyenv/versions/3.8.0/bin/python3.8" "bincopy_null_strings_on_server.SQL.py" "bincopy_null_strings_on_server" +# 17:13:13 > + + +# 17:13:13 > +# 17:13:13 > mclient -lsql -ftest -tnone -Eutf-8 -i -e --host=/var/tmp/mtest-10649 --port=34308 --database=mTests_sql_test_bincopy +# 17:13:13 > + +#START TRANSACTION; +#CREATE TABLE foo(id INT NOT NULL, s TEXT); +#COPY BINARY INTO foo(id, s) FROM R'${TSTTRGBASE}/mTests/sql/test/bincopy/bincopy_ints.bin', R'${TSTTRGBASE}/mTests/sql/test/bincopy/bincopy_null_strings.bin' ON SERVER; +[ 1000000 ] +#SELECT COUNT(id) FROM foo +#WHERE (id % 2 = 0 AND s IS NULL) +#OR (id % 2 = 1 AND s = 'banana'); +% sys.%1 # table_name +% %1 # name +% bigint # type +% 7 # length +[ 1000000 ] +#ROLLBACK; + +# 17:13:14 > +# 17:13:14 > "Done." +# 17:13:14 > + diff --git a/sql/test/bincopy/Tests/bincopy_support.py b/sql/test/bincopy/Tests/bincopy_support.py --- a/sql/test/bincopy/Tests/bincopy_support.py +++ b/sql/test/bincopy/Tests/bincopy_support.py @@ -98,6 +98,14 @@ def gen_newline_strings(outfile): for i in range(1_000_000): f.write(f"rn\r\nr\r{i}\0") +def gen_null_strings(outfile): + for i in range(1_000_000): + if i % 2 == 0: + outfile.write(b"\x80\x00") + else: + outfile.write(b"banana\0") + + _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list