Changeset: 35ae9857827b for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/35ae9857827b
Branch: iso
Log Message:

Merged with Jul2021


diffs (261 lines):

diff --git a/clients/examples/C/bincopydata.c b/clients/examples/C/bincopydata.c
--- a/clients/examples/C/bincopydata.c
+++ b/clients/examples/C/bincopydata.c
@@ -183,7 +183,7 @@ gen_newline_strings(FILE *f, bool bytesw
 {
        (void)byteswap;
        for (long i = 0; i < nrecs; i++) {
-               fprintf(f, "rn\r\nr\r%ld", i);
+               fprintf(f, "RN\r\nR\r%ld", i);
                fputc(0, f);
        }
 }
diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c
--- a/gdk/gdk_bbp.c
+++ b/gdk/gdk_bbp.c
@@ -129,24 +129,11 @@ static bool havehge = false;
 
 #define BBPnamecheck(s) (BBPtmpcheck(s) ? strtol((s) + 4, NULL, 8) : 0)
 
-#ifndef NDEBUG
-static inline bool
-islocked(MT_Lock *l)
-{
-       if (MT_lock_try(l)) {
-               MT_lock_unset(l);
-               return false;
-       }
-       return true;
-}
-#endif
-
 static void
 BBP_insert(bat i)
 {
        bat idx = (bat) (strHash(BBP_logical(i)) & BBP_mask);
 
-       assert(islocked(&BBPnameLock));
        BBP_next(i) = BBP_hash[idx];
        BBP_hash[idx] = i;
 }
@@ -158,7 +145,6 @@ BBP_delete(bat i)
        const char *s = BBP_logical(i);
        bat idx = (bat) (strHash(s) & BBP_mask);
 
-       assert(islocked(&BBPnameLock));
        for (h += idx; (i = *h) != 0; h = &BBP_next(i)) {
                if (strcmp(BBP_logical(i), s) == 0) {
                        *h = BBP_next(i);
@@ -400,7 +386,6 @@ BBPextend(int idx, bool buildhash)
 static gdk_return
 recover_dir(int farmid, bool direxists)
 {
-       assert(islocked(&GDKtmLock));
        if (direxists) {
                /* just try; don't care about these non-vital files */
                if (GDKunlink(farmid, BATDIR, "BBP", "bak") != GDK_SUCCEED)
@@ -1410,10 +1395,6 @@ BBPdir_first(bool subcommit, lng logno, 
        int n = 0;
        lng ologno, otransid;
 
-#ifndef NDEBUG
-       assert(islocked(&GDKtmLock));
-#endif
-
        if (obbpfp)
                *obbpfp = NULL;
        *nbbpfp = NULL;
@@ -2874,7 +2855,6 @@ BBPprepare(bool subcommit)
        str bakdirpath, subdirpath;
        gdk_return ret = GDK_SUCCEED;
 
-       assert(islocked(&GDKtmLock));
        if(!(bakdirpath = GDKfilepath(0, NULL, BAKDIR, NULL)))
                return GDK_FAIL;
        if(!(subdirpath = GDKfilepath(0, NULL, SUBDIR, NULL))) {
diff --git a/monetdb5/modules/atoms/json.c b/monetdb5/modules/atoms/json.c
--- a/monetdb5/modules/atoms/json.c
+++ b/monetdb5/modules/atoms/json.c
@@ -57,7 +57,6 @@ typedef str json;
                        if (*(J) != ' ' &&                                      \
                                *(J) != '\n' &&                                 \
                                *(J) != '\t' &&                                 \
-                               *(J) != '\f' &&                                 \
                                *(J) != '\r')                                   \
                                break;                                          \
        } while (0)
@@ -871,6 +870,8 @@ JSONstringParser(const char *j, const ch
                        *next = j;
                        return MAL_SUCCEED;
                default:
+                       if ((unsigned char)*j < ' ')
+                               throw(MAL, "json.parser", "illegal control char");
                        if (seensurrogate)
                                throw(MAL, "json.parser", "illegal escape char");
                        break;
@@ -911,6 +912,9 @@ JSONfractionParser(const char *j, const 
 
        // skip the period character
        j++;
+       // must be followed by more digits
+       if (!isdigit((unsigned char)*j))
+               return false;
        for (; *j; j++)
                if (!isdigit((unsigned char)*j))
                        break;
@@ -1022,11 +1026,16 @@ JSONtoken(JSON *jt, const char *j, const
                        skipblancs(j);
                        if (*j == '}')
                                break;
-                       if (*j != '}' && *j != ',') {
+                       if (*j != ',') {
                                jt->error = createException(MAL, "json.parser", "JSON syntax error: ',' or '}' expected at offset %td", j - string_start);
                                return idx;
                        }
                        j++;
+                       skipblancs(j);
+                       if (*j == '}') {
+                               jt->error = createException(MAL, "json.parser", "JSON syntax error: '}' not expected at offset %td", j - string_start);
+                               return idx;
+                       }
                }
                if (*j != '}') {
                        jt->error = createException(MAL, "json.parser", "JSON syntax error: '}' expected at offset %td", j - string_start);
@@ -1083,12 +1092,16 @@ JSONtoken(JSON *jt, const char *j, const
                                jt->error = createException(MAL, "json.parser", "JSON syntax error: Array value expected at offset %td", j - string_start);
                                return idx;
                        }
-                       if (*j != ']' && *j != ',') {
+                       if (*j != ',') {
                                jt->error = createException(MAL, "json.parser", "JSON syntax error: ',' or ']' expected at offset %td (context: %c%c%c)", j - string_start, *(j - 1), *j, *(j + 1));
                                return idx;
                        }
                        j++;
                        skipblancs(j);
+                       if (*j == ']') {
+                               jt->error = createException(MAL, "json.parser", "JSON syntax error: '}' not expected at offset %td", j - string_start);
+                               return idx;
+                       }
                }
                if (*j != ']') {
                        jt->error = createException(MAL, "json.parser", "JSON syntax error: ']' expected at offset %td", j - string_start);
diff --git a/sql/backends/monet5/sql_bincopyfrom.c b/sql/backends/monet5/sql_bincopyfrom.c
--- a/sql/backends/monet5/sql_bincopyfrom.c
+++ b/sql/backends/monet5/sql_bincopyfrom.c
@@ -326,29 +326,61 @@ convert_timestamp(void *dst_start, void 
 }
 
 
-static void
-convert_line_endings(char *text)
+static str
+convert_and_validate(char *text)
 {
-       // Read- and write positions.
-       // We always have w <= r, or it wouldn't be safe.
-       const char *r = text;
-       char *w = text;
-       while (*r) {
-               if (r[0] == '\r' && r[1] == '\n')
-                       r++;
-               *w++ = *r++;
+       unsigned char *r = (unsigned char*)text;
+       unsigned char *w = r;
+
+       if (*r == 0x80 && *(r+1) == 0) {
+               // Technically a utf-8 violation, but we treat it as the NULL marker
+               // GDK does so as well so we can just pass it on.
+               // load_zero_terminated_text() below contains an assert to ensure
+               // this remains the case.
+               return MAL_SUCCEED;
+       }
+
+       while (*r != 0) {
+               unsigned char c = *w++ = *r++;
+
+               if (c == '\r' && *r == '\n') {
+                       w--;
+                       continue;
+               }
+               if ((c & 0x80) == 0x00) // 0xxx_xxxx: standalone byte
+                       continue;
+               if ((c & 0xF8) == 0xF0) // 1111_0xxx
+                       goto expect3;
+               if ((c & 0xF0) == 0xE0) // 1110_xxxx
+                       goto expect2;
+               if ((c & 0xE0) == 0xC0) // 110x_xxxx
+                       goto expect1;
+               goto bad_utf8;
+
+expect3:
+               if (((*w++ = *r++) & 0x80) != 0x80)
+                       goto bad_utf8;
+expect2:
+               if (((*w++ = *r++) & 0x80) != 0x80)
+                       goto bad_utf8;
+expect1:
+               if (((*w++ = *r++) & 0x80) != 0x80)
+                       goto bad_utf8;
+
        }
        *w = '\0';
+       return MAL_SUCCEED;
+
+bad_utf8:
+       return createException(SQL, "BATattach_stream", SQLSTATE(42000) "malformed utf-8 byte sequence");
 }
 
 static str
-append_text(BAT *bat, char *start, char *end)
+append_text(BAT *bat, char *start)
 {
-       (void)bat;
-
-       char *cr = memchr(start, '\r', end - start);
-       if (cr)
-               convert_line_endings(cr);
+       str msg = convert_and_validate(start);
+       if (msg != MAL_SUCCEED)
+               return msg;
 
        if (BUNappend(bat, start, false) != GDK_SUCCEED)
                return createException(SQL, "sql.importColumn", GDK_EXCEPTION);
@@ -365,6 +397,9 @@ load_zero_terminated_text(BAT *bat, stre
        str msg = MAL_SUCCEED;
        bstream *bs = NULL;
 
+       // convert_and_validate() above counts on the following property to hold:
+       assert(strNil((const char[2]){ 0x80, 0 }));
+
        bs = bstream_create(s, 1 << 20);
        if (bs == NULL) {
                msg = createException(SQL, "sql", SQLSTATE(HY013) MAL_MALLOC_FAIL);
@@ -384,7 +419,7 @@ load_zero_terminated_text(BAT *bat, stre
                char *buf_end = &bs->buf[bs->len];
                char *start, *end;
                for (start = buf_start; (end = memchr(start, '\0', buf_end - start)) != NULL; start = end + 1) {
-                       msg = append_text(bat, start, end);
+                       msg = append_text(bat, start);
                        if (msg != NULL)
                                goto end;
                }
diff --git a/sql/test/bincopy/Tests/bincopy_support.py b/sql/test/bincopy/Tests/bincopy_support.py
--- a/sql/test/bincopy/Tests/bincopy_support.py
+++ b/sql/test/bincopy/Tests/bincopy_support.py
@@ -127,14 +127,14 @@ OR    (id % 10000 = 0 AND LENGTH(s) = 28
 BROKEN_STRINGS = ("""
 CREATE TABLE foo(id INT NOT NULL, s TEXT);
 COPY BINARY INTO foo(id, s) FROM @ints@, @broken_strings@ @ON@;
-""", (None, "!GDK reported error: strPut: incorrectly encoded UTF-8"))
+""", (None, "!malformed utf-8 byte sequence"))
 
 # note that the \r\n has been normalized to \n but the lone \r has been
 # left alone.
 NEWLINE_STRINGS = (r"""
 CREATE TABLE foo(id INT NOT NULL, s TEXT);
 COPY BINARY INTO foo(id, s) FROM @ints@, @newline_strings@ @ON@;
-SELECT COUNT(id) FROM foo WHERE s = (E'rn\nr\r' || id);
+SELECT COUNT(id) FROM foo WHERE s = (E'RN\nR\r' || id);
 """, [f"{NRECS} affected rows", f"{NRECS}"])
 
 NULL_STRINGS = ("""
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to