Changeset: 35ae9857827b for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/35ae9857827b Branch: iso Log Message:
Merged with Jul2021 diffs (261 lines): diff --git a/clients/examples/C/bincopydata.c b/clients/examples/C/bincopydata.c --- a/clients/examples/C/bincopydata.c +++ b/clients/examples/C/bincopydata.c @@ -183,7 +183,7 @@ gen_newline_strings(FILE *f, bool bytesw { (void)byteswap; for (long i = 0; i < nrecs; i++) { - fprintf(f, "rn\r\nr\r%ld", i); + fprintf(f, "RN\r\nR\r%ld", i); fputc(0, f); } } diff --git a/gdk/gdk_bbp.c b/gdk/gdk_bbp.c --- a/gdk/gdk_bbp.c +++ b/gdk/gdk_bbp.c @@ -129,24 +129,11 @@ static bool havehge = false; #define BBPnamecheck(s) (BBPtmpcheck(s) ? strtol((s) + 4, NULL, 8) : 0) -#ifndef NDEBUG -static inline bool -islocked(MT_Lock *l) -{ - if (MT_lock_try(l)) { - MT_lock_unset(l); - return false; - } - return true; -} -#endif - static void BBP_insert(bat i) { bat idx = (bat) (strHash(BBP_logical(i)) & BBP_mask); - assert(islocked(&BBPnameLock)); BBP_next(i) = BBP_hash[idx]; BBP_hash[idx] = i; } @@ -158,7 +145,6 @@ BBP_delete(bat i) const char *s = BBP_logical(i); bat idx = (bat) (strHash(s) & BBP_mask); - assert(islocked(&BBPnameLock)); for (h += idx; (i = *h) != 0; h = &BBP_next(i)) { if (strcmp(BBP_logical(i), s) == 0) { *h = BBP_next(i); @@ -400,7 +386,6 @@ BBPextend(int idx, bool buildhash) static gdk_return recover_dir(int farmid, bool direxists) { - assert(islocked(&GDKtmLock)); if (direxists) { /* just try; don't care about these non-vital files */ if (GDKunlink(farmid, BATDIR, "BBP", "bak") != GDK_SUCCEED) @@ -1410,10 +1395,6 @@ BBPdir_first(bool subcommit, lng logno, int n = 0; lng ologno, otransid; -#ifndef NDEBUG - assert(islocked(&GDKtmLock)); -#endif - if (obbpfp) *obbpfp = NULL; *nbbpfp = NULL; @@ -2874,7 +2855,6 @@ BBPprepare(bool subcommit) str bakdirpath, subdirpath; gdk_return ret = GDK_SUCCEED; - assert(islocked(&GDKtmLock)); if(!(bakdirpath = GDKfilepath(0, NULL, BAKDIR, NULL))) return GDK_FAIL; if(!(subdirpath = GDKfilepath(0, NULL, SUBDIR, NULL))) { diff --git a/monetdb5/modules/atoms/json.c b/monetdb5/modules/atoms/json.c --- a/monetdb5/modules/atoms/json.c +++ b/monetdb5/modules/atoms/json.c @@ -57,7 +57,6 @@ typedef str json; if (*(J) != ' ' && \ *(J) != '\n' && \ *(J) != '\t' && \ - *(J) != '\f' && \ *(J) != '\r') \ break; \ } while (0) @@ -871,6 +870,8 @@ JSONstringParser(const char *j, const ch *next = j; return MAL_SUCCEED; default: + if ((unsigned char)*j < ' ') + throw(MAL, "json.parser", "illegal control char"); if (seensurrogate) throw(MAL, "json.parser", "illegal escape char"); break; @@ -911,6 +912,9 @@ JSONfractionParser(const char *j, const // skip the period character j++; + // must be followed by more digits + if (!isdigit((unsigned char)*j)) + return false; for (; *j; j++) if (!isdigit((unsigned char)*j)) break; @@ -1022,11 +1026,16 @@ JSONtoken(JSON *jt, const char *j, const skipblancs(j); if (*j == '}') break; - if (*j != '}' && *j != ',') { + if (*j != ',') { jt->error = createException(MAL, "json.parser", "JSON syntax error: ',' or '}' expected at offset %td", j - string_start); return idx; } j++; + skipblancs(j); + if (*j == '}') { + jt->error = createException(MAL, "json.parser", "JSON syntax error: '}' not expected at offset %td", j - string_start); + return idx; + } } if (*j != '}') { jt->error = createException(MAL, "json.parser", "JSON syntax error: '}' expected at offset %td", j - string_start); @@ -1083,12 +1092,16 @@ JSONtoken(JSON *jt, const char *j, const jt->error = createException(MAL, "json.parser", "JSON syntax error: Array value expected at offset %td", j - string_start); return idx; } - if (*j != ']' && *j != ',') { + if (*j != ',') { jt->error = createException(MAL, "json.parser", "JSON syntax error: ',' or ']' expected at offset %td (context: %c%c%c)", j - string_start, *(j - 1), *j, *(j + 1)); return idx; } j++; skipblancs(j); + if (*j == ']') { + jt->error = createException(MAL, "json.parser", "JSON syntax error: '}' not expected at offset %td", j - string_start); + return idx; + } } if (*j != ']') { jt->error = createException(MAL, "json.parser", "JSON syntax error: ']' expected at offset %td", j - string_start); diff --git a/sql/backends/monet5/sql_bincopyfrom.c b/sql/backends/monet5/sql_bincopyfrom.c --- a/sql/backends/monet5/sql_bincopyfrom.c +++ b/sql/backends/monet5/sql_bincopyfrom.c @@ -326,29 +326,61 @@ convert_timestamp(void *dst_start, void } -static void -convert_line_endings(char *text) +static str +convert_and_validate(char *text) { - // Read- and write positions. - // We always have w <= r, or it wouldn't be safe. - const char *r = text; - char *w = text; - while (*r) { - if (r[0] == '\r' && r[1] == '\n') - r++; - *w++ = *r++; + unsigned char *r = (unsigned char*)text; + unsigned char *w = r; + + if (*r == 0x80 && *(r+1) == 0) { + // Technically a utf-8 violation, but we treat it as the NULL marker + // GDK does so as well so we can just pass it on. + // load_zero_terminated_text() below contains an assert to ensure + // this remains the case. + return MAL_SUCCEED; + } + + while (*r != 0) { + unsigned char c = *w++ = *r++; + + if (c == '\r' && *r == '\n') { + w--; + continue; + } + if ((c & 0x80) == 0x00) // 1xxx_xxxx: standalone byte + continue; + if ((c & 0xF8) == 0xF0) // 1111_0xxx + goto expect3; + if ((c & 0xF0) == 0xE0) // 1110_xxxx + goto expect2; + if ((c & 0xE0) == 0xC0) // 110x_xxxx + goto expect1; + goto bad_utf8; + +expect3: + if (((*w++ = *r++) & 0x80) != 0x80) + goto bad_utf8; +expect2: + if (((*w++ = *r++) & 0x80) != 0x80) + goto bad_utf8; +expect1: + if (((*w++ = *r++) & 0x80) != 0x80) + goto bad_utf8; + } *w = '\0'; + return MAL_SUCCEED; + +bad_utf8: + return createException(SQL, "BATattach_stream", SQLSTATE(42000) "malformed utf-8 byte sequence"); } static str -append_text(BAT *bat, char *start, char *end) +append_text(BAT *bat, char *start) { - (void)bat; - - char *cr = memchr(start, '\r', end - start); - if (cr) - convert_line_endings(cr); + str msg = convert_and_validate(start); + if (msg != MAL_SUCCEED) + return msg; if (BUNappend(bat, start, false) != GDK_SUCCEED) return createException(SQL, "sql.importColumn", GDK_EXCEPTION); @@ -365,6 +397,9 @@ load_zero_terminated_text(BAT *bat, stre str msg = MAL_SUCCEED; bstream *bs = NULL; + // convert_and_validate() above counts on the following property to hold: + assert(strNil((const char[2]){ 0x80, 0 })); + bs = bstream_create(s, 1 << 20); if (bs == NULL) { msg = createException(SQL, "sql", SQLSTATE(HY013) MAL_MALLOC_FAIL); @@ -384,7 +419,7 @@ load_zero_terminated_text(BAT *bat, stre char *buf_end = &bs->buf[bs->len]; char *start, *end; for (start = buf_start; (end = memchr(start, '\0', buf_end - start)) != NULL; start = end + 1) { - msg = append_text(bat, start, end); + msg = append_text(bat, start); if (msg != NULL) goto end; } diff --git a/sql/test/bincopy/Tests/bincopy_support.py b/sql/test/bincopy/Tests/bincopy_support.py --- a/sql/test/bincopy/Tests/bincopy_support.py +++ b/sql/test/bincopy/Tests/bincopy_support.py @@ -127,14 +127,14 @@ OR (id % 10000 = 0 AND LENGTH(s) = 28 BROKEN_STRINGS = (""" CREATE TABLE foo(id INT NOT NULL, s TEXT); COPY BINARY INTO foo(id, s) FROM @ints@, @broken_strings@ @ON@; -""", (None, "!GDK reported error: strPut: incorrectly encoded UTF-8")) +""", (None, "!malformed utf-8 byte sequence")) # note that the \r\n has been normalized to \n but the lone \r has been # left alone. NEWLINE_STRINGS = (r""" CREATE TABLE foo(id INT NOT NULL, s TEXT); COPY BINARY INTO foo(id, s) FROM @ints@, @newline_strings@ @ON@; -SELECT COUNT(id) FROM foo WHERE s = (E'rn\nr\r' || id); +SELECT COUNT(id) FROM foo WHERE s = (E'RN\nR\r' || id); """, [f"{NRECS} affected rows", f"{NRECS}"]) NULL_STRINGS = (""" _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list