Changeset: 2112b593422d for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/2112b593422d Modified Files: sql/server/sql_scan.c Branch: Jan2022 Log Message:
Add more checks for end of file (EOF) in scanner/lexer implementation. diffs (225 lines): diff --git a/sql/server/sql_scan.c b/sql/server/sql_scan.c --- a/sql/server/sql_scan.c +++ b/sql/server/sql_scan.c @@ -563,7 +563,7 @@ scanner_error(mvc *lc, int cur) switch (cur) { case EOF: (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected end of input"); - return -1; /* EOF needs -1 result */ + return EOF; default: /* on Windows at least, iswcntrl returns TRUE for * U+FEFF, but we just want consistent error @@ -762,8 +762,8 @@ scanner_string(mvc *c, int quote, bool e cur = scanner_getc(lc); } } - (void) sql_error(c, 2, "%s", lc->errstr ? lc->errstr : SQLSTATE(42000) "unexpected end of input"); - return LEX_ERROR; + (void) sql_error(c, 2, "%s", lc->errstr ? lc->errstr : SQLSTATE(42000) "Unexpected end of input"); + return EOF; } /* scan a structure {blah} into a string. We only count the matching {} @@ -810,7 +810,7 @@ scanner_body(mvc *c) } } (void) sql_error(c, 2, SQLSTATE(42000) "Unexpected end of input"); - return LEX_ERROR; + return EOF; } static int @@ -833,6 +833,8 @@ keyword_or_ident(mvc * c, int cur) return lc->yyval; } } + if (cur < 0) + return cur; (void)scanner_token(lc, IDENT); if ((k = find_keyword_bs(lc,s))) lc->yyval = k->token; @@ -912,11 +914,17 @@ number(mvc * c, int cur) * parsed number (which may be the first causing it not to be a number); * it token == 0 after this block, a parse error was detected */ if (cur == '0' && (cur = scanner_getc(lc)) == 'x') { + if (cur == EOF) + return cur; cur = scanner_getc(lc); while (cur != EOF && iswxdigit(cur)) { token = HEXADECIMAL; cur = scanner_getc(lc); } + + if (cur == EOF) + return cur; + if (token != HEXADECIMAL) { /* 0x not followed by a hex digit: show 'x' as erroneous */ utf8_putchar(lc, cur); @@ -927,12 +935,18 @@ number(mvc * c, int cur) while (cur != EOF && iswdigit(cur)) { token = sqlINT; cur = scanner_getc(lc); + if (cur == EOF) + return cur; } if (cur == '@') { if (token == sqlINT) { cur = scanner_getc(lc); + if (cur == EOF) + return cur; if (cur == '0') { cur = scanner_getc(lc); + if (cur == EOF) + return cur; token = OIDNUM; } else { /* number + '@' not followed by 0: show '@' as erroneous */ @@ -944,10 +958,14 @@ number(mvc * c, int cur) } else { if (cur == '.') { cur = scanner_getc(lc); + if (cur == EOF) + return cur; if (token == sqlINT || iswdigit(cur)) { token = INTNUM; - while (iswdigit(cur)) + while (cur != EOF && iswdigit(cur)) cur = scanner_getc(lc); + if (cur == EOF) + return cur; } else { token = 0; } @@ -955,12 +973,16 @@ number(mvc * c, int cur) if (cur == 'e' || cur == 'E') { if (token != 0) { cur = scanner_getc(lc); + if (cur == EOF) + return cur; if (cur == '+' || cur == '-') cur = scanner_getc(lc); while (cur != EOF && iswdigit(cur)) { token = APPROXNUM; cur = scanner_getc(lc); } + if (cur == EOF) + return cur; if (token != APPROXNUM) token = 0; } @@ -996,6 +1018,8 @@ int scanner_symbol(mvc * c, int cur) case '/': lc->started = 1; next = scanner_getc(lc); + if (next < 0) + return EOF; if (next == '*') { lc->started = started; cur = skip_c_comment(lc); @@ -1032,6 +1056,8 @@ int scanner_symbol(mvc * c, int cur) case '-': lc->started = 1; next = scanner_getc(lc); + if (next < 0) + return EOF; if (next == '-') { lc->started = started; if ((cur = skip_sql_comment(lc)) == EOF) @@ -1044,6 +1070,8 @@ int scanner_symbol(mvc * c, int cur) case '~': /* binary not */ lc->started = 1; next = scanner_getc(lc); + if (next < 0) + return EOF; if (next == '=') return scanner_token(lc, GEOM_MBR_EQUAL); utf8_putchar(lc, next); @@ -1064,8 +1092,14 @@ int scanner_symbol(mvc * c, int cur) case '&': lc->started = 1; cur = scanner_getc(lc); + if (cur < 0) + return EOF; + if (cur < 0) + return EOF; if(cur == '<') { next = scanner_getc(lc); + if (next < 0) + return EOF; if(next == '|') { return scanner_token(lc, GEOM_OVERLAP_OR_BELOW); } else { @@ -1089,12 +1123,16 @@ int scanner_symbol(mvc * c, int cur) case '<': lc->started = 1; cur = scanner_getc(lc); + if (cur < 0) + return EOF; if (cur == '=') { return scanner_token( lc, COMPARISON); } else if (cur == '>') { return scanner_token( lc, COMPARISON); } else if (cur == '<') { next = scanner_getc(lc); + if (next < 0) + return EOF; if (next == '=') { return scanner_token( lc, LEFT_SHIFT_ASSIGN); } else if (next == '|') { @@ -1105,6 +1143,8 @@ int scanner_symbol(mvc * c, int cur) } } else if(cur == '-') { next = scanner_getc(lc); + if (next < 0) + return EOF; if(next == '>') { return scanner_token(lc, GEOM_DIST); } else { @@ -1120,8 +1160,12 @@ int scanner_symbol(mvc * c, int cur) case '>': lc->started = 1; cur = scanner_getc(lc); + if (cur < 0) + return EOF; if (cur == '>') { cur = scanner_getc(lc); + if (cur < 0) + return EOF; if (cur == '=') return scanner_token( lc, RIGHT_SHIFT_ASSIGN); utf8_putchar(lc, cur); @@ -1135,6 +1179,8 @@ int scanner_symbol(mvc * c, int cur) case '.': lc->started = 1; cur = scanner_getc(lc); + if (cur < 0) + return EOF; if (!iswdigit(cur)) { utf8_putchar(lc, cur); return scanner_token( lc, '.'); @@ -1146,10 +1192,14 @@ int scanner_symbol(mvc * c, int cur) case '|': /* binary or or string concat */ lc->started = 1; cur = scanner_getc(lc); + if (cur < 0) + return EOF; if (cur == '|') { return scanner_token(lc, CONCATSTRING); } else if (cur == '&') { next = scanner_getc(lc); + if (next < 0) + return EOF; if(next == '>') { return scanner_token(lc, GEOM_OVERLAP_OR_ABOVE); } else { @@ -1159,6 +1209,8 @@ int scanner_symbol(mvc * c, int cur) } } else if (cur == '>') { next = scanner_getc(lc); + if (next < 0) + return EOF; if(next == '>') { return scanner_token(lc, GEOM_ABOVE); } else { _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org