Changeset: 2112b593422d for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/2112b593422d
Modified Files:
        sql/server/sql_scan.c
Branch: Jan2022
Log Message:

Add more checks for end of file (EOF) in scanner/lexer implementation.


diffs (225 lines):

diff --git a/sql/server/sql_scan.c b/sql/server/sql_scan.c
--- a/sql/server/sql_scan.c
+++ b/sql/server/sql_scan.c
@@ -563,7 +563,7 @@ scanner_error(mvc *lc, int cur)
        switch (cur) {
        case EOF:
                (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected end of input");
-               return -1;      /* EOF needs -1 result */
+               return EOF;
        default:
                /* on Windows at least, iswcntrl returns TRUE for
                 * U+FEFF, but we just want consistent error
@@ -762,8 +762,8 @@ scanner_string(mvc *c, int quote, bool e
                        cur = scanner_getc(lc);
                }
        }
-       (void) sql_error(c, 2, "%s", lc->errstr ? lc->errstr : SQLSTATE(42000) "unexpected end of input");
-       return LEX_ERROR;
+       (void) sql_error(c, 2, "%s", lc->errstr ? lc->errstr : SQLSTATE(42000) "Unexpected end of input");
+       return EOF;
 }
 
 /* scan a structure {blah} into a string. We only count the matching {}
@@ -810,7 +810,7 @@ scanner_body(mvc *c)
                }
        }
        (void) sql_error(c, 2, SQLSTATE(42000) "Unexpected end of input");
-       return LEX_ERROR;
+       return EOF;
 }
 
 static int
@@ -833,6 +833,8 @@ keyword_or_ident(mvc * c, int cur)
                        return lc->yyval;
                }
        }
+       if (cur < 0)
+               return cur;
        (void)scanner_token(lc, IDENT);
        if ((k = find_keyword_bs(lc,s)))
                lc->yyval = k->token;
@@ -912,11 +914,17 @@ number(mvc * c, int cur)
         * parsed number (which may be the first causing it not to be a number);
         * it token == 0 after this block, a parse error was detected */
        if (cur == '0' && (cur = scanner_getc(lc)) == 'x') {
+               if (cur == EOF)
+                       return cur;
                cur = scanner_getc(lc);
                while (cur != EOF && iswxdigit(cur)) {
                        token = HEXADECIMAL;
                        cur = scanner_getc(lc);
                }
+
+               if (cur == EOF)
+                       return cur;
+
                if (token != HEXADECIMAL) {
                        /* 0x not followed by a hex digit: show 'x' as erroneous */
                        utf8_putchar(lc, cur);
@@ -927,12 +935,18 @@ number(mvc * c, int cur)
                while (cur != EOF && iswdigit(cur)) {
                        token = sqlINT;
                        cur = scanner_getc(lc);
+                       if (cur == EOF)
+                               return cur;
                }
                if (cur == '@') {
                        if (token == sqlINT) {
                                cur = scanner_getc(lc);
+                               if (cur == EOF)
+                                       return cur;
                                if (cur == '0') {
                                        cur = scanner_getc(lc);
+                                       if (cur == EOF)
+                                               return cur;
                                        token = OIDNUM;
                                } else {
                                        /* number + '@' not followed by 0: show '@' as erroneous */
@@ -944,10 +958,14 @@ number(mvc * c, int cur)
                } else {
                        if (cur == '.') {
                                cur = scanner_getc(lc);
+                                       if (cur == EOF)
+                                               return cur;
                                if (token == sqlINT || iswdigit(cur)) {
                                        token = INTNUM;
-                                       while (iswdigit(cur))
+                                       while (cur != EOF && iswdigit(cur))
                                                cur = scanner_getc(lc);
+                                       if (cur == EOF)
+                                               return cur;
                                } else {
                                        token = 0;
                                }
@@ -955,12 +973,16 @@ number(mvc * c, int cur)
                        if (cur == 'e' || cur == 'E') {
                                if (token != 0) {
                                        cur = scanner_getc(lc);
+                                       if (cur == EOF)
+                                               return cur;
                                        if (cur == '+' || cur == '-')
                                                cur = scanner_getc(lc);
                                        while (cur != EOF && iswdigit(cur)) {
                                                token = APPROXNUM;
                                                cur = scanner_getc(lc);
                                        }
+                                       if (cur == EOF)
+                                               return cur;
                                        if (token != APPROXNUM)
                                                token = 0;
                                }
@@ -996,6 +1018,8 @@ int scanner_symbol(mvc * c, int cur)
        case '/':
                lc->started = 1;
                next = scanner_getc(lc);
+               if (next < 0)
+                       return EOF;
                if (next == '*') {
                        lc->started = started;
                        cur = skip_c_comment(lc);
@@ -1032,6 +1056,8 @@ int scanner_symbol(mvc * c, int cur)
        case '-':
                lc->started = 1;
                next = scanner_getc(lc);
+               if (next < 0)
+                       return EOF;
                if (next == '-') {
                        lc->started = started;
                        if ((cur = skip_sql_comment(lc)) == EOF)
@@ -1044,6 +1070,8 @@ int scanner_symbol(mvc * c, int cur)
        case '~': /* binary not */
                lc->started = 1;
                next = scanner_getc(lc);
+               if (next < 0)
+                       return EOF;
                if (next == '=')
                        return scanner_token(lc, GEOM_MBR_EQUAL);
                utf8_putchar(lc, next);
@@ -1064,8 +1092,14 @@ int scanner_symbol(mvc * c, int cur)
        case '&':
                lc->started = 1;
                cur = scanner_getc(lc);
+               if (cur < 0)
+                       return EOF;
+               if (cur < 0)
+                       return EOF;
                if(cur == '<') {
                        next = scanner_getc(lc);
+                       if (next < 0)
+                               return EOF;
                        if(next == '|') {
                                return scanner_token(lc, GEOM_OVERLAP_OR_BELOW);
                        } else {
@@ -1089,12 +1123,16 @@ int scanner_symbol(mvc * c, int cur)
        case '<':
                lc->started = 1;
                cur = scanner_getc(lc);
+               if (cur < 0)
+                       return EOF;
                if (cur == '=') {
                        return scanner_token( lc, COMPARISON);
                } else if (cur == '>') {
                        return scanner_token( lc, COMPARISON);
                } else if (cur == '<') {
                        next = scanner_getc(lc);
+                       if (next < 0)
+                               return EOF;
                        if (next == '=') {
                                return scanner_token( lc, LEFT_SHIFT_ASSIGN);
                        } else if (next == '|') {
@@ -1105,6 +1143,8 @@ int scanner_symbol(mvc * c, int cur)
                        }
                } else if(cur == '-') {
                        next = scanner_getc(lc);
+                       if (next < 0)
+                               return EOF;
                        if(next == '>') {
                                return scanner_token(lc, GEOM_DIST);
                        } else {
@@ -1120,8 +1160,12 @@ int scanner_symbol(mvc * c, int cur)
        case '>':
                lc->started = 1;
                cur = scanner_getc(lc);
+               if (cur < 0)
+                       return EOF;
                if (cur == '>') {
                        cur = scanner_getc(lc);
+                       if (cur < 0)
+                               return EOF;
                        if (cur == '=')
                                return scanner_token( lc, RIGHT_SHIFT_ASSIGN);
                        utf8_putchar(lc, cur);
@@ -1135,6 +1179,8 @@ int scanner_symbol(mvc * c, int cur)
        case '.':
                lc->started = 1;
                cur = scanner_getc(lc);
+               if (cur < 0)
+                       return EOF;
                if (!iswdigit(cur)) {
                        utf8_putchar(lc, cur);
                        return scanner_token( lc, '.');
@@ -1146,10 +1192,14 @@ int scanner_symbol(mvc * c, int cur)
        case '|': /* binary or or string concat */
                lc->started = 1;
                cur = scanner_getc(lc);
+               if (cur < 0)
+                       return EOF;
                if (cur == '|') {
                        return scanner_token(lc, CONCATSTRING);
                } else if (cur == '&') {
                        next = scanner_getc(lc);
+                       if (next < 0)
+                               return EOF;
                        if(next == '>') {
                                return scanner_token(lc, GEOM_OVERLAP_OR_ABOVE);
                        } else {
@@ -1159,6 +1209,8 @@ int scanner_symbol(mvc * c, int cur)
                        }
                } else if (cur == '>') {
                        next = scanner_getc(lc);
+                       if (next < 0)
+                               return EOF;
                        if(next == '>') {
                                return scanner_token(lc, GEOM_ABOVE);
                        } else {
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to