Module Name:	src
Committed By:	christos
Date:		Thu Aug 1 06:22:52 UTC 2019
Modified Files:
	src/external/historical/nawk/dist: b.c

Log Message:
PR/54424: Martijn Dekker: awk: broken character classes in UTF-8 locale:
only the first matches
Pick up some of the fixes from upstream:
- POSIX paren matching
- print \v \a
- some more fatal handling
- init the entire character range.

To generate a diff of this commit:
cvs rdiff -u -r1.7 -r1.8 src/external/historical/nawk/dist/b.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files: Index: src/external/historical/nawk/dist/b.c diff -u src/external/historical/nawk/dist/b.c:1.7 src/external/historical/nawk/dist/b.c:1.8 --- src/external/historical/nawk/dist/b.c:1.7 Thu Aug 1 02:14:45 2019 +++ src/external/historical/nawk/dist/b.c Thu Aug 1 02:22:52 2019 @@ -31,6 +31,7 @@ THIS SOFTWARE. #define DEBUG #include <ctype.h> +#include <limits.h> #include <stdio.h> #include <string.h> #include <stdlib.h> @@ -333,6 +334,10 @@ int quoted(const uschar **pp) /* pick up c = '\r'; else if (c == 'b') c = '\b'; + else if (c == 'v') + c = '\v'; + else if (c == 'a') + c = '\a'; else if (c == '\\') c = '\\'; else if (c == 'x') { /* hexadecimal goo follows */ @@ -978,6 +983,7 @@ static int repeat(const uschar *reptok, if (secondnum < 0) { /* means {n,} -> repeat n-1 times followed by PLUS */ if (firstnum < 2) { /* 0 or 1: should be handled before you get here */ + FATAL("internal error"); } else { return replace_repeat(reptok, reptoklen, atom, atomlen, firstnum, secondnum, REPEAT_PLUS_APPENDED); @@ -998,6 +1004,7 @@ static int repeat(const uschar *reptok, return replace_repeat(reptok, reptoklen, atom, atomlen, firstnum, secondnum, REPEAT_WITH_Q); } else { /* Error - shouldn't be here (n>m) */ + FATAL("internal error"); } return 0; } @@ -1013,6 +1020,7 @@ int relex(void) /* lexical analyzer for int i; int num, m, commafound, digitfound; const uschar *startreptok; + static int parens = 0; rescan: starttok = prestr; @@ -1026,9 +1034,18 @@ rescan: case '\0': prestr--; return '\0'; case '^': case '$': + return c; case '(': + parens++; + return c; case ')': - return c; + if (parens) { + parens--; + return c; + } + /* unmatched close parenthesis; per POSIX, treat as literal */ + rlxval = c; + return CHAR; case '\\': rlxval = quoted(&prestr); return CHAR; @@ -1064,7 +1081,15 @@ rescan: if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' && prestr[2 + cc->cc_namelen] == ']') { prestr += cc->cc_namelen + 3; - for (i = 1; i < NCHARS; i++) { + /* + * 
BUG: We begin at 1, instead of 0, since we + * would otherwise prematurely terminate the + * string for classes like [[:cntrl:]]. This + * means that we can't match the NUL character, + * not without first adapting the entire + * program to track each string's length. + */ + for (i = 1; i <= UCHAR_MAX; i++) { if (!adjbuf(&buf, &bufsz, bp-buf+1, 100, &bp, "relex2")) FATAL("out of space for reg expr %.10s...", lastre); if (cc->cc_func(i)) {