Module Name: src Committed By: kre Date: Mon Oct 21 15:57:45 UTC 2024
Modified Files: src/bin/sh: expand.c parser.c parser.h Log Message: Fix processing of unknown variable expansion types. Our shell is (was) one of the last not to do this correctly. Expansions are supposed to happen only when the command in which they occur is being executed, not while it is being parsed. If the expansion only happens then, errors should only be detected then. Make it work like that (I saw after I fixed this that FreeBSD had done it, long ago, almost the same way - it is kind of an obvious thing to do). This will allow code like: if test it is shell X then commands using shell X specific expansion ops else if it is shell Y then commands using shell Y specific expansion ops else ... fi Previously expansion errors were detected while parsing, so if we're not shell X, and don't implement something that it does (some extension to the standard) that would have generated a parser syntax error, and the script could not be executed (despite the line with the error never being executed). Note that this change does not handle all such possible extensions, just this one. Others are much harder. One side effect of this change is that sh will now continue reading a variable expansion until it locates the terminating '}' (in ${var} forms) regardless of how broken it obviously is (to our shell) whereas previously it would have bailed out as soon as an oddity was spotted. To generate a diff of this commit: cvs rdiff -u -r1.145 -r1.146 src/bin/sh/expand.c cvs rdiff -u -r1.183 -r1.184 src/bin/sh/parser.c cvs rdiff -u -r1.29 -r1.30 src/bin/sh/parser.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/bin/sh/expand.c diff -u src/bin/sh/expand.c:1.145 src/bin/sh/expand.c:1.146 --- src/bin/sh/expand.c:1.145 Thu Oct 3 20:14:01 2024 +++ src/bin/sh/expand.c Mon Oct 21 15:57:45 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: expand.c,v 1.145 2024/10/03 20:14:01 rillig Exp $ */ +/* $NetBSD: expand.c,v 1.146 2024/10/21 15:57:45 kre Exp $ */ /*- * Copyright (c) 1991, 1993 @@ -37,7 +37,7 @@ #if 0 static char sccsid[] = "@(#)expand.c 8.5 (Berkeley) 5/15/95"; #else -__RCSID("$NetBSD: expand.c,v 1.145 2024/10/03 20:14:01 rillig Exp $"); +__RCSID("$NetBSD: expand.c,v 1.146 2024/10/21 15:57:45 kre Exp $"); #endif #endif /* not lint */ @@ -913,12 +913,12 @@ evalvar(const char *p, int flag) varflags = (unsigned char)*p++; subtype = varflags & VSTYPE; var = p; - special = !is_name(*p); + special = subtype != VSUNKNOWN && !is_name(*p); p = strchr(p, '=') + 1; CTRACE(DBG_EXPAND, ("evalvar \"%.*s\", flag=%#X quotes=%#X vf=%#X subtype=%X\n", - p - var - 1, var, flag, quotes, varflags, subtype)); + (int)(p - var - 1), var, flag, quotes, varflags, subtype)); again: /* jump here after setting a variable with ${var=text} */ if (varflags & VSLINENO) { @@ -1106,6 +1106,26 @@ evalvar(const char *p, int flag) apply_ifs = 0; /* never executed */ break; + case VSUNKNOWN: + VTRACE(DBG_EXPAND, + ("evalvar \"%.*s\", unknown [%p %p] \"%.3s\" (%#2x %#2x)\n", + (int)(p - var - 1), var, var, p, p, p[0] & 0xFF, p[1] & 0xFF)); + + if ((p - var) <= 1) + error("%d: unknown expansion type", line_number); + else { + if (*p == '#') /* only VSUNKNOWN as a ${#var:...} */ + error("%d: ${#%.*s%c..}: unknown modifier", + line_number, (int)(p - var - 1), + var, p[1]&0xFF); + + if (*p == CTLESC) + p++; + error("%d: ${%.*s%c..}: unknown modifier", + line_number, (int)(p - var - 1), var, (*p & 0xFF)); + } + /* NOTREACHED */ + default: abort(); } Index: src/bin/sh/parser.c diff -u src/bin/sh/parser.c:1.183 src/bin/sh/parser.c:1.184 --- src/bin/sh/parser.c:1.183 Thu Oct 3 20:14:01 2024 +++ 
src/bin/sh/parser.c Mon Oct 21 15:57:45 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: parser.c,v 1.183 2024/10/03 20:14:01 rillig Exp $ */ +/* $NetBSD: parser.c,v 1.184 2024/10/21 15:57:45 kre Exp $ */ /*- * Copyright (c) 1991, 1993 @@ -37,7 +37,7 @@ #if 0 static char sccsid[] = "@(#)parser.c 8.7 (Berkeley) 5/16/95"; #else -__RCSID("$NetBSD: parser.c,v 1.183 2024/10/03 20:14:01 rillig Exp $"); +__RCSID("$NetBSD: parser.c,v 1.184 2024/10/21 15:57:45 kre Exp $"); #endif #endif /* not lint */ @@ -2216,7 +2216,7 @@ parsesub: { int typeloc; int flags; const char *p; - static const char types[] = "}-+?="; + static const char types[] = "}-+?="; /* see parser.h VSXYZ defs */ c = pgetc_linecont(); VTRACE(DBG_LEXER, ("\"$%c\"(%#.2x)", c&0xFF, c&0x1FF)); @@ -2323,10 +2323,8 @@ parsesub: { c = pgetc_linecont(); } else { - VTRACE(DBG_LEXER, ("\"$%c(%#.2x)??\n", c&0xFF,c&0x1FF)); - badsub:; - cleanup_state_stack(stack); - synerror("Bad substitution"); + VTRACE(DBG_LEXER, ("\"$%c(%#.2x)??", c&0xFF, c&0xFF)); + subtype = VSUNKNOWN; } STPUTC('=', out); @@ -2338,9 +2336,29 @@ parsesub: { /*FALLTHROUGH*/ default: p = strchr(types, c); - if (p == NULL) - goto badsub; - subtype = p - types + VSNORMAL; + if (__predict_false(p == NULL)) { + subtype = VSUNKNOWN; + /* + * keep the unknown modifier + * for the error message. + * + * Note that if we came from + * the case ':' above, that + * is the unknown modifier, + * not the following character + * + * It is not important that + * we keep the remaining word + * intact, it will never be + * used. 
+ */ + if (flags & VSNUL) + /* (ie: lose c) */ + STPUTC(':', out); + else + STPUTC(c, out); + } else + subtype = p - types + VSNORMAL; break; case '%': case '#': @@ -2357,8 +2375,10 @@ parsesub: { } } } else { - if (subtype == VSLENGTH && c != /*{*/ '}') - synerror("no modifiers allowed with ${#var}"); + if (subtype == VSLENGTH && c != /*{*/ '}') { + STPUTC('#', out); + subtype = VSUNKNOWN; + } pungetc(); } if (quoted || arinest) Index: src/bin/sh/parser.h diff -u src/bin/sh/parser.h:1.29 src/bin/sh/parser.h:1.30 --- src/bin/sh/parser.h:1.29 Fri Jul 12 08:35:47 2024 +++ src/bin/sh/parser.h Mon Oct 21 15:57:45 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: parser.h,v 1.29 2024/07/12 08:35:47 kre Exp $ */ +/* $NetBSD: parser.h,v 1.30 2024/10/21 15:57:45 kre Exp $ */ /*- * Copyright (c) 1991, 1993 @@ -49,7 +49,8 @@ #define CTLNONL '\212' /* The \n in a deleted \ \n sequence */ /* pure concidence that (CTLNONL & 0x7f) == '\n' */ #define CTLCNL '\213' /* A $'\n' - newline not counted */ -#define CTL_LAST '\213' /* last 'special' character */ +#define CTLVARMOD '\214' /* a modifier in a variable expansion */ +#define CTL_LAST '\214' /* last 'special' character */ /* variable substitution byte (follows CTLVAR) */ #define VSTYPE 0x0f /* type of variable substitution */ @@ -59,7 +60,7 @@ #define VSPATQ 0x40 /* ensure correct pattern quoting in ${x#pat} */ #define VSQUOTE 0x80 /* inside double quotes--suppress splitting */ -/* values of VSTYPE field */ +/* values of VSTYPE field (nb: 0 reserved for "not determined yet") */ #define VSNORMAL 0x1 /* normal variable: $var or ${var} */ #define VSMINUS 0x2 /* ${var-text} */ #define VSPLUS 0x3 /* ${var+text} */ @@ -70,6 +71,7 @@ #define VSTRIMRIGHT 0x8 /* ${var%pattern} */ #define VSTRIMRIGHTMAX 0x9 /* ${var%%pattern} */ #define VSLENGTH 0xa /* ${#var} */ +#define VSUNKNOWN 0xf /* unknown modifier */ union node *parsecmd(int); void fixredir(union node *, const char *, int);