Module Name:    src
Committed By:   kre
Date:           Mon Oct 21 15:57:45 UTC 2024

Modified Files:
        src/bin/sh: expand.c parser.c parser.h

Log Message:
Fix processing of unknown variable expansion types.

Our shell is (was) one of the last not to do this correctly.

Expansions are supposed to happen only when the command in which
they occur is being executed, not while it is being parsed.
If the expansion only happens then, errors should only be
detected then.

Make it work like that (I saw after I fixed this that FreeBSD
had done it, long ago, almost the same way - it is kind of an
obvious thing to do).

This will allow code like

        if test it is shell X
        then
                commands using shell X specific expansion ops
        else if it is shell Y
        then
                commands using shell Y specific expansion ops
        else ...
        fi

Previously, expansion errors were detected while parsing.  So
if we are not shell X, and do not implement something that it
does (some extension to the standard), that construct would have
generated a parser syntax error, and the script could not be
executed at all (despite the line with the error never being
executed).

Note that this change does not handle all such possible
extensions, just this one.   Others are much harder.

One side effect of this change is that sh will now continue
reading a variable expansion until it locates the terminating
'}' (in ${var} forms) regardless of how broken it obviously
is (to our shell) whereas previously it would have bailed out
as soon as an oddity was spotted.


To generate a diff of this commit:
cvs rdiff -u -r1.145 -r1.146 src/bin/sh/expand.c
cvs rdiff -u -r1.183 -r1.184 src/bin/sh/parser.c
cvs rdiff -u -r1.29 -r1.30 src/bin/sh/parser.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/bin/sh/expand.c
diff -u src/bin/sh/expand.c:1.145 src/bin/sh/expand.c:1.146
--- src/bin/sh/expand.c:1.145	Thu Oct  3 20:14:01 2024
+++ src/bin/sh/expand.c	Mon Oct 21 15:57:45 2024
@@ -1,4 +1,4 @@
-/*	$NetBSD: expand.c,v 1.145 2024/10/03 20:14:01 rillig Exp $	*/
+/*	$NetBSD: expand.c,v 1.146 2024/10/21 15:57:45 kre Exp $	*/
 
 /*-
  * Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
 #if 0
 static char sccsid[] = "@(#)expand.c	8.5 (Berkeley) 5/15/95";
 #else
-__RCSID("$NetBSD: expand.c,v 1.145 2024/10/03 20:14:01 rillig Exp $");
+__RCSID("$NetBSD: expand.c,v 1.146 2024/10/21 15:57:45 kre Exp $");
 #endif
 #endif /* not lint */
 
@@ -913,12 +913,12 @@ evalvar(const char *p, int flag)
 	varflags = (unsigned char)*p++;
 	subtype = varflags & VSTYPE;
 	var = p;
-	special = !is_name(*p);
+	special = subtype != VSUNKNOWN && !is_name(*p);
 	p = strchr(p, '=') + 1;
 
 	CTRACE(DBG_EXPAND,
 	    ("evalvar \"%.*s\", flag=%#X quotes=%#X vf=%#X subtype=%X\n",
-	    p - var - 1, var, flag, quotes, varflags, subtype));
+	    (int)(p - var - 1), var, flag, quotes, varflags, subtype));
 
  again: /* jump here after setting a variable with ${var=text} */
 	if (varflags & VSLINENO) {
@@ -1106,6 +1106,26 @@ evalvar(const char *p, int flag)
 		apply_ifs = 0;		/* never executed */
 		break;
 
+	case VSUNKNOWN:
+		VTRACE(DBG_EXPAND,
+	    	   ("evalvar \"%.*s\", unknown [%p %p] \"%.3s\" (%#2x %#2x)\n",
+		    (int)(p - var - 1), var, var, p, p, p[0] & 0xFF, p[1] & 0xFF));
+
+		if ((p - var) <= 1)
+			error("%d: unknown expansion type", line_number);
+		else {
+			if (*p == '#')	/* only VSUNKNOWN as a ${#var:...} */
+				error("%d: ${#%.*s%c..}: unknown modifier",
+				     line_number, (int)(p - var - 1),
+				     var, p[1]&0xFF);
+
+			if (*p == CTLESC)
+				p++;
+			error("%d: ${%.*s%c..}: unknown modifier",
+			    line_number, (int)(p - var - 1), var, (*p & 0xFF));
+		}
+		/* NOTREACHED */
+
 	default:
 		abort();
 	}

Index: src/bin/sh/parser.c
diff -u src/bin/sh/parser.c:1.183 src/bin/sh/parser.c:1.184
--- src/bin/sh/parser.c:1.183	Thu Oct  3 20:14:01 2024
+++ src/bin/sh/parser.c	Mon Oct 21 15:57:45 2024
@@ -1,4 +1,4 @@
-/*	$NetBSD: parser.c,v 1.183 2024/10/03 20:14:01 rillig Exp $	*/
+/*	$NetBSD: parser.c,v 1.184 2024/10/21 15:57:45 kre Exp $	*/
 
 /*-
  * Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
 #if 0
 static char sccsid[] = "@(#)parser.c	8.7 (Berkeley) 5/16/95";
 #else
-__RCSID("$NetBSD: parser.c,v 1.183 2024/10/03 20:14:01 rillig Exp $");
+__RCSID("$NetBSD: parser.c,v 1.184 2024/10/21 15:57:45 kre Exp $");
 #endif
 #endif /* not lint */
 
@@ -2216,7 +2216,7 @@ parsesub: {
 	int typeloc;
 	int flags;
 	const char *p;
-	static const char types[] = "}-+?=";
+	static const char types[] = "}-+?=";	/* see parser.h VSXYZ defs */
 
 	c = pgetc_linecont();
 	VTRACE(DBG_LEXER, ("\"$%c\"(%#.2x)", c&0xFF, c&0x1FF));
@@ -2323,10 +2323,8 @@ parsesub: {
 			c = pgetc_linecont();
 		}
 		else {
-			VTRACE(DBG_LEXER, ("\"$%c(%#.2x)??\n", c&0xFF,c&0x1FF));
- badsub:;
-			cleanup_state_stack(stack);
-			synerror("Bad substitution");
+			VTRACE(DBG_LEXER, ("\"$%c(%#.2x)??", c&0xFF, c&0xFF));
+			subtype = VSUNKNOWN;
 		}
 
 		STPUTC('=', out);
@@ -2338,9 +2336,29 @@ parsesub: {
 				/*FALLTHROUGH*/
 			default:
 				p = strchr(types, c);
-				if (p == NULL)
-					goto badsub;
-				subtype = p - types + VSNORMAL;
+				if (__predict_false(p == NULL)) {
+					subtype = VSUNKNOWN;
+						/*
+						 * keep the unknown modifier
+						 * for the error message.
+						 *
+						 * Note that if we came from
+						 * the case ':' above, that
+						 * is the unknown modifier,
+						 * not the following character
+						 *
+						 * It is not important that
+						 * we keep the remaining word
+						 * intact, it will never be
+						 * used.
+						 */
+					if (flags & VSNUL)
+						/* (ie: lose c) */
+						STPUTC(':', out);
+					else
+						STPUTC(c, out);
+				} else
+					subtype = p - types + VSNORMAL;
 				break;
 			case '%':
 			case '#':
@@ -2357,8 +2375,10 @@ parsesub: {
 				}
 			}
 		} else {
-			if (subtype == VSLENGTH && c != /*{*/ '}')
-				synerror("no modifiers allowed with ${#var}");
+			if (subtype == VSLENGTH && c != /*{*/ '}') {
+				STPUTC('#', out);
+				subtype = VSUNKNOWN;
+			}
 			pungetc();
 		}
 		if (quoted || arinest)

Index: src/bin/sh/parser.h
diff -u src/bin/sh/parser.h:1.29 src/bin/sh/parser.h:1.30
--- src/bin/sh/parser.h:1.29	Fri Jul 12 08:35:47 2024
+++ src/bin/sh/parser.h	Mon Oct 21 15:57:45 2024
@@ -1,4 +1,4 @@
-/*	$NetBSD: parser.h,v 1.29 2024/07/12 08:35:47 kre Exp $	*/
+/*	$NetBSD: parser.h,v 1.30 2024/10/21 15:57:45 kre Exp $	*/
 
 /*-
  * Copyright (c) 1991, 1993
@@ -49,7 +49,8 @@
 #define	CTLNONL '\212'		/* The \n in a deleted \ \n sequence */
 			/* pure concidence that (CTLNONL & 0x7f) == '\n' */
 #define	CTLCNL	'\213'		/* A $'\n' - newline not counted */
-#define	CTL_LAST '\213'		/* last 'special' character */
+#define	CTLVARMOD '\214'	/* a modifier in a variable expansion */
+#define	CTL_LAST '\214'		/* last 'special' character */
 
 /* variable substitution byte (follows CTLVAR) */
 #define VSTYPE		0x0f	/* type of variable substitution */
@@ -59,7 +60,7 @@
 #define VSPATQ		0x40	/* ensure correct pattern quoting in ${x#pat} */
 #define VSQUOTE	 	0x80	/* inside double quotes--suppress splitting */
 
-/* values of VSTYPE field */
+/* values of VSTYPE field (nb: 0 reserved for "not determined yet") */
 #define VSNORMAL	0x1		/* normal variable:  $var or ${var} */
 #define VSMINUS		0x2		/* ${var-text} */
 #define VSPLUS		0x3		/* ${var+text} */
@@ -70,6 +71,7 @@
 #define VSTRIMRIGHT	0x8		/* ${var%pattern} */
 #define VSTRIMRIGHTMAX 	0x9		/* ${var%%pattern} */
 #define VSLENGTH	0xa		/* ${#var} */
+#define VSUNKNOWN	0xf		/* unknown modifier */
 
 union node *parsecmd(int);
 void fixredir(union node *, const char *, int);

Reply via email to