commit: 8a607ed23beb4995627f9c4e2c7580469ace56bb
Author: Kerin Millar <kfm <AT> plushkava <DOT> net>
AuthorDate: Fri Jun 27 09:05:07 2025 +0000
Commit: Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Sat Jun 28 23:50:57 2025 +0000
URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=8a607ed2
phase-functions.sh: filter bash vars comprehensively in
__filter_readonly_variables()
Presently, the __filter_readonly_variables() function attempts to filter
variables that pertain to bash. That is, those which are either set by
bash, or which are otherwise able to influence the behaviour of bash. Up
until recently, a distinction was made between variables known to be
readonly and those that are not. However, it can be demonstrated that
both of the implied sets have been poorly maintained. Consider the prior
definition of the 'readonly_bash_vars' variable, which was as follows.
BASHOPTS BASHPID DIRSTACK EUID FUNCNAME GROUPS PIPESTATUS PPID SHELLOPTS
UID
Ten distinct names. Yet, a new instance of bash in its capacity as a
non-interactive shell defines only six readonly variables.
$ env -i bash -c 'compgen -A variable |
while read -r; do [[ ${!REPLY@a} == *r* ]] && printf .; done' |
wc -c
6
Ergo, it was incorrect to categorise 'DIRSTACK', 'FUNCNAME', 'GROUPS'
and 'PIPESTATUS' as readonly, whether or not it be sensible to attempt
to assign a value to any of them.
Now consider the prior definition of the 'bash_misc_vars' variable,
which was as follows.
BASH BASH_.* COLUMNS COMP_WORDBREAKS HISTCMD HISTFILE HOSTNAME HOSTTYPE
IFS LINENO MACHTYPE OLDPWD OPTERR OPTIND OSTYPE POSIXLY_CORRECT PS4 PWD
RANDOM SECONDS SHLVL _
Putting aside the "BASH_.*" pattern, there are 21 words, which is rather
short of the mark. By my count there are 105 variables in bash-5.3-rc2
that fall into any of the following three categories.
a) initially set by BASH
b) later set by BASH in response to particular events
c) not set by BASH but able to influence its behaviour if set
In fairness, many of these variables are not expected to have been
exported and/or pertain only to the interactive context, though I would
point out that misguided users sometimes export variables that ought not
to be exported. Regardless, I would aver that portage ought to be
thorough and try its level best never to pollute environment dumps with
the variables in question. There are attendant concerns regarding
hygiene, safety, and even privacy in some cases.
Address this issue in a manner twofold. Firstly, by introducing a
routine whereby bash(1) is executed with an empty environment and asked
to report the names of the variables that it has declared. This, alone,
accounts for at least 44 variables. Secondly, revise the set of
hard-coded bash variable names so as to be comprehensive.
With this change, I was immediately able to observe an improvement in
"environment.bz2" hygiene. In particular, the following variables are
now excluded.
- EPOCHREALTIME
- EPOCHSECONDS
- MAIL
- SHELL
- SRANDOM
See-also: 9c80c1b5ee3bfe832a114f33970f0dbad7c3f713
See-also: 5688c04e90e3afd05063a2a4f62930498d8d4dad
Signed-off-by: Kerin Millar <kfm <AT> plushkava.net>
Signed-off-by: Sam James <sam <AT> gentoo.org>
bin/phase-functions.sh | 36 ++++++++++++++++++++++++------------
1 file changed, 24 insertions(+), 12 deletions(-)
diff --git a/bin/phase-functions.sh b/bin/phase-functions.sh
index dee18d3ceb..536dec387b 100644
--- a/bin/phase-functions.sh
+++ b/bin/phase-functions.sh
@@ -48,7 +48,7 @@ PORTAGE_MUTABLE_FILTERED_VARS="AA HOSTNAME"
# Read an environment from stdin and echo to stdout while filtering variables
# with names that are known to cause interference:
#
-# * some specific variables for which bash does not allow assignment
+# * all variables that can be set by or that may affect bash (except PATH)
# * some specific variables that affect portage or sandbox behavior
# * variable names that begin with a digit or that contain any
# non-alphanumeric characters that are not supported by bash
@@ -80,21 +80,33 @@ PORTAGE_MUTABLE_FILTERED_VARS="AA HOSTNAME"
# as ${PORTAGE_SAVED_READONLY_VARS} and ${PORTAGE_MUTABLE_FILTERED_VARS}.
# This is enabled automatically if EMERGE_FROM=binary, since it preserves
# variables from when the package was originally built.
-#
-# In bash-3.2_p20+ an attempt to assign BASH_*, FUNCNAME, GROUPS or any
-# readonly variable cause the shell to exit while executing the "source"
-# builtin command. To avoid this problem, this function filters those
-# variables out and discards them. See bug #190128.
__filter_readonly_variables() {
local -a {binpkg_untrusted,filtered_sandbox,misc_garbage,bash}_vars
words
local IFS
- bash_vars=(
- "BASH_.*" BASH BASHOPTS BASHPID COLUMNS COMP_WORDBREAKS
- DIRSTACK EUID FUNCNAME GROUPS HISTCMD HISTFILE HOSTNAME
- HOSTTYPE IFS LINENO MACHTYPE OLDPWD OPTERR OPTIND OSTYPE
- PIPESTATUS POSIXLY_CORRECT PPID PS4 PWD RANDOM SECONDS
- SHELLOPTS SHLVL UID _
+ # Collect an initial list of special bash variables by instructing a
+ # hygienic instance of bash(1) to report them.
+ mapfile -t bash_vars < <(
+ # Like compgen -A variable but doesn't require readline support.
+ env -i -- "${BASH}" -c "printf %s\\\n $(printf '${!%s*} '
{A..Z} {a..z} _)" \
+ | grep -vx PATH
+ )
+ # Incorporate other variables that are known to either be set by or be
+ # able to influence bash. This list was last updated for bash-5.3-rc2.
+ bash_vars+=(
+ BASH_COMPAT BASH_ENV BASH_LOADABLES_PATH BASH_REMATCH
+ BASH_XTRACEFD CDPATH CHILD_MAX COLUMNS COMPREPLY COMP_CWORD
+ COMP_KEY COMP_LINE COMP_POINT COMP_TYPE COMP_WORDS COPROC EMACS
+ ENV EXECIGNORE FCEDIT FIGNORE FUNCNAME FUNCNEST GLOBIGNORE
+ GLOBSORT HISTCONTROL HISTFILE HISTFILESIZE HISTIGNORE HISTSIZE
+ HISTTIMEFORMAT HOME HOSTFILE IGNOREEOF INPUTRC INSIDE_EMACS
+ LINES MAIL MAILCHECK MAILPATH MAPFILE OLDPWD OPTARG PIPESTATUS
+ POSIXLY_CORRECT PROMPT_COMMAND PROMPT_DIRTRIM PS0 PS1 PS2 PS3
+ READLINE_ARGUMENT READLINE_LINE READLINE_MARK READLINE_POINT
+ REPLY TIMEFORMAT TMOUT TMPDIR auto_resume histchars
+
+ # Exported functions bear this prefix.
+ "BASH_FUNC_.*"
)
filtered_sandbox_vars=(
SANDBOX_ACTIVE SANDBOX_BASHRC SANDBOX_DEBUG_LOG