The branch main has been updated by kevans: URL: https://cgit.FreeBSD.org/src/commit/?id=8a5ceebece0311bc41180b3ca0ce7237def1e253
commit 8a5ceebece0311bc41180b3ca0ce7237def1e253 Author: Kyle Evans <kev...@freebsd.org> AuthorDate: 2025-08-08 04:26:51 +0000 Commit: Kyle Evans <kev...@freebsd.org> CommitDate: 2025-08-08 04:26:51 +0000 kern: disallow user scheduling/debugging/signalling of jailed procs Currently, jails are generally ignored when determining whether the current process/thread can take action upon another, except to determine if the target's jail is somewhere in the source's hierarchy. Notably, uid 1001 in a jail (including prison0) can take action upon a process run by uid 1001 inside of a subordinate jail by default. While this could be considered a feature at times, it is a scenario that really should be deliberately crafted; there is no guarantee that uid 1001 in the parent jail is at all related to uid 1001 in a subordinate. This change introduces three new privileges that grant a process this kind of insight into other jails: - PRIV_DEBUG_DIFFJAIL - PRIV_SCHED_DIFFJAIL - PRIV_SIGNAL_DIFFJAIL These can be granted independently or in conjunction with the accompanying *_DIFFCRED privileges, i.e.: - PRIV_DEBUG_DIFFCRED alone will let uid 1001 debug uid 1002, but PRIV_DEBUG_DIFFJAIL is additionally needed to let it debug uid 1002 in a jail. - PRIV_DEBUG_DIFFJAIL alone will let uid 1001 debug uid 1001 in a jail, but will not allow it to debug uid 1002 in a jail. Note that security.bsd.see_jail_proc can be used for similar effects, but does not prevent a user from learning the pid of a jailed process with matching creds and signalling it or rescheduling it (e.g., cpuset). Debugging is restricted by visibility in all cases, so that one is less of a concern. This change adds a new jail(8) parameter for the parent to indicate on a per-jail basis if its users are open to being tampered with by the parent's unprivileged users: allow.unprivileged_parent_tampering. 
This is disabled by default, but may be enabled to bypass the new priv(9) checks in some scenarios where the functionality is useful. For development setups that involve regularly debugging jailed processes from outside the jail, consider adding a default `allow.unprivileged_parent_tampering;` to your /etc/jail.conf. This may get MFC'd in the future with the default flipped to preserve pre-existing behavior but allow opt-in for the new position sooner. Reviewed by: jamie Differential Revision: https://reviews.freebsd.org/D51645 --- sys/kern/kern_jail.c | 10 ++++++++++ sys/kern/kern_prot.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++ sys/sys/jail.h | 9 ++++++--- sys/sys/priv.h | 3 +++ usr.sbin/jail/jail.8 | 8 +++++++- 5 files changed, 82 insertions(+), 4 deletions(-) diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c index 14e6e735f8e7..7c9a15ae18f3 100644 --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -235,6 +235,9 @@ static struct bool_flags pr_flag_allow[NBBY * NBPW] = { {"allow.adjtime", "allow.noadjtime", PR_ALLOW_ADJTIME}, {"allow.settime", "allow.nosettime", PR_ALLOW_SETTIME}, {"allow.routing", "allow.norouting", PR_ALLOW_ROUTING}, + {"allow.unprivileged_parent_tampering", + "allow.nounprivileged_parent_tampering", + PR_ALLOW_UNPRIV_PARENT_TAMPER}, }; static unsigned pr_allow_all = PR_ALLOW_ALL_STATIC; const size_t pr_flag_allow_size = sizeof(pr_flag_allow); @@ -4009,6 +4012,7 @@ prison_priv_check(struct ucred *cred, int priv) case PRIV_DEBUG_DIFFCRED: case PRIV_DEBUG_SUGID: case PRIV_DEBUG_UNPRIV: + case PRIV_DEBUG_DIFFJAIL: /* * Allow jail to set various resource limits and login @@ -4046,8 +4050,10 @@ prison_priv_check(struct ucred *cred, int priv) */ case PRIV_SCHED_DIFFCRED: case PRIV_SCHED_CPUSET: + case PRIV_SCHED_DIFFJAIL: case PRIV_SIGNAL_DIFFCRED: case PRIV_SIGNAL_SUGID: + case PRIV_SIGNAL_DIFFJAIL: /* * Allow jailed processes to write to sysctls marked as jail @@ -4691,6 +4697,10 @@ SYSCTL_JAIL_PARAM(_allow, 
read_msgbuf, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may read the kernel message buffer"); SYSCTL_JAIL_PARAM(_allow, unprivileged_proc_debug, CTLTYPE_INT | CTLFLAG_RW, "B", "Unprivileged processes may use process debugging facilities"); +SYSCTL_JAIL_PARAM(_allow, unprivileged_parent_tampering, + CTLTYPE_INT | CTLFLAG_RW, "B", + "Unprivileged parent jail processes may tamper with same-uid processes" + " (signal/debug/cpuset)"); SYSCTL_JAIL_PARAM(_allow, suser, CTLTYPE_INT | CTLFLAG_RW, "B", "Processes in jail with uid 0 have privilege"); #ifdef VIMAGE diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c index bbb622547598..2cd5b7069023 100644 --- a/sys/kern/kern_prot.c +++ b/sys/kern/kern_prot.c @@ -1913,6 +1913,38 @@ cr_canseejailproc(struct ucred *u1, struct ucred *u2) return (ESRCH); } +/* + * Determine if u1 can tamper with the subject specified by u2, if they are in + * different jails and 'unprivileged_parent_tampering' jail policy allows it. + * + * May be called if u1 and u2 are in the same jail, but it is expected that the + * caller has already done a prison_check() prior to calling it. + * + * Returns: 0 for permitted, EPERM otherwise + */ +static int +cr_can_tamper_with_subjail(struct ucred *u1, struct ucred *u2, int priv) +{ + + MPASS(prison_check(u1, u2) == 0); + if (u1->cr_prison == u2->cr_prison) + return (0); + + if (priv_check_cred(u1, priv) == 0) + return (0); + + /* + * Jails do not maintain a distinct UID space, so process visibility is + * all that would control an unprivileged process' ability to tamper + * with a process in a subjail by default if we did not have the + * allow.unprivileged_parent_tampering knob to restrict it by default. + */ + if (prison_allow(u2, PR_ALLOW_UNPRIV_PARENT_TAMPER)) + return (0); + + return (EPERM); +} + /* * Helper for cr_cansee*() functions to abide by system-wide security.bsd.see_* * policies. Determines if u1 "can see" u2 according to these policies. 
@@ -2062,6 +2094,19 @@ cr_cansignal(struct ucred *cred, struct proc *proc, int signum) return (error); } + /* + * At this point, the target may be in a different jail than the + * subject -- the subject must be in a parent jail to the target, + * whether it is prison0 or a subordinate of prison0 that has + * children. Additional privileges are required to allow this, as + * whether the creds are truly equivalent or not must be determined on + * a case-by-case basis. + */ + error = cr_can_tamper_with_subjail(cred, proc->p_ucred, + PRIV_SIGNAL_DIFFJAIL); + if (error) + return (error); + return (0); } @@ -2138,6 +2183,12 @@ p_cansched(struct thread *td, struct proc *p) if (error) return (error); } + + error = cr_can_tamper_with_subjail(td->td_ucred, p->p_ucred, + PRIV_SCHED_DIFFJAIL); + if (error) + return (error); + return (0); } @@ -2258,6 +2309,11 @@ p_candebug(struct thread *td, struct proc *p) return (error); } + error = cr_can_tamper_with_subjail(td->td_ucred, p->p_ucred, + PRIV_DEBUG_DIFFJAIL); + if (error) + return (error); + /* Can't trace init when securelevel > 0. */ if (p == initproc) { error = securelevel_gt(td->td_ucred, 0); diff --git a/sys/sys/jail.h b/sys/sys/jail.h index efe5e3ee8db7..0ad80ec53157 100644 --- a/sys/sys/jail.h +++ b/sys/sys/jail.h @@ -260,6 +260,7 @@ struct prison_racct { #define PR_ALLOW_ADJTIME 0x00080000 #define PR_ALLOW_SETTIME 0x00100000 #define PR_ALLOW_ROUTING 0x00200000 +#define PR_ALLOW_UNPRIV_PARENT_TAMPER 0x00400000 /* * PR_ALLOW_PRISON0 are the allow flags that we apply by default to prison0, @@ -267,14 +268,16 @@ struct prison_racct { * build time. PR_ALLOW_ALL_STATIC should contain any bit above that we expect * to be used on the system, while PR_ALLOW_PRISON0 will be some subset of that. 
*/ -#define PR_ALLOW_ALL_STATIC 0x003f87ff -#define PR_ALLOW_PRISON0 (PR_ALLOW_ALL_STATIC) +#define PR_ALLOW_ALL_STATIC 0x007f87ff +#define PR_ALLOW_PRISON0 \ + (PR_ALLOW_ALL_STATIC & ~(PR_ALLOW_UNPRIV_PARENT_TAMPER)) /* * PR_ALLOW_DIFFERENCES determines which flags are able to be * different between the parent and child jail upon creation. */ -#define PR_ALLOW_DIFFERENCES (PR_ALLOW_UNPRIV_DEBUG) +#define PR_ALLOW_DIFFERENCES \ + (PR_ALLOW_UNPRIV_DEBUG | PR_ALLOW_UNPRIV_PARENT_TAMPER) /* * OSD methods diff --git a/sys/sys/priv.h b/sys/sys/priv.h index 1f73877ab450..9c493629f7cf 100644 --- a/sys/sys/priv.h +++ b/sys/sys/priv.h @@ -115,6 +115,7 @@ #define PRIV_DEBUG_SUGID 81 /* Exempt debugging setuid proc. */ #define PRIV_DEBUG_UNPRIV 82 /* Exempt unprivileged debug limit. */ #define PRIV_DEBUG_DENIED 83 /* Exempt P2_NOTRACE. */ +#define PRIV_DEBUG_DIFFJAIL 84 /* Exempt debugging other jails. */ /* * Dtrace privileges. @@ -193,6 +194,7 @@ #define PRIV_SCHED_CPUSET 206 /* Can manipulate cpusets. */ #define PRIV_SCHED_CPUSET_INTR 207 /* Can adjust IRQ to CPU binding. */ #define PRIV_SCHED_IDPRIO 208 /* Can set idle time scheduling. */ +#define PRIV_SCHED_DIFFJAIL 209 /* Exempt scheduling other jails. */ /* * POSIX semaphore privileges. @@ -204,6 +206,7 @@ */ #define PRIV_SIGNAL_DIFFCRED 230 /* Exempt signalling other users. */ #define PRIV_SIGNAL_SUGID 231 /* Non-conserv signal setuid proc. */ +#define PRIV_SIGNAL_DIFFJAIL 232 /* Exempt signalling other jails. */ /* * Sysctl privileges. diff --git a/usr.sbin/jail/jail.8 b/usr.sbin/jail/jail.8 index dd7b91d5cefa..421aa9babb4c 100644 --- a/usr.sbin/jail/jail.8 +++ b/usr.sbin/jail/jail.8 @@ -23,7 +23,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd May 11, 2025 +.Dd August 7, 2025 .Dt JAIL 8 .Os .Sh NAME @@ -686,6 +686,12 @@ in the file outside of the jails. .It Va allow.reserved_ports The jail root may bind to ports lower than 1024. 
+.It Va allow.unprivileged_parent_tampering +Unprivileged processes in the jail's parent may tamper with processes of the +same UID in the jail. +This includes the ability to signal, debug, and +.Xr cpuset 1 +processes that belong to the jail. .It Va allow.unprivileged_proc_debug Unprivileged processes in the jail may use debugging facilities. .It Va allow.suser