The branch main has been updated by kevans:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=8a5ceebece0311bc41180b3ca0ce7237def1e253

commit 8a5ceebece0311bc41180b3ca0ce7237def1e253
Author:     Kyle Evans <kev...@freebsd.org>
AuthorDate: 2025-08-08 04:26:51 +0000
Commit:     Kyle Evans <kev...@freebsd.org>
CommitDate: 2025-08-08 04:26:51 +0000

    kern: disallow user scheduling/debugging/signalling of jailed procs
    
    Currently, jails are generally ignored when determining whether the
    current process/thread can take action upon another, except to determine
    if the target's jail is somewhere in the source's hierarchy. Notably,
    uid 1001 in a jail (including prison0) can take action upon a process
    run by uid 1001 inside of a subordinate jail by default.
    
    While this could be considered a feature at times, it is a scenario
    that really should be deliberately crafted; there is no guarantee that
    uid 1001 in the parent jail is at all related to uid 1001 in a
    subordinate.
    
    This change introduces three new privileges that grant a process
    this kind of insight into other jails:
    
    - PRIV_DEBUG_DIFFJAIL
    - PRIV_SCHED_DIFFJAIL
    - PRIV_SIGNAL_DIFFJAIL
    
    These can be granted independently or in conjunction with the
    accompanying *_DIFFCRED privileges, i.e.:
    
    - PRIV_DEBUG_DIFFCRED alone will let uid 1001 debug uid 1002, but
      PRIV_DEBUG_DIFFJAIL is additionally needed to let it debug uid 1002
      in a jail.
    
    - PRIV_DEBUG_DIFFJAIL alone will let uid 1001 debug uid 1001 in a jail,
      but will not allow it to debug uid 1002 in a jail.
    
    Note that security.bsd.see_jail_proc can be used for similar effects,
    but does not prevent a user from learning the pid of a jailed process
    with matching creds and signalling it or rescheduling it (e.g., cpuset).
    Debugging is restricted by visibility in all cases, so that one is less
    of a concern.
    
    This change adds a new jail(8) parameter for the parent to indicate on
    a per-jail basis if its users are open to being tampered with by the
    parent's unprivileged users: allow.unprivileged_parent_tampering.  This
    is disabled by default, but may be enabled to bypass the new priv(9)
    checks in some scenarios where the functionality is useful.  For
    development setups that involve regularly debugging jailed processes
    from outside the jail, consider adding a default
    `allow.unprivileged_parent_tampering;` to your /etc/jail.conf.
    
    This may get MFC'd in the future with the default flipped to preserve
    pre-existing behavior but allow opt-in for the new position sooner.
    
    Reviewed by:    jamie
    Differential Revision:  https://reviews.freebsd.org/D51645
---
 sys/kern/kern_jail.c | 10 ++++++++++
 sys/kern/kern_prot.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 sys/sys/jail.h       |  9 ++++++---
 sys/sys/priv.h       |  3 +++
 usr.sbin/jail/jail.8 |  8 +++++++-
 5 files changed, 82 insertions(+), 4 deletions(-)

diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 14e6e735f8e7..7c9a15ae18f3 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -235,6 +235,9 @@ static struct bool_flags pr_flag_allow[NBBY * NBPW] = {
        {"allow.adjtime", "allow.noadjtime", PR_ALLOW_ADJTIME},
        {"allow.settime", "allow.nosettime", PR_ALLOW_SETTIME},
        {"allow.routing", "allow.norouting", PR_ALLOW_ROUTING},
+       {"allow.unprivileged_parent_tampering",
+           "allow.nounprivileged_parent_tampering",
+           PR_ALLOW_UNPRIV_PARENT_TAMPER},
 };
 static unsigned pr_allow_all = PR_ALLOW_ALL_STATIC;
 const size_t pr_flag_allow_size = sizeof(pr_flag_allow);
@@ -4009,6 +4012,7 @@ prison_priv_check(struct ucred *cred, int priv)
        case PRIV_DEBUG_DIFFCRED:
        case PRIV_DEBUG_SUGID:
        case PRIV_DEBUG_UNPRIV:
+       case PRIV_DEBUG_DIFFJAIL:
 
                /*
                 * Allow jail to set various resource limits and login
@@ -4046,8 +4050,10 @@ prison_priv_check(struct ucred *cred, int priv)
                 */
        case PRIV_SCHED_DIFFCRED:
        case PRIV_SCHED_CPUSET:
+       case PRIV_SCHED_DIFFJAIL:
        case PRIV_SIGNAL_DIFFCRED:
        case PRIV_SIGNAL_SUGID:
+       case PRIV_SIGNAL_DIFFJAIL:
 
                /*
                 * Allow jailed processes to write to sysctls marked as jail
@@ -4691,6 +4697,10 @@ SYSCTL_JAIL_PARAM(_allow, read_msgbuf, CTLTYPE_INT | CTLFLAG_RW,
     "B", "Jail may read the kernel message buffer");
 SYSCTL_JAIL_PARAM(_allow, unprivileged_proc_debug, CTLTYPE_INT | CTLFLAG_RW,
     "B", "Unprivileged processes may use process debugging facilities");
+SYSCTL_JAIL_PARAM(_allow, unprivileged_parent_tampering,
+    CTLTYPE_INT | CTLFLAG_RW, "B",
+    "Unprivileged parent jail processes may tamper with same-uid processes"
+    " (signal/debug/cpuset)");
 SYSCTL_JAIL_PARAM(_allow, suser, CTLTYPE_INT | CTLFLAG_RW,
     "B", "Processes in jail with uid 0 have privilege");
 #ifdef VIMAGE
diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c
index bbb622547598..2cd5b7069023 100644
--- a/sys/kern/kern_prot.c
+++ b/sys/kern/kern_prot.c
@@ -1913,6 +1913,38 @@ cr_canseejailproc(struct ucred *u1, struct ucred *u2)
        return (ESRCH);
 }
 
+/*
+ * Determine if u1 can tamper with the subject specified by u2, if they are in
+ * different jails and 'unprivileged_parent_tampering' jail policy allows it.
+ *
+ * May be called if u1 and u2 are in the same jail, but it is expected that the
+ * caller has already done a prison_check() prior to calling it.
+ *
+ * Returns: 0 for permitted, EPERM otherwise
+ */
+static int
+cr_can_tamper_with_subjail(struct ucred *u1, struct ucred *u2, int priv)
+{
+
+       MPASS(prison_check(u1, u2) == 0);
+       if (u1->cr_prison == u2->cr_prison)
+               return (0);
+
+       if (priv_check_cred(u1, priv) == 0)
+               return (0);
+
+       /*
+        * Jails do not maintain a distinct UID space, so without this check
+        * process visibility alone would decide whether an unprivileged
+        * process may tamper with a process in a subjail.  Deny by default;
+        * permit only if u2 has allow.unprivileged_parent_tampering set.
+        */
+       if (prison_allow(u2, PR_ALLOW_UNPRIV_PARENT_TAMPER))
+               return (0);
+
+       return (EPERM);
+}
+
 /*
  * Helper for cr_cansee*() functions to abide by system-wide security.bsd.see_*
  * policies.  Determines if u1 "can see" u2 according to these policies.
@@ -2062,6 +2094,19 @@ cr_cansignal(struct ucred *cred, struct proc *proc, int signum)
                        return (error);
        }
 
+       /*
+        * At this point, the target may be in a different jail than the
+        * subject -- the subject must be in a parent jail to the target,
+        * whether it is prison0 or a subordinate of prison0 that has
+        * children.  Additional privileges are required to allow this, as
+        * whether the creds are truly equivalent or not must be determined on
+        * a case-by-case basis.
+        */
+       error = cr_can_tamper_with_subjail(cred, proc->p_ucred,
+           PRIV_SIGNAL_DIFFJAIL);
+       if (error)
+               return (error);
+
        return (0);
 }
 
@@ -2138,6 +2183,12 @@ p_cansched(struct thread *td, struct proc *p)
                if (error)
                        return (error);
        }
+
+       error = cr_can_tamper_with_subjail(td->td_ucred, p->p_ucred,
+           PRIV_SCHED_DIFFJAIL);
+       if (error)
+               return (error);
+
        return (0);
 }
 
@@ -2258,6 +2309,11 @@ p_candebug(struct thread *td, struct proc *p)
                        return (error);
        }
 
+       error = cr_can_tamper_with_subjail(td->td_ucred, p->p_ucred,
+           PRIV_DEBUG_DIFFJAIL);
+       if (error)
+               return (error);
+
        /* Can't trace init when securelevel > 0. */
        if (p == initproc) {
                error = securelevel_gt(td->td_ucred, 0);
diff --git a/sys/sys/jail.h b/sys/sys/jail.h
index efe5e3ee8db7..0ad80ec53157 100644
--- a/sys/sys/jail.h
+++ b/sys/sys/jail.h
@@ -260,6 +260,7 @@ struct prison_racct {
 #define        PR_ALLOW_ADJTIME                0x00080000
 #define        PR_ALLOW_SETTIME                0x00100000
 #define        PR_ALLOW_ROUTING                0x00200000
+#define        PR_ALLOW_UNPRIV_PARENT_TAMPER   0x00400000
 
 /*
  * PR_ALLOW_PRISON0 are the allow flags that we apply by default to prison0,
@@ -267,14 +268,16 @@ struct prison_racct {
  * build time.  PR_ALLOW_ALL_STATIC should contain any bit above that we expect
  * to be used on the system, while PR_ALLOW_PRISON0 will be some subset of that.
  */
-#define        PR_ALLOW_ALL_STATIC             0x003f87ff
-#define        PR_ALLOW_PRISON0                (PR_ALLOW_ALL_STATIC)
+#define        PR_ALLOW_ALL_STATIC             0x007f87ff
+#define        PR_ALLOW_PRISON0                \
+    (PR_ALLOW_ALL_STATIC & ~(PR_ALLOW_UNPRIV_PARENT_TAMPER))
 
 /*
  * PR_ALLOW_DIFFERENCES determines which flags are able to be
  * different between the parent and child jail upon creation.
  */
-#define        PR_ALLOW_DIFFERENCES            (PR_ALLOW_UNPRIV_DEBUG)
+#define        PR_ALLOW_DIFFERENCES            \
+    (PR_ALLOW_UNPRIV_DEBUG | PR_ALLOW_UNPRIV_PARENT_TAMPER)
 
 /*
  * OSD methods
diff --git a/sys/sys/priv.h b/sys/sys/priv.h
index 1f73877ab450..9c493629f7cf 100644
--- a/sys/sys/priv.h
+++ b/sys/sys/priv.h
@@ -115,6 +115,7 @@
 #define        PRIV_DEBUG_SUGID        81      /* Exempt debugging setuid proc. */
 #define        PRIV_DEBUG_UNPRIV       82      /* Exempt unprivileged debug limit. */
 #define        PRIV_DEBUG_DENIED       83      /* Exempt P2_NOTRACE. */
+#define        PRIV_DEBUG_DIFFJAIL     84      /* Exempt debugging other jails. */
 
 /*
  * Dtrace privileges.
@@ -193,6 +194,7 @@
 #define        PRIV_SCHED_CPUSET       206     /* Can manipulate cpusets. */
 #define        PRIV_SCHED_CPUSET_INTR  207     /* Can adjust IRQ to CPU binding. */
 #define        PRIV_SCHED_IDPRIO       208     /* Can set idle time scheduling. */
+#define        PRIV_SCHED_DIFFJAIL     209     /* Exempt scheduling other jails. */
 
 /*
  * POSIX semaphore privileges.
@@ -204,6 +206,7 @@
  */
 #define        PRIV_SIGNAL_DIFFCRED    230     /* Exempt signalling other users. */
 #define        PRIV_SIGNAL_SUGID       231     /* Non-conserv signal setuid proc. */
+#define        PRIV_SIGNAL_DIFFJAIL    232     /* Exempt signalling other jails. */
 
 /*
  * Sysctl privileges.
diff --git a/usr.sbin/jail/jail.8 b/usr.sbin/jail/jail.8
index dd7b91d5cefa..421aa9babb4c 100644
--- a/usr.sbin/jail/jail.8
+++ b/usr.sbin/jail/jail.8
@@ -23,7 +23,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd May 11, 2025
+.Dd August 7, 2025
 .Dt JAIL 8
 .Os
 .Sh NAME
@@ -686,6 +686,12 @@ in the
 file outside of the jails.
 .It Va allow.reserved_ports
 The jail root may bind to ports lower than 1024.
+.It Va allow.unprivileged_parent_tampering
+Unprivileged processes in the jail's parent may tamper with processes of the
+same UID in the jail.
+This includes the ability to signal, debug, and
+.Xr cpuset 1
+processes that belong to the jail.
 .It Va allow.unprivileged_proc_debug
 Unprivileged processes in the jail may use debugging facilities.
 .It Va allow.suser

Reply via email to