The branch main has been updated by jamie:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=1158508a8086a1a93492c1a2e22b61cd7fee4ec7

commit 1158508a8086a1a93492c1a2e22b61cd7fee4ec7
Author:     Jamie Gritton <ja...@freebsd.org>
AuthorDate: 2021-02-21 21:24:47 +0000
Commit:     Jamie Gritton <ja...@freebsd.org>
CommitDate: 2021-02-21 21:24:47 +0000

    jail: Add pr_state to struct prison
    
    Rather that using references (pr_ref and pr_uref) to deduce the state
    of a prison, keep track of its state explicitly.  A prison is either
    "invalid" (pr_ref == 0), "alive" (pr_uref > 0) or "dying"
    (pr_uref == 0).
    
    State transitions are generally tied to the reference counts, but with
    some flexibility: a new prison is "invalid" even though it now starts
    with a reference, and jail_remove(2) sets the state to "dying" before
    the user reference count drops to zero (which was prviously
    accomplished via the PR_REMOVE flag).
    
    pr_state is protected by both the prison mutex and allprison_lock, so
    it has the same availablity guarantees as the reference counts do.
    
    Differential Revision:  https://reviews.freebsd.org/D27876
---
 sys/kern/kern_jail.c | 102 +++++++++++++++++++++++++++------------------------
 sys/sys/jail.h       |  14 +++++--
 2 files changed, 65 insertions(+), 51 deletions(-)

diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 342af50462f2..1ddfe3c3df5f 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -106,6 +106,7 @@ struct prison prison0 = {
        .pr_path        = "/",
        .pr_securelevel = -1,
        .pr_devfs_rsnum = 0,
+       .pr_state       = PRISON_STATE_ALIVE,
        .pr_childmax    = JAIL_MAX,
        .pr_hostuuid    = DEFAULT_HOSTUUID,
        .pr_children    = LIST_HEAD_INITIALIZER(prison0.pr_children),
@@ -663,7 +664,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int 
flags)
                }
                ch_flags |= jsf->new | jsf->disable;
        }
-       if ((flags & (JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH)) == JAIL_CREATE
+       if ((flags & (JAIL_CREATE | JAIL_ATTACH)) == JAIL_CREATE
            && !(pr_flags & PR_PERSIST)) {
                error = EINVAL;
                vfs_opterror(opts, "new jail must persist or attach");
@@ -1198,6 +1199,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int 
flags)
                        /* This brings the parent back to life. */
                        mtx_lock(&ppr->pr_mtx);
                        refcount_acquire(&ppr->pr_uref);
+                       ppr->pr_state = PRISON_STATE_ALIVE;
                        mtx_unlock(&ppr->pr_mtx);
                        error = osd_jail_call(ppr, PR_METHOD_CREATE, opts);
                        if (error) {
@@ -1216,8 +1218,10 @@ kern_jail_set(struct thread *td, struct uio *optuio, int 
flags)
                }
 
                pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
-               refcount_init(&pr->pr_ref, 0);
+               pr->pr_state = PRISON_STATE_INVALID;
+               refcount_init(&pr->pr_ref, 1);
                refcount_init(&pr->pr_uref, 0);
+               drflags |= PD_DEREF;
                LIST_INIT(&pr->pr_children);
                mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK);
                TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
@@ -1311,11 +1315,6 @@ kern_jail_set(struct thread *td, struct uio *optuio, int 
flags)
 
                mtx_lock(&pr->pr_mtx);
                drflags |= PD_LOCKED;
-               /*
-                * New prisons do not yet have a reference, because we do not
-                * want others to see the incomplete prison once the
-                * allprison_lock is downgraded.
-                */
        } else {
                /*
                 * Grab a reference for existing prisons, to ensure they
@@ -1737,14 +1736,17 @@ kern_jail_set(struct thread *td, struct uio *optuio, 
int flags)
                prison_set_allow_locked(pr, tallow, 0);
        /*
         * Persistent prisons get an extra reference, and prisons losing their
-        * persist flag lose that reference.  Only do this for existing prisons
-        * for now, so new ones will remain unseen until after the module
-        * handlers have completed.
+        * persist flag lose that reference.
         */
        born = !prison_isalive(pr);
-       if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) {
+       if (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags)) {
                if (pr_flags & PR_PERSIST) {
                        prison_hold(pr);
+                       /*
+                        * This may make a dead prison alive again, but wait
+                        * to label it as such until after OSD calls have had
+                        * a chance to run (and perhaps to fail).
+                        */
                        refcount_acquire(&pr->pr_uref);
                } else {
                        drflags |= PD_DEUREF;
@@ -1752,7 +1754,6 @@ kern_jail_set(struct thread *td, struct uio *optuio, int 
flags)
                }
        }
        pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags;
-       pr->pr_flags &= ~PR_REMOVE;
        mtx_unlock(&pr->pr_mtx);
        drflags &= ~PD_LOCKED;
 
@@ -1826,15 +1827,20 @@ kern_jail_set(struct thread *td, struct uio *optuio, 
int flags)
                goto done_deref;
        }
 
+       /*
+        * A new prison is now ready to be seen; either it has gained a user
+        * reference via persistence, or is about to gain one via attachment.
+        */
+       if (born) {
+               drflags = prison_lock_xlock(pr, drflags);
+               pr->pr_state = PRISON_STATE_ALIVE;
+       }
+
        /* Attach this process to the prison if requested. */
        if (flags & JAIL_ATTACH) {
                error = do_jail_attach(td, pr, prison_lock_xlock(pr, drflags));
                drflags &= ~(PD_LOCKED | PD_LIST_XLOCKED);
                if (error) {
-                       if (created) {
-                               /* do_jail_attach has removed the prison. */
-                               pr = NULL;
-                       }
                        vfs_opterror(opts, "attach failed");
                        goto done_deref;
                }
@@ -1852,22 +1858,6 @@ kern_jail_set(struct thread *td, struct uio *optuio, int 
flags)
 
        td->td_retval[0] = pr->pr_id;
 
-       if (created) {
-               /*
-                * Add a reference to newly created persistent prisons
-                * (which was not done earlier so that the prison would
-                * not be publicly visible).
-                */
-               if (pr_flags & PR_PERSIST) {
-                       drflags = prison_lock_xlock(pr, drflags);
-                       refcount_acquire(&pr->pr_ref);
-                       refcount_acquire(&pr->pr_uref);
-               } else {
-                       /* Non-persistent jails need no further changes. */
-                       pr = NULL;
-               }
-       }
-
  done_deref:
        /* Release any temporary prison holds and/or locks. */
        if (pr != NULL)
@@ -2332,7 +2322,7 @@ static void
 prison_remove_one(struct prison *pr)
 {
        struct proc *p;
-       int drflags;
+       int was_alive, drflags;
 
        drflags = PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED;
 
@@ -2340,7 +2330,8 @@ prison_remove_one(struct prison *pr)
         * Mark the prison as doomed, so it doesn't accidentally come back
         * to life.  It may still be explicitly brought back by jail_set(2).
         */
-       pr->pr_flags |= PR_REMOVE;
+       was_alive = pr->pr_state == PRISON_STATE_ALIVE;
+       pr->pr_state = PRISON_STATE_DYING;
 
        /* If the prison was persistent, it is not anymore. */
        if (pr->pr_flags & PR_PERSIST) {
@@ -2361,9 +2352,14 @@ prison_remove_one(struct prison *pr)
                return;
        }
 
+       /* Tell modules this prison has died. */
        mtx_unlock(&pr->pr_mtx);
+       drflags &= ~PD_LOCKED;
+       if (was_alive)
+               (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
+
        sx_xunlock(&allprison_lock);
-       drflags &= ~(PD_LOCKED | PD_LIST_XLOCKED);
+       drflags &= ~PD_LIST_XLOCKED;
        /*
         * Kill all processes unfortunate enough to be attached to this prison.
         */
@@ -2429,7 +2425,7 @@ do_jail_attach(struct thread *td, struct prison *pr, int 
drflags)
         * a process root from one prison, but attached to the jail
         * of another.
         */
-       refcount_acquire(&pr->pr_ref);
+       prison_hold(pr);
        refcount_acquire(&pr->pr_uref);
        drflags |= PD_DEREF | PD_DEUREF;
        mtx_unlock(&pr->pr_mtx);
@@ -2721,6 +2717,12 @@ prison_proc_free(struct prison *pr)
                 * prison_free() won't re-submit the task.
                 */
                prison_hold(pr);
+               mtx_lock(&pr->pr_mtx);
+               KASSERT(!(pr->pr_flags & PR_COMPLETE_PROC),
+                   ("Redundant last reference in prison_proc_free (jid=%d)",
+                    pr->pr_id));
+               pr->pr_flags |= PR_COMPLETE_PROC;
+               mtx_unlock(&pr->pr_mtx);
                taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
        }
 }
@@ -2735,14 +2737,14 @@ prison_complete(void *context, int pending)
        int drflags;
 
        /*
-        * This could be called to release the last reference, or the
-        * last user reference; the existence of a user reference implies
-        * the latter.  There will always be a reference to remove, as
-        * prison_proc_free adds one.
+        * This could be called to release the last reference, or the last
+        * user reference (plus the reference held in prison_proc_free).
         */
        drflags = prison_lock_xlock(pr, PD_DEREF);
-       if (refcount_load(&pr->pr_uref) > 0)
+       if (pr->pr_flags & PR_COMPLETE_PROC) {
+               pr->pr_flags &= ~PR_COMPLETE_PROC;
                drflags |= PD_DEUREF;
+       }
        prison_deref(pr, drflags);
 }
 
@@ -2777,7 +2779,8 @@ prison_deref(struct prison *pr, int flags)
                                        flags |= PD_DEREF;
                                }
                                flags = prison_lock_xlock(pr, flags);
-                               if (refcount_release(&pr->pr_uref)) {
+                               if (refcount_release(&pr->pr_uref) &&
+                                   pr->pr_state == PRISON_STATE_ALIVE) {
                                        /*
                                         * When the last user references goes,
                                         * this becomes a dying prison.
@@ -2785,6 +2788,7 @@ prison_deref(struct prison *pr, int flags)
                                        KASSERT(
                                            refcount_load(&prison0.pr_uref) > 0,
                                            ("prison0 pr_uref=0"));
+                                       pr->pr_state = PRISON_STATE_DYING;
                                        mtx_unlock(&pr->pr_mtx);
                                        flags &= ~PD_LOCKED;
                                        (void)osd_jail_call(pr,
@@ -2812,6 +2816,7 @@ prison_deref(struct prison *pr, int flags)
                                        KASSERT(
                                            refcount_load(&prison0.pr_ref) != 0,
                                            ("prison0 pr_ref=0"));
+                                       pr->pr_state = PRISON_STATE_INVALID;
                                        TAILQ_REMOVE(&allprison, pr, pr_list);
                                        LIST_REMOVE(pr, pr_sibling);
                                        TAILQ_INSERT_TAIL(&freeprison, pr,
@@ -3078,9 +3083,7 @@ bool
 prison_isalive(struct prison *pr)
 {
 
-       if (__predict_false(refcount_load(&pr->pr_uref) == 0))
-               return (false);
-       if (__predict_false(pr->pr_flags & PR_REMOVE))
+       if (__predict_false(pr->pr_state != PRISON_STATE_ALIVE))
                return (false);
        return (true);
 }
@@ -3096,6 +3099,8 @@ bool
 prison_isvalid(struct prison *pr)
 {
 
+       if (__predict_false(pr->pr_state == PRISON_STATE_INVALID))
+               return (false);
        if (__predict_false(refcount_load(&pr->pr_ref) == 0))
                return (false);
        return (true);
@@ -3760,8 +3765,7 @@ sysctl_jail_list(SYSCTL_HANDLER_ARGS)
                bzero(xp, sizeof(*xp));
                xp->pr_version = XPRISON_VERSION;
                xp->pr_id = cpr->pr_id;
-               xp->pr_state = prison_isalive(cpr)
-                   ? PRISON_STATE_ALIVE : PRISON_STATE_DYING;
+               xp->pr_state = cpr->pr_state;
                strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path));
                strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host));
                strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name));
@@ -4412,6 +4416,10 @@ db_show_prison(struct prison *pr)
        db_printf(" parent          = %p\n", pr->pr_parent);
        db_printf(" ref             = %d\n", pr->pr_ref);
        db_printf(" uref            = %d\n", pr->pr_uref);
+       db_printf(" state           = %s\n",
+           pr->pr_state == PRISON_STATE_ALIVE ? "alive" :
+           pr->pr_state == PRISON_STATE_DYING ? "dying" :
+           "invalid");
        db_printf(" path            = %s\n", pr->pr_path);
        db_printf(" cpuset          = %d\n", pr->pr_cpuset
            ? pr->pr_cpuset->cs_id : -1);
diff --git a/sys/sys/jail.h b/sys/sys/jail.h
index a48e189729dc..723a1fff0b82 100644
--- a/sys/sys/jail.h
+++ b/sys/sys/jail.h
@@ -88,9 +88,11 @@ struct xprison {
 };
 #define        XPRISON_VERSION         3
 
-#define        PRISON_STATE_INVALID    0
-#define        PRISON_STATE_ALIVE      1
-#define        PRISON_STATE_DYING      2
+enum prison_state {
+    PRISON_STATE_INVALID = 0,  /* New prison, not ready to be seen */
+    PRISON_STATE_ALIVE,                /* Current prison, visible to all */
+    PRISON_STATE_DYING         /* Removed but holding resources, */
+};                             /* optionally visible. */
 
 /*
  * Flags for jail_set and jail_get.
@@ -155,6 +157,7 @@ struct prison_racct;
  *   (m) locked by pr_mtx
  *   (p) locked by pr_mtx, and also at least shared allprison_lock required
  *       to update
+ *   (q) locked by both pr_mtx and allprison_lock
  *   (r) atomic via refcount(9), pr_mtx and allprison_lock required to
  *       decrement to zero
  */
@@ -185,7 +188,8 @@ struct prison {
        int              pr_securelevel;                /* (p) securelevel */
        int              pr_enforce_statfs;             /* (p) statfs 
permission */
        int              pr_devfs_rsnum;                /* (p) devfs ruleset */
-       int              pr_spare[3];
+       enum prison_state pr_state;                     /* (q) state in life 
cycle */
+       int              pr_spare[2];
        int              pr_osreldate;                  /* (c) kern.osreldate 
value */
        unsigned long    pr_hostid;                     /* (p) jail hostid */
        char             pr_name[MAXHOSTNAMELEN];       /* (p) admin jail name 
*/
@@ -222,6 +226,8 @@ struct prison_racct {
                                        /* by this jail or an ancestor */
 #define        PR_IP6          0x04000000      /* IPv6 restricted or disabled 
*/
                                        /* by this jail or an ancestor */
+#define PR_COMPLETE_PROC 0x08000000    /* prison_complete called from */
+                                       /* prison_proc_free, releases uref */
 
 /*
  * Flags for pr_allow
_______________________________________________
dev-commits-src-main@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main
To unsubscribe, send any mail to "dev-commits-src-main-unsubscr...@freebsd.org"

Reply via email to