The branch main has been updated by jamie:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=f7496dcab0360a74bfb00cd6118f66323fffda61

commit f7496dcab0360a74bfb00cd6118f66323fffda61
Author:     Jamie Gritton <ja...@freebsd.org>
AuthorDate: 2021-02-21 18:55:44 +0000
Commit:     Jamie Gritton <ja...@freebsd.org>
CommitDate: 2021-02-21 18:55:44 +0000

    jail: Change the locking around pr_ref and pr_uref
    
    Require both the prison mutex and allprison_lock when pr_ref or
    pr_uref go to/from zero.  Adding a non-first or removing a non-last
    reference remain lock-free.  This means that a shared hold on
    allprison_lock is sufficient for prison_isalive() to be useful, which
    removes a number of cases of lock/check/unlock on the prison mutex.
    
    Expand the locking in kern_jail_set() to keep allprison_lock held
    exclusive until the new prison is valid, thus making invalid prisons
    invisible to any thread holding allprison_lock (except of course the
    one creating or destroying the prison).  This renders prison_isvalid()
    nearly redundant, now used only in asserts.
    
    Differential Revision:  https://reviews.freebsd.org/D28419
    Differential Revision:  https://reviews.freebsd.org/D28458
---
 sys/kern/kern_jail.c   | 423 ++++++++++++++++++++++++-------------------------
 sys/kern/sysv_msg.c    |   2 +-
 sys/kern/sysv_sem.c    |   2 +-
 sys/kern/sysv_shm.c    |   2 +-
 sys/kern/uipc_mqueue.c |  35 ++--
 sys/sys/jail.h         |   3 +-
 6 files changed, 232 insertions(+), 235 deletions(-)

diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 65201eb12951..48c91a95bf1a 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -137,9 +137,11 @@ LIST_HEAD(, prison_racct) allprison_racct;
 int    lastprid = 0;
 
 static int get_next_prid(struct prison **insprp);
-static int do_jail_attach(struct thread *td, struct prison *pr);
+static int do_jail_attach(struct thread *td, struct prison *pr, int drflags);
 static void prison_complete(void *context, int pending);
 static void prison_deref(struct prison *pr, int flags);
+static int prison_lock_xlock(struct prison *pr, int flags);
+static void prison_free_not_last(struct prison *pr);
 static void prison_set_allow_locked(struct prison *pr, unsigned flag,
     int enable);
 static char *prison_path(struct prison *pr1, struct prison *pr2);
@@ -1006,18 +1008,15 @@ kern_jail_set(struct thread *td, struct uio *optuio, 
int flags)
                 * where it can be inserted later.
                 */
                TAILQ_FOREACH(inspr, &allprison, pr_list) {
-                       if (inspr->pr_id == jid) {
-                               mtx_lock(&inspr->pr_mtx);
-                               if (prison_isvalid(inspr)) {
-                                       pr = inspr;
-                                       drflags |= PD_LOCKED;
-                                       inspr = NULL;
-                               } else
-                                       mtx_unlock(&inspr->pr_mtx);
-                               break;
-                       }
+                       if (inspr->pr_id < jid)
+                               continue;
                        if (inspr->pr_id > jid)
                                break;
+                       pr = inspr;
+                       mtx_lock(&pr->pr_mtx);
+                       drflags |= PD_LOCKED;
+                       inspr = NULL;
+                       break;
                }
                if (pr != NULL) {
                        ppr = pr->pr_parent;
@@ -1041,13 +1040,15 @@ kern_jail_set(struct thread *td, struct uio *optuio, 
int flags)
                                error = ENOENT;
                                vfs_opterror(opts, "jail %d not found", jid);
                                goto done_deref;
-                       } else if (!prison_isalive(pr)) {
+                       }
+                       if (!prison_isalive(pr)) {
                                if (!(flags & JAIL_DYING)) {
                                        error = ENOENT;
                                        vfs_opterror(opts, "jail %d is dying",
                                            jid);
                                        goto done_deref;
-                               } else if ((flags & JAIL_ATTACH) ||
+                               }
+                               if ((flags & JAIL_ATTACH) ||
                                    (pr_flags & PR_PERSIST)) {
                                        /*
                                         * A dying jail might be resurrected
@@ -1121,12 +1122,10 @@ kern_jail_set(struct thread *td, struct uio *optuio, 
int flags)
                if (namelc[0] != '\0') {
                        pnamelen =
                            (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
- name_again:
                        deadpr = NULL;
                        FOREACH_PRISON_CHILD(ppr, tpr) {
                                if (tpr != pr &&
                                    !strcmp(tpr->pr_name + pnamelen, namelc)) {
-                                       mtx_lock(&tpr->pr_mtx);
                                        if (prison_isalive(tpr)) {
                                                if (pr == NULL &&
                                                    cuflags != JAIL_CREATE) {
@@ -1135,6 +1134,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int 
flags)
                                                         * for updates.
                                                         */
                                                        pr = tpr;
+                                                       mtx_lock(&pr->pr_mtx);
                                                        drflags |= PD_LOCKED;
                                                        break;
                                                }
@@ -1144,28 +1144,22 @@ kern_jail_set(struct thread *td, struct uio *optuio, 
int flags)
                                                 * active sibling jail.
                                                 */
                                                error = EEXIST;
-                                               mtx_unlock(&tpr->pr_mtx);
                                                vfs_opterror(opts,
                                                   "jail \"%s\" already exists",
                                                   name);
                                                goto done_deref;
                                        }
                                        if (pr == NULL &&
-                                           cuflags != JAIL_CREATE &&
-                                           prison_isvalid(tpr))
+                                           cuflags != JAIL_CREATE) {
                                                deadpr = tpr;
-                                       mtx_unlock(&tpr->pr_mtx);
+                                       }
                                }
                        }
                        /* If no active jail is found, use a dying one. */
                        if (deadpr != NULL && pr == NULL) {
                                if (flags & JAIL_DYING) {
-                                       mtx_lock(&deadpr->pr_mtx);
-                                       if (!prison_isvalid(deadpr)) {
-                                               mtx_unlock(&deadpr->pr_mtx);
-                                               goto name_again;
-                                       }
                                        pr = deadpr;
+                                       mtx_lock(&pr->pr_mtx);
                                        drflags |= PD_LOCKED;
                                } else if (cuflags == JAIL_UPDATE) {
                                        error = ENOENT;
@@ -1199,19 +1193,11 @@ kern_jail_set(struct thread *td, struct uio *optuio, 
int flags)
                                vfs_opterror(opts, "prison limit exceeded");
                                goto done_deref;
                        }
-               mtx_lock(&ppr->pr_mtx);
-               if (!prison_isvalid(ppr)) {
-                       mtx_unlock(&ppr->pr_mtx);
-                       error = ENOENT;
-                       vfs_opterror(opts, "jail \"%s\" not found",
-                           prison_name(mypr, ppr));
-                       goto done_deref;
-               }
                prison_hold(ppr);
-               if (refcount_acquire(&ppr->pr_uref))
-                       mtx_unlock(&ppr->pr_mtx);
-               else {
+               if (!refcount_acquire_if_not_zero(&ppr->pr_uref)) {
                        /* This brings the parent back to life. */
+                       mtx_lock(&ppr->pr_mtx);
+                       refcount_acquire(&ppr->pr_uref);
                        mtx_unlock(&ppr->pr_mtx);
                        error = osd_jail_call(ppr, PR_METHOD_CREATE, opts);
                        if (error) {
@@ -1219,7 +1205,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int 
flags)
                                drflags |= PD_DEREF | PD_DEUREF;
                                goto done_deref;
                        }
-                }
+               }
 
                if (jid == 0 && (jid = get_next_prid(&inspr)) == 0) {
                        error = EAGAIN;
@@ -1230,6 +1216,8 @@ kern_jail_set(struct thread *td, struct uio *optuio, int 
flags)
                }
 
                pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
+               refcount_init(&pr->pr_ref, 0);
+               refcount_init(&pr->pr_uref, 0);
                LIST_INIT(&pr->pr_children);
                mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK);
                TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
@@ -1452,7 +1440,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int 
flags)
 #ifdef VIMAGE
                            (tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
 #endif
-                           refcount_load(&tpr->pr_uref) == 0) {
+                           !prison_isalive(tpr)) {
                                descend = 0;
                                continue;
                        }
@@ -1520,7 +1508,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int 
flags)
 #ifdef VIMAGE
                            (tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
 #endif
-                           refcount_load(&tpr->pr_uref) == 0) {
+                           !prison_isalive(tpr)) {
                                descend = 0;
                                continue;
                        }
@@ -1759,8 +1747,8 @@ kern_jail_set(struct thread *td, struct uio *optuio, int 
flags)
                        prison_hold(pr);
                        refcount_acquire(&pr->pr_uref);
                } else {
-                       refcount_release(&pr->pr_ref);
                        drflags |= PD_DEUREF;
+                       prison_free_not_last(pr);
                }
        }
        pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags;
@@ -1824,8 +1812,6 @@ kern_jail_set(struct thread *td, struct uio *optuio, int 
flags)
 #endif
 
        /* Let the modules do their work. */
-       sx_downgrade(&allprison_lock);
-       drflags = (drflags & ~PD_LIST_XLOCKED) | PD_LIST_SLOCKED;
        if (born) {
                error = osd_jail_call(pr, PR_METHOD_CREATE, opts);
                if (error) {
@@ -1842,9 +1828,8 @@ kern_jail_set(struct thread *td, struct uio *optuio, int 
flags)
 
        /* Attach this process to the prison if requested. */
        if (flags & JAIL_ATTACH) {
-               mtx_lock(&pr->pr_mtx);
-               error = do_jail_attach(td, pr);
-               drflags &= ~PD_LIST_SLOCKED;
+               error = do_jail_attach(td, pr, prison_lock_xlock(pr, drflags));
+               drflags &= ~(PD_LOCKED | PD_LIST_XLOCKED);
                if (error) {
                        if (created) {
                                /* do_jail_attach has removed the prison. */
@@ -1857,9 +1842,9 @@ kern_jail_set(struct thread *td, struct uio *optuio, int 
flags)
 
 #ifdef RACCT
        if (racct_enable && !created) {
-               if (drflags & PD_LIST_SLOCKED) {
-                       sx_sunlock(&allprison_lock);
-                       drflags &= ~PD_LIST_SLOCKED;
+               if (drflags & PD_LIST_XLOCKED) {
+                       sx_xunlock(&allprison_lock);
+                       drflags &= ~PD_LIST_XLOCKED;
                }
                prison_racct_modify(pr);
        }
@@ -1874,8 +1859,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int 
flags)
                 * not be publicly visible).
                 */
                if (pr_flags & PR_PERSIST) {
-                       mtx_lock(&pr->pr_mtx);
-                       drflags |= PD_LOCKED;
+                       drflags = prison_lock_xlock(pr, drflags);
                        refcount_acquire(&pr->pr_ref);
                        refcount_acquire(&pr->pr_uref);
                } else {
@@ -1952,13 +1936,8 @@ get_next_prid(struct prison **insprp)
                        TAILQ_FOREACH(inspr, &allprison, pr_list) {
                                if (inspr->pr_id < jid)
                                        continue;
-                               if (inspr->pr_id > jid ||
-                                   refcount_load(&inspr->pr_ref) == 0) {
-                                       /*
-                                        * Found an opening.  This may be a gap
-                                        * in the list, or a dead jail with the
-                                        * same ID.
-                                        */
+                               if (inspr->pr_id > jid) {
+                                       /* Found an opening. */
                                        maxid = 0;
                                        break;
                                }
@@ -2047,18 +2026,14 @@ kern_jail_get(struct thread *td, struct uio *optuio, 
int flags)
        error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid));
        if (error == 0) {
                TAILQ_FOREACH(pr, &allprison, pr_list) {
-                       if (pr->pr_id > jid && prison_ischild(mypr, pr)) {
+                       if (pr->pr_id > jid &&
+                           ((flags & JAIL_DYING) || prison_isalive(pr)) &&
+                           prison_ischild(mypr, pr)) {
                                mtx_lock(&pr->pr_mtx);
-                               if ((flags & JAIL_DYING)
-                                   ? prison_isvalid(pr) : prison_isalive(pr))
-                                       break;
-                               mtx_unlock(&pr->pr_mtx);
+                               drflags |= PD_LOCKED;
+                               goto found_prison;
                        }
                }
-               if (pr != NULL) {
-                       drflags |= PD_LOCKED;
-                       goto found_prison;
-               }
                error = ENOENT;
                vfs_opterror(opts, "no jail after %d", jid);
                goto done;
@@ -2314,7 +2289,7 @@ kern_jail_get(struct thread *td, struct uio *optuio, int 
flags)
 int
 sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
 {
-       struct prison *pr, *cpr, *lpr, *tpr;
+       struct prison *pr, *cpr, *lpr;
        int descend, error;
 
        error = priv_check(td, PRIV_JAIL_REMOVE);
@@ -2334,21 +2309,13 @@ sys_jail_remove(struct thread *td, struct 
jail_remove_args *uap)
                mtx_unlock(&pr->pr_mtx);
                lpr = NULL;
                FOREACH_PRISON_DESCENDANT(pr, cpr, descend) {
-                       mtx_lock(&cpr->pr_mtx);
-                       if (prison_isvalid(cpr)) {
-                               tpr = cpr;
-                               prison_hold(cpr);
-                       } else {
-                               /* Already removed - do not do it again. */
-                               tpr = NULL;
-                       }
-                       mtx_unlock(&cpr->pr_mtx);
+                       prison_hold(cpr);
                        if (lpr != NULL) {
                                mtx_lock(&lpr->pr_mtx);
                                prison_remove_one(lpr);
                                sx_xlock(&allprison_lock);
                        }
-                       lpr = tpr;
+                       lpr = cpr;
                }
                if (lpr != NULL) {
                        mtx_lock(&lpr->pr_mtx);
@@ -2377,8 +2344,8 @@ prison_remove_one(struct prison *pr)
 
        /* If the prison was persistent, it is not anymore. */
        if (pr->pr_flags & PR_PERSIST) {
-               refcount_release(&pr->pr_ref);
                drflags |= PD_DEUREF;
+               prison_free_not_last(pr);
                pr->pr_flags &= ~PR_PERSIST;
        }
 
@@ -2428,14 +2395,7 @@ sys_jail_attach(struct thread *td, struct 
jail_attach_args *uap)
        if (error)
                return (error);
 
-       /*
-        * Start with exclusive hold on allprison_lock to ensure that a possible
-        * PR_METHOD_REMOVE call isn't concurrent with jail_set or jail_remove.
-        * But then immediately downgrade it since we don't need to stop
-        * readers.
-        */
-       sx_xlock(&allprison_lock);
-       sx_downgrade(&allprison_lock);
+       sx_slock(&allprison_lock);
        pr = prison_find_child(td->td_ucred->cr_prison, uap->jid);
        if (pr == NULL) {
                sx_sunlock(&allprison_lock);
@@ -2449,16 +2409,18 @@ sys_jail_attach(struct thread *td, struct 
jail_attach_args *uap)
                return (EINVAL);
        }
 
-       return (do_jail_attach(td, pr));
+       return (do_jail_attach(td, pr, PD_LOCKED | PD_LIST_SLOCKED));
 }
 
 static int
-do_jail_attach(struct thread *td, struct prison *pr)
+do_jail_attach(struct thread *td, struct prison *pr, int drflags)
 {
        struct proc *p;
        struct ucred *newcred, *oldcred;
        int error;
 
+       mtx_assert(&pr->pr_mtx, MA_OWNED);
+       sx_assert(&allprison_lock, SX_LOCKED);
        /*
         * XXX: Note that there is a slight race here if two threads
         * in the same privileged process attempt to attach to two
@@ -2469,15 +2431,18 @@ do_jail_attach(struct thread *td, struct prison *pr)
         */
        refcount_acquire(&pr->pr_ref);
        refcount_acquire(&pr->pr_uref);
+       drflags |= PD_DEREF | PD_DEUREF;
        mtx_unlock(&pr->pr_mtx);
+       drflags &= ~PD_LOCKED;
 
        /* Let modules do whatever they need to prepare for attaching. */
        error = osd_jail_call(pr, PR_METHOD_ATTACH, td);
        if (error) {
-               prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED);
+               prison_deref(pr, drflags);
                return (error);
        }
-       sx_sunlock(&allprison_lock);
+       sx_unlock(&allprison_lock);
+       drflags &= ~(PD_LIST_SLOCKED | PD_LIST_XLOCKED);
 
        /*
         * Reparent the newly attached process to this jail.
@@ -2513,7 +2478,7 @@ do_jail_attach(struct thread *td, struct prison *pr)
        rctl_proc_ucred_changed(p, newcred);
        crfree(newcred);
 #endif
-       prison_deref(oldcred->cr_prison, PD_DEREF | PD_DEUREF);
+       prison_deref(oldcred->cr_prison, drflags);
        crfree(oldcred);
 
        /*
@@ -2533,8 +2498,9 @@ do_jail_attach(struct thread *td, struct prison *pr)
  e_revert_osd:
        /* Tell modules this thread is still in its old jail after all. */
        sx_slock(&allprison_lock);
+       drflags |= PD_LIST_SLOCKED;
        (void)osd_jail_call(td->td_ucred->cr_prison, PR_METHOD_ATTACH, td);
-       prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED);
+       prison_deref(pr, drflags);
        return (error);
 }
 
@@ -2548,19 +2514,13 @@ prison_find(int prid)
 
        sx_assert(&allprison_lock, SX_LOCKED);
        TAILQ_FOREACH(pr, &allprison, pr_list) {
-               if (pr->pr_id == prid) {
-                       mtx_lock(&pr->pr_mtx);
-                       if (prison_isvalid(pr))
-                               return (pr);
-                       /*
-                        * Any active prison with the same ID would have
-                        * been inserted before a dead one.
-                        */
-                       mtx_unlock(&pr->pr_mtx);
-                       break;
-               }
+               if (pr->pr_id < prid)
+                       continue;
                if (pr->pr_id > prid)
                        break;
+               KASSERT(prison_isvalid(pr), ("Found invalid prison %p", pr));
+               mtx_lock(&pr->pr_mtx);
+               return (pr);
        }
        return (NULL);
 }
@@ -2577,10 +2537,10 @@ prison_find_child(struct prison *mypr, int prid)
        sx_assert(&allprison_lock, SX_LOCKED);
        FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
                if (pr->pr_id == prid) {
+                       KASSERT(prison_isvalid(pr),
+                           ("Found invalid prison %p", pr));
                        mtx_lock(&pr->pr_mtx);
-                       if (prison_isvalid(pr))
-                               return (pr);
-                       mtx_unlock(&pr->pr_mtx);
+                       return (pr);
                }
        }
        return (NULL);
@@ -2598,26 +2558,21 @@ prison_find_name(struct prison *mypr, const char *name)
 
        sx_assert(&allprison_lock, SX_LOCKED);
        mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1;
- again:
        deadpr = NULL;
        FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
                if (!strcmp(pr->pr_name + mylen, name)) {
-                       mtx_lock(&pr->pr_mtx);
-                       if (prison_isalive(pr))
+                       KASSERT(prison_isvalid(pr),
+                           ("Found invalid prison %p", pr));
+                       if (prison_isalive(pr)) {
+                               mtx_lock(&pr->pr_mtx);
                                return (pr);
-                       if (prison_isvalid(pr))
-                               deadpr = pr;
-                       mtx_unlock(&pr->pr_mtx);
+                       }
+                       deadpr = pr;
                }
        }
        /* There was no valid prison - perhaps there was a dying one. */
-       if (deadpr != NULL) {
+       if (deadpr != NULL)
                mtx_lock(&deadpr->pr_mtx);
-               if (!prison_isvalid(deadpr)) {
-                       mtx_unlock(&deadpr->pr_mtx);
-                       goto again;
-               }
-       }
        return (deadpr);
 }
 
@@ -2671,45 +2626,53 @@ prison_hold(struct prison *pr)
 
 /*
  * Remove a prison reference.  If that was the last reference, the
- * prison will be removed (at a later time).  Return with the prison
- * unlocked.
+ * prison will be removed (at a later time).
  */
 void
 prison_free_locked(struct prison *pr)
 {
-       int lastref;
 
        mtx_assert(&pr->pr_mtx, MA_OWNED);
+       /*
+        * Locking is no longer required, but unlock because the caller
+        * expects it.
+        */
+       mtx_unlock(&pr->pr_mtx);
+       prison_free(pr);
+}
+
+void
+prison_free(struct prison *pr)
+{
+
        KASSERT(refcount_load(&pr->pr_ref) > 0,
            ("Trying to free dead prison %p (jid=%d).",
             pr, pr->pr_id));
-       lastref = refcount_release(&pr->pr_ref);
-       mtx_unlock(&pr->pr_mtx);
-       if (lastref) {
+       if (!refcount_release_if_not_last(&pr->pr_ref)) {
                /*
-                * Don't remove the prison itself in this context,
+                * Don't remove the last reference in this context,
                 * in case there are locks held.
                 */
                taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
        }
 }
 
-void
-prison_free(struct prison *pr)
+static void
+prison_free_not_last(struct prison *pr)
 {
+#ifdef INVARIANTS
+       int lastref;
 
-       /*
-        * Locking is only required when releasing the last reference.
-        * This allows assurance that a locked prison will remain valid
-        * until it is unlocked.
-        */
        KASSERT(refcount_load(&pr->pr_ref) > 0,
            ("Trying to free dead prison %p (jid=%d).",
             pr, pr->pr_id));
-       if (refcount_release_if_not_last(&pr->pr_ref))
-               return;
-       mtx_lock(&pr->pr_mtx);
-       prison_free_locked(pr);
+       lastref = refcount_release(&pr->pr_ref);
+       KASSERT(!lastref,
+           ("prison_free_not_last freed last ref on prison %p (jid=%d).",
+            pr, pr->pr_id));
+#else
+       refcount_release(&pr>pr_ref);
+#endif
 }
 
 /*
@@ -2718,7 +2681,8 @@ prison_free(struct prison *pr)
  * user-visible, except through the the jail system calls.  It is also
  * an error to hold an invalid prison.  A prison record will remain
  * alive as long as it has at least one user reference, and will not
- * be set to the dying state was long as the prison mutex is held.
+ * be set to the dying state until the prison mutex and allprison_lock
+ * are both freed.
  */
 void
 prison_proc_hold(struct prison *pr)
@@ -2756,7 +2720,7 @@ prison_proc_free(struct prison *pr)
                 * but also half dead.  Add a reference so any calls to
                 * prison_free() won't re-submit the task.
                 */
-               refcount_acquire(&pr->pr_ref);
+               prison_hold(pr);
                taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
        }
 }
@@ -2768,18 +2732,18 @@ static void
 prison_complete(void *context, int pending)
 {
        struct prison *pr = context;
+       int drflags;
 
-       sx_xlock(&allprison_lock);
-       mtx_lock(&pr->pr_mtx);
        /*
-        * If this is completing a call to prison_proc_free, there will still
-        * be a user reference held; clear that as well as the reference that
-        * was added.  No references are expected if this is completing a call
-        * to prison_free, but prison_deref is still called for the cleanup.
+        * This could be called to release the last reference, or the
+        * last user reference; the existence of a user reference implies
+        * the latter.  There will always be a reference to remove, as
+        * prison_proc_free adds one.
         */
-       prison_deref(pr, refcount_load(&pr->pr_uref) > 0
-           ? PD_DEREF | PD_DEUREF | PD_LOCKED | PD_LIST_XLOCKED
-           : PD_LOCKED | PD_LIST_XLOCKED);
+       drflags = prison_lock_xlock(pr, PD_DEREF);
+       if (refcount_load(&pr->pr_uref) > 0)
+               drflags |= PD_DEUREF;
+       prison_deref(pr, drflags);
 }
 
 /*
@@ -2794,84 +2758,86 @@ static void
 prison_deref(struct prison *pr, int flags)
 {
        struct prisonlist freeprison;
-       struct prison *rpr, *tpr;
-       int lastref, lasturef;
+       struct prison *rpr, *ppr, *tpr;
 
        TAILQ_INIT(&freeprison);
-       if (!(flags & PD_LOCKED))
-               mtx_lock(&pr->pr_mtx);
        /*
         * Release this prison as requested, which may cause its parent
         * to be released, and then maybe its grandparent, etc.
         */
        for (;;) {
                if (flags & PD_DEUREF) {
+                       /* Drop a user reference. */
                        KASSERT(refcount_load(&pr->pr_uref) > 0,
                            ("prison_deref PD_DEUREF on a dead prison (jid=%d)",
                             pr->pr_id));
-                       lasturef = refcount_release(&pr->pr_uref);
-                       if (lasturef)
-                               refcount_acquire(&pr->pr_ref);
-                       KASSERT(refcount_load(&prison0.pr_uref) > 0,
-                           ("prison0 pr_uref=0"));
-               } else
-                       lasturef = 0;
+                       if (!refcount_release_if_not_last(&pr->pr_uref)) {
+                               if (!(flags & PD_DEREF)) {
+                                       prison_hold(pr);
+                                       flags |= PD_DEREF;
+                               }
+                               flags = prison_lock_xlock(pr, flags);
+                               if (refcount_release(&pr->pr_uref)) {
+                                       /*
+                                        * When the last user references goes,
+                                        * this becomes a dying prison.
+                                        */
+                                       KASSERT(
+                                           refcount_load(&prison0.pr_uref) > 0,
+                                           ("prison0 pr_uref=0"));
+                                       mtx_unlock(&pr->pr_mtx);
+                                       flags &= ~PD_LOCKED;
+                                       (void)osd_jail_call(pr,
+                                           PR_METHOD_REMOVE, NULL);
+                               }
+                       }
+               }
                if (flags & PD_DEREF) {
+                       /* Drop a reference. */
                        KASSERT(refcount_load(&pr->pr_ref) > 0,
                            ("prison_deref PD_DEREF on a dead prison (jid=%d)",
                             pr->pr_id));
-                       lastref = refcount_release(&pr->pr_ref);
-               }
-               else
-                       lastref = refcount_load(&pr->pr_ref) == 0;
-               mtx_unlock(&pr->pr_mtx);
-
-               /*
-                * Tell the modules if the last user reference was removed
-                * (even it sticks around in dying state).
-                */
-               if (lasturef) {
-                       if (!(flags & (PD_LIST_SLOCKED | PD_LIST_XLOCKED))) {
-                               if (atomic_load_acq_int(&pr->pr_ref) > 1) {
-                                       sx_slock(&allprison_lock);
-                                       flags |= PD_LIST_SLOCKED;
-                               } else {
-                                       sx_xlock(&allprison_lock);
-                                       flags |= PD_LIST_XLOCKED;
+                       if (!refcount_release_if_not_last(&pr->pr_ref)) {
+                               flags = prison_lock_xlock(pr, flags);
+                               if (refcount_release(&pr->pr_ref)) {
+                                       /*
+                                        * When the last reference goes,
+                                        * unlink the prison and set it aside.
+                                        */
+                                       KASSERT(
+                                           refcount_load(&pr->pr_uref) == 0,
+                                           ("prison_deref: last ref, "
+                                            "but still has %d urefs (jid=%d)",
+                                            pr->pr_uref, pr->pr_id));
+                                       KASSERT(
+                                           refcount_load(&prison0.pr_ref) != 0,
+                                           ("prison0 pr_ref=0"));
+                                       TAILQ_REMOVE(&allprison, pr, pr_list);
+                                       LIST_REMOVE(pr, pr_sibling);
+                                       TAILQ_INSERT_TAIL(&freeprison, pr,
+                                           pr_list);
+                                       for (ppr = pr->pr_parent;
+                                            ppr != NULL;
+                                            ppr = ppr->pr_parent)
+                                               ppr->pr_childcount--;
+                                       /*
+                                        * Removing a prison frees references
+                                        * from its parent.
+                                        */
+                                       mtx_unlock(&pr->pr_mtx);
+                                       flags &= ~PD_LOCKED;
+                                       pr = pr->pr_parent;
+                                       flags |= PD_DEREF | PD_DEUREF;
+                                       continue;
                                }
                        }
-                       (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
-                       mtx_lock(&pr->pr_mtx);
-                       lastref = refcount_release(&pr->pr_ref);
-                       mtx_unlock(&pr->pr_mtx);
                }
-
-               if (!lastref)
-                       break;
-
-               if (flags & PD_LIST_SLOCKED) {
-                       if (!sx_try_upgrade(&allprison_lock)) {
-                               sx_sunlock(&allprison_lock);
-                               sx_xlock(&allprison_lock);
-                       }
-                       flags &= ~PD_LIST_SLOCKED;
-               } else if (!(flags & PD_LIST_XLOCKED))
-                       sx_xlock(&allprison_lock);
-               flags |= PD_LIST_XLOCKED;
-
-               TAILQ_REMOVE(&allprison, pr, pr_list);
-               LIST_REMOVE(pr, pr_sibling);
-               TAILQ_INSERT_TAIL(&freeprison, pr, pr_list);
-               for (tpr = pr->pr_parent; tpr != NULL; tpr = tpr->pr_parent)
-                       tpr->pr_childcount--;
-
-               /* Removing a prison frees a reference on its parent. */
-               pr = pr->pr_parent;
-               mtx_lock(&pr->pr_mtx);
-               flags |= PD_DEREF | PD_DEUREF;
+               break;
        }
 
        /* Release all the prison locks. */
+       if (flags & PD_LOCKED)
+               mtx_unlock(&pr->pr_mtx);
        if (flags & PD_LIST_SLOCKED)
                sx_sunlock(&allprison_lock);
        else if (flags & PD_LIST_XLOCKED)
@@ -2902,10 +2868,47 @@ prison_deref(struct prison *pr, int flags)
                if (racct_enable)
                        prison_racct_detach(rpr);
 #endif
+               TAILQ_REMOVE(&freeprison, rpr, pr_list);
                free(rpr, M_PRISON);
        }
 }
 
+/*
+ * Given the current locking state in the flags, make sure allprison_lock
+ * is held exclusive, and the prison is locked.  Return flags indicating
+ * the new state.
+ */
+static int
+prison_lock_xlock(struct prison *pr, int flags)
+{
+
+       if (!(flags & PD_LIST_XLOCKED)) {
+               /*
+                * Get allprison_lock, which may be an upgrade,
+                * and may require unlocking the prison.
+                */
+               if (flags & PD_LOCKED) {
+                       mtx_unlock(&pr->pr_mtx);
+                       flags &= ~PD_LOCKED;
+               }
+               if (flags & PD_LIST_SLOCKED) {
+                       if (!sx_try_upgrade(&allprison_lock)) {
+                               sx_sunlock(&allprison_lock);
+                               sx_xlock(&allprison_lock);
+                       }
+                       flags &= ~PD_LIST_SLOCKED;
+               } else
+                       sx_xlock(&allprison_lock);
+               flags |= PD_LIST_XLOCKED;
+       }
+       if (!(flags & PD_LOCKED)) {
+               /* Lock the prison mutex. */
+               mtx_lock(&pr->pr_mtx);
+               flags |= PD_LOCKED;
+       }
+       return flags;
+}
+
 /*
  * Set or clear a permission bit in the pr_allow field, passing restrictions
  * (cleared permission) down to child jails.
@@ -3068,15 +3071,13 @@ prison_ischild(struct prison *pr1, struct prison *pr2)
 }
 
 /*
- * Return true if the prison is currently alive.  A prison is alive if it is
- * valid and holds user references, and it isn't being removed.
+ * Return true if the prison is currently alive.  A prison is alive if it
+ * holds user references and it isn't being removed.
  */
 bool
 prison_isalive(struct prison *pr)
 {
 
-       if (__predict_false(refcount_load(&pr->pr_ref) == 0))
-               return (false);
        if (__predict_false(refcount_load(&pr->pr_uref) == 0))
                return (false);
        if (__predict_false(pr->pr_flags & PR_REMOVE))
@@ -3087,7 +3088,9 @@ prison_isalive(struct prison *pr)
 /*
  * Return true if the prison is currently valid.  A prison is valid if it has
  * been fully created, and is not being destroyed.  Note that dying prisons
- * are still considered valid.
+ * are still considered valid.  Invalid prisons won't be found under normal
+ * circumstances, as they're only put in that state by functions that have
+ * an exclusive hold on allprison_lock.
  */
 bool
 prison_isvalid(struct prison *pr)
@@ -3754,10 +3757,6 @@ sysctl_jail_list(SYSCTL_HANDLER_ARGS)
                            cpr->pr_ip6s * sizeof(struct in6_addr));
                }
 #endif
-               if (!prison_isvalid(cpr)) {
-                       mtx_unlock(&cpr->pr_mtx);
-                       continue;
-               }
                bzero(xp, sizeof(*xp));
                xp->pr_version = XPRISON_VERSION;
                xp->pr_id = cpr->pr_id;
diff --git a/sys/kern/sysv_msg.c b/sys/kern/sysv_msg.c
index f048234f3a33..435235f0384d 100644
--- a/sys/kern/sysv_msg.c
+++ b/sys/kern/sysv_msg.c
@@ -290,7 +290,7 @@ msginit()
                if (rsv == NULL)
                        rsv = osd_reserve(msg_prison_slot);
                prison_lock(pr);
-               if (prison_isvalid(pr) && (pr->pr_allow & PR_ALLOW_SYSVIPC)) {
+               if (pr->pr_allow & PR_ALLOW_SYSVIPC) {
                        (void)osd_jail_set_reserved(pr, msg_prison_slot, rsv,
                            &prison0);
                        rsv = NULL;
diff --git a/sys/kern/sysv_sem.c b/sys/kern/sysv_sem.c
index deee60d87a5a..dd8925246d1e 100644
--- a/sys/kern/sysv_sem.c
+++ b/sys/kern/sysv_sem.c
@@ -321,7 +321,7 @@ seminit(void)
                if (rsv == NULL)
                        rsv = osd_reserve(sem_prison_slot);
                prison_lock(pr);
-               if (prison_isvalid(pr) && (pr->pr_allow & PR_ALLOW_SYSVIPC)) {
+               if (pr->pr_allow & PR_ALLOW_SYSVIPC) {
                        (void)osd_jail_set_reserved(pr, sem_prison_slot, rsv,
                            &prison0);
                        rsv = NULL;
diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c
index ad5f0030b965..2e7ae927dcc3 100644
--- a/sys/kern/sysv_shm.c
+++ b/sys/kern/sysv_shm.c
@@ -979,7 +979,7 @@ shminit(void)
                if (rsv == NULL)
                        rsv = osd_reserve(shm_prison_slot);
                prison_lock(pr);
-               if (prison_isvalid(pr) && (pr->pr_allow & PR_ALLOW_SYSVIPC)) {
+               if (pr->pr_allow & PR_ALLOW_SYSVIPC) {
                        (void)osd_jail_set_reserved(pr, shm_prison_slot, rsv,
                            &prison0);
                        rsv = NULL;
diff --git a/sys/kern/uipc_mqueue.c b/sys/kern/uipc_mqueue.c
index dc94ce213d08..5c1775a261fc 100644
--- a/sys/kern/uipc_mqueue.c
+++ b/sys/kern/uipc_mqueue.c
@@ -1564,29 +1564,26 @@ mqfs_prison_remove(void *obj, void *data __unused)
        const struct prison *pr = obj;
        struct prison *tpr;
        struct mqfs_node *pn, *tpn;
-       int found;
+       struct vnode *pr_root;
 
-       found = 0;
+       pr_root = pr->pr_root;
+       if (pr->pr_parent->pr_root == pr_root)
+               return (0);
        TAILQ_FOREACH(tpr, &allprison, pr_list) {
-               prison_lock(tpr);
-               if (tpr != pr && prison_isvalid(tpr) &&
-                   tpr->pr_root == pr->pr_root)
-                       found = 1;
-               prison_unlock(tpr);
+               if (tpr != pr && tpr->pr_root == pr_root)
+                       return (0);
        }
-       if (!found) {
-               /*
-                * No jails are rooted in this directory anymore,
-                * so no queues should be either.
-                */
-               sx_xlock(&mqfs_data.mi_lock);
-               LIST_FOREACH_SAFE(pn, &mqfs_data.mi_root->mn_children,
-                   mn_sibling, tpn) {
-                       if (pn->mn_pr_root == pr->pr_root)
-                               (void)do_unlink(pn, curthread->td_ucred);
-               }
-               sx_xunlock(&mqfs_data.mi_lock);
+       /*
+        * No jails are rooted in this directory anymore,
+        * so no queues should be either.
+        */
+       sx_xlock(&mqfs_data.mi_lock);
+       LIST_FOREACH_SAFE(pn, &mqfs_data.mi_root->mn_children,
+           mn_sibling, tpn) {
+               if (pn->mn_pr_root == pr_root)
+                       (void)do_unlink(pn, curthread->td_ucred);
        }
+       sx_xunlock(&mqfs_data.mi_lock);
        return (0);
 }
 
diff --git a/sys/sys/jail.h b/sys/sys/jail.h
index 2ac6aabdbd43..a48e189729dc 100644
--- a/sys/sys/jail.h
+++ b/sys/sys/jail.h
@@ -155,7 +155,8 @@ struct prison_racct;
  *   (m) locked by pr_mtx
  *   (p) locked by pr_mtx, and also at least shared allprison_lock required
  *       to update
- *   (r) atomic via refcount(9), pr_mtx required to decrement to zero
+ *   (r) atomic via refcount(9), pr_mtx and allprison_lock required to
+ *       decrement to zero
  */
 struct prison {
        TAILQ_ENTRY(prison) pr_list;                    /* (a) all prisons */
_______________________________________________
dev-commits-src-main@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main
To unsubscribe, send any mail to "dev-commits-src-main-unsubscr...@freebsd.org"

Reply via email to