Author: jamie
Date: Sat Apr 30 03:19:07 2016
New Revision: 298833
URL: https://svnweb.freebsd.org/changeset/base/298833

Log:
  MFC r298565:
  
    Add a new jail OSD method, PR_METHOD_REMOVE.  It's called when a jail is
    removed from the user perspective, i.e. when the last pr_uref goes away,
    even though the jail mail still exist in the dying state.  It will also
    be called if either PR_METHOD_CREATE or PR_METHOD_SET fail.
  
  MFC r298683:
  
    Delay removing the last jail reference in prison_proc_free, and instead
    put it off into the pr_task.  This is similar to prison_free, and in fact
    uses the same task even though they do something slightly different.
  
  MFC r298566:
  
    Pass the current/new jail to PR_METHOD_CHECK, which pushes the call
    until after the jail is found or created.  This requires unlocking the
    jail for the call and re-locking it afterward, but that works because
    nothing in the jail has been changed yet, and other processes won't
    change the important fields as long as allprison_lock remains held.
  
    Keep better track of name vs namelc in kern_jail_set.  Name should
    always be the hierarchical name (relative to the caller), and namelc
    the last component.
  
  MFC r298668:
  
    Use crcopysafe in jail_attach.
  
  PR:           48471

Modified:
  stable/10/sys/kern/kern_jail.c
  stable/10/sys/sys/jail.h
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/kern/kern_jail.c
==============================================================================
--- stable/10/sys/kern/kern_jail.c      Sat Apr 30 03:05:36 2016        
(r298832)
+++ stable/10/sys/kern/kern_jail.c      Sat Apr 30 03:19:07 2016        
(r298833)
@@ -560,8 +560,9 @@ kern_jail_set(struct thread *td, struct 
        void *op;
 #endif
        unsigned long hid;
-       size_t namelen, onamelen;
-       int created, cuflags, descend, enforce, error, errmsg_len, errmsg_pos;
+       size_t namelen, onamelen, pnamelen;
+       int born, created, cuflags, descend, enforce;
+       int error, errmsg_len, errmsg_pos;
        int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel;
        int fi, jid, jsys, len, level;
        int childmax, osreldt, rsnum, slevel;
@@ -584,7 +585,7 @@ kern_jail_set(struct thread *td, struct 
                error = priv_check(td, PRIV_JAIL_ATTACH);
        if (error)
                return (error);
-       mypr = ppr = td->td_ucred->cr_prison;
+       mypr = td->td_ucred->cr_prison;
        if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0)
                return (EPERM);
        if (flags & ~JAIL_SET_MASK)
@@ -611,6 +612,13 @@ kern_jail_set(struct thread *td, struct 
 #endif
        g_path = NULL;
 
+       cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
+       if (!cuflags) {
+               error = EINVAL;
+               vfs_opterror(opts, "no valid operation (create or update)");
+               goto done_errmsg;
+       }
+
        error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
        if (error == ENOENT)
                jid = 0;
@@ -1020,42 +1028,18 @@ kern_jail_set(struct thread *td, struct 
        }
 
        /*
-        * Grab the allprison lock before letting modules check their
-        * parameters.  Once we have it, do not let go so we'll have a
-        * consistent view of the OSD list.
-        */
-       sx_xlock(&allprison_lock);
-       error = osd_jail_call(NULL, PR_METHOD_CHECK, opts);
-       if (error)
-               goto done_unlock_list;
-
-       /* By now, all parameters should have been noted. */
-       TAILQ_FOREACH(opt, opts, link) {
-               if (!opt->seen && strcmp(opt->name, "errmsg")) {
-                       error = EINVAL;
-                       vfs_opterror(opts, "unknown parameter: %s", opt->name);
-                       goto done_unlock_list;
-               }
-       }
-
-       /*
-        * See if we are creating a new record or updating an existing one.
+        * Find the specified jail, or at least its parent.
         * This abuses the file error codes ENOENT and EEXIST.
         */
-       cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
-       if (!cuflags) {
-               error = EINVAL;
-               vfs_opterror(opts, "no valid operation (create or update)");
-               goto done_unlock_list;
-       }
        pr = NULL;
-       namelc = NULL;
+       ppr = mypr;
        if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) {
                namelc = strrchr(name, '.');
                jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10);
                if (*p != '\0')
                        jid = 0;
        }
+       sx_xlock(&allprison_lock);
        if (jid != 0) {
                /*
                 * See if a requested jid already exists.  There is an
@@ -1121,6 +1105,7 @@ kern_jail_set(struct thread *td, struct 
         * and updates keyed by the name itself (where the name must exist
         * because that is the jail being updated).
         */
+       namelc = NULL;
        if (name != NULL) {
                namelc = strrchr(name, '.');
                if (namelc == NULL)
@@ -1131,7 +1116,6 @@ kern_jail_set(struct thread *td, struct 
                         * parent and child names, and make sure the parent
                         * exists or matches an already found jail.
                         */
-                       *namelc = '\0';
                        if (pr != NULL) {
                                if (strncmp(name, ppr->pr_name, namelc - name)
                                    || ppr->pr_name[namelc - name] != '\0') {
@@ -1142,6 +1126,7 @@ kern_jail_set(struct thread *td, struct 
                                        goto done_unlock_list;
                                }
                        } else {
+                               *namelc = '\0';
                                ppr = prison_find_name(mypr, name);
                                if (ppr == NULL) {
                                        error = ENOENT;
@@ -1150,17 +1135,18 @@ kern_jail_set(struct thread *td, struct 
                                        goto done_unlock_list;
                                }
                                mtx_unlock(&ppr->pr_mtx);
+                               *namelc = '.';
                        }
-                       name = ++namelc;
+                       namelc++;
                }
-               if (name[0] != '\0') {
-                       namelen =
+               if (namelc[0] != '\0') {
+                       pnamelen =
                            (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
  name_again:
                        deadpr = NULL;
                        FOREACH_PRISON_CHILD(ppr, tpr) {
                                if (tpr != pr && tpr->pr_ref > 0 &&
-                                   !strcmp(tpr->pr_name + namelen, name)) {
+                                   !strcmp(tpr->pr_name + pnamelen, namelc)) {
                                        if (pr == NULL &&
                                            cuflags != JAIL_CREATE) {
                                                mtx_lock(&tpr->pr_mtx);
@@ -1237,7 +1223,8 @@ kern_jail_set(struct thread *td, struct 
                if (ppr->pr_ref == 0) {
                        mtx_unlock(&ppr->pr_mtx);
                        error = ENOENT;
-                       vfs_opterror(opts, "parent jail went away!");
+                       vfs_opterror(opts, "jail \"%s\" not found",
+                           prison_name(mypr, ppr));
                        goto done_unlock_list;
                }
                ppr->pr_ref++;
@@ -1291,8 +1278,8 @@ kern_jail_set(struct thread *td, struct 
                pr->pr_id = jid;
 
                /* Set some default values, and inherit some from the parent. */
-               if (name == NULL)
-                       name = "";
+               if (namelc == NULL)
+                       namelc = "";
                if (path == NULL) {
                        path = "/";
                        root = mypr->pr_root;
@@ -1355,6 +1342,7 @@ kern_jail_set(struct thread *td, struct 
 
                LIST_INIT(&pr->pr_children);
                mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK);
+               TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
 
 #ifdef VIMAGE
                /* Allocate a new vnet if specified. */
@@ -1374,7 +1362,7 @@ kern_jail_set(struct thread *td, struct 
                mtx_lock(&pr->pr_mtx);
                /*
                 * New prisons do not yet have a reference, because we do not
-                * want other to see the incomplete prison once the
+                * want others to see the incomplete prison once the
                 * allprison_lock is downgraded.
                 */
        } else {
@@ -1588,13 +1576,13 @@ kern_jail_set(struct thread *td, struct 
        }
 #endif
        onamelen = namelen = 0;
-       if (name != NULL) {
+       if (namelc != NULL) {
                /* Give a default name of the jid.  Also allow the name to be
                 * explicitly the jid - but not any other number, and only in
                 * normal form (no leading zero/etc).
                 */
-               if (name[0] == '\0')
-                       snprintf(name = numbuf, sizeof(numbuf), "%d", jid);
+               if (namelc[0] == '\0')
+                       snprintf(namelc = numbuf, sizeof(numbuf), "%d", jid);
                else if ((strtoul(namelc, &p, 10) != jid ||
                          namelc[0] < '1' || namelc[0] > '9') && *p == '\0') {
                        error = EINVAL;
@@ -1606,9 +1594,10 @@ kern_jail_set(struct thread *td, struct 
                 * Make sure the name isn't too long for the prison or its
                 * children.
                 */
-               onamelen = strlen(pr->pr_name);
-               namelen = strlen(name);
-               if (strlen(ppr->pr_name) + namelen + 2 > sizeof(pr->pr_name)) {
+               pnamelen = (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
+               onamelen = strlen(pr->pr_name + pnamelen);
+               namelen = strlen(namelc);
+               if (pnamelen + namelen + 1 > sizeof(pr->pr_name)) {
                        error = ENAMETOOLONG;
                        goto done_deref_locked;
                }
@@ -1625,6 +1614,30 @@ kern_jail_set(struct thread *td, struct 
                goto done_deref_locked;
        }
 
+       /*
+        * Let modules check their parameters.  This requires unlocking and
+        * then re-locking the prison, but this is still a valid state as long
+        * as allprison_lock remains xlocked.
+        */
+       mtx_unlock(&pr->pr_mtx);
+       error = osd_jail_call(pr, PR_METHOD_CHECK, opts);
+       if (error != 0) {
+               prison_deref(pr, created
+                   ? PD_LIST_XLOCKED
+                   : PD_DEREF | PD_LIST_XLOCKED);
+               goto done_releroot;
+       }
+       mtx_lock(&pr->pr_mtx);
+
+       /* At this point, all valid parameters should have been noted. */
+       TAILQ_FOREACH(opt, opts, link) {
+               if (!opt->seen && strcmp(opt->name, "errmsg")) {
+                       error = EINVAL;
+                       vfs_opterror(opts, "unknown parameter: %s", opt->name);
+                       goto done_deref_locked;
+               }
+       }
+
        /* Set the parameters of the prison. */
 #ifdef INET
        redo_ip4 = 0;
@@ -1698,12 +1711,12 @@ kern_jail_set(struct thread *td, struct 
                FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
                        tpr->pr_devfs_rsnum = rsnum;
        }
-       if (name != NULL) {
+       if (namelc != NULL) {
                if (ppr == &prison0)
-                       strlcpy(pr->pr_name, name, sizeof(pr->pr_name));
+                       strlcpy(pr->pr_name, namelc, sizeof(pr->pr_name));
                else
                        snprintf(pr->pr_name, sizeof(pr->pr_name), "%s.%s",
-                           ppr->pr_name, name);
+                           ppr->pr_name, namelc);
                /* Change this component of child names. */
                FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
                        bcopy(tpr->pr_name + onamelen, tpr->pr_name + namelen,
@@ -1781,6 +1794,7 @@ kern_jail_set(struct thread *td, struct 
         * for now, so new ones will remain unseen until after the module
         * handlers have completed.
         */
+       born = pr->pr_uref == 0;
        if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) {
                if (pr_flags & PR_PERSIST) {
                        pr->pr_ref++;
@@ -1850,15 +1864,20 @@ kern_jail_set(struct thread *td, struct 
 
        /* Let the modules do their work. */
        sx_downgrade(&allprison_lock);
-       if (created) {
+       if (born) {
                error = osd_jail_call(pr, PR_METHOD_CREATE, opts);
                if (error) {
-                       prison_deref(pr, PD_LIST_SLOCKED);
+                       (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
+                       prison_deref(pr, created
+                           ? PD_LIST_SLOCKED
+                           : PD_DEREF | PD_LIST_SLOCKED);
                        goto done_errmsg;
                }
        }
        error = osd_jail_call(pr, PR_METHOD_SET, opts);
        if (error) {
+               if (born)
+                       (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
                prison_deref(pr, created
                    ? PD_LIST_SLOCKED
                    : PD_DEREF | PD_LIST_SLOCKED);
@@ -1910,7 +1929,7 @@ kern_jail_set(struct thread *td, struct 
                        sx_sunlock(&allprison_lock);
        }
 
-       goto done_errmsg;
+       goto done_free;
 
  done_deref_locked:
        prison_deref(pr, created
@@ -2404,7 +2423,6 @@ sys_jail_attach(struct thread *td, struc
 static int
 do_jail_attach(struct thread *td, struct prison *pr)
 {
-       struct prison *ppr;
        struct proc *p;
        struct ucred *newcred, *oldcred;
        int error;
@@ -2432,7 +2450,6 @@ do_jail_attach(struct thread *td, struct
        /*
         * Reparent the newly attached process to this jail.
         */
-       ppr = td->td_ucred->cr_prison;
        p = td->td_proc;
        error = cpuset_setproc_update_set(p, pr->pr_cpuset);
        if (error)
@@ -2451,23 +2468,23 @@ do_jail_attach(struct thread *td, struct
 
        newcred = crget();
        PROC_LOCK(p);
-       oldcred = p->p_ucred;
-       setsugid(p);
-       crcopy(newcred, oldcred);
+       oldcred = crcopysafe(p, newcred);
        newcred->cr_prison = pr;
        p->p_ucred = newcred;
+       setsugid(p);
        PROC_UNLOCK(p);
 #ifdef RACCT
        racct_proc_ucred_changed(p, oldcred, newcred);
 #endif
+       prison_deref(oldcred->cr_prison, PD_DEREF | PD_DEUREF);
        crfree(oldcred);
-       prison_deref(ppr, PD_DEREF | PD_DEUREF);
        return (0);
+
  e_unlock:
        VOP_UNLOCK(pr->pr_root, 0);
  e_revert_osd:
        /* Tell modules this thread is still in its old jail after all. */
-       (void)osd_jail_call(ppr, PR_METHOD_ATTACH, td);
+       (void)osd_jail_call(td->td_ucred->cr_prison, PR_METHOD_ATTACH, td);
        prison_deref(pr, PD_DEREF | PD_DEUREF);
        return (error);
 }
@@ -2576,16 +2593,13 @@ prison_allow(struct ucred *cred, unsigne
 void
 prison_free_locked(struct prison *pr)
 {
+       int ref;
 
        mtx_assert(&pr->pr_mtx, MA_OWNED);
-       pr->pr_ref--;
-       if (pr->pr_ref == 0) {
-               mtx_unlock(&pr->pr_mtx);
-               TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
-               taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
-               return;
-       }
+       ref = --pr->pr_ref;
        mtx_unlock(&pr->pr_mtx);
+       if (ref == 0)
+               taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
 }
 
 void
@@ -2596,11 +2610,17 @@ prison_free(struct prison *pr)
        prison_free_locked(pr);
 }
 
+/*
+ * Complete a call to either prison_free or prison_proc_free.
+ */
 static void
 prison_complete(void *context, int pending)
 {
+       struct prison *pr = context;
 
-       prison_deref((struct prison *)context, 0);
+       mtx_lock(&pr->pr_mtx);
+       prison_deref(pr, pr->pr_uref
+           ? PD_DEREF | PD_DEUREF | PD_LOCKED : PD_LOCKED);
 }
 
 /*
@@ -2613,19 +2633,53 @@ static void
 prison_deref(struct prison *pr, int flags)
 {
        struct prison *ppr, *tpr;
+       int ref, lasturef;
 
        if (!(flags & PD_LOCKED))
                mtx_lock(&pr->pr_mtx);
        for (;;) {
                if (flags & PD_DEUREF) {
+                       KASSERT(pr->pr_uref > 0,
+                           ("prison_deref PD_DEUREF on a dead prison (jid=%d)",
+                            pr->pr_id));
                        pr->pr_uref--;
+                       lasturef = pr->pr_uref == 0;
+                       if (lasturef)
+                               pr->pr_ref++;
                        KASSERT(prison0.pr_uref != 0, ("prison0 pr_uref=0"));
-               }
-               if (flags & PD_DEREF)
+               } else
+                       lasturef = 0;
+               if (flags & PD_DEREF) {
+                       KASSERT(pr->pr_ref > 0,
+                           ("prison_deref PD_DEREF on a dead prison (jid=%d)",
+                            pr->pr_id));
                        pr->pr_ref--;
-               /* If the prison still has references, nothing else to do. */
-               if (pr->pr_ref > 0) {
+               }
+               ref = pr->pr_ref;
+               mtx_unlock(&pr->pr_mtx);
+
+               /*
+                * Tell the modules if the last user reference was removed
+                * (even it sticks around in dying state).
+                */
+               if (lasturef) {
+                       if (!(flags & (PD_LIST_SLOCKED | PD_LIST_XLOCKED))) {
+                               if (ref > 1) {
+                                       sx_slock(&allprison_lock);
+                                       flags |= PD_LIST_SLOCKED;
+                               } else {
+                                       sx_xlock(&allprison_lock);
+                                       flags |= PD_LIST_XLOCKED;
+                               }
+                       }
+                       (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
+                       mtx_lock(&pr->pr_mtx);
+                       ref = --pr->pr_ref;
                        mtx_unlock(&pr->pr_mtx);
+               }
+
+               /* If the prison still has references, nothing else to do. */
+               if (ref > 0) {
                        if (flags & PD_LIST_SLOCKED)
                                sx_sunlock(&allprison_lock);
                        else if (flags & PD_LIST_XLOCKED)
@@ -2633,7 +2687,6 @@ prison_deref(struct prison *pr, int flag
                        return;
                }
 
-               mtx_unlock(&pr->pr_mtx);
                if (flags & PD_LIST_SLOCKED) {
                        if (!sx_try_upgrade(&allprison_lock)) {
                                sx_sunlock(&allprison_lock);
@@ -2715,7 +2768,20 @@ prison_proc_free(struct prison *pr)
        mtx_lock(&pr->pr_mtx);
        KASSERT(pr->pr_uref > 0,
            ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id));
-       prison_deref(pr, PD_DEUREF | PD_LOCKED);
+       if (pr->pr_uref > 1)
+               pr->pr_uref--;
+       else {
+               /*
+                * Don't remove the last user reference in this context, which
+                * is expected to be a process that is not only locked, but
+                * also half dead.
+                */
+               pr->pr_ref++;
+               mtx_unlock(&pr->pr_mtx);
+               taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
+               return;
+       }
+       mtx_unlock(&pr->pr_mtx);
 }
 
 

Modified: stable/10/sys/sys/jail.h
==============================================================================
--- stable/10/sys/sys/jail.h    Sat Apr 30 03:05:36 2016        (r298832)
+++ stable/10/sys/sys/jail.h    Sat Apr 30 03:19:07 2016        (r298833)
@@ -149,7 +149,6 @@ struct prison_racct;
  *   (p) locked by pr_mtx
  *   (c) set only during creation before the structure is shared, no mutex
  *       required to read
- *   (d) set only during destruction of jail, no mutex needed
  */
 struct prison {
        TAILQ_ENTRY(prison) pr_list;                    /* (a) all prisons */
@@ -161,7 +160,7 @@ struct prison {
        LIST_ENTRY(prison) pr_sibling;                  /* (a) next in parent's 
list */
        struct prison   *pr_parent;                     /* (c) containing jail 
*/
        struct mtx       pr_mtx;
-       struct task      pr_task;                       /* (d) destroy task */
+       struct task      pr_task;                       /* (c) destroy task */
        struct osd       pr_osd;                        /* (p) additional data 
*/
        struct cpuset   *pr_cpuset;                     /* (p) cpuset */
        struct vnet     *pr_vnet;                       /* (c) network stack */
@@ -243,7 +242,8 @@ struct prison_racct {
 #define        PR_METHOD_SET           2
 #define        PR_METHOD_CHECK         3
 #define        PR_METHOD_ATTACH        4
-#define        PR_MAXMETHOD            5
+#define        PR_METHOD_REMOVE        5
+#define        PR_MAXMETHOD            6
 
 /*
  * Lock/unlock a prison.
_______________________________________________
svn-src-stable-10@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-stable-10
To unsubscribe, send any mail to "svn-src-stable-10-unsubscr...@freebsd.org"

Reply via email to