Author: jonathan
Date: Sat Aug 13 09:21:16 2011
New Revision: 224810
URL: http://svn.freebsd.org/changeset/base/224810

Log:
  Allow Capsicum capabilities to delegate constrained
  access to file system subtrees to sandboxed processes.
  
  - Use of absolute paths and '..' is limited in capability mode.
  - Use of absolute paths and '..' is limited when looking up relative
    to a capability.
  - When a name lookup is performed, identify the right required by the
    operation to be performed (such as CAP_MKDIR) and check for it in
    addition to CAP_LOOKUP.
  
  With these constraints, openat() and friends are now safe in capability
  mode, and can then be used by code such as the capability-mode runtime
  linker.
  
  Approved by: re (bz), mentor (rwatson)
  Sponsored by: Google Inc

Modified:
  head/sys/kern/kern_descrip.c
  head/sys/kern/sys_capability.c
  head/sys/kern/vfs_lookup.c
  head/sys/kern/vfs_syscalls.c
  head/sys/sys/capability.h
  head/sys/sys/namei.h

Modified: head/sys/kern/kern_descrip.c
==============================================================================
--- head/sys/kern/kern_descrip.c        Sat Aug 13 00:56:42 2011        (r224809)
+++ head/sys/kern/kern_descrip.c        Sat Aug 13 09:21:16 2011        (r224810)
@@ -2336,6 +2336,16 @@ _fget(struct thread *td, int fd, struct 
 
 #ifdef CAPABILITIES
        /*
+        * If this is a capability, what rights does it have?
+        */
+       if (haverightsp != NULL) {
+               if (fp->f_type == DTYPE_CAPABILITY)
+                       *haverightsp = cap_rights(fp);
+               else
+                       *haverightsp = CAP_MASK_VALID;
+       }
+
+       /*
         * If a capability has been requested, return the capability directly.
         * Otherwise, check capability rights, extract the underlying object,
         * and check its access flags.

Modified: head/sys/kern/sys_capability.c
==============================================================================
--- head/sys/kern/sys_capability.c      Sat Aug 13 00:56:42 2011        (r224809)
+++ head/sys/kern/sys_capability.c      Sat Aug 13 09:21:16 2011        (r224810)
@@ -220,7 +220,7 @@ cap_new(struct thread *td, struct cap_ne
 {
        int error, capfd;
        int fd = uap->fd;
-       struct file *fp, *fcapp;
+       struct file *fp;
        cap_rights_t rights = uap->rights;
 
        AUDIT_ARG_FD(fd);
@@ -229,7 +229,7 @@ cap_new(struct thread *td, struct cap_ne
        if (error)
                return (error);
        AUDIT_ARG_FILE(td->td_proc, fp);
-       error = kern_capwrap(td, fp, rights, &fcapp, &capfd);
+       error = kern_capwrap(td, fp, rights, &capfd);
        if (error)
                return (error);
 
@@ -267,10 +267,10 @@ cap_getrights(struct thread *td, struct 
  */
 int
 kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights,
-    struct file **fcappp, int *capfdp)
+    int *capfdp)
 {
        struct capability *cp, *cp_old;
-       struct file *fp_object;
+       struct file *fp_object, *fcapp;
        int error;
 
        if ((rights | CAP_MASK_VALID) != CAP_MASK_VALID)
@@ -290,7 +290,7 @@ kern_capwrap(struct thread *td, struct f
        /*
         * Allocate a new file descriptor to hang the capability off of.
         */
-       error = falloc(td, fcappp, capfdp, fp->f_flag);
+       error = falloc(td, &fcapp, capfdp, fp->f_flag);
        if (error)
                return (error);
 
@@ -309,18 +309,18 @@ kern_capwrap(struct thread *td, struct f
        cp = uma_zalloc(capability_zone, M_WAITOK | M_ZERO);
        cp->cap_rights = rights;
        cp->cap_object = fp_object;
-       cp->cap_file = *fcappp;
+       cp->cap_file = fcapp;
        if (fp->f_flag & DFLAG_PASSABLE)
-               finit(*fcappp, fp->f_flag, DTYPE_CAPABILITY, cp,
+               finit(fcapp, fp->f_flag, DTYPE_CAPABILITY, cp,
                    &capability_ops);
        else
-               finit(*fcappp, fp->f_flag, DTYPE_CAPABILITY, cp,
+               finit(fcapp, fp->f_flag, DTYPE_CAPABILITY, cp,
                    &capability_ops_unpassable);
 
        /*
         * Release our private reference (the proc filedesc still has one).
         */
-       fdrop(*fcappp, td);
+       fdrop(fcapp, td);
        return (0);
 }
 

Modified: head/sys/kern/vfs_lookup.c
==============================================================================
--- head/sys/kern/vfs_lookup.c  Sat Aug 13 00:56:42 2011        (r224809)
+++ head/sys/kern/vfs_lookup.c  Sat Aug 13 09:21:16 2011        (r224810)
@@ -180,6 +180,18 @@ namei(struct nameidata *ndp)
        if (!error && *cnp->cn_pnbuf == '\0')
                error = ENOENT;
 
+#ifdef CAPABILITY_MODE
+       /*
+        * In capability mode, lookups must be "strictly relative" (i.e.
+        * not an absolute path, and not containing '..' components) to
+        * a real file descriptor, not the pseudo-descriptor AT_FDCWD.
+        */
+       if (IN_CAPABILITY_MODE(td)) {
+               ndp->ni_strictrelative = 1;
+               if (ndp->ni_dirfd == AT_FDCWD)
+                       error = ECAPMODE;
+       }
+#endif
        if (error) {
                uma_zfree(namei_zone, cnp->cn_pnbuf);
 #ifdef DIAGNOSTIC
@@ -214,12 +226,20 @@ namei(struct nameidata *ndp)
                                AUDIT_ARG_ATFD1(ndp->ni_dirfd);
                        if (cnp->cn_flags & AUDITVNODE2)
                                AUDIT_ARG_ATFD2(ndp->ni_dirfd);
-#ifdef CAPABILITY_MODE
-                       KASSERT(!IN_CAPABILITY_MODE(td),
-                           ("%s: reached %s:%d in capability mode",
-                            __func__, __FILE__, __LINE__));
+                       error = fgetvp_rights(td, ndp->ni_dirfd,
+                           ndp->ni_rightsneeded | CAP_LOOKUP,
+                           &(ndp->ni_baserights), &dp);
+#ifdef CAPABILITIES
+                       /*
+                        * Lookups relative to a capability must also be
+                        * strictly relative.
+                        *
+                        * Note that a capability with rights CAP_MASK_VALID
+                        * is treated exactly like a regular file descriptor.
+                        */
+                       if (ndp->ni_baserights != CAP_MASK_VALID)
+                               ndp->ni_strictrelative = 1;
 #endif
-                       error = fgetvp(td, ndp->ni_dirfd, 0, &dp);
                }
                if (error != 0 || dp != NULL) {
                        FILEDESC_SUNLOCK(fdp);
@@ -261,6 +281,8 @@ namei(struct nameidata *ndp)
                if (*(cnp->cn_nameptr) == '/') {
                        vrele(dp);
                        VFS_UNLOCK_GIANT(vfslocked);
+                       if (ndp->ni_strictrelative != 0)
+                               return (ENOTCAPABLE);
                        while (*(cnp->cn_nameptr) == '/') {
                                cnp->cn_nameptr++;
                                ndp->ni_pathlen--;
@@ -604,7 +626,10 @@ dirloop:
        }
 
        /*
-        * Handle "..": four special cases.
+        * Handle "..": five special cases.
+        * 0. If doing a capability lookup, return ENOTCAPABLE (this is a
+        *    fairly conservative design choice, but it's the only one that we
+        *    are satisfied guarantees the property we're looking for).
         * 1. Return an error if this is the last component of
         *    the name and the operation is DELETE or RENAME.
         * 2. If at root directory (e.g. after chroot)
@@ -618,6 +643,10 @@ dirloop:
         *    the jail or chroot, don't let them out.
         */
        if (cnp->cn_flags & ISDOTDOT) {
+               if (ndp->ni_strictrelative != 0) {
+                       error = ENOTCAPABLE;
+                       goto bad;
+               }
                if ((cnp->cn_flags & ISLASTCN) != 0 &&
                    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
                        error = EINVAL;

Modified: head/sys/kern/vfs_syscalls.c
==============================================================================
--- head/sys/kern/vfs_syscalls.c        Sat Aug 13 00:56:42 2011        (r224809)
+++ head/sys/kern/vfs_syscalls.c        Sat Aug 13 09:21:16 2011        (r224810)
@@ -993,6 +993,41 @@ change_root(vp, td)
        return (0);
 }
 
+/*
+ * Map open(2) flags to the capability rights the open requires.  O_EXEC is a
+ * separate flag bit, not an O_ACCMODE value, so test it before the switch.
+ */
+static __inline cap_rights_t
+flags_to_rights(int flags)
+{
+       cap_rights_t rights = 0;
+
+       if (flags & O_EXEC) {
+               rights |= CAP_FEXECVE;
+       } else {
+               switch ((flags & O_ACCMODE)) {
+               case O_RDONLY:
+                       rights |= CAP_READ;
+                       break;
+               case O_RDWR:
+                       rights |= CAP_READ;
+                       /* FALLTHROUGH */
+               case O_WRONLY:
+                       rights |= CAP_WRITE;
+                       break;
+               }
+       }
+       if (flags & O_CREAT)
+               rights |= CAP_CREATE;
+       if (flags & O_TRUNC)
+               rights |= CAP_FTRUNCATE;
+       /* Either kind of lock request needs CAP_FLOCK. */
+       if (flags & (O_EXLOCK | O_SHLOCK))
+               rights |= CAP_FLOCK;
+
+       return (rights);
+}
+
 /*
  * Check permissions, allocate an open file structure, and call the device
  * open routine if any.
@@ -1055,10 +1090,12 @@ kern_openat(struct thread *td, int fd, c
        struct flock lf;
        struct nameidata nd;
        int vfslocked;
+       cap_rights_t rights_needed = CAP_LOOKUP;
 
        AUDIT_ARG_FFLAGS(flags);
        AUDIT_ARG_MODE(mode);
        /* XXX: audit dirfd */
+       rights_needed |= flags_to_rights(flags);
        /*
         * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
         * may be specified.
@@ -1082,8 +1119,8 @@ kern_openat(struct thread *td, int fd, c
        /* Set the flags early so the finit in devfs can pick them up. */
        fp->f_flag = flags & FMASK;
        cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
-       NDINIT_AT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, fd,
-           td);
+       NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg,
+           path, fd, rights_needed, td);
        td->td_dupfd = -1;              /* XXX check for fdopen */
        error = vn_open(&nd, &flags, cmode, fp);
        if (error) {
@@ -1092,18 +1129,20 @@ kern_openat(struct thread *td, int fd, c
                 * wonderous happened deep below and we just pass it up
                 * pretending we know what we do.
                 */
-               if (error == ENXIO && fp->f_ops != &badfileops) {
-                       fdrop(fp, td);
-                       td->td_retval[0] = indx;
-                       return (0);
-               }
+               if (error == ENXIO && fp->f_ops != &badfileops)
+                       goto success;
 
                /*
                 * handle special fdopen() case.  bleh.  dupfdopen() is
                 * responsible for dropping the old contents of ofiles[indx]
                 * if it succeeds.
+                *
+                * Don't do this for relative (capability) lookups; we don't
+                * understand exactly what would happen, and we don't think
+                * that it ever should.
                 */
-               if ((error == ENODEV || error == ENXIO) &&
+               if ((nd.ni_strictrelative == 0) &&
+                   (error == ENODEV || error == ENXIO) &&
                    (td->td_dupfd >= 0)) {
                        /* XXX from fdopen */
                        if ((error = finstall(td, fp, &indx, flags)) != 0)
@@ -1172,9 +1211,22 @@ success:
        /*
         * If we haven't already installed the FD (for dupfdopen), do so now.
         */
-       if (indx == -1)
-               if ((error = finstall(td, fp, &indx, flags)) != 0)
-                       goto bad_unlocked;
+       if (indx == -1) {
+#ifdef CAPABILITIES
+               if (nd.ni_strictrelative == 1) {
+                       /*
+                        * We are doing a strict relative lookup; wrap the
+                        * result in a capability.
+                        */
+                       if ((error = kern_capwrap(td, fp, nd.ni_baserights,
+                           &indx)) != 0)
+                               goto bad_unlocked;
+               } else
+#endif
+                       if ((error = finstall(td, fp, &indx, flags)) != 0)
+                               goto bad_unlocked;
+
+       }
 
        /*
         * Release our private reference, leaving the one associated with
@@ -1301,8 +1353,9 @@ kern_mknodat(struct thread *td, int fd, 
                return (error);
 restart:
        bwillwrite();
-       NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
-           pathseg, path, fd, td);
+       NDINIT_ATRIGHTS(&nd, CREATE,
+           LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1, pathseg, path, fd,
+           CAP_MKFIFO, td);
        if ((error = namei(&nd)) != 0)
                return (error);
        vfslocked = NDHASGIANT(&nd);
@@ -2153,8 +2206,8 @@ kern_accessat(struct thread *td, int fd,
        } else
                cred = tmpcred = td->td_ucred;
        AUDIT_ARG_VALUE(mode);
-       NDINIT_AT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
-           AUDITVNODE1, pathseg, path, fd, td);
+       NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
+           AUDITVNODE1, pathseg, path, fd, CAP_FSTAT, td);
        if ((error = namei(&nd)) != 0)
                goto out1;
        vfslocked = NDHASGIANT(&nd);
@@ -2363,9 +2416,9 @@ kern_statat_vnhook(struct thread *td, in
        if (flag & ~AT_SYMLINK_NOFOLLOW)
                return (EINVAL);
 
-       NDINIT_AT(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
+       NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
            FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | MPSAFE, pathseg,
-           path, fd, td);
+           path, fd, CAP_FSTAT, td);
 
        if ((error = namei(&nd)) != 0)
                return (error);
@@ -2920,8 +2973,8 @@ kern_fchmodat(struct thread *td, int fd,
 
        AUDIT_ARG_MODE(mode);
        follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
-       NDINIT_AT(&nd, LOOKUP,  follow | MPSAFE | AUDITVNODE1, pathseg, path,
-           fd, td);
+       NDINIT_ATRIGHTS(&nd, LOOKUP,  follow | MPSAFE | AUDITVNODE1, pathseg,
+           path, fd, CAP_FCHMOD, td);
        if ((error = namei(&nd)) != 0)
                return (error);
        vfslocked = NDHASGIANT(&nd);
@@ -3063,8 +3116,8 @@ kern_fchownat(struct thread *td, int fd,
 
        AUDIT_ARG_OWNER(uid, gid);
        follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
-       NDINIT_AT(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg, path,
-           fd, td);
+       NDINIT_ATRIGHTS(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg,
+           path, fd, CAP_FCHOWN, td);
 
        if ((error = namei(&nd)) != 0)
                return (error);
@@ -3279,8 +3332,8 @@ kern_utimesat(struct thread *td, int fd,
 
        if ((error = getutimes(tptr, tptrseg, ts)) != 0)
                return (error);
-       NDINIT_AT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path,
-           fd, td);
+       NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg,
+           path, fd, CAP_FUTIMES, td);
 
        if ((error = namei(&nd)) != 0)
                return (error);
@@ -3610,11 +3663,11 @@ kern_renameat(struct thread *td, int old
 
        bwillwrite();
 #ifdef MAC
-       NDINIT_AT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
-           AUDITVNODE1, pathseg, old, oldfd, td);
+       NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
+           MPSAFE | AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td);
 #else
-       NDINIT_AT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
-           AUDITVNODE1, pathseg, old, oldfd, td);
+       NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
+           AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td);
 #endif
 
        if ((error = namei(&fromnd)) != 0)
@@ -3637,8 +3690,9 @@ kern_renameat(struct thread *td, int old
                vrele(fvp);
                goto out1;
        }
-       NDINIT_AT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
-           MPSAFE | AUDITVNODE2, pathseg, new, newfd, td);
+       NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
+           SAVESTART | MPSAFE | AUDITVNODE2, pathseg, new, newfd, CAP_CREATE,
+           td);
        if (fromnd.ni_vp->v_type == VDIR)
                tond.ni_cnd.cn_flags |= WILLBEDIR;
        if ((error = namei(&tond)) != 0) {
@@ -3764,8 +3818,8 @@ kern_mkdirat(struct thread *td, int fd, 
        AUDIT_ARG_MODE(mode);
 restart:
        bwillwrite();
-       NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
-           segflg, path, fd, td);
+       NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE |
+           AUDITVNODE1, segflg, path, fd, CAP_MKDIR, td);
        nd.ni_cnd.cn_flags |= WILLBEDIR;
        if ((error = namei(&nd)) != 0)
                return (error);
@@ -3853,8 +3907,8 @@ kern_rmdirat(struct thread *td, int fd, 
 
 restart:
        bwillwrite();
-       NDINIT_AT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
-           pathseg, path, fd, td);
+       NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE |
+           AUDITVNODE1, pathseg, path, fd, CAP_RMDIR, td);
        if ((error = namei(&nd)) != 0)
                return (error);
        vfslocked = NDHASGIANT(&nd);

Modified: head/sys/sys/capability.h
==============================================================================
--- head/sys/sys/capability.h   Sat Aug 13 00:56:42 2011        (r224809)
+++ head/sys/sys/capability.h   Sat Aug 13 09:21:16 2011        (r224810)
@@ -142,7 +142,7 @@
  * Create a capability to wrap a file object.
  */
 int    kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights,
-           struct file **cap, int *capfd);
+           int *capfd);
 
 /*
  * Unwrap a capability if its rights mask is a superset of 'rights'.

Modified: head/sys/sys/namei.h
==============================================================================
--- head/sys/sys/namei.h        Sat Aug 13 00:56:42 2011        (r224809)
+++ head/sys/sys/namei.h        Sat Aug 13 09:21:16 2011        (r224810)
@@ -63,6 +63,7 @@ struct nameidata {
         */
        const   char *ni_dirp;          /* pathname pointer */
        enum    uio_seg ni_segflg;      /* location of pathname */
+       cap_rights_t ni_rightsneeded;   /* rights required to look up vnode */
        /*
         * Arguments to lookup.
         */
@@ -70,6 +71,11 @@ struct nameidata {
        struct  vnode *ni_rootdir;      /* logical root directory */
        struct  vnode *ni_topdir;       /* logical top directory */
        int     ni_dirfd;               /* starting directory for *at functions */
+       int     ni_strictrelative;      /* relative lookup only; no '..' */
+       /*
+        * Results: returned from namei
+        */
+       cap_rights_t ni_baserights;     /* rights the *at base has (or -1) */
        /*
         * Results: returned from/manipulated by lookup
         */
@@ -151,11 +157,13 @@ struct nameidata {
  * Initialization of a nameidata structure.
  */
 #define        NDINIT(ndp, op, flags, segflg, namep, td)                       \
-       NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, NULL, td)
+       NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, NULL, 0, td)
 #define        NDINIT_AT(ndp, op, flags, segflg, namep, dirfd, td)             \
-       NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, td)
+       NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, 0, td)
+#define        NDINIT_ATRIGHTS(ndp, op, flags, segflg, namep, dirfd, rights, td) \
+       NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, rights, td)
 #define        NDINIT_ATVP(ndp, op, flags, segflg, namep, vp, td)              \
-       NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, vp, td)
+       NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, vp, 0, td)
 
 static __inline void
 NDINIT_ALL(struct nameidata *ndp,
@@ -164,6 +172,7 @@ NDINIT_ALL(struct nameidata *ndp,
        const char *namep,
        int dirfd,
        struct vnode *startdir,
+       cap_rights_t rights,
        struct thread *td)
 {
        ndp->ni_cnd.cn_nameiop = op;
@@ -172,6 +181,9 @@ NDINIT_ALL(struct nameidata *ndp,
        ndp->ni_dirp = namep;
        ndp->ni_dirfd = dirfd;
        ndp->ni_startdir = startdir;
+       ndp->ni_strictrelative = 0;
+       ndp->ni_rightsneeded = rights;
+       ndp->ni_baserights = 0;
        ndp->ni_cnd.cn_thread = td;
 }
 
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to