The branch stable/13 has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=b0c1b3827e2f88bbde26d461417d8c25f9645d12

commit b0c1b3827e2f88bbde26d461417d8c25f9645d12
Author:     Konstantin Belousov <k...@freebsd.org>
AuthorDate: 2021-03-18 10:41:47 +0000
Commit:     Konstantin Belousov <k...@freebsd.org>
CommitDate: 2021-04-23 11:14:10 +0000

    open(2): Implement O_PATH
    
    (cherry picked from commit 8d9ed174f3afba5f114742447e622fc1173d4774)
---
 lib/libc/sys/open.2     | 41 ++++++++++++++++++++++++++++++++-
 sys/kern/kern_descrip.c | 46 +++++++++++++++++++++++++++++++++----
 sys/kern/vfs_aio.c      |  5 ++++
 sys/kern/vfs_lookup.c   |  6 +++--
 sys/kern/vfs_syscalls.c | 61 ++++++++++++++++++++++++++++++++++++++-----------
 sys/kern/vfs_vnops.c    | 34 ++++++++++++++++-----------
 sys/sys/fcntl.h         |  8 ++++---
 sys/sys/file.h          |  1 +
 sys/sys/filedesc.h      |  2 ++
 9 files changed, 168 insertions(+), 36 deletions(-)

diff --git a/lib/libc/sys/open.2 b/lib/libc/sys/open.2
index e24c823d039a..f9c54bfc7581 100644
--- a/lib/libc/sys/open.2
+++ b/lib/libc/sys/open.2
@@ -28,7 +28,7 @@
 .\"     @(#)open.2     8.2 (Berkeley) 11/16/93
 .\" $FreeBSD$
 .\"
-.Dd February 23, 2021
+.Dd March 18, 2021
 .Dt OPEN 2
 .Os
 .Sh NAME
@@ -168,6 +168,7 @@ O_DIRECTORY error if file is not a directory
 O_CLOEXEC      set FD_CLOEXEC upon open
 O_VERIFY       verify the contents of the file
 O_RESOLVE_BENEATH      path resolution must not cross the fd directory
+O_PATH         record only the target path in the opened descriptor
 .Ed
 .Pp
 Opening a file with
@@ -316,6 +317,44 @@ The primary use for this descriptor will be as the lookup descriptor for the
 .Fn *at
 family of functions.
 .Pp
+.Dv O_PATH
+returns a file descriptor that can be used as a directory file descriptor for
+.Xr openat 2
+and other system calls taking a file descriptor argument, like
+.Xr fstatat 2
+and others.
+The other functionality of the returned file descriptor is limited to
+the descriptor-level operations.
+It can be used for
+.Bl -tag -width SCM_RIGHTS -offset indent -compact
+.It Xr fcntl 2
+but advisory locking is not allowed
+.It Xr dup 2
+.It Xr close 2
+.It Xr fstat 2
+.It Xr fexecve 2
+requires that
+.Dv O_EXEC
+was also specified at open time
+.It Dv SCM_RIGHTS
+can be passed over a
+.Xr unix 4
+socket using a
+.Dv SCM_RIGHTS
+message
+.El
+But operations like
+.Xr read 2 ,
+.Xr ftruncate 2 ,
+and any other that operate on file and not on file descriptor (except
+.Xr fstat 2 ),
+are not allowed.
+See also the description of
+.Dv AT_EMPTY_PATH
+flag for
+.Xr fstatat 2
+and related syscalls.
+.Pp
 If successful,
 .Fn open
 returns a non-negative integer, termed a file descriptor.
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index 7a43fbb2eb80..81af58fbddd1 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/selinfo.h>
+#include <sys/poll.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
@@ -546,6 +547,11 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
                error = fget_fcntl(td, fd, &cap_fcntl_rights, F_SETFL, &fp);
                if (error != 0)
                        break;
+               if (fp->f_ops == &path_fileops) {
+                       fdrop(fp, td);
+                       error = EBADF;
+                       break;
+               }
                do {
                        tmp = flg = fp->f_flag;
                        tmp &= ~FCNTLFLAGS;
@@ -610,7 +616,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
                error = fget_unlocked(fdp, fd, &cap_flock_rights, &fp);
                if (error != 0)
                        break;
-               if (fp->f_type != DTYPE_VNODE) {
+               if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) {
                        error = EBADF;
                        fdrop(fp, td);
                        break;
@@ -715,7 +721,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
                error = fget_unlocked(fdp, fd, &cap_flock_rights, &fp);
                if (error != 0)
                        break;
-               if (fp->f_type != DTYPE_VNODE) {
+               if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) {
                        error = EBADF;
                        fdrop(fp, td);
                        break;
@@ -771,7 +777,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
                error = fget_unlocked(fdp, fd, &cap_no_rights, &fp);
                if (error != 0)
                        break;
-               if (fp->f_type != DTYPE_VNODE) {
+               if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) {
                        fdrop(fp, td);
                        error = EBADF;
                        break;
@@ -3544,7 +3550,7 @@ sys_flock(struct thread *td, struct flock_args *uap)
        error = fget(td, uap->fd, &cap_flock_rights, &fp);
        if (error != 0)
                return (error);
-       if (fp->f_type != DTYPE_VNODE) {
+       if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) {
                fdrop(fp, td);
                return (EOPNOTSUPP);
        }
@@ -4960,6 +4966,38 @@ struct fileops badfileops = {
        .fo_fill_kinfo = badfo_fill_kinfo,
 };
 
+static int
+path_poll(struct file *fp, int events, struct ucred *active_cred,
+    struct thread *td)
+{
+       return (POLLNVAL);
+}
+
+static int
+path_close(struct file *fp, struct thread *td)
+{
+       MPASS(fp->f_type == DTYPE_VNODE);
+       fp->f_ops = &badfileops;
+       vrele(fp->f_vnode);
+       return (0);
+}
+
+struct fileops path_fileops = {
+       .fo_read = badfo_readwrite,
+       .fo_write = badfo_readwrite,
+       .fo_truncate = badfo_truncate,
+       .fo_ioctl = badfo_ioctl,
+       .fo_poll = path_poll,
+       .fo_kqfilter = badfo_kqfilter,
+       .fo_stat = vn_statfile,
+       .fo_close = path_close,
+       .fo_chmod = badfo_chmod,
+       .fo_chown = badfo_chown,
+       .fo_sendfile = badfo_sendfile,
+       .fo_fill_kinfo = vn_fill_kinfo,
+       .fo_flags = DFLAG_PASSABLE,
+};
+
 int
 invfo_rdwr(struct file *fp, struct uio *uio, struct ucred *active_cred,
     int flags, struct thread *td)
diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c
index 9b45a06c5f9f..640e82b6f0ff 100644
--- a/sys/kern/vfs_aio.c
+++ b/sys/kern/vfs_aio.c
@@ -1619,6 +1619,11 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
                goto err3;
        }
 
+       if (fp != NULL && fp->f_ops == &path_fileops) {
+               error = EBADF;
+               goto err3;
+       }
+
        job->fd_file = fp;
 
        mtx_lock(&aio_job_mtx);
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
index f4ec3cea9fff..f979676f4c7d 100644
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -360,8 +360,10 @@ namei_setup(struct nameidata *ndp, struct vnode **dpp, struct pwd **pwdp)
                        if (cnp->cn_flags & AUDITVNODE2)
                                AUDIT_ARG_ATFD2(ndp->ni_dirfd);
                        /*
-                        * Effectively inlined fgetvp_rights, because we need to
-                        * inspect the file as well as grabbing the vnode.
+                        * Effectively inlined fgetvp_rights, because
+                        * we need to inspect the file as well as
+                        * grabbing the vnode.  No check for O_PATH,
+                        * files to implement its semantic.
                         */
                        error = fget_cap(td, ndp->ni_dirfd, &rights,
                            &dfp, &ndp->ni_filecaps);
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 45f155ebff3d..5a1efcdec467 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -375,7 +375,7 @@ kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
        int error;
 
        AUDIT_ARG_FD(fd);
-       error = getvnode(td, fd, &cap_fstatfs_rights, &fp);
+       error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp);
        if (error != 0)
                return (error);
        vp = fp->f_vnode;
@@ -891,7 +891,7 @@ sys_fchdir(struct thread *td, struct fchdir_args *uap)
        int error;
 
        AUDIT_ARG_FD(uap->fd);
-       error = getvnode(td, uap->fd, &cap_fchdir_rights,
+       error = getvnode_path(td, uap->fd, &cap_fchdir_rights,
            &fp);
        if (error != 0)
                return (error);
@@ -1023,9 +1023,10 @@ change_dir(struct vnode *vp, struct thread *td)
 static __inline void
 flags_to_rights(int flags, cap_rights_t *rightsp)
 {
-
        if (flags & O_EXEC) {
                cap_rights_set_one(rightsp, CAP_FEXECVE);
+               if (flags & O_PATH)
+                       return;
        } else {
                switch ((flags & O_ACCMODE)) {
                case O_RDONLY:
@@ -1112,11 +1113,15 @@ kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
        AUDIT_ARG_MODE(mode);
        cap_rights_init_one(&rights, CAP_LOOKUP);
        flags_to_rights(flags, &rights);
+
        /*
         * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
-        * may be specified.
+        * may be specified.  On the other hand, for O_PATH any mode
+        * except O_EXEC is ignored.
         */
-       if (flags & O_EXEC) {
+       if ((flags & O_PATH) != 0) {
+               flags &= ~(O_CREAT | O_ACCMODE);
+       } else if ((flags & O_EXEC) != 0) {
                if (flags & O_ACCMODE)
                        return (EINVAL);
        } else if ((flags & O_ACCMODE) == O_ACCMODE) {
@@ -1145,8 +1150,10 @@ kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
                 * wonderous happened deep below and we just pass it up
                 * pretending we know what we do.
                 */
-               if (error == ENXIO && fp->f_ops != &badfileops)
+               if (error == ENXIO && fp->f_ops != &badfileops) {
+                       MPASS((flags & O_PATH) == 0);
                        goto success;
+               }
 
                /*
                 * Handle special fdopen() case. bleh.
@@ -1176,14 +1183,16 @@ kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
         * files that switched type in the cdevsw fdopen() method.
         */
        fp->f_vnode = vp;
+
        /*
         * If the file wasn't claimed by devfs bind it to the normal
         * vnode operations here.
         */
        if (fp->f_ops == &badfileops) {
-               KASSERT(vp->v_type != VFIFO,
+               KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0,
                    ("Unexpected fifo fp %p vp %p", fp, vp));
-               finit_vnode(fp, flags, NULL, &vnops);
+               finit_vnode(fp, flags, NULL, (flags & O_PATH) != 0 ?
+                   &path_fileops : &vnops);
        }
 
        VOP_UNLOCK(vp);
@@ -1882,7 +1891,7 @@ kern_funlinkat(struct thread *td, int dfd, const char *path, int fd,
 
        fp = NULL;
        if (fd != FD_NONE) {
-               error = getvnode(td, fd, &cap_no_rights, &fp);
+               error = getvnode_path(td, fd, &cap_no_rights, &fp);
                if (error != 0)
                        return (error);
        }
@@ -4255,12 +4264,13 @@ out:
 }
 
 /*
- * Convert a user file descriptor to a kernel file entry and check that, if it
- * is a capability, the correct rights are present. A reference on the file
- * entry is held upon returning.
+ * This variant of getvnode() allows O_PATH files.  Caller should
+ * ensure that returned file and vnode are only used for compatible
+ * semantics.
  */
 int
-getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
+getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp,
+    struct file **fpp)
 {
        struct file *fp;
        int error;
@@ -4285,10 +4295,35 @@ getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
                fdrop(fp, td);
                return (EINVAL);
        }
+
        *fpp = fp;
        return (0);
 }
 
+/*
+ * Convert a user file descriptor to a kernel file entry and check
+ * that, if it is a capability, the correct rights are present.
+ * A reference on the file entry is held upon returning.
+ */
+int
+getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
+{
+       int error;
+
+       error = getvnode_path(td, fd, rightsp, fpp);
+
+       /*
+        * Filter out O_PATH file descriptors, most getvnode() callers
+        * do not call fo_ methods.
+        */
+       if (error == 0 && (*fpp)->f_ops == &path_fileops) {
+               fdrop(*fpp, td);
+               error = EBADF;
+       }
+
+       return (error);
+}
+
 /*
  * Get an (NFS) file handle.
  */
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 98f37d26ea8c..6339295b0556 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -391,25 +391,30 @@ vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred,
                return (EOPNOTSUPP);
        if (vp->v_type != VDIR && fmode & O_DIRECTORY)
                return (ENOTDIR);
+
        accmode = 0;
-       if (fmode & (FWRITE | O_TRUNC)) {
-               if (vp->v_type == VDIR)
-                       return (EISDIR);
-               accmode |= VWRITE;
+       if ((fmode & O_PATH) == 0) {
+               if ((fmode & (FWRITE | O_TRUNC)) != 0) {
+                       if (vp->v_type == VDIR)
+                               return (EISDIR);
+                       accmode |= VWRITE;
+               }
+               if ((fmode & FREAD) != 0)
+                       accmode |= VREAD;
+               if ((fmode & O_APPEND) && (fmode & FWRITE))
+                       accmode |= VAPPEND;
+#ifdef MAC
+               if ((fmode & O_CREAT) != 0)
+                       accmode |= VCREAT;
+#endif
        }
-       if (fmode & FREAD)
-               accmode |= VREAD;
-       if (fmode & FEXEC)
+       if ((fmode & FEXEC) != 0)
                accmode |= VEXEC;
-       if ((fmode & O_APPEND) && (fmode & FWRITE))
-               accmode |= VAPPEND;
 #ifdef MAC
-       if (fmode & O_CREAT)
-               accmode |= VCREAT;
-       if (fmode & O_VERIFY)
+       if ((fmode & O_VERIFY) != 0)
                accmode |= VVERIFY;
        error = mac_vnode_check_open(cred, vp, accmode);
-       if (error)
+       if (error != 0)
                return (error);
 
        accmode &= ~(VCREAT | VVERIFY);
@@ -419,6 +424,9 @@ vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred,
                if (error != 0)
                        return (error);
        }
+       if ((fmode & O_PATH) != 0)
+               return (0);
+
        if (vp->v_type == VFIFO && VOP_ISLOCKED(vp) != LK_EXCLUSIVE)
                vn_lock(vp, LK_UPGRADE | LK_RETRY);
        error = VOP_OPEN(vp, fmode, cred, td, fp);
diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h
index 0fa4e7758c9d..c328abaa02af 100644
--- a/sys/sys/fcntl.h
+++ b/sys/sys/fcntl.h
@@ -135,7 +135,7 @@ typedef     __pid_t         pid_t;
 
 #if __BSD_VISIBLE
 #define        O_VERIFY        0x00200000      /* open only after verification */
-/* #define O_UNUSED1   0x00400000   */ /* Was O_BENEATH */
+#define O_PATH         0x00400000      /* fd is only a path */
 #define        O_RESOLVE_BENEATH 0x00800000    /* Do not allow name resolution to walk
                                           out of cwd */
 #endif
@@ -156,10 +156,12 @@ typedef   __pid_t         pid_t;
 
 /* convert from open() flags to/from fflags; convert O_RD/WR to FREAD/FWRITE */
 #define        FFLAGS(oflags)  ((oflags) & O_EXEC ? (oflags) : (oflags) + 1)
-#define        OFLAGS(fflags)  ((fflags) & O_EXEC ? (fflags) : (fflags) - 1)
+#define        OFLAGS(fflags)  \
+    (((fflags) & (O_EXEC | O_PATH)) != 0 ? (fflags) : (fflags) - 1)
 
 /* bits to save after open */
-#define        FMASK   (FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FDSYNC|FNONBLOCK|O_DIRECT|FEXEC)
+#define        FMASK   (FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FDSYNC|FNONBLOCK| \
+                O_DIRECT|FEXEC|O_PATH)
 /* bits settable by fcntl(F_SETFL, ...) */
 #define        FCNTLFLAGS      (FAPPEND|FASYNC|FFSYNC|FDSYNC|FNONBLOCK|FRDAHEAD|O_DIRECT)
 
diff --git a/sys/sys/file.h b/sys/sys/file.h
index c4fc70f517a4..9237ee5ceb9d 100644
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -239,6 +239,7 @@ struct xfile {
 
 extern struct fileops vnops;
 extern struct fileops badfileops;
+extern struct fileops path_fileops;
 extern struct fileops socketops;
 extern int maxfiles;           /* kernel limit on number of open files */
 extern int maxfilesperproc;    /* per process limit on number of open files */
diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h
index 8c5aa258ed28..7f18d8a2286c 100644
--- a/sys/sys/filedesc.h
+++ b/sys/sys/filedesc.h
@@ -265,6 +265,8 @@ struct filedesc_to_leader *
            struct filedesc *fdp, struct proc *leader);
 int    getvnode(struct thread *td, int fd, cap_rights_t *rightsp,
            struct file **fpp);
+int    getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp,
+           struct file **fpp);
 void   mountcheckdirs(struct vnode *olddp, struct vnode *newdp);
 
 int    fget_cap_locked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp,
_______________________________________________
dev-commits-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/dev-commits-src-all
To unsubscribe, send any mail to "dev-commits-src-all-unsubscr...@freebsd.org"

Reply via email to