The branch main has been updated by markj: URL: https://cgit.FreeBSD.org/src/commit/?id=f35525ff2053e026a423e852136d73ed93c95803
commit f35525ff2053e026a423e852136d73ed93c95803 Author: Mark Johnston <ma...@freebsd.org> AuthorDate: 2025-06-24 20:17:07 +0000 Commit: Mark Johnston <ma...@freebsd.org> CommitDate: 2025-06-24 21:04:18 +0000 file: Add a fd flag with O_RESOLVE_BENEATH semantics The O_RESOLVE_BENEATH openat(2) flag restricts name lookups such that they remain under the directory referenced by the dirfd. This commit introduces an implicit version of the flag, FD_RESOLVE_BENEATH, stored in the file descriptor entry. When the flag is set, any lookup relative to that fd automatically has O_RESOLVE_BENEATH semantics. Furthermore, the flag is sticky, meaning that it cannot be cleared, and it is copied by dup() and openat(). File descriptors with FD_RESOLVE_BENEATH set may not be passed to fchdir(2) or fchroot(2). Various fd lookup routines are modified to return fd flags to the caller. This flag will be used to address a case where jails with different root directories and the ability to pass SCM_RIGHTS messages across the jail boundary can transfer directory fds in such a way as to allow a filesystem escape. PR: 262180 Reviewed by: kib MFC after: 3 weeks Differential Revision: https://reviews.freebsd.org/D50371 --- lib/libsys/fcntl.2 | 63 +++++++++++++++++++---------- sys/fs/fdescfs/fdesc_vnops.c | 4 +- sys/kern/kern_descrip.c | 95 ++++++++++++++++++++++++++++++++------------ sys/kern/uipc_syscalls.c | 2 +- sys/kern/vfs_acl.c | 4 +- sys/kern/vfs_cache.c | 14 +++++-- sys/kern/vfs_extattr.c | 8 ++-- sys/kern/vfs_syscalls.c | 28 +++++++++---- sys/sys/fcntl.h | 2 + sys/sys/file.h | 2 +- sys/sys/filedesc.h | 8 +++- sys/sys/namei.h | 1 + 12 files changed, 162 insertions(+), 69 deletions(-) diff --git a/lib/libsys/fcntl.2 b/lib/libsys/fcntl.2 index b5d4abe35aeb..604de43e5e8c 100644 --- a/lib/libsys/fcntl.2 +++ b/lib/libsys/fcntl.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" -.Dd December 7, 2021 +.Dd June 5, 2025 .Dt FCNTL 2 .Os .Sh NAME @@ -80,6 +80,11 @@ associated with the new file descriptor is cleared, so the file descriptor is to remain open across .Xr execve 2 system calls. +.It +The +.Dv FD_RESOLVE_BENEATH +flag, described below, will be set if it was set on the original +descriptor. .El .It Dv F_DUPFD_CLOEXEC Like @@ -113,29 +118,47 @@ Use instead of .Dv F_DUP2FD . .It Dv F_GETFD -Get the close-on-exec flag associated with the file descriptor -.Fa fd -as -.Dv FD_CLOEXEC . -If the returned value ANDed with -.Dv FD_CLOEXEC -is 0, -the file will remain open across -.Fn exec , -otherwise the file will be closed upon execution of +Get the flags associated with the file descriptor +.Fa fd . +The following flags are defined: +.Bl -tag -width FD_RESOLVE_BENEATH +.It Dv FD_CLOEXEC +The file will be closed upon execution of .Fn exec .Fa ( arg is ignored). +Otherwise, the file descriptor will remain open. +.It Dv FD_RESOLVE_BENEATH +All path name lookups relative to that file descriptor +will behave as if the lookup had +.Dv O_RESOLVE_BENEATH +or +.Dv AT_RESOLVE_BENEATH +semantics. +It is not permitted to call +.Xr fchdir 2 +or +.Xr fchroot 2 +on such a file descriptor. +The +.Dv FD_RESOLVE_BENEATH +flag is sticky, meaning that it is preserved by +.Xr dup 2 +and similar operations, and opening a directory with +.Xr openat 2 +where the directory descriptor has the flag set causes the new directory +descriptor to also have the flag set. +.El .It Dv F_SETFD -Set the close-on-exec flag associated with -.Fa fd -to -.Fa arg , -where -.Fa arg -is either 0 or -.Dv FD_CLOEXEC , -as described above. +Set flags associated with +.Fa fd . +The available flags are +.Dv FD_CLOEXEC +and +.Dv FD_RESOLVE_BENEATH . +The +.Dv FD_RESOLVE_BENEATH +flag cannot be cleared once set. 
.It Dv F_GETFL Get descriptor status flags, as described below .Fa ( arg diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c index 9ec80794e795..676ea5de12b8 100644 --- a/sys/fs/fdescfs/fdesc_vnops.c +++ b/sys/fs/fdescfs/fdesc_vnops.c @@ -502,7 +502,7 @@ fdesc_setattr(struct vop_setattr_args *ap) cap_rights_init_one(&rights, CAP_EXTATTR_SET), &fp); } else { error = getvnode_path(td, fd, - cap_rights_init_one(&rights, CAP_EXTATTR_SET), &fp); + cap_rights_init_one(&rights, CAP_EXTATTR_SET), NULL, &fp); } if (error) { /* @@ -639,7 +639,7 @@ fdesc_readlink(struct vop_readlink_args *va) VOP_UNLOCK(vn); td = curthread; - error = fget_cap(td, fd_fd, &cap_no_rights, &fp, NULL); + error = fget_cap(td, fd_fd, &cap_no_rights, NULL, &fp, NULL); if (error != 0) goto out; diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index e70f2d248365..bbd6d530f478 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -109,8 +109,8 @@ static void fdgrowtable_exp(struct filedesc *fdp, int nfd); static void fdunused(struct filedesc *fdp, int fd); static void fdused(struct filedesc *fdp, int fd); static int fget_unlocked_seq(struct thread *td, int fd, - const cap_rights_t *needrightsp, struct file **fpp, - seqc_t *seqp); + const cap_rights_t *needrightsp, uint8_t *flagsp, + struct file **fpp, seqc_t *seqp); static int getmaxfd(struct thread *td); static u_long *filecaps_copy_prep(const struct filecaps *src); static void filecaps_copy_finish(const struct filecaps *src, @@ -527,7 +527,9 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) fde = fdeget_noref(fdp, fd); if (fde != NULL) { td->td_retval[0] = - (fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0; + ((fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0) | + ((fde->fde_flags & UF_RESOLVE_BENEATH) ? 
+ FD_RESOLVE_BENEATH : 0); error = 0; } FILEDESC_SUNLOCK(fdp); @@ -538,8 +540,13 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) FILEDESC_XLOCK(fdp); fde = fdeget_noref(fdp, fd); if (fde != NULL) { + /* + * UF_RESOLVE_BENEATH is sticky and cannot be cleared. + */ fde->fde_flags = (fde->fde_flags & ~UF_EXCLOSE) | - (arg & FD_CLOEXEC ? UF_EXCLOSE : 0); + ((arg & FD_CLOEXEC) != 0 ? UF_EXCLOSE : 0) | + ((arg & FD_RESOLVE_BENEATH) != 0 ? + UF_RESOLVE_BENEATH : 0); error = 0; } FILEDESC_XUNLOCK(fdp); @@ -2164,7 +2171,8 @@ _finstall(struct filedesc *fdp, struct file *fp, int fd, int flags, seqc_write_begin(&fde->fde_seqc); #endif fde->fde_file = fp; - fde->fde_flags = (flags & O_CLOEXEC) != 0 ? UF_EXCLOSE : 0; + fde->fde_flags = ((flags & O_CLOEXEC) != 0 ? UF_EXCLOSE : 0) | + ((flags & O_RESOLVE_BENEATH) != 0 ? UF_RESOLVE_BENEATH : 0); if (fcaps != NULL) filecaps_move(fcaps, &fde->fde_caps); else @@ -2912,7 +2920,7 @@ out: #ifdef CAPABILITIES int fget_cap(struct thread *td, int fd, const cap_rights_t *needrightsp, - struct file **fpp, struct filecaps *havecapsp) + uint8_t *flagsp, struct file **fpp, struct filecaps *havecapsp) { struct filedesc *fdp = td->td_proc->p_fd; int error; @@ -2921,7 +2929,8 @@ fget_cap(struct thread *td, int fd, const cap_rights_t *needrightsp, *fpp = NULL; for (;;) { - error = fget_unlocked_seq(td, fd, needrightsp, &fp, &seq); + error = fget_unlocked_seq(td, fd, needrightsp, flagsp, &fp, + &seq); if (error != 0) return (error); @@ -2952,10 +2961,10 @@ get_locked: #else int fget_cap(struct thread *td, int fd, const cap_rights_t *needrightsp, - struct file **fpp, struct filecaps *havecapsp) + uint8_t *flagsp, struct file **fpp, struct filecaps *havecapsp) { int error; - error = fget_unlocked(td, fd, needrightsp, fpp); + error = fget_unlocked(td, fd, needrightsp, flagsp, fpp); if (havecapsp != NULL && error == 0) filecaps_fill(havecapsp); @@ -3038,7 +3047,7 @@ out: #ifdef CAPABILITIES int -fgetvp_lookup_smr(struct nameidata *ndp, 
struct vnode **vpp, bool *fsearch) +fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, int *flagsp) { const struct filedescent *fde; const struct fdescenttbl *fdt; @@ -3048,7 +3057,7 @@ fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, bool *fsearch) const cap_rights_t *haverights; cap_rights_t rights; seqc_t seq; - int fd; + int fd, flags; VFS_SMR_ASSERT_ENTERED(); @@ -3068,7 +3077,9 @@ fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, bool *fsearch) return (EAGAIN); if (__predict_false(cap_check_inline_transient(haverights, &rights))) return (EAGAIN); - *fsearch = ((fp->f_flag & FSEARCH) != 0); + flags = fp->f_flag & FSEARCH; + flags |= (fde->fde_flags & UF_RESOLVE_BENEATH) != 0 ? + O_RESOLVE_BENEATH : 0; vp = fp->f_vnode; if (__predict_false(vp == NULL)) { return (EAGAIN); @@ -3102,17 +3113,19 @@ fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, bool *fsearch) #endif } *vpp = vp; + *flagsp = flags; return (0); } #else int -fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, bool *fsearch) +fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, int *flagsp) { + const struct filedescent *fde; const struct fdescenttbl *fdt; struct filedesc *fdp; struct file *fp; struct vnode *vp; - int fd; + int fd, flags; VFS_SMR_ASSERT_ENTERED(); @@ -3121,9 +3134,13 @@ fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, bool *fsearch) fdt = fdp->fd_files; if (__predict_false((u_int)fd >= fdt->fdt_nfiles)) return (EBADF); - fp = fdt->fdt_ofiles[fd].fde_file; + fde = &fdt->fdt_ofiles[fd]; + fp = fde->fde_file; if (__predict_false(fp == NULL)) return (EAGAIN); + flags = fp->f_flag & FSEARCH; + flags |= (fde->fde_flags & UF_RESOLVE_BENEATH) != 0 ? 
+ O_RESOLVE_BENEATH : 0; *fsearch = ((fp->f_flag & FSEARCH) != 0); vp = fp->f_vnode; if (__predict_false(vp == NULL || vp->v_type != VDIR)) { @@ -3139,6 +3156,7 @@ fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, bool *fsearch) return (EAGAIN); filecaps_fill(&ndp->ni_filecaps); *vpp = vp; + *flagsp = flags; return (0); } #endif @@ -3152,13 +3170,15 @@ fgetvp_lookup(struct nameidata *ndp, struct vnode **vpp) struct componentname *cnp; cap_rights_t rights; int error; + uint8_t flags; td = curthread; rights = *ndp->ni_rightsneeded; cap_rights_set_one(&rights, CAP_LOOKUP); cnp = &ndp->ni_cnd; - error = fget_cap(td, ndp->ni_dirfd, &rights, &fp, &ndp->ni_filecaps); + error = fget_cap(td, ndp->ni_dirfd, &rights, &flags, &fp, + &ndp->ni_filecaps); if (__predict_false(error != 0)) return (error); if (__predict_false(fp->f_ops == &badfileops)) { @@ -3176,6 +3196,10 @@ fgetvp_lookup(struct nameidata *ndp, struct vnode **vpp) */ if ((fp->f_flag & FSEARCH) != 0) cnp->cn_flags |= NOEXECCHECK; + if ((flags & UF_RESOLVE_BENEATH) != 0) { + cnp->cn_flags |= RBENEATH; + ndp->ni_resflags |= NIRES_BENEATH; + } fdrop(fp, td); #ifdef CAPABILITIES @@ -3223,7 +3247,7 @@ out_free: #ifdef CAPABILITIES static int fget_unlocked_seq(struct thread *td, int fd, const cap_rights_t *needrightsp, - struct file **fpp, seqc_t *seqp) + uint8_t *flagsp, struct file **fpp, seqc_t *seqp) { struct filedesc *fdp; const struct filedescent *fde; @@ -3232,6 +3256,7 @@ fget_unlocked_seq(struct thread *td, int fd, const cap_rights_t *needrightsp, seqc_t seq; cap_rights_t haverights; int error; + uint8_t flags; fdp = td->td_proc->p_fd; fdt = fdp->fd_files; @@ -3243,6 +3268,7 @@ fget_unlocked_seq(struct thread *td, int fd, const cap_rights_t *needrightsp, fde = &fdt->fdt_ofiles[fd]; haverights = *cap_rights_fde_inline(fde); fp = fde->fde_file; + flags = fde->fde_flags; if (__predict_false(fp == NULL)) { if (seqc_consistent(fd_seqc(fdt, fd), seq)) return (EBADF); @@ -3271,19 +3297,21 @@ 
fget_unlocked_seq(struct thread *td, int fd, const cap_rights_t *needrightsp, fdrop(fp, td); } *fpp = fp; - if (seqp != NULL) { + if (flagsp != NULL) + *flagsp = flags; + if (seqp != NULL) *seqp = seq; - } return (0); } #else static int fget_unlocked_seq(struct thread *td, int fd, const cap_rights_t *needrightsp, - struct file **fpp, seqc_t *seqp __unused) + uint8_t *flagsp, struct file **fpp, seqc_t *seqp __unused) { struct filedesc *fdp; const struct fdescenttbl *fdt; struct file *fp; + uint8_t flags; fdp = td->td_proc->p_fd; fdt = fdp->fd_files; @@ -3292,6 +3320,7 @@ fget_unlocked_seq(struct thread *td, int fd, const cap_rights_t *needrightsp, for (;;) { fp = fdt->fdt_ofiles[fd].fde_file; + flags = fdt->fdt_ofiles[fd].fde_flags; if (__predict_false(fp == NULL)) return (EBADF); if (__predict_false(!refcount_acquire_if_not_zero(&fp->f_count))) { @@ -3308,6 +3337,8 @@ fget_unlocked_seq(struct thread *td, int fd, const cap_rights_t *needrightsp, break; fdrop(fp, td); } + if (flagsp != NULL) + *flagsp = flags; *fpp = fp; return (0); } @@ -3321,8 +3352,8 @@ fget_unlocked_seq(struct thread *td, int fd, const cap_rights_t *needrightsp, * racing with itself. 
*/ int -fget_unlocked(struct thread *td, int fd, const cap_rights_t *needrightsp, - struct file **fpp) +fget_unlocked_flags(struct thread *td, int fd, const cap_rights_t *needrightsp, + uint8_t *flagsp, struct file **fpp) { struct filedesc *fdp; #ifdef CAPABILITIES @@ -3334,6 +3365,7 @@ fget_unlocked(struct thread *td, int fd, const cap_rights_t *needrightsp, seqc_t seq; const cap_rights_t *haverights; #endif + uint8_t flags; fdp = td->td_proc->p_fd; fdt = fdp->fd_files; @@ -3346,8 +3378,10 @@ fget_unlocked(struct thread *td, int fd, const cap_rights_t *needrightsp, fde = &fdt->fdt_ofiles[fd]; haverights = cap_rights_fde_inline(fde); fp = fde->fde_file; + flags = fde->fde_flags; #else fp = fdt->fdt_ofiles[fd].fde_file; + flags = fdt->fdt_ofiles[fd].fde_flags; #endif if (__predict_false(fp == NULL)) goto out_fallback; @@ -3371,12 +3405,21 @@ fget_unlocked(struct thread *td, int fd, const cap_rights_t *needrightsp, #endif goto out_fdrop; *fpp = fp; + if (flagsp != NULL) + *flagsp = flags; return (0); out_fdrop: fdrop(fp, td); out_fallback: *fpp = NULL; - return (fget_unlocked_seq(td, fd, needrightsp, fpp, NULL)); + return (fget_unlocked_seq(td, fd, needrightsp, flagsp, fpp, NULL)); +} + +int +fget_unlocked(struct thread *td, int fd, const cap_rights_t *needrightsp, + struct file **fpp) +{ + return (fget_unlocked_flags(td, fd, needrightsp, NULL, fpp)); } /* @@ -3528,7 +3571,7 @@ fget_mmap(struct thread *td, int fd, const cap_rights_t *rightsp, fdp = td->td_proc->p_fd; MPASS(cap_rights_is_set(rightsp, CAP_MMAP)); for (;;) { - error = fget_unlocked_seq(td, fd, rightsp, &fp, &seq); + error = fget_unlocked_seq(td, fd, rightsp, NULL, &fp, &seq); if (__predict_false(error != 0)) return (error); if (__predict_false(fp->f_ops == &badfileops)) { @@ -3583,7 +3626,7 @@ fget_fcntl(struct thread *td, int fd, const cap_rights_t *rightsp, *fpp = NULL; MPASS(cap_rights_is_set(rightsp, CAP_FCNTL)); for (;;) { - error = fget_unlocked_seq(td, fd, rightsp, &fp, &seq); + error = 
fget_unlocked_seq(td, fd, rightsp, NULL, &fp, &seq); if (error != 0) return (error); error = cap_fcntl_check(fdp, fd, needfcntl); @@ -3645,7 +3688,7 @@ fgetvp_rights(struct thread *td, int fd, const cap_rights_t *needrightsp, struct file *fp; int error; - error = fget_cap(td, fd, needrightsp, &fp, &caps); + error = fget_cap(td, fd, needrightsp, NULL, &fp, &caps); if (error != 0) return (error); if (fp->f_ops == &badfileops) { diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index 0bbf13936bf9..ad8485028987 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -91,7 +91,7 @@ getsock_cap(struct thread *td, int fd, const cap_rights_t *rightsp, struct file *fp; int error; - error = fget_cap(td, fd, rightsp, &fp, havecapsp); + error = fget_cap(td, fd, rightsp, NULL, &fp, havecapsp); if (__predict_false(error != 0)) return (error); if (__predict_false(fp->f_type != DTYPE_SOCKET)) { diff --git a/sys/kern/vfs_acl.c b/sys/kern/vfs_acl.c index 3106218abce6..6076a5f7bdf8 100644 --- a/sys/kern/vfs_acl.c +++ b/sys/kern/vfs_acl.c @@ -434,7 +434,7 @@ sys___acl_get_fd(struct thread *td, struct __acl_get_fd_args *uap) AUDIT_ARG_FD(uap->filedes); error = getvnode_path(td, uap->filedes, - cap_rights_init_one(&rights, CAP_ACL_GET), &fp); + cap_rights_init_one(&rights, CAP_ACL_GET), NULL, &fp); if (error == 0) { error = vacl_get_acl(td, fp->f_vnode, uap->type, uap->aclp); fdrop(fp, td); @@ -569,7 +569,7 @@ sys___acl_aclcheck_fd(struct thread *td, struct __acl_aclcheck_fd_args *uap) AUDIT_ARG_FD(uap->filedes); error = getvnode_path(td, uap->filedes, - cap_rights_init_one(&rights, CAP_ACL_CHECK), &fp); + cap_rights_init_one(&rights, CAP_ACL_CHECK), NULL, &fp); if (error == 0) { error = vacl_aclcheck(td, fp->f_vnode, uap->type, uap->aclp); fdrop(fp, td); diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index 4ab00698b311..883beaf6d1da 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -4528,17 +4528,23 @@ cache_fplookup_dirfd(struct 
cache_fpl *fpl, struct vnode **vpp) { struct nameidata *ndp; struct componentname *cnp; - int error; - bool fsearch; + int error, flags; ndp = fpl->ndp; cnp = fpl->cnp; - error = fgetvp_lookup_smr(ndp, vpp, &fsearch); + error = fgetvp_lookup_smr(ndp, vpp, &flags); if (__predict_false(error != 0)) { return (cache_fpl_aborted(fpl)); } - fpl->fsearch = fsearch; + if (__predict_false((flags & O_RESOLVE_BENEATH) != 0)) { + _Static_assert((CACHE_FPL_SUPPORTED_CN_FLAGS & RBENEATH) == 0, + "RBENEATH supported by fplookup"); + cache_fpl_smr_exit(fpl); + cache_fpl_aborted(fpl); + return (EOPNOTSUPP); + } + fpl->fsearch = (flags & FSEARCH) != 0; if ((*vpp)->v_type != VDIR) { if (!((cnp->cn_flags & EMPTYPATH) != 0 && cnp->cn_pnbuf[0] == '\0')) { cache_fpl_smr_exit(fpl); diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c index 8debf487cc54..1fe7494f3998 100644 --- a/sys/kern/vfs_extattr.c +++ b/sys/kern/vfs_extattr.c @@ -253,7 +253,7 @@ kern_extattr_set_fd(struct thread *td, int fd, int attrnamespace, AUDIT_ARG_TEXT(attrname); error = getvnode_path(td, fd, - cap_rights_init_one(&rights, CAP_EXTATTR_SET), &fp); + cap_rights_init_one(&rights, CAP_EXTATTR_SET), NULL, &fp); if (error) return (error); @@ -441,7 +441,7 @@ kern_extattr_get_fd(struct thread *td, int fd, int attrnamespace, AUDIT_ARG_TEXT(attrname); error = getvnode_path(td, fd, - cap_rights_init_one(&rights, CAP_EXTATTR_GET), &fp); + cap_rights_init_one(&rights, CAP_EXTATTR_GET), NULL, &fp); if (error) return (error); @@ -597,7 +597,7 @@ kern_extattr_delete_fd(struct thread *td, int fd, int attrnamespace, AUDIT_ARG_TEXT(attrname); error = getvnode_path(td, fd, - cap_rights_init_one(&rights, CAP_EXTATTR_DELETE), &fp); + cap_rights_init_one(&rights, CAP_EXTATTR_DELETE), NULL, &fp); if (error) return (error); @@ -764,7 +764,7 @@ kern_extattr_list_fd(struct thread *td, int fd, int attrnamespace, AUDIT_ARG_FD(fd); AUDIT_ARG_VALUE(attrnamespace); error = getvnode_path(td, fd, - cap_rights_init_one(&rights, 
CAP_EXTATTR_LIST), &fp); + cap_rights_init_one(&rights, CAP_EXTATTR_LIST), NULL, &fp); if (error) return (error); diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 0e4847e1c634..c236f241bf20 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -370,7 +370,7 @@ kern_fstatfs(struct thread *td, int fd, struct statfs *buf) int error; AUDIT_ARG_FD(fd); - error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp); + error = getvnode_path(td, fd, &cap_fstatfs_rights, NULL, &fp); if (error != 0) return (error); vp = fp->f_vnode; @@ -893,12 +893,17 @@ sys_fchdir(struct thread *td, struct fchdir_args *uap) struct mount *mp; struct file *fp; int error; + uint8_t fdflags; AUDIT_ARG_FD(uap->fd); - error = getvnode_path(td, uap->fd, &cap_fchdir_rights, + error = getvnode_path(td, uap->fd, &cap_fchdir_rights, &fdflags, &fp); if (error != 0) return (error); + if ((fdflags & UF_RESOLVE_BENEATH) != 0) { + fdrop(fp, td); + return (ENOTCAPABLE); + } vp = fp->f_vnode; vrefact(vp); fdrop(fp, td); @@ -1041,10 +1046,15 @@ sys_fchroot(struct thread *td, struct fchroot_args *uap) struct vnode *vp; struct file *fp; int error; + uint8_t fdflags; - error = getvnode_path(td, uap->fd, &cap_fchroot_rights, &fp); + error = getvnode_path(td, uap->fd, &cap_fchroot_rights, &fdflags, &fp); if (error != 0) return (error); + if ((fdflags & UF_RESOLVE_BENEATH) != 0) { + fdrop(fp, td); + return (ENOTCAPABLE); + } vp = fp->f_vnode; vrefact(vp); fdrop(fp, td); @@ -1309,6 +1319,10 @@ success: else #endif fcaps = NULL; + if ((nd.ni_resflags & NIRES_BENEATH) != 0) + flags |= O_RESOLVE_BENEATH; + else + flags &= ~O_RESOLVE_BENEATH; error = finstall_refed(td, fp, &indx, flags, fcaps); /* On success finstall_refed() consumes fcaps. 
*/ if (error != 0) { @@ -2013,7 +2027,7 @@ kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, fp = NULL; if (fd != FD_NONE) { - error = getvnode_path(td, fd, &cap_no_rights, &fp); + error = getvnode_path(td, fd, &cap_no_rights, NULL, &fp); if (error != 0) return (error); } @@ -4409,12 +4423,12 @@ out: */ int getvnode_path(struct thread *td, int fd, const cap_rights_t *rightsp, - struct file **fpp) + uint8_t *flagsp, struct file **fpp) { struct file *fp; int error; - error = fget_unlocked(td, fd, rightsp, &fp); + error = fget_unlocked_flags(td, fd, rightsp, flagsp, &fp); if (error != 0) return (error); @@ -4451,7 +4465,7 @@ getvnode(struct thread *td, int fd, const cap_rights_t *rightsp, { int error; - error = getvnode_path(td, fd, rightsp, fpp); + error = getvnode_path(td, fd, rightsp, NULL, fpp); if (__predict_false(error != 0)) return (error); diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h index c23c7eba0544..9329ecc29c47 100644 --- a/sys/sys/fcntl.h +++ b/sys/sys/fcntl.h @@ -289,6 +289,8 @@ typedef __pid_t pid_t; /* file descriptor flags (F_GETFD, F_SETFD) */ #define FD_CLOEXEC 1 /* close-on-exec flag */ +#define FD_RESOLVE_BENEATH 2 /* all lookups relative to fd have + O_RESOLVE_BENEATH semantics */ /* record locking flags (F_GETLK, F_SETLK, F_SETLKW) */ #define F_RDLCK 1 /* shared or read lock */ diff --git a/sys/sys/file.h b/sys/sys/file.h index c79759a3f966..284d523147b6 100644 --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -301,7 +301,7 @@ int fgetvp_read(struct thread *td, int fd, const cap_rights_t *rightsp, struct vnode **vpp); int fgetvp_write(struct thread *td, int fd, const cap_rights_t *rightsp, struct vnode **vpp); -int fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, bool *fsearch); +int fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, int *flagsp); int fgetvp_lookup(struct nameidata *ndp, struct vnode **vpp); static __inline __result_use_check bool diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h index 
602d236ff853..55969b2ff4b3 100644 --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -148,6 +148,7 @@ struct filedesc_to_leader { * Per-process open flags. */ #define UF_EXCLOSE 0x01 /* auto-close on exec */ +#define UF_RESOLVE_BENEATH 0x02 /* lookups must be beneath this dir */ #ifdef _KERNEL @@ -278,17 +279,20 @@ struct filedesc_to_leader * int getvnode(struct thread *td, int fd, const cap_rights_t *rightsp, struct file **fpp); int getvnode_path(struct thread *td, int fd, const cap_rights_t *rightsp, - struct file **fpp); + uint8_t *flagsp, struct file **fpp); void mountcheckdirs(struct vnode *olddp, struct vnode *newdp); int fget_cap_noref(struct filedesc *fdp, int fd, const cap_rights_t *needrightsp, struct file **fpp, struct filecaps *havecapsp); int fget_cap(struct thread *td, int fd, const cap_rights_t *needrightsp, - struct file **fpp, struct filecaps *havecapsp); + uint8_t *flagsp, struct file **fpp, struct filecaps *havecapsp); /* Return a referenced file from an unlocked descriptor. */ int fget_unlocked(struct thread *td, int fd, const cap_rights_t *needrightsp, struct file **fpp); +int fget_unlocked_flags(struct thread *td, int fd, + const cap_rights_t *needrightsp, uint8_t *flagsp, + struct file **fpp); /* Return a file pointer without a ref. FILEDESC_IS_ONLY_USER must be true. */ int fget_only_user(struct filedesc *fdp, int fd, const cap_rights_t *needrightsp, struct file **fpp); diff --git a/sys/sys/namei.h b/sys/sys/namei.h index eda3cc9b6f24..5c245235ace5 100644 --- a/sys/sys/namei.h +++ b/sys/sys/namei.h @@ -196,6 +196,7 @@ int cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status, #define NIRES_ABS 0x00000001 /* Path was absolute */ #define NIRES_STRICTREL 0x00000002 /* Restricted lookup result */ #define NIRES_EMPTYPATH 0x00000004 /* EMPTYPATH used */ +#define NIRES_BENEATH 0x00000008 /* O_RESOLVE_BENEATH is to be inherited */ /* * Flags in ni_lcf, valid for the duration of the namei call.