On Fri, Aug 08, 2025 at 12:55:34AM +0000, Rick Macklem wrote: > The branch main has been updated by rmacklem: > > URL: > https://cgit.FreeBSD.org/src/commit/?id=37b2cb5ecb0fb1b1f5a98ff4c08b61b8c05ec1d7 > > commit 37b2cb5ecb0fb1b1f5a98ff4c08b61b8c05ec1d7 > Author: Rick Macklem <rmack...@freebsd.org> > AuthorDate: 2025-08-08 00:52:23 +0000 > Commit: Rick Macklem <rmack...@freebsd.org> > CommitDate: 2025-08-08 00:52:23 +0000 > > vfs: Add support for file cloning to VOP_COPY_FILE_RANGE > > NFSv4 has a separate CLONE operation from COPY with > a couple of semantics differences. Unlike COPY, CLONE > must complete the "copy on write" and cannot return > partially copied. It also is required to use offsets (and > the length if not to EOF) that are aligned to a buffer > boundary. > > Since VOP_COPY_FILE_RANGE() can already do "copy on write" > for file systems that support it, such as ZFS with block > cloning enabled, all this patch does is add a flag called > COPY_FILE_RANGE_CLONE so that it will conform to the > rule that it must do a "copy on write" to completion. > > The patch also adds a new pathconf(2) name _PC_CLONE_BLKSIZE, > which acquires the blocksize requirement for cloning and > returns 0 for file systems that do not support the > "copy on write" feature. (This is needed for the NFSv4.2 > clone_blksize attribute.) > > This patch will allow the implementation of CLONE > for NFSv4.2. > > Reviewed by: asomers > Differential Revision: https://reviews.freebsd.org/D51808 > --- > sys/fs/fuse/fuse_vnops.c | 3 +++ > sys/kern/vfs_default.c | 1 + > sys/kern/vfs_syscalls.c | 2 +- > sys/kern/vfs_vnops.c | 5 +++++ > sys/sys/unistd.h | 1 + > sys/sys/vnode.h | 17 +++++++++++++---- > 6 files changed, 24 insertions(+), 5 deletions(-) > > diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c > index b90ce60ec664..b782146b7278 100644 > --- a/sys/fs/fuse/fuse_vnops.c > +++ b/sys/fs/fuse/fuse_vnops.c > @@ -877,6 +877,9 @@ fuse_vnop_copy_file_range(struct vop_copy_file_range_args > *ap) > pid_t pid; > int err; > > + if ((ap->a_flags & COPY_FILE_RANGE_CLONE) != 0) > + return (EXTERROR(ENOSYS, "Cannot clone")); > + > if (mp == NULL || mp != vnode_mount(outvp)) > return (EXTERROR(ENOSYS, "Mount points do not match")); > > diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c > index fd6202a1424c..85f67731e1cc 100644 > --- a/sys/kern/vfs_default.c > +++ b/sys/kern/vfs_default.c > @@ -457,6 +457,7 @@ vop_stdpathconf(struct vop_pathconf_args *ap) > case _PC_NAMEDATTR_ENABLED: > case _PC_HAS_NAMEDATTR: > case _PC_HAS_HIDDENSYSTEM: > + case _PC_CLONE_BLKSIZE: > *ap->a_retval = 0; > return (0); > default: > diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c > index 9704e9c160a8..c64618036733 100644 > --- a/sys/kern/vfs_syscalls.c > +++ b/sys/kern/vfs_syscalls.c > @@ -5058,7 +5058,7 @@ kern_copy_file_range(struct thread *td, int infd, off_t > *inoffp, int outfd, > error = 0; > retlen = 0; > > - if (flags != 0) { > + if ((flags & ~COPY_FILE_RANGE_USERFLAGS) != 0) { > error = EINVAL; > goto out; > } > diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c > index 6451c9e07a60..93f87ddae4de 100644 > --- a/sys/kern/vfs_vnops.c > +++ b/sys/kern/vfs_vnops.c > @@ -3443,6 +3443,11 @@ vn_generic_copy_file_range(struct vnode *invp, off_t > *inoffp, > interrupted = 0; > dat = NULL; > > + if ((flags & COPY_FILE_RANGE_CLONE) != 0) { > + error = ENOSYS; > + goto out; > + } > + > error = vn_lock(invp, LK_SHARED); > if (error != 0) > goto out; > diff --git a/sys/sys/unistd.h b/sys/sys/unistd.h > index 85ed93fd359d..7ab2f021e408 100644 > --- a/sys/sys/unistd.h > +++ b/sys/sys/unistd.h > @@ -159,6 +159,7 @@ > #define _PC_XATTR_ENABLED _PC_NAMEDATTR_ENABLED /* Solaris > Compatible */ > #define _PC_XATTR_EXISTS _PC_HAS_NAMEDATTR /* Solaris > Compatible */ > #define _PC_HAS_HIDDENSYSTEM 68 > +#define _PC_CLONE_BLKSIZE 69 > #endif > > /* From OpenSolaris, used by SEEK_DATA/SEEK_HOLE. */ > diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h > index 074769d55c2d..8080e9edd8c3 100644 > --- a/sys/sys/vnode.h > +++ b/sys/sys/vnode.h > @@ -397,8 +397,21 @@ struct vattr { > */ > #define VLKTIMEOUT (hz / 20 + 1) > > +/* copy_file_range flags */ > +#define COPY_FILE_RANGE_KFLAGS 0xff000000 > + > +/* > + * copy_file_range flags visible to user space. > + * Allocate high bits first, to try and avoid conflicting with Linux. > + */ > +#define COPY_FILE_RANGE_CLONE 0x00800000 /* Require > cloning. */ > +#define COPY_FILE_RANGE_USERFLAGS (COPY_FILE_RANGE_CLONE) > +
These are userspace flags for copy_file_range(2). Now you require userspace to include non-std-compliant and highly offensive WRT namespace pollution sys/vnode.h header to get to some bits of the copy_file_range(2) interface. This is not right thing to do, IMO. > #ifdef _KERNEL > > +/* copy_file_range flags only usable in the kernel */ > +#define COPY_FILE_RANGE_TIMEO1SEC 0x01000000 /* Return after > 1sec. */ > + > #ifdef MALLOC_DECLARE > MALLOC_DECLARE(M_VNODE); > #endif > @@ -621,10 +634,6 @@ typedef void vop_getpages_iodone_t(void *, vm_page_t *, > int, int); > #define VN_OPEN_INVFS 0x00000008 > #define VN_OPEN_WANTIOCTLCAPS 0x00000010 > > -/* copy_file_range kernel flags */ > -#define COPY_FILE_RANGE_KFLAGS 0xff000000 > -#define COPY_FILE_RANGE_TIMEO1SEC 0x01000000 /* Return after > 1sec. */ > - > /* > * Public vnode manipulation functions. > */