On Thu, Aug 07, 2025 at 11:50:26PM -0700, Rick Macklem wrote: > On Thu, Aug 7, 2025 at 10:46 PM Konstantin Belousov <kostik...@gmail.com> > wrote: > > > > CAUTION: This email originated from outside of the University of Guelph. Do > > not click links or open attachments unless you recognize the sender and > > know the content is safe. If in doubt, forward suspicious emails to > > ith...@uoguelph.ca. > > > > On Fri, Aug 08, 2025 at 12:55:34AM +0000, Rick Macklem wrote: > > > The branch main has been updated by rmacklem: > > > > > > URL: > > > https://cgit.FreeBSD.org/src/commit/?id=37b2cb5ecb0fb1b1f5a98ff4c08b61b8c05ec1d7 > > > > > > commit 37b2cb5ecb0fb1b1f5a98ff4c08b61b8c05ec1d7 > > > Author: Rick Macklem <rmack...@freebsd.org> > > > AuthorDate: 2025-08-08 00:52:23 +0000 > > > Commit: Rick Macklem <rmack...@freebsd.org> > > > CommitDate: 2025-08-08 00:52:23 +0000 > > > > > > vfs: Add support for file cloning to VOP_COPY_FILE_RANGE > > > > > > NFSv4 has a separate CLONE operation from COPY with > > > a couple of semantics differences. Unlike COPY, CLONE > > > must complete the "copy on write" and cannot return > > > partially copied. It also is required to use offsets (and > > > the length if not to EOF) that are aligned to a buffer > > > boundary. > > > > > > Since VOP_COPY_FILE_RANGE() can already do "copy on write" > > > for file systems that support it, such as ZFS with block > > > cloning enabled, all this patch does is add a flag called > > > COPY_FILE_RANGE_CLONE so that it will conform to the > > > rule that it must do a "copy on write" to completion. > > > > > > The patch also adds a new pathconf(2) name _PC_CLONE_BLKSIZE, > > > which acquires the blocksize requirement for cloning and > > > returns 0 for file systems that do not support the > > > "copy on write" feature. (This is needed for the NFSv4.2 > > > clone_blksize attribute.) > > > > > > This patch will allow the implementation of CLONE > > > for NFSv4.2. > > > > > > Reviewed by: asomers > > > Differential Revision: https://reviews.freebsd.org/D51808 > > > --- > > > sys/fs/fuse/fuse_vnops.c | 3 +++ > > > sys/kern/vfs_default.c | 1 + > > > sys/kern/vfs_syscalls.c | 2 +- > > > sys/kern/vfs_vnops.c | 5 +++++ > > > sys/sys/unistd.h | 1 + > > > sys/sys/vnode.h | 17 +++++++++++++---- > > > 6 files changed, 24 insertions(+), 5 deletions(-) > > > > > > diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c > > > index b90ce60ec664..b782146b7278 100644 > > > --- a/sys/fs/fuse/fuse_vnops.c > > > +++ b/sys/fs/fuse/fuse_vnops.c > > > @@ -877,6 +877,9 @@ fuse_vnop_copy_file_range(struct > > > vop_copy_file_range_args *ap) > > > pid_t pid; > > > int err; > > > > > > + if ((ap->a_flags & COPY_FILE_RANGE_CLONE) != 0) > > > + return (EXTERROR(ENOSYS, "Cannot clone")); > > > + > > > if (mp == NULL || mp != vnode_mount(outvp)) > > > return (EXTERROR(ENOSYS, "Mount points do not match")); > > > > > > diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c > > > index fd6202a1424c..85f67731e1cc 100644 > > > --- a/sys/kern/vfs_default.c > > > +++ b/sys/kern/vfs_default.c > > > @@ -457,6 +457,7 @@ vop_stdpathconf(struct vop_pathconf_args *ap) > > > case _PC_NAMEDATTR_ENABLED: > > > case _PC_HAS_NAMEDATTR: > > > case _PC_HAS_HIDDENSYSTEM: > > > + case _PC_CLONE_BLKSIZE: > > > *ap->a_retval = 0; > > > return (0); > > > default: > > > diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c > > > index 9704e9c160a8..c64618036733 100644 > > > --- a/sys/kern/vfs_syscalls.c > > > +++ b/sys/kern/vfs_syscalls.c > > > @@ -5058,7 +5058,7 @@ kern_copy_file_range(struct thread *td, int infd, > > > off_t *inoffp, int outfd, > > > error = 0; > > > retlen = 0; > > > > > > - if (flags != 0) { > > > + if ((flags & ~COPY_FILE_RANGE_USERFLAGS) != 0) { > > > error = EINVAL; > > > goto out; > > > } > > > diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c > > > index 6451c9e07a60..93f87ddae4de 100644 > > > --- a/sys/kern/vfs_vnops.c > > > +++ b/sys/kern/vfs_vnops.c > > > @@ -3443,6 +3443,11 @@ vn_generic_copy_file_range(struct vnode *invp, > > > off_t *inoffp, > > > interrupted = 0; > > > dat = NULL; > > > > > > + if ((flags & COPY_FILE_RANGE_CLONE) != 0) { > > > + error = ENOSYS; > > > + goto out; > > > + } > > > + > > > error = vn_lock(invp, LK_SHARED); > > > if (error != 0) > > > goto out; > > > diff --git a/sys/sys/unistd.h b/sys/sys/unistd.h > > > index 85ed93fd359d..7ab2f021e408 100644 > > > --- a/sys/sys/unistd.h > > > +++ b/sys/sys/unistd.h > > > @@ -159,6 +159,7 @@ > > > #define _PC_XATTR_ENABLED _PC_NAMEDATTR_ENABLED /* Solaris > > > Compatible */ > > > #define _PC_XATTR_EXISTS _PC_HAS_NAMEDATTR /* Solaris > > > Compatible */ > > > #define _PC_HAS_HIDDENSYSTEM 68 > > > +#define _PC_CLONE_BLKSIZE 69 > > > #endif > > > > > > /* From OpenSolaris, used by SEEK_DATA/SEEK_HOLE. */ > > > diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h > > > index 074769d55c2d..8080e9edd8c3 100644 > > > --- a/sys/sys/vnode.h > > > +++ b/sys/sys/vnode.h > > > @@ -397,8 +397,21 @@ struct vattr { > > > */ > > > #define VLKTIMEOUT (hz / 20 + 1) > > > > > > +/* copy_file_range flags */ > > > +#define COPY_FILE_RANGE_KFLAGS 0xff000000 > > > + > > > +/* > > > + * copy_file_range flags visible to user space. > > > + * Allocate high bits first, to try and avoid conflicting with Linux. > > > + */ > > > +#define COPY_FILE_RANGE_CLONE 0x00800000 /* Require > > > cloning. */ > > > +#define COPY_FILE_RANGE_USERFLAGS (COPY_FILE_RANGE_CLONE) > > > + > > > > These are userspace flags for copy_file_range(2). Now you require > > userspace to include non-std-compliant and highly offensive WRT namespace > > pollution sys/vnode.h header to get to some bits of the copy_file_range(2) > > interface. This is not right thing to do, IMO. > So, where do you think the flag can go? > (The man page already mentions unistd.h and that is where the prototype > for the function is, so does putting it there sound ok?)
Yes, sys/unistd.h should be fine, into a __BSD_VISIBLE section.