On 2025-04-22 17:14:10, Christian Brauner wrote: > On Tue, Apr 22, 2025 at 04:31:29PM +0200, Christian Brauner wrote: > > On Thu, Mar 27, 2025 at 12:39:28PM +0100, Amir Goldstein wrote: > > > On Thu, Mar 27, 2025 at 10:33 AM Andrey Albershteyn <aalbe...@redhat.com> > > > wrote: > > > > > > > > On 2025-03-23 09:56:25, Amir Goldstein wrote: > > > > > On Fri, Mar 21, 2025 at 8:49 PM Andrey Albershteyn > > > > > <aalbe...@redhat.com> wrote: > > > > > > > > > > > > From: Andrey Albershteyn <aalbe...@redhat.com> > > > > > > > > > > > > Introduce getfsxattrat and setfsxattrat syscalls to manipulate inode > > > > > > extended attributes/flags. The syscalls take parent directory fd and > > > > > > path to the child together with struct fsxattr. > > > > > > > > > > > > This is an alternative to FS_IOC_FSSETXATTR ioctl with a difference > > > > > > that the file doesn't need to be open as we can reference it with a path > > > > > > instead of fd. By having this we can manipulate inode extended > > > > > > attributes not only on regular files but also on special ones. This > > > > > > is not possible with FS_IOC_FSSETXATTR ioctl as with special files > > > > > > we can not call ioctl() directly on the filesystem inode using fd. > > > > > > > > > > > > This patch adds two new syscalls which allow userspace to get/set > > > > > > extended inode attributes on special files by using parent directory > > > > > > and a path - *at() like syscall. > > > > > > > > > > > > CC: linux-...@vger.kernel.org > > > > > > CC: linux-fsde...@vger.kernel.org > > > > > > CC: linux-...@vger.kernel.org > > > > > > Signed-off-by: Andrey Albershteyn <aalbe...@redhat.com> > > > > > > Acked-by: Arnd Bergmann <a...@arndb.de> > > > > > > --- > > > > > ... 
> > > > > > +SYSCALL_DEFINE5(setfsxattrat, int, dfd, const char __user *, > > > > > > filename, > > > > > > + struct fsxattr __user *, ufsx, size_t, usize, > > > > > > + unsigned int, at_flags) > > > > > > +{ > > > > > > + struct fileattr fa; > > > > > > + struct path filepath; > > > > > > + int error; > > > > > > + unsigned int lookup_flags = 0; > > > > > > + struct filename *name; > > > > > > + struct mnt_idmap *idmap;. > > > > > > > > > > > + struct dentry *dentry; > > > > > > + struct vfsmount *mnt; > > > > > > + struct fsxattr fsx = {}; > > > > > > + > > > > > > + BUILD_BUG_ON(sizeof(struct fsxattr) < FSXATTR_SIZE_VER0); > > > > > > + BUILD_BUG_ON(sizeof(struct fsxattr) != FSXATTR_SIZE_LATEST); > > > > > > + > > > > > > + if ((at_flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != > > > > > > 0) > > > > > > + return -EINVAL; > > > > > > + > > > > > > + if (!(at_flags & AT_SYMLINK_NOFOLLOW)) > > > > > > + lookup_flags |= LOOKUP_FOLLOW; > > > > > > + > > > > > > + if (at_flags & AT_EMPTY_PATH) > > > > > > + lookup_flags |= LOOKUP_EMPTY; > > > > > > + > > > > > > + if (usize > PAGE_SIZE) > > > > > > + return -E2BIG; > > > > > > + > > > > > > + if (usize < FSXATTR_SIZE_VER0) > > > > > > + return -EINVAL; > > > > > > + > > > > > > + error = copy_struct_from_user(&fsx, sizeof(struct fsxattr), > > > > > > ufsx, usize); > > > > > > + if (error) > > > > > > + return error; > > > > > > + > > > > > > + fsxattr_to_fileattr(&fsx, &fa); > > > > > > + > > > > > > + name = getname_maybe_null(filename, at_flags); > > > > > > + if (!name) { > > > > > > + CLASS(fd, f)(dfd); > > > > > > + > > > > > > + if (fd_empty(f)) > > > > > > + return -EBADF; > > > > > > + > > > > > > + idmap = file_mnt_idmap(fd_file(f)); > > > > > > + dentry = file_dentry(fd_file(f)); > > > > > > + mnt = fd_file(f)->f_path.mnt; > > > > > > + } else { > > > > > > + error = filename_lookup(dfd, name, lookup_flags, > > > > > > &filepath, > > > > > > + NULL); > > > > > > + if (error) > > > > > > + return error; 
> > > > > > + > > > > > > + idmap = mnt_idmap(filepath.mnt); > > > > > > + dentry = filepath.dentry; > > > > > > + mnt = filepath.mnt; > > > > > > + } > > > > > > + > > > > > > + error = mnt_want_write(mnt); > > > > > > + if (!error) { > > > > > > + error = vfs_fileattr_set(idmap, dentry, &fa); > > > > > > + if (error == -ENOIOCTLCMD) > > > > > > + error = -EOPNOTSUPP; > > > > > > > > > > This is awkward. > > > > > vfs_fileattr_set() should return -EOPNOTSUPP. > > > > > ioctl_setflags() could maybe convert it to -ENOIOCTLCMD, > > > > > but looking at similar cases ioctl_fiemap(), ioctl_fsfreeze() the > > > > > ioctl returns -EOPNOTSUPP. > > > > > > > > > > I don't think it is necessarily a bad idea to start returning > > > > > -EOPNOTSUPP instead of -ENOIOCTLCMD for the ioctl > > > > > because that really reflects the fact that the ioctl is now > > > > > implemented > > > > > in vfs and not in the specific fs. > > > > > > > > > > and I think it would not be a bad idea at all to make that change > > > > > together with the merge of the syscalls as a sort of hint to userspace > > > > > that uses the ioctl, that the syscalls API exists. > > > > > > > > > > Thanks, > > > > > Amir. > > > > > > > > > > > > > Hmm, not sure what you're suggesting here. I see it as: > > > > - get/setfsxattrat should return EOPNOTSUPP as it makes more sense > > > > than ENOIOCTLCMD > > > > - ioctl_setflags returns ENOIOCTLCMD which is also expected > > > > > > > > Don't really see a reason to change what vfs_fileattr_set() returns > > > > and then copying this if() to other places or start returning > > > > EOPNOTSUPP. > > > > > > ENOIOCTLCMD conceptually means that the ioctl command is unknown > > > This is not the case since ->fileattr_[gs]et() became a vfs API > > > > vfs_fileattr_{g,s}et() should not return ENOIOCTLCMD. Change the return > > code to EOPNOTSUPP and then make EOPNOTSUPP be translated to ENOTTY on > > overlayfs and to ENOIOCTLCMD in ecryptfs and in fs/ioctl.c. 
This way > > we get a clean VFS api while retaining current behavior. Amir can do his > > cleanup based on that. > > Also this get/set dance is not something new apis should do. It should > be handled like setattr_prepare() or generic_fillattr() where the > filesystem calls a VFS helper and that does all of this based on the > current state of the inode instead of calling into the filesystem twice: > > int vfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, > struct fileattr *fa) > { > <snip> > inode_lock(inode); > err = vfs_fileattr_get(dentry, &old_ma); > if (!err) { > /* initialize missing bits from old_ma */ > if (fa->flags_valid) { > <snip> > err = fileattr_set_prepare(inode, &old_ma, fa); > if (!err && !security_inode_setfsxattr(inode, fa)) > err = inode->i_op->fileattr_set(idmap, dentry, fa); >
You mean something like this? (not all fs are done) -- From 421445f054ccad3116d55ae22c8995a48bb753fd Mon Sep 17 00:00:00 2001 From: Andrey Albershteyn <aalbe...@kernel.org> Date: Fri, 25 Apr 2025 17:20:42 +0200 Subject: [PATCH] fs: push retrieval of fileattr down to filesystems Currently, vfs_fileattr_set() calls into the filesystem twice: first to retrieve the current state of the inode extended attributes, and then to set the new ones. This patch refactors this so that the filesystem first gets the current inode attribute state and then calls a VFS helper to merge the new values with it and verify them. This way vfs_fileattr_set() calls into the filesystem just once. Signed-off-by: Andrey Albershteyn <aalbe...@kernel.org> --- fs/ext2/ioctl.c | 9 ++++++ fs/ext4/ioctl.c | 9 ++++++ fs/f2fs/file.c | 12 +++++++- fs/file_attr.c | 62 ++++++++++++++++++++++++---------------- fs/gfs2/file.c | 9 ++++++ fs/hfsplus/inode.c | 9 ++++++ fs/jfs/ioctl.c | 9 +++++- fs/ntfs3/file.c | 12 +++++++- fs/orangefs/inode.c | 9 ++++++ fs/ubifs/ioctl.c | 12 +++++++- fs/xfs/xfs_ioctl.c | 6 ++++ include/linux/fileattr.h | 2 ++ mm/shmem.c | 8 ++++++ 13 files changed, 140 insertions(+), 28 deletions(-) diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 44e04484e570..3a45ed9c12b7 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -32,6 +32,15 @@ int ext2_fileattr_set(struct mnt_idmap *idmap, { struct inode *inode = d_inode(dentry); struct ext2_inode_info *ei = EXT2_I(inode); + struct fileattr cfa; + int err; + + err = ext2_fileattr_get(dentry, &cfa); + if (err) + return err; + err = vfs_fileattr_set_prepare(idmap, dentry, &cfa, fa); + if (err) + return err; if (fileattr_has_fsx(fa)) return -EOPNOTSUPP; diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index d17207386ead..f988ff4d7256 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1002,6 +1002,15 @@ int ext4_fileattr_set(struct mnt_idmap *idmap, struct inode *inode = d_inode(dentry); u32 flags = fa->flags; int err = -EOPNOTSUPP; + struct fileattr cfa; + + err = 
ext4_fileattr_get(dentry, &cfa); + if (err) + return err; + + err = vfs_fileattr_set_prepare(idmap, dentry, &cfa, fa); + if (err) + return err; if (flags & ~EXT4_FL_USER_VISIBLE) goto out; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index abbcbb5865a3..f196a07f1f17 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3371,14 +3371,24 @@ int f2fs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); - u32 fsflags = fa->flags, mask = F2FS_SETTABLE_FS_FL; + u32 fsflags, mask = F2FS_SETTABLE_FS_FL; u32 iflags; + struct fileattr cfa; int err; if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) return -EIO; if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) return -ENOSPC; + + err = f2fs_fileattr_get(dentry, &cfa); + if (err) + return err; + err = vfs_fileattr_set_prepare(idmap, dentry, &cfa, fa); + if (err) + return err; + fsflags = fa->flags; + if (fsflags & ~F2FS_GETTABLE_FS_FL) return -EOPNOTSUPP; fsflags &= F2FS_SETTABLE_FS_FL; diff --git a/fs/file_attr.c b/fs/file_attr.c index 5e51c5b851ef..d0a01377bca8 100644 --- a/fs/file_attr.c +++ b/fs/file_attr.c @@ -7,6 +7,8 @@ #include <linux/fileattr.h> #include <linux/namei.h> +#include "internal.h" + /** * fileattr_fill_xflags - initialize fileattr with xflags * @fa: fileattr pointer @@ -225,6 +227,36 @@ static int fileattr_set_prepare(struct inode *inode, return 0; } +/** + * vfs_fileattr_set_prepare - merge new fileattr state and check for validity + * @idmap: idmap of the mount + * @dentry: the object to change + * @cfa: current fileattr state + * @fa: fileattr pointer with new values + * + * Return: 0 on success, or a negative error on failure. 
+ */ +int vfs_fileattr_set_prepare(struct mnt_idmap *idmap, struct dentry *dentry, + struct fileattr *cfa, struct fileattr *fa) +{ + int err; + + /* initialize missing bits from cfa */ + if (fa->flags_valid) { + fa->fsx_xflags |= cfa->fsx_xflags & ~FS_XFLAG_COMMON; + fa->fsx_extsize = cfa->fsx_extsize; + fa->fsx_nextents = cfa->fsx_nextents; + fa->fsx_projid = cfa->fsx_projid; + fa->fsx_cowextsize = cfa->fsx_cowextsize; + } else { + fa->flags |= cfa->flags & ~FS_COMMON_FL; + } + + err = fileattr_set_prepare(d_inode(dentry), cfa, fa); + return err; +} +EXPORT_SYMBOL(vfs_fileattr_set_prepare); + /** * vfs_fileattr_set - change miscellaneous file attributes * @idmap: idmap of the mount @@ -245,7 +277,6 @@ int vfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); - struct fileattr old_ma = {}; int err; if (!inode->i_op->fileattr_set) @@ -255,29 +286,12 @@ int vfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, return -EPERM; inode_lock(inode); - err = vfs_fileattr_get(dentry, &old_ma); - if (!err) { - /* initialize missing bits from old_ma */ - if (fa->flags_valid) { - fa->fsx_xflags |= old_ma.fsx_xflags & ~FS_XFLAG_COMMON; - fa->fsx_extsize = old_ma.fsx_extsize; - fa->fsx_nextents = old_ma.fsx_nextents; - fa->fsx_projid = old_ma.fsx_projid; - fa->fsx_cowextsize = old_ma.fsx_cowextsize; - } else { - fa->flags |= old_ma.flags & ~FS_COMMON_FL; - } - - err = fileattr_set_prepare(inode, &old_ma, fa); - if (err) - goto out; - err = security_inode_file_setattr(dentry, fa); - if (err) - goto out; - err = inode->i_op->fileattr_set(idmap, dentry, fa); - if (err) - goto out; - } + err = security_inode_file_setattr(dentry, fa); + if (err) + goto out; + err = inode->i_op->fileattr_set(idmap, dentry, fa); + if (err) + goto out; out: inode_unlock(inode); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index fd1147aa3891..cf796fa73af2 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -282,10 +282,19 
@@ int gfs2_fileattr_set(struct mnt_idmap *idmap, u32 fsflags = fa->flags, gfsflags = 0; u32 mask; int i; + struct fileattr cfa; + int error; if (d_is_special(dentry)) return -ENOTTY; + error = gfs2_fileattr_get(dentry, &cfa); + if (error) + return error; + error = vfs_fileattr_set_prepare(idmap, dentry, &cfa, fa); + if (error) + return error; + if (fileattr_has_fsx(fa)) return -EOPNOTSUPP; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index f331e9574217..cdb11d00faea 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -678,6 +678,15 @@ int hfsplus_fileattr_set(struct mnt_idmap *idmap, struct inode *inode = d_inode(dentry); struct hfsplus_inode_info *hip = HFSPLUS_I(inode); unsigned int new_fl = 0; + struct fileattr cfa; + int err; + + err = hfsplus_fileattr_get(dentry, &cfa); + if (err) + return err; + err = vfs_fileattr_set_prepare(idmap, dentry, &cfa, fa); + if (err) + return err; if (fileattr_has_fsx(fa)) return -EOPNOTSUPP; diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index f7bd7e8f5be4..4c62c14d15b0 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -75,11 +75,18 @@ int jfs_fileattr_set(struct mnt_idmap *idmap, { struct inode *inode = d_inode(dentry); struct jfs_inode_info *jfs_inode = JFS_IP(inode); - unsigned int flags; + unsigned int flags = jfs_inode->mode2 & JFS_FL_USER_VISIBLE; + struct fileattr cfa; + int err; if (d_is_special(dentry)) return -ENOTTY; + fileattr_fill_flags(&cfa, jfs_map_ext2(flags, 0)); + err = vfs_fileattr_set_prepare(idmap, dentry, &cfa, fa); + if (err) + return err; + if (fileattr_has_fsx(fa)) return -EOPNOTSUPP; diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 9b6a3f8d2e7c..bc7ee7595b70 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -83,12 +83,22 @@ int ntfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, { struct inode *inode = d_inode(dentry); struct ntfs_inode *ni = ntfs_i(inode); - u32 flags = fa->flags; + u32 flags; unsigned int new_fl = 0; + struct fileattr cfa; + int err; + + err = 
ntfs_fileattr_get(dentry, &cfa); + if (err) + return err; + err = vfs_fileattr_set_prepare(idmap, dentry, &cfa, fa); + if (err) + return err; if (fileattr_has_fsx(fa)) return -EOPNOTSUPP; + flags = fa->flags; if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_COMPR_FL)) return -EOPNOTSUPP; diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 5ac743c6bc2e..aecb61146443 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -910,6 +910,15 @@ static int orangefs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { u64 val = 0; + struct fileattr cfa; + int error = 0; + + error = orangefs_fileattr_get(dentry, &cfa); + if (error) + return error; + error = vfs_fileattr_set_prepare(idmap, dentry, &cfa, fa); + if (error) + return error; gossip_debug(GOSSIP_FILE_DEBUG, "%s: called on %pd\n", __func__, dentry); diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 2c99349cf537..e71e362c786b 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -148,14 +148,24 @@ int ubifs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); - int flags = fa->flags; + int flags; + struct fileattr cfa; + int err; if (d_is_special(dentry)) return -ENOTTY; + err = ubifs_fileattr_get(dentry, &cfa); + if (err) + return err; + err = vfs_fileattr_set_prepare(idmap, dentry, &cfa, fa); + if (err) + return err; + if (fileattr_has_fsx(fa)) return -EOPNOTSUPP; + flags = fa->flags; if (flags & ~UBIFS_GETTABLE_IOCTL_FLAGS) return -EOPNOTSUPP; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index d250f7f74e3b..c861dc1c3cf0 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -733,12 +733,18 @@ xfs_fileattr_set( struct xfs_dquot *pdqp = NULL; struct xfs_dquot *olddquot = NULL; int error; + struct fileattr cfa; trace_xfs_ioctl_setattr(ip); if (d_is_special(dentry)) return -ENOTTY; + xfs_fill_fsxattr(ip, XFS_DATA_FORK, &cfa); + error = vfs_fileattr_set_prepare(idmap, dentry, &cfa, 
fa); + if (error) + return error; + if (!fa->fsx_valid) { if (fa->flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NOATIME_FL | FS_NODUMP_FL | diff --git a/include/linux/fileattr.h b/include/linux/fileattr.h index f62a5143eb2d..aba76d897533 100644 --- a/include/linux/fileattr.h +++ b/include/linux/fileattr.h @@ -75,6 +75,8 @@ static inline bool fileattr_has_fsx(const struct fileattr *fa) } int vfs_fileattr_get(struct dentry *dentry, struct fileattr *fa); +int vfs_fileattr_set_prepare(struct mnt_idmap *idmap, struct dentry *dentry, + struct fileattr *cfa, struct fileattr *fa); int vfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); int ioctl_getflags(struct file *file, unsigned int __user *argp); diff --git a/mm/shmem.c b/mm/shmem.c index 99327c30507c..c2a5991f944f 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -4199,6 +4199,14 @@ static int shmem_fileattr_set(struct mnt_idmap *idmap, struct inode *inode = d_inode(dentry); struct shmem_inode_info *info = SHMEM_I(inode); int ret, flags; + struct fileattr cfa; + + ret = shmem_fileattr_get(dentry, &cfa); + if (ret) + return ret; + ret = vfs_fileattr_set_prepare(idmap, dentry, &cfa, fa); + if (ret) + return ret; if (fileattr_has_fsx(fa)) return -EOPNOTSUPP; -- 2.47.2