The branch main has been updated by kib: URL: https://cgit.FreeBSD.org/src/commit/?id=45117ffcd533ddf995f654db60b10899ae8370ec
commit 45117ffcd533ddf995f654db60b10899ae8370ec Author: Konstantin Belousov <[email protected]> AuthorDate: 2026-02-28 16:11:58 +0000 Commit: Konstantin Belousov <[email protected]> CommitDate: 2026-03-05 23:46:54 +0000 vfs: add VOP_DELAYED_SETSIZE() and related infrastructure The change generalizes code that was initially developed for the nfs client to handle filesystems that need to call vnode_pager_setsize() while only owning the vnode lock shared. Since the vnode pager might need to trim or extend the vnode vm_object's page queue, the vnode lock for the call must be owned exclusive. This is typical for filesystems with a remote authoritative source of file attributes, like nfs/p9/fuse. Handle the conflict by delaying the vnode_pager_setsize() to the next vnode locking to avoid relock. But if the next locking request is in shared mode, lock it exclusively instead, perform the delayed vnode_pager_setsize() call by doing VOP_DELAYED_SETSIZE(), and then downgrade to shared. Filesystems that opt into the feature must provide the implementation of VOP_DELAYED_SETSIZE() that actually calls vnode_pager_setsize(), and use the vn_delay_setsize() helper to mark the vnode as requiring the delayed call. 
Reviewed by: rmacklem Tested by: pho Sponsored by: The FreeBSD Foundation MFC after: 1 week Differential revision: https://reviews.freebsd.org/D55595 --- sys/fs/deadfs/dead_vnops.c | 1 + sys/kern/vfs_default.c | 1 + sys/kern/vfs_vnops.c | 74 +++++++++++++++++++++++++++++++++++++++++++++- sys/kern/vnode_if.src | 8 +++++ sys/sys/vnode.h | 31 +++++++++++++++++++ 5 files changed, 114 insertions(+), 1 deletion(-) diff --git a/sys/fs/deadfs/dead_vnops.c b/sys/fs/deadfs/dead_vnops.c index 137c86b65058..b6d6fa55d221 100644 --- a/sys/fs/deadfs/dead_vnops.c +++ b/sys/fs/deadfs/dead_vnops.c @@ -80,6 +80,7 @@ struct vop_vector dead_vnodeops = { .vop_write = dead_write, .vop_fplookup_vexec = VOP_EOPNOTSUPP, .vop_fplookup_symlink = VOP_EOPNOTSUPP, + .vop_delayed_setsize = VOP_NULL, }; VFS_VOP_VECTOR_REGISTER(dead_vnodeops); diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 468d5d18b02b..3151c69d1912 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -147,6 +147,7 @@ struct vop_vector default_vnodeops = { .vop_add_writecount = vop_stdadd_writecount, .vop_copy_file_range = vop_stdcopy_file_range, .vop_vput_pair = vop_stdvput_pair, + .vop_delayed_setsize = VOP_PANIC, }; VFS_VOP_VECTOR_REGISTER(default_vnodeops); diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index a53df50c06bd..24efdf4ac0d5 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -1959,9 +1959,74 @@ _vn_lock_fallback(struct vnode *vp, int flags, const char *file, int line, return (0); } +static int +vn_lock_delayed_setsize(struct vop_lock1_args *ap) +{ + struct vnode *vp; + int error, lktype; + bool onfault; + + vp = ap->a_vp; + lktype = ap->a_flags & LK_TYPE_MASK; + if (vp->v_op == &dead_vnodeops) + return (0); + VI_LOCK(vp); + if ((vp->v_iflag & VI_DELAYEDSSZ) == 0 || (lktype != LK_SHARED && + lktype != LK_EXCLUSIVE && lktype != LK_UPGRADE && + lktype != LK_TRYUPGRADE)) { + VI_UNLOCK(vp); + return (0); + } + onfault = (ap->a_flags & LK_EATTR_MASK) == LK_NOWAIT && 
+ (ap->a_flags & LK_INIT_MASK) == LK_CANRECURSE && + (lktype == LK_SHARED || lktype == LK_EXCLUSIVE); + if (onfault && vp->v_vnlock->lk_recurse == 0) { + /* + * Force retry in vm_fault(), to make the lock request + * sleepable, which allows us to piggy-back the + * sleepable call to vnode_pager_setsize(). + */ + VI_UNLOCK(vp); + VOP_UNLOCK(vp); + return (EBUSY); + } + if ((ap->a_flags & LK_NOWAIT) != 0 || + (lktype == LK_SHARED && vp->v_vnlock->lk_recurse > 0)) { + VI_UNLOCK(vp); + return (0); + } + if (lktype == LK_SHARED) { + VOP_UNLOCK(vp); + ap->a_flags &= ~LK_TYPE_MASK; + ap->a_flags |= LK_EXCLUSIVE | LK_INTERLOCK; + error = VOP_LOCK1_APV(&default_vnodeops, ap); + if (error != 0 || vp->v_op == &dead_vnodeops) + return (error); + if (vp->v_data == NULL) + goto downgrade; + MPASS(vp->v_data != NULL); + VI_LOCK(vp); + if ((vp->v_iflag & VI_DELAYEDSSZ) == 0) { + VI_UNLOCK(vp); + goto downgrade; + } + } + vp->v_iflag &= ~VI_DELAYEDSSZ; + VI_UNLOCK(vp); + VOP_DELAYED_SETSIZE(vp); +downgrade: + if (lktype == LK_SHARED) { + ap->a_flags &= ~(LK_TYPE_MASK | LK_INTERLOCK); + ap->a_flags |= LK_DOWNGRADE; + (void)VOP_LOCK1_APV(&default_vnodeops, ap); + } + return (0); +} + int _vn_lock(struct vnode *vp, int flags, const char *file, int line) { + struct vop_lock1_args ap; int error; VNASSERT((flags & LK_TYPE_MASK) != 0, vp, @@ -1970,7 +2035,14 @@ _vn_lock(struct vnode *vp, int flags, const char *file, int line) error = VOP_LOCK1(vp, flags, file, line); if (__predict_false(error != 0 || VN_IS_DOOMED(vp))) return (_vn_lock_fallback(vp, flags, file, line, error)); - return (0); + if (__predict_false((vp->v_iflag & VI_DELAYEDSSZ) == 0)) + return (0); + ap.a_gen.a_desc = &vop_lock1_desc; + ap.a_vp = vp; + ap.a_flags = flags; + ap.a_file = file; + ap.a_line = line; + return (vn_lock_delayed_setsize(&ap)); } /* diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src index 6b7448d9f1df..78ba1aa7afda 100644 --- a/sys/kern/vnode_if.src +++ b/sys/kern/vnode_if.src @@ -847,6 +847,14 
@@ vop_inotify_add_watch { IN struct thread *td; }; + +%% delayed_setsize vp E E E + +vop_delayed_setsize { + IN struct vnode *vp; +}; + + # The VOPs below are spares at the end of the table to allow new VOPs to be # added in stable branches without breaking the KBI. New VOPs in HEAD should # be added above these spares. When merging a new VOP to a stable branch, diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 1a267e0e272c..36e10fd8d8b7 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -268,6 +268,7 @@ _Static_assert(sizeof(struct vnode) <= 448, "vnode size crosses 448 bytes"); #define VI_DEFINACT 0x0010 /* deferred inactive */ #define VI_FOPENING 0x0020 /* In open, with opening process having the first right to advlock file */ +#define VI_DELAYEDSSZ 0x0040 /* Delayed setsize */ #define VV_ROOT 0x0001 /* root of its filesystem */ #define VV_ISTTY 0x0002 /* vnode represents a tty */ @@ -1251,6 +1252,36 @@ vn_get_state(struct vnode *vp) atomic_load_consume_ptr(&(_vp)->v_data);\ }) +static inline void +vn_delay_setsize_locked(struct vnode *vp) +{ + ASSERT_VI_LOCKED(vp, "delayed_setsize"); + vp->v_iflag |= VI_DELAYEDSSZ; +} + +static inline void +vn_delay_setsize(struct vnode *vp) +{ + VI_LOCK(vp); + vn_delay_setsize_locked(vp); + VI_UNLOCK(vp); +} + +static inline void +vn_clear_delayed_setsize_locked(struct vnode *vp) +{ + ASSERT_VI_LOCKED(vp, "delayed_setsize"); + vp->v_iflag &= ~VI_DELAYEDSSZ; +} + +static inline void +vn_clear_delayed_setsize(struct vnode *vp) +{ + VI_LOCK(vp); + vn_clear_delayed_setsize_locked(vp); + VI_UNLOCK(vp); +} + #endif /* _KERNEL */ #endif /* !_SYS_VNODE_H_ */
