On Thu, May 09, 2013 at 09:02:56AM +0200, Peter Holm wrote: > On Wed, May 08, 2013 at 12:13:17PM +0300, Konstantin Belousov wrote: > > On Tue, May 07, 2013 at 08:30:06AM +0200, G??ran L??wkrantz wrote: > > > I created a PR, kern/178238, on this but would like to know if anyone has > > > any ideas or patches? > > > > > > Have updated the system where I see this to FreeBSD 9.1-STABLE #0 r250229 > > > and still have the problem. > > > > The patch below should fix the issue for you, at least it did so in my > > limited testing. > > > > What is does: > > 1. When inactivating a nullfs vnode, check if the lower vnode is > > unlinked, and reclaim upper vnode if so. [This fixes your case]. > > > > 2. Besides a callback to the upper filesystems for the lower vnode > > reclaimation, it also calls the upper fs for lower vnode unlink. > > This allows nullfs to purge cached vnodes for the unlinked lower. > > [This fixes an opposite case, when the vnode is removed from the > > lower mount, but upper aliases prevent the vnode from being > > recycled]. > > > > 3. Fix a wart which existed from the introduction of the nullfs caching, > > do not unlock lower vnode in the nullfs_reclaim_lowervp(). It should > > be completely innocent, but now it is also formally safe. > > > > 4. Fix vnode reference leak in nullfs_reclaim_lowervp(). > > > > Please note that the patch is basically not tested, I only verified your > > scenario and a mirror of it as described in item 2. > > > > diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h > > index 4f37020..a624be6 100644 > > --- a/sys/fs/nullfs/null.h > > The page fault seen in fifo_close() seems unrelated to this patch, > which I will continue testing some more. > > The scenario triggering the page fault is the "rm": > > mdconfig -a -t swap -s 1g -u 5 > bsdlabel -w md5 auto > newfs -U md5a > mount /dev/md5a /mnt > mount -t nullfs /mnt /mnt2 > mkfifo /mnt2/fifo > rm /mnt/fifo Yeah, I figured this out from the backtrace.
The panic in kostik563 is related and directly caused by the patch, since patch erronously performs vgone() on the active (removed) vnode. As result, a spurious VOP_CLOSE() call is performed on the vnode, which is more or less innocent for regular files, but fatal for fifos and probably nfsv4 as well. Updated patch is below. > > Not a problem on 8.3-STABLE r247938. Yes, new nullfs is only in HEAD and stable/9. diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h index 4f37020..0972dfc 100644 --- a/sys/fs/nullfs/null.h +++ b/sys/fs/nullfs/null.h @@ -53,8 +53,12 @@ struct null_node { LIST_ENTRY(null_node) null_hash; /* Hash list */ struct vnode *null_lowervp; /* VREFed once */ struct vnode *null_vnode; /* Back pointer */ + u_int null_flags; }; +#define NULLV_NOUNLOCK 0x0001 +#define NULLV_DROP 0x0002 + #define MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data)) #define VTONULL(vp) ((struct null_node *)(vp)->v_data) #define NULLTOV(xp) ((xp)->null_vnode) diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c index 02932bd..ad02236 100644 --- a/sys/fs/nullfs/null_vfsops.c +++ b/sys/fs/nullfs/null_vfsops.c @@ -65,7 +65,6 @@ static vfs_statfs_t nullfs_statfs; static vfs_unmount_t nullfs_unmount; static vfs_vget_t nullfs_vget; static vfs_extattrctl_t nullfs_extattrctl; -static vfs_reclaim_lowervp_t nullfs_reclaim_lowervp; /* * Mount null layer @@ -391,8 +390,37 @@ nullfs_reclaim_lowervp(struct mount *mp, struct vnode *lowervp) vp = null_hashget(mp, lowervp); if (vp == NULL) return; + VTONULL(vp)->null_flags |= NULLV_NOUNLOCK; vgone(vp); - vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY); + vput(vp); +} + +static void +nullfs_unlink_lowervp(struct mount *mp, struct vnode *lowervp) +{ + struct vnode *vp; + struct null_node *xp; + + vp = null_hashget(mp, lowervp); + if (vp == NULL) + return; + xp = VTONULL(vp); + xp->null_flags |= NULLV_DROP | NULLV_NOUNLOCK; + vhold(vp); + vunref(vp); + + /* + * If vunref() dropped the last use reference on the nullfs + * vnode, it must be reclaimed, and its lock was split from + * the lower vnode lock. Need to do extra unlock before + * allowing the final vdrop() to free the vnode. + */ + if (vp->v_usecount == 0) { + KASSERT((vp->v_iflag & VI_DOOMED) != 0, + ("not reclaimed %p", vp)); + VOP_UNLOCK(vp, 0); + } + vdrop(vp); } static struct vfsops null_vfsops = { @@ -408,6 +436,7 @@ static struct vfsops null_vfsops = { .vfs_unmount = nullfs_unmount, .vfs_vget = nullfs_vget, .vfs_reclaim_lowervp = nullfs_reclaim_lowervp, + .vfs_unlink_lowervp = nullfs_unlink_lowervp, }; VFS_SET(null_vfsops, nullfs, VFCF_LOOPBACK | VFCF_JAIL); diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c index f59865f..6ff15ee 100644 --- a/sys/fs/nullfs/null_vnops.c +++ b/sys/fs/nullfs/null_vnops.c @@ -692,18 +692,24 @@ null_unlock(struct vop_unlock_args *ap) static int null_inactive(struct vop_inactive_args *ap __unused) { - struct vnode *vp; + struct vnode *vp, *lvp; + struct null_node *xp; struct mount *mp; struct null_mount *xmp; vp = ap->a_vp; + xp = VTONULL(vp); + lvp = NULLVPTOLOWERVP(vp); mp = vp->v_mount; xmp = MOUNTTONULLMOUNT(mp); - if ((xmp->nullm_flags & NULLM_CACHE) == 0) { + if ((xmp->nullm_flags & NULLM_CACHE) == 0 || + (xp->null_flags & NULLV_DROP) != 0 || + (lvp->v_vflag & VV_NOSYNC) != 0) { /* * If this is the last reference and caching of the - * nullfs vnodes is not enabled, then free up the - * vnode so as not to tie up the lower vnodes. + * nullfs vnodes is not enabled, or the lower vnode is + * deleted, then free up the vnode so as not to tie up + * the lower vnodes. */ vp->v_object = NULL; vrecycle(vp); @@ -748,7 +754,10 @@ null_reclaim(struct vop_reclaim_args *ap) */ if (vp->v_writecount > 0) VOP_ADD_WRITECOUNT(lowervp, -1); - vput(lowervp); + if ((xp->null_flags & NULLV_NOUNLOCK) != 0) + vunref(lowervp); + else + vput(lowervp); free(xp, M_NULLFSNODE); return (0); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index de118f7..988a114 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -2700,19 +2700,20 @@ vgone(struct vnode *vp) } static void -vgonel_reclaim_lowervp_vfs(struct mount *mp __unused, +notify_lowervp_vfs_dummy(struct mount *mp __unused, struct vnode *lowervp __unused) { } /* - * Notify upper mounts about reclaimed vnode. + * Notify upper mounts about reclaimed or unlinked vnode. */ -static void -vgonel_reclaim_lowervp(struct vnode *vp) +void +vfs_notify_upper(struct vnode *vp, int event) { static struct vfsops vgonel_vfsops = { - .vfs_reclaim_lowervp = vgonel_reclaim_lowervp_vfs + .vfs_reclaim_lowervp = notify_lowervp_vfs_dummy, + .vfs_unlink_lowervp = notify_lowervp_vfs_dummy, }; struct mount *mp, *ump, *mmp; @@ -2736,7 +2737,17 @@ vgonel_reclaim_lowervp(struct vnode *vp) } TAILQ_INSERT_AFTER(&mp->mnt_uppers, ump, mmp, mnt_upper_link); MNT_IUNLOCK(mp); - VFS_RECLAIM_LOWERVP(ump, vp); + switch (event) { + case VFS_NOTIFY_UPPER_RECLAIM: + VFS_RECLAIM_LOWERVP(ump, vp); + break; + case VFS_NOTIFY_UPPER_UNLINK: + VFS_UNLINK_LOWERVP(ump, vp); + break; + default: + KASSERT(0, ("invalid event %d", event)); + break; + } MNT_ILOCK(mp); ump = TAILQ_NEXT(mmp, mnt_upper_link); TAILQ_REMOVE(&mp->mnt_uppers, mmp, mnt_upper_link); @@ -2783,7 +2794,7 @@ vgonel(struct vnode *vp) active = vp->v_usecount; oweinact = (vp->v_iflag & VI_OWEINACT); VI_UNLOCK(vp); - vgonel_reclaim_lowervp(vp); + vfs_notify_upper(vp, VFS_NOTIFY_UPPER_RECLAIM); /* * Clean out any buffers associated with the vnode. diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 29cb7bd..a004ea0 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -1846,6 +1846,7 @@ restart: if (error) goto out; #endif + vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); #ifdef MAC out: @@ -3825,6 +3826,7 @@ restart: return (error); goto restart; } + vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); vn_finished_write(mp); out: diff --git a/sys/sys/mount.h b/sys/sys/mount.h index a9c86f6..a953dae 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -608,7 +608,7 @@ typedef int vfs_mount_t(struct mount *mp); typedef int vfs_sysctl_t(struct mount *mp, fsctlop_t op, struct sysctl_req *req); typedef void vfs_susp_clean_t(struct mount *mp); -typedef void vfs_reclaim_lowervp_t(struct mount *mp, struct vnode *lowervp); +typedef void vfs_notify_lowervp_t(struct mount *mp, struct vnode *lowervp); struct vfsops { vfs_mount_t *vfs_mount; @@ -626,7 +626,8 @@ struct vfsops { vfs_extattrctl_t *vfs_extattrctl; vfs_sysctl_t *vfs_sysctl; vfs_susp_clean_t *vfs_susp_clean; - vfs_reclaim_lowervp_t *vfs_reclaim_lowervp; + vfs_notify_lowervp_t *vfs_reclaim_lowervp; + vfs_notify_lowervp_t *vfs_unlink_lowervp; }; vfs_statfs_t __vfs_statfs; @@ -747,6 +748,14 @@ vfs_statfs_t __vfs_statfs; } \ } while (0) +#define VFS_UNLINK_LOWERVP(MP, VP) do { \ + if (*(MP)->mnt_op->vfs_unlink_lowervp != NULL) { \ + VFS_PROLOGUE(MP); \ + (*(MP)->mnt_op->vfs_unlink_lowervp)((MP), (VP)); \ + VFS_EPILOGUE(MP); \ + } \ +} while (0) + #define VFS_KNOTE_LOCKED(vp, hint) do \ { \ if (((vp)->v_vflag & VV_NOKNOTE) == 0) \ @@ -759,6 +768,9 @@ vfs_statfs_t __vfs_statfs; VN_KNOTE((vp), (hint), 0); \ } while (0) +#define VFS_NOTIFY_UPPER_RECLAIM 1 +#define VFS_NOTIFY_UPPER_UNLINK 2 + #include <sys/module.h> /* @@ -840,6 +852,7 @@ int vfs_modevent(module_t, int, void *); void vfs_mount_error(struct mount *, const char *, ...); void vfs_mountroot(void); /* mount our root filesystem */ void vfs_mountedfrom(struct mount *, const char *from); +void vfs_notify_upper(struct vnode *, int); void vfs_oexport_conv(const struct oexport_args *oexp, struct export_args *exp); void vfs_ref(struct mount *);
pgpVmvEK2_Ft2.pgp
Description: PGP signature