On Tue, Jul 12, 2011 at 03:02:44PM +0200, Attilio Rao wrote: > 2011/7/12 Kostik Belousov <[email protected]>: > > On Tue, Jul 12, 2011 at 07:10:28PM +0900, Kohji Okuno wrote: > >> Hello, > >> > >> I think that devfs has a problem. > >> I encountered the problem that open("/dev/AAA") returned ENOENT. > >> Of course, /dev/AAA exists. > >> > >> ENOENT was created by the point(***) in devfs_allocv(). > >> I think that the race condition had occurred between process A and > >> vnlru kernel thread. > >> > >> Please check the following. > >> > >> If vnlru set VI_DOOMED to vp->v_iflag but vnlru didn't still execute > >> VOP_RECLAIM(), process A cat get valid vp from de->de_vnode. > >> But, vget() will return ENOENT, because vp->v_iflag has VI_DOOMED. > >> > >> When I set the break point to (***), I checked that de->de_vnode and > >> vp->v_data were NULL. > >> > >> > >> process A: vnlru: > >> > >> devfs_allocv() { > >> vgonel(vp) { > >> ... ... > >> vp->v_iflag |= VI_DOOMED; > >> mtx_lock(&devfs_de_interlock); ... > >> vp = de->de_vnode; > >> if (vp != NULL) { VI_UNLOCK(vp); > >> _____________/ ... > >> VI_LOCK(vp); ____________/ if (VOP_RECLAIM(vp, td)) > >> mtx_unlock(&devfs_de_interlock); ... > >> ... \ devfs_reclaim(ap) { > >> error = vget(vp,...); \ > >> ... \______ > >> mtx_lock(&devfs_de_interlock); > >> if (devfs_allocv_drop_refs(...)) { de = vp->v_data; > >> ... if (de != NULL) { > >> } de->de_vnode = NULL; > >> else if (error) { vp->v_data = NULL; > >> ... } > >> rturn (error); (***) mtx_unlock(&devfs_de_interlock); > >> } ... > >> } > >> > >> > >> > >> I think that devfs_allocv() should be fixed as below. > >> How do you think? > >> > >> devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vnode **vpp) > >> { > >> int error; > >> struct vnode *vp; > >> struct cdev *dev; > >> struct devfs_mount *dmp; > >> > >> dmp = VFSTODEVFS(mp); > >> +#if 1 > >> + retry: > >> +#endif > >> if (de->de_flags & DE_DOOMED) { > >> > >> ... > >> > >> mtx_lock(&devfs_de_interlock); > >> vp = de->de_vnode; > >> if (vp != NULL) { > >> VI_LOCK(vp); > >> mtx_unlock(&devfs_de_interlock); > >> sx_xunlock(&dmp->dm_lock); > >> error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, curthread); > >> sx_xlock(&dmp->dm_lock); > >> if (devfs_allocv_drop_refs(0, dmp, de)) { > >> if (error == 0) > >> vput(vp); > >> return (ENOENT); > >> } > >> else if (error) { > >> +#if 1 > >> + if (error == ENOENT) > >> + goto retry; > >> +#endif > >> sx_xunlock(&dmp->dm_lock); > >> return (error); > >> } > >> > > Thank you for the report. > > > > The proposed change would revert r179247, which also caused some issues. > > Are you able to reproduce the problem ? > > > > Could you try the following patch ? I cannot reproduce your situation, > > so the patch is untested by me. > > > > diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c > > index bf6dab8..bbbfff4 100644 > > --- a/sys/fs/devfs/devfs_vnops.c > > +++ b/sys/fs/devfs/devfs_vnops.c > > @@ -397,6 +397,7 @@ devfs_allocv(struct devfs_dirent *de, struct mount *mp, > > int lockmode, > > sx_xunlock(&dmp->dm_lock); > > return (ENOENT); > > } > > +loop: > > DEVFS_DE_HOLD(de); > > DEVFS_DMP_HOLD(dmp); > > mtx_lock(&devfs_de_interlock); > > @@ -412,7 +413,16 @@ devfs_allocv(struct devfs_dirent *de, struct mount > > *mp, int lockmode, > > vput(vp); > > return (ENOENT); > > } > > - else if (error) { > > + else if (error != 0) { > > + if (error == ENOENT) { > > + mtx_lock(&devfs_de_interlock); > > + while (de->de_vnode != NULL) { > > + msleep(&de->de_vnode, > > + &devfs_de_interlock, 0, > > "dvall", 0); > > + } > > + mtx_unlock(&devfs_de_interlock); > > + goto loop; > > + } > > sx_xunlock(&dmp->dm_lock); > > return (error); > > } > > @@ -1259,6 +1269,7 @@ devfs_reclaim(struct vop_reclaim_args *ap) > > if (de != NULL) { > > de->de_vnode = NULL; > > vp->v_data = NULL; > > + wakeup(&de->de_vnode); > > } > > mtx_unlock(&devfs_de_interlock); > > I think that this approach may starve the thread for a while. > As I told you privately I was able to see on field a livelock because > of this check. In my case, it was a thread running for 63seconds (at > least, at that point the watchdog was tripping over). Feasible explanation was not found at the time, AFAIR. I could believe that this is possible with r179247 and driver stuck in the close cdevsw method.
More risky change would be to clear de_vnode early. All devfs code shall
be already safe by checking for VI_DOOMED, and if VI_DOOMED, v_data may
be NULL.
Again, I am unable to test.
diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c
index bf6dab8..955bd8b 100644
--- a/sys/fs/devfs/devfs_vnops.c
+++ b/sys/fs/devfs/devfs_vnops.c
@@ -397,6 +397,7 @@ devfs_allocv(struct devfs_dirent *de, struct mount *mp, int
lockmode,
sx_xunlock(&dmp->dm_lock);
return (ENOENT);
}
+loop:
DEVFS_DE_HOLD(de);
DEVFS_DMP_HOLD(dmp);
mtx_lock(&devfs_de_interlock);
@@ -405,16 +406,21 @@ devfs_allocv(struct devfs_dirent *de, struct mount *mp,
int lockmode,
VI_LOCK(vp);
mtx_unlock(&devfs_de_interlock);
sx_xunlock(&dmp->dm_lock);
- error = vget(vp, lockmode | LK_INTERLOCK, curthread);
+ vget(vp, lockmode | LK_INTERLOCK | LK_RETRY, curthread);
sx_xlock(&dmp->dm_lock);
if (devfs_allocv_drop_refs(0, dmp, de)) {
- if (error == 0)
- vput(vp);
+ vput(vp);
return (ENOENT);
}
- else if (error) {
- sx_xunlock(&dmp->dm_lock);
- return (error);
+ else if ((vp->v_iflag & VI_DOOMED) != 0) {
+ mtx_lock(&devfs_de_interlock);
+ if (de->de_vnode == vp) {
+ de->de_vnode = NULL;
+ vp->v_data = NULL;
+ }
+ mtx_unlock(&devfs_de_interlock);
+ vput(vp);
+ goto loop;
}
sx_xunlock(&dmp->dm_lock);
*vpp = vp;
pgpzWvdjVtaw0.pgp
Description: PGP signature
