On Tue, Apr 19, 2022 at 11:59:05AM +0200, Martin Pieuchot wrote:
> On 14/04/22(Thu) 18:29, Alexander Bluhm wrote:
> > [...]
> > vn_lock: v_usecount == 0: 0x23e6b910, type VREG, use 0, write 0, hold 0,
> > flags (VBIOONFREELIST)
> > tag VT_UFS, ino 703119, on dev 0, 10 flags 0x100, effnlink 1, nlink
> > 1
> > mode 0100660, owner 21, group 21, size 13647873
> > ==> vnode_history_print 0x23e6b910, next=5
> > [3] c++[68540] usecount 2>1
> > #0 0x625703e0
> > [4] reaper[31684] usecount 1>0
> > #0 splx+0x30
> > #1 0xfffffffc
> > #2 vrele+0x5c
> > #3 uvn_detach+0x154
>
> I wonder if the `uvn' has the UVM_VNODE_CANPERSIST set at this point.
> This would explain why the associate pages are not freed and end up in
> the inactive list.
>
> Is there an easy way to check that?
>
Here is a new diff (sorry for the delay) which adds a new vnode_history_record()
point inside uvn_detach() (when the 'uvn' object has the UVM_VNODE_CANPERSIST
flag set).
I am unsure if this is the simplest method to track that: we will rely on the
presence/absence of this particular entry to deduce whether the 'uvn' has
UVM_VNODE_CANPERSIST or not, and as we are using a ring, the info could be
overwritten...
Let's see if we can make progress.
Alexander and Andrew, could you try to reproduce the problem with this diff?
Thanks.
--
Sebastien Marie
diff 8d3b4fd4f5e7dfb98f0fe3fdbfb579f0dfe8d3bc /home/semarie/repos/openbsd/src
blob - 13c70890ccc11f131013d14b853f4f06941924cc
file + sys/kern/vfs_subr.c
--- sys/kern/vfs_subr.c
+++ sys/kern/vfs_subr.c
@@ -66,6 +66,7 @@
#include <sys/tree.h>
#include <sys/specdev.h>
#include <sys/atomic.h>
+#include <sys/stacktrace.h>
#include <netinet/in.h>
@@ -126,8 +127,28 @@ void vfs_unmountall(void);
void printlockedvnodes(void);
#endif
+#define VH_MAXST 10 /* ring size */
+#define VH_NOP 0
+#define VH_INCR 1
+#define VH_DECR 2
+struct vhistory {
+ pid_t vhh_pid;
+ char vhh_comm[_MAXCOMLEN]; /* incl NUL */
+ u_int vhh_old_usecount;
+ u_int vhh_new_usecount;
+ struct stacktrace vhh_st;
+};
+struct vnode_history {
+ u_int vh_next; /* [K] next free slot in vh_ring */
+ struct vhistory vh_ring[VH_MAXST]; /* [K] ring for holding the vnode
history */
+};
+void vnode_history_init(struct vnode *);
+void vnode_history_record(struct vnode *, int);
+void vnode_history_print(struct vnode *, int (*pr)(const char *, ...)
__attribute__((__format__(__kprintf__,1,2))));
+
struct pool vnode_pool;
struct pool uvm_vnode_pool;
+struct pool vnode_history_pool;
static inline int rb_buf_compare(const struct buf *b1, const struct buf *b2);
RBT_GENERATE(buf_rb_bufs, buf, b_rbbufs, rb_buf_compare);
@@ -154,6 +175,8 @@ vntblinit(void)
PR_WAITOK, "vnodes", NULL);
pool_init(&uvm_vnode_pool, sizeof(struct uvm_vnode), 0, IPL_NONE,
PR_WAITOK, "uvmvnodes", NULL);
+ pool_init(&vnode_history_pool, sizeof(struct vnode_history), 0,
+ IPL_NONE, PR_WAITOK, "vnodehis", NULL);
TAILQ_INIT(&vnode_hold_list);
TAILQ_INIT(&vnode_free_list);
TAILQ_INIT(&mountlist);
@@ -410,6 +433,7 @@ getnewvnode(enum vtagtype tag, struct mount *mp, const
splx(s);
vp = pool_get(&vnode_pool, PR_WAITOK | PR_ZERO);
vp->v_uvm = pool_get(&uvm_vnode_pool, PR_WAITOK | PR_ZERO);
+ vp->v_history = pool_get(&vnode_history_pool, PR_WAITOK);
vp->v_uvm->u_vnode = vp;
uvm_obj_init(&vp->v_uvm->u_obj, &uvm_vnodeops, 0);
RBT_INIT(buf_rb_bufs, &vp->v_bufs_tree);
@@ -460,6 +484,7 @@ getnewvnode(enum vtagtype tag, struct mount *mp, const
vp->v_socket = NULL;
}
cache_purge(vp);
+ vnode_history_init(vp);
vp->v_type = VNON;
vp->v_tag = tag;
vp->v_op = vops;
@@ -674,9 +699,13 @@ vget(struct vnode *vp, int flags)
}
vp->v_usecount++;
+ vnode_history_record(vp, VH_INCR);
+
if (flags & LK_TYPE_MASK) {
if ((error = vn_lock(vp, flags)) != 0) {
vp->v_usecount--;
+ vnode_history_record(vp, VH_DECR);
+
if (vp->v_usecount == 0 && onfreelist)
vputonfreelist(vp);
}
@@ -700,6 +729,7 @@ vref(struct vnode *vp)
panic("vref on a VNON vnode");
#endif
vp->v_usecount++;
+ vnode_history_record(vp, VH_INCR);
}
void
@@ -762,6 +792,7 @@ vput(struct vnode *vp)
}
#endif
vp->v_usecount--;
+ vnode_history_record(vp, VH_DECR);
KASSERT(vp->v_usecount > 0 || vp->v_uvcount == 0);
if (vp->v_usecount > 0) {
VOP_UNLOCK(vp);
@@ -802,6 +833,8 @@ vrele(struct vnode *vp)
}
#endif
vp->v_usecount--;
+ vnode_history_record(vp, VH_DECR);
+
if (vp->v_usecount > 0) {
return (0);
}
@@ -813,7 +846,7 @@ vrele(struct vnode *vp)
}
#endif
- if (vn_lock(vp, LK_EXCLUSIVE)) {
+ if (vn_lock(vp, LK_EXCLUSIVE | LK_IOP)) {
#ifdef DIAGNOSTIC
vprint("vrele: cannot lock", vp);
#endif
@@ -824,6 +857,7 @@ vrele(struct vnode *vp)
if (vp->v_usecount == 0 && !(vp->v_bioflag & VBIOONFREELIST))
vputonfreelist(vp);
+
return (1);
}
@@ -999,8 +1033,10 @@ vclean(struct vnode *vp, int flags, struct proc *p)
* so that its count cannot fall to zero and generate a
* race against ourselves to recycle it.
*/
- if ((active = vp->v_usecount) != 0)
+ if ((active = vp->v_usecount) != 0) {
vp->v_usecount++;
+ vnode_history_record(vp, VH_INCR);
+ }
/*
* Prevent the vnode from being recycled or
@@ -1065,6 +1101,8 @@ vclean(struct vnode *vp, int flags, struct proc *p)
panic("vclean: cannot reclaim");
if (active) {
vp->v_usecount--;
+ vnode_history_record(vp, VH_DECR);
+
if (vp->v_usecount == 0) {
if (vp->v_holdcnt > 0)
panic("vclean: not clean");
@@ -1201,6 +1239,11 @@ vgonel(struct vnode *vp, struct proc *p)
if (vp->v_holdcnt > 0)
panic("vgonel: not clean");
+#ifdef DIAGNOSTIC
+ if (!(vp->v_bioflag & VBIOONFREELIST))
+ panic("vgonel: not on freelist");
+#endif
+
if (TAILQ_FIRST(&vnode_free_list) != vp) {
TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
@@ -1275,8 +1318,7 @@ loop:
/*
* Print out a description of a vnode.
*/
-static char *typename[] =
- { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"
};
+static char *typename[] = { VTYPE_NAMES };
void
vprint(char *label, struct vnode *vp)
@@ -1315,6 +1357,8 @@ vprint(char *label, struct vnode *vp)
printf("\n\t");
VOP_PRINT(vp);
}
+
+ vnode_history_print(vp, &printf);
}
#endif /* DEBUG || DIAGNOSTIC */
@@ -2251,7 +2295,7 @@ vfs_vnode_print(void *v, int full,
{
struct vnode *vp = v;
- (*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n",
+ (*pr)("%p tag %s(%d) type %s(%d) mount %p typedata %p\n", v,
(u_int)vp->v_tag >= nitems(vtags)? "<unk>":vtags[vp->v_tag],
vp->v_tag,
(u_int)vp->v_type >= nitems(vtypes)? "<unk>":vtypes[vp->v_type],
@@ -2261,6 +2305,9 @@ vfs_vnode_print(void *v, int full,
vp->v_data, vp->v_usecount, vp->v_writecount,
vp->v_holdcnt, vp->v_numoutput);
+ (*pr)("flag 0x%x lflag 0x%x bioflag 0x%x\n",
+ vp->v_flag, vp->v_lflag, vp->v_bioflag);
+
/* uvm_object_printit(&vp->v_uobj, full, pr); */
if (full) {
@@ -2277,6 +2324,8 @@ vfs_vnode_print(void *v, int full,
(*pr)(" bp %p\n", bp);
vfs_buf_print(bp, full, pr);
}
+
+ vnode_history_print(vp, pr);
}
}
@@ -2376,3 +2425,75 @@ copy_statfs_info(struct statfs *sbp, const struct moun
memcpy(&sbp->mount_info, &mp->mnt_stat.mount_info,
sizeof(union mount_info));
}
+
+#ifdef SMALL_KERNEL
+void
+vnode_history_init(struct vnode *vp)
+{
+}
+
+void
+vnode_history_record(struct vnode *vp, int direction)
+{
+}
+
+void vnode_history_print(struct vnode *vp, int (*pr)(const char *, ...)
__attribute__((__format__(__kprintf__,1,2))))
+{
+}
+#else
+void
+vnode_history_init(struct vnode *vp)
+{
+ KERNEL_ASSERT_LOCKED();
+
+ vp->v_history->vh_next = 0;
+ bzero(vp->v_history, sizeof(*vp->v_history));
+}
+
+void
+vnode_history_record(struct vnode *vp, int direction)
+{
+ struct proc *p = curproc;
+ struct vhistory *vh;
+
+ KERNEL_ASSERT_LOCKED();
+
+ vh = &(vp->v_history->vh_ring[vp->v_history->vh_next]);
+ vp->v_history->vh_next = (vp->v_history->vh_next + 1) % VH_MAXST;
+
+ vh->vhh_pid = p->p_p->ps_pid;
+ strlcpy(vh->vhh_comm, p->p_p->ps_comm, sizeof(vh->vhh_comm));
+ if (direction == VH_NOP)
+ vh->vhh_old_usecount = vp->v_usecount;
+ else if (direction == VH_INCR)
+ vh->vhh_old_usecount = vp->v_usecount - 1;
+ else if (direction == VH_DECR)
+ vh->vhh_old_usecount = vp->v_usecount + 1;
+ else
+ panic("invalid direction: %d", direction);
+ vh->vhh_new_usecount = vp->v_usecount;
+ stacktrace_save_at(&vh->vhh_st, 0);
+}
+
+void
+vnode_history_print(struct vnode *vp, int (*pr)(const char *, ...)
__attribute__((__format__(__kprintf__,1,2))))
+{
+ int i;
+
+ KERNEL_ASSERT_LOCKED();
+
+ pr("==> vnode_history_print %p, next=%d\n", vp, vp->v_history->vh_next);
+
+ for (i = 0; i < VH_MAXST; i++) {
+ struct vhistory *vh = &(vp->v_history->vh_ring[i]);
+
+ if (vh->vhh_st.st_count == 0)
+ continue;
+
+ pr(" [%d%s] %s[%d] usecount %d>%d\n", i, vp->v_history->vh_next
== i ? ">" : "",
+ vh->vhh_comm, vh->vhh_pid,
+ vh->vhh_old_usecount, vh->vhh_new_usecount);
+ stacktrace_print(&vh->vhh_st, pr);
+ }
+}
+#endif
blob - 87182a180504da6342a7a6dfebb3d2e5dc499194
file + sys/kern/vfs_vnops.c
--- sys/kern/vfs_vnops.c
+++ sys/kern/vfs_vnops.c
@@ -566,6 +566,17 @@ vn_lock(struct vnode *vp, int flags)
int error, xlocked, do_wakeup;
do {
+#ifdef DIAGNOSTIC
+ /*
+ * The vn_lock() function must not be called when the
+ * vnode's reference count is zero.
+ */
+ if ((flags & LK_IOP) == 0 && (vp->v_usecount == 0)) {
+ vprint("vn_lock: v_usecount == 0", vp);
+ panic("vn_lock: v_usecount == 0");
+ }
+#endif
+
mtx_enter(&vnode_mtx);
if (vp->v_lflag & VXLOCK) {
vp->v_lflag |= VXWANT;
blob - 2388b5955dcaf4f6a6cb6a10840d5cb7b0e530c1
file + sys/sys/lock.h
--- sys/sys/lock.h
+++ sys/sys/lock.h
@@ -51,5 +51,6 @@
/* LK_ specific */
#define LK_DRAIN 0x1000UL /* wait for all lock activity to end */
#define LK_RETRY 0x2000UL /* vn_lock: retry until locked */
+#define LK_IOP 0x4000UL /* vn_lock: internal operation (with
v_usecount=0) */
#endif /* !_LOCK_H_ */
blob - 0ba69cdb12e3466472d18fe8dc163bfeb050b9e9
file + sys/sys/vnode.h
--- sys/sys/vnode.h
+++ sys/sys/vnode.h
@@ -85,11 +85,14 @@ RBT_HEAD(buf_rb_bufs, buf);
struct namecache;
RBT_HEAD(namecache_rb_cache, namecache);
+struct vnode_history;
+
/*
* Locks used to protect struct members in struct vnode:
* a atomic
* V vnode_mtx
* B IPL_BIO
+ * K KERNEL_LOCK
*/
struct uvm_vnode;
struct vnode {
@@ -129,6 +132,8 @@ struct vnode {
void *v_data; /* private data for fs */
struct selinfo v_selectinfo; /* identity of poller(s) */
+
+ struct vnode_history *v_history; /* [K] hold v_usecount change
history */
};
#define v_mountedhere v_un.vu_mountedhere
#define v_socket v_un.vu_socket
blob - 3f3de38b7c58dd9ed524c93c916f8c63caa8cb42
file + sys/uvm/uvm_vnode.c
--- sys/uvm/uvm_vnode.c
+++ sys/uvm/uvm_vnode.c
@@ -323,6 +323,10 @@ uvn_detach(struct uvm_object *uobj)
* let it "stick around".
*/
if (uvn->u_flags & UVM_VNODE_CANPERSIST) {
+ extern void vnode_history_record(struct vnode *, int);
+
+ vnode_history_record(vp, 0);
+
/* won't block */
uvn_flush(uobj, 0, 0, PGO_DEACTIVATE|PGO_ALLPAGES);
goto out;
@@ -1149,6 +1153,11 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t *pps, int npag
vn = uvn->u_vnode;
file_offset = pps[0]->offset;
+#ifndef SMALL_KERNEL
+ if (vn->v_usecount == 0)
+ vprint("uvn_io: start", vn);
+#endif
+
/* check for sync'ing I/O. */
while (uvn->u_flags & UVM_VNODE_IOSYNC) {
if (waitf == M_NOWAIT) {