The branch main has been updated by mjg:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=3a4c5dab9266fac93a5cb22c7cee3938466aedea

commit 3a4c5dab9266fac93a5cb22c7cee3938466aedea
Author:     Mateusz Guzik <m...@freebsd.org>
AuthorDate: 2022-03-07 10:38:17 +0000
Commit:     Mateusz Guzik <m...@freebsd.org>
CommitDate: 2022-03-10 09:41:50 +0000

    vfs: [2/2] fix stalls in vnode reclaim by only counting attempts
    
    ... and ignoring whether they succeeded, which matches historical behavior.
    
    Reported by:    pho
---
 sys/kern/vfs_subr.c | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 436323873f7f..c184530bbda6 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -1311,8 +1311,29 @@ vnlru_free_impl(int count, struct vfsops *mnt_op, struct vnode *mvp)
                TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
                TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_vnodelist);
                mtx_unlock(&vnode_list_mtx);
-               if (vtryrecycle(vp) == 0)
-                       count--;
+               /*
+                * FIXME: ignores the return value, so possibly nothing got
+                * recycled even though the caller is told otherwise.
+                *
+                * Originally the value started being ignored in 2005 with
+                * 114a1006a8204aa156e1f9ad6476cdff89cada7f .
+                *
+                * Respecting the value can run into significant stalls if most
+                * vnodes belong to one file system and it has writes
+                * suspended.  In presence of many threads and millions of
+                * vnodes they keep contending on the vnode_list_mtx lock only
+                * to find vnodes they can't recycle.
+                *
+                * The solution would be to pre-check if the vnode is likely to
+                * be recycle-able, but it needs to happen with the
+                * vnode_list_mtx lock held. This runs into a problem where
+                * VOP_GETWRITEMOUNT (currently needed to find out whether
+                * writes are frozen) can take locks which LOR against it.
+                *
+                * Check nullfs for one example (null_getwritemount).
+                */
+               vtryrecycle(vp);
+               count--;
                mtx_lock(&vnode_list_mtx);
                vp = mvp;
        }

Reply via email to