The branch stable/13 has been updated by mjg:

URL: https://cgit.FreeBSD.org/src/commit/?id=21a42a7c22c9bf921ba9d250c81bd41e70c63ea9

commit 21a42a7c22c9bf921ba9d250c81bd41e70c63ea9
Author:     Mateusz Guzik <m...@freebsd.org>
AuthorDate: 2023-09-14 14:35:40 +0000
Commit:     Mateusz Guzik <m...@freebsd.org>
CommitDate: 2023-10-04 12:04:15 +0000

    vfs: drop one vnode list lock trip during vnlru free recycle
    
    vnlru_free_impl would take the lock prior to returning even though its
    most frequent caller does not need it.
    
    Unsurprisingly, the vnode_list mtx is the primary bottleneck when
    recycling, and avoiding the useless lock trip helps.
    
    Setting maxvnodes to 400000 and running 20 parallel finds, each with a
    dedicated directory tree (1 million vnodes in total):
    before: 4.50s user 1225.71s system 1979% cpu 1:02.14 total
    after:  4.20s user 806.23s system 1973% cpu 41.059 total
    
    That's a 34% reduction in total real time.
    
    With this, the lock *remains* the primary bottleneck when running on
    ZFS.
    
    (cherry picked from commit 74be676d87745eb727642f6f8329236c848929d5)
---
 sys/kern/vfs_subr.c | 43 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 35 insertions(+), 8 deletions(-)
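
For quick orientation before reading the hunks: the patch changes the locking
contract so that vnlru_free_impl() and vnlru_free_locked() are still entered
with vnode_list_mtx held but now return with it dropped. A minimal caller-side
sketch (illustrative only, distilled from the diff below; "count" is a
hypothetical variable, not code from the tree):

    mtx_lock(&vnode_list_mtx);
    /* Recycles up to "count" free vnodes; drops the mutex before returning. */
    (void)vnlru_free_locked(count);
    mtx_assert(&vnode_list_mtx, MA_NOTOWNED);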

diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 209d6ac37cda..40f7a6d33c20 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -1278,13 +1278,14 @@ vnlru_free_impl(int count, struct vfsops *mnt_op, struct vnode *mvp)
        mtx_assert(&vnode_list_mtx, MA_OWNED);
        if (count > max_vnlru_free)
                count = max_vnlru_free;
+       if (count == 0) {
+               mtx_unlock(&vnode_list_mtx);
+               return (0);
+       }
        ocount = count;
        retried = false;
        vp = mvp;
        for (;;) {
-               if (count == 0) {
-                       break;
-               }
                vp = TAILQ_NEXT(vp, v_vnodelist);
                if (__predict_false(vp == NULL)) {
                        /*
@@ -1307,6 +1308,7 @@ vnlru_free_impl(int count, struct vfsops *mnt_op, struct vnode *mvp)
                         */
                        TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
                        TAILQ_INSERT_TAIL(&vnode_list, mvp, v_vnodelist);
+                       mtx_unlock(&vnode_list_mtx);
                        break;
                }
                if (__predict_false(vp->v_type == VMARKER))
@@ -1354,18 +1356,28 @@ vnlru_free_impl(int count, struct vfsops *mnt_op, struct vnode *mvp)
                 */
                vtryrecycle(vp);
                count--;
+               if (count == 0) {
+                       break;
+               }
                mtx_lock(&vnode_list_mtx);
                vp = mvp;
        }
+       mtx_assert(&vnode_list_mtx, MA_NOTOWNED);
        return (ocount - count);
 }
 
+/*
+ * XXX: returns without vnode_list_mtx locked!
+ */
 static int
 vnlru_free_locked(int count)
 {
+       int ret;
 
        mtx_assert(&vnode_list_mtx, MA_OWNED);
-       return (vnlru_free_impl(count, NULL, vnode_list_free_marker));
+       ret = vnlru_free_impl(count, NULL, vnode_list_free_marker);
+       mtx_assert(&vnode_list_mtx, MA_NOTOWNED);
+       return (ret);
 }
 
 void
@@ -1377,7 +1389,7 @@ vnlru_free_vfsops(int count, struct vfsops *mnt_op, struct vnode *mvp)
        VNPASS(mvp->v_type == VMARKER, mvp);
        mtx_lock(&vnode_list_mtx);
        vnlru_free_impl(count, mnt_op, mvp);
-       mtx_unlock(&vnode_list_mtx);
+       mtx_assert(&vnode_list_mtx, MA_NOTOWNED);
 }
 
 /*
@@ -1549,7 +1561,7 @@ vnlru_under_unlocked(u_long rnumvnodes, u_long limit)
 }
 
 static void
-vnlru_kick(void)
+vnlru_kick_locked(void)
 {
 
        mtx_assert(&vnode_list_mtx, MA_OWNED);
@@ -1559,6 +1571,15 @@ vnlru_kick(void)
        }
 }
 
+static void
+vnlru_kick(void)
+{
+
+       mtx_lock(&vnode_list_mtx);
+       vnlru_kick_locked();
+       mtx_unlock(&vnode_list_mtx);
+}
+
 static void
 vnlru_proc(void)
 {
@@ -1589,6 +1610,7 @@ vnlru_proc(void)
                 */
                if (rnumvnodes > desiredvnodes) {
                        vnlru_free_locked(rnumvnodes - desiredvnodes);
+                       mtx_lock(&vnode_list_mtx);
                        rnumvnodes = atomic_load_long(&numvnodes);
                }
                /*
@@ -1767,6 +1789,7 @@ vn_alloc_hard(struct mount *mp)
        rnumvnodes = atomic_load_long(&numvnodes);
        if (rnumvnodes + 1 < desiredvnodes) {
                vn_alloc_cyclecount = 0;
+               mtx_unlock(&vnode_list_mtx);
                goto alloc;
        }
        rfreevnodes = vnlru_read_freevnodes();
@@ -1786,22 +1809,26 @@ vn_alloc_hard(struct mount *mp)
         */
        if (vnlru_free_locked(1) > 0)
                goto alloc;
+       mtx_assert(&vnode_list_mtx, MA_NOTOWNED);
        if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPEND) == 0) {
                /*
                 * Wait for space for a new vnode.
                 */
-               vnlru_kick();
+               mtx_lock(&vnode_list_mtx);
+               vnlru_kick_locked();
                vn_alloc_sleeps++;
                msleep(&vnlruproc_sig, &vnode_list_mtx, PVFS, "vlruwk", hz);
                if (atomic_load_long(&numvnodes) + 1 > desiredvnodes &&
                    vnlru_read_freevnodes() > 1)
                        vnlru_free_locked(1);
+               else
+                       mtx_unlock(&vnode_list_mtx);
        }
 alloc:
+       mtx_assert(&vnode_list_mtx, MA_NOTOWNED);
        rnumvnodes = atomic_fetchadd_long(&numvnodes, 1) + 1;
        if (vnlru_under(rnumvnodes, vlowat))
                vnlru_kick();
-       mtx_unlock(&vnode_list_mtx);
        return (uma_zalloc_smr(vnode_zone, M_WAITOK));
 }
 
