Author: mjg
Date: Sat Oct 24 01:14:17 2020
New Revision: 366987
URL: https://svnweb.freebsd.org/changeset/base/366987
Log:
  cache: refactor alloc/free

  This in particular centralizes manipulation of numcache.

Modified:
  head/sys/kern/vfs_cache.c

Modified: head/sys/kern/vfs_cache.c
==============================================================================
--- head/sys/kern/vfs_cache.c	Sat Oct 24 01:13:47 2020	(r366986)
+++ head/sys/kern/vfs_cache.c	Sat Oct 24 01:14:17 2020	(r366987)
@@ -174,6 +174,19 @@ struct namecache_ts {
  */
 #define	CACHE_ZONE_ALIGNMENT	UMA_ALIGNOF(time_t)
 
+/*
+ * TODO: the initial value of CACHE_PATH_CUTOFF was inherited from the
+ * 4.4 BSD codebase. Later on struct namecache was tweaked to become
+ * smaller and the value was bumped to retain the total size, but it
+ * was never re-evaluated for suitability. A simple test counting
+ * lengths during package building shows that the value of 45 covers
+ * about 86% of all added entries, reaching 99% at 65.
+ *
+ * Regardless of the above, use of dedicated zones instead of malloc may be
+ * inducing additional waste. This may be hard to address as said zones are
+ * tied to VFS SMR. Even if retaining them, the current split should be
+ * re-evaluated.
+ */
 #ifdef __LP64__
 #define	CACHE_PATH_CUTOFF	45
 #define	CACHE_LARGE_PAD		6
@@ -212,6 +225,8 @@ _Static_assert((CACHE_ZONE_LARGE_TS_SIZE % (CACHE_ZONE
  */
 #define NEG_HOT		0x01
 
+static bool cache_neg_evict_cond(u_long lnumcache);
+
 /*
  * Mark an entry as invalid.
  *
@@ -380,62 +395,7 @@ VP2VNODELOCK(struct vnode *vp)
 	return (&vnodelocks[(((uintptr_t)(vp) >> 8) & ncvnodehash)]);
 }
 
-/*
- * UMA zones for the VFS cache.
- *
- * The small cache is used for entries with short names, which are the
- * most common. The large cache is used for entries which are too big to
- * fit in the small cache.
- */
-static uma_zone_t __read_mostly cache_zone_small;
-static uma_zone_t __read_mostly cache_zone_small_ts;
-static uma_zone_t __read_mostly cache_zone_large;
-static uma_zone_t __read_mostly cache_zone_large_ts;
-
-static struct namecache *
-cache_alloc(int len, int ts)
-{
-	struct namecache_ts *ncp_ts;
-	struct namecache *ncp;
-
-	if (__predict_false(ts)) {
-		if (len <= CACHE_PATH_CUTOFF)
-			ncp_ts = uma_zalloc_smr(cache_zone_small_ts, M_WAITOK);
-		else
-			ncp_ts = uma_zalloc_smr(cache_zone_large_ts, M_WAITOK);
-		ncp = &ncp_ts->nc_nc;
-	} else {
-		if (len <= CACHE_PATH_CUTOFF)
-			ncp = uma_zalloc_smr(cache_zone_small, M_WAITOK);
-		else
-			ncp = uma_zalloc_smr(cache_zone_large, M_WAITOK);
-	}
-	return (ncp);
-}
-
 static void
-cache_free(struct namecache *ncp)
-{
-	struct namecache_ts *ncp_ts;
-
-	MPASS(ncp != NULL);
-	if ((ncp->nc_flag & NCF_DVDROP) != 0)
-		vdrop(ncp->nc_dvp);
-	if (__predict_false(ncp->nc_flag & NCF_TS)) {
-		ncp_ts = __containerof(ncp, struct namecache_ts, nc_nc);
-		if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
-			uma_zfree_smr(cache_zone_small_ts, ncp_ts);
-		else
-			uma_zfree_smr(cache_zone_large_ts, ncp_ts);
-	} else {
-		if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
-			uma_zfree_smr(cache_zone_small, ncp);
-		else
-			uma_zfree_smr(cache_zone_large, ncp);
-	}
-}
-
-static void
 cache_out_ts(struct namecache *ncp, struct timespec *tsp, int *ticksp)
 {
 	struct namecache_ts *ncp_ts;
@@ -547,6 +507,126 @@ cache_assert_vnode_locked(struct vnode *vp)
 }
 
 /*
+ * Directory vnodes with entries are held for two reasons:
+ * 1. make them less of a target for reclamation in vnlru
+ * 2. suffer smaller performance penalty in locked lookup as requeuing is avoided
+ *
+ * Note this preferably would not be done and it's a hold over. It will be
+ * feasible to eliminate it altogether if all filesystems start supporting
+ * lockless lookup.
+ */
+static void
+cache_hold_vnode(struct vnode *vp)
+{
+
+	cache_assert_vnode_locked(vp);
+	VNPASS(LIST_EMPTY(&vp->v_cache_src), vp);
+	vhold(vp);
+	counter_u64_add(numcachehv, 1);
+}
+
+static void
+cache_drop_vnode(struct vnode *vp)
+{
+
+	/*
+	 * Called after all locks are dropped, meaning we can't assert
+	 * on the state of v_cache_src.
+	 */
+	vdrop(vp);
+	counter_u64_add(numcachehv, -1);
+}
+
+/*
+ * UMA zones.
+ */
+static uma_zone_t __read_mostly cache_zone_small;
+static uma_zone_t __read_mostly cache_zone_small_ts;
+static uma_zone_t __read_mostly cache_zone_large;
+static uma_zone_t __read_mostly cache_zone_large_ts;
+
+static struct namecache *
+cache_alloc_uma(int len, bool ts)
+{
+	struct namecache_ts *ncp_ts;
+	struct namecache *ncp;
+
+	if (__predict_false(ts)) {
+		if (len <= CACHE_PATH_CUTOFF)
+			ncp_ts = uma_zalloc_smr(cache_zone_small_ts, M_WAITOK);
+		else
+			ncp_ts = uma_zalloc_smr(cache_zone_large_ts, M_WAITOK);
+		ncp = &ncp_ts->nc_nc;
+	} else {
+		if (len <= CACHE_PATH_CUTOFF)
+			ncp = uma_zalloc_smr(cache_zone_small, M_WAITOK);
+		else
+			ncp = uma_zalloc_smr(cache_zone_large, M_WAITOK);
+	}
+	return (ncp);
+}
+
+static void
+cache_free_uma(struct namecache *ncp)
+{
+	struct namecache_ts *ncp_ts;
+
+	if (__predict_false(ncp->nc_flag & NCF_TS)) {
+		ncp_ts = __containerof(ncp, struct namecache_ts, nc_nc);
+		if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
+			uma_zfree_smr(cache_zone_small_ts, ncp_ts);
+		else
+			uma_zfree_smr(cache_zone_large_ts, ncp_ts);
+	} else {
+		if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
+			uma_zfree_smr(cache_zone_small, ncp);
+		else
+			uma_zfree_smr(cache_zone_large, ncp);
+	}
+}
+
+static struct namecache *
+cache_alloc(int len, bool ts)
+{
+	u_long lnumcache;
+
+	/*
+	 * Avoid blowout in namecache entries.
+	 *
+	 * Bugs:
+	 * 1. filesystems may end up trying to add an already existing entry
+	 * (for example this can happen after a cache miss during concurrent
+	 * lookup), in which case we will call cache_neg_evict despite not
+	 * adding anything.
+	 * 2. the routine may fail to free anything and no provisions are made
+	 * to make it try harder (see the inside for failure modes)
+	 * 3. it only ever looks at negative entries.
+	 */
+	lnumcache = atomic_fetchadd_long(&numcache, 1) + 1;
+	if (cache_neg_evict_cond(lnumcache)) {
+		lnumcache = atomic_load_long(&numcache);
+	}
+	if (__predict_false(lnumcache >= ncsize)) {
+		atomic_subtract_long(&numcache, 1);
+		counter_u64_add(numdrops, 1);
+		return (NULL);
+	}
+	return (cache_alloc_uma(len, ts));
+}
+
+static void
+cache_free(struct namecache *ncp)
+{
+
+	MPASS(ncp != NULL);
+	if ((ncp->nc_flag & NCF_DVDROP) != 0) {
+		cache_drop_vnode(ncp->nc_dvp);
+	}
+	cache_free_uma(ncp);
+	atomic_subtract_long(&numcache, 1);
+}
+
+/*
  * TODO: With the value stored we can do better than computing the hash based
  * on the address. The choice of FNV should also be revisited.
  */
@@ -1298,10 +1378,8 @@ cache_zap_locked(struct namecache *ncp)
 		LIST_REMOVE(ncp, nc_src);
 		if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
 			ncp->nc_flag |= NCF_DVDROP;
-			counter_u64_add(numcachehv, -1);
 		}
 	}
-	atomic_subtract_long(&numcache, 1);
 }
 
 static void
@@ -2110,7 +2188,6 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp,
 	uint32_t hash;
 	int flag;
 	int len;
-	u_long lnumcache;
 
 	VNPASS(dvp != vp, dvp);
 	VNPASS(!VN_IS_DOOMED(dvp), dvp);
@@ -2135,27 +2212,9 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp,
 		}
 	}
 
-	/*
-	 * Avoid blowout in namecache entries.
-	 *
-	 * Bugs:
-	 * 1. filesystems may end up tryng to add an already existing entry
-	 * (for example this can happen after a cache miss during concurrent
-	 * lookup), in which case we will call cache_neg_evict despite not
-	 * adding anything.
-	 * 2. the routine may fail to free anything and no provisions are made
-	 * to make it try harder (see the inside for failure modes)
-	 * 3. it only ever looks at negative entries.
-	 */
-	lnumcache = atomic_fetchadd_long(&numcache, 1) + 1;
-	if (cache_neg_evict_cond(lnumcache)) {
-		lnumcache = atomic_load_long(&numcache);
-	}
-	if (__predict_false(lnumcache >= ncsize)) {
-		atomic_subtract_long(&numcache, 1);
-		counter_u64_add(numdrops, 1);
+	ncp = cache_alloc(cnp->cn_namelen, tsp != NULL);
+	if (ncp == NULL)
 		return;
-	}
 
 	cache_celockstate_init(&cel);
 	ndd = NULL;
@@ -2165,7 +2224,6 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp,
 	 * Calculate the hash key and setup as much of the new
 	 * namecache entry as possible before acquiring the lock.
 	 */
-	ncp = cache_alloc(cnp->cn_namelen, tsp != NULL);
 	ncp->nc_flag = flag | NCF_WIP;
 	ncp->nc_vp = vp;
 	if (vp == NULL)
@@ -2276,8 +2334,7 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp,
 
 	if (flag != NCF_ISDOTDOT) {
 		if (LIST_EMPTY(&dvp->v_cache_src)) {
-			vhold(dvp);
-			counter_u64_add(numcachehv, 1);
+			cache_hold_vnode(dvp);
 		}
 		LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
 	}
@@ -2318,7 +2375,6 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp,
 	return;
 out_unlock_free:
 	cache_enter_unlock(&cel);
-	atomic_subtract_long(&numcache, 1);
 	cache_free(ncp);
 	return;
 }
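
For readers who want to see the shape of the new split outside the kernel, below is a minimal, self-contained C sketch of the accounting pattern that cache_alloc() and cache_free() centralize in this change: bump the entry counter before allocating, possibly run an eviction hook near the limit, and roll the bump back when the attempt is dropped or the entry is freed. It is an illustration only: entry_alloc(), entry_free(), evict_cond(), numentries and entry_limit are invented names, C11 atomics stand in for the kernel's atomic_fetchadd_long()/atomic_subtract_long(), and evict_cond() is a do-nothing stand-in for cache_neg_evict_cond().

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static atomic_long numentries;		/* current number of cached entries */
static const long entry_limit = 4;	/* tiny hard limit for demonstration */

struct entry {
	int len;
};

/*
 * Stand-in for cache_neg_evict_cond(): decide whether to try reclaiming
 * something once the counter gets close to the limit.  Here it never
 * frees anything, matching the "may fail to free anything" caveat in the
 * comment above.
 */
static bool
evict_cond(long n)
{

	return (n >= entry_limit);
}

static struct entry *
entry_alloc(int len)
{
	struct entry *e;
	long n;

	/* Account for the new entry up front, as cache_alloc() now does. */
	n = atomic_fetch_add(&numentries, 1) + 1;
	if (evict_cond(n))
		n = atomic_load(&numentries);
	if (n >= entry_limit) {
		/* Still over the limit: undo the bump and drop the request. */
		atomic_fetch_sub(&numentries, 1);
		return (NULL);
	}
	e = malloc(sizeof(*e));
	if (e == NULL) {
		atomic_fetch_sub(&numentries, 1);
		return (NULL);
	}
	e->len = len;
	return (e);
}

static void
entry_free(struct entry *e)
{

	/* The only other place the counter is touched. */
	free(e);
	atomic_fetch_sub(&numentries, 1);
}

int
main(void)
{
	struct entry *entries[6];
	int i;

	for (i = 0; i < 6; i++) {
		entries[i] = entry_alloc(i);
		printf("alloc %d: %s (numentries=%ld)\n", i,
		    entries[i] != NULL ? "ok" : "dropped",
		    (long)atomic_load(&numentries));
	}
	for (i = 0; i < 6; i++) {
		if (entries[i] != NULL)
			entry_free(entries[i]);
	}
	return (0);
}

Because the counter is bumped before the allocation is attempted, every failure path has to undo it; keeping that logic inside cache_alloc()/cache_free() is what lets cache_enter_time(), cache_zap_locked() and the out_unlock_free error path above stop touching numcache directly.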