Module Name: src Committed By: ad Date: Sat Sep 9 18:27:59 UTC 2023
Modified Files: src/sys/kern: vfs_cache.c src/sys/sys: namei.src src/usr.bin/pmap: main.h pmap.c Log Message: - Shrink namecache entries to 64 bytes on 32-bit platforms and use 32-bit key values there for speed (remains 128 bytes & 64-bits on _LP64). - Comments. To generate a diff of this commit: cvs rdiff -u -r1.154 -r1.155 src/sys/kern/vfs_cache.c cvs rdiff -u -r1.60 -r1.61 src/sys/sys/namei.src cvs rdiff -u -r1.7 -r1.8 src/usr.bin/pmap/main.h cvs rdiff -u -r1.57 -r1.58 src/usr.bin/pmap/pmap.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/kern/vfs_cache.c diff -u src/sys/kern/vfs_cache.c:1.154 src/sys/kern/vfs_cache.c:1.155 --- src/sys/kern/vfs_cache.c:1.154 Sat Apr 29 10:07:22 2023 +++ src/sys/kern/vfs_cache.c Sat Sep 9 18:27:59 2023 @@ -1,7 +1,7 @@ -/* $NetBSD: vfs_cache.c,v 1.154 2023/04/29 10:07:22 riastradh Exp $ */ +/* $NetBSD: vfs_cache.c,v 1.155 2023/09/09 18:27:59 ad Exp $ */ /*- - * Copyright (c) 2008, 2019, 2020 The NetBSD Foundation, Inc. + * Copyright (c) 2008, 2019, 2020, 2023 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -79,10 +79,9 @@ * we are not sensible, and use a per-directory data structure to index * names, but the cache otherwise functions the same. * - * The index is a red-black tree. There are no special concurrency - * requirements placed on it, because it's per-directory and protected - * by the namecache's per-directory locks. It should therefore not be - * difficult to experiment with other types of index. + * The index is a red-black tree. It should not be difficult to + * experiment with other types of index, however note that a tree + * can trivially be made to support lockless lookup. * * Each cached name is stored in a struct namecache, along with a * pointer to the associated vnode (nc_vp). Names longer than a @@ -91,6 +90,19 @@ * in struct namecache. If it is a "negative" entry, (i.e. for a name * that is known NOT to exist) the vnode pointer will be NULL. * + * In practice this implementation is not any slower than the hash + * table that preceeded it and in some cases it significantly + * outperforms the hash table. Some reasons why this might be: + * + * - natural partitioning provided by the file system structure, which + * the prior implementation discarded (global hash table). + * - worst case tree traversal of O(log n), the hash table could have + * many collisions. + * - minimized cache misses & total L2/L3 CPU cache footprint; struct + * namecache and vnode_impl_t are laid out to keep cache footprint + * minimal in the lookup path; no hash table buckets to cache. + * - minimized number of conditionals & string comparisons. + * * For a directory with 3 cached names for 3 distinct vnodes, the * various vnodes and namecache structs would be connected like this * (the root is at the bottom of the diagram): @@ -167,12 +179,10 @@ * used during reverse lookup) * * 3) cache_lru_lock (LRU list direction, used during reclaim) - * - * 4) vp->v_interlock (what the cache entry points to) */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.154 2023/04/29 10:07:22 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.155 2023/09/09 18:27:59 ad Exp $"); #define __NAMECACHE_PRIVATE #ifdef _KERNEL_OPT @@ -202,6 +212,16 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_cache.c, #include <miscfs/genfs/genfs.h> +/* + * Assert that data structure layout hasn't changed unintentionally. + */ +#ifdef _LP64 +CTASSERT(sizeof(struct namecache) == 128); +#else +CTASSERT(sizeof(struct namecache) == 64); +#endif +CTASSERT(NC_NLEN_MASK >= MAXPATHLEN); + static void cache_activate(struct namecache *); static void cache_update_stats(void *); static int cache_compare_nodes(void *, const void *, const void *); @@ -262,7 +282,7 @@ static kmutex_t cache_stat_lock __cachel */ int cache_lru_maxdeact __read_mostly = 2; /* max # to deactivate */ int cache_lru_maxscan __read_mostly = 64; /* max # to scan/reclaim */ -int cache_maxlen __read_mostly = USHRT_MAX; /* max name length to cache */ +int cache_maxlen __read_mostly = NC_NLEN_MASK; /* max name length to cache */ int cache_stat_interval __read_mostly = 300; /* in seconds */ /* @@ -328,8 +348,8 @@ cache_compare_nodes(void *context, const if (nc1->nc_key > nc2->nc_key) { return 1; } - KASSERT(nc1->nc_nlen == nc2->nc_nlen); - return memcmp(nc1->nc_name, nc2->nc_name, nc1->nc_nlen); + KASSERT(NC_NLEN(nc1) == NC_NLEN(nc2)); + return memcmp(nc1->nc_name, nc2->nc_name, NC_NLEN(nc1)); } /* @@ -337,15 +357,15 @@ cache_compare_nodes(void *context, const * the key value to try and improve uniqueness, and so that length doesn't * need to be compared separately for string comparisons. */ -static inline uint64_t +static uintptr_t cache_key(const char *name, size_t nlen) { - uint64_t key; + uintptr_t key; - KASSERT(nlen <= USHRT_MAX); + KASSERT((nlen & ~NC_NLEN_MASK) == 0); key = hash32_buf(name, nlen, HASH32_STR_INIT); - return (key << 32) | nlen; + return (key << NC_NLEN_BITS) | (uintptr_t)nlen; } /* @@ -358,13 +378,13 @@ cache_remove(struct namecache *ncp, cons { struct vnode *vp, *dvp = ncp->nc_dvp; vnode_impl_t *dvi = VNODE_TO_VIMPL(dvp); + size_t namelen = NC_NLEN(ncp); KASSERT(rw_write_held(&dvi->vi_nc_lock)); - KASSERT(cache_key(ncp->nc_name, ncp->nc_nlen) == ncp->nc_key); + KASSERT(cache_key(ncp->nc_name, namelen) == ncp->nc_key); KASSERT(rb_tree_find_node(&dvi->vi_nc_tree, ncp) == ncp); - SDT_PROBE(vfs, namecache, invalidate, done, ncp, - 0, 0, 0, 0); + SDT_PROBE(vfs, namecache, invalidate, done, ncp, 0, 0, 0, 0); /* * Remove from the vnode's list. This excludes cache_revlookup(), @@ -391,8 +411,8 @@ cache_remove(struct namecache *ncp, cons mutex_exit(&cache_lru_lock); /* Finally, free it. */ - if (ncp->nc_nlen > NCHNAMLEN) { - size_t sz = offsetof(struct namecache, nc_name[ncp->nc_nlen]); + if (namelen > NCHNAMLEN) { + size_t sz = offsetof(struct namecache, nc_name[namelen]); kmem_free(ncp, sz); } else { pool_cache_put(cache_pool, ncp); @@ -404,13 +424,15 @@ cache_remove(struct namecache *ncp, cons */ static struct namecache * __noinline cache_lookup_entry(struct vnode *dvp, const char *name, size_t namelen, - uint64_t key) + uintptr_t key) { vnode_impl_t *dvi = VNODE_TO_VIMPL(dvp); struct rb_node *node = dvi->vi_nc_tree.rbt_root; struct namecache *ncp; - int lrulist, diff; + enum cache_lru_id lrulist; + int diff; + KASSERT(namelen <= MAXPATHLEN); KASSERT(rw_lock_held(&dvi->vi_nc_lock)); /* @@ -430,7 +452,7 @@ cache_lookup_entry(struct vnode *dvp, co KASSERT((void *)&ncp->nc_tree == (void *)ncp); KASSERT(ncp->nc_dvp == dvp); if (ncp->nc_key == key) { - KASSERT(ncp->nc_nlen == namelen); + KASSERT(NC_NLEN(ncp) == namelen); diff = memcmp(ncp->nc_name, name, namelen); if (__predict_true(diff == 0)) { break; @@ -511,7 +533,7 @@ cache_lookup(struct vnode *dvp, const ch vnode_impl_t *dvi = VNODE_TO_VIMPL(dvp); struct namecache *ncp; struct vnode *vp; - uint64_t key; + uintptr_t key; int error; bool hit; krw_t op; @@ -564,7 +586,7 @@ cache_lookup(struct vnode *dvp, const ch COUNT(ncs_badhits); return false; } - if (ncp->nc_vp == NULL) { + if ((vp = ncp->nc_vp) == NULL) { if (iswht_ret != NULL) { /* * Restore the ISWHITEOUT flag saved earlier. @@ -592,7 +614,6 @@ cache_lookup(struct vnode *dvp, const ch rw_exit(&dvi->vi_nc_lock); return hit; } - vp = ncp->nc_vp; error = vcache_tryvget(vp); rw_exit(&dvi->vi_nc_lock); if (error) { @@ -638,7 +659,8 @@ cache_lookup_linked(struct vnode *dvp, c vnode_impl_t *dvi = VNODE_TO_VIMPL(dvp); struct namecache *ncp; krwlock_t *oldlock, *newlock; - uint64_t key; + struct vnode *vp; + uintptr_t key; int error; KASSERT(namelen != cache_mp_nlen || name == cache_mp_name); @@ -727,7 +749,7 @@ cache_lookup_linked(struct vnode *dvp, c name, namelen, 0, 0); return false; } - if (ncp->nc_vp == NULL) { + if ((vp = ncp->nc_vp) == NULL) { /* found negative entry; vn is already null from above */ KASSERT(namelen != cache_mp_nlen); KASSERT(name != cache_mp_name); @@ -749,7 +771,7 @@ cache_lookup_linked(struct vnode *dvp, c if (newlock) { *plock = newlock; } - *vn_ret = ncp->nc_vp; + *vn_ret = vp; return true; } @@ -772,8 +794,9 @@ cache_revlookup(struct vnode *vp, struct { vnode_impl_t *vi = VNODE_TO_VIMPL(vp); struct namecache *ncp; + enum cache_lru_id lrulist; struct vnode *dvp; - int error, nlen, lrulist; + int error, nlen; char *bp; KASSERT(vp != NULL); @@ -812,12 +835,12 @@ cache_revlookup(struct vnode *vp, struct TAILQ_FOREACH(ncp, &vi->vi_nc_list, nc_list) { KASSERT(ncp->nc_vp == vp); KASSERT(ncp->nc_dvp != NULL); - nlen = ncp->nc_nlen; + nlen = NC_NLEN(ncp); /* * Ignore mountpoint entries. */ - if (ncp->nc_nlen == cache_mp_nlen) { + if (nlen == cache_mp_nlen) { continue; } @@ -929,7 +952,6 @@ cache_enter(struct vnode *dvp, struct vn ncp->nc_vp = vp; ncp->nc_dvp = dvp; ncp->nc_key = cache_key(name, namelen); - ncp->nc_nlen = namelen; ncp->nc_whiteout = ((cnflags & ISWHITEOUT) != 0); memcpy(ncp->nc_name, name, namelen); @@ -941,7 +963,7 @@ cache_enter(struct vnode *dvp, struct vn oncp = rb_tree_insert_node(&dvi->vi_nc_tree, ncp); if (oncp != ncp) { KASSERT(oncp->nc_key == ncp->nc_key); - KASSERT(oncp->nc_nlen == ncp->nc_nlen); + KASSERT(NC_NLEN(oncp) == NC_NLEN(ncp)); KASSERT(memcmp(oncp->nc_name, name, namelen) == 0); cache_remove(oncp, true); oncp = rb_tree_insert_node(&dvi->vi_nc_tree, ncp); @@ -1107,12 +1129,11 @@ nchinit(void) void cache_cpu_init(struct cpu_info *ci) { - void *p; size_t sz; - sz = roundup2(sizeof(struct nchcpu), coherency_unit) + coherency_unit; - p = kmem_zalloc(sz, KM_SLEEP); - ci->ci_data.cpu_nch = (void *)roundup2((uintptr_t)p, coherency_unit); + sz = roundup2(sizeof(struct nchcpu), coherency_unit); + ci->ci_data.cpu_nch = kmem_zalloc(sz, KM_SLEEP); + KASSERT(((uintptr_t)ci->ci_data.cpu_nch & (coherency_unit - 1)) == 0); } /* @@ -1232,7 +1253,7 @@ cache_purge_name(struct vnode *dvp, cons { vnode_impl_t *dvi = VNODE_TO_VIMPL(dvp); struct namecache *ncp; - uint64_t key; + uintptr_t key; SDT_PROBE(vfs, namecache, purge, name, name, namelen, 0, 0, 0); @@ -1524,7 +1545,7 @@ namecache_print(struct vnode *vp, void ( for (id = 0; id < LRU_COUNT; id++) { TAILQ_FOREACH(ncp, &cache_lru.list[id], nc_lru) { if (ncp->nc_vp == vp) { - (*pr)("name %.*s\n", ncp->nc_nlen, + (*pr)("name %.*s\n", NC_NLEN(ncp), ncp->nc_name); dvp = ncp->nc_dvp; } @@ -1537,7 +1558,7 @@ namecache_print(struct vnode *vp, void ( for (id = 0; id < LRU_COUNT; id++) { TAILQ_FOREACH(ncp, &cache_lru.list[id], nc_lru) { if (ncp->nc_vp == dvp) { - (*pr)("parent %.*s\n", ncp->nc_nlen, + (*pr)("parent %.*s\n", NC_NLEN(ncp), ncp->nc_name); } } Index: src/sys/sys/namei.src diff -u src/sys/sys/namei.src:1.60 src/sys/sys/namei.src:1.61 --- src/sys/sys/namei.src:1.60 Tue Jun 29 22:39:21 2021 +++ src/sys/sys/namei.src Sat Sep 9 18:27:59 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: namei.src,v 1.60 2021/06/29 22:39:21 dholland Exp $ */ +/* $NetBSD: namei.src,v 1.61 2023/09/09 18:27:59 ad Exp $ */ /* * Copyright (c) 1985, 1989, 1991, 1993 @@ -200,12 +200,24 @@ NAMEIFL PARAMASK 0x02ff800 /* mask of pa #define NCHNAMLEN sizeof(((struct namecache *)NULL)->nc_name) /* + * The uintptr_t-sized key value computed for each name consists of name + * length and a hash value. On 32-bit platforms the top NC_NLEN_BITS of + * the 32-bit hash value is lobbed off. + */ + +#define NC_NLEN_BITS 11 +#define NC_NLEN_MASK ((1 << NC_NLEN_BITS) - 1) +#define NC_NLEN(ncp) ((ncp)->nc_key & NC_NLEN_MASK) + +/* * Namecache entry. * * This structure describes the elements in the cache of recent names looked - * up by namei. It's carefully sized to take up 128 bytes on _LP64, to make - * good use of space and the CPU caches. Items used during RB tree lookup - * (nc_tree, nc_key) are clustered at the start of the structure. + * up by namei. It's carefully sized to take up 128 bytes on _LP64 and 64 + * bytes on 32-bit machines, to make good use of space and the CPU caches. + * + * Items used during RB tree lookup (nc_tree, nc_key) are clustered at the + * start of the structure to minimise cache misses during lookup. * * Field markings and their corresponding locks: * @@ -216,17 +228,20 @@ NAMEIFL PARAMASK 0x02ff800 /* mask of pa */ struct namecache { struct rb_node nc_tree; /* d red-black tree, must be first */ - uint64_t nc_key; /* - hashed key value */ + uintptr_t nc_key; /* - hashed key value */ TAILQ_ENTRY(namecache) nc_list; /* v nc_vp's list of cache entries */ TAILQ_ENTRY(namecache) nc_lru; /* l pseudo-lru chain */ struct vnode *nc_dvp; /* - vnode of parent of name */ struct vnode *nc_vp; /* - vnode the name refers to */ - int nc_lrulist; /* l which LRU list it's on */ - u_short nc_nlen; /* - length of the name */ - char nc_whiteout; /* - true if a whiteout */ - char nc_name[41]; /* - segment name */ -}; + u_char nc_lrulist; /* l LRU list entry is on */ + u_char nc_whiteout; /* - whiteout indicator */ +#ifdef _LP64 + char nc_name[46]; /* - segment name */ +#else + char nc_name[22]; /* - segment name */ #endif +}; +#endif /* __NAMECACHE_PRIVATE */ #ifdef _KERNEL #include <sys/pool.h> Index: src/usr.bin/pmap/main.h diff -u src/usr.bin/pmap/main.h:1.7 src/usr.bin/pmap/main.h:1.8 --- src/usr.bin/pmap/main.h:1.7 Sun Aug 21 07:46:52 2022 +++ src/usr.bin/pmap/main.h Sat Sep 9 18:27:59 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: main.h,v 1.7 2022/08/21 07:46:52 mlelstv Exp $ */ +/* $NetBSD: main.h,v 1.8 2023/09/09 18:27:59 ad Exp $ */ /* * Copyright (c) 2003 The NetBSD Foundation, Inc. @@ -36,9 +36,6 @@ extern u_long kernel_map_addr; extern void *uvm_vnodeops, *uvm_deviceops, *aobj_pager, *ubc_pager; extern rlim_t maxssiz; -LIST_HEAD(nchashhead, namecache); -extern struct nchashhead *nchashtbl; - struct cache_entry { LIST_ENTRY(cache_entry) ce_next; struct vnode *ce_vp, *ce_pvp; Index: src/usr.bin/pmap/pmap.c diff -u src/usr.bin/pmap/pmap.c:1.57 src/usr.bin/pmap/pmap.c:1.58 --- src/usr.bin/pmap/pmap.c:1.57 Sun Aug 21 07:46:52 2022 +++ src/usr.bin/pmap/pmap.c Sat Sep 9 18:27:59 2023 @@ -1,7 +1,7 @@ -/* $NetBSD: pmap.c,v 1.57 2022/08/21 07:46:52 mlelstv Exp $ */ +/* $NetBSD: pmap.c,v 1.58 2023/09/09 18:27:59 ad Exp $ */ /* - * Copyright (c) 2002, 2003, 2020 The NetBSD Foundation, Inc. + * Copyright (c) 2002, 2003, 2020, 2023 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -31,7 +31,7 @@ #include <sys/cdefs.h> #ifndef lint -__RCSID("$NetBSD: pmap.c,v 1.57 2022/08/21 07:46:52 mlelstv Exp $"); +__RCSID("$NetBSD: pmap.c,v 1.58 2023/09/09 18:27:59 ad Exp $"); #endif #include <string.h> @@ -874,7 +874,7 @@ search_cache(kvm_t *kd, struct vnode *vp (vi.vi_vnode.v_vflag & VV_ROOT) != 0) break; /* Otherwise pull first NCHNAMLEN chars of name. */ - nlen = MIN((size_t)nc.nc_nlen, NCHNAMLEN); + nlen = MIN(NC_NLEN(&nc), NCHNAMLEN); /* too small */ if ((size_t)(o - buf) < nlen + (o != e ? 1 : 0)) break;