Module Name:	src
Committed By:	riastradh
Date:		Sun Feb 27 14:16:43 UTC 2022
Modified Files:
	src/sys/kern: subr_pool.c

Log Message:
pool(9): Membar audit.

- Use atomic_store_release and atomic_load_consume for associating a
  freshly constructed pool_cache with its underlying pool.  The pool
  gets published in various ways before the pool cache is fully
  constructed.

  => Nix membar_sync -- no store-before-load is needed here.

- Take pool_head_lock around sysctl kern.pool TAILQ_FOREACH.  Then
  take a reference count, and drop the lock, around copyout.

  => Otherwise, pools could be partially initialized or freed while
     we're still trying to read from them -- and in the worst case,
     we might see a corrupted view of the tailq.

  => If we kept the lock around copyout, this could deadlock in
     memory allocation.

  => If we didn't take a reference count while releasing the lock,
     the pool could be destroyed while we're trying to traverse the
     list, sending us into oblivion instead of the next element.


To generate a diff of this commit:
cvs rdiff -u -r1.280 -r1.281 src/sys/kern/subr_pool.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
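The first change relies on the release/consume publication idiom.  Below is
a minimal userland C11 sketch of that idiom, not the kernel code itself:
memory_order_release and memory_order_consume stand in for the kernel's
atomic_store_release(9) and atomic_load_consume(9), and the "pool"/"cache"
structs are hypothetical stand-ins for struct pool and pool_cache.

#include <stdatomic.h>
#include <stddef.h>

struct cache {
	int nitems;			/* initialized before publication */
};

struct pool {
	struct cache *_Atomic cache;	/* NULL until fully constructed */
};

static void
cache_publish(struct pool *p, struct cache *c)
{
	c->nitems = 0;			/* finish construction first... */
	/* ...then publish: release orders the init before the store */
	atomic_store_explicit(&p->cache, c, memory_order_release);
}

static int
cache_peek(struct pool *p)
{
	/* consume pairs with release: a reader that sees c sees c->nitems */
	struct cache *c = atomic_load_explicit(&p->cache,
	    memory_order_consume);
	return c == NULL ? -1 : c->nitems;
}

Only store-before-store ordering is needed on the publishing side, which is
why a release store suffices and the old membar_sync -- a full barrier that
also orders stores before loads -- was unnecessary here.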
Modified files:

Index: src/sys/kern/subr_pool.c
diff -u src/sys/kern/subr_pool.c:1.280 src/sys/kern/subr_pool.c:1.281
--- src/sys/kern/subr_pool.c:1.280	Fri Dec 24 00:13:53 2021
+++ src/sys/kern/subr_pool.c	Sun Feb 27 14:16:43 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: subr_pool.c,v 1.280 2021/12/24 00:13:53 riastradh Exp $	*/
+/*	$NetBSD: subr_pool.c,v 1.281 2022/02/27 14:16:43 riastradh Exp $	*/
 
 /*
  * Copyright (c) 1997, 1999, 2000, 2002, 2007, 2008, 2010, 2014, 2015, 2018,
@@ -33,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.280 2021/12/24 00:13:53 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.281 2022/02/27 14:16:43 riastradh Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_ddb.h"
@@ -1648,6 +1648,7 @@ pool_reclaim(struct pool *pp)
 {
 	struct pool_item_header *ph, *phnext;
 	struct pool_pagelist pq;
+	struct pool_cache *pc;
 	uint32_t curtime;
 	bool klock;
 	int rv;
@@ -1673,8 +1674,8 @@ pool_reclaim(struct pool *pp)
 		klock = false;
 
 	/* Reclaim items from the pool's cache (if any). */
-	if (pp->pr_cache != NULL)
-		pool_cache_invalidate(pp->pr_cache);
+	if ((pc = atomic_load_consume(&pp->pr_cache)) != NULL)
+		pool_cache_invalidate(pc);
 
 	if (mutex_tryenter(&pp->pr_lock) == 0) {
 		if (klock) {
@@ -1883,7 +1884,7 @@ pool_print1(struct pool *pp, const char
 	if (skip_empty && pp->pr_nget == 0)
 		return;
 
-	if ((pc = pp->pr_cache) != NULL) {
+	if ((pc = atomic_load_consume(&pp->pr_cache)) != NULL) {
 		(*pr)("POOLCACHE");
 	} else {
 		(*pr)("POOL");
@@ -1895,7 +1896,6 @@ pool_print1(struct pool *pp, const char
 	    pp->pr_wchan, pp, pp->pr_size, pp->pr_align, pp->pr_npages,
 	    pp->pr_nitems, pp->pr_nout, pp->pr_nget, pp->pr_nput,
 	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_nidle);
-	return;
 }
 
 /*
@@ -2184,8 +2184,7 @@ pool_cache_bootstrap(pool_cache_t pc, si
 	if (__predict_true(!cold))
 		mutex_exit(&pool_head_lock);
 
-	membar_sync();
-	pp->pr_cache = pc;
+	atomic_store_release(&pp->pr_cache, pc);
 }
 
 /*
@@ -2224,7 +2223,7 @@ pool_cache_bootstrap_destroy(pool_cache_
 
 	/* Disassociate it from the pool. */
 	mutex_enter(&pp->pr_lock);
-	pp->pr_cache = NULL;
+	atomic_store_relaxed(&pp->pr_cache, NULL);
 	mutex_exit(&pp->pr_lock);
 
 	/* Destroy per-CPU data */
@@ -3339,6 +3338,7 @@ pool_whatis(uintptr_t addr, void (*pr)(c
 
 	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
 		struct pool_item_header *ph;
+		struct pool_cache *pc;
 		uintptr_t item;
 		bool allocated = true;
 		bool incache = false;
@@ -3373,8 +3373,8 @@ pool_whatis(uintptr_t addr, void (*pr)(c
 			allocated = pool_allocated(pp, ph, addr);
 		}
 found:
-		if (allocated && pp->pr_cache) {
-			pool_cache_t pc = pp->pr_cache;
+		if (allocated &&
+		    (pc = atomic_load_consume(&pp->pr_cache)) != NULL) {
 			struct pool_cache_group *pcg;
 			int i;
 
@@ -3437,9 +3437,11 @@ pool_sysctl(SYSCTLFN_ARGS)
 	memset(&data, 0, sizeof(data));
 	error = 0;
 	written = 0;
+	mutex_enter(&pool_head_lock);
 	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
 		if (written + sizeof(data) > *oldlenp)
 			break;
+		pp->pr_refcnt++;
 		strlcpy(data.pr_wchan, pp->pr_wchan, sizeof(data.pr_wchan));
 		data.pr_pagesize = pp->pr_alloc->pa_pagesz;
 		data.pr_flags = pp->pr_roflags | pp->pr_flags;
@@ -3469,9 +3471,8 @@ pool_sysctl(SYSCTLFN_ARGS)
 		data.pr_cache_nempty = 0;
 		data.pr_cache_ncontended = 0;
 		data.pr_cache_npartial = 0;
-		if (pp->pr_cache) {
+		if ((pc = atomic_load_consume(&pp->pr_cache)) != NULL) {
 			uint32_t nfull = 0;
-			pc = pp->pr_cache;
 			data.pr_cache_meta_size = pc->pc_pcgsize;
 			for (i = 0; i < pc->pc_ncpu; ++i) {
 				cc = pc->pc_cpus[i];
@@ -3492,12 +3493,19 @@ pool_sysctl(SYSCTLFN_ARGS)
 		data.pr_cache_nhit_global = data.pr_cache_nmiss_pcpu -
 		    data.pr_cache_nmiss_global;
 
+		if (pp->pr_refcnt == UINT_MAX) /* XXX possible? */
+			continue;
+		mutex_exit(&pool_head_lock);
 		error = sysctl_copyout(l, &data, oldp, sizeof(data));
+		mutex_enter(&pool_head_lock);
+		if (--pp->pr_refcnt == 0)
+			cv_broadcast(&pool_busy);
 		if (error)
 			break;
 		written += sizeof(data);
 		oldp = (char *)oldp + sizeof(data);
 	}
+	mutex_exit(&pool_head_lock);
 
 	*oldlenp = written;
 	return error;
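The pool_sysctl() change is an instance of a pin-and-drop pattern: hold the
list lock while walking the list, pin the current element with a reference
count before dropping the lock for the slow copy (which may sleep for
memory), then re-take the lock, release the reference, and wake anyone
waiting for the count to drain.  A hedged userland sketch with POSIX
threads follows; entry, list_lock, list_busy, and copy_all are hypothetical
names, not the kernel's, and the destroy path (not shown) is assumed to
wait on the condvar until refcnt reaches zero before unlinking and freeing
an element, as pool_destroy is expected to do with pool_busy.

#include <limits.h>
#include <pthread.h>
#include <stdio.h>

struct entry {
	unsigned refcnt;		/* protected by list_lock */
	struct entry *next;		/* stable while refcnt > 0 */
	int payload;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t list_busy = PTHREAD_COND_INITIALIZER;
static struct entry *list_head;

static int
copy_all(void)
{
	struct entry *e;
	int error = 0;

	pthread_mutex_lock(&list_lock);
	for (e = list_head; e != NULL; e = e->next) {
		if (e->refcnt == UINT_MAX)	/* refuse to overflow */
			continue;
		e->refcnt++;			/* pin e across unlock */
		pthread_mutex_unlock(&list_lock);

		/* Slow path: may block, so the list lock is not held. */
		if (printf("%d\n", e->payload) < 0)
			error = 1;

		pthread_mutex_lock(&list_lock);
		/* e, and hence e->next, stayed valid: the pin kept e alive */
		if (--e->refcnt == 0)		/* a destroyer may be waiting */
			pthread_cond_broadcast(&list_busy);
		if (error)
			break;
	}
	pthread_mutex_unlock(&list_lock);
	return error;
}

Dropping the lock around the copy avoids the deadlock the log message
describes: the copy can fault and allocate memory, and reclaiming memory
can itself take the pool locks.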