Hi,

I now have a semi-working pool_cache-based memory allocator for network drivers (tested with the iwn driver). I am uncertain, however, about how to get it fully functioning. The issue is memory allocation in interrupt context.
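For context, the consumption pattern I am trying to support looks roughly like this (hypothetical driver-side code; sc_rx_pc would hold the pool_cache created by the code further below):

	/* In the RX interrupt handler: get a fresh DMA buffer. */
	void *buf;
	paddr_t pa;

	buf = mbuf_pool_cache_get_paddr(sc->sc_rx_pc, PR_NOWAIT, &pa);
	if (buf == NULL) {
		/* Out of buffers: drop the frame and recycle the old one. */
	}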
My first attempt was to use the palloc argument to pool_cache_init (see the mbuf_pool_poolpage_alloc method below). This works for a while, but eventually panics in kmem_alloc because kmem_alloc gets called in interrupt context (without the error return for interrupt context that is in the code now). The kmem_alloc man page suggests using pool_cache instead, so I implemented ctor/dtor methods to use in place of mbuf_pool_poolpage_alloc. This version, however, panics almost immediately in bus_dmamem_map, because the pool "pvpl" (created in arch/x86/x86/pmap.c) is initialized with IPL_NONE. This of course affects the mbuf_pool_poolpage_alloc version too, but it happens more quickly here because the ctor method gets called via pool_cache_get, i.e., there is no preallocation of the DMA buffers. Adding the current error returns in mbuf_pool_poolpage_alloc and mbuf_pool_ctor avoids the panics, but results in a nonworking network driver.

I'm unsure how to proceed. Some thoughts:

1) Initialize the "pvpl" pool with IPL_VM.
2) Preallocate a large number of buffers in mbuf_pool_cache_init (see the sketch after this list).
3) Rewrite the network drivers so that they do not request memory in interrupt context.
4) Other.
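For option 2, here is a minimal sketch of what I have in mind, to be called from mbuf_pool_cache_init after the cache is created. The helper name and the nbufs count are made up; pool_cache_get/pool_cache_put are the standard pool_cache(9) calls:

/*
 * Prime the cache from thread context, so that later pool_cache_get()
 * calls from interrupt context can be satisfied from already
 * constructed objects without invoking the ctor.
 */
static int
mbuf_pool_cache_prime(pool_cache_t pc, unsigned int nbufs)
{
	void **objs;
	unsigned int i, n;

	KASSERT(!cpu_intr_p() && !cpu_softintr_p());

	objs = kmem_alloc(nbufs * sizeof(*objs), KM_SLEEP);
	for (n = 0; n < nbufs; n++) {
		/* The ctor runs here, where sleeping is allowed. */
		objs[n] = pool_cache_get(pc, PR_WAITOK);
		if (objs[n] == NULL)
			break;
	}
	/* Release the objects; they stay constructed in the cache. */
	for (i = 0; i < n; i++)
		pool_cache_put(pc, objs[i]);
	kmem_free(objs, nbufs * sizeof(*objs));

	return (n == nbufs) ? 0 : ENOMEM;
}

One obvious drawback is that the interrupt path can still exhaust the preallocated buffers, so this only narrows the window rather than closing it.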
Comments?

Thanks,
Sverre

Here's the current code:

/*
 * Mbuf_pool_cache_init sets up a DMA-safe pool_cache for the specified
 * bus and size.  The pool_cache will use bus_dmamem_alloc as its
 * memory allocator.  Mbuf_pool_cache_init may be called multiple times
 * for a given bus and size.  Subsequent calls return the original
 * pool_cache and increment a reference count.  Mbuf_pool_cache_init
 * should be called from bus or device attach methods as needed.
 * Mbuf_pool_cache_destroy should similarly be called from a bus or
 * device detach method.  The reference counter is used to destroy the
 * pool_cache when appropriate.
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/pool.h>
#include <sys/queue.h>
#include <sys/bus.h>
#include <sys/cpu.h>

#define MBUF_POOL_POOLPAGE_ALLOC

/* The mbuf_pool_item list */
static TAILQ_HEAD(, mbuf_pool_item) mbuf_pool_head =
    TAILQ_HEAD_INITIALIZER(mbuf_pool_head);

struct mbuf_pool_item {
	TAILQ_ENTRY(mbuf_pool_item) mbuf_pool_list;
	bus_dma_tag_t mpi_bus_tag;
	unsigned int mpi_size;
	char *mpi_name;
	pool_cache_t mpi_pc;
	unsigned int mpi_refcnt;
};

struct mbuf_pool_extmem {
	bus_size_t em_size;
	bus_dma_segment_t em_seg;
	void *em_vaddr;
};
typedef struct mbuf_pool_extmem mbuf_pool_extmem_t;

static bool mbuf_pool_initialized = false;
static kmutex_t mbuf_pool_lock;

#ifdef MBUF_POOL_POOLPAGE_ALLOC
static struct pool_allocator mbuf_pool_allocator;
#endif

#define MBUF_POOL_TO_MPI(pool) \
	((struct mbuf_pool_item *)((pool)->pr_qcache))

struct mbuf_pool_item *
	mbuf_pool_get_pool_item(pool_cache_t pc, bus_dma_tag_t tag,
	    unsigned int size);
char	*mbuf_pool_get_pool_name(bus_dma_tag_t tag, unsigned int size);
pool_cache_t
	mbuf_pool_cache_init(bus_dma_tag_t tag, unsigned int size);
void	mbuf_pool_cache_destroy(pool_cache_t pc);
void	*mbuf_pool_cache_get_paddr(pool_cache_t pc, int flags,
	    paddr_t *pap);
void	mbuf_pool_cache_put_paddr(pool_cache_t pc, void *object,
	    paddr_t pa);

#ifdef MBUF_POOL_POOLPAGE_ALLOC

/*
 * Custom pool alloc and free methods.
 */
static void *
mbuf_pool_poolpage_alloc(struct pool *pool, int prflags)
{
	void *poolpage;
	unsigned int poolpage_size = pool->pr_alloc->pa_pagesz;
	struct mbuf_pool_item *mpi;
	mbuf_pool_extmem_t *em;
	unsigned int size, em_count, i;
	int error, nsegs;

	/* kmem_alloc cannot be used in interrupt context. */
	if (cpu_intr_p() || cpu_softintr_p())
		return NULL;

	/* Verify assumptions that are made in the code below. */
	if (poolpage_size < sizeof(mbuf_pool_extmem_t) ||
	    poolpage_size % sizeof(mbuf_pool_extmem_t) != 0)
		panic("mbuf_pool_poolpage_alloc: "
		    "invalid struct mbuf_pool_extmem size");

	/* XXX Should this be KM_NOSLEEP? */
	/* Cannot use in interrupt context; kmem(9) says use pool_cache
	 * instead.  How? */
	poolpage = kmem_alloc(poolpage_size, KM_SLEEP);
	if (poolpage == NULL)
		goto fail1;

	mpi = MBUF_POOL_TO_MPI(pool);
	em_count = poolpage_size / sizeof(mbuf_pool_extmem_t);
	size = mpi->mpi_size;
	em = poolpage;
	for (i = 0; i < em_count; i++) {
		em->em_size = size;
		/* XXX verify alignment arg (size) */
		error = bus_dmamem_alloc(mpi->mpi_bus_tag, size, size, 0,
		    &em->em_seg, 1, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0 || nsegs != 1)
			goto fail2;
		error = bus_dmamem_map(mpi->mpi_bus_tag, &em->em_seg, 1,
		    size, &em->em_vaddr, BUS_DMA_WAITOK);
		if (error != 0 || em->em_vaddr == NULL)
			goto fail3;
		em++;
	}
	return poolpage;

fail3:
	/* The current entry is allocated but not mapped. */
	bus_dmamem_free(mpi->mpi_bus_tag, &em->em_seg, 1);
fail2:
	/* Earlier entries are both allocated and mapped. */
	while (--em >= (mbuf_pool_extmem_t *)poolpage) {
		bus_dmamem_unmap(mpi->mpi_bus_tag, em->em_vaddr,
		    em->em_size);
		bus_dmamem_free(mpi->mpi_bus_tag, &em->em_seg, 1);
	}
	kmem_free(poolpage, poolpage_size);
fail1:
	printf("iwn: mbuf_pool_poolpage_alloc failure\n");
	return NULL;
}

static void
mbuf_pool_poolpage_free(struct pool *pool, void *poolpage)
{
	unsigned int poolpage_size = pool->pr_alloc->pa_pagesz;
	struct mbuf_pool_item *mpi;
	mbuf_pool_extmem_t *em;
	unsigned int em_count, i;

	mpi = MBUF_POOL_TO_MPI(pool);
	em_count = poolpage_size / sizeof(mbuf_pool_extmem_t);
	em = poolpage;
	for (i = 0; i < em_count; i++) {
		bus_dmamem_unmap(mpi->mpi_bus_tag, em->em_vaddr,
		    em->em_size);
		bus_dmamem_free(mpi->mpi_bus_tag, &em->em_seg, 1);
		em++;
	}
	kmem_free(poolpage, poolpage_size);
}

#else /* !MBUF_POOL_POOLPAGE_ALLOC */

static int
mbuf_pool_ctor(void *arg, void *object, int flags)
{
	struct mbuf_pool_item *mpi = arg;
	mbuf_pool_extmem_t *em = object;
	unsigned int size;
	int error, nsegs;

	/* bus_dmamem_map fails in interrupt context. */
	if (cpu_intr_p() || cpu_softintr_p())
		return EBUSY;

	size = mpi->mpi_size;
	em->em_size = size;
	/* XXX verify alignment arg (size) */
	error = bus_dmamem_alloc(mpi->mpi_bus_tag, size, size, 0,
	    &em->em_seg, 1, &nsegs, BUS_DMA_NOWAIT);
	if (error != 0 || nsegs != 1)
		goto fail1;
	/* XXX The next call fails with an ipl error in pool_get
	 * (pool "pvpl"). */
	error = bus_dmamem_map(mpi->mpi_bus_tag, &em->em_seg, 1, size,
	    &em->em_vaddr, BUS_DMA_WAITOK);
	if (error != 0 || em->em_vaddr == NULL)
		goto fail2;
	return 0;

fail2:
	bus_dmamem_free(mpi->mpi_bus_tag, &em->em_seg, 1);
fail1:
	printf("iwn: mbuf_pool_ctor failure\n");
	return (error != 0) ? error : ENOMEM;
}

static void
mbuf_pool_dtor(void *arg, void *object)
{
	struct mbuf_pool_item *mpi = arg;
	mbuf_pool_extmem_t *em = object;

	bus_dmamem_unmap(mpi->mpi_bus_tag, em->em_vaddr, em->em_size);
	bus_dmamem_free(mpi->mpi_bus_tag, &em->em_seg, 1);
}

#endif /* MBUF_POOL_POOLPAGE_ALLOC */
/*
 * Return the mbuf_pool_item struct that matches pc, or tag and size.
 * Must be called with the mutex held.
 */
struct mbuf_pool_item *
mbuf_pool_get_pool_item(pool_cache_t pc, bus_dma_tag_t tag,
    unsigned int size)
{
	struct mbuf_pool_item *mpi = NULL, *mpi1;

	TAILQ_FOREACH(mpi1, &mbuf_pool_head, mbuf_pool_list) {
		if (mpi1->mpi_pc == pc ||
		    (mpi1->mpi_size == size && mpi1->mpi_bus_tag == tag)) {
			mpi = mpi1;
			break;
		}
	}
	return mpi;
}

/* Large enough for "iwn: test_cache_" plus the formatted size. */
#define MBUF_POOL_NAMELEN 32

char *
mbuf_pool_get_pool_name(bus_dma_tag_t tag, unsigned int size)
{
	char *name;

	name = kmem_alloc(MBUF_POOL_NAMELEN, KM_SLEEP);
	snprintf(name, MBUF_POOL_NAMELEN, "iwn: test_cache_%u", size);
	return name;
}

pool_cache_t
mbuf_pool_cache_init(bus_dma_tag_t tag, unsigned int size)
{
	pool_cache_t pc = NULL;
	char *name;
	struct mbuf_pool_item *mpi;
	int pc_size = sizeof(struct mbuf_pool_extmem);
	int nmbclusters;

	if (!mbuf_pool_initialized) {
		/* XXX Racy code.  Need a proper constructor? */
		/* XXX IPL_NONE implies: cannot use in an interrupt
		 * handler.  Verify! */
		mutex_init(&mbuf_pool_lock, MUTEX_DEFAULT, IPL_NONE);
		mbuf_pool_initialized = true;
	}

	/* Protect with the mutex to avoid a race with
	 * mbuf_pool_cache_destroy. */
	mutex_enter(&mbuf_pool_lock);

	/* Existing mbuf_pool_cache? */
	mpi = mbuf_pool_get_pool_item(NULL, tag, size);
	if (mpi == NULL) {
		/* Create a new pool cache. */
		mpi = kmem_alloc(sizeof(struct mbuf_pool_item), KM_SLEEP);
		if (mpi == NULL)
			goto fail;
		mpi->mpi_bus_tag = tag;
		mpi->mpi_size = size;

		/* Pool caches must be named - make up a name. */
		name = mbuf_pool_get_pool_name(tag, size);
		mpi->mpi_name = name;

		/* Should we use IPL_NET instead of IPL_VM? */
#ifdef MBUF_POOL_POOLPAGE_ALLOC
		mbuf_pool_allocator.pa_alloc = &mbuf_pool_poolpage_alloc;
		mbuf_pool_allocator.pa_free = &mbuf_pool_poolpage_free;
		pc = pool_cache_init(pc_size, 0, 0, PR_NOALIGN | PR_NOTOUCH,
		    name, &mbuf_pool_allocator, IPL_VM, NULL, NULL, NULL);
#else
		pc = pool_cache_init(pc_size, 0, 0, PR_NOALIGN | PR_NOTOUCH,
		    name, NULL, IPL_VM, &mbuf_pool_ctor, &mbuf_pool_dtor,
		    mpi);
#endif
		printf("mbuf_pool_cache_init (%p): %u / %s (%d)\n",
		    pc, size, name, pc_size);
		if (pc == NULL) {
			kmem_free(name, MBUF_POOL_NAMELEN);
			kmem_free(mpi, sizeof(struct mbuf_pool_item));
			goto fail;
		}

		/*
		 * Set the hard limit on the pool to the number of mbuf
		 * clusters the kernel is to support.  Log the
		 * limit-reached message at most once a minute.
		 * XXX Sizing is wrong.  Fix.
		 */
		nmbclusters = physmem * PAGE_SIZE / (4 * size);
#ifdef NMBCLUSTERS_MAX
		nmbclusters = MIN(nmbclusters, NMBCLUSTERS_MAX);
#endif
#ifdef NMBCLUSTERS
		nmbclusters = MIN(nmbclusters, NMBCLUSTERS);
#endif
		pool_cache_sethardlimit(pc, nmbclusters,
		    "WARNING: mbuf_pool_cache limit reached", 60);

		/* mpi is needed in mbuf_pool_poolpage_alloc/free. */
		/* XXX Is this OK? */
		pc->pc_pool.pr_qcache = mpi;

		mpi->mpi_pc = pc;
		mpi->mpi_refcnt = 1;

		/* Add the mbuf_pool_item to the mbuf pool item list. */
		TAILQ_INSERT_TAIL(&mbuf_pool_head, mpi, mbuf_pool_list);
	} else {
		/* Increment the refcount and return the existing
		 * pool cache. */
		mpi->mpi_refcnt++;
		pc = mpi->mpi_pc;
	}
fail:
	mutex_exit(&mbuf_pool_lock);

	if (pc != NULL) {
		/* pool_cache_set_drain_hook(pc, m_reclaim, NULL); */
		/* pool_cache_sethardlimit(pc, nmbclusters,
		 *     mclpool_4k_warnmsg, 60); */
	}
	return pc;
}
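/*
 * For reference, the intended call site for the above is a driver
 * attach method, along these lines (hypothetical driver code; sc_dmat
 * is the usual DMA tag member in the softc):
 *
 *	sc->sc_rx_pc = mbuf_pool_cache_init(sc->sc_dmat, 4096);
 *	if (sc->sc_rx_pc == NULL)
 *		aprint_error_dev(self,
 *		    "could not create DMA buffer cache\n");
 */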
void
mbuf_pool_cache_destroy(pool_cache_t pc)
{
	struct mbuf_pool_item *mpi;

	mutex_enter(&mbuf_pool_lock);

	mpi = mbuf_pool_get_pool_item(pc, NULL, 0);
	KASSERT(mpi != NULL);

	if (mpi->mpi_refcnt <= 1) {
		/* The pool cache is no longer needed. */
		pool_cache_destroy(mpi->mpi_pc);
		TAILQ_REMOVE(&mbuf_pool_head, mpi, mbuf_pool_list);
		kmem_free(mpi->mpi_name, MBUF_POOL_NAMELEN);
		kmem_free(mpi, sizeof(struct mbuf_pool_item));
	} else {
		mpi->mpi_refcnt--;
	}

	if (TAILQ_EMPTY(&mbuf_pool_head)) {
		/* XXX Add code here that un-initializes this object
		 * when appropriate. */
		/* XXX OK to destroy a held mutex? */
		/* XXX Racy code. */
		mutex_destroy(&mbuf_pool_lock);
		mbuf_pool_initialized = false;
	}
	if (mbuf_pool_initialized)
		mutex_exit(&mbuf_pool_lock);
}

/*
 * XXX These methods may not be needed.  Why not call the pool_cache
 * methods directly instead?
 * Perhaps implement OpenBSD's livelock solution?
 */
void *
mbuf_pool_cache_get_paddr(pool_cache_t pc, int flags, paddr_t *pap)
{

	return pool_cache_get_paddr(pc, flags, pap);
}

void
mbuf_pool_cache_put_paddr(pool_cache_t pc, void *object, paddr_t pa)
{

	pool_cache_put_paddr(pc, object, pa);
}

/*
 * Implement these as needed:
 *	mbuf_pool_cache_get
 *	mbuf_pool_cache_put
 *	mbuf_pool_cache_destruct_object
 *	mbuf_pool_cache_invalidate
 *	mbuf_pool_cache_sethiwat
 *	mbuf_pool_cache_setlowat
 *	mbuf_pool_cache_sethardlimit
 */
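Finally, if the error returns stay in, the driver still needs a way to recover when an interrupt-time pool_cache_get fails (as in the RX sketch near the top). One idea would be to fall back to a workqueue(9) job that refills the cache from thread context. Everything below is hypothetical: the softc members, the refill count, and mbuf_pool_cache_prime from the earlier sketch.

/*
 * Established once in the attach method:
 *
 *	error = workqueue_create(&sc->sc_refill_wq, "mpirefill",
 *	    mbuf_pool_refill_work, sc, PRI_NONE, IPL_NET, 0);
 */

static void
mbuf_pool_refill_work(struct work *wk, void *arg)
{
	struct mydriver_softc *sc = arg;

	/* Thread context: the ctor may sleep here. */
	(void)mbuf_pool_cache_prime(sc->sc_rx_pc, 32);
}

In the RX interrupt path, a failed get would then enqueue the work item (guarding against enqueueing the same struct work twice is omitted here; workqueue(9) requires it):

	if (buf == NULL)
		workqueue_enqueue(sc->sc_refill_wq, &sc->sc_refill_wk,
		    NULL);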