The branch main has been updated by andrew:

URL: https://cgit.FreeBSD.org/src/commit/?id=3247bc7cd65275ac30f717b9dcd8a295e92e1e1e

commit 3247bc7cd65275ac30f717b9dcd8a295e92e1e1e
Author:     Andrew Turner <and...@freebsd.org>
AuthorDate: 2022-08-19 10:50:06 +0000
Commit:     Andrew Turner <and...@freebsd.org>
CommitDate: 2022-09-27 15:05:52 +0000

    arm64 pmap: per-domain pv chunk list
    
    As with amd64, use a per-domain pv chunk list and lock to reduce
    contention, as chunks are created and removed all the time.
    
    Sponsored by:   The FreeBSD Foundation
    Differential Revision: https://reviews.freebsd.org/D36307
---
 sys/arm64/arm64/pmap.c | 180 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 123 insertions(+), 57 deletions(-)
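
In short, the single global pv_chunks list and pv_chunks_mutex are replaced
by one pv_chunks_list per memory domain, each with its own mutex, TAILQ head
and active_reclaims counter, aligned to a cache line so the per-domain locks
do not false-share.  The following is a minimal userland-flavoured sketch of
that pattern, not the kernel code: the names (chunk, chunk_list, NDOMAINS,
chunk_insert) are made up for illustration, pthread mutexes stand in for
mtx(9) locks, and __attribute__((aligned)) stands in for
__exclusive_cache_line.

#include <pthread.h>
#include <sys/queue.h>

/* Illustrative constants; the kernel sizes the array by PMAP_MEMDOM. */
#define NDOMAINS        4
#define CACHE_LINE      64

struct chunk {
        TAILQ_ENTRY(chunk) link;
        int domain;             /* stand-in for vm_phys_domain() on the chunk */
};

/* One list and one lock per memory domain, padded against false sharing. */
struct chunk_list {
        pthread_mutex_t lock;
        TAILQ_HEAD(, chunk) head;
        int active_reclaims;
} __attribute__((aligned(CACHE_LINE)));

static struct chunk_list chunk_lists[NDOMAINS];

static void
chunk_lists_init(void)
{
        for (int i = 0; i < NDOMAINS; i++) {
                pthread_mutex_init(&chunk_lists[i].lock, NULL);
                TAILQ_INIT(&chunk_lists[i].head);
                chunk_lists[i].active_reclaims = 0;
        }
}

/* Queue a chunk on its home domain's list; only that domain's lock is taken. */
static void
chunk_insert(struct chunk *c)
{
        struct chunk_list *cl = &chunk_lists[c->domain];

        pthread_mutex_lock(&cl->lock);
        TAILQ_INSERT_TAIL(&cl->head, c, link);
        pthread_mutex_unlock(&cl->lock);
}

int
main(void)
{
        struct chunk c = { .domain = 2 };

        chunk_lists_init();
        chunk_insert(&c);       /* touches only domain 2's list and lock */
        return (0);
}

Threads working on chunks that live in different domains now contend on
different locks, which is the contention reduction the commit message refers
to.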

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 0c2f623aa9a3..dcc0c637cc1e 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -150,6 +150,12 @@ __FBSDID("$FreeBSD$");
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 
+#ifdef NUMA
+#define        PMAP_MEMDOM     MAXMEMDOM
+#else
+#define        PMAP_MEMDOM     1
+#endif
+
 #define        PMAP_ASSERT_STAGE1(pmap)        MPASS((pmap)->pm_stage == PM_STAGE1)
 #define        PMAP_ASSERT_STAGE2(pmap)        MPASS((pmap)->pm_stage == PM_STAGE2)
 
@@ -276,8 +282,28 @@ vm_offset_t kernel_vm_end = 0;
 /*
  * Data for the pv entry allocation mechanism.
  */
-static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
-static struct mtx pv_chunks_mutex;
+#ifdef NUMA
+static __inline int
+pc_to_domain(struct pv_chunk *pc)
+{
+       return (vm_phys_domain(DMAP_TO_PHYS((vm_offset_t)pc)));
+}
+#else
+static __inline int
+pc_to_domain(struct pv_chunk *pc __unused)
+{
+       return (0);
+}
+#endif
+
+struct pv_chunks_list {
+       struct mtx pvc_lock;
+       TAILQ_HEAD(pch, pv_chunk) pvc_list;
+       int active_reclaims;
+} __aligned(CACHE_LINE_SIZE);
+
+struct pv_chunks_list __exclusive_cache_line pv_chunks[PMAP_MEMDOM];
+
 static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
 static struct md_page *pv_table;
 static struct md_page pv_dummy;
@@ -1324,9 +1350,13 @@ pmap_init(void)
        }
 
        /*
-        * Initialize the pv chunk list mutex.
+        * Initialize pv chunk lists.
         */
-       mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
+       for (i = 0; i < PMAP_MEMDOM; i++) {
+               mtx_init(&pv_chunks[i].pvc_lock, "pmap pv chunk list", NULL,
+                   MTX_DEF);
+               TAILQ_INIT(&pv_chunks[i].pvc_list);
+       }
 
        /*
         * Initialize the pool of pv list locks.
@@ -2550,8 +2580,9 @@ SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
  * exacerbating the shortage of free pv entries.
  */
 static vm_page_t
-reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
+reclaim_pv_chunk_domain(pmap_t locked_pmap, struct rwlock **lockp, int domain)
 {
+       struct pv_chunks_list *pvc;
        struct pv_chunk *pc, *pc_marker, *pc_marker_end;
        struct pv_chunk_header pc_marker_b, pc_marker_end_b;
        struct md_page *pvh;
@@ -2564,7 +2595,6 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
        struct spglist free;
        uint64_t inuse;
        int bit, field, freed, lvl;
-       static int active_reclaims = 0;
 
        PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
        KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL"));
@@ -2577,10 +2607,11 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
        pc_marker = (struct pv_chunk *)&pc_marker_b;
        pc_marker_end = (struct pv_chunk *)&pc_marker_end_b;
 
-       mtx_lock(&pv_chunks_mutex);
-       active_reclaims++;
-       TAILQ_INSERT_HEAD(&pv_chunks, pc_marker, pc_lru);
-       TAILQ_INSERT_TAIL(&pv_chunks, pc_marker_end, pc_lru);
+       pvc = &pv_chunks[domain];
+       mtx_lock(&pvc->pvc_lock);
+       pvc->active_reclaims++;
+       TAILQ_INSERT_HEAD(&pvc->pvc_list, pc_marker, pc_lru);
+       TAILQ_INSERT_TAIL(&pvc->pvc_list, pc_marker_end, pc_lru);
        while ((pc = TAILQ_NEXT(pc_marker, pc_lru)) != pc_marker_end &&
            SLIST_EMPTY(&free)) {
                next_pmap = pc->pc_pmap;
@@ -2593,11 +2624,11 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
                         */
                        goto next_chunk;
                }
-               mtx_unlock(&pv_chunks_mutex);
+               mtx_unlock(&pvc->pvc_lock);
 
                /*
                 * A pv_chunk can only be removed from the pc_lru list
-                * when both pv_chunks_mutex is owned and the
+                * when both pvc->pvc_lock is owned and the
                 * corresponding pmap is locked.
                 */
                if (pmap != next_pmap) {
@@ -2608,15 +2639,15 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
                        if (pmap > locked_pmap) {
                                RELEASE_PV_LIST_LOCK(lockp);
                                PMAP_LOCK(pmap);
-                               mtx_lock(&pv_chunks_mutex);
+                               mtx_lock(&pvc->pvc_lock);
                                continue;
                        } else if (pmap != locked_pmap) {
                                if (PMAP_TRYLOCK(pmap)) {
-                                       mtx_lock(&pv_chunks_mutex);
+                                       mtx_lock(&pvc->pvc_lock);
                                        continue;
                                } else {
                                        pmap = NULL; /* pmap is not locked */
-                                       mtx_lock(&pv_chunks_mutex);
+                                       mtx_lock(&pvc->pvc_lock);
                                        pc = TAILQ_NEXT(pc_marker, pc_lru);
                                        if (pc == NULL ||
                                            pc->pc_pmap != next_pmap)
@@ -2668,7 +2699,7 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
                        }
                }
                if (freed == 0) {
-                       mtx_lock(&pv_chunks_mutex);
+                       mtx_lock(&pvc->pvc_lock);
                        goto next_chunk;
                }
                /* Every freed mapping is for a 4 KB page. */
@@ -2684,20 +2715,20 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
                        /* Entire chunk is free; return it. */
                        m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
                        dump_drop_page(m_pc->phys_addr);
-                       mtx_lock(&pv_chunks_mutex);
-                       TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+                       mtx_lock(&pvc->pvc_lock);
+                       TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
                        break;
                }
                TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
-               mtx_lock(&pv_chunks_mutex);
+               mtx_lock(&pvc->pvc_lock);
                /* One freed pv entry in locked_pmap is sufficient. */
                if (pmap == locked_pmap)
                        break;
 
 next_chunk:
-               TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru);
-               TAILQ_INSERT_AFTER(&pv_chunks, pc, pc_marker, pc_lru);
-               if (active_reclaims == 1 && pmap != NULL) {
+               TAILQ_REMOVE(&pvc->pvc_list, pc_marker, pc_lru);
+               TAILQ_INSERT_AFTER(&pvc->pvc_list, pc, pc_marker, pc_lru);
+               if (pvc->active_reclaims == 1 && pmap != NULL) {
                        /*
                         * Rotate the pv chunks list so that we do not
                         * scan the same pv chunks that could not be
@@ -2705,17 +2736,17 @@ next_chunk:
                         * and/or superpage mapping) on every
                         * invocation of reclaim_pv_chunk().
                         */
-                       while ((pc = TAILQ_FIRST(&pv_chunks)) != pc_marker) {
+                       while ((pc = TAILQ_FIRST(&pvc->pvc_list)) != pc_marker){
                                MPASS(pc->pc_pmap != NULL);
-                               TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
-                               TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
+                               TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
+                               TAILQ_INSERT_TAIL(&pvc->pvc_list, pc, pc_lru);
                        }
                }
        }
-       TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru);
-       TAILQ_REMOVE(&pv_chunks, pc_marker_end, pc_lru);
-       active_reclaims--;
-       mtx_unlock(&pv_chunks_mutex);
+       TAILQ_REMOVE(&pvc->pvc_list, pc_marker, pc_lru);
+       TAILQ_REMOVE(&pvc->pvc_list, pc_marker_end, pc_lru);
+       pvc->active_reclaims--;
+       mtx_unlock(&pvc->pvc_lock);
        if (pmap != NULL && pmap != locked_pmap)
                PMAP_UNLOCK(pmap);
        if (m_pc == NULL && !SLIST_EMPTY(&free)) {
@@ -2728,6 +2759,23 @@ next_chunk:
        return (m_pc);
 }
 
+static vm_page_t
+reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
+{
+       vm_page_t m;
+       int i, domain;
+
+       domain = PCPU_GET(domain);
+       for (i = 0; i < vm_ndomains; i++) {
+               m = reclaim_pv_chunk_domain(locked_pmap, lockp, domain);
+               if (m != NULL)
+                       break;
+               domain = (domain + 1) % vm_ndomains;
+       }
+
+       return (m);
+}
+
 /*
  * free the pv_entry back to the free list
  */
@@ -2776,28 +2824,37 @@ free_pv_chunk_dequeued(struct pv_chunk *pc)
 static void
 free_pv_chunk(struct pv_chunk *pc)
 {
-       mtx_lock(&pv_chunks_mutex);
-       TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
-       mtx_unlock(&pv_chunks_mutex);
+       struct pv_chunks_list *pvc;
+
+       pvc = &pv_chunks[pc_to_domain(pc)];
+       mtx_lock(&pvc->pvc_lock);
+       TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
+       mtx_unlock(&pvc->pvc_lock);
        free_pv_chunk_dequeued(pc);
 }
 
 static void
 free_pv_chunk_batch(struct pv_chunklist *batch)
 {
+       struct pv_chunks_list *pvc;
        struct pv_chunk *pc, *npc;
+       int i;
 
-       if (TAILQ_EMPTY(batch))
-               return;
-
-       mtx_lock(&pv_chunks_mutex);
-       TAILQ_FOREACH(pc, batch, pc_list) {
-               TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+       for (i = 0; i < vm_ndomains; i++) {
+               if (TAILQ_EMPTY(&batch[i]))
+                       continue;
+               pvc = &pv_chunks[i];
+               mtx_lock(&pvc->pvc_lock);
+               TAILQ_FOREACH(pc, &batch[i], pc_list) {
+                       TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
+               }
+               mtx_unlock(&pvc->pvc_lock);
        }
-       mtx_unlock(&pv_chunks_mutex);
 
-       TAILQ_FOREACH_SAFE(pc, batch, pc_list, npc) {
-               free_pv_chunk_dequeued(pc);
+       for (i = 0; i < vm_ndomains; i++) {
+               TAILQ_FOREACH_SAFE(pc, &batch[i], pc_list, npc) {
+                       free_pv_chunk_dequeued(pc);
+               }
        }
 }
 
@@ -2812,6 +2869,7 @@ free_pv_chunk_batch(struct pv_chunklist *batch)
 static pv_entry_t
 get_pv_entry(pmap_t pmap, struct rwlock **lockp)
 {
+       struct pv_chunks_list *pvc;
        int bit, field;
        pv_entry_t pv;
        struct pv_chunk *pc;
@@ -2860,9 +2918,10 @@ retry:
        pc->pc_pmap = pmap;
        memcpy(pc->pc_map, pc_freemask, sizeof(pc_freemask));
        pc->pc_map[0] &= ~1ul;          /* preallocated bit 0 */
-       mtx_lock(&pv_chunks_mutex);
-       TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
-       mtx_unlock(&pv_chunks_mutex);
+       pvc = &pv_chunks[vm_page_domain(m)];
+       mtx_lock(&pvc->pvc_lock);
+       TAILQ_INSERT_TAIL(&pvc->pvc_list, pc, pc_lru);
+       mtx_unlock(&pvc->pvc_lock);
        pv = &pc->pc_pventry[0];
        TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
        PV_STAT(atomic_add_long(&pv_entry_count, 1));
@@ -2879,10 +2938,11 @@ retry:
 static void
 reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
 {
-       struct pch new_tail;
+       struct pv_chunks_list *pvc;
+       struct pch new_tail[PMAP_MEMDOM];
        struct pv_chunk *pc;
        vm_page_t m;
-       int avail, free;
+       int avail, free, i;
        bool reclaimed;
 
        PMAP_LOCK_ASSERT(pmap, MA_OWNED);
@@ -2894,7 +2954,8 @@ reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
         * reclaim_pv_chunk() could recycle one of these chunks.  In
         * contrast, these chunks must be added to the pmap upon allocation.
         */
-       TAILQ_INIT(&new_tail);
+       for (i = 0; i < PMAP_MEMDOM; i++)
+               TAILQ_INIT(&new_tail[i]);
 retry:
        avail = 0;
        TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) {
@@ -2921,7 +2982,7 @@ retry:
                pc->pc_pmap = pmap;
                memcpy(pc->pc_map, pc_freemask, sizeof(pc_freemask));
                TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
-               TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
+               TAILQ_INSERT_TAIL(&new_tail[vm_page_domain(m)], pc, pc_lru);
                PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV));
 
                /*
@@ -2932,10 +2993,13 @@ retry:
                if (reclaimed)
                        goto retry;
        }
-       if (!TAILQ_EMPTY(&new_tail)) {
-               mtx_lock(&pv_chunks_mutex);
-               TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru);
-               mtx_unlock(&pv_chunks_mutex);
+       for (i = 0; i < vm_ndomains; i++) {
+               if (TAILQ_EMPTY(&new_tail[i]))
+                       continue;
+               pvc = &pv_chunks[i];
+               mtx_lock(&pvc->pvc_lock);
+               TAILQ_CONCAT(&pvc->pvc_list, &new_tail[i], pc_lru);
+               mtx_unlock(&pvc->pvc_lock);
        }
 }
 
@@ -5276,7 +5340,7 @@ pmap_remove_pages(pmap_t pmap)
        pd_entry_t *pde;
        pt_entry_t *pte, tpte;
        struct spglist free;
-       struct pv_chunklist free_chunks;
+       struct pv_chunklist free_chunks[PMAP_MEMDOM];
        vm_page_t m, ml3, mt;
        pv_entry_t pv;
        struct md_page *pvh;
@@ -5284,12 +5348,13 @@ pmap_remove_pages(pmap_t pmap)
        struct rwlock *lock;
        int64_t bit;
        uint64_t inuse, bitmask;
-       int allfree, field, freed, idx, lvl;
+       int allfree, field, freed, i, idx, lvl;
        vm_paddr_t pa;
 
        lock = NULL;
 
-       TAILQ_INIT(&free_chunks);
+       for (i = 0; i < PMAP_MEMDOM; i++)
+               TAILQ_INIT(&free_chunks[i]);
        SLIST_INIT(&free);
        PMAP_LOCK(pmap);
        TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
@@ -5430,13 +5495,14 @@ pmap_remove_pages(pmap_t pmap)
                PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
                if (allfree) {
                        TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
-                       TAILQ_INSERT_TAIL(&free_chunks, pc, pc_list);
+                       TAILQ_INSERT_TAIL(&free_chunks[pc_to_domain(pc)], pc,
+                           pc_list);
                }
        }
        if (lock != NULL)
                rw_wunlock(lock);
        pmap_invalidate_all(pmap);
-       free_pv_chunk_batch(&free_chunks);
+       free_pv_chunk_batch(free_chunks);
        PMAP_UNLOCK(pmap);
        vm_page_free_pages_toq(&free, true);
 }
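
As a usage note on the new reclaim path: reclaim_pv_chunk() above prefers the
calling CPU's domain and only then walks the remaining domains round-robin,
so a shortage in one domain can still be satisfied from another.  A small,
self-contained restatement of just that loop follows; the domain count, page
type and per-domain reclaim function are stubs invented for illustration,
not the kernel API.

#include <stdio.h>

#define NDOMAINS 4

struct page { int domain; };

static struct page pages[NDOMAINS];

/* Stub: pretend only domain 3 currently has a reclaimable chunk. */
static struct page *
reclaim_from_domain(int domain)
{
        return (domain == 3 ? &pages[domain] : NULL);
}

/* Start at the local domain, fall back to the others in round-robin order. */
static struct page *
reclaim_any_domain(int local_domain)
{
        struct page *m = NULL;
        int domain = local_domain;

        for (int i = 0; i < NDOMAINS; i++) {
                m = reclaim_from_domain(domain);
                if (m != NULL)
                        break;
                domain = (domain + 1) % NDOMAINS;
        }
        return (m);
}

int
main(void)
{
        for (int i = 0; i < NDOMAINS; i++)
                pages[i].domain = i;

        struct page *m = reclaim_any_domain(1);
        printf("reclaimed from domain %d\n", m != NULL ? m->domain : -1);
        return (0);
}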
