The branch main has been updated by alc:

URL: https://cgit.FreeBSD.org/src/commit/?id=1fee99800a79887b9037749a34d09f2acab082c0

commit 1fee99800a79887b9037749a34d09f2acab082c0
Author:     Alan Cox <a...@freebsd.org>
AuthorDate: 2025-05-27 08:27:16 +0000
Commit:     Alan Cox <a...@freebsd.org>
CommitDate: 2025-06-08 18:35:56 +0000

    vm_page: Retire its listq field
    
    Over the life cycle of a vm_page, its listq field has been used for two
    distinct purposes.  First, linking together all of the pages allocated
    to a vm_object.  Recently, c8d56817b80f ("vm_object: drop memq field")
    completed the elimination of this use case, using pctrie iterators in
    place of iteration over the listq.  Second, linking together power-of-
    two-sized chunks of free pages within vm_phys.  This change eliminates
    that use case.  In essence, this change reverts vm_phys back to using
    the plinks.q field, like it did before 5cd29d0f3cda ("Improve VM page
    queue scalability."), but with a twist to maintain scalability.  Just
    before vm_phys uses the plinks.q field, it ensures that any lazy dequeue
    from a paging queue, e.g., PQ_ACTIVE, has completed.  Typically, the
    dequeue has completed, so vm_page_dequeue() is infrequently called by
    vm_freelist_add().  The reason is that vm_phys only needs to use the
    plinks.q field within the first page of any power-of-two-sized chunk,
    so the rest of the pages can still have pending dequeues until the
    chunk is split.
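    
    To make the field sharing concrete, here is a simplified sketch of the
    relevant linkage fields in struct vm_page (condensed from
    sys/vm/vm_page.h; the real union has more members than shown):
    
        struct vm_page {
                union {
                        /* Paging queues and, after this change, the
                         * vm_phys free lists both link through q. */
                        TAILQ_ENTRY(vm_page) q;
                        struct {
                                void *slab;     /* page owned by UMA */
                                void *zone;
                        } uma;
                } plinks;
                /* TAILQ_ENTRY(vm_page) listq;  retired by this commit */
                /* ... remaining fields ... */
        };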
    
    This change saves a non-trivial amount of memory, since we have an
    instance of struct vm_page for every dynamically allocatable physical
    page.
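    
    (As a rough sense of scale, estimated here rather than taken from the
    commit: a TAILQ_ENTRY is two pointers, 16 bytes on 64-bit platforms.
    With 4 KiB pages, a 64 GiB machine has about 16 million vm_page
    instances, so retiring listq can reclaim on the order of 256 MiB,
    assuming the structure shrinks by the full 16 bytes after padding.)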
    
    Bump __FreeBSD_version, since third-party modules that use the inline
    accessors in vm_page.h may need to be recompiled.
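    
    Out-of-tree code can gate source-level differences on the new value in
    the usual way, e.g.:
    
        #include <sys/param.h>
    
        #if __FreeBSD_version >= 1500046
        /* struct vm_page no longer has a listq field. */
        #endif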
    
    Reviewed by:    dougm, kib, markj
    Differential Revision:  https://reviews.freebsd.org/D50515
---
 sys/sys/param.h  |  2 +-
 sys/vm/vm_page.c | 22 +++++++++++++++-------
 sys/vm/vm_page.h |  1 -
 sys/vm/vm_phys.c | 20 +++++++++++++++-----
 4 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/sys/sys/param.h b/sys/sys/param.h
index da2089918323..e167c96cf9f8 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -73,7 +73,7 @@
  * cannot include sys/param.h and should only be updated here.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 1500045
+#define __FreeBSD_version 1500046
 
 /*
  * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 79eaf8563208..128a1ef7ca54 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -341,7 +341,7 @@ vm_page_blacklist_add(vm_paddr_t pa, bool verbose)
        vm_domain_free_unlock(vmd);
        if (found) {
                vm_domain_freecnt_inc(vmd, -1);
-               TAILQ_INSERT_TAIL(&blacklist_head, m, listq);
+               TAILQ_INSERT_TAIL(&blacklist_head, m, plinks.q);
                if (verbose)
                        printf("Skipping page with pa 0x%jx\n", (uintmax_t)pa);
        }
@@ -411,7 +411,7 @@ sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS)
        if (error != 0)
                return (error);
        sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
-       TAILQ_FOREACH(m, &blacklist_head, listq) {
+       TAILQ_FOREACH(m, &blacklist_head, plinks.q) {
                sbuf_printf(&sbuf, "%s%#jx", first ? "" : ",",
                    (uintmax_t)m->phys_addr);
                first = 0;
@@ -2470,6 +2470,13 @@ again:
        }
 
 found:
+       /*
+        * If the page comes from the free page cache, then it might still
+        * have a pending deferred dequeue.  Specifically, when the page is
+        * imported from a different pool by vm_phys_alloc_npages(), the
+        * second, third, etc. pages in a non-zero order set could have
+        * pending deferred dequeues.
+        */
        vm_page_dequeue(m);
        vm_page_alloc_check(m);
 
@@ -2536,17 +2543,18 @@ vm_page_alloc_nofree_domain(int domain, int req)
                        return (NULL);
                }
                m->ref_count = count - 1;
-               TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m, listq);
+               TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m, plinks.q);
                VM_CNT_ADD(v_nofree_count, count);
        }
        m = TAILQ_FIRST(&vmd->vmd_nofreeq);
-       TAILQ_REMOVE(&vmd->vmd_nofreeq, m, listq);
+       TAILQ_REMOVE(&vmd->vmd_nofreeq, m, plinks.q);
        if (m->ref_count > 0) {
                vm_page_t m_next;
 
                m_next = &m[1];
+               vm_page_dequeue(m_next);
                m_next->ref_count = m->ref_count - 1;
-               TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m_next, listq);
+               TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m_next, plinks.q);
                m->ref_count = 0;
        }
        vm_domain_free_unlock(vmd);
@@ -2566,7 +2574,7 @@ vm_page_free_nofree(struct vm_domain *vmd, vm_page_t m)
 {
        vm_domain_free_lock(vmd);
        MPASS(m->ref_count == 0);
-       TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m, listq);
+       TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m, plinks.q);
        vm_domain_free_unlock(vmd);
        VM_CNT_ADD(v_nofree_count, 1);
 }
@@ -3971,7 +3979,7 @@ vm_page_dequeue(vm_page_t m)
 
        old = vm_page_astate_load(m);
        do {
-               if (old.queue == PQ_NONE) {
+               if (__predict_true(old.queue == PQ_NONE)) {
                        KASSERT((old.flags & PGA_QUEUE_STATE_MASK) == 0,
                            ("%s: page %p has unexpected queue state",
                            __func__, m));
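
The __predict_true() added above is FreeBSD's branch-prediction hint from
sys/sys/cdefs.h, a thin wrapper over the compiler builtin:

    #define __predict_true(exp)     __builtin_expect((exp), 1)
    #define __predict_false(exp)    __builtin_expect((exp), 0)

It documents and optimizes for the common case claimed by the commit
message: by the time vm_page_dequeue() runs here, the page's queue state
is usually already PQ_NONE.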
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 4bcd8d9f8236..4f82a69ebe25 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -229,7 +229,6 @@ struct vm_page {
                        void *zone;
                } uma;
        } plinks;
-       TAILQ_ENTRY(vm_page) listq;     /* pages in same object (O) */
        vm_object_t object;             /* which object am I in (O) */
        vm_pindex_t pindex;             /* offset into object (O,P) */
        vm_paddr_t phys_addr;           /* physical address of page (C) */
diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c
index 95bf6b61fe19..ba16ae551093 100644
--- a/sys/vm/vm_phys.c
+++ b/sys/vm/vm_phys.c
@@ -393,13 +393,23 @@ static void
 vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int pool,
     int tail)
 {
+       /*
+        * The paging queues and the free page lists utilize the same field,
+        * plinks.q, within the vm_page structure.  When a physical page is
+        * freed, it is lazily removed from the paging queues to reduce the
+        * cost of removal through batching.  Here, we must ensure that any
+        * deferred dequeue on the physical page has completed before using
+        * its plinks.q field.
+        */
+       if (__predict_false(vm_page_astate_load(m).queue != PQ_NONE))
+               vm_page_dequeue(m);
 
        m->order = order;
        m->pool = pool;
        if (tail)
-               TAILQ_INSERT_TAIL(&fl[order].pl, m, listq);
+               TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q);
        else
-               TAILQ_INSERT_HEAD(&fl[order].pl, m, listq);
+               TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q);
        fl[order].lcnt++;
 }
 
@@ -407,7 +417,7 @@ static void
 vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
 {
 
-       TAILQ_REMOVE(&fl[order].pl, m, listq);
+       TAILQ_REMOVE(&fl[order].pl, m, plinks.q);
        fl[order].lcnt--;
        m->order = VM_NFREEORDER;
 }
@@ -1582,7 +1592,7 @@ vm_phys_find_freelist_contig(struct vm_freelist *fl, u_long npages,
         * check if there are enough free blocks starting at a properly aligned
         * block.  Thus, no block is checked for free-ness more than twice.
         */
-       TAILQ_FOREACH(m, &fl[max_order].pl, listq) {
+       TAILQ_FOREACH(m, &fl[max_order].pl, plinks.q) {
                /*
                 * Skip m unless it is first in a sequence of free max page
                 * blocks >= low in its segment.
@@ -1655,7 +1665,7 @@ vm_phys_find_queues_contig(
        for (oind = order; oind < VM_NFREEORDER; oind++) {
                for (pind = vm_default_freepool; pind < VM_NFREEPOOL; pind++) {
                        fl = (*queues)[pind];
-                       TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) {
+                       TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
                                /*
                                 * Determine if the address range starting at pa
                                 * is within the given range, satisfies the

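For readers outside the kernel tree, here is a self-contained userspace
sketch (not kernel code) of the pattern this commit relies on: two
mutually exclusive lists sharing one linkage field through a union, with
a guard that completes any pending removal before the field is reused.
All names below (struct page, freelist_add(), ...) are invented for the
illustration; only the pattern mirrors the commit.

    #include <assert.h>
    #include <stdio.h>
    #include <sys/queue.h>

    struct page {
            union {
                    TAILQ_ENTRY(page) q;    /* paging queue OR free list */
                    void *other_use;        /* stand-in for, e.g., uma */
            } plinks;
            int queue;                      /* -1 plays the role of PQ_NONE */
    };

    TAILQ_HEAD(pagelist, page);

    static struct pagelist active_q = TAILQ_HEAD_INITIALIZER(active_q);
    static struct pagelist free_l = TAILQ_HEAD_INITIALIZER(free_l);

    /*
     * Complete any removal from the paging queue so that plinks.q can be
     * reused.  (The kernel's dequeue is lazy and batched; this toy removes
     * eagerly, which is enough to show the invariant.)
     */
    static void
    page_dequeue(struct page *m)
    {
            if (m->queue != -1) {
                    TAILQ_REMOVE(&active_q, m, plinks.q);
                    m->queue = -1;
            }
    }

    /* Analogue of vm_freelist_add(): guard first, then link the page in. */
    static void
    freelist_add(struct page *m)
    {
            page_dequeue(m);        /* usually a no-op, per the commit */
            TAILQ_INSERT_HEAD(&free_l, m, plinks.q);
    }

    int
    main(void)
    {
            struct page p;

            p.queue = -1;
            TAILQ_INSERT_TAIL(&active_q, &p, plinks.q);
            p.queue = 0;            /* pretend it sits on PQ_ACTIVE */

            freelist_add(&p);       /* safe: the shared field was vacated */
            assert(TAILQ_FIRST(&free_l) == &p);
            assert(p.queue == -1);
            printf("page moved from the paging queue to the free list\n");
            return (0);
    }

Builds with cc on FreeBSD and on most systems whose libc ships sys/queue.h.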