The branch main has been updated by alc:

URL: https://cgit.FreeBSD.org/src/commit/?id=1fee99800a79887b9037749a34d09f2acab082c0
commit 1fee99800a79887b9037749a34d09f2acab082c0
Author:     Alan Cox <a...@freebsd.org>
AuthorDate: 2025-05-27 08:27:16 +0000
Commit:     Alan Cox <a...@freebsd.org>
CommitDate: 2025-06-08 18:35:56 +0000

    vm_page: Retire its listq field

    Over the life cycle of a vm_page, its listq field has been used for
    two distinct purposes. First, linking together all of the pages
    allocated to a vm_object. Recently, c8d56817b80f ("vm_object: drop
    memq field") completed the elimination of this use case, using pctrie
    iterators in place of iteration over the listq. Second, linking
    together power-of-two-sized chunks of free pages within vm_phys. This
    change eliminates that use case.

    In essence, this change reverts vm_phys back to using the plinks.q
    field, like it did before 5cd29d0f3cda ("Improve VM page queue
    scalability."), but with a twist to maintain scalability. Just before
    vm_phys uses the plinks.q field, it ensures that any lazy dequeue
    from a paging queue, e.g., PQ_ACTIVE, has completed. Typically, the
    dequeue has completed, so vm_page_dequeue() is infrequently called by
    vm_freelist_add(). The reason is that vm_phys only needs to use the
    plinks.q field within the first page of any power-of-two-sized chunk,
    so the rest of the pages can still have pending dequeues until the
    chunk is split.

    This change saves a non-trivial amount of memory, since we have an
    instance of struct vm_page for every dynamically allocatable physical
    page.

    Bump __FreeBSD_version, since third-party modules that use the inline
    accessors in vm_page.h may need to be recompiled.

    Reviewed by:    dougm, kib, markj
    Differential Revision:  https://reviews.freebsd.org/D50515
---
 sys/sys/param.h  |  2 +-
 sys/vm/vm_page.c | 22 +++++++++++++++-------
 sys/vm/vm_page.h |  1 -
 sys/vm/vm_phys.c | 20 +++++++++++++++-----
 4 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/sys/sys/param.h b/sys/sys/param.h
index da2089918323..e167c96cf9f8 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -73,7 +73,7 @@
  * cannot include sys/param.h and should only be updated here.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 1500045
+#define __FreeBSD_version 1500046
 
 /*
  * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 79eaf8563208..128a1ef7ca54 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -341,7 +341,7 @@ vm_page_blacklist_add(vm_paddr_t pa, bool verbose)
         vm_domain_free_unlock(vmd);
         if (found) {
                 vm_domain_freecnt_inc(vmd, -1);
-                TAILQ_INSERT_TAIL(&blacklist_head, m, listq);
+                TAILQ_INSERT_TAIL(&blacklist_head, m, plinks.q);
                 if (verbose)
                         printf("Skipping page with pa 0x%jx\n", (uintmax_t)pa);
         }
@@ -411,7 +411,7 @@ sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS)
         if (error != 0)
                 return (error);
         sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
-        TAILQ_FOREACH(m, &blacklist_head, listq) {
+        TAILQ_FOREACH(m, &blacklist_head, plinks.q) {
                 sbuf_printf(&sbuf, "%s%#jx", first ? "" : ",",
                     (uintmax_t)m->phys_addr);
                 first = 0;
@@ -2470,6 +2470,13 @@ again:
         }
 
 found:
+        /*
+         * If the page comes from the free page cache, then it might still
+         * have a pending deferred dequeue. Specifically, when the page is
+         * imported from a different pool by vm_phys_alloc_npages(), the
+         * second, third, etc. pages in a non-zero order set could have
+         * pending deferred dequeues.
+         */
         vm_page_dequeue(m);
         vm_page_alloc_check(m);
@@ -2536,17 +2543,18 @@ vm_page_alloc_nofree_domain(int domain, int req)
                         return (NULL);
                 }
                 m->ref_count = count - 1;
-                TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m, listq);
+                TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m, plinks.q);
                 VM_CNT_ADD(v_nofree_count, count);
         }
         m = TAILQ_FIRST(&vmd->vmd_nofreeq);
-        TAILQ_REMOVE(&vmd->vmd_nofreeq, m, listq);
+        TAILQ_REMOVE(&vmd->vmd_nofreeq, m, plinks.q);
         if (m->ref_count > 0) {
                 vm_page_t m_next;
 
                 m_next = &m[1];
+                vm_page_dequeue(m_next);
                 m_next->ref_count = m->ref_count - 1;
-                TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m_next, listq);
+                TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m_next, plinks.q);
                 m->ref_count = 0;
         }
         vm_domain_free_unlock(vmd);
@@ -2566,7 +2574,7 @@ vm_page_free_nofree(struct vm_domain *vmd, vm_page_t m)
 {
         vm_domain_free_lock(vmd);
         MPASS(m->ref_count == 0);
-        TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m, listq);
+        TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m, plinks.q);
         vm_domain_free_unlock(vmd);
         VM_CNT_ADD(v_nofree_count, 1);
 }
@@ -3971,7 +3979,7 @@ vm_page_dequeue(vm_page_t m)
 
         old = vm_page_astate_load(m);
         do {
-                if (old.queue == PQ_NONE) {
+                if (__predict_true(old.queue == PQ_NONE)) {
                         KASSERT((old.flags & PGA_QUEUE_STATE_MASK) == 0,
                             ("%s: page %p has unexpected queue state",
                             __func__, m));
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 4bcd8d9f8236..4f82a69ebe25 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -229,7 +229,6 @@ struct vm_page {
                         void *zone;
                 } uma;
         } plinks;
-        TAILQ_ENTRY(vm_page) listq;     /* pages in same object (O) */
         vm_object_t object;             /* which object am I in (O) */
         vm_pindex_t pindex;             /* offset into object (O,P) */
         vm_paddr_t phys_addr;           /* physical address of page (C) */
diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c
index 95bf6b61fe19..ba16ae551093 100644
--- a/sys/vm/vm_phys.c
+++ b/sys/vm/vm_phys.c
@@ -393,13 +393,23 @@ static void
 vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int pool,
     int tail)
 {
 
+        /*
+         * The paging queues and the free page lists utilize the same field,
+         * plinks.q, within the vm_page structure. When a physical page is
+         * freed, it is lazily removed from the paging queues to reduce the
+         * cost of removal through batching. Here, we must ensure that any
+         * deferred dequeue on the physical page has completed before using
+         * its plinks.q field.
+         */
+        if (__predict_false(vm_page_astate_load(m).queue != PQ_NONE))
+                vm_page_dequeue(m);
         m->order = order;
         m->pool = pool;
         if (tail)
-                TAILQ_INSERT_TAIL(&fl[order].pl, m, listq);
+                TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q);
         else
-                TAILQ_INSERT_HEAD(&fl[order].pl, m, listq);
+                TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q);
         fl[order].lcnt++;
 }
 
@@ -407,7 +417,7 @@ static void
 vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
 {
 
-        TAILQ_REMOVE(&fl[order].pl, m, listq);
+        TAILQ_REMOVE(&fl[order].pl, m, plinks.q);
         fl[order].lcnt--;
         m->order = VM_NFREEORDER;
 }
@@ -1582,7 +1592,7 @@ vm_phys_find_freelist_contig(struct vm_freelist *fl, u_long npages,
          * check if there are enough free blocks starting at a properly aligned
          * block.  Thus, no block is checked for free-ness more than twice.
          */
-        TAILQ_FOREACH(m, &fl[max_order].pl, listq) {
+        TAILQ_FOREACH(m, &fl[max_order].pl, plinks.q) {
                 /*
                  * Skip m unless it is first in a sequence of free max page
                  * blocks >= low in its segment.
@@ -1655,7 +1665,7 @@ vm_phys_find_queues_contig(
         for (oind = order; oind < VM_NFREEORDER; oind++) {
                 for (pind = vm_default_freepool; pind < VM_NFREEPOOL; pind++) {
                         fl = (*queues)[pind];
-                        TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) {
+                        TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
                                 /*
                                  * Determine if the address range starting at pa
                                  * is within the given range, satisfies the
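
To make the pattern at the heart of this commit concrete, here is a minimal
userland sketch. It is not the kernel's code: struct page, page_dequeue(),
freelist_add(), and the single active queue are hypothetical, simplified
stand-ins, and the real vm_page uses an atomically updated queue state
(vm_page_astate_load()) rather than a plain int. The sketch shows only the
invariant the diff enforces: the paging-queue linkage and the free-list
linkage share one TAILQ_ENTRY through a union, so any pending dequeue must
be completed before the free-list code reuses that linkage, as
vm_freelist_add() now does.

#include <sys/queue.h>
#include <assert.h>
#include <stdio.h>

#define PQ_NONE         255
#define PQ_ACTIVE       1

struct page;
TAILQ_HEAD(pagelist, page);

struct page {
        union {
                /* Paging queue OR free list linkage; uses are exclusive. */
                TAILQ_ENTRY(page) q;
        } plinks;
        int queue;      /* paging queue index, or PQ_NONE */
        int order;      /* buddy chunk order while on a free list */
};

static struct pagelist active_queue = TAILQ_HEAD_INITIALIZER(active_queue);
static struct pagelist free_list = TAILQ_HEAD_INITIALIZER(free_list);

/* Complete a pending (deferred) dequeue, if any. */
static void
page_dequeue(struct page *m)
{
        if (m->queue != PQ_NONE) {
                TAILQ_REMOVE(&active_queue, m, plinks.q);
                m->queue = PQ_NONE;
        }
}

/*
 * Analogue of vm_freelist_add(): the shared linkage may still be
 * threaded onto a paging queue, so finish the dequeue before the
 * free list reuses plinks.q.
 */
static void
freelist_add(struct page *m, int order)
{
        if (m->queue != PQ_NONE)        /* typically already PQ_NONE */
                page_dequeue(m);
        m->order = order;
        TAILQ_INSERT_TAIL(&free_list, m, plinks.q);
}

int
main(void)
{
        struct page m = { .queue = PQ_ACTIVE, .order = -1 };

        /* The page sits on a paging queue; its dequeue is still pending. */
        TAILQ_INSERT_TAIL(&active_queue, &m, plinks.q);

        /* Freeing it must first complete the deferred dequeue. */
        freelist_add(&m, 0);
        assert(m.queue == PQ_NONE);
        assert(TAILQ_FIRST(&free_list) == &m);
        printf("page is on the free list at order %d\n", m.order);
        return (0);
}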
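
A note on the design choice the diff reflects: vm_freelist_add() guarantees
a clean plinks.q only for the page it actually links into a free list, the
head of a power-of-two chunk (the guard in freelist_add() above). The
remaining pages of the chunk may keep their deferred dequeues until the
chunk is split or handed out, which is why the allocation side also
dequeues: see the new comment above the "found:" label and the added
vm_page_dequeue(m_next) call in vm_page_alloc_nofree_domain(). That
asymmetry preserves the batching win of lazy dequeues while still letting
the listq field be removed.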