The branch main has been updated by dougm:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=d0b225d16418e224b5d30d13f45515aa70ad47a3

commit d0b225d16418e224b5d30d13f45515aa70ad47a3
Author:     Doug Moore <do...@freebsd.org>
AuthorDate: 2024-10-08 08:31:16 +0000
Commit:     Doug Moore <do...@freebsd.org>
CommitDate: 2024-10-08 08:31:16 +0000

    swap_pager: use iterators in swp_pager_meta_build
    
    Add a method to use an iterator for pctrie insertion; this should
    improve performance when the last search ended near the place where
    the new item will be inserted.
    
    Add an iterator argument to swp_pager_meta_build, so that the lookups
    and insertions it does can be faster in the common case when keys are
    bunched close together, or appear in sequence.
    
    Reviewed by:    alc
    Differential Revision:  https://reviews.freebsd.org/D46848
---
 sys/kern/subr_pctrie.c |  36 +++++++++++++++++
 sys/sys/pctrie.h       |  20 +++++++++
 sys/vm/swap_pager.c    | 107 ++++++++++++++++++++++++++++++++-----------------
 3 files changed, 126 insertions(+), 37 deletions(-)

diff --git a/sys/kern/subr_pctrie.c b/sys/kern/subr_pctrie.c
index a7b487166054..50216287845f 100644
--- a/sys/kern/subr_pctrie.c
+++ b/sys/kern/subr_pctrie.c
@@ -594,6 +594,42 @@ pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index)
        return (_pctrie_iter_lookup(it, index, NULL, PCTRIE_LOCKED));
 }
 
+/*
+ * Insert the val in the trie, starting search with iterator.  Return a pointer
+ * to indicate where a new node must be allocated to complete insertion.
+ * Assumes access is externally synchronized by a lock.
+ */
+void *
+pctrie_iter_insert_lookup(struct pctrie_iter *it, uint64_t *val)
+{
+       struct pctrie_node *node;
+
+       it->index = *val;
+       node = _pctrie_iter_lookup_node(it, *val, NULL, PCTRIE_LOCKED);
+       if (node == PCTRIE_NULL) {
+               if (it->top == 0)
+                       pctrie_root_store(it->ptree,
+                           pctrie_toleaf(val), PCTRIE_LOCKED);
+               else
+                       pctrie_addnode(it->path[it->top - 1], it->index,
+                           pctrie_toleaf(val), PCTRIE_LOCKED);
+               return (NULL);
+       }
+       if (__predict_false(pctrie_match_value(node, it->index) != NULL))
+               panic("%s: key %jx is already present", __func__,
+                   (uintmax_t)it->index);
+
+       /*
+        * 'node' must be replaced in the tree with a new branch node, with
+        * children 'node' and 'val'. Return the place that points to 'node'
+        * now, and will point to to the new branching node later.
+        */
+       if (it->top == 0)
+               return ((smr_pctnode_t *)&it->ptree->pt_root);
+       node = it->path[it->top - 1];
+       return (&node->pn_child[pctrie_slot(node, it->index)]);
+}
+
 /*
  * Returns the value stored at a fixed offset from the current index value,
  * possibly NULL.
diff --git a/sys/sys/pctrie.h b/sys/sys/pctrie.h
index 4d5450c25079..d71290c8cf23 100644
--- a/sys/sys/pctrie.h
+++ b/sys/sys/pctrie.h
@@ -239,6 +239,24 @@ name##_PCTRIE_RECLAIM_CALLBACK(struct pctrie *ptree,       
                \
                freefn(ptree, freenode);                                \
 }                                                                      \
                                                                        \
+static __inline __unused int                                           \
+name##_PCTRIE_ITER_INSERT(struct pctrie_iter *it, struct type *ptr)    \
+{                                                                      \
+       struct pctrie_node *parent;                                     \
+       void *parentp;                                                  \
+       uint64_t *val = name##_PCTRIE_PTR2VAL(ptr);                     \
+                                                                       \
+       parentp = pctrie_iter_insert_lookup(it, val);                   \
+       if (parentp == NULL)                                            \
+               return (0);                                             \
+       parent = allocfn(it->ptree);                                    \
+       if (__predict_false(parent == NULL))                            \
+               return (ENOMEM);                                        \
+       pctrie_insert_node(parentp, parent, val);                       \
+       it->path[it->top++] = parent;                                   \
+       return (0);                                                     \
+}                                                                      \
+                                                                       \
 static __inline __unused struct type *                                 \
 name##_PCTRIE_ITER_LOOKUP(struct pctrie_iter *it, uint64_t index)      \
 {                                                                      \
@@ -369,6 +387,8 @@ uint64_t    *pctrie_iter_lookup(struct pctrie_iter *it, 
uint64_t index);
 uint64_t       *pctrie_iter_stride(struct pctrie_iter *it, int stride);
 uint64_t       *pctrie_iter_next(struct pctrie_iter *it);
 uint64_t       *pctrie_iter_prev(struct pctrie_iter *it);
+void           *pctrie_iter_insert_lookup(struct pctrie_iter *it,
+                   uint64_t *val);
 uint64_t       *pctrie_lookup_ge(struct pctrie *ptree, uint64_t key);
 uint64_t       *pctrie_subtree_lookup_gt(struct pctrie_node *node,
                    uint64_t key);
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 807215647a93..f4db46a32dee 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -486,8 +486,8 @@ static daddr_t      swp_pager_getswapspace(int *npages);
 /*
  * Metadata functions
  */
-static daddr_t swp_pager_meta_build(vm_object_t, vm_pindex_t, daddr_t,
-       bool);
+static daddr_t swp_pager_meta_build(struct pctrie_iter *, vm_object_t object,
+       vm_pindex_t, daddr_t, bool);
 static void swp_pager_meta_free(vm_object_t, vm_pindex_t, vm_pindex_t,
     vm_size_t *);
 static void swp_pager_meta_transfer(vm_object_t src, vm_object_t dst,
@@ -551,29 +551,39 @@ swblk_lookup_remove(vm_object_t object, struct swblk *sb)
        SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks, sb->p);
 }
 
-static int
-swblk_lookup_insert(vm_object_t object, struct swblk *sb)
-{
-       return (SWAP_PCTRIE_INSERT(&object->un_pager.swp.swp_blks, sb));
-}
-
 static bool
 swblk_is_empty(vm_object_t object)
 {
        return (pctrie_is_empty(&object->un_pager.swp.swp_blks));
 }
 
-static struct swblk *
-swblk_iter_init(struct pctrie_iter *blks, vm_object_t object,
-    vm_pindex_t pindex)
+static void
+swblk_iter_init_only(struct pctrie_iter *blks, vm_object_t object)
 {
        VM_OBJECT_ASSERT_LOCKED(object);
        MPASS((object->flags & OBJ_SWAP) != 0);
        pctrie_iter_init(blks, &object->un_pager.swp.swp_blks);
+}
+
+
+static struct swblk *
+swblk_iter_init(struct pctrie_iter *blks, vm_object_t object,
+    vm_pindex_t pindex)
+{
+       swblk_iter_init_only(blks, object);
        return (SWAP_PCTRIE_ITER_LOOKUP_GE(blks,
            rounddown(pindex, SWAP_META_PAGES)));
 }
 
+static struct swblk *
+swblk_iter_reinit(struct pctrie_iter *blks, vm_object_t object,
+    vm_pindex_t pindex)
+{
+       swblk_iter_init_only(blks, object);
+       return (SWAP_PCTRIE_ITER_LOOKUP(blks,
+           rounddown(pindex, SWAP_META_PAGES)));
+}
+
 static struct swblk *
 swblk_iter_limit_init(struct pctrie_iter *blks, vm_object_t object,
     vm_pindex_t pindex, vm_pindex_t limit)
@@ -591,6 +601,19 @@ swblk_iter_next(struct pctrie_iter *blks)
        return (SWAP_PCTRIE_ITER_JUMP_GE(blks, SWAP_META_PAGES));
 }
 
+static struct swblk *
+swblk_iter_lookup(struct pctrie_iter *blks, vm_pindex_t pindex)
+{
+       return (SWAP_PCTRIE_ITER_LOOKUP(blks,
+           rounddown(pindex, SWAP_META_PAGES)));
+}
+
+static int
+swblk_iter_insert(struct pctrie_iter *blks, struct swblk *sb)
+{
+       return (SWAP_PCTRIE_ITER_INSERT(blks, sb));
+}
+
 static void
 swblk_iter_remove(struct pctrie_iter *blks)
 {
@@ -1081,6 +1104,7 @@ swap_pager_freespace_pgo(vm_object_t object, vm_pindex_t 
start, vm_size_t size)
 int
 swap_pager_reserve(vm_object_t object, vm_pindex_t start, vm_pindex_t size)
 {
+       struct pctrie_iter blks;
        struct page_range range;
        daddr_t addr, blk;
        vm_pindex_t i, j;
@@ -1088,6 +1112,7 @@ swap_pager_reserve(vm_object_t object, vm_pindex_t start, 
vm_pindex_t size)
 
        swp_pager_init_freerange(&range);
        VM_OBJECT_WLOCK(object);
+       swblk_iter_init_only(&blks, object);
        for (i = 0; i < size; i += n) {
                n = MIN(size - i, INT_MAX);
                blk = swp_pager_getswapspace(&n);
@@ -1097,7 +1122,7 @@ swap_pager_reserve(vm_object_t object, vm_pindex_t start, 
vm_pindex_t size)
                        return (-1);
                }
                for (j = 0; j < n; ++j) {
-                       addr = swp_pager_meta_build(object,
+                       addr = swp_pager_meta_build(&blks, object,
                            start + i + j, blk + j, false);
                        if (addr != SWAPBLK_NONE)
                                swp_pager_update_freerange(&range, addr);
@@ -1535,6 +1560,7 @@ static void
 swap_pager_putpages(vm_object_t object, vm_page_t *ma, int count,
     int flags, int *rtvals)
 {
+       struct pctrie_iter blks;
        struct page_range range;
        struct buf *bp;
        daddr_t addr, blk;
@@ -1580,11 +1606,15 @@ swap_pager_putpages(vm_object_t object, vm_page_t *ma, 
int count,
                        continue;
                }
                VM_OBJECT_WLOCK(object);
+               swblk_iter_init_only(&blks, object);
                for (j = 0; j < n; ++j) {
                        mreq = ma[i + j];
                        vm_page_aflag_clear(mreq, PGA_SWAP_FREE);
-                       addr = swp_pager_meta_build(mreq->object, mreq->pindex,
-                           blk + j, false);
+                       KASSERT(mreq->object == object,
+                           ("%s: object mismatch %p/%p",
+                           __func__, mreq->object, object));
+                       addr = swp_pager_meta_build(&blks, object,
+                           mreq->pindex, blk + j, false);
                        if (addr != SWAPBLK_NONE)
                                swp_pager_update_freerange(&range, addr);
                        MPASS(mreq->dirty == VM_PAGE_BITS_ALL);
@@ -2102,19 +2132,18 @@ swp_pager_free_empty_swblk(vm_object_t object, struct 
swblk *sb)
  *     any.
  */
 static daddr_t
-swp_pager_meta_build(vm_object_t object, vm_pindex_t pindex, daddr_t swapblk,
-    bool nowait_noreplace)
+swp_pager_meta_build(struct pctrie_iter *blks, vm_object_t object,
+    vm_pindex_t pindex, daddr_t swapblk, bool nowait_noreplace)
 {
        static volatile int swblk_zone_exhausted, swpctrie_zone_exhausted;
        struct swblk *sb, *sb1;
-       vm_pindex_t modpi, rdpi;
+       vm_pindex_t modpi;
        daddr_t prev_swapblk;
        int error, i;
 
        VM_OBJECT_ASSERT_WLOCKED(object);
 
-       rdpi = rounddown(pindex, SWAP_META_PAGES);
-       sb = swblk_lookup(object, rdpi);
+       sb = swblk_iter_lookup(blks, pindex);
        if (sb == NULL) {
                if (swapblk == SWAPBLK_NONE)
                        return (SWAPBLK_NONE);
@@ -2122,7 +2151,7 @@ swp_pager_meta_build(vm_object_t object, vm_pindex_t 
pindex, daddr_t swapblk,
                        sb = uma_zalloc(swblk_zone, M_NOWAIT | (curproc ==
                            pageproc ? M_USE_RESERVE : 0));
                        if (sb != NULL) {
-                               sb->p = rdpi;
+                               sb->p = rounddown(pindex, SWAP_META_PAGES);
                                for (i = 0; i < SWAP_META_PAGES; i++)
                                        sb->d[i] = SWAPBLK_NONE;
                                if (atomic_cmpset_int(&swblk_zone_exhausted,
@@ -2143,17 +2172,17 @@ swp_pager_meta_build(vm_object_t object, vm_pindex_t 
pindex, daddr_t swapblk,
                        } else
                                uma_zwait(swblk_zone);
                        VM_OBJECT_WLOCK(object);
-                       sb = swblk_lookup(object, rdpi);
+                       sb = swblk_iter_reinit(blks, object, pindex);
                        if (sb != NULL)
                                /*
                                 * Somebody swapped out a nearby page,
-                                * allocating swblk at the rdpi index,
+                                * allocating swblk at the pindex index,
                                 * while we dropped the object lock.
                                 */
                                goto allocated;
                }
                for (;;) {
-                       error = swblk_lookup_insert(object, sb);
+                       error = swblk_iter_insert(blks, sb);
                        if (error == 0) {
                                if (atomic_cmpset_int(&swpctrie_zone_exhausted,
                                    1, 0))
@@ -2175,7 +2204,7 @@ swp_pager_meta_build(vm_object_t object, vm_pindex_t 
pindex, daddr_t swapblk,
                        } else
                                uma_zwait(swpctrie_zone);
                        VM_OBJECT_WLOCK(object);
-                       sb1 = swblk_lookup(object, rdpi);
+                       sb1 = swblk_iter_reinit(blks, object, pindex);
                        if (sb1 != NULL) {
                                uma_zfree(swblk_zone, sb);
                                sb = sb1;
@@ -2184,7 +2213,7 @@ swp_pager_meta_build(vm_object_t object, vm_pindex_t 
pindex, daddr_t swapblk,
                }
        }
 allocated:
-       MPASS(sb->p == rdpi);
+       MPASS(sb->p == rounddown(pindex, SWAP_META_PAGES));
 
        modpi = pindex % SWAP_META_PAGES;
        /* Return prior contents of metadata. */
@@ -2196,8 +2225,11 @@ allocated:
                /*
                 * Free the swblk if we end up with the empty page run.
                 */
-               if (swapblk == SWAPBLK_NONE)
-                       swp_pager_free_empty_swblk(object, sb);
+               if (swapblk == SWAPBLK_NONE &&
+                   swp_pager_swblk_empty(sb, 0, SWAP_META_PAGES)) {
+                       swblk_iter_remove(blks);
+                       uma_zfree(swblk_zone, sb);
+               }
        }
        return (prev_swapblk);
 }
@@ -2213,7 +2245,7 @@ static void
 swp_pager_meta_transfer(vm_object_t srcobject, vm_object_t dstobject,
     vm_pindex_t pindex, vm_pindex_t count)
 {
-       struct pctrie_iter blks;
+       struct pctrie_iter dstblks, srcblks;
        struct page_range range;
        struct swblk *sb;
        daddr_t blk, d[SWAP_META_PAGES];
@@ -2231,15 +2263,16 @@ swp_pager_meta_transfer(vm_object_t srcobject, 
vm_object_t dstobject,
        swp_pager_init_freerange(&range);
        d_mask = 0;
        last = pindex + count;
-       for (sb = swblk_iter_limit_init(&blks, srcobject, pindex, last),
+       swblk_iter_init_only(&dstblks, dstobject);
+       for (sb = swblk_iter_limit_init(&srcblks, srcobject, pindex, last),
            start = swblk_start(sb, pindex);
-           sb != NULL; sb = swblk_iter_next(&blks), start = 0) {
-               limit = MIN(last - blks.index, SWAP_META_PAGES);
+           sb != NULL; sb = swblk_iter_next(&srcblks), start = 0) {
+               limit = MIN(last - srcblks.index, SWAP_META_PAGES);
                for (i = start; i < limit; i++) {
                        if (sb->d[i] == SWAPBLK_NONE)
                                continue;
-                       blk = swp_pager_meta_build(dstobject,
-                           blks.index + i - pindex, sb->d[i], true);
+                       blk = swp_pager_meta_build(&dstblks, dstobject,
+                           srcblks.index + i - pindex, sb->d[i], true);
                        if (blk == sb->d[i]) {
                                /*
                                 * Failed memory allocation stopped transfer;
@@ -2256,7 +2289,7 @@ swp_pager_meta_transfer(vm_object_t srcobject, 
vm_object_t dstobject,
                }
                if (swp_pager_swblk_empty(sb, 0, start) &&
                    swp_pager_swblk_empty(sb, limit, SWAP_META_PAGES)) {
-                       swblk_iter_remove(&blks);
+                       swblk_iter_remove(&srcblks);
                        uma_zfree(swblk_zone, sb);
                }
                if (d_mask != 0) {
@@ -2264,8 +2297,8 @@ swp_pager_meta_transfer(vm_object_t srcobject, 
vm_object_t dstobject,
                        VM_OBJECT_WUNLOCK(srcobject);
                        do {
                                i = ffs(d_mask) - 1;
-                               swp_pager_meta_build(dstobject,
-                                   blks.index + i - pindex, d[i], false);
+                               swp_pager_meta_build(&dstblks, dstobject,
+                                   srcblks.index + i - pindex, d[i], false);
                                d_mask &= ~(1 << i);
                        } while (d_mask != 0);
                        VM_OBJECT_WLOCK(srcobject);
@@ -2274,7 +2307,7 @@ swp_pager_meta_transfer(vm_object_t srcobject, 
vm_object_t dstobject,
                         * While the lock was not held, the iterator path could
                         * have become stale, so discard it.
                         */
-                       pctrie_iter_reset(&blks);
+                       pctrie_iter_reset(&srcblks);
                }
        }
        swp_pager_freeswapspace(&range);

Reply via email to