Author: markj
Date: Sat Dec 28 19:04:00 2019
New Revision: 356157
URL: https://svnweb.freebsd.org/changeset/base/356157

Log:
  Remove page locking for queue operations.
  
  With the previous reviews, the page lock is no longer required in order
  to perform queue operations on a page.  It is also no longer needed in
  the page queue scans.  This change effectively eliminates remaining uses
  of the page lock and also the false sharing caused by multiple pages
  sharing a page lock.
  
  Reviewed by:  jeff
  Tested by:    pho
  Sponsored by: Netflix, Intel
  Differential Revision:        https://reviews.freebsd.org/D22885
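
The heart of the change is visible in each hunk below: vm_page_lock()/
vm_page_unlock() pairs around queue operations such as vm_page_activate()
are simply removed.  A minimal before/after sketch of the calling pattern,
drawn from the uipc_shm.c hunk in this diff (a kernel-code fragment, not a
standalone example; "m" stands for a vm_page_t that the caller has busied):

    /* Before r356157: queue operations required the per-page mutex. */
    vm_page_lock(m);
    vm_page_activate(m);
    vm_page_unlock(m);
    vm_page_sunbusy(m);

    /* After r356157: holding the page busy (or wired) is sufficient for
     * queue operations, so the page-lock bracket is dropped and
     * vm_page_activate() is called directly. */
    vm_page_activate(m);
    vm_page_sunbusy(m);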

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
  head/sys/dev/md/md.c
  head/sys/fs/tmpfs/tmpfs_subr.c
  head/sys/kern/uipc_shm.c
  head/sys/vm/swap_pager.c
  head/sys/vm/vm_fault.c
  head/sys/vm/vm_object.c
  head/sys/vm/vm_page.c
  head/sys/vm/vm_page.h
  head/sys/vm/vm_pageout.c
  head/sys/vm/vm_swapout.c

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c   Sat Dec 28 19:03:46 2019        (r356156)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c   Sat Dec 28 19:04:00 2019        (r356157)
@@ -1761,12 +1761,10 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_
                bcopy((char *)db->db_data + bufoff, va, PAGESIZE);
                zfs_unmap_page(sf);
                vm_page_valid(m);
-               vm_page_lock(m);
                if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
                        vm_page_activate(m);
                else
                        vm_page_deactivate(m);
-               vm_page_unlock(m);
                vm_page_sunbusy(m);
        }
        *rbehind = i;
@@ -1884,12 +1882,10 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_
                }
                zfs_unmap_page(sf);
                vm_page_valid(m);
-               vm_page_lock(m);
                if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
                        vm_page_activate(m);
                else
                        vm_page_deactivate(m);
-               vm_page_unlock(m);
                vm_page_sunbusy(m);
        }
        *rahead = i;

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c     Sat Dec 28 19:03:46 2019        (r356156)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c     Sat Dec 28 19:04:00 2019        (r356157)
@@ -545,9 +545,7 @@ mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio)
                        zfs_vmobject_wlock(obj);
                        if (error == 0) {
                                vm_page_valid(pp);
-                               vm_page_lock(pp);
                                vm_page_activate(pp);
-                               vm_page_unlock(pp);
                        }
                        vm_page_sunbusy(pp);
                        if (error != 0 && !vm_page_wired(pp) &&

Modified: head/sys/dev/md/md.c
==============================================================================
--- head/sys/dev/md/md.c        Sat Dec 28 19:03:46 2019        (r356156)
+++ head/sys/dev/md/md.c        Sat Dec 28 19:04:00 2019        (r356157)
@@ -1145,12 +1145,7 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
                }
                if (m != NULL) {
                        vm_page_xunbusy(m);
-                       vm_page_lock(m);
-                       if (vm_page_active(m))
-                               vm_page_reference(m);
-                       else
-                               vm_page_activate(m);
-                       vm_page_unlock(m);
+                       vm_page_reference(m);
                }
 
                /* Actions on further pages start at offset 0 */

Modified: head/sys/fs/tmpfs/tmpfs_subr.c
==============================================================================
--- head/sys/fs/tmpfs/tmpfs_subr.c      Sat Dec 28 19:03:46 2019        (r356156)
+++ head/sys/fs/tmpfs/tmpfs_subr.c      Sat Dec 28 19:04:00 2019        (r356157)
@@ -1490,9 +1490,7 @@ retry:
                                         * current operation is not regarded
                                         * as an access.
                                         */
-                                       vm_page_lock(m);
                                        vm_page_launder(m);
-                                       vm_page_unlock(m);
                                } else {
                                        vm_page_free(m);
                                        if (ignerr)

Modified: head/sys/kern/uipc_shm.c
==============================================================================
--- head/sys/kern/uipc_shm.c    Sat Dec 28 19:03:46 2019        (r356156)
+++ head/sys/kern/uipc_shm.c    Sat Dec 28 19:04:00 2019        (r356157)
@@ -209,9 +209,7 @@ uiomove_object_page(vm_object_t obj, size_t len, struc
        error = uiomove_fromphys(&m, offset, tlen, uio);
        if (uio->uio_rw == UIO_WRITE && error == 0)
                vm_page_set_dirty(m);
-       vm_page_lock(m);
        vm_page_activate(m);
-       vm_page_unlock(m);
        vm_page_sunbusy(m);
 
        return (error);

Modified: head/sys/vm/swap_pager.c
==============================================================================
--- head/sys/vm/swap_pager.c    Sat Dec 28 19:03:46 2019        (r356156)
+++ head/sys/vm/swap_pager.c    Sat Dec 28 19:04:00 2019        (r356157)
@@ -1630,10 +1630,9 @@ swp_pager_async_iodone(struct buf *bp)
                                 * then finish the I/O.
                                 */
                                MPASS(m->dirty == VM_PAGE_BITS_ALL);
+
                                /* PQ_UNSWAPPABLE? */
-                               vm_page_lock(m);
                                vm_page_activate(m);
-                               vm_page_unlock(m);
                                vm_page_sunbusy(m);
                        }
                } else if (bp->b_iocmd == BIO_READ) {
@@ -1668,9 +1667,7 @@ swp_pager_async_iodone(struct buf *bp)
                            ("swp_pager_async_iodone: page %p is not write"
                            " protected", m));
                        vm_page_undirty(m);
-                       vm_page_lock(m);
                        vm_page_deactivate_noreuse(m);
-                       vm_page_unlock(m);
                        vm_page_sunbusy(m);
                }
        }
@@ -1718,10 +1715,8 @@ swp_pager_force_dirty(vm_page_t m)
 
        vm_page_dirty(m);
 #ifdef INVARIANTS
-       vm_page_lock(m);
        if (!vm_page_wired(m) && m->a.queue == PQ_NONE)
                panic("page %p is neither wired nor queued", m);
-       vm_page_unlock(m);
 #endif
        vm_page_xunbusy(m);
        swap_pager_unswapped(m);
@@ -1732,9 +1727,7 @@ swp_pager_force_launder(vm_page_t m)
 {
 
        vm_page_dirty(m);
-       vm_page_lock(m);
        vm_page_launder(m);
-       vm_page_unlock(m);
        vm_page_xunbusy(m);
        swap_pager_unswapped(m);
 }

Modified: head/sys/vm/vm_fault.c
==============================================================================
--- head/sys/vm/vm_fault.c      Sat Dec 28 19:03:46 2019        (r356156)
+++ head/sys/vm/vm_fault.c      Sat Dec 28 19:04:00 2019        (r356157)
@@ -162,9 +162,7 @@ fault_page_release(vm_page_t *mp)
                 * this page.  Deactivating it leaves it available for
                 * pageout while optimizing fault restarts.
                 */
-               vm_page_lock(m);
                vm_page_deactivate(m);
-               vm_page_unlock(m);
                vm_page_xunbusy(m);
                *mp = NULL;
        }
@@ -395,9 +393,7 @@ vm_fault_populate_cleanup(vm_object_t object, vm_pinde
        for (pidx = first, m = vm_page_lookup(object, pidx);
            pidx <= last; pidx++, m = vm_page_next(m)) {
                vm_fault_populate_check_page(m);
-               vm_page_lock(m);
                vm_page_deactivate(m);
-               vm_page_unlock(m);
                vm_page_xunbusy(m);
        }
 }
@@ -406,7 +402,6 @@ static int
 vm_fault_populate(struct faultstate *fs, vm_prot_t prot, int fault_type,
     int fault_flags, boolean_t wired, vm_page_t *m_hold)
 {
-       struct mtx *m_mtx;
        vm_offset_t vaddr;
        vm_page_t m;
        vm_pindex_t map_first, map_last, pager_first, pager_last, pidx;
@@ -518,22 +513,17 @@ vm_fault_populate(struct faultstate *fs, vm_prot_t pro
                MPASS(rv == KERN_SUCCESS);
 #endif
                VM_OBJECT_WLOCK(fs->first_object);
-               m_mtx = NULL;
                for (i = 0; i < npages; i++) {
-                       if ((fault_flags & VM_FAULT_WIRE) != 0) {
+                       if ((fault_flags & VM_FAULT_WIRE) != 0)
                                vm_page_wire(&m[i]);
-                       } else {
-                               vm_page_change_lock(&m[i], &m_mtx);
+                       else
                                vm_page_activate(&m[i]);
-                       }
                        if (m_hold != NULL && m[i].pindex == fs->first_pindex) {
                                *m_hold = &m[i];
                                vm_page_wire(&m[i]);
                        }
                        vm_page_xunbusy(&m[i]);
                }
-               if (m_mtx != NULL)
-                       mtx_unlock(m_mtx);
        }
        curthread->td_ru.ru_majflt++;
        return (KERN_SUCCESS);
@@ -1393,13 +1383,10 @@ readrest:
         * If the page is not wired down, then put it where the pageout daemon
         * can find it.
         */
-       if ((fault_flags & VM_FAULT_WIRE) != 0) {
+       if ((fault_flags & VM_FAULT_WIRE) != 0)
                vm_page_wire(fs.m);
-       } else {
-               vm_page_lock(fs.m);
+       else
                vm_page_activate(fs.m);
-               vm_page_unlock(fs.m);
-       }
        if (m_hold != NULL) {
                *m_hold = fs.m;
                vm_page_wire(fs.m);
@@ -1499,12 +1486,13 @@ vm_fault_dontneed(const struct faultstate *fs, vm_offs
                                 * Typically, at this point, prefetched pages
                                 * are still in the inactive queue.  Only
                                 * pages that triggered page faults are in the
-                                * active queue.
+                                * active queue.  The test for whether the page
+                                * is in the inactive queue is racy; in the
+                                * worst case we will requeue the page
+                                * unnecessarily.
                                 */
-                               vm_page_lock(m);
                                if (!vm_page_inactive(m))
                                        vm_page_deactivate(m);
-                               vm_page_unlock(m);
                        }
                }
        }
@@ -1879,9 +1867,7 @@ again:
                                    ("dst_m %p is not wired", dst_m));
                        }
                } else {
-                       vm_page_lock(dst_m);
                        vm_page_activate(dst_m);
-                       vm_page_unlock(dst_m);
                }
                vm_page_xunbusy(dst_m);
        }

Modified: head/sys/vm/vm_object.c
==============================================================================
--- head/sys/vm/vm_object.c     Sat Dec 28 19:03:46 2019        (r356156)
+++ head/sys/vm/vm_object.c     Sat Dec 28 19:04:00 2019        (r356157)
@@ -1293,9 +1293,7 @@ next_page:
                        vm_page_busy_sleep(tm, "madvpo", false);
                        goto relookup;
                }
-               vm_page_lock(tm);
                vm_page_advise(tm, advice);
-               vm_page_unlock(tm);
                vm_page_xunbusy(tm);
                vm_object_madvise_freespace(tobject, advice, tm->pindex, 1);
 next_pindex:
@@ -2059,7 +2057,6 @@ wired:
 void
 vm_object_page_noreuse(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
 {
-       struct mtx *mtx;
        vm_page_t p, next;
 
        VM_OBJECT_ASSERT_LOCKED(object);
@@ -2073,14 +2070,10 @@ vm_object_page_noreuse(vm_object_t object, vm_pindex_t
         * Here, the variable "p" is either (1) the page with the least pindex
         * greater than or equal to the parameter "start" or (2) NULL. 
         */
-       mtx = NULL;
        for (; p != NULL && (p->pindex < end || end == 0); p = next) {
                next = TAILQ_NEXT(p, listq);
-               vm_page_change_lock(p, &mtx);
                vm_page_deactivate_noreuse(p);
        }
-       if (mtx != NULL)
-               mtx_unlock(mtx);
 }
 
 /*

Modified: head/sys/vm/vm_page.c
==============================================================================
--- head/sys/vm/vm_page.c       Sat Dec 28 19:03:46 2019        (r356156)
+++ head/sys/vm/vm_page.c       Sat Dec 28 19:04:00 2019        (r356157)
@@ -1371,12 +1371,10 @@ vm_page_readahead_finish(vm_page_t m)
         * have shown that deactivating the page is usually the best choice,
         * unless the page is wanted by another thread.
         */
-       vm_page_lock(m);
        if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
                vm_page_activate(m);
        else
                vm_page_deactivate(m);
-       vm_page_unlock(m);
        vm_page_xunbusy_unchecked(m);
 }
 
@@ -3651,7 +3649,6 @@ static void
 vm_page_enqueue(vm_page_t m, uint8_t queue)
 {
 
-       vm_page_assert_locked(m);
        KASSERT(m->a.queue == PQ_NONE &&
            (m->a.flags & PGA_QUEUE_STATE_MASK) == 0,
            ("%s: page %p is already enqueued", __func__, m));
@@ -4124,7 +4121,6 @@ void
 vm_page_unswappable(vm_page_t m)
 {
 
-       vm_page_assert_locked(m);
        KASSERT(!vm_page_wired(m) && (m->oflags & VPO_UNMANAGED) == 0,
            ("page %p already unswappable", m));
 
@@ -4222,9 +4218,7 @@ vm_page_release_locked(vm_page_t m, int flags)
                    m->dirty == 0 && vm_page_tryxbusy(m)) {
                        vm_page_free(m);
                } else {
-                       vm_page_lock(m);
                        vm_page_release_toq(m, PQ_INACTIVE, flags != 0);
-                       vm_page_unlock(m);
                }
        }
 }
@@ -4293,7 +4287,6 @@ void
 vm_page_advise(vm_page_t m, int advice)
 {
 
-       vm_page_assert_locked(m);
        VM_OBJECT_ASSERT_WLOCKED(m->object);
        if (advice == MADV_FREE)
                /*
@@ -4309,14 +4302,14 @@ vm_page_advise(vm_page_t m, int advice)
                return;
        }
 
+       if (advice != MADV_FREE && m->dirty == 0 && pmap_is_modified(m))
+               vm_page_dirty(m);
+
        /*
         * Clear any references to the page.  Otherwise, the page daemon will
         * immediately reactivate the page.
         */
        vm_page_aflag_clear(m, PGA_REFERENCED);
-
-       if (advice != MADV_FREE && m->dirty == 0 && pmap_is_modified(m))
-               vm_page_dirty(m);
 
        /*
         * Place clean pages near the head of the inactive queue rather than

Modified: head/sys/vm/vm_page.h
==============================================================================
--- head/sys/vm/vm_page.h       Sat Dec 28 19:03:46 2019        (r356156)
+++ head/sys/vm/vm_page.h       Sat Dec 28 19:04:00 2019        (r356157)
@@ -808,12 +808,6 @@ vm_page_aflag_clear(vm_page_t m, uint16_t bits)
        uint32_t *addr, val;
 
        /*
-        * The PGA_REFERENCED flag can only be cleared if the page is locked.
-        */
-       if ((bits & PGA_REFERENCED) != 0)
-               vm_page_assert_locked(m);
-
-       /*
         * Access the whole 32-bit word containing the aflags field with an
         * atomic update.  Parallel non-atomic updates to the other fields
         * within this word are handled properly by the atomic update.
@@ -920,8 +914,6 @@ _vm_page_queue(vm_page_astate_t as)
 static inline uint8_t
 vm_page_queue(vm_page_t m)
 {
-
-       vm_page_assert_locked(m);
 
        return (_vm_page_queue(vm_page_astate_load(m)));
 }

Modified: head/sys/vm/vm_pageout.c
==============================================================================
--- head/sys/vm/vm_pageout.c    Sat Dec 28 19:03:46 2019        (r356156)
+++ head/sys/vm/vm_pageout.c    Sat Dec 28 19:04:00 2019        (r356157)
@@ -396,14 +396,11 @@ more:
                        vm_page_xunbusy(p);
                        break;
                }
-               vm_page_lock(p);
                if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) {
-                       vm_page_unlock(p);
                        vm_page_xunbusy(p);
                        ib = 0;
                        break;
                }
-               vm_page_unlock(p);
                mc[--page_base] = pb = p;
                ++pageout_count;
                ++ib;
@@ -429,13 +426,10 @@ more:
                        vm_page_xunbusy(p);
                        break;
                }
-               vm_page_lock(p);
                if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) {
-                       vm_page_unlock(p);
                        vm_page_xunbusy(p);
                        break;
                }
-               vm_page_unlock(p);
                mc[page_base + pageout_count] = ps = p;
                ++pageout_count;
                ++is;
@@ -511,10 +505,12 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, 
                    ("vm_pageout_flush: page %p is not write protected", mt));
                switch (pageout_status[i]) {
                case VM_PAGER_OK:
-                       vm_page_lock(mt);
+                       /*
+                        * The page may have moved since laundering started, in
+                        * which case it should be left alone.
+                        */
                        if (vm_page_in_laundry(mt))
                                vm_page_deactivate_noreuse(mt);
-                       vm_page_unlock(mt);
                        /* FALLTHROUGH */
                case VM_PAGER_PEND:
                        numpagedout++;
@@ -527,10 +523,8 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, 
                         * the page daemon.
                         */
                        vm_page_undirty(mt);
-                       vm_page_lock(mt);
                        if (vm_page_in_laundry(mt))
                                vm_page_deactivate_noreuse(mt);
-                       vm_page_unlock(mt);
                        break;
                case VM_PAGER_ERROR:
                case VM_PAGER_FAIL:
@@ -546,14 +540,12 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, 
                         * clog the laundry and inactive queues.  (We will try
                         * paging it out again later.)
                         */
-                       vm_page_lock(mt);
                        if (object->type == OBJT_SWAP &&
                            pageout_status[i] == VM_PAGER_FAIL) {
                                vm_page_unswappable(mt);
                                numpagedout++;
                        } else
                                vm_page_activate(mt);
-                       vm_page_unlock(mt);
                        if (eio != NULL && i >= mreq && i - mreq < runlen)
                                *eio = TRUE;
                        break;
@@ -611,7 +603,6 @@ vm_pageout_clean(vm_page_t m, int *numpagedout)
        vm_pindex_t pindex;
        int error, lockmode;
 
-       vm_page_assert_locked(m);
        object = m->object;
        VM_OBJECT_ASSERT_WLOCKED(object);
        error = 0;
@@ -631,7 +622,6 @@ vm_pageout_clean(vm_page_t m, int *numpagedout)
         * of time.
         */
        if (object->type == OBJT_VNODE) {
-               vm_page_unlock(m);
                vm_page_xunbusy(m);
                vp = object->handle;
                if (vp->v_type == VREG &&
@@ -662,11 +652,9 @@ vm_pageout_clean(vm_page_t m, int *numpagedout)
                        error = ENOENT;
                        goto unlock_all;
                }
-               vm_page_lock(m);
 
                /*
-                * While the object and page were unlocked, the page
-                * may have been:
+                * While the object was unlocked, the page may have been:
                 * (1) moved to a different queue,
                 * (2) reallocated to a different object,
                 * (3) reallocated to a different offset, or
@@ -674,17 +662,15 @@ vm_pageout_clean(vm_page_t m, int *numpagedout)
                 */
                if (!vm_page_in_laundry(m) || m->object != object ||
                    m->pindex != pindex || m->dirty == 0) {
-                       vm_page_unlock(m);
                        error = ENXIO;
                        goto unlock_all;
                }
 
                /*
-                * The page may have been busied while the object and page
-                * locks were released.
+                * The page may have been busied while the object lock was
+                * released.
                 */
                if (vm_page_tryxbusy(m) == 0) {
-                       vm_page_unlock(m);
                        error = EBUSY;
                        goto unlock_all;
                }
@@ -695,11 +681,9 @@ vm_pageout_clean(vm_page_t m, int *numpagedout)
         */
        if (!vm_page_try_remove_write(m)) {
                vm_page_xunbusy(m);
-               vm_page_unlock(m);
                error = EBUSY;
                goto unlock_all;
        }
-       vm_page_unlock(m);
 
        /*
         * If a page is dirty, then it is either being washed
@@ -714,7 +698,6 @@ unlock_all:
        VM_OBJECT_WUNLOCK(object);
 
 unlock_mp:
-       vm_page_lock_assert(m, MA_NOTOWNED);
        if (mp != NULL) {
                if (vp != NULL)
                        vput(vp);
@@ -735,7 +718,6 @@ vm_pageout_launder(struct vm_domain *vmd, int launder,
 {
        struct scan_state ss;
        struct vm_pagequeue *pq;
-       struct mtx *mtx;
        vm_object_t object;
        vm_page_t m, marker;
        vm_page_astate_t new, old;
@@ -743,7 +725,6 @@ vm_pageout_launder(struct vm_domain *vmd, int launder,
        int vnodes_skipped;
        bool pageout_ok;
 
-       mtx = NULL;
        object = NULL;
        starting_target = launder;
        vnodes_skipped = 0;
@@ -771,9 +752,6 @@ scan:
                if (__predict_false((m->flags & PG_MARKER) != 0))
                        continue;
 
-               vm_page_change_lock(m, &mtx);
-
-recheck:
                /*
                 * Don't touch a page that was removed from the queue after the
                 * page queue lock was released.  Otherwise, ensure that any
@@ -783,46 +761,39 @@ recheck:
                if (vm_pageout_defer(m, queue, true))
                        continue;
 
-               if (object != m->object) {
+               /*
+                * Lock the page's object.
+                */
+               if (object == NULL || object != m->object) {
                        if (object != NULL)
                                VM_OBJECT_WUNLOCK(object);
-
-                       /*
-                        * A page's object pointer may be set to NULL before
-                        * the object lock is acquired.
-                        */
                        object = (vm_object_t)atomic_load_ptr(&m->object);
-                       if (object != NULL && !VM_OBJECT_TRYWLOCK(object)) {
-                               mtx_unlock(mtx);
-                               /* Depends on type-stability. */
-                               VM_OBJECT_WLOCK(object);
-                               mtx_lock(mtx);
-                               goto recheck;
+                       if (__predict_false(object == NULL))
+                               /* The page is being freed by another thread. */
+                               continue;
+
+                       /* Depends on type-stability. */
+                       VM_OBJECT_WLOCK(object);
+                       if (__predict_false(m->object != object)) {
+                               VM_OBJECT_WUNLOCK(object);
+                               object = NULL;
+                               continue;
                        }
                }
-               if (__predict_false(m->object == NULL))
-                       /*
-                        * The page has been removed from its object.
-                        */
-                       continue;
-               KASSERT(m->object == object, ("page %p does not belong to %p",
-                   m, object));
 
                if (vm_page_tryxbusy(m) == 0)
                        continue;
 
                /*
                 * Check for wirings now that we hold the object lock and have
-                * verified that the page is unbusied.  If the page is mapped,
-                * it may still be wired by pmap lookups.  The call to
+                * exclusively busied the page.  If the page is mapped, it may
+                * still be wired by pmap lookups.  The call to
                 * vm_page_try_remove_all() below atomically checks for such
                 * wirings and removes mappings.  If the page is unmapped, the
-                * wire count is guaranteed not to increase.
+                * wire count is guaranteed not to increase after this check.
                 */
-               if (__predict_false(vm_page_wired(m))) {
-                       vm_page_dequeue_deferred(m);
+               if (__predict_false(vm_page_wired(m)))
                        goto skip_page;
-               }
 
                /*
                 * Invalid pages can be easily freed.  They cannot be
@@ -898,10 +869,8 @@ recheck:
                 */
                if (object->ref_count != 0) {
                        vm_page_test_dirty(m);
-                       if (m->dirty == 0 && !vm_page_try_remove_all(m)) {
-                               vm_page_dequeue_deferred(m);
+                       if (m->dirty == 0 && !vm_page_try_remove_all(m))
                                goto skip_page;
-                       }
                }
 
                /*
@@ -912,6 +881,13 @@ recheck:
                 */
                if (m->dirty == 0) {
 free_page:
+                       /*
+                        * Now we are guaranteed that no other threads are
+                        * manipulating the page, check for a last-second
+                        * reference.
+                        */
+                       if (vm_pageout_defer(m, queue, true))
+                               goto skip_page;
                        vm_page_free(m);
                        VM_CNT_INC(v_dfree);
                } else if ((object->flags & OBJ_DEAD) == 0) {
@@ -948,17 +924,12 @@ free_page:
                                pageout_lock_miss++;
                                vnodes_skipped++;
                        }
-                       mtx = NULL;
                        object = NULL;
                } else {
 skip_page:
                        vm_page_xunbusy(m);
                }
        }
-       if (mtx != NULL) {
-               mtx_unlock(mtx);
-               mtx = NULL;
-       }
        if (object != NULL) {
                VM_OBJECT_WUNLOCK(object);
                object = NULL;
@@ -1195,7 +1166,6 @@ static void
 vm_pageout_scan_active(struct vm_domain *vmd, int page_shortage)
 {
        struct scan_state ss;
-       struct mtx *mtx;
        vm_object_t object;
        vm_page_t m, marker;
        struct vm_pagequeue *pq;
@@ -1236,7 +1206,6 @@ vm_pageout_scan_active(struct vm_domain *vmd, int page
         * and scanning resumes.
         */
        max_scan = page_shortage > 0 ? pq->pq_cnt : min_scan;
-       mtx = NULL;
 act_scan:
        vm_pageout_init_scan(&ss, pq, marker, &vmd->vmd_clock[0], max_scan);
        while ((m = vm_pageout_next(&ss, false)) != NULL) {
@@ -1255,8 +1224,6 @@ act_scan:
                if (__predict_false((m->flags & PG_MARKER) != 0))
                        continue;
 
-               vm_page_change_lock(m, &mtx);
-
                /*
                 * Don't touch a page that was removed from the queue after the
                 * page queue lock was released.  Otherwise, ensure that any
@@ -1390,10 +1357,6 @@ act_scan:
 
                page_shortage -= ps_delta;
        }
-       if (mtx != NULL) {
-               mtx_unlock(mtx);
-               mtx = NULL;
-       }
        vm_pagequeue_lock(pq);
        TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_clock[0], plinks.q);
        TAILQ_INSERT_AFTER(&pq->pq_pl, marker, &vmd->vmd_clock[0], plinks.q);
@@ -1459,7 +1422,6 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int sh
 {
        struct scan_state ss;
        struct vm_batchqueue rq;
-       struct mtx *mtx;
        vm_page_t m, marker;
        struct vm_pagequeue *pq;
        vm_object_t object;
@@ -1484,7 +1446,6 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int sh
        deficit = atomic_readandclear_int(&vmd->vmd_pageout_deficit);
        starting_page_shortage = page_shortage = shortage + deficit;
 
-       mtx = NULL;
        object = NULL;
        vm_batchqueue_init(&rq);
 
@@ -1502,9 +1463,6 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int sh
                KASSERT((m->flags & PG_MARKER) == 0,
                    ("marker page %p was dequeued", m));
 
-               vm_page_change_lock(m, &mtx);
-
-recheck:
                /*
                 * Don't touch a page that was removed from the queue after the
                 * page queue lock was released.  Otherwise, ensure that any
@@ -1514,30 +1472,25 @@ recheck:
                if (vm_pageout_defer(m, PQ_INACTIVE, false))
                        continue;
 
-               if (object != m->object) {
+               /*
+                * Lock the page's object.
+                */
+               if (object == NULL || object != m->object) {
                        if (object != NULL)
                                VM_OBJECT_WUNLOCK(object);
-
-                       /*
-                        * A page's object pointer may be set to NULL before
-                        * the object lock is acquired.
-                        */
                        object = (vm_object_t)atomic_load_ptr(&m->object);
-                       if (object != NULL && !VM_OBJECT_TRYWLOCK(object)) {
-                               mtx_unlock(mtx);
-                               /* Depends on type-stability. */
-                               VM_OBJECT_WLOCK(object);
-                               mtx_lock(mtx);
-                               goto recheck;
+                       if (__predict_false(object == NULL))
+                               /* The page is being freed by another thread. */
+                               continue;
+
+                       /* Depends on type-stability. */
+                       VM_OBJECT_WLOCK(object);
+                       if (__predict_false(m->object != object)) {
+                               VM_OBJECT_WUNLOCK(object);
+                               object = NULL;
+                               goto reinsert;
                        }
                }
-               if (__predict_false(m->object == NULL))
-                       /*
-                        * The page has been removed from its object.
-                        */
-                       continue;
-               KASSERT(m->object == object, ("page %p does not belong to %p",
-                   m, object));
 
                if (vm_page_tryxbusy(m) == 0) {
                        /*
@@ -1557,17 +1510,15 @@ recheck:
                        vm_pager_page_unswapped(m);
 
                /*
-                * Re-check for wirings now that we hold the object lock and
-                * have verified that the page is unbusied.  If the page is
-                * mapped, it may still be wired by pmap lookups.  The call to
+                * Check for wirings now that we hold the object lock and have
+                * exclusively busied the page.  If the page is mapped, it may
+                * still be wired by pmap lookups.  The call to
                 * vm_page_try_remove_all() below atomically checks for such
                 * wirings and removes mappings.  If the page is unmapped, the
-                * wire count is guaranteed not to increase.
+                * wire count is guaranteed not to increase after this check.
                 */
-               if (__predict_false(vm_page_wired(m))) {
-                       vm_page_dequeue_deferred(m);
+               if (__predict_false(vm_page_wired(m)))
                        goto skip_page;
-               }
 
                /*
                 * Invalid pages can be easily freed. They cannot be
@@ -1635,10 +1586,8 @@ recheck:
                 */
                if (object->ref_count != 0) {
                        vm_page_test_dirty(m);
-                       if (m->dirty == 0 && !vm_page_try_remove_all(m)) {
-                               vm_page_dequeue_deferred(m);
+                       if (m->dirty == 0 && !vm_page_try_remove_all(m))
                                goto skip_page;
-                       }
                }
 
                /*
@@ -1651,13 +1600,19 @@ recheck:
                if (m->dirty == 0) {
 free_page:
                        /*
-                        * Because we dequeued the page and have already
-                        * checked for concurrent dequeue and enqueue
-                        * requests, we can safely disassociate the page
-                        * from the inactive queue.
+                        * Now we are guaranteed that no other threads are
+                        * manipulating the page, check for a last-second
+                        * reference that would save it from doom.
                         */
-                       KASSERT((m->a.flags & PGA_QUEUE_STATE_MASK) == 0,
-                           ("page %p has queue state", m));
+                       if (vm_pageout_defer(m, PQ_INACTIVE, false))
+                               goto skip_page;
+
+                       /*
+                        * Because we dequeued the page and have already checked
+                        * for pending dequeue and enqueue requests, we can
+                        * safely disassociate the page from the inactive queue
+                        * without holding the queue lock.
+                        */
                        m->a.queue = PQ_NONE;
                        vm_page_free(m);
                        page_shortage--;
@@ -1671,8 +1626,6 @@ skip_page:
 reinsert:
                vm_pageout_reinsert_inactive(&ss, &rq, m);
        }
-       if (mtx != NULL)
-               mtx_unlock(mtx);
        if (object != NULL)
                VM_OBJECT_WUNLOCK(object);
        vm_pageout_reinsert_inactive(&ss, &rq, NULL);

Modified: head/sys/vm/vm_swapout.c
==============================================================================
--- head/sys/vm/vm_swapout.c    Sat Dec 28 19:03:46 2019        (r356156)
+++ head/sys/vm/vm_swapout.c    Sat Dec 28 19:04:00 2019        (r356157)
@@ -186,12 +186,10 @@ vm_swapout_object_deactivate_page(pmap_t pmap, vm_page
                return;
        }
        if (!pmap_is_referenced(m)) {
-               vm_page_lock(m);
                if (!vm_page_active(m))
                        (void)vm_page_try_remove_all(m);
                else if (unmap && vm_page_try_remove_all(m))
                        vm_page_deactivate(m);
-               vm_page_unlock(m);
        }
        vm_page_xunbusy(m);
 }