Author: markj
Date: Sat Dec 28 19:03:46 2019
New Revision: 356156
URL: https://svnweb.freebsd.org/changeset/base/356156

Log:
  Generalize lazy dequeue logic for wired pages.
  
  Some recent work aims to remove the use of the page lock for
  synchronizing updates to page queue state.  This change adds a mechanism
  to preserve the existing behaviour of lazily dequeuing wired pages,
  which was previously synchronized using the page lock.
  
  Handle this by setting PGA_DEQUEUE when a managed page's wire count
  transitions from 0 to 1.  When the page daemon encounters a page with a
  flag in PGA_QUEUE_OP_MASK set, it creates a batch queue entry for that
  page, but in doing so it does not modify the page itself, so a race
  with a concurrent free of the page is harmless.  The flag is advisory;
  the page daemon still checks for wirings after acquiring the object and
  page xbusy locks.
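  
  As a rough illustration of the wire-side rule (not part of this
  change), the 0->1 transition can be modelled in userland with C11
  atomics.  The struct page, PGA_DEQUEUE value, and page_wire() below
  are simplified stand-ins, not the kernel's vm_page definitions; the
  real logic is in the vm_page_wire() hunk further down.
  
      #include <stdatomic.h>
      #include <stdbool.h>
  
      #define PGA_DEQUEUE     0x01u   /* stand-in for the real aflag bit */
  
      struct page {
              atomic_uint ref_count;  /* wire count (simplified: no VPRC_OBJREF) */
              atomic_uint aflags;     /* atomically updated queue-state flags */
              bool managed;
      };
  
      static void
      page_wire(struct page *m)
      {
              unsigned int old;
  
              old = atomic_fetch_add(&m->ref_count, 1);
              if (old == 0 && m->managed) {
                      /*
                       * First wiring: hint to the page daemon that the
                       * page should be lazily removed from its queue.
                       */
                      atomic_fetch_or(&m->aflags, PGA_DEQUEUE);
              }
      }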
  
  vm_page_unwire_managed() now clears PGA_DEQUEUE on a 1->0 transition.
  It must do this before dropping the wiring reference to avoid a
  use-after-free, and it must also handle races with concurrent wirings
  to ensure that PGA_DEQUEUE is not left unset on a wired page.
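  
  Continuing the sketch above, the last-unwire rule can be modelled as a
  compare-and-swap loop.  This is a simplified model: without a
  VPRC_OBJREF bit there is no vm_page_release_toq() step, so the final
  transition only clears the hint; a release ordering on the count update
  stands in for the synchronization with vm_page_free_prep().
  
      static void
      page_unwire(struct page *m)
      {
              unsigned int old;
  
              old = atomic_load(&m->ref_count);
              do {
                      if (old > 1) {
                              /*
                               * Another wiring exists; re-set the hint in
                               * case an earlier iteration cleared it.
                               */
                              atomic_fetch_or(&m->aflags, PGA_DEQUEUE);
                      } else {
                              /*
                               * Last wiring: clear the hint now, while we
                               * still hold a reference, so that we never
                               * touch a concurrently freed page.
                               */
                              atomic_fetch_and(&m->aflags, ~PGA_DEQUEUE);
                      }
              } while (!atomic_compare_exchange_weak_explicit(&m->ref_count,
                  &old, old - 1, memory_order_release, memory_order_relaxed));
      }
  
  If the CAS fails because a concurrent wiring raced in, the loop retries
  with the reloaded count and takes the first branch, restoring
  PGA_DEQUEUE, which is the race the paragraph above describes.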
  
  Reviewed by:  jeff
  Tested by:    pho
  Sponsored by: Netflix, Intel
  Differential Revision:        https://reviews.freebsd.org/D22882

Modified:
  head/sys/vm/vm_page.c
  head/sys/vm/vm_pageout.c

Modified: head/sys/vm/vm_page.c
==============================================================================
--- head/sys/vm/vm_page.c       Sat Dec 28 19:03:32 2019        (r356155)
+++ head/sys/vm/vm_page.c       Sat Dec 28 19:03:46 2019        (r356156)
@@ -3530,8 +3530,6 @@ vm_page_pqbatch_submit(vm_page_t m, uint8_t queue)
 
        KASSERT((m->oflags & VPO_UNMANAGED) == 0,
            ("page %p is unmanaged", m));
-       KASSERT(mtx_owned(vm_page_lockptr(m)) || m->object == NULL,
-           ("missing synchronization for page %p", m));
        KASSERT(queue < PQ_COUNT, ("invalid queue %d", queue));
 
        domain = vm_phys_domain(m);
@@ -3904,8 +3902,11 @@ vm_page_wire(vm_page_t m)
        old = atomic_fetchadd_int(&m->ref_count, 1);
        KASSERT(VPRC_WIRE_COUNT(old) != VPRC_WIRE_COUNT_MAX,
            ("vm_page_wire: counter overflow for page %p", m));
-       if (VPRC_WIRE_COUNT(old) == 0)
+       if (VPRC_WIRE_COUNT(old) == 0) {
+               if ((m->oflags & VPO_UNMANAGED) == 0)
+                       vm_page_aflag_set(m, PGA_DEQUEUE);
                vm_wire_add(1);
+       }
 }
 
 /*
@@ -3928,8 +3929,11 @@ vm_page_wire_mapped(vm_page_t m)
                        return (false);
        } while (!atomic_fcmpset_int(&m->ref_count, &old, old + 1));
 
-       if (VPRC_WIRE_COUNT(old) == 0)
+       if (VPRC_WIRE_COUNT(old) == 0) {
+               if ((m->oflags & VPO_UNMANAGED) == 0)
+                       vm_page_aflag_set(m, PGA_DEQUEUE);
                vm_wire_add(1);
+       }
        return (true);
 }
 
@@ -3942,37 +3946,43 @@ static void
 vm_page_unwire_managed(vm_page_t m, uint8_t nqueue, bool noreuse)
 {
        u_int old;
-       bool locked;
 
        KASSERT((m->oflags & VPO_UNMANAGED) == 0,
            ("%s: page %p is unmanaged", __func__, m));
 
        /*
         * Update LRU state before releasing the wiring reference.
-        * We only need to do this once since we hold the page lock.
         * Use a release store when updating the reference count to
         * synchronize with vm_page_free_prep().
         */
        old = m->ref_count;
-       locked = false;
        do {
                KASSERT(VPRC_WIRE_COUNT(old) > 0,
                    ("vm_page_unwire: wire count underflow for page %p", m));
-               if (!locked && VPRC_WIRE_COUNT(old) == 1) {
-                       vm_page_lock(m);
-                       locked = true;
+
+               if (old > VPRC_OBJREF + 1) {
+                       /*
+                        * The page has at least one other wiring reference.  An
+                        * earlier iteration of this loop may have called
+                        * vm_page_release_toq() and cleared PGA_DEQUEUE, so
+                        * re-set it if necessary.
+                        */
+                       if ((vm_page_astate_load(m).flags & PGA_DEQUEUE) == 0)
+                               vm_page_aflag_set(m, PGA_DEQUEUE);
+               } else if (old == VPRC_OBJREF + 1) {
+                       /*
+                        * This is the last wiring.  Clear PGA_DEQUEUE and
+                        * update the page's queue state to reflect the
+                        * reference.  If the page does not belong to an object
+                        * (i.e., the VPRC_OBJREF bit is clear), we only need to
+                        * clear leftover queue state.
+                        */
                        vm_page_release_toq(m, nqueue, false);
+               } else if (old == 1) {
+                       vm_page_aflag_clear(m, PGA_DEQUEUE);
                }
        } while (!atomic_fcmpset_rel_int(&m->ref_count, &old, old - 1));
 
-       /*
-        * Release the lock only after the wiring is released, to ensure that
-        * the page daemon does not encounter and dequeue the page while it is
-        * still wired.
-        */
-       if (locked)
-               vm_page_unlock(m);
-
        if (VPRC_WIRE_COUNT(old) == 1) {
                vm_wire_sub(1);
                if (old == 1)
@@ -4026,6 +4036,8 @@ vm_page_unwire_noq(vm_page_t m)
 
        if (VPRC_WIRE_COUNT(old) > 1)
                return (false);
+       if ((m->oflags & VPO_UNMANAGED) == 0)
+               vm_page_aflag_clear(m, PGA_DEQUEUE);
        vm_wire_sub(1);
        return (true);
 }
@@ -4035,10 +4047,6 @@ vm_page_unwire_noq(vm_page_t m)
  * active or being moved to the active queue, ensure that its act_count is
  * at least ACT_INIT but do not otherwise mess with it.
  *
- * The page may be wired.  The caller should release its wiring reference
- * before releasing the page lock, otherwise the page daemon may immediately
- * dequeue the page.
- *
  * A managed page must be locked.
  */
 static __always_inline void
@@ -4046,16 +4054,18 @@ vm_page_mvqueue(vm_page_t m, const uint8_t nqueue, con
 {
        vm_page_astate_t old, new;
 
-       vm_page_assert_locked(m);
-       KASSERT((m->oflags & VPO_UNMANAGED) == 0,
-           ("%s: page %p is unmanaged", __func__, m));
        KASSERT(m->ref_count > 0,
            ("%s: page %p does not carry any references", __func__, m));
        KASSERT(nflag == PGA_REQUEUE || nflag == PGA_REQUEUE_HEAD,
            ("%s: invalid flags %x", __func__, nflag));
 
+       if ((m->oflags & VPO_UNMANAGED) != 0)
+               return;
+
        old = vm_page_astate_load(m);
        do {
+               if ((old.flags & PGA_DEQUEUE) != 0)
+                       break;
                new = old;
                if (nqueue == PQ_ACTIVE)
                        new.act_count = max(old.act_count, ACT_INIT);
@@ -4076,8 +4086,6 @@ void
 vm_page_activate(vm_page_t m)
 {
 
-       if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m))
-               return;
        vm_page_mvqueue(m, PQ_ACTIVE, PGA_REQUEUE);
 }
 
@@ -4089,8 +4097,6 @@ void
 vm_page_deactivate(vm_page_t m)
 {
 
-       if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m))
-               return;
        vm_page_mvqueue(m, PQ_INACTIVE, PGA_REQUEUE);
 }
 
@@ -4098,11 +4104,6 @@ void
 vm_page_deactivate_noreuse(vm_page_t m)
 {
 
-       KASSERT(m->object != NULL,
-           ("vm_page_deactivate_noreuse: page %p has no object", m));
-
-       if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m))
-               return;
        vm_page_mvqueue(m, PQ_INACTIVE, PGA_REQUEUE_HEAD);
 }
 
@@ -4113,8 +4114,6 @@ void
 vm_page_launder(vm_page_t m)
 {
 
-       if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m))
-               return;
        vm_page_mvqueue(m, PQ_LAUNDRY, PGA_REQUEUE);
 }
 
@@ -4141,8 +4140,6 @@ vm_page_release_toq(vm_page_t m, uint8_t nqueue, const
 {
        vm_page_astate_t old, new;
        uint16_t nflag;
-
-       vm_page_assert_locked(m);
 
        /*
         * Use a check of the valid bits to determine whether we should

Modified: head/sys/vm/vm_pageout.c
==============================================================================
--- head/sys/vm/vm_pageout.c    Sat Dec 28 19:03:32 2019        (r356155)
+++ head/sys/vm/vm_pageout.c    Sat Dec 28 19:03:46 2019        (r356156)
@@ -318,6 +318,26 @@ vm_pageout_next(struct scan_state *ss, const bool dequ
 }
 
 /*
+ * Determine whether processing of a page should be deferred and ensure that any
+ * outstanding queue operations are processed.
+ */
+static __always_inline bool
+vm_pageout_defer(vm_page_t m, const uint8_t queue, const bool enqueued)
+{
+       vm_page_astate_t as;
+
+       as = vm_page_astate_load(m);
+       if (__predict_false(as.queue != queue ||
+           ((as.flags & PGA_ENQUEUED) != 0) != enqueued))
+               return (true);
+       if ((as.flags & PGA_QUEUE_OP_MASK) != 0) {
+               vm_page_pqbatch_submit(m, queue);
+               return (true);
+       }
+       return (false);
+}
+
+/*
  * Scan for pages at adjacent offsets within the given page's object that are
  * eligible for laundering, form a cluster of these pages and the given page,
  * and launder that cluster.
@@ -755,35 +775,14 @@ scan:
 
 recheck:
                /*
-                * The page may have been disassociated from the queue
-                * or even freed while locks were dropped.  We thus must be
-                * careful whenever modifying page state.  Once the object lock
-                * has been acquired, we have a stable reference to the page.
+                * Don't touch a page that was removed from the queue after the
+                * page queue lock was released.  Otherwise, ensure that any
+                * pending queue operations, such as dequeues for wired pages,
+                * are handled.
                 */
-               if (vm_page_queue(m) != queue)
+               if (vm_pageout_defer(m, queue, true))
                        continue;
 
-               /*
-                * A requeue was requested, so this page gets a second
-                * chance.
-                */
-               if ((m->a.flags & PGA_REQUEUE) != 0) {
-                       vm_page_pqbatch_submit(m, queue);
-                       continue;
-               }
-
-               /*
-                * Wired pages may not be freed.  Complete their removal
-                * from the queue now to avoid needless revisits during
-                * future scans.  This check is racy and must be reverified once
-                * we hold the object lock and have verified that the page
-                * is not busy.
-                */
-               if (vm_page_wired(m)) {
-                       vm_page_dequeue_deferred(m);
-                       continue;
-               }
-
                if (object != m->object) {
                        if (object != NULL)
                                VM_OBJECT_WUNLOCK(object);
@@ -813,9 +812,9 @@ recheck:
                        continue;
 
                /*
-                * Re-check for wirings now that we hold the object lock and
-                * have verified that the page is unbusied.  If the page is
-                * mapped, it may still be wired by pmap lookups.  The call to
+                * Check for wirings now that we hold the object lock and have
+                * verified that the page is unbusied.  If the page is mapped,
+                * it may still be wired by pmap lookups.  The call to
                 * vm_page_try_remove_all() below atomically checks for such
                 * wirings and removes mappings.  If the page is unmapped, the
                 * wire count is guaranteed not to increase.
@@ -1259,23 +1258,15 @@ act_scan:
                vm_page_change_lock(m, &mtx);
 
                /*
-                * The page may have been disassociated from the queue
-                * or even freed while locks were dropped.  We thus must be
-                * careful whenever modifying page state.  Once the object lock
-                * has been acquired, we have a stable reference to the page.
+                * Don't touch a page that was removed from the queue after the
+                * page queue lock was released.  Otherwise, ensure that any
+                * pending queue operations, such as dequeues for wired pages,
+                * are handled.
                 */
-               if (vm_page_queue(m) != PQ_ACTIVE)
+               if (vm_pageout_defer(m, PQ_ACTIVE, true))
                        continue;
 
                /*
-                * Wired pages are dequeued lazily.
-                */
-               if (vm_page_wired(m)) {
-                       vm_page_dequeue_deferred(m);
-                       continue;
-               }
-
-               /*
                 * A page's object pointer may be set to NULL before
                 * the object lock is acquired.
                 */
@@ -1515,27 +1506,13 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int sh
 
 recheck:
                /*
-                * The page may have been disassociated from the queue
-                * or even freed while locks were dropped.  We thus must be
-                * careful whenever modifying page state.  Once the object lock
-                * has been acquired, we have a stable reference to the page.
+                * Don't touch a page that was removed from the queue after the
+                * page queue lock was released.  Otherwise, ensure that any
+                * pending queue operations, such as dequeues for wired pages,
+                * are handled.
                 */
-               old = vm_page_astate_load(m);
-               if (old.queue != PQ_INACTIVE ||
-                   (old.flags & PGA_QUEUE_STATE_MASK) != 0)
+               if (vm_pageout_defer(m, PQ_INACTIVE, false))
                        continue;
-
-               /*
-                * Wired pages may not be freed.  Complete their removal
-                * from the queue now to avoid needless revisits during
-                * future scans.  This check is racy and must be reverified once
-                * we hold the object lock and have verified that the page
-                * is not busy.
-                */
-               if (vm_page_wired(m)) {
-                       vm_page_dequeue_deferred(m);
-                       continue;
-               }
 
                if (object != m->object) {
                        if (object != NULL)