Author: alc
Date: Tue Nov 13 02:50:39 2012
New Revision: 242941
URL: http://svnweb.freebsd.org/changeset/base/242941

Log:
  Replace the single, global page queues lock with per-queue locks on the
  active and inactive paging queues.
  
  Reviewed by:  kib
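
  For orientation before the diff: the global vm_page_queue_mtx and its
  vm_page_lock_queues()/vm_page_unlock_queues() wrappers go away, and each
  paging queue now carries its own mutex inside the new struct vm_pagequeue
  (see the vm_page.h hunk below).  As a rough illustration of the pattern
  the diff introduces -- this sketch is not part of the commit, and the
  helper name is hypothetical -- putting a page on the inactive queue now
  looks roughly like what vm_page_enqueue() does:

        /* Illustrative sketch only, simplified from vm_page_enqueue(). */
        static void
        example_enqueue_inactive(vm_page_t m)   /* hypothetical helper */
        {
                struct vm_pagequeue *pq;

                /* The page lock is still required, as before. */
                vm_page_lock_assert(m, MA_OWNED);
                pq = &vm_pagequeues[PQ_INACTIVE];
                vm_pagequeue_lock(pq);          /* was: vm_page_lock_queues() */
                m->queue = PQ_INACTIVE;
                TAILQ_INSERT_TAIL(&pq->pq_pl, m, pageq);
                ++*pq->pq_cnt;                  /* per-queue count, e.g. cnt.v_inactive_count */
                vm_pagequeue_unlock(pq);        /* was: vm_page_unlock_queues() */
        }

  The page lock continues to protect the page's own fields; the per-queue
  mutex covers only the queue linkage and its counter, which is what lets
  the active and inactive queues be manipulated concurrently.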

Modified:
  head/sys/sys/vmmeter.h
  head/sys/vm/vm_fault.c
  head/sys/vm/vm_page.c
  head/sys/vm/vm_page.h
  head/sys/vm/vm_pageout.c

Modified: head/sys/sys/vmmeter.h
==============================================================================
--- head/sys/sys/vmmeter.h      Tue Nov 13 02:42:31 2012        (r242940)
+++ head/sys/sys/vmmeter.h      Tue Nov 13 02:50:39 2012        (r242941)
@@ -46,7 +46,7 @@
  *      c - constant after initialization
  *      f - locked by vm_page_queue_free_mtx
  *      p - locked by being in the PCPU and atomicity respect to interrupts
- *      q - locked by vm_page_queue_mtx
+ *      q - changes are synchronized by the corresponding vm_pagequeue lock
  */
 struct vmmeter {
        /*
@@ -76,7 +76,7 @@ struct vmmeter {
        u_int v_intrans;        /* (p) intransit blocking page faults */
        u_int v_reactivated;    /* (f) pages reactivated from free list */
        u_int v_pdwakeups;      /* (f) times daemon has awaken from sleep */
-       u_int v_pdpages;        /* (q) pages analyzed by daemon */
+       u_int v_pdpages;        /* (p) pages analyzed by daemon */
 
        u_int v_tcached;        /* (p) total pages cached */
        u_int v_dfree;          /* (p) pages freed by daemon */

Modified: head/sys/vm/vm_fault.c
==============================================================================
--- head/sys/vm/vm_fault.c      Tue Nov 13 02:42:31 2012        (r242940)
+++ head/sys/vm/vm_fault.c      Tue Nov 13 02:50:39 2012        (r242941)
@@ -388,7 +388,7 @@ RetryFault:;
                                vm_object_deallocate(fs.first_object);
                                goto RetryFault;
                        }
-                       vm_pageq_remove(fs.m);
+                       vm_page_remque(fs.m);
                        vm_page_unlock(fs.m);
 
                        /*

Modified: head/sys/vm/vm_page.c
==============================================================================
--- head/sys/vm/vm_page.c       Tue Nov 13 02:42:31 2012        (r242940)
+++ head/sys/vm/vm_page.c       Tue Nov 13 02:50:39 2012        (r242941)
@@ -63,10 +63,16 @@
 /*
  *                     GENERAL RULES ON VM_PAGE MANIPULATION
  *
- *     - a pageq mutex is required when adding or removing a page from a
- *       page queue (vm_page_queue[]), regardless of other mutexes or the
+ *     - A page queue lock is required when adding or removing a page from a
+ *       page queue (vm_pagequeues[]), regardless of other locks or the
  *       busy state of a page.
  *
+ *             * In general, no thread besides the page daemon can acquire or
+ *               hold more than one page queue lock at a time.
+ *
+ *             * The page daemon can acquire and hold any pair of page queue
+ *               locks in any order.
+ *
  *     - The object mutex is held when inserting or removing
  *       pages from an object (vm_page_insert() or vm_page_remove()).
  *
@@ -115,8 +121,20 @@ __FBSDID("$FreeBSD$");
  *     page structure.
  */
 
-struct vpgqueues vm_page_queues[PQ_COUNT];
-struct mtx_padalign vm_page_queue_mtx;
+struct vm_pagequeue vm_pagequeues[PQ_COUNT] = {
+       [PQ_INACTIVE] = {
+               .pq_pl = TAILQ_HEAD_INITIALIZER(
+                   vm_pagequeues[PQ_INACTIVE].pq_pl),
+               .pq_cnt = &cnt.v_inactive_count,
+               .pq_name = "vm inactive pagequeue"
+       },
+       [PQ_ACTIVE] = {
+               .pq_pl = TAILQ_HEAD_INITIALIZER(
+                   vm_pagequeues[PQ_ACTIVE].pq_pl),
+               .pq_cnt = &cnt.v_active_count,
+               .pq_name = "vm active pagequeue"
+       }
+};
 struct mtx_padalign vm_page_queue_free_mtx;
 
 struct mtx_padalign pa_lock[PA_LOCK_COUNT];
@@ -139,7 +157,6 @@ static uma_zone_t fakepg_zone;
 
 static struct vnode *vm_page_alloc_init(vm_page_t m);
 static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
-static void vm_page_queue_remove(int queue, vm_page_t m);
 static void vm_page_enqueue(int queue, vm_page_t m);
 static void vm_page_init_fakepg(void *dummy);
 
@@ -294,20 +311,11 @@ vm_page_startup(vm_offset_t vaddr)
        /*
         * Initialize the page and queue locks.
         */
-       mtx_init(&vm_page_queue_mtx, "vm page queue", NULL, MTX_DEF |
-           MTX_RECURSE);
        mtx_init(&vm_page_queue_free_mtx, "vm page free queue", NULL, MTX_DEF);
        for (i = 0; i < PA_LOCK_COUNT; i++)
                mtx_init(&pa_lock[i], "vm page", NULL, MTX_DEF);
-
-       /*
-        * Initialize the queue headers for the hold queue, the active queue,
-        * and the inactive queue.
-        */
        for (i = 0; i < PQ_COUNT; i++)
-               TAILQ_INIT(&vm_page_queues[i].pl);
-       vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count;
-       vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count;
+               vm_pagequeue_init_lock(&vm_pagequeues[i]);
 
        /*
         * Allocate memory for use when boot strapping the kernel memory
@@ -1867,61 +1875,109 @@ vm_waitpfault(void)
 }
 
 /*
- *     vm_page_queue_remove:
+ *     vm_page_dequeue:
  *
- *     Remove the given page from the specified queue.
+ *     Remove the given page from its current page queue.
  *
- *     The page and page queues must be locked.
+ *     The page must be locked.
  */
-static __inline void
-vm_page_queue_remove(int queue, vm_page_t m)
+void
+vm_page_dequeue(vm_page_t m)
 {
-       struct vpgqueues *pq;
+       struct vm_pagequeue *pq;
 
-       mtx_assert(&vm_page_queue_mtx, MA_OWNED);
        vm_page_lock_assert(m, MA_OWNED);
-       pq = &vm_page_queues[queue];
-       TAILQ_REMOVE(&pq->pl, m, pageq);
-       (*pq->cnt)--;
+       KASSERT(m->queue != PQ_NONE,
+           ("vm_page_dequeue: page %p is not queued", m));
+       pq = &vm_pagequeues[m->queue];
+       vm_pagequeue_lock(pq);
+       m->queue = PQ_NONE;
+       TAILQ_REMOVE(&pq->pq_pl, m, pageq);
+       (*pq->pq_cnt)--;
+       vm_pagequeue_unlock(pq);
 }
 
 /*
- *     vm_pageq_remove:
+ *     vm_page_dequeue_locked:
  *
- *     Remove a page from its queue.
+ *     Remove the given page from its current page queue.
  *
- *     The given page must be locked.
+ *     The page and page queue must be locked.
  */
 void
-vm_pageq_remove(vm_page_t m)
+vm_page_dequeue_locked(vm_page_t m)
 {
-       int queue;
+       struct vm_pagequeue *pq;
 
        vm_page_lock_assert(m, MA_OWNED);
-       if ((queue = m->queue) != PQ_NONE) {
-               vm_page_lock_queues();
-               m->queue = PQ_NONE;
-               vm_page_queue_remove(queue, m);
-               vm_page_unlock_queues();
-       }
+       pq = &vm_pagequeues[m->queue];
+       vm_pagequeue_assert_locked(pq);
+       m->queue = PQ_NONE;
+       TAILQ_REMOVE(&pq->pq_pl, m, pageq);
+       (*pq->pq_cnt)--;
 }
 
 /*
  *     vm_page_enqueue:
  *
- *     Add the given page to the specified queue.
+ *     Add the given page to the specified page queue.
  *
- *     The page queues must be locked.
+ *     The page must be locked.
  */
 static void
 vm_page_enqueue(int queue, vm_page_t m)
 {
-       struct vpgqueues *vpq;
+       struct vm_pagequeue *pq;
 
-       vpq = &vm_page_queues[queue];
+       vm_page_lock_assert(m, MA_OWNED);
+       pq = &vm_pagequeues[queue];
+       vm_pagequeue_lock(pq);
        m->queue = queue;
-       TAILQ_INSERT_TAIL(&vpq->pl, m, pageq);
-       ++*vpq->cnt;
+       TAILQ_INSERT_TAIL(&pq->pq_pl, m, pageq);
+       ++*pq->pq_cnt;
+       vm_pagequeue_unlock(pq);
+}
+
+/*
+ *     vm_page_requeue:
+ *
+ *     Move the given page to the tail of its current page queue.
+ *
+ *     The page must be locked.
+ */
+void
+vm_page_requeue(vm_page_t m)
+{
+       struct vm_pagequeue *pq;
+
+       vm_page_lock_assert(m, MA_OWNED);
+       KASSERT(m->queue != PQ_NONE,
+           ("vm_page_requeue: page %p is not queued", m));
+       pq = &vm_pagequeues[m->queue];
+       vm_pagequeue_lock(pq);
+       TAILQ_REMOVE(&pq->pq_pl, m, pageq);
+       TAILQ_INSERT_TAIL(&pq->pq_pl, m, pageq);
+       vm_pagequeue_unlock(pq);
+}
+
+/*
+ *     vm_page_requeue_locked:
+ *
+ *     Move the given page to the tail of its current page queue.
+ *
+ *     The page queue must be locked.
+ */
+void
+vm_page_requeue_locked(vm_page_t m)
+{
+       struct vm_pagequeue *pq;
+
+       KASSERT(m->queue != PQ_NONE,
+           ("vm_page_requeue_locked: page %p is not queued", m));
+       pq = &vm_pagequeues[m->queue];
+       vm_pagequeue_assert_locked(pq);
+       TAILQ_REMOVE(&pq->pq_pl, m, pageq);
+       TAILQ_INSERT_TAIL(&pq->pq_pl, m, pageq);
 }
 
 /*
@@ -1944,11 +2000,9 @@ vm_page_activate(vm_page_t m)
                if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
                        if (m->act_count < ACT_INIT)
                                m->act_count = ACT_INIT;
-                       vm_page_lock_queues();
                        if (queue != PQ_NONE)
-                               vm_page_queue_remove(queue, m);
+                               vm_page_dequeue(m);
                        vm_page_enqueue(PQ_ACTIVE, m);
-                       vm_page_unlock_queues();
                } else
                        KASSERT(queue == PQ_NONE,
                            ("vm_page_activate: wired page %p is queued", m));
@@ -2008,7 +2062,9 @@ vm_page_free_toq(vm_page_t m)
                vm_page_lock_assert(m, MA_OWNED);
                KASSERT(!pmap_page_is_mapped(m),
                    ("vm_page_free_toq: freeing mapped page %p", m));
-       }
+       } else
+               KASSERT(m->queue == PQ_NONE,
+                   ("vm_page_free_toq: unmanaged page %p is queued", m));
        PCPU_INC(cnt.v_tfree);
 
        if (VM_PAGE_IS_FREE(m))
@@ -2022,8 +2078,7 @@ vm_page_free_toq(vm_page_t m)
         * callback routine until after we've put the page on the
         * appropriate free queue.
         */
-       if ((m->oflags & VPO_UNMANAGED) == 0)
-               vm_pageq_remove(m);
+       vm_page_remque(m);
        vm_page_remove(m);
 
        /*
@@ -2101,8 +2156,10 @@ vm_page_wire(vm_page_t m)
                return;
        }
        if (m->wire_count == 0) {
-               if ((m->oflags & VPO_UNMANAGED) == 0)
-                       vm_pageq_remove(m);
+               KASSERT((m->oflags & VPO_UNMANAGED) == 0 ||
+                   m->queue == PQ_NONE,
+                   ("vm_page_wire: unmanaged page %p is queued", m));
+               vm_page_remque(m);
                atomic_add_int(&cnt.v_wire_count, 1);
        }
        m->wire_count++;
@@ -2145,9 +2202,7 @@ vm_page_unwire(vm_page_t m, int activate
                                return;
                        if (!activate)
                                m->flags &= ~PG_WINATCFLS;
-                       vm_page_lock_queues();
                        vm_page_enqueue(activate ? PQ_ACTIVE : PQ_INACTIVE, m);
-                       vm_page_unlock_queues();
                }
        } else
                panic("vm_page_unwire: page %p's wire count is zero", m);
@@ -2176,6 +2231,7 @@ vm_page_unwire(vm_page_t m, int activate
 static inline void
 _vm_page_deactivate(vm_page_t m, int athead)
 {
+       struct vm_pagequeue *pq;
        int queue;
 
        vm_page_lock_assert(m, MA_OWNED);
@@ -2186,19 +2242,18 @@ _vm_page_deactivate(vm_page_t m, int ath
        if ((queue = m->queue) == PQ_INACTIVE)
                return;
        if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
-               m->flags &= ~PG_WINATCFLS;
-               vm_page_lock_queues();
                if (queue != PQ_NONE)
-                       vm_page_queue_remove(queue, m);
+                       vm_page_dequeue(m);
+               m->flags &= ~PG_WINATCFLS;
+               pq = &vm_pagequeues[PQ_INACTIVE];
+               vm_pagequeue_lock(pq);
+               m->queue = PQ_INACTIVE;
                if (athead)
-                       TAILQ_INSERT_HEAD(&vm_page_queues[PQ_INACTIVE].pl, m,
-                           pageq);
+                       TAILQ_INSERT_HEAD(&pq->pq_pl, m, pageq);
                else
-                       TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m,
-                           pageq);
-               m->queue = PQ_INACTIVE;
+                       TAILQ_INSERT_TAIL(&pq->pq_pl, m, pageq);
                cnt.v_inactive_count++;
-               vm_page_unlock_queues();
+               vm_pagequeue_unlock(pq);
        }
 }
 
@@ -2298,7 +2353,7 @@ vm_page_cache(vm_page_t m)
        /*
         * Remove the page from the paging queues.
         */
-       vm_pageq_remove(m);
+       vm_page_remque(m);
 
        /*
         * Remove the page from the object's collection of resident
@@ -3039,7 +3094,7 @@ DB_SHOW_COMMAND(pageq, vm_page_print_pag
        db_printf("\n");
 
        db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n",
-               *vm_page_queues[PQ_ACTIVE].cnt,
-               *vm_page_queues[PQ_INACTIVE].cnt);
+               *vm_pagequeues[PQ_ACTIVE].pq_cnt,
+               *vm_pagequeues[PQ_INACTIVE].pq_cnt);
 }
 #endif /* DDB */

Modified: head/sys/vm/vm_page.h
==============================================================================
--- head/sys/vm/vm_page.h       Tue Nov 13 02:42:31 2012        (r242940)
+++ head/sys/vm/vm_page.h       Tue Nov 13 02:50:39 2012        (r242941)
@@ -92,7 +92,7 @@
  *     In general, operations on this structure's mutable fields are
  *     synchronized using either one of or a combination of the lock on the
  *     object that the page belongs to (O), the pool lock for the page (P),
- *     or the lock for either the free or paging queues (Q).  If a field is
+ *     or the lock for either the free or paging queue (Q).  If a field is
  *     annotated below with two of these locks, then holding either lock is
  *     sufficient for read access, but both locks are required for write
  *     access.
@@ -111,8 +111,6 @@
  *     field is encapsulated in vm_page_clear_dirty_mask().
  */
 
-TAILQ_HEAD(pglist, vm_page);
-
 #if PAGE_SIZE == 4096
 #define VM_PAGE_BITS_ALL 0xffu
 typedef uint8_t vm_page_bits_t;
@@ -128,7 +126,7 @@ typedef uint64_t vm_page_bits_t;
 #endif
 
 struct vm_page {
-       TAILQ_ENTRY(vm_page) pageq;     /* queue info for FIFO queue or free list (Q) */
+       TAILQ_ENTRY(vm_page) pageq;     /* page queue or free list (Q)  */
        TAILQ_ENTRY(vm_page) listq;     /* pages in same object (O)     */
        struct vm_page *left;           /* splay tree link (O)          */
        struct vm_page *right;          /* splay tree link (O)          */
@@ -180,12 +178,22 @@ struct vm_page {
 #define        PQ_ACTIVE       1
 #define        PQ_COUNT        2
 
-struct vpgqueues {
-       struct pglist pl;
-       int     *cnt;
-};
+TAILQ_HEAD(pglist, vm_page);
 
-extern struct vpgqueues vm_page_queues[PQ_COUNT];
+struct vm_pagequeue {
+       struct mtx      pq_mutex;
+       struct pglist   pq_pl;
+       int *const      pq_cnt;
+       const char *const pq_name;
+} __aligned(CACHE_LINE_SIZE);
+
+extern struct vm_pagequeue vm_pagequeues[PQ_COUNT];
+
+#define        vm_pagequeue_assert_locked(pq)  mtx_assert(&(pq)->pq_mutex, MA_OWNED)
+#define        vm_pagequeue_init_lock(pq)      mtx_init(&(pq)->pq_mutex,       \
+           (pq)->pq_name, "vm pagequeue", MTX_DEF | MTX_DUPOK);
+#define        vm_pagequeue_lock(pq)           mtx_lock(&(pq)->pq_mutex)
+#define        vm_pagequeue_unlock(pq)         mtx_unlock(&(pq)->pq_mutex)
 
 extern struct mtx_padalign vm_page_queue_free_mtx;
 extern struct mtx_padalign pa_lock[];
@@ -320,11 +328,6 @@ vm_page_t vm_phys_paddr_to_vm_page(vm_pa
 
 vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa);
 
-extern struct mtx_padalign vm_page_queue_mtx;
-
-#define vm_page_lock_queues()   mtx_lock(&vm_page_queue_mtx)
-#define vm_page_unlock_queues() mtx_unlock(&vm_page_queue_mtx)
-
 /* page allocation classes: */
 #define VM_ALLOC_NORMAL                0
 #define VM_ALLOC_INTERRUPT     1
@@ -354,8 +357,6 @@ void vm_page_free(vm_page_t m);
 void vm_page_free_zero(vm_page_t m);
 void vm_page_wakeup(vm_page_t m);
 
-void vm_pageq_remove(vm_page_t m);
-
 void vm_page_activate (vm_page_t);
 vm_page_t vm_page_alloc (vm_object_t, vm_pindex_t, int);
 vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
@@ -370,6 +371,8 @@ int vm_page_try_to_cache (vm_page_t);
 int vm_page_try_to_free (vm_page_t);
 void vm_page_dontneed(vm_page_t);
 void vm_page_deactivate (vm_page_t);
+void vm_page_dequeue(vm_page_t m);
+void vm_page_dequeue_locked(vm_page_t m);
 vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t);
 vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr);
 void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
@@ -384,6 +387,8 @@ void vm_page_readahead_finish(vm_page_t 
 void vm_page_reference(vm_page_t m);
 void vm_page_remove (vm_page_t);
 void vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t);
+void vm_page_requeue(vm_page_t m);
+void vm_page_requeue_locked(vm_page_t m);
 void vm_page_set_valid_range(vm_page_t m, int base, int size);
 void vm_page_sleep(vm_page_t m, const char *msg);
 vm_page_t vm_page_splay(vm_pindex_t, vm_page_t);
@@ -512,6 +517,22 @@ vm_page_dirty(vm_page_t m)
 }
 
 /*
+ *     vm_page_remque:
+ *
+ *     If the given page is in a page queue, then remove it from that page
+ *     queue.
+ *
+ *     The page must be locked.
+ */
+static inline void
+vm_page_remque(vm_page_t m)
+{
+
+       if (m->queue != PQ_NONE)
+               vm_page_dequeue(m);
+}
+
+/*
  *     vm_page_sleep_if_busy:
  *
  *     Sleep and release the page queues lock if VPO_BUSY is set or,

Modified: head/sys/vm/vm_pageout.c
==============================================================================
--- head/sys/vm/vm_pageout.c    Tue Nov 13 02:42:31 2012        (r242940)
+++ head/sys/vm/vm_pageout.c    Tue Nov 13 02:50:39 2012        (r242941)
@@ -218,7 +218,6 @@ static void vm_req_vmdaemon(int req);
 #endif
 static boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *);
 static void vm_pageout_page_stats(void);
-static void vm_pageout_requeue(vm_page_t m);
 
 /*
  * Initialize a dummy page for marking the caller's place in the specified
@@ -255,29 +254,29 @@ static boolean_t
 vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next)
 {
        struct vm_page marker;
+       struct vm_pagequeue *pq;
        boolean_t unchanged;
        u_short queue;
        vm_object_t object;
 
        queue = m->queue;
        vm_pageout_init_marker(&marker, queue);
+       pq = &vm_pagequeues[queue];
        object = m->object;
        
-       TAILQ_INSERT_AFTER(&vm_page_queues[queue].pl,
-                          m, &marker, pageq);
-       vm_page_unlock_queues();
+       TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, pageq);
+       vm_pagequeue_unlock(pq);
        vm_page_unlock(m);
        VM_OBJECT_LOCK(object);
        vm_page_lock(m);
-       vm_page_lock_queues();
+       vm_pagequeue_lock(pq);
 
        /* Page queue might have changed. */
        *next = TAILQ_NEXT(&marker, pageq);
        unchanged = (m->queue == queue &&
                     m->object == object &&
                     &marker == TAILQ_NEXT(m, pageq));
-       TAILQ_REMOVE(&vm_page_queues[queue].pl,
-                    &marker, pageq);
+       TAILQ_REMOVE(&pq->pq_pl, &marker, pageq);
        return (unchanged);
 }
 
@@ -294,27 +293,27 @@ static boolean_t
 vm_pageout_page_lock(vm_page_t m, vm_page_t *next)
 {
        struct vm_page marker;
+       struct vm_pagequeue *pq;
        boolean_t unchanged;
        u_short queue;
 
        vm_page_lock_assert(m, MA_NOTOWNED);
-       mtx_assert(&vm_page_queue_mtx, MA_OWNED);
-
        if (vm_page_trylock(m))
                return (TRUE);
 
        queue = m->queue;
        vm_pageout_init_marker(&marker, queue);
+       pq = &vm_pagequeues[queue];
 
-       TAILQ_INSERT_AFTER(&vm_page_queues[queue].pl, m, &marker, pageq);
-       vm_page_unlock_queues();
+       TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, pageq);
+       vm_pagequeue_unlock(pq);
        vm_page_lock(m);
-       vm_page_lock_queues();
+       vm_pagequeue_lock(pq);
 
        /* Page queue might have changed. */
        *next = TAILQ_NEXT(&marker, pageq);
        unchanged = (m->queue == queue && &marker == TAILQ_NEXT(m, pageq));
-       TAILQ_REMOVE(&vm_page_queues[queue].pl, &marker, pageq);
+       TAILQ_REMOVE(&pq->pq_pl, &marker, pageq);
        return (unchanged);
 }
 
@@ -565,13 +564,15 @@ static boolean_t
 vm_pageout_launder(int queue, int tries, vm_paddr_t low, vm_paddr_t high)
 {
        struct mount *mp;
+       struct vm_pagequeue *pq;
        struct vnode *vp;
        vm_object_t object;
        vm_paddr_t pa;
        vm_page_t m, m_tmp, next;
 
-       vm_page_lock_queues();
-       TAILQ_FOREACH_SAFE(m, &vm_page_queues[queue].pl, pageq, next) {
+       pq = &vm_pagequeues[queue];
+       vm_pagequeue_lock(pq);
+       TAILQ_FOREACH_SAFE(m, &pq->pq_pl, pageq, next) {
                KASSERT(m->queue == queue,
                    ("vm_pageout_launder: page %p's queue is not %d", m,
                    queue));
@@ -603,7 +604,7 @@ vm_pageout_launder(int queue, int tries,
                                continue;
                        }
                        if (object->type == OBJT_VNODE) {
-                               vm_page_unlock_queues();
+                               vm_pagequeue_unlock(pq);
                                vp = object->handle;
                                vm_object_reference_locked(object);
                                VM_OBJECT_UNLOCK(object);
@@ -618,7 +619,7 @@ vm_pageout_launder(int queue, int tries,
                                return (TRUE);
                        } else if (object->type == OBJT_SWAP ||
                            object->type == OBJT_DEFAULT) {
-                               vm_page_unlock_queues();
+                               vm_pagequeue_unlock(pq);
                                m_tmp = m;
                                vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC,
                                    0, NULL, NULL);
@@ -626,12 +627,17 @@ vm_pageout_launder(int queue, int tries,
                                return (TRUE);
                        }
                } else {
+                       /*
+                        * Dequeue here to prevent lock recursion in
+                        * vm_page_cache().
+                        */
+                       vm_page_dequeue_locked(m);
                        vm_page_cache(m);
                        vm_page_unlock(m);
                }
                VM_OBJECT_UNLOCK(object);
        }
-       vm_page_unlock_queues();
+       vm_pagequeue_unlock(pq);
        return (FALSE);
 }
 
@@ -745,19 +751,14 @@ vm_pageout_object_deactivate_pages(pmap_
                                            p->act_count == 0)) {
                                                pmap_remove_all(p);
                                                vm_page_deactivate(p);
-                                       } else {
-                                               vm_page_lock_queues();
-                                               vm_pageout_requeue(p);
-                                               vm_page_unlock_queues();
-                                       }
+                                       } else
+                                               vm_page_requeue(p);
                                } else {
                                        vm_page_activate(p);
                                        if (p->act_count < ACT_MAX -
                                            ACT_ADVANCE)
                                                p->act_count += ACT_ADVANCE;
-                                       vm_page_lock_queues();
-                                       vm_pageout_requeue(p);
-                                       vm_page_unlock_queues();
+                                       vm_page_requeue(p);
                                }
                        } else if (p->queue == PQ_INACTIVE)
                                pmap_remove_all(p);
@@ -853,26 +854,6 @@ vm_pageout_map_deactivate_pages(map, des
 #endif         /* !defined(NO_SWAPPING) */
 
 /*
- *     vm_pageout_requeue:
- *
- *     Move the specified page to the tail of its present page queue.
- *
- *     The page queues must be locked.
- */
-static void
-vm_pageout_requeue(vm_page_t m)
-{
-       struct vpgqueues *vpq;
-
-       mtx_assert(&vm_page_queue_mtx, MA_OWNED);
-       KASSERT(m->queue != PQ_NONE,
-           ("vm_pageout_requeue: page %p is not queued", m));
-       vpq = &vm_page_queues[m->queue];
-       TAILQ_REMOVE(&vpq->pl, m, pageq);
-       TAILQ_INSERT_TAIL(&vpq->pl, m, pageq);
-}
-
-/*
  *     vm_pageout_scan does the dirty work for the pageout daemon.
  */
 static void
@@ -880,6 +861,7 @@ vm_pageout_scan(int pass)
 {
        vm_page_t m, next;
        struct vm_page marker;
+       struct vm_pagequeue *pq;
        int page_shortage, maxscan, pcount;
        int addl_page_shortage;
        vm_object_t object;
@@ -888,6 +870,8 @@ vm_pageout_scan(int pass)
        int maxlaunder;
        boolean_t queues_locked;
 
+       vm_pageout_init_marker(&marker, PQ_INACTIVE);
+
        /*
         * Decrease registered cache sizes.
         */
@@ -911,15 +895,7 @@ vm_pageout_scan(int pass)
         */
        page_shortage = vm_paging_target() + addl_page_shortage;
 
-       vm_pageout_init_marker(&marker, PQ_INACTIVE);
-
        /*
-        * Start scanning the inactive queue for pages we can move to the
-        * cache or free.  The scan will stop when the target is reached or
-        * we have scanned the entire inactive queue.  Note that m->act_count
-        * is not used to form decisions for the inactive queue, only for the
-        * active queue.
-        *
         * maxlaunder limits the number of dirty pages we flush per scan.
         * For most systems a smaller value (16 or 32) is more robust under
         * extreme memory and disk pressure because any unnecessary writes
@@ -933,18 +909,27 @@ vm_pageout_scan(int pass)
                maxlaunder = 1;
        if (pass)
                maxlaunder = 10000;
-       vm_page_lock_queues();
-       queues_locked = TRUE;
+
        maxscan = cnt.v_inactive_count;
 
-       for (m = TAILQ_FIRST(&vm_page_queues[PQ_INACTIVE].pl);
+       /*
+        * Start scanning the inactive queue for pages we can move to the
+        * cache or free.  The scan will stop when the target is reached or
+        * we have scanned the entire inactive queue.  Note that m->act_count
+        * is not used to form decisions for the inactive queue, only for the
+        * active queue.
+        */
+       pq = &vm_pagequeues[PQ_INACTIVE];
+       vm_pagequeue_lock(pq);
+       queues_locked = TRUE;
+       for (m = TAILQ_FIRST(&pq->pq_pl);
             m != NULL && maxscan-- > 0 && page_shortage > 0;
             m = next) {
+               vm_pagequeue_assert_locked(pq);
                KASSERT(queues_locked, ("unlocked queues"));
-               mtx_assert(&vm_page_queue_mtx, MA_OWNED);
                KASSERT(m->queue == PQ_INACTIVE, ("Inactive queue %p", m));
 
-               cnt.v_pdpages++;
+               PCPU_INC(cnt.v_pdpages);
                next = TAILQ_NEXT(m, pageq);
 
                /*
@@ -991,13 +976,12 @@ vm_pageout_scan(int pass)
                }
 
                /*
-                * We unlock vm_page_queue_mtx, invalidating the
+                * We unlock the inactive page queue, invalidating the
                 * 'next' pointer.  Use our marker to remember our
                 * place.
                 */
-               TAILQ_INSERT_AFTER(&vm_page_queues[PQ_INACTIVE].pl,
-                   m, &marker, pageq);
-               vm_page_unlock_queues();
+               TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, pageq);
+               vm_pagequeue_unlock(pq);
                queues_locked = FALSE;
 
                /*
@@ -1098,9 +1082,9 @@ vm_pageout_scan(int pass)
                         * the thrash point for a heavily loaded machine.
                         */
                        m->flags |= PG_WINATCFLS;
-                       vm_page_lock_queues();
+                       vm_pagequeue_lock(pq);
                        queues_locked = TRUE;
-                       vm_pageout_requeue(m);
+                       vm_page_requeue_locked(m);
                } else if (maxlaunder > 0) {
                        /*
                         * We always want to try to flush some dirty pages if
@@ -1127,11 +1111,11 @@ vm_pageout_scan(int pass)
                         * Those objects are in a "rundown" state.
                         */
                        if (!swap_pageouts_ok || (object->flags & OBJ_DEAD)) {
-                               vm_page_lock_queues();
+                               vm_pagequeue_lock(pq);
                                vm_page_unlock(m);
                                VM_OBJECT_UNLOCK(object);
                                queues_locked = TRUE;
-                               vm_pageout_requeue(m);
+                               vm_page_requeue_locked(m);
                                goto relock_queues;
                        }
 
@@ -1184,7 +1168,7 @@ vm_pageout_scan(int pass)
                                }
                                VM_OBJECT_LOCK(object);
                                vm_page_lock(m);
-                               vm_page_lock_queues();
+                               vm_pagequeue_lock(pq);
                                queues_locked = TRUE;
                                /*
                                 * The page might have been moved to another
@@ -1218,12 +1202,12 @@ vm_pageout_scan(int pass)
                                 */
                                if (m->hold_count) {
                                        vm_page_unlock(m);
-                                       vm_pageout_requeue(m);
+                                       vm_page_requeue_locked(m);
                                        if (object->flags & OBJ_MIGHTBEDIRTY)
                                                vnodes_skipped++;
                                        goto unlock_and_continue;
                                }
-                               vm_page_unlock_queues();
+                               vm_pagequeue_unlock(pq);
                                queues_locked = FALSE;
                        }
 
@@ -1246,7 +1230,7 @@ unlock_and_continue:
                        VM_OBJECT_UNLOCK(object);
                        if (mp != NULL) {
                                if (queues_locked) {
-                                       vm_page_unlock_queues();
+                                       vm_pagequeue_unlock(pq);
                                        queues_locked = FALSE;
                                }
                                if (vp != NULL)
@@ -1261,13 +1245,13 @@ unlock_and_continue:
                VM_OBJECT_UNLOCK(object);
 relock_queues:
                if (!queues_locked) {
-                       vm_page_lock_queues();
+                       vm_pagequeue_lock(pq);
                        queues_locked = TRUE;
                }
                next = TAILQ_NEXT(&marker, pageq);
-               TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl,
-                   &marker, pageq);
+               TAILQ_REMOVE(&pq->pq_pl, &marker, pageq);
        }
+       vm_pagequeue_unlock(pq);
 
        /*
         * Compute the number of pages we want to try to move from the
@@ -1283,9 +1267,9 @@ relock_queues:
         * deactivation candidates.
         */
        pcount = cnt.v_active_count;
-       m = TAILQ_FIRST(&vm_page_queues[PQ_ACTIVE].pl);
-       mtx_assert(&vm_page_queue_mtx, MA_OWNED);
-
+       pq = &vm_pagequeues[PQ_ACTIVE];
+       vm_pagequeue_lock(pq);
+       m = TAILQ_FIRST(&pq->pq_pl);
        while ((m != NULL) && (pcount-- > 0) && (page_shortage > 0)) {
 
                KASSERT(m->queue == PQ_ACTIVE,
@@ -1322,7 +1306,7 @@ relock_queues:
                    (m->hold_count != 0)) {
                        vm_page_unlock(m);
                        VM_OBJECT_UNLOCK(object);
-                       vm_pageout_requeue(m);
+                       vm_page_requeue_locked(m);
                        m = next;
                        continue;
                }
@@ -1331,7 +1315,7 @@ relock_queues:
                 * The count for pagedaemon pages is done after checking the
                 * page for eligibility...
                 */
-               cnt.v_pdpages++;
+               PCPU_INC(cnt.v_pdpages);
 
                /*
                 * Check to see "how much" the page has been used.
@@ -1358,14 +1342,16 @@ relock_queues:
                 * Only if an object is currently being used, do we use the
                 * page activation count stats.
                 */
-               if (actcount && (object->ref_count != 0)) {
-                       vm_pageout_requeue(m);
-               } else {
+               if (actcount != 0 && object->ref_count != 0)
+                       vm_page_requeue_locked(m);
+               else {
                        m->act_count -= min(m->act_count, ACT_DECLINE);
                        if (vm_pageout_algorithm ||
                            object->ref_count == 0 ||
                            m->act_count == 0) {
                                page_shortage--;
+                               /* Dequeue to avoid later lock recursion. */
+                               vm_page_dequeue_locked(m);
                                if (object->ref_count == 0) {
                                        KASSERT(!pmap_page_is_mapped(m),
                                    ("vm_pageout_scan: page %p is mapped", m));
@@ -1376,15 +1362,14 @@ relock_queues:
                                } else {
                                        vm_page_deactivate(m);
                                }
-                       } else {
-                               vm_pageout_requeue(m);
-                       }
+                       } else
+                               vm_page_requeue_locked(m);
                }
                vm_page_unlock(m);
                VM_OBJECT_UNLOCK(object);
                m = next;
        }
-       vm_page_unlock_queues();
+       vm_pagequeue_unlock(pq);
 #if !defined(NO_SWAPPING)
        /*
         * Idle process swapout -- run once per second.
@@ -1529,6 +1514,7 @@ vm_pageout_oom(int shortage)
 static void
 vm_pageout_page_stats()
 {
+       struct vm_pagequeue *pq;
        vm_object_t object;
        vm_page_t m,next;
        int pcount,tpcount;             /* Number of pages to check */
@@ -1542,7 +1528,6 @@ vm_pageout_page_stats()
        if (page_shortage <= 0)
                return;
 
-       vm_page_lock_queues();
        pcount = cnt.v_active_count;
        fullintervalcount += vm_pageout_stats_interval;
        if (fullintervalcount < vm_pageout_full_stats_interval) {
@@ -1554,7 +1539,9 @@ vm_pageout_page_stats()
                fullintervalcount = 0;
        }
 
-       m = TAILQ_FIRST(&vm_page_queues[PQ_ACTIVE].pl);
+       pq = &vm_pagequeues[PQ_ACTIVE];
+       vm_pagequeue_lock(pq);
+       m = TAILQ_FIRST(&pq->pq_pl);
        while ((m != NULL) && (pcount-- > 0)) {
                int actcount;
 
@@ -1589,7 +1576,7 @@ vm_pageout_page_stats()
                    (m->hold_count != 0)) {
                        vm_page_unlock(m);
                        VM_OBJECT_UNLOCK(object);
-                       vm_pageout_requeue(m);
+                       vm_page_requeue_locked(m);
                        m = next;
                        continue;
                }
@@ -1605,7 +1592,7 @@ vm_pageout_page_stats()
                        m->act_count += ACT_ADVANCE + actcount;
                        if (m->act_count > ACT_MAX)
                                m->act_count = ACT_MAX;
-                       vm_pageout_requeue(m);
+                       vm_page_requeue_locked(m);
                } else {
                        if (m->act_count == 0) {
                                /*
@@ -1618,17 +1605,19 @@ vm_pageout_page_stats()
                                 * of doing the operation.
                                 */
                                pmap_remove_all(m);
+                               /* Dequeue to avoid later lock recursion. */
+                               vm_page_dequeue_locked(m);
                                vm_page_deactivate(m);
                        } else {
                                m->act_count -= min(m->act_count, ACT_DECLINE);
-                               vm_pageout_requeue(m);
+                               vm_page_requeue_locked(m);
                        }
                }
                vm_page_unlock(m);
                VM_OBJECT_UNLOCK(object);
                m = next;
        }
-       vm_page_unlock_queues();
+       vm_pagequeue_unlock(pq);
 }
 
 /*
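
  A note on the vm_pageout.c changes above: because the scan loops now hold
  one specific queue's lock instead of the old global lock, they park a
  marker page after the current page before dropping that lock, and the
  page daemon dequeues pages itself (vm_page_dequeue_locked()) before
  calling vm_page_cache() or vm_page_deactivate(), so those routines do not
  try to take a queue lock the daemon already holds.  A condensed sketch of
  the scan pattern, for illustration only (page and object locking omitted;
  this is not the committed code):

        static void
        example_scan_inactive(void)     /* hypothetical function */
        {
                struct vm_pagequeue *pq;
                struct vm_page marker;
                vm_page_t m, next;

                vm_pageout_init_marker(&marker, PQ_INACTIVE);
                pq = &vm_pagequeues[PQ_INACTIVE];
                vm_pagequeue_lock(pq);
                for (m = TAILQ_FIRST(&pq->pq_pl); m != NULL; m = next) {
                        next = TAILQ_NEXT(m, pageq);
                        /*
                         * Remember our place with the marker, then drop the
                         * queue lock for work that may block.
                         */
                        TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, pageq);
                        vm_pagequeue_unlock(pq);

                        /* ... examine/launder/cache the page here ... */

                        vm_pagequeue_lock(pq);
                        next = TAILQ_NEXT(&marker, pageq);
                        TAILQ_REMOVE(&pq->pq_pl, &marker, pageq);
                }
                vm_pagequeue_unlock(pq);
        }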