Author: jeff
Date: Thu Mar 22 19:21:11 2018
New Revision: 331369
URL: https://svnweb.freebsd.org/changeset/base/331369

Log:
  Lock reservations with a dedicated lock in each reservation.  Protect the
  vmd_free_count with atomics.
  
  This allows us to allocate and free from reservations without the free lock
  except where a superpage is allocated from the physical layer, which is
  roughly 1/512 of the operations on amd64.
  
  Use the counter API to eliminate cache contention on counters.
  
  Reviewed by:  markj
  Tested by:    pho
  Sponsored by: Netflix, Dell/EMC Isilon
  Differential Revision:        https://reviews.freebsd.org/D14707

Modified:
  head/sys/vm/vm_page.c
  head/sys/vm/vm_pagequeue.h
  head/sys/vm/vm_reserv.c
  head/sys/vm/vm_reserv.h

Modified: head/sys/vm/vm_page.c
==============================================================================
--- head/sys/vm/vm_page.c       Thu Mar 22 19:11:43 2018        (r331368)
+++ head/sys/vm/vm_page.c       Thu Mar 22 19:21:11 2018        (r331369)
@@ -177,7 +177,6 @@ static uma_zone_t fakepg_zone;
 static void vm_page_alloc_check(vm_page_t m);
 static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
 static void vm_page_enqueue(uint8_t queue, vm_page_t m);
-static void vm_page_free_phys(struct vm_domain *vmd, vm_page_t m);
 static void vm_page_init(void *dummy);
 static int vm_page_insert_after(vm_page_t m, vm_object_t object,
     vm_pindex_t pindex, vm_page_t mpred);
@@ -1677,10 +1676,10 @@ vm_page_alloc_after(vm_object_t object, vm_pindex_t pi
  * for the request class and false otherwise.
  */
 int
-vm_domain_available(struct vm_domain *vmd, int req, int npages)
+vm_domain_allocate(struct vm_domain *vmd, int req, int npages)
 {
+       u_int limit, old, new;
 
-       vm_domain_free_assert_locked(vmd);
        req = req & VM_ALLOC_CLASS_MASK;
 
        /*
@@ -1688,15 +1687,34 @@ vm_domain_available(struct vm_domain *vmd, int req, in
         */
        if (curproc == pageproc && req != VM_ALLOC_INTERRUPT)
                req = VM_ALLOC_SYSTEM;
+       if (req == VM_ALLOC_INTERRUPT)
+               limit = 0;
+       else if (req == VM_ALLOC_SYSTEM)
+               limit = vmd->vmd_interrupt_free_min;
+       else
+               limit = vmd->vmd_free_reserved;
 
-       if (vmd->vmd_free_count >= npages + vmd->vmd_free_reserved ||
-           (req == VM_ALLOC_SYSTEM &&
-           vmd->vmd_free_count >= npages + vmd->vmd_interrupt_free_min) ||
-           (req == VM_ALLOC_INTERRUPT &&
-           vmd->vmd_free_count >= npages))
-               return (1);
+       /*
+        * Attempt to reserve the pages.  Fail if we're below the limit.
+        */
+       limit += npages;
+       old = vmd->vmd_free_count;
+       do {
+               if (old < limit)
+                       return (0);
+               new = old - npages;
+       } while (atomic_fcmpset_int(&vmd->vmd_free_count, &old, new) == 0);
 
-       return (0);
+       /* Wake the page daemon if we've crossed the threshold. */
+       if (vm_paging_needed(vmd, new) && !vm_paging_needed(vmd, old))
+               pagedaemon_wakeup(vmd->vmd_domain);
+
+       /* Only update bitsets on transitions. */
+       if ((old >= vmd->vmd_free_min && new < vmd->vmd_free_min) ||
+           (old >= vmd->vmd_free_severe && new < vmd->vmd_free_severe))
+               vm_domain_set(vmd);
+
+       return (1);
 }
 
 vm_page_t
@@ -1723,44 +1741,34 @@ vm_page_alloc_domain_after(vm_object_t object, vm_pind
 again:
        m = NULL;
 #if VM_NRESERVLEVEL > 0
+       /*
+        * Can we allocate the page from a reservation?
+        */
        if (vm_object_reserv(object) &&
-           (m = vm_reserv_extend(req, object, pindex, domain, mpred))
-           != NULL) {
+           ((m = vm_reserv_extend(req, object, pindex, domain, mpred)) != NULL ||
+           (m = vm_reserv_alloc_page(req, object, pindex, domain, mpred)) != NULL)) {
                domain = vm_phys_domain(m);
                vmd = VM_DOMAIN(domain);
                goto found;
        }
 #endif
        vmd = VM_DOMAIN(domain);
-       vm_domain_free_lock(vmd);
-       if (vm_domain_available(vmd, req, 1)) {
+       if (vm_domain_allocate(vmd, req, 1)) {
                /*
-                * Can we allocate the page from a reservation?
+                * If not, allocate it from the free page queues.
                 */
+               vm_domain_free_lock(vmd);
+               m = vm_phys_alloc_pages(domain, object != NULL ?
+                   VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
+               vm_domain_free_unlock(vmd);
+               if (m == NULL) {
+                       vm_domain_freecnt_inc(vmd, 1);
 #if VM_NRESERVLEVEL > 0
-               if (!vm_object_reserv(object) ||
-                   (m = vm_reserv_alloc_page(object, pindex,
-                   domain, mpred)) == NULL)
+                       if (vm_reserv_reclaim_inactive(domain))
+                               goto again;
 #endif
-               {
-                       /*
-                        * If not, allocate it from the free page queues.
-                        */
-                       m = vm_phys_alloc_pages(domain, object != NULL ?
-                           VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
-#if VM_NRESERVLEVEL > 0
-                       if (m == NULL && vm_reserv_reclaim_inactive(domain)) {
-                               m = vm_phys_alloc_pages(domain,
-                                   object != NULL ?
-                                   VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT,
-                                   0);
-                       }
-#endif
                }
        }
-       if (m != NULL)
-               vm_domain_freecnt_dec(vmd, 1);
-       vm_domain_free_unlock(vmd);
        if (m == NULL) {
                /*
                 * Not allocatable, give up.
@@ -1775,9 +1783,7 @@ again:
         */
        KASSERT(m != NULL, ("missing page"));
 
-#if VM_NRESERVLEVEL > 0
 found:
-#endif
        vm_page_alloc_check(m);
 
        /*
@@ -1934,9 +1940,14 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pin
         */
 again:
 #if VM_NRESERVLEVEL > 0
+       /*
+        * Can we allocate the pages from a reservation?
+        */
        if (vm_object_reserv(object) &&
-           (m_ret = vm_reserv_extend_contig(req, object, pindex, domain,
-           npages, low, high, alignment, boundary, mpred)) != NULL) {
+           ((m_ret = vm_reserv_extend_contig(req, object, pindex, domain,
+           npages, low, high, alignment, boundary, mpred)) != NULL ||
+           (m_ret = vm_reserv_alloc_contig(req, object, pindex, domain,
+           npages, low, high, alignment, boundary, mpred)) != NULL)) {
                domain = vm_phys_domain(m_ret);
                vmd = VM_DOMAIN(domain);
                goto found;
@@ -1944,31 +1955,23 @@ again:
 #endif
        m_ret = NULL;
        vmd = VM_DOMAIN(domain);
-       vm_domain_free_lock(vmd);
-       if (vm_domain_available(vmd, req, npages)) {
+       if (vm_domain_allocate(vmd, req, npages)) {
                /*
-                * Can we allocate the pages from a reservation?
+                * allocate them from the free page queues.
                 */
+               vm_domain_free_lock(vmd);
+               m_ret = vm_phys_alloc_contig(domain, npages, low, high,
+                   alignment, boundary);
+               vm_domain_free_unlock(vmd);
+               if (m_ret == NULL) {
+                       vm_domain_freecnt_inc(vmd, npages);
 #if VM_NRESERVLEVEL > 0
-retry:
-               if (!vm_object_reserv(object) ||
-                   (m_ret = vm_reserv_alloc_contig(object, pindex, domain,
-                   npages, low, high, alignment, boundary, mpred)) == NULL)
+                       if (vm_reserv_reclaim_contig(domain, npages, low,
+                           high, alignment, boundary))
+                               goto again;
 #endif
-                       /*
-                        * If not, allocate them from the free page queues.
-                        */
-                       m_ret = vm_phys_alloc_contig(domain, npages, low, high,
-                           alignment, boundary);
-#if VM_NRESERVLEVEL > 0
-               if (m_ret == NULL && vm_reserv_reclaim_contig(
-                   domain, npages, low, high, alignment, boundary))
-                       goto retry;
-#endif
+               }
        }
-       if (m_ret != NULL)
-               vm_domain_freecnt_dec(vmd, npages);
-       vm_domain_free_unlock(vmd);
        if (m_ret == NULL) {
                if (vm_domain_alloc_fail(vmd, object, req))
                        goto again;
@@ -2109,13 +2112,14 @@ vm_page_alloc_freelist_domain(int domain, int freelist
         */
        vmd = VM_DOMAIN(domain);
 again:
-       vm_domain_free_lock(vmd);
-       if (vm_domain_available(vmd, req, 1))
+       if (vm_domain_allocate(vmd, req, 1)) {
+               vm_domain_free_lock(vmd);
                m = vm_phys_alloc_freelist_pages(domain, freelist,
                    VM_FREEPOOL_DIRECT, 0);
-       if (m != NULL)
-               vm_domain_freecnt_dec(vmd, 1);
-       vm_domain_free_unlock(vmd);
+               vm_domain_free_unlock(vmd);
+               if (m == NULL)
+                       vm_domain_freecnt_inc(vmd, 1);
+       }
        if (m == NULL) {
                if (vm_domain_alloc_fail(vmd, NULL, req))
                        goto again;
@@ -2491,8 +2495,9 @@ retry:
                                        vm_page_remque(m);
                                        vm_page_replace_checked(m_new, object,
                                            m->pindex, m);
-                                       m->valid = 0;
-                                       vm_page_undirty(m);
+                                       if (vm_page_free_prep(m, false))
+                                               SLIST_INSERT_HEAD(&free, m,
+                                                   plinks.s.ss);
 
                                        /*
                                         * The new page must be deactivated
@@ -2504,10 +2509,12 @@ retry:
                                        m->flags &= ~PG_ZERO;
                                        vm_page_remque(m);
                                        vm_page_remove(m);
+                                       if (vm_page_free_prep(m, false))
+                                               SLIST_INSERT_HEAD(&free, m,
+                                                   plinks.s.ss);
                                        KASSERT(m->dirty == 0,
                                            ("page %p is dirty", m));
                                }
-                               SLIST_INSERT_HEAD(&free, m, plinks.s.ss);
                        } else
                                error = EBUSY;
 unlock:
@@ -2548,7 +2555,7 @@ unlock:
                do {
                        MPASS(vm_phys_domain(m) == domain);
                        SLIST_REMOVE_HEAD(&free, plinks.s.ss);
-                       vm_page_free_phys(vmd, m);
+                       vm_phys_free_pages(m, 0);
                        cnt++;
                } while ((m = SLIST_FIRST(&free)) != NULL);
                vm_domain_free_unlock(vmd);
@@ -3159,24 +3166,12 @@ vm_page_free_prep(vm_page_t m, bool pagequeue_locked)
        if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT)
                pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT);
 
-       return (true);
-}
-
-/*
- * Insert the page into the physical memory allocator's free page
- * queues.  This is the last step to free a page.  The caller is
- * responsible for adjusting the free page count.
- */
-static void
-vm_page_free_phys(struct vm_domain *vmd, vm_page_t m)
-{
-
-       vm_domain_free_assert_locked(vmd);
-
 #if VM_NRESERVLEVEL > 0
-       if (!vm_reserv_free_page(m))
+       if (vm_reserv_free_page(m))
+               return (false);
 #endif
-               vm_phys_free_pages(m, 0);
+
+       return (true);
 }
 
 void
@@ -3200,7 +3195,7 @@ vm_page_free_phys_pglist(struct pglist *tq)
                        vmd = vm_pagequeue_domain(m);
                        vm_domain_free_lock(vmd);
                }
-               vm_page_free_phys(vmd, m);
+               vm_phys_free_pages(m, 0);
                cnt++;
        }
        if (vmd != NULL) {
@@ -3227,7 +3222,7 @@ vm_page_free_toq(vm_page_t m)
                return;
        vmd = vm_pagequeue_domain(m);
        vm_domain_free_lock(vmd);
-       vm_page_free_phys(vmd, m);
+       vm_phys_free_pages(m, 0);
        vm_domain_free_unlock(vmd);
        vm_domain_freecnt_inc(vmd, 1);
 }

Modified: head/sys/vm/vm_pagequeue.h
==============================================================================
--- head/sys/vm/vm_pagequeue.h  Thu Mar 22 19:11:43 2018        (r331368)
+++ head/sys/vm/vm_pagequeue.h  Thu Mar 22 19:21:11 2018        (r331369)
@@ -180,7 +180,7 @@ vm_pagequeue_cnt_add(struct vm_pagequeue *pq, int adde
 
 void vm_domain_set(struct vm_domain *vmd);
 void vm_domain_clear(struct vm_domain *vmd);
-int vm_domain_available(struct vm_domain *vmd, int req, int npages);
+int vm_domain_allocate(struct vm_domain *vmd, int req, int npages);
 
 /*
  *      vm_pagequeue_domain:
@@ -265,23 +265,6 @@ vm_domain_freecnt_inc(struct vm_domain *vmd, int adj)
            new >= vmd->vmd_pageout_free_min)))
                vm_domain_clear(vmd);
 }
-
-static inline void
-vm_domain_freecnt_dec(struct vm_domain *vmd, int adj)
-{
-       u_int old, new;
-
-       old = atomic_fetchadd_int(&vmd->vmd_free_count, -adj);
-       new = old - adj;
-       KASSERT(new >= 0, ("vm_domain_freecnt_dec: free count underflow"));
-       if (vm_paging_needed(vmd, new) && !vm_paging_needed(vmd, old))
-               pagedaemon_wakeup(vmd->vmd_domain);
-       /* Only update bitsets on transitions. */
-       if ((old >= vmd->vmd_free_min && new < vmd->vmd_free_min) ||
-           (old >= vmd->vmd_free_severe && new < vmd->vmd_free_severe))
-               vm_domain_set(vmd);
-}
-
 
 #endif /* _KERNEL */
 #endif                         /* !_VM_PAGEQUEUE_ */

Modified: head/sys/vm/vm_reserv.c
==============================================================================
--- head/sys/vm/vm_reserv.c     Thu Mar 22 19:11:43 2018        (r331368)
+++ head/sys/vm/vm_reserv.c     Thu Mar 22 19:21:11 2018        (r331369)
@@ -45,6 +45,8 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
+#include <sys/counter.h>
+#include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
@@ -54,6 +56,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/vmmeter.h>
+#include <sys/smp.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
@@ -166,22 +169,37 @@ popmap_is_set(popmap_t popmap[], int i)
  *
  * A partially populated reservation can be broken and reclaimed at any time.
  *
- * f - vm_domain_free_lock
+ * r - vm_reserv_lock
+ * d - vm_reserv_domain_lock
  * o - vm_reserv_object_lock
  * c - constant after boot
  */
 struct vm_reserv {
-       TAILQ_ENTRY(vm_reserv) partpopq;        /* (f) per-domain queue. */
-       LIST_ENTRY(vm_reserv) objq;             /* (o, f) object queue */
-       vm_object_t     object;                 /* (o, f) containing object */
-       vm_pindex_t     pindex;                 /* (o, f) offset in object */
+       struct mtx      lock;                   /* reservation lock. */
+       TAILQ_ENTRY(vm_reserv) partpopq;        /* (d) per-domain queue. */
+       LIST_ENTRY(vm_reserv) objq;             /* (o, r) object queue */
+       vm_object_t     object;                 /* (o, r) containing object */
+       vm_pindex_t     pindex;                 /* (o, r) offset in object */
        vm_page_t       pages;                  /* (c) first page  */
-       int             domain;                 /* (c) NUMA domain. */
-       int             popcnt;                 /* (f) # of pages in use */
-       char            inpartpopq;             /* (f) */
-       popmap_t        popmap[NPOPMAP];        /* (f) bit vector, used pages */
+       uint16_t        domain;                 /* (c) NUMA domain. */
+       uint16_t        popcnt;                 /* (r) # of pages in use */
+       char            inpartpopq;             /* (d) */
+       popmap_t        popmap[NPOPMAP];        /* (r) bit vector, used pages */
 };
 
+#define        vm_reserv_lockptr(rv)           (&(rv)->lock)
+#define        vm_reserv_assert_locked(rv)                                     \
+           mtx_assert(vm_reserv_lockptr(rv), MA_OWNED)
+#define        vm_reserv_lock(rv)              mtx_lock(vm_reserv_lockptr(rv))
+#define        vm_reserv_trylock(rv)           mtx_trylock(vm_reserv_lockptr(rv))
+#define        vm_reserv_unlock(rv)            mtx_unlock(vm_reserv_lockptr(rv))
+
+static struct mtx_padalign vm_reserv_domain_locks[MAXMEMDOM];
+
+#define        vm_reserv_domain_lockptr(d)     &vm_reserv_domain_locks[(d)]
+#define        vm_reserv_domain_lock(d)        mtx_lock(vm_reserv_domain_lockptr(d))
+#define        vm_reserv_domain_unlock(d)      mtx_unlock(vm_reserv_domain_lockptr(d))
+
 /*
  * The reservation array
  *
@@ -218,13 +236,13 @@ static TAILQ_HEAD(, vm_reserv) vm_rvq_partpop[MAXMEMDO
 
 static SYSCTL_NODE(_vm, OID_AUTO, reserv, CTLFLAG_RD, 0, "Reservation Info");
 
-static long vm_reserv_broken;
-SYSCTL_LONG(_vm_reserv, OID_AUTO, broken, CTLFLAG_RD,
-    &vm_reserv_broken, 0, "Cumulative number of broken reservations");
+static counter_u64_t vm_reserv_broken = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_vm_reserv, OID_AUTO, broken, CTLFLAG_RD,
+    &vm_reserv_broken, "Cumulative number of broken reservations");
 
-static long vm_reserv_freed;
-SYSCTL_LONG(_vm_reserv, OID_AUTO, freed, CTLFLAG_RD,
-    &vm_reserv_freed, 0, "Cumulative number of freed reservations");
+static counter_u64_t vm_reserv_freed = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_vm_reserv, OID_AUTO, freed, CTLFLAG_RD,
+    &vm_reserv_freed, "Cumulative number of freed reservations");
 
 static int sysctl_vm_reserv_fullpop(SYSCTL_HANDLER_ARGS);
 
@@ -236,9 +254,9 @@ static int sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_AR
 SYSCTL_OID(_vm_reserv, OID_AUTO, partpopq, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0,
     sysctl_vm_reserv_partpopq, "A", "Partially populated reservation queues");
 
-static long vm_reserv_reclaimed;
-SYSCTL_LONG(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD,
-    &vm_reserv_reclaimed, 0, "Cumulative number of reclaimed reservations");
+static counter_u64_t vm_reserv_reclaimed = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD,
+    &vm_reserv_reclaimed, "Cumulative number of reclaimed reservations");
 
 /*
  * The object lock pool is used to synchronize the rvq.  We can not use a
@@ -313,12 +331,12 @@ sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS)
                for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) {
                        counter = 0;
                        unused_pages = 0;
-                       vm_domain_free_lock(VM_DOMAIN(domain));
+                       vm_reserv_domain_lock(domain);
                        TAILQ_FOREACH(rv, &vm_rvq_partpop[domain], partpopq) {
                                counter++;
                                unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt;
                        }
-                       vm_domain_free_unlock(VM_DOMAIN(domain));
+                       vm_reserv_domain_unlock(domain);
                        sbuf_printf(&sbuf, "%6d, %7d, %6dK, %6d\n",
                            domain, level,
                            unused_pages * ((int)PAGE_SIZE / 1024), counter);
@@ -337,6 +355,9 @@ vm_reserv_remove(vm_reserv_t rv)
 {
        vm_object_t object;
 
+       vm_reserv_assert_locked(rv);
+       CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
+           __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq);
        KASSERT(rv->object != NULL,
            ("vm_reserv_remove: reserv %p is free", rv));
        KASSERT(!rv->inpartpopq,
@@ -356,6 +377,11 @@ vm_reserv_insert(vm_reserv_t rv, vm_object_t object, v
 {
        int i;
 
+       vm_reserv_assert_locked(rv);
+       CTR6(KTR_VM,
+           "%s: rv %p(%p) object %p new %p popcnt %d",
+           __FUNCTION__, rv, rv->pages, rv->object, object,
+          rv->popcnt);
        KASSERT(rv->object == NULL,
            ("vm_reserv_insert: reserv %p isn't free", rv));
        KASSERT(rv->popcnt == 0,
@@ -377,14 +403,15 @@ vm_reserv_insert(vm_reserv_t rv, vm_object_t object, v
  * becomes zero, the reservation is destroyed.  Additionally, moves the
  * reservation to the tail of the partially populated reservation queue if the
  * population count is non-zero.
- *
- * The free page queue lock must be held.
  */
 static void
 vm_reserv_depopulate(vm_reserv_t rv, int index)
 {
+       struct vm_domain *vmd;
 
-       vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
+       vm_reserv_assert_locked(rv);
+       CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
+           __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq);
        KASSERT(rv->object != NULL,
            ("vm_reserv_depopulate: reserv %p is free", rv));
        KASSERT(popmap_is_set(rv->popmap, index),
@@ -395,10 +422,7 @@ vm_reserv_depopulate(vm_reserv_t rv, int index)
        KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
            ("vm_reserv_depopulate: reserv %p's domain is corrupted %d",
            rv, rv->domain));
-       if (rv->inpartpopq) {
-               TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
-               rv->inpartpopq = FALSE;
-       } else {
+       if (rv->popcnt == VM_LEVEL_0_NPAGES) {
                KASSERT(rv->pages->psind == 1,
                    ("vm_reserv_depopulate: reserv %p is already demoted",
                    rv));
@@ -406,14 +430,25 @@ vm_reserv_depopulate(vm_reserv_t rv, int index)
        }
        popmap_clear(rv->popmap, index);
        rv->popcnt--;
+       vm_reserv_domain_lock(rv->domain);
+       if (rv->inpartpopq) {
+               TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
+               rv->inpartpopq = FALSE;
+       }
+       if (rv->popcnt != 0) {
+               rv->inpartpopq = TRUE;
+               TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq);
+       }
+       vm_reserv_domain_unlock(rv->domain);
+       vmd = VM_DOMAIN(rv->domain);
        if (rv->popcnt == 0) {
                vm_reserv_remove(rv);
+               vm_domain_free_lock(vmd);
                vm_phys_free_pages(rv->pages, VM_LEVEL_0_ORDER);
-               vm_reserv_freed++;
-       } else {
-               rv->inpartpopq = TRUE;
-               TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq);
+               vm_domain_free_unlock(vmd);
+               counter_u64_add(vm_reserv_freed, 1);
        }
+       vm_domain_freecnt_inc(vmd, 1);
 }
 
 /*
@@ -484,7 +519,9 @@ static void
 vm_reserv_populate(vm_reserv_t rv, int index)
 {
 
-       vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
+       vm_reserv_assert_locked(rv);
+       CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
+           __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq);
        KASSERT(rv->object != NULL,
            ("vm_reserv_populate: reserv %p is free", rv));
        KASSERT(popmap_is_clear(rv->popmap, index),
@@ -497,17 +534,23 @@ vm_reserv_populate(vm_reserv_t rv, int index)
        KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
            ("vm_reserv_populate: reserv %p's domain is corrupted %d",
            rv, rv->domain));
+       popmap_set(rv->popmap, index);
+       rv->popcnt++;
+       vm_reserv_domain_lock(rv->domain);
        if (rv->inpartpopq) {
                TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
                rv->inpartpopq = FALSE;
        }
-       popmap_set(rv->popmap, index);
-       rv->popcnt++;
        if (rv->popcnt < VM_LEVEL_0_NPAGES) {
                rv->inpartpopq = TRUE;
                TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq);
-       } else
+       } else {
+               KASSERT(rv->pages->psind == 0,
+                   ("vm_reserv_populate: reserv %p is already promoted",
+                   rv));
                rv->pages->psind = 1;
+       }
+       vm_reserv_domain_unlock(rv->domain);
 }
 
 /*
@@ -572,31 +615,29 @@ vm_reserv_extend_contig(int req, vm_object_t object, v
                return (NULL);
        domain = rv->domain;
        vmd = VM_DOMAIN(domain);
-       vm_domain_free_lock(vmd);
-       if (rv->object != object || !vm_domain_available(vmd, req, npages)) {
-               m = NULL;
+       vm_reserv_lock(rv);
+       if (rv->object != object)
                goto out;
-       }
        m = &rv->pages[index];
        pa = VM_PAGE_TO_PHYS(m);
        if (pa < low || pa + size > high || (pa & (alignment - 1)) != 0 ||
-           ((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) {
-               m = NULL;
+           ((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0)
                goto out;
-       }
        /* Handle vm_page_rename(m, new_object, ...). */
        for (i = 0; i < npages; i++) {
-               if (popmap_is_set(rv->popmap, index + i)) {
-                       m = NULL;
+               if (popmap_is_set(rv->popmap, index + i))
                        goto out;
-               }
        }
+       if (!vm_domain_allocate(vmd, req, npages))
+               goto out;
        for (i = 0; i < npages; i++)
                vm_reserv_populate(rv, index + i);
-       vm_domain_freecnt_dec(vmd, npages);
-out:
-       vm_domain_free_unlock(vmd);
+       vm_reserv_unlock(rv);
        return (m);
+
+out:
+       vm_reserv_unlock(rv);
+       return (NULL);
 }
 
 /*
@@ -618,10 +659,11 @@ out:
  * The object and free page queue must be locked.
  */
 vm_page_t
-vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
+vm_reserv_alloc_contig(int req, vm_object_t object, vm_pindex_t pindex, int domain,
     u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
     vm_paddr_t boundary, vm_page_t mpred)
 {
+       struct vm_domain *vmd;
        vm_paddr_t pa, size;
        vm_page_t m, m_ret, msucc;
        vm_pindex_t first, leftcap, rightcap;
@@ -629,7 +671,6 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t
        u_long allocpages, maxpages, minpages;
        int i, index, n;
 
-       vm_domain_free_assert_locked(VM_DOMAIN(domain));
        VM_OBJECT_ASSERT_WLOCKED(object);
        KASSERT(npages != 0, ("vm_reserv_alloc_contig: npages is 0"));
 
@@ -737,9 +778,19 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t
         * specified index may not be the first page within the first new
         * reservation.
         */
-       m = vm_phys_alloc_contig(domain, allocpages, low, high, ulmax(alignment,
-           VM_LEVEL_0_SIZE), boundary > VM_LEVEL_0_SIZE ? boundary : 0);
-       if (m == NULL)
+       m = NULL;
+       vmd = VM_DOMAIN(domain);
+       if (vm_domain_allocate(vmd, req, npages)) {
+               vm_domain_free_lock(vmd);
+               m = vm_phys_alloc_contig(domain, allocpages, low, high,
+                   ulmax(alignment, VM_LEVEL_0_SIZE),
+                   boundary > VM_LEVEL_0_SIZE ? boundary : 0);
+               vm_domain_free_unlock(vmd);
+               if (m == NULL) {
+                       vm_domain_freecnt_inc(vmd, npages);
+                       return (NULL);
+               }
+       } else
                return (NULL);
        KASSERT(vm_phys_domain(m) == domain,
            ("vm_reserv_alloc_contig: Page domain does not match requested."));
@@ -757,6 +808,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t
                KASSERT(rv->pages == m,
                    ("vm_reserv_alloc_contig: reserv %p's pages is corrupted",
                    rv));
+               vm_reserv_lock(rv);
                vm_reserv_insert(rv, object, first);
                n = ulmin(VM_LEVEL_0_NPAGES - index, npages);
                for (i = 0; i < n; i++)
@@ -766,6 +818,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t
                        m_ret = &rv->pages[index];
                        index = 0;
                }
+               vm_reserv_unlock(rv);
                m += VM_LEVEL_0_NPAGES;
                first += VM_LEVEL_0_NPAGES;
                allocpages -= VM_LEVEL_0_NPAGES;
@@ -813,18 +866,20 @@ vm_reserv_extend(int req, vm_object_t object, vm_pinde
        vmd = VM_DOMAIN(domain);
        index = VM_RESERV_INDEX(object, pindex);
        m = &rv->pages[index];
-       vm_domain_free_lock(vmd);
-       if (vm_domain_available(vmd, req, 1) == 0 ||
-           /* Handle reclaim race. */
-           rv->object != object ||
+       vm_reserv_lock(rv);
+       /* Handle reclaim race. */
+       if (rv->object != object ||
            /* Handle vm_page_rename(m, new_object, ...). */
-           popmap_is_set(rv->popmap, index))
+           popmap_is_set(rv->popmap, index)) {
                m = NULL;
-       if (m != NULL) {
-               vm_reserv_populate(rv, index);
-               vm_domain_freecnt_dec(vmd, 1);
+               goto out;
        }
-       vm_domain_free_unlock(vmd);
+       if (vm_domain_allocate(vmd, req, 1) == 0)
+               m = NULL;
+       else
+               vm_reserv_populate(rv, index);
+out:
+       vm_reserv_unlock(rv);
 
        return (m);
 }
@@ -840,15 +895,15 @@ vm_reserv_extend(int req, vm_object_t object, vm_pinde
  * The object and free page queue must be locked.
  */
 vm_page_t
-vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, int domain,
+vm_reserv_alloc_page(int req, vm_object_t object, vm_pindex_t pindex, int domain,
     vm_page_t mpred)
 {
+       struct vm_domain *vmd;
        vm_page_t m, msucc;
        vm_pindex_t first, leftcap, rightcap;
        vm_reserv_t rv;
        int index;
 
-       vm_domain_free_assert_locked(VM_DOMAIN(domain));
        VM_OBJECT_ASSERT_WLOCKED(object);
 
        /*
@@ -917,15 +972,28 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t p
        /*
         * Allocate and populate the new reservation.
         */
-       m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT, VM_LEVEL_0_ORDER);
-       if (m == NULL)
+       m = NULL;
+       vmd = VM_DOMAIN(domain);
+       if (vm_domain_allocate(vmd, req, 1)) {
+               vm_domain_free_lock(vmd);
+               m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT,
+                   VM_LEVEL_0_ORDER);
+               vm_domain_free_unlock(vmd);
+               if (m == NULL) {
+                       vm_domain_freecnt_inc(vmd, 1);
+                       return (NULL);
+               }
+       } else
                return (NULL);
        rv = vm_reserv_from_page(m);
+       vm_reserv_lock(rv);
        KASSERT(rv->pages == m,
            ("vm_reserv_alloc_page: reserv %p's pages is corrupted", rv));
        vm_reserv_insert(rv, object, first);
        index = VM_RESERV_INDEX(object, pindex);
        vm_reserv_populate(rv, index);
+       vm_reserv_unlock(rv);
+
        return (&rv->pages[index]);
 }
 
@@ -942,7 +1010,9 @@ vm_reserv_break(vm_reserv_t rv)
 {
        int begin_zeroes, hi, i, lo;
 
-       vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
+       vm_reserv_assert_locked(rv);
+       CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
+           __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq);
        vm_reserv_remove(rv);
        rv->pages->psind = 0;
        i = hi = 0;
@@ -981,12 +1051,14 @@ vm_reserv_break(vm_reserv_t rv)
                if (i != NPOPMAP)
                        /* Convert from ffsl() to ordinary bit numbering. */
                        hi--;
+               vm_domain_free_lock(VM_DOMAIN(rv->domain));
                vm_phys_free_contig(&rv->pages[begin_zeroes], NBPOPMAP * i +
                    hi - begin_zeroes);
+               vm_domain_free_unlock(VM_DOMAIN(rv->domain));
        } while (i < NPOPMAP);
        KASSERT(rv->popcnt == 0,
            ("vm_reserv_break: reserv %p's popcnt is corrupted", rv));
-       vm_reserv_broken++;
+       counter_u64_add(vm_reserv_broken, 1);
 }
 
 /*
@@ -996,7 +1068,6 @@ void
 vm_reserv_break_all(vm_object_t object)
 {
        vm_reserv_t rv;
-       struct vm_domain *vmd;
 
        /*
         * This access of object->rvq is unsynchronized so that the
@@ -1005,27 +1076,22 @@ vm_reserv_break_all(vm_object_t object)
         * lock prevents new additions, so we are guaranteed that when
         * it returns NULL the object is properly empty.
         */
-       vmd = NULL;
        while ((rv = LIST_FIRST(&object->rvq)) != NULL) {
-               if (vmd != VM_DOMAIN(rv->domain)) {
-                       if (vmd != NULL)
-                               vm_domain_free_unlock(vmd);
-                       vmd = VM_DOMAIN(rv->domain);
-                       vm_domain_free_lock(vmd);
-               }
+               vm_reserv_lock(rv);
                /* Reclaim race. */
-               if (rv->object != object)
+               if (rv->object != object) {
+                       vm_reserv_unlock(rv);
                        continue;
-               KASSERT(rv->object == object,
-                   ("vm_reserv_break_all: reserv %p is corrupted", rv));
+               }
+               vm_reserv_domain_lock(rv->domain);
                if (rv->inpartpopq) {
                        TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
                        rv->inpartpopq = FALSE;
                }
+               vm_reserv_domain_unlock(rv->domain);
                vm_reserv_break(rv);
+               vm_reserv_unlock(rv);
        }
-       if (vmd != NULL)
-               vm_domain_free_unlock(vmd);
 }
 
 /*
@@ -1038,13 +1104,21 @@ boolean_t
 vm_reserv_free_page(vm_page_t m)
 {
        vm_reserv_t rv;
+       boolean_t ret;
 
        rv = vm_reserv_from_page(m);
        if (rv->object == NULL)
                return (FALSE);
-       vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
-       vm_reserv_depopulate(rv, m - rv->pages);
-       return (TRUE);
+       vm_reserv_lock(rv);
+       /* Re-validate after lock. */
+       if (rv->object != NULL) {
+               vm_reserv_depopulate(rv, m - rv->pages);
+               ret = TRUE;
+       } else
+               ret = FALSE;
+       vm_reserv_unlock(rv);
+
+       return (ret);
 }
 
 /*
@@ -1058,6 +1132,7 @@ vm_reserv_init(void)
 {
        vm_paddr_t paddr;
        struct vm_phys_seg *seg;
+       struct vm_reserv *rv;
        int i, segind;
 
        /*
@@ -1068,15 +1143,22 @@ vm_reserv_init(void)
                seg = &vm_phys_segs[segind];
                paddr = roundup2(seg->start, VM_LEVEL_0_SIZE);
                while (paddr + VM_LEVEL_0_SIZE <= seg->end) {
-                       vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].pages =
-                           PHYS_TO_VM_PAGE(paddr);
-                       vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].domain =
-                           seg->domain;
+                       rv = &vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT];
+                       rv->pages = PHYS_TO_VM_PAGE(paddr);
+                       rv->domain = seg->domain;
+                       mtx_init(&rv->lock, "vm reserv", NULL, MTX_DEF);
                        paddr += VM_LEVEL_0_SIZE;
                }
        }
-       for (i = 0; i < MAXMEMDOM; i++)
+       for (i = 0; i < MAXMEMDOM; i++) {
+               mtx_init(&vm_reserv_domain_locks[i], "VM reserv domain", NULL,
+                   MTX_DEF);
                TAILQ_INIT(&vm_rvq_partpop[i]);
+       }
+
+       for (i = 0; i < VM_RESERV_OBJ_LOCK_COUNT; i++)
+               mtx_init(&vm_reserv_object_mtx[i], "resv obj lock", NULL,
+                   MTX_DEF);
 }
 
 /*
@@ -1091,7 +1173,6 @@ vm_reserv_is_page_free(vm_page_t m)
        rv = vm_reserv_from_page(m);
        if (rv->object == NULL)
                return (false);
-       vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
        return (popmap_is_clear(rv->popmap, m - rv->pages));
 }
 
@@ -1131,7 +1212,10 @@ static void
 vm_reserv_reclaim(vm_reserv_t rv)
 {
 
-       vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
+       vm_reserv_assert_locked(rv);
+       CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
+           __FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq);
+       vm_reserv_domain_lock(rv->domain);
        KASSERT(rv->inpartpopq,
            ("vm_reserv_reclaim: reserv %p's inpartpopq is FALSE", rv));
        KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
@@ -1139,8 +1223,9 @@ vm_reserv_reclaim(vm_reserv_t rv)
            rv, rv->domain));
        TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
        rv->inpartpopq = FALSE;
+       vm_reserv_domain_unlock(rv->domain);
        vm_reserv_break(rv);
-       vm_reserv_reclaimed++;
+       counter_u64_add(vm_reserv_reclaimed, 1);
 }
 
 /*
@@ -1155,9 +1240,14 @@ vm_reserv_reclaim_inactive(int domain)
 {
        vm_reserv_t rv;
 
-       vm_domain_free_assert_locked(VM_DOMAIN(domain));
-       if ((rv = TAILQ_FIRST(&vm_rvq_partpop[domain])) != NULL) {
+       while ((rv = TAILQ_FIRST(&vm_rvq_partpop[domain])) != NULL) {
+               vm_reserv_lock(rv);
+               if (rv != TAILQ_FIRST(&vm_rvq_partpop[domain])) {
+                       vm_reserv_unlock(rv);
+                       continue;
+               }
                vm_reserv_reclaim(rv);
+               vm_reserv_unlock(rv);
                return (TRUE);
        }
        return (FALSE);
@@ -1176,14 +1266,16 @@ vm_reserv_reclaim_contig(int domain, u_long npages, vm
     vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
 {
        vm_paddr_t pa, size;
-       vm_reserv_t rv;
+       vm_reserv_t rv, rvn;
        int hi, i, lo, low_index, next_free;
 
-       vm_domain_free_assert_locked(VM_DOMAIN(domain));
        if (npages > VM_LEVEL_0_NPAGES - 1)
                return (FALSE);
        size = npages << PAGE_SHIFT;
-       TAILQ_FOREACH(rv, &vm_rvq_partpop[domain], partpopq) {
+       vm_reserv_domain_lock(domain);
+again:
+       for (rv = TAILQ_FIRST(&vm_rvq_partpop[domain]); rv != NULL; rv = rvn) {
+               rvn = TAILQ_NEXT(rv, partpopq);
                pa = VM_PAGE_TO_PHYS(&rv->pages[VM_LEVEL_0_NPAGES - 1]);
                if (pa + PAGE_SIZE - size < low) {
                        /* This entire reservation is too low; go to next. */
@@ -1194,6 +1286,17 @@ vm_reserv_reclaim_contig(int domain, u_long npages, vm
                        /* This entire reservation is too high; go to next. */
                        continue;
                }
+               if (vm_reserv_trylock(rv) == 0) {
+                       vm_reserv_domain_unlock(domain);
+                       vm_reserv_lock(rv);
+                       if (!rv->inpartpopq) {
+                               vm_reserv_domain_lock(domain);
+                               if (!rvn->inpartpopq)
+                                       goto again;
+                               continue;
+                       }
+               } else
+                       vm_reserv_domain_unlock(domain);
                if (pa < low) {
                        /* Start the search for free pages at "low". */
                        low_index = (low + PAGE_MASK - pa) >> PAGE_SHIFT;
@@ -1239,6 +1342,7 @@ vm_reserv_reclaim_contig(int domain, u_long npages, vm
                                if ((NBPOPMAP * i - next_free) * PAGE_SIZE >=
                                    size) {
                                        vm_reserv_reclaim(rv);
+                                       vm_reserv_unlock(rv);
                                        return (TRUE);
                                }
                                hi = ffsl(rv->popmap[i]);
@@ -1249,10 +1353,16 @@ vm_reserv_reclaim_contig(int domain, u_long npages, vm
                        if ((NBPOPMAP * i + hi - next_free) * PAGE_SIZE >=
                            size) {
                                vm_reserv_reclaim(rv);
+                               vm_reserv_unlock(rv);
                                return (TRUE);
                        }
                } while (i < NPOPMAP);
+               vm_reserv_unlock(rv);
+               vm_reserv_domain_lock(domain);
+               if (rvn != NULL && !rvn->inpartpopq)
+                       goto again;
        }
+       vm_reserv_domain_unlock(domain);
        return (FALSE);
 }
 
@@ -1270,7 +1380,11 @@ vm_reserv_rename(vm_page_t m, vm_object_t new_object, 
        VM_OBJECT_ASSERT_WLOCKED(new_object);
        rv = vm_reserv_from_page(m);
        if (rv->object == old_object) {
-               vm_domain_free_lock(VM_DOMAIN(rv->domain));
+               vm_reserv_lock(rv);
+               CTR6(KTR_VM,
+                   "%s: rv %p object %p new %p popcnt %d inpartpop %d",

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to