Author: jeff
Date: Tue Nov 28 23:18:35 2017
New Revision: 326346
URL: https://svnweb.freebsd.org/changeset/base/326346

Log:
  Move domain iterators into the page layer where domain selection should take
  place.  This makes the majority of the phys layer explicitly domain specific.
  
  Reviewed by:  markj, kib (some objections)
  Discussed with:       alc
  Tested by:    pho
  Sponsored by: Netflix & Dell EMC Isilon
  Differential Revision:        https://reviews.freebsd.org/D13014

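Each page-layer allocation entry point now wraps its per-domain worker in a
policy-driven loop.  The reshaped vm_page_alloc_after(), assembled from the
hunks below, shows the pattern; the sleep flags are withheld until the last
candidate domain so the earlier domains fail fast:

vm_page_t
vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex,
    int req, vm_page_t mpred)
{
        struct vm_domain_iterator vi;
        vm_page_t m;
        int domain, wait;

        m = NULL;
        vm_policy_iterator_init(&vi);
        /* Strip the sleep flags; only the final domain tried may sleep. */
        wait = req & (VM_ALLOC_WAITFAIL | VM_ALLOC_WAITOK);
        req &= ~wait;
        while (vm_domain_iterator_run(&vi, &domain) == 0) {
                if (vm_domain_iterator_isdone(&vi))
                        req |= wait;
                m = vm_page_alloc_domain_after(object, pindex, domain, req,
                    mpred);
                if (m != NULL)
                        break;
        }
        vm_policy_iterator_finish(&vi);

        return (m);
}
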
Modified:
  head/sys/vm/vm_domain.c
  head/sys/vm/vm_domain.h
  head/sys/vm/vm_page.c
  head/sys/vm/vm_page.h
  head/sys/vm/vm_phys.c
  head/sys/vm/vm_phys.h
  head/sys/vm/vm_reserv.c
  head/sys/vm/vm_reserv.h

Modified: head/sys/vm/vm_domain.c
==============================================================================
--- head/sys/vm/vm_domain.c     Tue Nov 28 22:57:13 2017        (r326345)
+++ head/sys/vm/vm_domain.c     Tue Nov 28 23:18:35 2017        (r326346)
@@ -61,6 +61,118 @@ __FBSDID("$FreeBSD$");
 
 #include <vm/vm_domain.h>
 
+/*
+ * Default to first-touch + round-robin.
+ */
+static struct mtx vm_default_policy_mtx;
+MTX_SYSINIT(vm_default_policy, &vm_default_policy_mtx, "default policy mutex",
+    MTX_DEF);
+#ifdef VM_NUMA_ALLOC
+static struct vm_domain_policy vm_default_policy =
+    VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0);
+#else
+/* Use round-robin so the domain policy code will only try once per allocation */
+static struct vm_domain_policy vm_default_policy =
+    VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_ROUND_ROBIN, 0);
+#endif
+
+static int
+sysctl_vm_default_policy(SYSCTL_HANDLER_ARGS)
+{
+       char policy_name[32];
+       int error;
+
+       mtx_lock(&vm_default_policy_mtx);
+
+       /* Map policy to output string */
+       switch (vm_default_policy.p.policy) {
+       case VM_POLICY_FIRST_TOUCH:
+               strcpy(policy_name, "first-touch");
+               break;
+       case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN:
+               strcpy(policy_name, "first-touch-rr");
+               break;
+       case VM_POLICY_ROUND_ROBIN:
+       default:
+               strcpy(policy_name, "rr");
+               break;
+       }
+       mtx_unlock(&vm_default_policy_mtx);
+
+       error = sysctl_handle_string(oidp, &policy_name[0],
+           sizeof(policy_name), req);
+       if (error != 0 || req->newptr == NULL)
+               return (error);
+
+       mtx_lock(&vm_default_policy_mtx);
+       /* Set: match on the subset of policies that make sense as a default */
+       if (strcmp("first-touch-rr", policy_name) == 0) {
+               vm_domain_policy_set(&vm_default_policy,
+                   VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0);
+       } else if (strcmp("first-touch", policy_name) == 0) {
+               vm_domain_policy_set(&vm_default_policy,
+                   VM_POLICY_FIRST_TOUCH, 0);
+       } else if (strcmp("rr", policy_name) == 0) {
+               vm_domain_policy_set(&vm_default_policy,
+                   VM_POLICY_ROUND_ROBIN, 0);
+       } else {
+               error = EINVAL;
+               goto finish;
+       }
+
+       error = 0;
+finish:
+       mtx_unlock(&vm_default_policy_mtx);
+       return (error);
+}
+
+SYSCTL_PROC(_vm, OID_AUTO, default_policy, CTLTYPE_STRING | CTLFLAG_RW,
+    0, 0, sysctl_vm_default_policy, "A",
+    "Default policy (rr, first-touch, first-touch-rr");
+
+/*
+ * Initialise a VM domain iterator.
+ *
+ * Check the thread policy, then the proc policy,
+ * then default to the system policy.
+ */
+void
+vm_policy_iterator_init(struct vm_domain_iterator *vi)
+{
+#ifdef VM_NUMA_ALLOC
+       struct vm_domain_policy lcl;
+#endif
+
+       vm_domain_iterator_init(vi);
+
+#ifdef VM_NUMA_ALLOC
+       /* Copy out the thread policy */
+       vm_domain_policy_localcopy(&lcl, &curthread->td_vm_dom_policy);
+       if (lcl.p.policy != VM_POLICY_NONE) {
+               /* Thread policy is present; use it */
+               vm_domain_iterator_set_policy(vi, &lcl);
+               return;
+       }
+
+       vm_domain_policy_localcopy(&lcl,
+           &curthread->td_proc->p_vm_dom_policy);
+       if (lcl.p.policy != VM_POLICY_NONE) {
+               /* Process policy is present; use it */
+               vm_domain_iterator_set_policy(vi, &lcl);
+               return;
+       }
+#endif
+       /* Use system default policy */
+       vm_domain_iterator_set_policy(vi, &vm_default_policy);
+}
+
+void
+vm_policy_iterator_finish(struct vm_domain_iterator *vi)
+{
+
+       vm_domain_iterator_cleanup(vi);
+}
+
 #ifdef VM_NUMA_ALLOC
 static __inline int
 vm_domain_rr_selectdomain(int skip_domain)

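The vm.default_policy sysctl moves along with the iterator code but keeps its
behaviour.  A minimal userland sketch for reading and setting it (not part of
this commit; it assumes only the accepted strings shown in the handler above,
and setting it needs root):

#include <sys/types.h>
#include <sys/sysctl.h>

#include <stdio.h>
#include <string.h>

int
main(void)
{
        char policy[32];
        const char *new_policy = "first-touch-rr";
        size_t len;

        len = sizeof(policy);
        /* Read the system-wide default domain allocation policy. */
        if (sysctlbyname("vm.default_policy", policy, &len, NULL, 0) == -1) {
                perror("sysctlbyname");
                return (1);
        }
        printf("current policy: %s\n", policy);

        /* Switch to first-touch with round-robin fallback. */
        if (sysctlbyname("vm.default_policy", NULL, NULL, new_policy,
            strlen(new_policy) + 1) == -1) {
                perror("sysctlbyname");
                return (1);
        }
        return (0);
}
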
Modified: head/sys/vm/vm_domain.h
==============================================================================
--- head/sys/vm/vm_domain.h     Tue Nov 28 22:57:13 2017        (r326345)
+++ head/sys/vm/vm_domain.h     Tue Nov 28 23:18:35 2017        (r326346)
@@ -63,4 +63,7 @@ extern        int vm_domain_iterator_run(struct vm_domain_ite
 extern int vm_domain_iterator_isdone(struct vm_domain_iterator *vi);
 extern int vm_domain_iterator_cleanup(struct vm_domain_iterator *vi);
 
+extern void vm_policy_iterator_init(struct vm_domain_iterator *vi);
+extern void vm_policy_iterator_finish(struct vm_domain_iterator *vi);
+
 #endif /* __VM_DOMAIN_H__ */

Modified: head/sys/vm/vm_page.c
==============================================================================
--- head/sys/vm/vm_page.c       Tue Nov 28 22:57:13 2017        (r326345)
+++ head/sys/vm/vm_page.c       Tue Nov 28 23:18:35 2017        (r326346)
@@ -109,6 +109,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_param.h>
+#include <vm/vm_domain.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
@@ -1603,6 +1604,16 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, 
            vm_radix_lookup_le(&object->rtree, pindex) : NULL));
 }
 
+vm_page_t
+vm_page_alloc_domain(vm_object_t object, vm_pindex_t pindex, int domain,
+    int req)
+{
+
+       return (vm_page_alloc_domain_after(object, pindex, domain, req,
+           object != NULL ? vm_radix_lookup_le(&object->rtree, pindex) :
+           NULL));
+}
+
 /*
  * Allocate a page in the specified object with the given page index.  To
 * optimize insertion of the page into the object, the caller must also specify
@@ -1610,10 +1621,35 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, 
  * page index, or NULL if no such page exists.
  */
 vm_page_t
-vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex, int req,
-    vm_page_t mpred)
+vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex,
+    int req, vm_page_t mpred)
 {
+       struct vm_domain_iterator vi;
        vm_page_t m;
+       int domain, wait;
+
+       m = NULL;
+       vm_policy_iterator_init(&vi);
+       wait = req & (VM_ALLOC_WAITFAIL | VM_ALLOC_WAITOK);
+       req &= ~wait;
+       while (vm_domain_iterator_run(&vi, &domain) == 0) {
+               if (vm_domain_iterator_isdone(&vi))
+                       req |= wait;
+               m = vm_page_alloc_domain_after(object, pindex, domain, req,
+                   mpred);
+               if (m != NULL)
+                       break;
+       }
+       vm_policy_iterator_finish(&vi);
+
+       return (m);
+}
+
+vm_page_t
+vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain,
+    int req, vm_page_t mpred)
+{
+       vm_page_t m;
        int flags, req_class;
        u_int free_count;
 
@@ -1643,6 +1679,7 @@ vm_page_alloc_after(vm_object_t object, vm_pindex_t pi
         * for the request class.
         */
 again:
+       m = NULL;
        mtx_lock(&vm_page_queue_free_mtx);
        if (vm_cnt.v_free_count > vm_cnt.v_free_reserved ||
            (req_class == VM_ALLOC_SYSTEM &&
@@ -1655,23 +1692,26 @@ again:
 #if VM_NRESERVLEVEL > 0
                if (object == NULL || (object->flags & (OBJ_COLORED |
                    OBJ_FICTITIOUS)) != OBJ_COLORED || (m =
-                   vm_reserv_alloc_page(object, pindex, mpred)) == NULL)
+                   vm_reserv_alloc_page(object, pindex, domain,
+                   mpred)) == NULL)
 #endif
                {
                        /*
                         * If not, allocate it from the free page queues.
                         */
-                       m = vm_phys_alloc_pages(object != NULL ?
+                       m = vm_phys_alloc_pages(domain, object != NULL ?
                            VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
 #if VM_NRESERVLEVEL > 0
-                       if (m == NULL && vm_reserv_reclaim_inactive()) {
-                               m = vm_phys_alloc_pages(object != NULL ?
+                       if (m == NULL && vm_reserv_reclaim_inactive(domain)) {
+                               m = vm_phys_alloc_pages(domain,
+                                   object != NULL ?
                                    VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT,
                                    0);
                        }
 #endif
                }
-       } else {
+       }
+       if (m == NULL) {
                /*
                 * Not allocatable, give up.
                 */
@@ -1799,6 +1839,32 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t p
     u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
     vm_paddr_t boundary, vm_memattr_t memattr)
 {
+       struct vm_domain_iterator vi;
+       vm_page_t m;
+       int domain, wait;
+
+       m = NULL;
+       vm_policy_iterator_init(&vi);
+       wait = req & (VM_ALLOC_WAITFAIL | VM_ALLOC_WAITOK);
+       req &= ~wait;
+       while (vm_domain_iterator_run(&vi, &domain) == 0) {
+               if (vm_domain_iterator_isdone(&vi))
+                       req |= wait;
+               m = vm_page_alloc_contig_domain(object, pindex, domain, req,
+                   npages, low, high, alignment, boundary, memattr);
+               if (m != NULL)
+                       break;
+       }
+       vm_policy_iterator_finish(&vi);
+
+       return (m);
+}
+
+vm_page_t
+vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain,
+    int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
+    vm_paddr_t boundary, vm_memattr_t memattr)
+{
        vm_page_t m, m_ret, mpred;
        u_int busy_lock, flags, oflags;
        int req_class;
@@ -1838,6 +1904,7 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t p
         * below the lower bound for the allocation class?
         */
 again:
+       m_ret = NULL;
        mtx_lock(&vm_page_queue_free_mtx);
        if (vm_cnt.v_free_count >= npages + vm_cnt.v_free_reserved ||
            (req_class == VM_ALLOC_SYSTEM &&
@@ -1850,31 +1917,27 @@ again:
 #if VM_NRESERVLEVEL > 0
 retry:
                if (object == NULL || (object->flags & OBJ_COLORED) == 0 ||
-                   (m_ret = vm_reserv_alloc_contig(object, pindex, npages,
-                   low, high, alignment, boundary, mpred)) == NULL)
+                   (m_ret = vm_reserv_alloc_contig(object, pindex, domain,
+                   npages, low, high, alignment, boundary, mpred)) == NULL)
 #endif
                        /*
                         * If not, allocate them from the free page queues.
                         */
-                       m_ret = vm_phys_alloc_contig(npages, low, high,
+                       m_ret = vm_phys_alloc_contig(domain, npages, low, high,
                            alignment, boundary);
-       } else {
-               if (vm_page_alloc_fail(object, req))
-                       goto again;
-               return (NULL);
-       }
-       if (m_ret != NULL)
-               vm_phys_freecnt_adj(m_ret, -npages);
-       else {
 #if VM_NRESERVLEVEL > 0
-               if (vm_reserv_reclaim_contig(npages, low, high, alignment,
-                   boundary))
+               if (m_ret == NULL && vm_reserv_reclaim_contig(
+                   domain, npages, low, high, alignment, boundary))
                        goto retry;
 #endif
        }
-       mtx_unlock(&vm_page_queue_free_mtx);
-       if (m_ret == NULL)
+       if (m_ret == NULL) {
+               if (vm_page_alloc_fail(object, req))
+                       goto again;
                return (NULL);
+       }
+       vm_phys_freecnt_adj(m_ret, -npages);
+       mtx_unlock(&vm_page_queue_free_mtx);
        for (m = m_ret; m < &m_ret[npages]; m++)
                vm_page_alloc_check(m);
 
@@ -1988,7 +2051,30 @@ vm_page_alloc_check(vm_page_t m)
 vm_page_t
 vm_page_alloc_freelist(int flind, int req)
 {
+       struct vm_domain_iterator vi;
        vm_page_t m;
+       int domain, wait;
+
+       m = NULL;
+       vm_policy_iterator_init(&vi);
+       wait = req & (VM_ALLOC_WAITFAIL | VM_ALLOC_WAITOK);
+       req &= ~wait;
+       while (vm_domain_iterator_run(&vi, &domain) == 0) {
+               if (vm_domain_iterator_isdone(&vi))
+                       req |= wait;
+               m = vm_page_alloc_freelist_domain(domain, flind, req);
+               if (m != NULL)
+                       break;
+       }
+       vm_policy_iterator_finish(&vi);
+
+       return (m);
+}
+
+vm_page_t
+vm_page_alloc_freelist_domain(int domain, int flind, int req)
+{
+       vm_page_t m;
        u_int flags, free_count;
        int req_class;
 
@@ -2009,15 +2095,12 @@ again:
            (req_class == VM_ALLOC_SYSTEM &&
            vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min) ||
            (req_class == VM_ALLOC_INTERRUPT &&
-           vm_cnt.v_free_count > 0)) {
-               m = vm_phys_alloc_freelist_pages(flind, VM_FREEPOOL_DIRECT, 0);
-       } else {
+           vm_cnt.v_free_count > 0))
+               m = vm_phys_alloc_freelist_pages(domain, flind,
+                   VM_FREEPOOL_DIRECT, 0);
+       if (m == NULL) {
                if (vm_page_alloc_fail(NULL, req))
                        goto again;
-               return (NULL);
-       }
-       if (m == NULL) {
-               mtx_unlock(&vm_page_queue_free_mtx);
                return (NULL);
        }
        free_count = vm_phys_freecnt_adj(m, -1);

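The new *_domain entry points let a caller pin an allocation to a single
domain instead of going through the policy iterator.  A hedged sketch of a
hypothetical caller (object, pindex and domain are assumed to be set up as
for vm_page_alloc(), with the object locked):

        vm_page_t m;

        /*
         * Ask for a page backed by memory from "domain" only; no other
         * domain is consulted, so fall back explicitly if NULL is
         * acceptable to handle some other way.
         */
        m = vm_page_alloc_domain(object, pindex, domain, VM_ALLOC_NORMAL);
        if (m == NULL)
                m = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
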
Modified: head/sys/vm/vm_page.h
==============================================================================
--- head/sys/vm/vm_page.h       Tue Nov 28 22:57:13 2017        (r326345)
+++ head/sys/vm/vm_page.h       Tue Nov 28 23:18:35 2017        (r326346)
@@ -476,16 +476,24 @@ void vm_page_free_zero(vm_page_t m);
 void vm_page_activate (vm_page_t);
 void vm_page_advise(vm_page_t m, int advice);
 vm_page_t vm_page_alloc(vm_object_t, vm_pindex_t, int);
+vm_page_t vm_page_alloc_domain(vm_object_t, vm_pindex_t, int, int);
 vm_page_t vm_page_alloc_after(vm_object_t, vm_pindex_t, int, vm_page_t);
+vm_page_t vm_page_alloc_domain_after(vm_object_t, vm_pindex_t, int, int,
+    vm_page_t);
 vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
     u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
     vm_paddr_t boundary, vm_memattr_t memattr);
+vm_page_t vm_page_alloc_contig_domain(vm_object_t object,
+    vm_pindex_t pindex, int domain, int req, u_long npages, vm_paddr_t low,
+    vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
+    vm_memattr_t memattr);
 vm_page_t vm_page_alloc_freelist(int, int);
+vm_page_t vm_page_alloc_freelist_domain(int, int, int);
 void vm_page_change_lock(vm_page_t m, struct mtx **mtx);
 vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int);
 int vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
     vm_page_t *ma, int count);
-void vm_page_deactivate (vm_page_t);
+void vm_page_deactivate(vm_page_t);
 void vm_page_deactivate_noreuse(vm_page_t);
 void vm_page_dequeue(vm_page_t m);
 void vm_page_dequeue_locked(vm_page_t m);
@@ -506,6 +514,8 @@ void vm_page_putfake(vm_page_t m);
 void vm_page_readahead_finish(vm_page_t m);
 bool vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low,
     vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
+bool vm_page_reclaim_contig_domain(int req, u_long npages, int domain,
+    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
 void vm_page_reference(vm_page_t m);
 void vm_page_remove (vm_page_t);
 int vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t);

Modified: head/sys/vm/vm_phys.c
==============================================================================
--- head/sys/vm/vm_phys.c       Tue Nov 28 22:57:13 2017        (r326345)
+++ head/sys/vm/vm_phys.c       Tue Nov 28 23:18:35 2017        (r326346)
@@ -151,23 +151,6 @@ SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRIN
 SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
     &vm_ndomains, 0, "Number of physical memory domains available.");
 
-/*
- * Default to first-touch + round-robin.
- */
-static struct mtx vm_default_policy_mtx;
-MTX_SYSINIT(vm_default_policy, &vm_default_policy_mtx, "default policy mutex",
-    MTX_DEF);
-#ifdef VM_NUMA_ALLOC
-static struct vm_domain_policy vm_default_policy =
-    VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0);
-#else
-/* Use round-robin so the domain policy code will only try once per allocation */
-static struct vm_domain_policy vm_default_policy =
-    VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_ROUND_ROBIN, 0);
-#endif
-
-static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
-    int order);
 static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg,
     u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
     vm_paddr_t boundary);
@@ -176,60 +159,6 @@ static void vm_phys_create_seg(vm_paddr_t start, vm_pa
 static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
     int order);
 
-static int
-sysctl_vm_default_policy(SYSCTL_HANDLER_ARGS)
-{
-       char policy_name[32];
-       int error;
-
-       mtx_lock(&vm_default_policy_mtx);
-
-       /* Map policy to output string */
-       switch (vm_default_policy.p.policy) {
-       case VM_POLICY_FIRST_TOUCH:
-               strcpy(policy_name, "first-touch");
-               break;
-       case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN:
-               strcpy(policy_name, "first-touch-rr");
-               break;
-       case VM_POLICY_ROUND_ROBIN:
-       default:
-               strcpy(policy_name, "rr");
-               break;
-       }
-       mtx_unlock(&vm_default_policy_mtx);
-
-       error = sysctl_handle_string(oidp, &policy_name[0],
-           sizeof(policy_name), req);
-       if (error != 0 || req->newptr == NULL)
-               return (error);
-
-       mtx_lock(&vm_default_policy_mtx);
-       /* Set: match on the subset of policies that make sense as a default */
-       if (strcmp("first-touch-rr", policy_name) == 0) {
-               vm_domain_policy_set(&vm_default_policy,
-                   VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0);
-       } else if (strcmp("first-touch", policy_name) == 0) {
-               vm_domain_policy_set(&vm_default_policy,
-                   VM_POLICY_FIRST_TOUCH, 0);
-       } else if (strcmp("rr", policy_name) == 0) {
-               vm_domain_policy_set(&vm_default_policy,
-                   VM_POLICY_ROUND_ROBIN, 0);
-       } else {
-               error = EINVAL;
-               goto finish;
-       }
-
-       error = 0;
-finish:
-       mtx_unlock(&vm_default_policy_mtx);
-       return (error);
-}
-
-SYSCTL_PROC(_vm, OID_AUTO, default_policy, CTLTYPE_STRING | CTLFLAG_RW,
-    0, 0, sysctl_vm_default_policy, "A",
-    "Default policy (rr, first-touch, first-touch-rr");
-
 /*
  * Red-black tree helpers for vm fictitious range management.
  */
@@ -271,71 +200,6 @@ vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *
            (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
 }
 
-#ifdef notyet
-static __inline int
-vm_rr_selectdomain(void)
-{
-#ifdef VM_NUMA_ALLOC
-       struct thread *td;
-
-       td = curthread;
-
-       td->td_dom_rr_idx++;
-       td->td_dom_rr_idx %= vm_ndomains;
-       return (td->td_dom_rr_idx);
-#else
-       return (0);
-#endif
-}
-#endif /* notyet */
-
-/*
- * Initialise a VM domain iterator.
- *
- * Check the thread policy, then the proc policy,
- * then default to the system policy.
- *
- * Later on the various layers will have this logic
- * plumbed into them and the phys code will be explicitly
- * handed a VM domain policy to use.
- */
-static void
-vm_policy_iterator_init(struct vm_domain_iterator *vi)
-{
-#ifdef VM_NUMA_ALLOC
-       struct vm_domain_policy lcl;
-#endif
-
-       vm_domain_iterator_init(vi);
-
-#ifdef VM_NUMA_ALLOC
-       /* Copy out the thread policy */
-       vm_domain_policy_localcopy(&lcl, &curthread->td_vm_dom_policy);
-       if (lcl.p.policy != VM_POLICY_NONE) {
-               /* Thread policy is present; use it */
-               vm_domain_iterator_set_policy(vi, &lcl);
-               return;
-       }
-
-       vm_domain_policy_localcopy(&lcl,
-           &curthread->td_proc->p_vm_dom_policy);
-       if (lcl.p.policy != VM_POLICY_NONE) {
-               /* Process policy is present; use it */
-               vm_domain_iterator_set_policy(vi, &lcl);
-               return;
-       }
-#endif
-       /* Use system default policy */
-       vm_domain_iterator_set_policy(vi, &vm_default_policy);
-}
-
-static void
-vm_policy_iterator_finish(struct vm_domain_iterator *vi)
-{
-
-       vm_domain_iterator_cleanup(vi);
-}
-
 boolean_t
 vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high)
 {
@@ -504,7 +368,7 @@ _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, 
 
        KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
            ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
-       KASSERT(domain < vm_ndomains,
+       KASSERT(domain >= 0 && domain < vm_ndomains,
            ("vm_phys_create_seg: invalid domain provided"));
        seg = &vm_phys_segs[vm_phys_nsegs++];
        while (seg > vm_phys_segs && (seg - 1)->start >= end) {
@@ -736,29 +600,16 @@ vm_phys_split_pages(vm_page_t m, int oind, struct vm_f
  * The free page queues must be locked.
  */
 vm_page_t
-vm_phys_alloc_pages(int pool, int order)
+vm_phys_alloc_pages(int domain, int pool, int order)
 {
        vm_page_t m;
-       int domain, flind;
-       struct vm_domain_iterator vi;
+       int flind;
 
-       KASSERT(pool < VM_NFREEPOOL,
-           ("vm_phys_alloc_pages: pool %d is out of range", pool));
-       KASSERT(order < VM_NFREEORDER,
-           ("vm_phys_alloc_pages: order %d is out of range", order));
-
-       vm_policy_iterator_init(&vi);
-
-       while ((vm_domain_iterator_run(&vi, &domain)) == 0) {
-               for (flind = 0; flind < vm_nfreelists; flind++) {
-                       m = vm_phys_alloc_domain_pages(domain, flind, pool,
-                           order);
-                       if (m != NULL)
-                               return (m);
-               }
+       for (flind = 0; flind < vm_nfreelists; flind++) {
+               m = vm_phys_alloc_freelist_pages(domain, flind, pool, order);
+               if (m != NULL)
+                       return (m);
        }
-
-       vm_policy_iterator_finish(&vi);
        return (NULL);
 }
 
@@ -770,41 +621,23 @@ vm_phys_alloc_pages(int pool, int order)
  * The free page queues must be locked.
  */
 vm_page_t
-vm_phys_alloc_freelist_pages(int freelist, int pool, int order)
+vm_phys_alloc_freelist_pages(int domain, int flind, int pool, int order)
 {
+       struct vm_freelist *alt, *fl;
        vm_page_t m;
-       struct vm_domain_iterator vi;
-       int domain;
+       int oind, pind;
 
-       KASSERT(freelist < VM_NFREELIST,
+       KASSERT(domain >= 0 && domain < vm_ndomains,
+           ("vm_phys_alloc_freelist_pages: domain %d is out of range",
+           domain));
+       KASSERT(flind < VM_NFREELIST,
            ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
-           freelist));
+           flind));
        KASSERT(pool < VM_NFREEPOOL,
            ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
        KASSERT(order < VM_NFREEORDER,
            ("vm_phys_alloc_freelist_pages: order %d is out of range", order));
 
-       vm_policy_iterator_init(&vi);
-
-       while ((vm_domain_iterator_run(&vi, &domain)) == 0) {
-               m = vm_phys_alloc_domain_pages(domain,
-                   vm_freelist_to_flind[freelist], pool, order);
-               if (m != NULL)
-                       return (m);
-       }
-
-       vm_policy_iterator_finish(&vi);
-       return (NULL);
-}
-
-static vm_page_t
-vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order)
-{      
-       struct vm_freelist *fl;
-       struct vm_freelist *alt;
-       int oind, pind;
-       vm_page_t m;
-
        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
        fl = &vm_phys_free_queues[domain][flind][pool][0];
        for (oind = order; oind < VM_NFREEORDER; oind++) {
@@ -1261,14 +1094,13 @@ vm_phys_unfree_page(vm_page_t m)
  * "alignment" and "boundary" must be a power of two.
  */
 vm_page_t
-vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
+vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
     u_long alignment, vm_paddr_t boundary)
 {
        vm_paddr_t pa_end, pa_start;
        vm_page_t m_run;
-       struct vm_domain_iterator vi;
        struct vm_phys_seg *seg;
-       int domain, segind;
+       int segind;
 
        KASSERT(npages > 0, ("npages is 0"));
        KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
@@ -1276,12 +1108,6 @@ vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm
        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
        if (low >= high)
                return (NULL);
-       vm_policy_iterator_init(&vi);
-restartdom:
-       if (vm_domain_iterator_run(&vi, &domain) != 0) {
-               vm_policy_iterator_finish(&vi);
-               return (NULL);
-       }
        m_run = NULL;
        for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
                seg = &vm_phys_segs[segind];
@@ -1304,9 +1130,6 @@ restartdom:
                if (m_run != NULL)
                        break;
        }
-       if (m_run == NULL && !vm_domain_iterator_isdone(&vi))
-               goto restartdom;
-       vm_policy_iterator_finish(&vi);
        return (m_run);
 }
 

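With the iterator gone, vm_phys_alloc_pages() is a plain walk of the given
domain's freelists.  A sketch of a direct caller under the same locking as
before; the free-count adjustment mirrors what the page layer does, and a
real caller may need more than this:

        vm_page_t m;

        mtx_lock(&vm_page_queue_free_mtx);
        /* One order-0 page from the default pool of this domain only. */
        m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT, 0);
        if (m != NULL)
                vm_phys_freecnt_adj(m, -1);
        mtx_unlock(&vm_page_queue_free_mtx);
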
Modified: head/sys/vm/vm_phys.h
==============================================================================
--- head/sys/vm/vm_phys.h       Tue Nov 28 22:57:13 2017        (r326345)
+++ head/sys/vm/vm_phys.h       Tue Nov 28 23:18:35 2017        (r326346)
@@ -72,10 +72,11 @@ extern int vm_phys_nsegs;
  * The following functions are only to be used by the virtual memory system.
  */
 void vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end);
-vm_page_t vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
-    u_long alignment, vm_paddr_t boundary);
-vm_page_t vm_phys_alloc_freelist_pages(int freelist, int pool, int order);
-vm_page_t vm_phys_alloc_pages(int pool, int order);
+vm_page_t vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low,
+    vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
+vm_page_t vm_phys_alloc_freelist_pages(int domain, int freelist, int pool,
+    int order);
+vm_page_t vm_phys_alloc_pages(int domain, int pool, int order);
 boolean_t vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high);
 int vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
     vm_memattr_t memattr);
@@ -92,12 +93,13 @@ boolean_t vm_phys_unfree_page(vm_page_t m);
 int vm_phys_mem_affinity(int f, int t);
 
 /*
- *     vm_phys_domain:
  *
- *     Return the memory domain the page belongs to.
+ *     vm_phys_domidx:
+ *
+ *     Return the index of the domain the page belongs to.
  */
-static inline struct vm_domain *
-vm_phys_domain(vm_page_t m)
+static inline int
+vm_phys_domidx(vm_page_t m)
 {
 #ifdef VM_NUMA_ALLOC
        int domn, segind;
@@ -107,10 +109,22 @@ vm_phys_domain(vm_page_t m)
        KASSERT(segind < vm_phys_nsegs, ("segind %d m %p", segind, m));
        domn = vm_phys_segs[segind].domain;
        KASSERT(domn < vm_ndomains, ("domain %d m %p", domn, m));
-       return (&vm_dom[domn]);
+       return (domn);
 #else
-       return (&vm_dom[0]);
+       return (0);
 #endif
+}
+
+/*
+ *     vm_phys_domain:
+ *
+ *     Return the memory domain the page belongs to.
+ */
+static inline struct vm_domain *
+vm_phys_domain(vm_page_t m)
+{
+
+       return (&vm_dom[vm_phys_domidx(m)]);
 }
 
 static inline u_int

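vm_phys_domidx() returns the bare domain index while vm_phys_domain() keeps
returning the struct vm_domain pointer; both are illustrated below (m is any
managed page):

        struct vm_domain *vmd;
        int domidx;

        /* Bare domain index, e.g. to index a per-domain array or queue. */
        domidx = vm_phys_domidx(m);

        /* Existing callers keep getting the per-domain state pointer. */
        vmd = vm_phys_domain(m);
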
Modified: head/sys/vm/vm_reserv.c
==============================================================================
--- head/sys/vm/vm_reserv.c     Tue Nov 28 22:57:13 2017        (r326345)
+++ head/sys/vm/vm_reserv.c     Tue Nov 28 23:18:35 2017        (r326346)
@@ -170,6 +170,7 @@ struct vm_reserv {
        vm_object_t     object;                 /* containing object */
        vm_pindex_t     pindex;                 /* offset within object */
        vm_page_t       pages;                  /* first page of a superpage */
+       int             domain;                 /* NUMA domain */
        int             popcnt;                 /* # of pages in use */
        char            inpartpopq;
        popmap_t        popmap[NPOPMAP];        /* bit vector of used pages */
@@ -207,8 +208,7 @@ static vm_reserv_t vm_reserv_array;
  *
  * Access to this queue is synchronized by the free page queue lock.
  */
-static TAILQ_HEAD(, vm_reserv) vm_rvq_partpop =
-                           TAILQ_HEAD_INITIALIZER(vm_rvq_partpop);
+static TAILQ_HEAD(, vm_reserv) vm_rvq_partpop[MAXMEMDOM];
 
 static SYSCTL_NODE(_vm, OID_AUTO, reserv, CTLFLAG_RD, 0, "Reservation Info");
 
@@ -277,24 +277,27 @@ sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS)
 {
        struct sbuf sbuf;
        vm_reserv_t rv;
-       int counter, error, level, unused_pages;
+       int counter, error, domain, level, unused_pages;
 
        error = sysctl_wire_old_buffer(req, 0);
        if (error != 0)
                return (error);
        sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
-       sbuf_printf(&sbuf, "\nLEVEL     SIZE  NUMBER\n\n");
-       for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) {
-               counter = 0;
-               unused_pages = 0;
-               mtx_lock(&vm_page_queue_free_mtx);
-               TAILQ_FOREACH(rv, &vm_rvq_partpop/*[level]*/, partpopq) {
-                       counter++;
-                       unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt;
+       sbuf_printf(&sbuf, "\nDOMAIN    LEVEL     SIZE  NUMBER\n\n");
+       for (domain = 0; domain < vm_ndomains; domain++) {
+               for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) {
+                       counter = 0;
+                       unused_pages = 0;
+                       mtx_lock(&vm_page_queue_free_mtx);
+                       TAILQ_FOREACH(rv, &vm_rvq_partpop[domain], partpopq) {
+                               counter++;
+                               unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt;
+                       }
+                       mtx_unlock(&vm_page_queue_free_mtx);
+                       sbuf_printf(&sbuf, "%6d, %7d, %6dK, %6d\n",
+                           domain, level,
+                           unused_pages * ((int)PAGE_SIZE / 1024), counter);
                }
-               mtx_unlock(&vm_page_queue_free_mtx);
-               sbuf_printf(&sbuf, "%5d: %6dK, %6d\n", level,
-                   unused_pages * ((int)PAGE_SIZE / 1024), counter);
        }
        error = sbuf_finish(&sbuf);
        sbuf_delete(&sbuf);
@@ -321,8 +324,11 @@ vm_reserv_depopulate(vm_reserv_t rv, int index)
            index));
        KASSERT(rv->popcnt > 0,
            ("vm_reserv_depopulate: reserv %p's popcnt is corrupted", rv));
+       KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
+           ("vm_reserv_depopulate: reserv %p's domain is corrupted %d",
+           rv, rv->domain));
        if (rv->inpartpopq) {
-               TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
+               TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
                rv->inpartpopq = FALSE;
        } else {
                KASSERT(rv->pages->psind == 1,
@@ -335,11 +341,12 @@ vm_reserv_depopulate(vm_reserv_t rv, int index)
        if (rv->popcnt == 0) {
                LIST_REMOVE(rv, objq);
                rv->object = NULL;
+               rv->domain = -1;
                vm_phys_free_pages(rv->pages, VM_LEVEL_0_ORDER);
                vm_reserv_freed++;
        } else {
                rv->inpartpopq = TRUE;
-               TAILQ_INSERT_TAIL(&vm_rvq_partpop, rv, partpopq);
+               TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq);
        }
 }
 
@@ -384,15 +391,18 @@ vm_reserv_populate(vm_reserv_t rv, int index)
            ("vm_reserv_populate: reserv %p is already full", rv));
        KASSERT(rv->pages->psind == 0,
            ("vm_reserv_populate: reserv %p is already promoted", rv));
+       KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
+           ("vm_reserv_populate: reserv %p's domain is corrupted %d",
+           rv, rv->domain));
        if (rv->inpartpopq) {
-               TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
+               TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
                rv->inpartpopq = FALSE;
        }
        popmap_set(rv->popmap, index);
        rv->popcnt++;
        if (rv->popcnt < VM_LEVEL_0_NPAGES) {
                rv->inpartpopq = TRUE;
-               TAILQ_INSERT_TAIL(&vm_rvq_partpop, rv, partpopq);
+               TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq);
        } else
                rv->pages->psind = 1;
 }
@@ -413,9 +423,9 @@ vm_reserv_populate(vm_reserv_t rv, int index)
  * The object and free page queue must be locked.
  */
 vm_page_t
-vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, u_long npages,
-    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
-    vm_page_t mpred)
+vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
+    u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
+    vm_paddr_t boundary, vm_page_t mpred)
 {
        vm_paddr_t pa, size;
        vm_page_t m, m_ret, msucc;
@@ -535,7 +545,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t
         * specified index may not be the first page within the first new
         * reservation.
         */
-       m = vm_phys_alloc_contig(allocpages, low, high, ulmax(alignment,
+       m = vm_phys_alloc_contig(domain, allocpages, low, high, ulmax(alignment,
            VM_LEVEL_0_SIZE), boundary > VM_LEVEL_0_SIZE ? boundary : 0);
        if (m == NULL)
                return (NULL);
@@ -558,6 +568,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t
                LIST_INSERT_HEAD(&object->rvq, rv, objq);
                rv->object = object;
                rv->pindex = first;
+               rv->domain = vm_phys_domidx(m);
                KASSERT(rv->popcnt == 0,
                    ("vm_reserv_alloc_contig: reserv %p's popcnt is corrupted",
                    rv));
@@ -613,7 +624,8 @@ found:
  * The object and free page queue must be locked.
  */
 vm_page_t
-vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, vm_page_t mpred)
+vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, int domain,
+    vm_page_t mpred)
 {
        vm_page_t m, msucc;
        vm_pindex_t first, leftcap, rightcap;
@@ -692,7 +704,7 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t p
        /*
         * Allocate and populate the new reservation.
         */
-       m = vm_phys_alloc_pages(VM_FREEPOOL_DEFAULT, VM_LEVEL_0_ORDER);
+       m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT, VM_LEVEL_0_ORDER);
        if (m == NULL)
                return (NULL);
        rv = vm_reserv_from_page(m);
@@ -703,6 +715,7 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t p
        LIST_INSERT_HEAD(&object->rvq, rv, objq);
        rv->object = object;
        rv->pindex = first;
+       rv->domain = vm_phys_domidx(m);
        KASSERT(rv->popcnt == 0,
            ("vm_reserv_alloc_page: reserv %p's popcnt is corrupted", rv));
        KASSERT(!rv->inpartpopq,
@@ -749,6 +762,7 @@ vm_reserv_break(vm_reserv_t rv, vm_page_t m)
            ("vm_reserv_break: reserv %p's inpartpopq is TRUE", rv));
        LIST_REMOVE(rv, objq);
        rv->object = NULL;
+       rv->domain = -1;
        if (m != NULL) {
                /*
                 * Since the reservation is being broken, there is no harm in
@@ -818,7 +832,7 @@ vm_reserv_break_all(vm_object_t object)
                KASSERT(rv->object == object,
                    ("vm_reserv_break_all: reserv %p is corrupted", rv));
                if (rv->inpartpopq) {
-                       TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
+                       TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
                        rv->inpartpopq = FALSE;
                }
                vm_reserv_break(rv, NULL);
@@ -856,7 +870,7 @@ vm_reserv_init(void)
 {
        vm_paddr_t paddr;
        struct vm_phys_seg *seg;
-       int segind;
+       int i, segind;
 
        /*
         * Initialize the reservation array.  Specifically, initialize the
@@ -871,6 +885,8 @@ vm_reserv_init(void)
                        paddr += VM_LEVEL_0_SIZE;
                }
        }
+       for (i = 0; i < MAXMEMDOM; i++)
+               TAILQ_INIT(&vm_rvq_partpop[i]);
 }
 
 /*
@@ -928,7 +944,10 @@ vm_reserv_reclaim(vm_reserv_t rv)
        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
        KASSERT(rv->inpartpopq,
            ("vm_reserv_reclaim: reserv %p's inpartpopq is FALSE", rv));
-       TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
+       KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
+           ("vm_reserv_reclaim: reserv %p's domain is corrupted %d",
+           rv, rv->domain));
+       TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
        rv->inpartpopq = FALSE;
        vm_reserv_break(rv, NULL);
        vm_reserv_reclaimed++;
@@ -942,12 +961,12 @@ vm_reserv_reclaim(vm_reserv_t rv)
  * The free page queue lock must be held.
  */
 boolean_t
-vm_reserv_reclaim_inactive(void)
+vm_reserv_reclaim_inactive(int domain)
 {
        vm_reserv_t rv;
 
        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
-       if ((rv = TAILQ_FIRST(&vm_rvq_partpop)) != NULL) {
+       if ((rv = TAILQ_FIRST(&vm_rvq_partpop[domain])) != NULL) {
                vm_reserv_reclaim(rv);
                return (TRUE);
        }
@@ -963,8 +982,8 @@ vm_reserv_reclaim_inactive(void)
  * The free page queue lock must be held.
  */
 boolean_t
-vm_reserv_reclaim_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
-    u_long alignment, vm_paddr_t boundary)
+vm_reserv_reclaim_contig(int domain, u_long npages, vm_paddr_t low,
+    vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
 {
        vm_paddr_t pa, size;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
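
The truncated remainder of vm_reserv.c continues the same conversion: the
single vm_rvq_partpop queue becomes a per-domain array and the reclaim
routines take a domain argument.  A sketch of the per-domain reclaim path,
condensed from the vm_page_alloc_domain_after() hunk above (free page queue
lock held, default pool assumed):

        /*
         * Try to break an inactive reservation in "domain" to refill its
         * free queues; TRUE means one was reclaimed and the allocation
         * can be retried in that domain.
         */
        if (m == NULL && vm_reserv_reclaim_inactive(domain))
                m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT, 0);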