Author: alc
Date: Sat Dec 19 18:42:50 2015
New Revision: 292469
URL: https://svnweb.freebsd.org/changeset/base/292469

Log:
  Introduce a new mechanism for relocating virtual pages to a new physical
  address and use this mechanism when:
  
  1. kmem_alloc_{attr,contig}() can't find suitable free pages in the physical
     memory allocator's free page lists.  This replaces the long-standing
     approach of scanning the inactive and active queues, converting clean
     pages into PG_CACHED pages and laundering dirty pages.  In contrast, the
     new mechanism does not use PG_CACHED pages nor does it trigger a large
     number of I/O operations.
  
  2. on 32-bit MIPS processors, uma_small_alloc() and the pmap can't find
     free pages in the physical memory allocator's free page lists that are
     covered by the direct map.  Tested by: adrian
  
  3. ttm_bo_global_init() and ttm_vm_page_alloc_dma32() can't find suitable
     free pages in the physical memory allocator's free page lists.
  
  In the coming months, I expect that this new mechanism will be applied in
  other places.  For example, balloon drivers should use relocation to
  minimize fragmentation of the guest physical address space.
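  
  For callers, the conversion follows a common retry pattern.  The sketch
  below is illustrative only; the names follow the kmem_alloc_contig() hunk
  further down, and the bookkeeping that bounds the number of retries is
  elided:
  
        m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags,
            npages, low, high, alignment, boundary, memattr);
        if (m == NULL) {
                /*
                 * Ask the new mechanism to relocate in-use pages out of a
                 * suitable physical range; sleep only if that fails and
                 * the caller is allowed to wait.
                 */
                if (!vm_page_reclaim_contig(pflags, npages, low, high,
                    alignment, boundary) && (flags & M_WAITOK) != 0)
                        VM_WAIT;
                /* ... retry the allocation ... */
        }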
  
  Make vm_phys_alloc_contig() a little smarter (and more efficient in some
  cases).  Specifically, use vm_phys_segs[] earlier to avoid scanning free
  page lists that can't possibly contain suitable pages.
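  
  The vm_phys.c hunk falls past the truncation point below.  Conceptually,
  the change amounts to a segment pre-filter along the following lines; this
  is an illustrative sketch, not the committed code (field names as in
  vm_phys.h):
  
        struct vm_phys_seg *seg;
        int segind;
  
        for (segind = 0; segind < vm_phys_nsegs; segind++) {
                seg = &vm_phys_segs[segind];
                /* Skip segments that cannot intersect [low, high). */
                if (seg->start >= high || seg->end <= low)
                        continue;
                /* ... search only this segment's free lists ... */
        }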
  
  Reviewed by:  kib, markj
  Glanced at:   jhb
  Discussed with:       jeff
  Sponsored by: EMC / Isilon Storage Division
  Differential Revision:        https://reviews.freebsd.org/D4444

Modified:
  head/sys/dev/drm2/ttm/ttm_bo.c
  head/sys/dev/drm2/ttm/ttm_page_alloc.c
  head/sys/mips/include/pmap.h
  head/sys/mips/mips/pmap.c
  head/sys/mips/mips/uma_machdep.c
  head/sys/vm/vm_kern.c
  head/sys/vm/vm_page.c
  head/sys/vm/vm_page.h
  head/sys/vm/vm_pageout.c
  head/sys/vm/vm_pageout.h
  head/sys/vm/vm_phys.c
  head/sys/vm/vm_phys.h
  head/sys/vm/vm_reserv.c
  head/sys/vm/vm_reserv.h

Modified: head/sys/dev/drm2/ttm/ttm_bo.c
==============================================================================
--- head/sys/dev/drm2/ttm/ttm_bo.c      Sat Dec 19 13:19:39 2015        (r292468)
+++ head/sys/dev/drm2/ttm/ttm_bo.c      Sat Dec 19 18:42:50 2015        (r292469)
@@ -1488,21 +1488,21 @@ int ttm_bo_global_init(struct drm_global
        struct ttm_bo_global_ref *bo_ref =
                container_of(ref, struct ttm_bo_global_ref, ref);
        struct ttm_bo_global *glob = ref->object;
-       int ret;
+       int req, ret;
        int tries;
 
        sx_init(&glob->device_list_mutex, "ttmdlm");
        mtx_init(&glob->lru_lock, "ttmlru", NULL, MTX_DEF);
        glob->mem_glob = bo_ref->mem_glob;
+       req = VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ;
        tries = 0;
 retry:
-       glob->dummy_read_page = vm_page_alloc_contig(NULL, 0,
-           VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ,
+       glob->dummy_read_page = vm_page_alloc_contig(NULL, 0, req,
            1, 0, VM_MAX_ADDRESS, PAGE_SIZE, 0, VM_MEMATTR_UNCACHEABLE);
 
        if (unlikely(glob->dummy_read_page == NULL)) {
-               if (tries < 1) {
-                       vm_pageout_grow_cache(tries, 0, VM_MAX_ADDRESS);
+               if (tries < 1 && vm_page_reclaim_contig(req, 1,
+                   0, VM_MAX_ADDRESS, PAGE_SIZE, 0)) {
                        tries++;
                        goto retry;
                }

Modified: head/sys/dev/drm2/ttm/ttm_page_alloc.c
==============================================================================
--- head/sys/dev/drm2/ttm/ttm_page_alloc.c      Sat Dec 19 13:19:39 2015        (r292468)
+++ head/sys/dev/drm2/ttm/ttm_page_alloc.c      Sat Dec 19 18:42:50 2015        (r292469)
@@ -166,13 +166,9 @@ ttm_vm_page_alloc_dma32(int req, vm_mema
                    PAGE_SIZE, 0, memattr);
                if (p != NULL || tries > 2)
                        return (p);
-
-               /*
-                * Before growing the cache see if this is just a normal
-                * memory shortage.
-                */
-               VM_WAIT;
-               vm_pageout_grow_cache(tries, 0, 0xffffffff);
+               if (!vm_page_reclaim_contig(req, 1, 0, 0xffffffff,
+                   PAGE_SIZE, 0))
+                       VM_WAIT;
        }
 }
 

Modified: head/sys/mips/include/pmap.h
==============================================================================
--- head/sys/mips/include/pmap.h        Sat Dec 19 13:19:39 2015        (r292468)
+++ head/sys/mips/include/pmap.h        Sat Dec 19 18:42:50 2015        (r292469)
@@ -178,7 +178,6 @@ void *pmap_kenter_temporary(vm_paddr_t p
 void pmap_kenter_temporary_free(vm_paddr_t pa);
 void pmap_flush_pvcache(vm_page_t m);
 int pmap_emulate_modified(pmap_t pmap, vm_offset_t va);
-void pmap_grow_direct_page_cache(void);
 void pmap_page_set_memattr(vm_page_t, vm_memattr_t);
 
 #endif                         /* _KERNEL */

Modified: head/sys/mips/mips/pmap.c
==============================================================================
--- head/sys/mips/mips/pmap.c   Sat Dec 19 13:19:39 2015        (r292468)
+++ head/sys/mips/mips/pmap.c   Sat Dec 19 18:42:50 2015        (r292469)
@@ -166,6 +166,7 @@ static pv_entry_t pmap_pvh_remove(struct
 static vm_page_t pmap_alloc_direct_page(unsigned int index, int req);
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
     vm_page_t m, vm_prot_t prot, vm_page_t mpte);
+static void pmap_grow_direct_page(int req);
 static int pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va,
     pd_entry_t pde);
 static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
@@ -1040,14 +1041,16 @@ pmap_pinit0(pmap_t pmap)
        bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 }
 
-void
-pmap_grow_direct_page_cache()
+static void
+pmap_grow_direct_page(int req)
 {
 
 #ifdef __mips_n64
        VM_WAIT;
 #else
-       vm_pageout_grow_cache(3, 0, MIPS_KSEG0_LARGEST_PHYS);
+       if (!vm_page_reclaim_contig(req, 1, 0, MIPS_KSEG0_LARGEST_PHYS,
+           PAGE_SIZE, 0))
+               VM_WAIT;
 #endif
 }
 
@@ -1077,13 +1080,15 @@ pmap_pinit(pmap_t pmap)
 {
        vm_offset_t ptdva;
        vm_page_t ptdpg;
-       int i;
+       int i, req_class;
 
        /*
         * allocate the page directory page
         */
-       while ((ptdpg = pmap_alloc_direct_page(NUSERPGTBLS, VM_ALLOC_NORMAL)) == NULL)
-              pmap_grow_direct_page_cache();
+       req_class = VM_ALLOC_NORMAL;
+       while ((ptdpg = pmap_alloc_direct_page(NUSERPGTBLS, req_class)) ==
+           NULL)
+               pmap_grow_direct_page(req_class);
 
        ptdva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(ptdpg));
        pmap->pm_segtab = (pd_entry_t *)ptdva;
@@ -1107,15 +1112,17 @@ _pmap_allocpte(pmap_t pmap, unsigned pte
 {
        vm_offset_t pageva;
        vm_page_t m;
+       int req_class;
 
        /*
         * Find or fabricate a new pagetable page
         */
-       if ((m = pmap_alloc_direct_page(ptepindex, VM_ALLOC_NORMAL)) == NULL) {
+       req_class = VM_ALLOC_NORMAL;
+       if ((m = pmap_alloc_direct_page(ptepindex, req_class)) == NULL) {
                if ((flags & PMAP_ENTER_NOSLEEP) == 0) {
                        PMAP_UNLOCK(pmap);
                        rw_wunlock(&pvh_global_lock);
-                       pmap_grow_direct_page_cache();
+                       pmap_grow_direct_page(req_class);
                        rw_wlock(&pvh_global_lock);
                        PMAP_LOCK(pmap);
                }
@@ -1241,9 +1248,10 @@ pmap_growkernel(vm_offset_t addr)
        vm_page_t nkpg;
        pd_entry_t *pde, *pdpe;
        pt_entry_t *pte;
-       int i;
+       int i, req_class;
 
        mtx_assert(&kernel_map->system_mtx, MA_OWNED);
+       req_class = VM_ALLOC_INTERRUPT;
        addr = roundup2(addr, NBSEG);
        if (addr - 1 >= kernel_map->max_offset)
                addr = kernel_map->max_offset;
@@ -1252,7 +1260,7 @@ pmap_growkernel(vm_offset_t addr)
 #ifdef __mips_n64
                if (*pdpe == 0) {
                        /* new intermediate page table entry */
-                       nkpg = pmap_alloc_direct_page(nkpt, VM_ALLOC_INTERRUPT);
+                       nkpg = pmap_alloc_direct_page(nkpt, req_class);
                        if (nkpg == NULL)
                                panic("pmap_growkernel: no memory to grow 
kernel");
                        *pdpe = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg));
@@ -1272,8 +1280,13 @@ pmap_growkernel(vm_offset_t addr)
                /*
                 * This index is bogus, but out of the way
                 */
-               nkpg = pmap_alloc_direct_page(nkpt, VM_ALLOC_INTERRUPT);
-               if (!nkpg)
+               nkpg = pmap_alloc_direct_page(nkpt, req_class);
+#ifndef __mips_n64
+               if (nkpg == NULL && vm_page_reclaim_contig(req_class, 1,
+                   0, MIPS_KSEG0_LARGEST_PHYS, PAGE_SIZE, 0))
+                       nkpg = pmap_alloc_direct_page(nkpt, req_class);
+#endif
+               if (nkpg == NULL)
                        panic("pmap_growkernel: no memory to grow kernel");
                nkpt++;
                *pde = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg));

Modified: head/sys/mips/mips/uma_machdep.c
==============================================================================
--- head/sys/mips/mips/uma_machdep.c    Sat Dec 19 13:19:39 2015        (r292468)
+++ head/sys/mips/mips/uma_machdep.c    Sat Dec 19 18:42:50 2015        (r292469)
@@ -53,11 +53,16 @@ uma_small_alloc(uma_zone_t zone, vm_size
 
        for (;;) {
                m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, pflags);
+#ifndef __mips_n64
+               if (m == NULL && vm_page_reclaim_contig(pflags, 1,
+                   0, MIPS_KSEG0_LARGEST_PHYS, PAGE_SIZE, 0))
+                       continue;
+#endif
                if (m == NULL) {
                        if (wait & M_NOWAIT)
                                return (NULL);
                        else
-                               pmap_grow_direct_page_cache();
+                               VM_WAIT;
                } else
                        break;
        }

Modified: head/sys/vm/vm_kern.c
==============================================================================
--- head/sys/vm/vm_kern.c       Sat Dec 19 13:19:39 2015        (r292468)
+++ head/sys/vm/vm_kern.c       Sat Dec 19 18:42:50 2015        (r292469)
@@ -181,7 +181,10 @@ retry:
                if (m == NULL) {
                        VM_OBJECT_WUNLOCK(object);
                        if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
-                               vm_pageout_grow_cache(tries, low, high);
+                               if (!vm_page_reclaim_contig(pflags, 1,
+                                   low, high, PAGE_SIZE, 0) &&
+                                   (flags & M_WAITOK) != 0)
+                                       VM_WAIT;
                                VM_OBJECT_WLOCK(object);
                                tries++;
                                goto retry;
@@ -217,6 +220,7 @@ kmem_alloc_contig(struct vmem *vmem, vm_
        vm_offset_t addr, tmp;
        vm_ooffset_t offset;
        vm_page_t end_m, m;
+       u_long npages;
        int pflags, tries;
  
        size = round_page(size);
@@ -224,15 +228,18 @@ kmem_alloc_contig(struct vmem *vmem, vm_
                return (0);
        offset = addr - VM_MIN_KERNEL_ADDRESS;
        pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED;
+       npages = atop(size);
        VM_OBJECT_WLOCK(object);
        tries = 0;
 retry:
        m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags,
-           atop(size), low, high, alignment, boundary, memattr);
+           npages, low, high, alignment, boundary, memattr);
        if (m == NULL) {
                VM_OBJECT_WUNLOCK(object);
                if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
-                       vm_pageout_grow_cache(tries, low, high);
+                       if (!vm_page_reclaim_contig(pflags, npages, low, high,
+                           alignment, boundary) && (flags & M_WAITOK) != 0)
+                               VM_WAIT;
                        VM_OBJECT_WLOCK(object);
                        tries++;
                        goto retry;
@@ -240,7 +247,7 @@ retry:
                vmem_free(vmem, addr, size);
                return (0);
        }
-       end_m = m + atop(size);
+       end_m = m + npages;
        tmp = addr;
        for (; m < end_m; m++) {
                if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)

Modified: head/sys/vm/vm_page.c
==============================================================================
--- head/sys/vm/vm_page.c       Sat Dec 19 13:19:39 2015        (r292468)
+++ head/sys/vm/vm_page.c       Sat Dec 19 18:42:50 2015        (r292469)
@@ -158,11 +158,14 @@ static struct vnode *vm_page_alloc_init(
 static void vm_page_cache_turn_free(vm_page_t m);
 static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
 static void vm_page_enqueue(uint8_t queue, vm_page_t m);
+static void vm_page_free_wakeup(void);
 static void vm_page_init_fakepg(void *dummy);
 static int vm_page_insert_after(vm_page_t m, vm_object_t object,
     vm_pindex_t pindex, vm_page_t mpred);
 static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object,
     vm_page_t mpred);
+static int vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run,
+    vm_paddr_t high);
 
 SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init_fakepg, NULL);
 
@@ -2093,6 +2096,592 @@ vm_page_alloc_freelist(int flind, int re
        return (m);
 }
 
+#define        VPSC_ANY        0       /* No restrictions. */
+#define        VPSC_NORESERV   1       /* Skip reservations; implies VPSC_NOSUPER. */
+#define        VPSC_NOSUPER    2       /* Skip superpages. */
+
+/*
+ *     vm_page_scan_contig:
+ *
+ *     Scan vm_page_array[] between the specified entries "m_start" and
+ *     "m_end" for a run of contiguous physical pages that satisfy the
+ *     specified conditions, and return the lowest page in the run.  The
+ *     specified "alignment" determines the alignment of the lowest physical
+ *     page in the run.  If the specified "boundary" is non-zero, then the
+ *     run of physical pages cannot span a physical address that is a
+ *     multiple of "boundary".
+ *
+ *     "m_end" is never dereferenced, so it need not point to a vm_page
+ *     structure within vm_page_array[].
+ *
+ *     "npages" must be greater than zero.  "m_start" and "m_end" must not
+ *     span a hole (or discontiguity) in the physical address space.  Both
+ *     "alignment" and "boundary" must be a power of two.
+ */
+vm_page_t
+vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end,
+    u_long alignment, vm_paddr_t boundary, int options)
+{
+       struct mtx *m_mtx, *new_mtx;
+       vm_object_t object;
+       vm_paddr_t pa;
+       vm_page_t m, m_run;
+#if VM_NRESERVLEVEL > 0
+       int level;
+#endif
+       int m_inc, order, run_ext, run_len;
+
+       KASSERT(npages > 0, ("npages is 0"));
+       KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
+       KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
+       m_run = NULL;
+       run_len = 0;
+       m_mtx = NULL;
+       for (m = m_start; m < m_end && run_len < npages; m += m_inc) {
+               KASSERT((m->flags & (PG_FICTITIOUS | PG_MARKER)) == 0,
+                   ("page %p is PG_FICTITIOUS or PG_MARKER", m));
+
+               /*
+                * If the current page would be the start of a run, check its
+                * physical address against the end, alignment, and boundary
+                * conditions.  If it doesn't satisfy these conditions, either
+                * terminate the scan or advance to the next page that
+                * satisfies the failed condition.
+                */
+               if (run_len == 0) {
+                       KASSERT(m_run == NULL, ("m_run != NULL"));
+                       if (m + npages > m_end)
+                               break;
+                       pa = VM_PAGE_TO_PHYS(m);
+                       if ((pa & (alignment - 1)) != 0) {
+                               m_inc = atop(roundup2(pa, alignment) - pa);
+                               continue;
+                       }
+                       if (((pa ^ (pa + ptoa(npages) - 1)) & ~(boundary -
+                           1)) != 0) {
+                               m_inc = atop(roundup2(pa, boundary) - pa);
+                               continue;
+                       }
+               } else
+                       KASSERT(m_run != NULL, ("m_run == NULL"));
+
+               /*
+                * Avoid releasing and reacquiring the same page lock.
+                */
+               new_mtx = vm_page_lockptr(m);
+               if (m_mtx != new_mtx) {
+                       if (m_mtx != NULL)
+                               mtx_unlock(m_mtx);
+                       m_mtx = new_mtx;
+                       mtx_lock(m_mtx);
+               }
+               m_inc = 1;
+retry:
+               if (m->wire_count != 0 || m->hold_count != 0)
+                       run_ext = 0;
+#if VM_NRESERVLEVEL > 0
+               else if ((level = vm_reserv_level(m)) >= 0 &&
+                   (options & VPSC_NORESERV) != 0) {
+                       run_ext = 0;
+                       /* Advance to the end of the reservation. */
+                       pa = VM_PAGE_TO_PHYS(m);
+                       m_inc = atop(roundup2(pa + 1, vm_reserv_size(level)) -
+                           pa);
+               }
+#endif
+               else if ((object = m->object) != NULL) {
+                       /*
+                        * The page is considered eligible for relocation if
+                        * and only if it could be laundered or reclaimed by
+                        * the page daemon.
+                        */
+                       if (!VM_OBJECT_TRYRLOCK(object)) {
+                               mtx_unlock(m_mtx);
+                               VM_OBJECT_RLOCK(object);
+                               mtx_lock(m_mtx);
+                               if (m->object != object) {
+                                       /*
+                                        * The page may have been freed.
+                                        */
+                                       VM_OBJECT_RUNLOCK(object);
+                                       goto retry;
+                               } else if (m->wire_count != 0 ||
+                                   m->hold_count != 0) {
+                                       run_ext = 0;
+                                       goto unlock;
+                               }
+                       }
+                       KASSERT((m->flags & PG_UNHOLDFREE) == 0,
+                           ("page %p is PG_UNHOLDFREE", m));
+                       /* Don't care: PG_NODUMP, PG_WINATCFLS, PG_ZERO. */
+                       if (object->type != OBJT_DEFAULT &&
+                           object->type != OBJT_SWAP &&
+                           object->type != OBJT_VNODE)
+                               run_ext = 0;
+                       else if ((m->flags & PG_CACHED) != 0 ||
+                           m != vm_page_lookup(object, m->pindex)) {
+                               /*
+                                * The page is cached or recently converted
+                                * from cached to free.
+                                */
+#if VM_NRESERVLEVEL > 0
+                               if (level >= 0) {
+                                       /*
+                                        * The page is reserved.  Extend the
+                                        * current run by one page.
+                                        */
+                                       run_ext = 1;
+                               } else
+#endif
+                               if ((order = m->order) < VM_NFREEORDER) {
+                                       /*
+                                        * The page is enqueued in the
+                                        * physical memory allocator's cache/
+                                        * free page queues.  Moreover, it is
+                                        * the first page in a power-of-two-
+                                        * sized run of contiguous cache/free
+                                        * pages.  Add these pages to the end
+                                        * of the current run, and jump
+                                        * ahead.
+                                        */
+                                       run_ext = 1 << order;
+                                       m_inc = 1 << order;
+                               } else
+                                       run_ext = 0;
+#if VM_NRESERVLEVEL > 0
+                       } else if ((options & VPSC_NOSUPER) != 0 &&
+                           (level = vm_reserv_level_iffullpop(m)) >= 0) {
+                               run_ext = 0;
+                               /* Advance to the end of the superpage. */
+                               pa = VM_PAGE_TO_PHYS(m);
+                               m_inc = atop(roundup2(pa + 1,
+                                   vm_reserv_size(level)) - pa);
+#endif
+                       } else if (object->memattr == VM_MEMATTR_DEFAULT &&
+                           m->queue != PQ_NONE && !vm_page_busied(m)) {
+                               /*
+                                * The page is allocated but eligible for
+                                * relocation.  Extend the current run by one
+                                * page.
+                                */
+                               KASSERT(pmap_page_get_memattr(m) ==
+                                   VM_MEMATTR_DEFAULT,
+                                   ("page %p has an unexpected memattr", m));
+                               KASSERT((m->oflags & (VPO_SWAPINPROG |
+                                   VPO_SWAPSLEEP | VPO_UNMANAGED)) == 0,
+                                   ("page %p has unexpected oflags", m));
+                               /* Don't care: VPO_NOSYNC. */
+                               run_ext = 1;
+                       } else
+                               run_ext = 0;
+unlock:
+                       VM_OBJECT_RUNLOCK(object);
+#if VM_NRESERVLEVEL > 0
+               } else if (level >= 0) {
+                       /*
+                        * The page is reserved but not yet allocated.  In
+                        * other words, it is still cached or free.  Extend
+                        * the current run by one page.
+                        */
+                       run_ext = 1;
+#endif
+               } else if ((order = m->order) < VM_NFREEORDER) {
+                       /*
+                        * The page is enqueued in the physical memory
+                        * allocator's cache/free page queues.  Moreover, it
+                        * is the first page in a power-of-two-sized run of
+                        * contiguous cache/free pages.  Add these pages to
+                        * the end of the current run, and jump ahead.
+                        */
+                       run_ext = 1 << order;
+                       m_inc = 1 << order;
+               } else {
+                       /*
+                        * Skip the page for one of the following reasons: (1)
+                        * It is enqueued in the physical memory allocator's
+                        * cache/free page queues.  However, it is not the
+                        * first page in a run of contiguous cache/free pages.
+                        * (This case rarely occurs because the scan is
+                        * performed in ascending order.) (2) It is not
+                        * reserved, and it is transitioning from free to
+                        * allocated.  (Conversely, the transition from
+                        * allocated to free for managed pages is blocked by
+                        * the page lock.) (3) It is allocated but not
+                        * contained by an object and not wired, e.g.,
+                        * allocated by Xen's balloon driver.
+                        */
+                       run_ext = 0;
+               }
+
+               /*
+                * Extend or reset the current run of pages.
+                */
+               if (run_ext > 0) {
+                       if (run_len == 0)
+                               m_run = m;
+                       run_len += run_ext;
+               } else {
+                       if (run_len > 0) {
+                               m_run = NULL;
+                               run_len = 0;
+                       }
+               }
+       }
+       if (m_mtx != NULL)
+               mtx_unlock(m_mtx);
+       if (run_len >= npages)
+               return (m_run);
+       return (NULL);
+}
+
+/*
+ *     vm_page_reclaim_run:
+ *
+ *     Try to relocate each of the allocated virtual pages within the
+ *     specified run of physical pages to a new physical address.  Free the
+ *     physical pages underlying the relocated virtual pages.  A virtual page
+ *     is relocatable if and only if it could be laundered or reclaimed by
+ *     the page daemon.  Whenever possible, a virtual page is relocated to a
+ *     physical address above "high".
+ *
+ *     Returns 0 if every physical page within the run was already free or
+ *     just freed by a successful relocation.  Otherwise, returns a non-zero
+ *     value indicating why the last attempt to relocate a virtual page was
+ *     unsuccessful.
+ *
+ *     "req_class" must be an allocation class.
+ */
+static int
+vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run,
+    vm_paddr_t high)
+{
+       struct mtx *m_mtx, *new_mtx;
+       struct spglist free;
+       vm_object_t object;
+       vm_paddr_t pa;
+       vm_page_t m, m_end, m_new;
+       int error, order, req;
+
+       KASSERT((req_class & VM_ALLOC_CLASS_MASK) == req_class,
+           ("req_class is not an allocation class"));
+       SLIST_INIT(&free);
+       error = 0;
+       m = m_run;
+       m_end = m_run + npages;
+       m_mtx = NULL;
+       for (; error == 0 && m < m_end; m++) {
+               KASSERT((m->flags & (PG_FICTITIOUS | PG_MARKER)) == 0,
+                   ("page %p is PG_FICTITIOUS or PG_MARKER", m));
+
+               /*
+                * Avoid releasing and reacquiring the same page lock.
+                */
+               new_mtx = vm_page_lockptr(m);
+               if (m_mtx != new_mtx) {
+                       if (m_mtx != NULL)
+                               mtx_unlock(m_mtx);
+                       m_mtx = new_mtx;
+                       mtx_lock(m_mtx);
+               }
+retry:
+               if (m->wire_count != 0 || m->hold_count != 0)
+                       error = EBUSY;
+               else if ((object = m->object) != NULL) {
+                       /*
+                        * The page is relocated if and only if it could be
+                        * laundered or reclaimed by the page daemon.
+                        */
+                       if (!VM_OBJECT_TRYWLOCK(object)) {
+                               mtx_unlock(m_mtx);
+                               VM_OBJECT_WLOCK(object);
+                               mtx_lock(m_mtx);
+                               if (m->object != object) {
+                                       /*
+                                        * The page may have been freed.
+                                        */
+                                       VM_OBJECT_WUNLOCK(object);
+                                       goto retry;
+                               } else if (m->wire_count != 0 ||
+                                   m->hold_count != 0) {
+                                       error = EBUSY;
+                                       goto unlock;
+                               }
+                       }
+                       KASSERT((m->flags & PG_UNHOLDFREE) == 0,
+                           ("page %p is PG_UNHOLDFREE", m));
+                       /* Don't care: PG_NODUMP, PG_WINATCFLS, PG_ZERO. */
+                       if (object->type != OBJT_DEFAULT &&
+                           object->type != OBJT_SWAP &&
+                           object->type != OBJT_VNODE)
+                               error = EINVAL;
+                       else if ((m->flags & PG_CACHED) != 0 ||
+                           m != vm_page_lookup(object, m->pindex)) {
+                               /*
+                                * The page is cached or recently converted
+                                * from cached to free.
+                                */
+                               VM_OBJECT_WUNLOCK(object);
+                               goto cached;
+                       } else if (object->memattr != VM_MEMATTR_DEFAULT)
+                               error = EINVAL;
+                       else if (m->queue != PQ_NONE && !vm_page_busied(m)) {
+                               KASSERT(pmap_page_get_memattr(m) ==
+                                   VM_MEMATTR_DEFAULT,
+                                   ("page %p has an unexpected memattr", m));
+                               KASSERT((m->oflags & (VPO_SWAPINPROG |
+                                   VPO_SWAPSLEEP | VPO_UNMANAGED)) == 0,
+                                   ("page %p has unexpected oflags", m));
+                               /* Don't care: VPO_NOSYNC. */
+                               if (m->valid != 0) {
+                                       /*
+                                        * First, try to allocate a new page
+                                        * that is above "high".  Failing
+                                        * that, try to allocate a new page
+                                        * that is below "m_run".  Allocate
+                                        * the new page between the end of
+                                        * "m_run" and "high" only as a last
+                                        * resort.
+                                        */
+                                       req = req_class | VM_ALLOC_NOOBJ;
+                                       if ((m->flags & PG_NODUMP) != 0)
+                                               req |= VM_ALLOC_NODUMP;
+                                       if (trunc_page(high) !=
+                                           ~(vm_paddr_t)PAGE_MASK) {
+                                               m_new = vm_page_alloc_contig(
+                                                   NULL, 0, req, 1,
+                                                   round_page(high),
+                                                   ~(vm_paddr_t)0,
+                                                   PAGE_SIZE, 0,
+                                                   VM_MEMATTR_DEFAULT);
+                                       } else
+                                               m_new = NULL;
+                                       if (m_new == NULL) {
+                                               pa = VM_PAGE_TO_PHYS(m_run);
+                                               m_new = vm_page_alloc_contig(
+                                                   NULL, 0, req, 1,
+                                                   0, pa - 1, PAGE_SIZE, 0,
+                                                   VM_MEMATTR_DEFAULT);
+                                       }
+                                       if (m_new == NULL) {
+                                               pa += ptoa(npages);
+                                               m_new = vm_page_alloc_contig(
+                                                   NULL, 0, req, 1,
+                                                   pa, high, PAGE_SIZE, 0,
+                                                   VM_MEMATTR_DEFAULT);
+                                       }
+                                       if (m_new == NULL) {
+                                               error = ENOMEM;
+                                               goto unlock;
+                                       }
+                                       KASSERT(m_new->wire_count == 0,
+                                           ("page %p is wired", m));
+
+                                       /*
+                                        * Replace "m" with the new page.  For
+                                        * vm_page_replace(), "m" must be busy
+                                        * and dequeued.  Finally, change "m"
+                                        * as if vm_page_free() was called.
+                                        */
+                                       if (object->ref_count != 0)
+                                               pmap_remove_all(m);
+                                       m_new->aflags = m->aflags;
+                                       KASSERT(m_new->oflags == VPO_UNMANAGED,
+                                           ("page %p is managed", m));
+                                       m_new->oflags = m->oflags & VPO_NOSYNC;
+                                       pmap_copy_page(m, m_new);
+                                       m_new->valid = m->valid;
+                                       m_new->dirty = m->dirty;
+                                       m->flags &= ~PG_ZERO;
+                                       vm_page_xbusy(m);
+                                       vm_page_remque(m);
+                                       vm_page_replace_checked(m_new, object,
+                                           m->pindex, m);
+                                       m->valid = 0;
+                                       vm_page_undirty(m);
+
+                                       /*
+                                        * The new page must be deactivated
+                                        * before the object is unlocked.
+                                        */
+                                       new_mtx = vm_page_lockptr(m_new);
+                                       if (m_mtx != new_mtx) {
+                                               mtx_unlock(m_mtx);
+                                               m_mtx = new_mtx;
+                                               mtx_lock(m_mtx);
+                                       }
+                                       vm_page_deactivate(m_new);
+                               } else {
+                                       m->flags &= ~PG_ZERO;
+                                       vm_page_remque(m);
+                                       vm_page_remove(m);
+                                       KASSERT(m->dirty == 0,
+                                           ("page %p is dirty", m));
+                               }
+                               SLIST_INSERT_HEAD(&free, m, plinks.s.ss);
+                       } else
+                               error = EBUSY;
+unlock:
+                       VM_OBJECT_WUNLOCK(object);
+               } else {
+cached:
+                       mtx_lock(&vm_page_queue_free_mtx);
+                       order = m->order;
+                       if (order < VM_NFREEORDER) {
+                               /*
+                                * The page is enqueued in the physical memory
+                                * allocator's cache/free page queues.
+                                * Moreover, it is the first page in a power-
+                                * of-two-sized run of contiguous cache/free
+                                * pages.  Jump ahead to the last page within
+                                * that run, and continue from there.
+                                */
+                               m += (1 << order) - 1;
+                       }
+#if VM_NRESERVLEVEL > 0
+                       else if (vm_reserv_is_page_free(m))
+                               order = 0;
+#endif
+                       mtx_unlock(&vm_page_queue_free_mtx);
+                       if (order == VM_NFREEORDER)
+                               error = EINVAL;
+               }
+       }
+       if (m_mtx != NULL)
+               mtx_unlock(m_mtx);
+       if ((m = SLIST_FIRST(&free)) != NULL) {
+               mtx_lock(&vm_page_queue_free_mtx);
+               do {
+                       SLIST_REMOVE_HEAD(&free, plinks.s.ss);
+                       vm_phys_freecnt_adj(m, 1);
+#if VM_NRESERVLEVEL > 0
+                       if (!vm_reserv_free_page(m))
+#else
+                       if (true)
+#endif
+                               vm_phys_free_pages(m, 0);
+               } while ((m = SLIST_FIRST(&free)) != NULL);
+               vm_page_zero_idle_wakeup();
+               vm_page_free_wakeup();
+               mtx_unlock(&vm_page_queue_free_mtx);
+       }
+       return (error);
+}
+
+#define        NRUNS   16
+
+CTASSERT(powerof2(NRUNS));
+
+#define        RUN_INDEX(count)        ((count) & (NRUNS - 1))
+
+#define        MIN_RECLAIM     8
+
+/*
+ *     vm_page_reclaim_contig:
+ *
+ *     Reclaim allocated, contiguous physical memory satisfying the specified
+ *     conditions by relocating the virtual pages using that physical memory.
+ *     Returns true if reclamation is successful and false otherwise.  Since
+ *     relocation requires the allocation of physical pages, reclamation may
+ *     fail due to a shortage of cache/free pages.  When reclamation fails,
+ *     callers are expected to perform VM_WAIT before retrying a failed
+ *     allocation operation, e.g., vm_page_alloc_contig().
+ *
+ *     The caller must always specify an allocation class through "req".
+ *
+ *     allocation classes:
+ *     VM_ALLOC_NORMAL         normal process request
+ *     VM_ALLOC_SYSTEM         system *really* needs a page
+ *     VM_ALLOC_INTERRUPT      interrupt time request
+ *
+ *     The optional allocation flags are ignored.
+ *
+ *     "npages" must be greater than zero.  Both "alignment" and "boundary"
+ *     must be a power of two.
+ */
+bool
+vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high,
+    u_long alignment, vm_paddr_t boundary)
+{
+       vm_paddr_t curr_low;
+       vm_page_t m_run, m_runs[NRUNS];
+       u_long count, reclaimed;
+       int error, i, options, req_class;
+
+       KASSERT(npages > 0, ("npages is 0"));
+       KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
+       KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
+       req_class = req & VM_ALLOC_CLASS_MASK;
+
+       /*
+        * The page daemon is allowed to dig deeper into the free page list.
+        */
+       if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
+               req_class = VM_ALLOC_SYSTEM;
+
+       /*
+        * Return if the number of cached and free pages cannot satisfy the
+        * requested allocation.
+        */
+       count = vm_cnt.v_free_count + vm_cnt.v_cache_count;
+       if (count < npages + vm_cnt.v_free_reserved || (count < npages +
+           vm_cnt.v_interrupt_free_min && req_class == VM_ALLOC_SYSTEM) ||
+           (count < npages && req_class == VM_ALLOC_INTERRUPT))
+               return (false);
+
+       /*
+        * Scan up to three times, relaxing the restrictions ("options") on
+        * the reclamation of reservations and superpages each time.
+        */
+       for (options = VPSC_NORESERV;;) {
+               /*
+                * Find the highest runs that satisfy the given constraints
+                * and restrictions, and record them in "m_runs".
+                */
+               curr_low = low;
+               count = 0;
+               for (;;) {
+                       m_run = vm_phys_scan_contig(npages, curr_low, high,
+                           alignment, boundary, options);
+                       if (m_run == NULL)
+                               break;
+                       curr_low = VM_PAGE_TO_PHYS(m_run) + ptoa(npages);
+                       m_runs[RUN_INDEX(count)] = m_run;
+                       count++;
+               }
+
+               /*
+                * Reclaim the highest runs in LIFO (descending) order until
+                * the number of reclaimed pages, "reclaimed", is at least
+                * MIN_RECLAIM.  Reset "reclaimed" each time because each
+                * reclamation is idempotent, and runs will (likely) recur
+                * from one scan to the next as restrictions are relaxed.
+                */
+               reclaimed = 0;
+               for (i = 0; count > 0 && i < NRUNS; i++) {
+                       count--;
+                       m_run = m_runs[RUN_INDEX(count)];
+                       error = vm_page_reclaim_run(req_class, npages, m_run,
+                           high);
+                       if (error == 0) {
+                               reclaimed += npages;
+                               if (reclaimed >= MIN_RECLAIM)
+                                       return (true);
+                       }
+               }
+
+               /*
+                * Either relax the restrictions on the next scan or return if
+                * the last scan had no restrictions.
+                */
+               if (options == VPSC_NORESERV)
+                       options = VPSC_NOSUPER;
+               else if (options == VPSC_NOSUPER)
+                       options = VPSC_ANY;
+               else if (options == VPSC_ANY)
+                       return (reclaimed != 0);
+       }
+}
+
 /*
  *     vm_wait:        (also see VM_WAIT macro)
  *

Modified: head/sys/vm/vm_page.h
==============================================================================
--- head/sys/vm/vm_page.h       Sat Dec 19 13:19:39 2015        (r292468)
+++ head/sys/vm/vm_page.h       Sat Dec 19 18:42:50 2015        (r292469)
@@ -474,6 +474,8 @@ vm_page_t vm_page_prev(vm_page_t m);
 boolean_t vm_page_ps_is_valid(vm_page_t m);
 void vm_page_putfake(vm_page_t m);
 void vm_page_readahead_finish(vm_page_t m);
+bool vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low,
+    vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
 void vm_page_reference(vm_page_t m);
 void vm_page_remove (vm_page_t);
 int vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t);
@@ -482,6 +484,8 @@ vm_page_t vm_page_replace(vm_page_t mnew
 void vm_page_requeue(vm_page_t m);
 void vm_page_requeue_locked(vm_page_t m);
 int vm_page_sbusied(vm_page_t m);
+vm_page_t vm_page_scan_contig(u_long npages, vm_page_t m_start,
+    vm_page_t m_end, u_long alignment, vm_paddr_t boundary, int options);
 void vm_page_set_valid_range(vm_page_t m, int base, int size);
 int vm_page_sleep_if_busy(vm_page_t m, const char *msg);
 vm_offset_t vm_page_startup(vm_offset_t vaddr);

Modified: head/sys/vm/vm_pageout.c
==============================================================================
--- head/sys/vm/vm_pageout.c    Sat Dec 19 13:19:39 2015        (r292468)
+++ head/sys/vm/vm_pageout.c    Sat Dec 19 18:42:50 2015        (r292469)
@@ -237,8 +237,6 @@ SYSCTL_INT(_vm, OID_AUTO, max_wired,
        CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count");
 
 static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *);
-static boolean_t vm_pageout_launder(struct vm_pagequeue *pq, int, vm_paddr_t,
-    vm_paddr_t);
 #if !defined(NO_SWAPPING)
 static void vm_pageout_map_deactivate_pages(vm_map_t, long);
 static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long);
@@ -595,170 +593,6 @@ vm_pageout_flush(vm_page_t *mc, int coun
        return (numpagedout);
 }
 
-static boolean_t
-vm_pageout_launder(struct vm_pagequeue *pq, int tries, vm_paddr_t low,
-    vm_paddr_t high)
-{
-       struct mount *mp;
-       struct vnode *vp;
-       vm_object_t object;
-       vm_paddr_t pa;
-       vm_page_t m, m_tmp, next;
-       int lockmode;
-
-       vm_pagequeue_lock(pq);
-       TAILQ_FOREACH_SAFE(m, &pq->pq_pl, plinks.q, next) {
-               if ((m->flags & PG_MARKER) != 0)
-                       continue;
-               pa = VM_PAGE_TO_PHYS(m);
-               if (pa < low || pa + PAGE_SIZE > high)
-                       continue;
-               if (!vm_pageout_page_lock(m, &next) || m->hold_count != 0) {
-                       vm_page_unlock(m);
-                       continue;
-               }
-               object = m->object;
-               if ((!VM_OBJECT_TRYWLOCK(object) &&
-                   (!vm_pageout_fallback_object_lock(m, &next) ||
-                   m->hold_count != 0)) || vm_page_busied(m)) {
-                       vm_page_unlock(m);
-                       VM_OBJECT_WUNLOCK(object);
-                       continue;
-               }
-               vm_page_test_dirty(m);
-               if (m->dirty == 0 && object->ref_count != 0)
-                       pmap_remove_all(m);
-               if (m->dirty != 0) {
-                       vm_page_unlock(m);
-                       if (tries == 0 || (object->flags & OBJ_DEAD) != 0) {
-                               VM_OBJECT_WUNLOCK(object);
-                               continue;
-                       }
-                       if (object->type == OBJT_VNODE) {
-                               vm_pagequeue_unlock(pq);
-                               vp = object->handle;
-                               vm_object_reference_locked(object);
-                               VM_OBJECT_WUNLOCK(object);
-                               (void)vn_start_write(vp, &mp, V_WAIT);
-                               lockmode = MNT_SHARED_WRITES(vp->v_mount) ?
-                                   LK_SHARED : LK_EXCLUSIVE;
-                               vn_lock(vp, lockmode | LK_RETRY);
-                               VM_OBJECT_WLOCK(object);
-                               vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
-                               VM_OBJECT_WUNLOCK(object);
-                               VOP_UNLOCK(vp, 0);
-                               vm_object_deallocate(object);
-                               vn_finished_write(mp);
-                               return (TRUE);
-                       } else if (object->type == OBJT_SWAP ||
-                           object->type == OBJT_DEFAULT) {
-                               vm_pagequeue_unlock(pq);
-                               m_tmp = m;
-                               vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC,
-                                   0, NULL, NULL);
-                               VM_OBJECT_WUNLOCK(object);
-                               return (TRUE);
-                       }
-               } else {
-                       /*
-                        * Dequeue here to prevent lock recursion in
-                        * vm_page_cache().
-                        */
-                       vm_page_dequeue_locked(m);
-                       vm_page_cache(m);
-                       vm_page_unlock(m);
-               }
-               VM_OBJECT_WUNLOCK(object);
-       }
-       vm_pagequeue_unlock(pq);
-       return (FALSE);
-}
-
-/*
- * Increase the number of cached pages.  The specified value, "tries",
- * determines which categories of pages are cached:
- *
- *  0: All clean, inactive pages within the specified physical address range
- *     are cached.  Will not sleep.
- *  1: The vm_lowmem handlers are called.  All inactive pages within
- *     the specified physical address range are cached.  May sleep.
- *  2: The vm_lowmem handlers are called.  All inactive and active pages
- *     within the specified physical address range are cached.  May sleep.
- */
-void
-vm_pageout_grow_cache(int tries, vm_paddr_t low, vm_paddr_t high)
-{
-       int actl, actmax, inactl, inactmax, dom, initial_dom;
-       static int start_dom = 0;
-
-       if (tries > 0) {
-               /*
-                * Decrease registered cache sizes.  The vm_lowmem handlers
-                * may acquire locks and/or sleep, so they can only be invoked
-                * when "tries" is greater than zero.
-                */
-               SDT_PROBE0(vm, , , vm__lowmem_cache);
-               EVENTHANDLER_INVOKE(vm_lowmem, 0);
-
-               /*
-                * We do this explicitly after the caches have been drained
-                * above.
-                */
-               uma_reclaim();
-       }
-
-       /*
-        * Make the next scan start on the next domain.
-        */
-       initial_dom = atomic_fetchadd_int(&start_dom, 1) % vm_ndomains;
-
-       inactl = 0;
-       inactmax = vm_cnt.v_inactive_count;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***