Author: jeff Date: Tue Feb 6 22:10:07 2018 New Revision: 328954 URL: https://svnweb.freebsd.org/changeset/base/328954
Log: Use per-domain locks for vm page queue free. Move paging control from global to per-domain state. Protect reservations with the free lock from the domain that they belong to. Refactor to make vm domains more of a first class object. Reviewed by: markj, kib, gallatin Tested by: pho Sponsored by: Netflix, Dell/EMC Isilon Differential Revision: https://reviews.freebsd.org/D14000 Added: head/sys/vm/vm_pagequeue.h (contents, props changed) Modified: head/sys/amd64/amd64/machdep.c head/sys/arm/arm/machdep.c head/sys/arm/arm/pmap-v4.c head/sys/cddl/compat/opensolaris/sys/kmem.h head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c head/sys/compat/linprocfs/linprocfs.c head/sys/fs/tmpfs/tmpfs_subr.c head/sys/i386/i386/machdep.c head/sys/kern/init_main.c head/sys/kern/subr_vmem.c head/sys/kern/subr_witness.c head/sys/mips/mips/machdep.c head/sys/powerpc/booke/pmap.c head/sys/powerpc/powerpc/machdep.c head/sys/sparc64/sparc64/machdep.c head/sys/sys/vmmeter.h head/sys/vm/swap_pager.c head/sys/vm/uma_core.c head/sys/vm/vm_extern.h head/sys/vm/vm_glue.c head/sys/vm/vm_init.c head/sys/vm/vm_kern.c head/sys/vm/vm_map.c head/sys/vm/vm_meter.c head/sys/vm/vm_object.c head/sys/vm/vm_object.h head/sys/vm/vm_page.c head/sys/vm/vm_page.h head/sys/vm/vm_pageout.c head/sys/vm/vm_pageout.h head/sys/vm/vm_phys.c head/sys/vm/vm_phys.h head/sys/vm/vm_reserv.c head/sys/vm/vm_reserv.h head/sys/vm/vm_swapout.c head/sys/vm/vnode_pager.c Modified: head/sys/amd64/amd64/machdep.c ============================================================================== --- head/sys/amd64/amd64/machdep.c Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/amd64/amd64/machdep.c Tue Feb 6 22:10:07 2018 (r328954) @@ -282,7 +282,7 @@ cpu_startup(dummy) memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; freeenv(sysenv); } - if (memsize < ptoa((uintmax_t)vm_cnt.v_free_count)) + if (memsize < ptoa((uintmax_t)vm_free_count())) memsize = ptoa((uintmax_t)Maxmem); printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); realmem = atop(memsize); @@ -309,8 +309,8 @@ cpu_startup(dummy) vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", - ptoa((uintmax_t)vm_cnt.v_free_count), - ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576); + ptoa((uintmax_t)vm_free_count()), + ptoa((uintmax_t)vm_free_count()) / 1048576); /* * Set up buffers, so they can be used to read disk labels. Modified: head/sys/arm/arm/machdep.c ============================================================================== --- head/sys/arm/arm/machdep.c Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/arm/arm/machdep.c Tue Feb 6 22:10:07 2018 (r328954) @@ -228,8 +228,8 @@ cpu_startup(void *dummy) (uintmax_t)arm32_ptob(realmem), (uintmax_t)arm32_ptob(realmem) / mbyte); printf("avail memory = %ju (%ju MB)\n", - (uintmax_t)arm32_ptob(vm_cnt.v_free_count), - (uintmax_t)arm32_ptob(vm_cnt.v_free_count) / mbyte); + (uintmax_t)arm32_ptob(vm_free_count()), + (uintmax_t)arm32_ptob(vm_free_count()) / mbyte); if (bootverbose) { arm_physmem_print_tables(); devmap_print_table(); Modified: head/sys/arm/arm/pmap-v4.c ============================================================================== --- head/sys/arm/arm/pmap-v4.c Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/arm/arm/pmap-v4.c Tue Feb 6 22:10:07 2018 (r328954) @@ -3817,7 +3817,7 @@ pmap_get_pv_entry(void) pv_entry_count++; if (pv_entry_count > pv_entry_high_water) - pagedaemon_wakeup(); + pagedaemon_wakeup(0); /* XXX ARM NUMA */ ret_value = uma_zalloc(pvzone, M_NOWAIT); return ret_value; } Modified: head/sys/cddl/compat/opensolaris/sys/kmem.h ============================================================================== --- head/sys/cddl/compat/opensolaris/sys/kmem.h Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/cddl/compat/opensolaris/sys/kmem.h Tue Feb 6 22:10:07 2018 (r328954) @@ -78,7 +78,7 @@ void kmem_reap(void); int kmem_debugging(void); void *calloc(size_t n, size_t s); -#define freemem vm_cnt.v_free_count +#define freemem vm_free_count() #define minfree vm_cnt.v_free_min #define heap_arena kernel_arena #define zio_arena NULL Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c ============================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c Tue Feb 6 22:10:07 2018 (r328954) @@ -379,7 +379,7 @@ static void arc_free_target_init(void *unused __unused) { - zfs_arc_free_target = vm_pageout_wakeup_thresh; + zfs_arc_free_target = (vm_cnt.v_free_min / 10) * 11; } SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY, arc_free_target_init, NULL); Modified: head/sys/compat/linprocfs/linprocfs.c ============================================================================== --- head/sys/compat/linprocfs/linprocfs.c Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/compat/linprocfs/linprocfs.c Tue Feb 6 22:10:07 2018 (r328954) @@ -156,7 +156,7 @@ linprocfs_domeminfo(PFS_FILL_ARGS) /* * The correct thing here would be: * - memfree = vm_cnt.v_free_count * PAGE_SIZE; + memfree = vm_free_count() * PAGE_SIZE; memused = memtotal - memfree; * * but it might mislead linux binaries into thinking there @@ -178,7 +178,7 @@ linprocfs_domeminfo(PFS_FILL_ARGS) * like unstaticizing it just for linprocfs's sake. */ buffers = 0; - cached = vm_cnt.v_inactive_count * PAGE_SIZE; + cached = vm_inactive_count() * PAGE_SIZE; sbuf_printf(sb, "MemTotal: %9lu kB\n" Modified: head/sys/fs/tmpfs/tmpfs_subr.c ============================================================================== --- head/sys/fs/tmpfs/tmpfs_subr.c Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/fs/tmpfs/tmpfs_subr.c Tue Feb 6 22:10:07 2018 (r328954) @@ -106,7 +106,7 @@ tmpfs_mem_avail(void) { vm_ooffset_t avail; - avail = swap_pager_avail + vm_cnt.v_free_count - tmpfs_pages_reserved; + avail = swap_pager_avail + vm_free_count() - tmpfs_pages_reserved; if (__predict_false(avail < 0)) avail = 0; return (avail); Modified: head/sys/i386/i386/machdep.c ============================================================================== --- head/sys/i386/i386/machdep.c Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/i386/i386/machdep.c Tue Feb 6 22:10:07 2018 (r328954) @@ -271,7 +271,7 @@ cpu_startup(dummy) memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; freeenv(sysenv); } - if (memsize < ptoa((uintmax_t)vm_cnt.v_free_count)) + if (memsize < ptoa((uintmax_t)vm_free_count())) memsize = ptoa((uintmax_t)Maxmem); printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); realmem = atop(memsize); @@ -298,8 +298,8 @@ cpu_startup(dummy) vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", - ptoa((uintmax_t)vm_cnt.v_free_count), - ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576); + ptoa((uintmax_t)vm_free_count()), + ptoa((uintmax_t)vm_free_count()) / 1048576); /* * Set up buffers, so they can be used to read disk labels. Modified: head/sys/kern/init_main.c ============================================================================== --- head/sys/kern/init_main.c Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/kern/init_main.c Tue Feb 6 22:10:07 2018 (r328954) @@ -87,6 +87,7 @@ __FBSDID("$FreeBSD$"); #include <vm/vm.h> #include <vm/vm_param.h> +#include <vm/vm_extern.h> #include <vm/pmap.h> #include <vm/vm_map.h> #include <sys/copyright.h> @@ -555,7 +556,7 @@ proc0_init(void *dummy __unused) p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_cur = dflssiz; p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_max = maxssiz; /* Cast to avoid overflow on i386/PAE. */ - pageablemem = ptoa((vm_paddr_t)vm_cnt.v_free_count); + pageablemem = ptoa((vm_paddr_t)vm_free_count()); p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = pageablemem; p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = pageablemem / 3; Modified: head/sys/kern/subr_vmem.c ============================================================================== --- head/sys/kern/subr_vmem.c Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/kern/subr_vmem.c Tue Feb 6 22:10:07 2018 (r328954) @@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #include <sys/taskqueue.h> #include <sys/vmem.h> +#include <sys/vmmeter.h> #include "opt_vm.h" @@ -72,6 +73,8 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_param.h> #include <vm/vm_page.h> #include <vm/vm_pageout.h> +#include <vm/vm_phys.h> +#include <vm/vm_pagequeue.h> #include <vm/uma_int.h> int vmem_startup_count(void); @@ -644,7 +647,7 @@ vmem_bt_alloc(uma_zone_t zone, vm_size_t bytes, int do * possible due to M_USE_RESERVE page allocation. */ if (wait & M_WAITOK) - VM_WAIT; + vm_wait_domain(domain); return (NULL); } mtx_unlock(&vmem_bt_lock); Modified: head/sys/kern/subr_witness.c ============================================================================== --- head/sys/kern/subr_witness.c Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/kern/subr_witness.c Tue Feb 6 22:10:07 2018 (r328954) @@ -139,7 +139,7 @@ __FBSDID("$FreeBSD$"); #define WITNESS_COUNT 1536 #endif #define WITNESS_HASH_SIZE 251 /* Prime, gives load factor < 2 */ -#define WITNESS_PENDLIST (2048 + MAXCPU) +#define WITNESS_PENDLIST (2048 + (MAXCPU * 4)) /* Allocate 256 KB of stack data space */ #define WITNESS_LO_DATA_COUNT 2048 Modified: head/sys/mips/mips/machdep.c ============================================================================== --- head/sys/mips/mips/machdep.c Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/mips/mips/machdep.c Tue Feb 6 22:10:07 2018 (r328954) @@ -210,8 +210,8 @@ cpu_startup(void *dummy) vm_ksubmap_init(&kmi); printf("avail memory = %ju (%juMB)\n", - ptoa((uintmax_t)vm_cnt.v_free_count), - ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576); + ptoa((uintmax_t)vm_free_count()), + ptoa((uintmax_t)vm_free_count()) / 1048576); cpu_init_interrupts(); /* Modified: head/sys/powerpc/booke/pmap.c ============================================================================== --- head/sys/powerpc/booke/pmap.c Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/powerpc/booke/pmap.c Tue Feb 6 22:10:07 2018 (r328954) @@ -1190,7 +1190,7 @@ pv_alloc(void) pv_entry_count++; if (pv_entry_count > pv_entry_high_water) - pagedaemon_wakeup(); + pagedaemon_wakeup(0); /* XXX powerpc NUMA */ pv = uma_zalloc(pvzone, M_NOWAIT); return (pv); Modified: head/sys/powerpc/powerpc/machdep.c ============================================================================== --- head/sys/powerpc/powerpc/machdep.c Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/powerpc/powerpc/machdep.c Tue Feb 6 22:10:07 2018 (r328954) @@ -221,8 +221,8 @@ cpu_startup(void *dummy) vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", - ptoa((uintmax_t)vm_cnt.v_free_count), - ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576); + ptoa((uintmax_t)vm_free_count()), + ptoa((uintmax_t)vm_free_count()) / 1048576); /* * Set up buffers, so they can be used to read disk labels. Modified: head/sys/sparc64/sparc64/machdep.c ============================================================================== --- head/sys/sparc64/sparc64/machdep.c Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/sparc64/sparc64/machdep.c Tue Feb 6 22:10:07 2018 (r328954) @@ -190,8 +190,8 @@ cpu_startup(void *arg) EVENTHANDLER_REGISTER(shutdown_final, sparc64_shutdown_final, NULL, SHUTDOWN_PRI_LAST); - printf("avail memory = %lu (%lu MB)\n", vm_cnt.v_free_count * PAGE_SIZE, - vm_cnt.v_free_count / ((1024 * 1024) / PAGE_SIZE)); + printf("avail memory = %lu (%lu MB)\n", vm_free_count() * PAGE_SIZE, + vm_free_count() / ((1024 * 1024) / PAGE_SIZE)); if (bootverbose) printf("machine: %s\n", sparc64_model); Modified: head/sys/sys/vmmeter.h ============================================================================== --- head/sys/sys/vmmeter.h Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/sys/vmmeter.h Tue Feb 6 22:10:07 2018 (r328954) @@ -140,23 +140,23 @@ struct vmmeter { u_int v_interrupt_free_min; /* (c) reserved pages for int code */ u_int v_free_severe; /* (c) severe page depletion point */ u_int v_wire_count VMMETER_ALIGNED; /* (a) pages wired down */ - u_int v_active_count VMMETER_ALIGNED; /* (a) pages active */ - u_int v_inactive_count VMMETER_ALIGNED; /* (a) pages inactive */ - u_int v_laundry_count VMMETER_ALIGNED; /* (a) pages eligible for - laundering */ - u_int v_free_count VMMETER_ALIGNED; /* (f) pages free */ }; #endif /* _KERNEL || _WANT_VMMETER */ #ifdef _KERNEL +#include <sys/domainset.h> + extern struct vmmeter vm_cnt; -extern u_int vm_pageout_wakeup_thresh; +extern domainset_t vm_min_domains; +extern domainset_t vm_severe_domains; #define VM_CNT_ADD(var, x) counter_u64_add(vm_cnt.var, x) #define VM_CNT_INC(var) VM_CNT_ADD(var, 1) #define VM_CNT_FETCH(var) counter_u64_fetch(vm_cnt.var) +u_int vm_free_count(void); + /* * Return TRUE if we are under our severe low-free-pages threshold * @@ -167,7 +167,7 @@ static inline int vm_page_count_severe(void) { - return (vm_cnt.v_free_severe > vm_cnt.v_free_count); + return (!DOMAINSET_EMPTY(&vm_severe_domains)); } /* @@ -183,50 +183,8 @@ static inline int vm_page_count_min(void) { - return (vm_cnt.v_free_min > vm_cnt.v_free_count); + return (!DOMAINSET_EMPTY(&vm_min_domains)); } -/* - * Return TRUE if we have not reached our free page target during - * free page recovery operations. - */ -static inline int -vm_page_count_target(void) -{ - - return (vm_cnt.v_free_target > vm_cnt.v_free_count); -} - -/* - * Return the number of pages we need to free-up or cache - * A positive number indicates that we do not have enough free pages. - */ -static inline int -vm_paging_target(void) -{ - - return (vm_cnt.v_free_target - vm_cnt.v_free_count); -} - -/* - * Returns TRUE if the pagedaemon needs to be woken up. - */ -static inline int -vm_paging_needed(u_int free_count) -{ - - return (free_count < vm_pageout_wakeup_thresh); -} - -/* - * Return the number of pages we need to launder. - * A positive number indicates that we have a shortfall of clean pages. - */ -static inline int -vm_laundry_target(void) -{ - - return (vm_paging_target()); -} #endif /* _KERNEL */ #endif /* _SYS_VMMETER_H_ */ Modified: head/sys/vm/swap_pager.c ============================================================================== --- head/sys/vm/swap_pager.c Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/vm/swap_pager.c Tue Feb 6 22:10:07 2018 (r328954) @@ -2327,7 +2327,7 @@ swapoff_one(struct swdevt *sp, struct ucred *cred) * of data we will have to page back in, plus an epsilon so * the system doesn't become critically low on swap space. */ - if (vm_cnt.v_free_count + swap_pager_avail < nblks + nswap_lowat) + if (vm_free_count() + swap_pager_avail < nblks + nswap_lowat) return (ENOMEM); /* Modified: head/sys/vm/uma_core.c ============================================================================== --- head/sys/vm/uma_core.c Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/vm/uma_core.c Tue Feb 6 22:10:07 2018 (r328954) @@ -3464,7 +3464,7 @@ uma_large_malloc_domain(vm_size_t size, int domain, in slab->us_data = (void *)addr; slab->us_flags = UMA_SLAB_KERNEL | UMA_SLAB_MALLOC; slab->us_size = size; - slab->us_domain = vm_phys_domidx(PHYS_TO_VM_PAGE( + slab->us_domain = vm_phys_domain(PHYS_TO_VM_PAGE( pmap_kextract(addr))); uma_total_inc(size); } else { Modified: head/sys/vm/vm_extern.h ============================================================================== --- head/sys/vm/vm_extern.h Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/vm/vm_extern.h Tue Feb 6 22:10:07 2018 (r328954) @@ -122,5 +122,9 @@ struct sf_buf *vm_imgact_map_page(vm_object_t object, void vm_imgact_unmap_page(struct sf_buf *sf); void vm_thread_dispose(struct thread *td); int vm_thread_new(struct thread *td, int pages); +u_int vm_active_count(void); +u_int vm_inactive_count(void); +u_int vm_laundry_count(void); +u_int vm_wait_count(void); #endif /* _KERNEL */ #endif /* !_VM_EXTERN_H_ */ Modified: head/sys/vm/vm_glue.c ============================================================================== --- head/sys/vm/vm_glue.c Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/vm/vm_glue.c Tue Feb 6 22:10:07 2018 (r328954) @@ -552,7 +552,7 @@ vm_forkproc(struct thread *td, struct proc *p2, struct } while (vm_page_count_severe()) { - VM_WAIT; + vm_wait_severe(); } if ((flags & RFMEM) == 0) { Modified: head/sys/vm/vm_init.c ============================================================================== --- head/sys/vm/vm_init.c Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/vm/vm_init.c Tue Feb 6 22:10:07 2018 (r328954) @@ -89,6 +89,7 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_object.h> #include <vm/vm_page.h> #include <vm/vm_phys.h> +#include <vm/vm_pagequeue.h> #include <vm/vm_map.h> #include <vm/vm_pager.h> #include <vm/vm_extern.h> Modified: head/sys/vm/vm_kern.c ============================================================================== --- head/sys/vm/vm_kern.c Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/vm/vm_kern.c Tue Feb 6 22:10:07 2018 (r328954) @@ -92,6 +92,7 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_page.h> #include <vm/vm_pageout.h> #include <vm/vm_phys.h> +#include <vm/vm_pagequeue.h> #include <vm/vm_radix.h> #include <vm/vm_extern.h> #include <vm/uma.h> @@ -196,7 +197,7 @@ retry: if (!vm_page_reclaim_contig_domain(domain, pflags, 1, low, high, PAGE_SIZE, 0) && (flags & M_WAITOK) != 0) - VM_WAIT; + vm_wait_domain(domain); VM_OBJECT_WLOCK(object); tries++; goto retry; @@ -205,9 +206,9 @@ retry: vmem_free(vmem, addr, size); return (0); } - KASSERT(vm_phys_domidx(m) == domain, + KASSERT(vm_phys_domain(m) == domain, ("kmem_alloc_attr_domain: Domain mismatch %d != %d", - vm_phys_domidx(m), domain)); + vm_phys_domain(m), domain)); if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0) pmap_zero_page(m); m->valid = VM_PAGE_BITS_ALL; @@ -280,7 +281,7 @@ retry: if (!vm_page_reclaim_contig_domain(domain, pflags, npages, low, high, alignment, boundary) && (flags & M_WAITOK) != 0) - VM_WAIT; + vm_wait_domain(domain); VM_OBJECT_WLOCK(object); tries++; goto retry; @@ -288,9 +289,9 @@ retry: vmem_free(vmem, addr, size); return (0); } - KASSERT(vm_phys_domidx(m) == domain, + KASSERT(vm_phys_domain(m) == domain, ("kmem_alloc_contig_domain: Domain mismatch %d != %d", - vm_phys_domidx(m), domain)); + vm_phys_domain(m), domain)); end_m = m + npages; tmp = addr; for (; m < end_m; m++) { @@ -452,9 +453,9 @@ retry: kmem_unback(object, addr, i); return (KERN_NO_SPACE); } - KASSERT(vm_phys_domidx(m) == domain, + KASSERT(vm_phys_domain(m) == domain, ("kmem_back_domain: Domain mismatch %d != %d", - vm_phys_domidx(m), domain)); + vm_phys_domain(m), domain)); if (flags & M_ZERO && (m->flags & PG_ZERO) == 0) pmap_zero_page(m); KASSERT((m->oflags & VPO_UNMANAGED) != 0, @@ -514,7 +515,7 @@ _kmem_unback(vm_object_t object, vm_offset_t addr, vm_ end = offset + size; VM_OBJECT_WLOCK(object); m = vm_page_lookup(object, atop(offset)); - domain = vm_phys_domidx(m); + domain = vm_phys_domain(m); for (; offset < end; offset += PAGE_SIZE, m = next) { next = vm_page_next(m); vm_page_unwire(m, PQ_NONE); Modified: head/sys/vm/vm_map.c ============================================================================== --- head/sys/vm/vm_map.c Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/vm/vm_map.c Tue Feb 6 22:10:07 2018 (r328954) @@ -2011,7 +2011,7 @@ vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_p * free pages allocating pv entries. */ if (((flags & MAP_PREFAULT_MADVISE) != 0 && - vm_cnt.v_free_count < vm_cnt.v_free_reserved) || + vm_page_count_severe()) || ((flags & MAP_PREFAULT_PARTIAL) != 0 && tmpidx >= threshold)) { psize = tmpidx; Modified: head/sys/vm/vm_meter.c ============================================================================== --- head/sys/vm/vm_meter.c Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/vm/vm_meter.c Tue Feb 6 22:10:07 2018 (r328954) @@ -53,6 +53,8 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_page.h> #include <vm/vm_extern.h> #include <vm/vm_param.h> +#include <vm/vm_phys.h> +#include <vm/vm_pagequeue.h> #include <vm/pmap.h> #include <vm/vm_map.h> #include <vm/vm_object.h> @@ -213,9 +215,6 @@ vmtotal(SYSCTL_HANDLER_ARGS) total.t_dw++; else total.t_sl++; - if (td->td_wchan == - &vm_cnt.v_free_count) - total.t_pw++; } break; case TDS_CAN_RUN: @@ -283,7 +282,8 @@ vmtotal(SYSCTL_HANDLER_ARGS) } } mtx_unlock(&vm_object_list_mtx); - total.t_free = vm_cnt.v_free_count; + total.t_pw = vm_wait_count(); + total.t_free = vm_free_count(); #if defined(COMPAT_FREEBSD11) /* sysctl(8) allocates twice as much memory as reported by sysctl(3) */ if (curproc->p_osrel < P_OSREL_VMTOTAL64 && (req->oldlen == @@ -339,7 +339,7 @@ sysctl_handle_vmstat(SYSCTL_HANDLER_ARGS) #define VM_STATS(parent, var, descr) \ SYSCTL_OID(parent, OID_AUTO, var, CTLTYPE_U64 | CTLFLAG_MPSAFE | \ - CTLFLAG_RD, &vm_cnt.var, 0, sysctl_handle_vmstat, "QU", descr); + CTLFLAG_RD, &vm_cnt.var, 0, sysctl_handle_vmstat, "QU", descr) #define VM_STATS_VM(var, descr) VM_STATS(_vm_stats_vm, var, descr) #define VM_STATS_SYS(var, descr) VM_STATS(_vm_stats_sys, var, descr) @@ -379,19 +379,36 @@ VM_STATS_VM(v_vforkpages, "VM pages affected by vfork( VM_STATS_VM(v_rforkpages, "VM pages affected by rfork()"); VM_STATS_VM(v_kthreadpages, "VM pages affected by fork() by kernel"); +static int +sysctl_handle_vmstat_proc(SYSCTL_HANDLER_ARGS) +{ + u_int (*fn)(void); + uint32_t val; + + fn = arg1; + val = fn(); + return (SYSCTL_OUT(req, &val, sizeof(val))); +} + +#define VM_STATS_PROC(var, descr, fn) \ + SYSCTL_OID(_vm_stats_vm, OID_AUTO, var, CTLTYPE_U32 | CTLFLAG_MPSAFE | \ + CTLFLAG_RD, fn, 0, sysctl_handle_vmstat_proc, "IU", descr) + #define VM_STATS_UINT(var, descr) \ SYSCTL_UINT(_vm_stats_vm, OID_AUTO, var, CTLFLAG_RD, &vm_cnt.var, 0, descr) + VM_STATS_UINT(v_page_size, "Page size in bytes"); VM_STATS_UINT(v_page_count, "Total number of pages in system"); VM_STATS_UINT(v_free_reserved, "Pages reserved for deadlock"); VM_STATS_UINT(v_free_target, "Pages desired free"); VM_STATS_UINT(v_free_min, "Minimum low-free-pages threshold"); -VM_STATS_UINT(v_free_count, "Free pages"); +VM_STATS_PROC(v_free_count, "Free pages", vm_free_count); VM_STATS_UINT(v_wire_count, "Wired pages"); -VM_STATS_UINT(v_active_count, "Active pages"); +VM_STATS_PROC(v_active_count, "Active pages", vm_active_count); VM_STATS_UINT(v_inactive_target, "Desired inactive pages"); -VM_STATS_UINT(v_inactive_count, "Inactive pages"); -VM_STATS_UINT(v_laundry_count, "Pages eligible for laundering"); +VM_STATS_PROC(v_inactive_count, "Inactive pages", vm_inactive_count); +VM_STATS_PROC(v_laundry_count, "Pages eligible for laundering", + vm_laundry_count); VM_STATS_UINT(v_pageout_free_min, "Min pages reserved for kernel"); VM_STATS_UINT(v_interrupt_free_min, "Reserved pages for interrupt code"); VM_STATS_UINT(v_free_severe, "Severe page depletion point"); @@ -406,3 +423,52 @@ SYSCTL_UINT(_vm_stats_vm, OID_AUTO, v_cache_count, CTL SYSCTL_UINT(_vm_stats_vm, OID_AUTO, v_tcached, CTLFLAG_RD, SYSCTL_NULL_UINT_PTR, 0, "Dummy for compatibility"); #endif + +u_int +vm_free_count(void) +{ + u_int v; + int i; + + v = 0; + for (i = 0; i < vm_ndomains; i++) + v += vm_dom[i].vmd_free_count; + + return (v); +} + +static +u_int +vm_pagequeue_count(int pq) +{ + u_int v; + int i; + + v = 0; + for (i = 0; i < vm_ndomains; i++) + v += vm_dom[i].vmd_pagequeues[pq].pq_cnt; + + return (v); +} + +u_int +vm_active_count(void) +{ + + return vm_pagequeue_count(PQ_ACTIVE); +} + +u_int +vm_inactive_count(void) +{ + + return vm_pagequeue_count(PQ_INACTIVE); +} + +u_int +vm_laundry_count(void) +{ + + return vm_pagequeue_count(PQ_LAUNDRY); +} + Modified: head/sys/vm/vm_object.c ============================================================================== --- head/sys/vm/vm_object.c Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/vm/vm_object.c Tue Feb 6 22:10:07 2018 (r328954) @@ -96,6 +96,8 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_page.h> #include <vm/vm_pageout.h> #include <vm/vm_pager.h> +#include <vm/vm_phys.h> +#include <vm/vm_pagequeue.h> #include <vm/swap_pager.h> #include <vm/vm_kern.h> #include <vm/vm_extern.h> Modified: head/sys/vm/vm_object.h ============================================================================== --- head/sys/vm/vm_object.h Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/vm/vm_object.h Tue Feb 6 22:10:07 2018 (r328954) @@ -297,6 +297,17 @@ vm_object_color(vm_object_t object, u_short color) } } +static __inline bool +vm_object_reserv(vm_object_t object) +{ + + if (object != NULL && + (object->flags & (OBJ_COLORED | OBJ_FICTITIOUS)) == OBJ_COLORED) { + return (true); + } + return (false); +} + void vm_object_clear_flag(vm_object_t object, u_short bits); void vm_object_pip_add(vm_object_t object, short i); void vm_object_pip_subtract(vm_object_t object, short i); Modified: head/sys/vm/vm_page.c ============================================================================== --- head/sys/vm/vm_page.c Tue Feb 6 22:08:43 2018 (r328953) +++ head/sys/vm/vm_page.c Tue Feb 6 22:10:07 2018 (r328954) @@ -116,8 +116,9 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_object.h> #include <vm/vm_page.h> #include <vm/vm_pageout.h> -#include <vm/vm_pager.h> #include <vm/vm_phys.h> +#include <vm/vm_pagequeue.h> +#include <vm/vm_pager.h> #include <vm/vm_radix.h> #include <vm/vm_reserv.h> #include <vm/vm_extern.h> @@ -136,10 +137,16 @@ extern int vmem_startup_count(void); */ struct vm_domain vm_dom[MAXMEMDOM]; -struct mtx_padalign __exclusive_cache_line vm_page_queue_free_mtx; struct mtx_padalign __exclusive_cache_line pa_lock[PA_LOCK_COUNT]; +struct mtx_padalign __exclusive_cache_line vm_domainset_lock; +domainset_t __exclusive_cache_line vm_min_domains; +domainset_t __exclusive_cache_line vm_severe_domains; +static int vm_min_waiters; +static int vm_severe_waiters; +static int vm_pageproc_waiters; + /* * bogus page -- for I/O to/from partially complete buffers, * or for paging into sparsely invalid regions. @@ -164,24 +171,22 @@ static int sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARG SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_page_blacklist, "A", "Blacklist pages"); -/* Is the page daemon waiting for free pages? */ -static int vm_pageout_pages_needed; - static uma_zone_t fakepg_zone; static void vm_page_alloc_check(vm_page_t m); static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits); static void vm_page_enqueue(uint8_t queue, vm_page_t m); -static void vm_page_free_phys(vm_page_t m); -static void vm_page_free_wakeup(void); +static void vm_page_free_phys(struct vm_domain *vmd, vm_page_t m); static void vm_page_init(void *dummy); static int vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, vm_page_t mpred); static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred); -static int vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run, - vm_paddr_t high); -static int vm_page_alloc_fail(vm_object_t object, int req); +static int vm_page_reclaim_run(int req_class, int domain, u_long npages, + vm_page_t m_run, vm_paddr_t high); +static void vm_domain_free_wakeup(struct vm_domain *); +static int vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object, + int req); SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init, NULL); @@ -318,6 +323,7 @@ vm_page_blacklist_next(char **list, char *end) static void vm_page_blacklist_check(char *list, char *end) { + struct vm_domain *vmd; vm_paddr_t pa; vm_page_t m; char *next; @@ -330,9 +336,10 @@ vm_page_blacklist_check(char *list, char *end) m = vm_phys_paddr_to_vm_page(pa); if (m == NULL) continue; - mtx_lock(&vm_page_queue_free_mtx); + vmd = vm_pagequeue_domain(m); + vm_domain_free_lock(vmd); ret = vm_phys_unfree_page(m); - mtx_unlock(&vm_page_queue_free_mtx); + vm_domain_free_unlock(vmd); if (ret == TRUE) { TAILQ_INSERT_TAIL(&blacklist_head, m, listq); if (bootverbose) @@ -395,28 +402,23 @@ sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS) } static void -vm_page_domain_init(struct vm_domain *vmd) +vm_page_domain_init(int domain) { + struct vm_domain *vmd; struct vm_pagequeue *pq; int i; + vmd = VM_DOMAIN(domain); + bzero(vmd, sizeof(*vmd)); *__DECONST(char **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_name) = "vm inactive pagequeue"; - *__DECONST(u_int **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_vcnt) = - &vm_cnt.v_inactive_count; *__DECONST(char **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_name) = "vm active pagequeue"; - *__DECONST(u_int **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_vcnt) = - &vm_cnt.v_active_count; *__DECONST(char **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_name) = "vm laundry pagequeue"; - *__DECONST(int **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_vcnt) = - &vm_cnt.v_laundry_count; *__DECONST(char **, &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_name) = "vm unswappable pagequeue"; - /* Unswappable dirty pages are counted as being in the laundry. */ - *__DECONST(int **, &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_vcnt) = - &vm_cnt.v_laundry_count; + vmd->vmd_domain = domain; vmd->vmd_page_count = 0; vmd->vmd_free_count = 0; vmd->vmd_segs = 0; @@ -427,6 +429,7 @@ vm_page_domain_init(struct vm_domain *vmd) mtx_init(&pq->pq_mutex, pq->pq_name, "vm pagequeue", MTX_DEF | MTX_DUPOK); } + mtx_init(&vmd->vmd_free_mtx, "vm page free queue", NULL, MTX_DEF); } /* @@ -463,7 +466,6 @@ vm_page_init_page(vm_page_t m, vm_paddr_t pa, int segi vm_offset_t vm_page_startup(vm_offset_t vaddr) { - struct vm_domain *vmd; struct vm_phys_seg *seg; vm_page_t m; char *list, *listend; @@ -494,11 +496,11 @@ vm_page_startup(vm_offset_t vaddr) /* * Initialize the page and queue locks. */ - mtx_init(&vm_page_queue_free_mtx, "vm page free queue", NULL, MTX_DEF); + mtx_init(&vm_domainset_lock, "vm domainset lock", NULL, MTX_DEF); for (i = 0; i < PA_LOCK_COUNT; i++) mtx_init(&pa_lock[i], "vm page", NULL, MTX_DEF); for (i = 0; i < vm_ndomains; i++) - vm_page_domain_init(&vm_dom[i]); + vm_page_domain_init(i); /* * Allocate memory for use when boot strapping the kernel memory @@ -704,7 +706,6 @@ vm_page_startup(vm_offset_t vaddr) * physical memory allocator's free lists. */ vm_cnt.v_page_count = 0; - vm_cnt.v_free_count = 0; for (segind = 0; segind < vm_phys_nsegs; segind++) { seg = &vm_phys_segs[segind]; for (m = seg->first_page, pa = seg->start; pa < seg->end; @@ -719,6 +720,8 @@ vm_page_startup(vm_offset_t vaddr) * or doesn't overlap any of them. */ for (i = 0; phys_avail[i + 1] != 0; i += 2) { + struct vm_domain *vmd; + if (seg->start < phys_avail[i] || seg->end > phys_avail[i + 1]) continue; @@ -726,13 +729,14 @@ vm_page_startup(vm_offset_t vaddr) m = seg->first_page; pagecount = (u_long)atop(seg->end - seg->start); - mtx_lock(&vm_page_queue_free_mtx); + vmd = VM_DOMAIN(seg->domain); + vm_domain_free_lock(vmd); vm_phys_free_contig(m, pagecount); - vm_phys_freecnt_adj(m, (int)pagecount); - mtx_unlock(&vm_page_queue_free_mtx); + vm_domain_freecnt_adj(vmd, (int)pagecount); + vm_domain_free_unlock(vmd); vm_cnt.v_page_count += (u_int)pagecount; - vmd = &vm_dom[seg->domain]; + vmd = VM_DOMAIN(seg->domain); vmd->vmd_page_count += (u_int)pagecount; vmd->vmd_segs |= 1UL << m->segind; break; @@ -1657,12 +1661,40 @@ vm_page_alloc_after(vm_object_t object, vm_pindex_t pi return (m); } +/* + * Returns true if the number of free pages exceeds the minimum + * for the request class and false otherwise. + */ +int +vm_domain_available(struct vm_domain *vmd, int req, int npages) +{ + + vm_domain_free_assert_locked(vmd); + req = req & VM_ALLOC_CLASS_MASK; + + /* + * The page daemon is allowed to dig deeper into the free page list. + */ + if (curproc == pageproc && req != VM_ALLOC_INTERRUPT) + req = VM_ALLOC_SYSTEM; + + if (vmd->vmd_free_count >= npages + vmd->vmd_free_reserved || + (req == VM_ALLOC_SYSTEM && + vmd->vmd_free_count >= npages + vmd->vmd_interrupt_free_min) || + (req == VM_ALLOC_INTERRUPT && + vmd->vmd_free_count >= npages)) + return (1); + + return (0); +} + vm_page_t vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain, int req, vm_page_t mpred) { + struct vm_domain *vmd; vm_page_t m; - int flags, req_class; + int flags; u_int free_count; KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && @@ -1678,34 +1710,27 @@ vm_page_alloc_domain_after(vm_object_t object, vm_pind if (object != NULL) VM_OBJECT_ASSERT_WLOCKED(object); - req_class = req & VM_ALLOC_CLASS_MASK; - - /* - * The page daemon is allowed to dig deeper into the free page list. - */ - if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) - req_class = VM_ALLOC_SYSTEM; - - /* - * Allocate a page if the number of free pages exceeds the minimum - * for the request class. - */ again: m = NULL; - mtx_lock(&vm_page_queue_free_mtx); - if (vm_cnt.v_free_count > vm_cnt.v_free_reserved || - (req_class == VM_ALLOC_SYSTEM && - vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min) || - (req_class == VM_ALLOC_INTERRUPT && - vm_cnt.v_free_count > 0)) { +#if VM_NRESERVLEVEL > 0 + if (vm_object_reserv(object) && + (m = vm_reserv_extend(req, object, pindex, domain, mpred)) + != NULL) { + domain = vm_phys_domain(m); + vmd = VM_DOMAIN(domain); + goto found; + } +#endif + vmd = VM_DOMAIN(domain); + vm_domain_free_lock(vmd); + if (vm_domain_available(vmd, req, 1)) { /* * Can we allocate the page from a reservation? */ #if VM_NRESERVLEVEL > 0 - if (object == NULL || (object->flags & (OBJ_COLORED | - OBJ_FICTITIOUS)) != OBJ_COLORED || (m = - vm_reserv_alloc_page(object, pindex, domain, - mpred)) == NULL) + if (!vm_object_reserv(object) || + (m = vm_reserv_alloc_page(object, pindex, + domain, mpred)) == NULL) #endif { /* @@ -1727,7 +1752,7 @@ again: /* * Not allocatable, give up. */ - if (vm_page_alloc_fail(object, req)) + if (vm_domain_alloc_fail(vmd, object, req)) goto again; return (NULL); } @@ -1736,8 +1761,18 @@ again: * At this point we had better have found a good page. */ KASSERT(m != NULL, ("missing page")); - free_count = vm_phys_freecnt_adj(m, -1); - mtx_unlock(&vm_page_queue_free_mtx); + free_count = vm_domain_freecnt_adj(vmd, -1); + vm_domain_free_unlock(vmd); + + /* + * Don't wakeup too often - wakeup the pageout daemon when + * we would be nearly out of memory. + */ + if (vm_paging_needed(vmd, free_count)) + pagedaemon_wakeup(vmd->vmd_domain); +#if VM_NRESERVLEVEL > 0 +found: +#endif vm_page_alloc_check(m); /* @@ -1770,7 +1805,7 @@ again: if (object != NULL) { if (vm_page_insert_after(m, object, pindex, mpred)) { - pagedaemon_wakeup(); + pagedaemon_wakeup(domain); if (req & VM_ALLOC_WIRED) { atomic_subtract_int(&vm_cnt.v_wire_count, 1); m->wire_count = 0; @@ -1795,13 +1830,6 @@ again: } else m->pindex = pindex; - /* - * Don't wakeup too often - wakeup the pageout daemon when - * we would be nearly out of memory. - */ - if (vm_paging_needed(free_count)) - pagedaemon_wakeup(); - return (m); } @@ -1869,9 +1897,9 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pin int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr) { + struct vm_domain *vmd; vm_page_t m, m_ret, mpred; u_int busy_lock, flags, oflags; - int req_class; mpred = NULL; /* XXX: pacify gcc */ KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && @@ -1889,14 +1917,7 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pin object)); } KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero")); - req_class = req & VM_ALLOC_CLASS_MASK; - /* - * The page daemon is allowed to dig deeper into the free page list. - */ - if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) - req_class = VM_ALLOC_SYSTEM; - if (object != NULL) { mpred = vm_radix_lookup_le(&object->rtree, pindex); KASSERT(mpred == NULL || mpred->pindex != pindex, @@ -1908,19 +1929,25 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pin * below the lower bound for the allocation class? */ again: +#if VM_NRESERVLEVEL > 0 + if (vm_object_reserv(object) && + (m_ret = vm_reserv_extend_contig(req, object, pindex, domain, + npages, low, high, alignment, boundary, mpred)) != NULL) { + domain = vm_phys_domain(m_ret); + vmd = VM_DOMAIN(domain); + goto found; + } +#endif m_ret = NULL; - mtx_lock(&vm_page_queue_free_mtx); - if (vm_cnt.v_free_count >= npages + vm_cnt.v_free_reserved || - (req_class == VM_ALLOC_SYSTEM && - vm_cnt.v_free_count >= npages + vm_cnt.v_interrupt_free_min) || - (req_class == VM_ALLOC_INTERRUPT && - vm_cnt.v_free_count >= npages)) { *** DIFF OUTPUT TRUNCATED AT 1000 LINES *** _______________________________________________ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"