Author: glebius
Date: Fri Feb  9 04:45:39 2018
New Revision: 329058
URL: https://svnweb.freebsd.org/changeset/base/329058

Log:
  Fix boot_pages exhaustion on machines with many domains and cores, where
  the size of a UMA zone allocation is greater than the page size. In this
  case the zone of zones cannot use UMA_MD_SMALL_ALLOC, and we need to
  postpone switching this zone off of startup_alloc() until the VM is
  fully launched.
  
  o Always supply number of VM zones to uma_startup_count(). On machines
    with UMA_MD_SMALL_ALLOC ignore it completely, unless zsize goes over
    a page. In the latter case account VM zones for number of allocations
    from the zone of zones.
  o Rewrite startup_alloc() so that it will immediately switch off from
    itself any zone that is already capable of running the real allocator.
    In the worst-case scenario we may leak a single page here. See the
    comment in uma_startup_count().
  o Hardcode call to uma_startup2() into vm_mem_init(). Otherwise some
    extra SYSINITs, e.g. vm_page_init() may sneak in before.
  o While here, remove uma_boot_pages_mtx. With recent changes to boot
    pages calculation, we are guaranteed to use all of the boot_pages
    in the early single threaded stage.
  
  Reported & tested by: mav

Modified:
  head/sys/kern/kern_malloc.c
  head/sys/vm/uma_core.c
  head/sys/vm/vm_init.c
  head/sys/vm/vm_page.c

Modified: head/sys/kern/kern_malloc.c
==============================================================================
--- head/sys/kern/kern_malloc.c Fri Feb  9 03:07:12 2018        (r329057)
+++ head/sys/kern/kern_malloc.c Fri Feb  9 04:45:39 2018        (r329058)
@@ -96,8 +96,6 @@ __FBSDID("$FreeBSD$");
 dtrace_malloc_probe_func_t     dtrace_malloc_probe;
 #endif
 
-extern void    uma_startup2(void);
-
 #if defined(INVARIANTS) || defined(MALLOC_MAKE_FAILURES) ||            \
     defined(DEBUG_MEMGUARD) || defined(DEBUG_REDZONE)
 #define        MALLOC_DEBUG    1
@@ -928,8 +926,6 @@ mallocinit(void *dummy)
        mtx_init(&malloc_mtx, "malloc", NULL, MTX_DEF);
 
        kmeminit();
-
-       uma_startup2();
 
        if (kmem_zmax < PAGE_SIZE || kmem_zmax > KMEM_ZMAX)
                kmem_zmax = KMEM_ZMAX;

Modified: head/sys/vm/uma_core.c
==============================================================================
--- head/sys/vm/uma_core.c      Fri Feb  9 03:07:12 2018        (r329057)
+++ head/sys/vm/uma_core.c      Fri Feb  9 04:45:39 2018        (r329058)
@@ -134,13 +134,10 @@ static struct rwlock_padalign __exclusive_cache_line u
 
 /*
  * Pointer and counter to pool of pages, that is preallocated at
- * startup to bootstrap UMA.  Early zones continue to use the pool
- * until it is depleted, so allocations may happen after boot, thus
- * we need a mutex to protect it.
+ * startup to bootstrap UMA.
  */
 static char *bootmem;
 static int boot_pages;
-static struct mtx uma_boot_pages_mtx;
 
 static struct sx uma_drain_lock;
 
@@ -1081,37 +1078,46 @@ startup_alloc(uma_zone_t zone, vm_size_t bytes, int do
        int pages;
 
        keg = zone_first_keg(zone);
-       pages = howmany(bytes, PAGE_SIZE);
-       KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));
 
        /*
-        * Check our small startup cache to see if it has pages remaining.
+        * If we are in BOOT_BUCKETS or higher, than switch to real
+        * allocator.  Zones with page sized slabs switch at BOOT_PAGEALLOC.
         */
-       mtx_lock(&uma_boot_pages_mtx);
-       if (pages <= boot_pages) {
-#ifdef DIAGNOSTIC
-               printf("%s from \"%s\", %d boot pages left\n", __func__,
-                   zone->uz_name, boot_pages);
+       switch (booted) {
+               case BOOT_COLD:
+               case BOOT_STRAPPED:
+                       break;
+               case BOOT_PAGEALLOC:
+                       if (keg->uk_ppera > 1)
+                               break;
+               case BOOT_BUCKETS:
+               case BOOT_RUNNING:
+#ifdef UMA_MD_SMALL_ALLOC
+                       keg->uk_allocf = (keg->uk_ppera > 1) ?
+                           page_alloc : uma_small_alloc;
+#else
+                       keg->uk_allocf = page_alloc;
 #endif
-               mem = bootmem;
-               boot_pages -= pages;
-               bootmem += pages * PAGE_SIZE;
-               mtx_unlock(&uma_boot_pages_mtx);
-               *pflag = UMA_SLAB_BOOT;
-               return (mem);
+                       return keg->uk_allocf(zone, bytes, domain, pflag, wait);
        }
-       mtx_unlock(&uma_boot_pages_mtx);
-       if (booted < BOOT_PAGEALLOC)
-               panic("UMA zone \"%s\": Increase vm.boot_pages", zone->uz_name);
+
        /*
-        * Now that we've booted reset these users to their real allocator.
+        * Check our small startup cache to see if it has pages remaining.
         */
-#ifdef UMA_MD_SMALL_ALLOC
-       keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
-#else
-       keg->uk_allocf = page_alloc;
+       pages = howmany(bytes, PAGE_SIZE);
+       KASSERT(pages > 0, ("%s can't reserve 0 pages", __func__));
+       if (pages > boot_pages)
+               panic("UMA zone \"%s\": Increase vm.boot_pages", zone->uz_name);
+#ifdef DIAGNOSTIC
+       printf("%s from \"%s\", %d boot pages left\n", __func__, zone->uz_name,
+           boot_pages);
 #endif
-       return keg->uk_allocf(zone, bytes, domain, pflag, wait);
+       mem = bootmem;
+       boot_pages -= pages;
+       bootmem += pages * PAGE_SIZE;
+       *pflag = UMA_SLAB_BOOT;
+
+       return (mem);
 }
 
 /*
@@ -1789,9 +1795,9 @@ zone_foreach(void (*zfunc)(uma_zone_t))
 #define        UMA_BOOT_ALIGN  32
 static int zsize, ksize;
 int
-uma_startup_count(int zones)
+uma_startup_count(int vm_zones)
 {
-       int pages;
+       int zones, pages;
 
        ksize = sizeof(struct uma_keg) +
            (sizeof(struct uma_domain) * vm_ndomains);
@@ -1806,12 +1812,17 @@ uma_startup_count(int zones)
        pages = howmany(roundup(zsize, CACHE_LINE_SIZE) * 2 +
            roundup(ksize, CACHE_LINE_SIZE), PAGE_SIZE);
 
-       zones += UMA_BOOT_ZONES;
+#ifdef UMA_MD_SMALL_ALLOC
+       zones = UMA_BOOT_ZONES;
+#else
+       zones = UMA_BOOT_ZONES + vm_zones;
+       vm_zones = 0;
+#endif
 
        /* Memory for the rest of startup zones, UMA and VM, ... */
        if (zsize > UMA_SLAB_SIZE)
-               pages += zones * howmany(roundup2(zsize, UMA_BOOT_ALIGN),
-                   UMA_SLAB_SIZE);
+               pages += (zones + vm_zones) *
+                   howmany(roundup2(zsize, UMA_BOOT_ALIGN), UMA_SLAB_SIZE);
        else
                pages += howmany(zones,
                    UMA_SLAB_SPACE / roundup2(zsize, UMA_BOOT_ALIGN));
@@ -1872,7 +1883,6 @@ uma_startup(void *mem, int npages)
        args.flags = UMA_ZFLAG_INTERNAL;
        zone_ctor(kegs, zsize, &args, M_WAITOK);
 
-       mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
        bootmem = mem;
        boot_pages = npages;
 
@@ -1917,6 +1927,9 @@ void
 uma_startup2(void)
 {
 
+#ifdef DIAGNOSTIC
+       printf("Entering %s with %d boot pages left\n", __func__, boot_pages);
+#endif
        booted = BOOT_BUCKETS;
        sx_init(&uma_drain_lock, "umadrain");
        bucket_enable();

Modified: head/sys/vm/vm_init.c
==============================================================================
--- head/sys/vm/vm_init.c       Fri Feb  9 03:07:12 2018        (r329057)
+++ head/sys/vm/vm_init.c       Fri Feb  9 04:45:39 2018        (r329058)
@@ -95,6 +95,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_extern.h>
 
 extern void    uma_startup1(void);
+extern void    uma_startup2(void);
 extern void    vm_radix_reserve_kva(void);
 
 #if VM_NRESERVLEVEL > 0
@@ -183,9 +184,9 @@ vm_mem_init(dummy)
 #ifndef        UMA_MD_SMALL_ALLOC
        /* Set up radix zone to use noobj_alloc. */
        vm_radix_reserve_kva();
-       /* Announce page availability to UMA. */
-       uma_startup1();
 #endif
+       /* Announce full page availability to UMA. */
+       uma_startup2();
        kmem_init_zero_region();
        pmap_init();
        vm_pager_init();

Modified: head/sys/vm/vm_page.c
==============================================================================
--- head/sys/vm/vm_page.c       Fri Feb  9 03:07:12 2018        (r329057)
+++ head/sys/vm/vm_page.c       Fri Feb  9 04:45:39 2018        (r329058)
@@ -506,16 +506,13 @@ vm_page_startup(vm_offset_t vaddr)
         * Allocate memory for use when boot strapping the kernel memory
         * allocator.  Tell UMA how many zones we are going to create
         * before going fully functional.  UMA will add its zones.
-        */
-#ifdef UMA_MD_SMALL_ALLOC
-       boot_pages = uma_startup_count(0);
-#else
-       /*
+        *
         * VM startup zones: vmem, vmem_btag, VM OBJECT, RADIX NODE, MAP,
         * KMAP ENTRY, MAP ENTRY, VMSPACE.
         */
        boot_pages = uma_startup_count(8);
 
+#ifndef UMA_MD_SMALL_ALLOC
        /* vmem_startup() calls uma_prealloc(). */
        boot_pages += vmem_startup_count();
        /* vm_map_startup() calls uma_prealloc(). */
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"

Reply via email to