Author: jeff
Date: Fri Nov 29 03:14:10 2019
New Revision: 355203
URL: https://svnweb.freebsd.org/changeset/base/355203

Log:
  Handle large mallocs by going directly to kmem.  Taking a detour through
  UMA does not provide any additional value.
  
  Reviewed by:  markj
  Differential Revision:        https://reviews.freebsd.org/D22563
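
  The key trick, sketched below with simplified, illustrative names (this is a
  stand-alone illustration of the encoding, not the committed kernel code), is
  to reuse the per-page slab pointer slot: a real slab pointer is at least
  pointer-aligned, so its low bit is always clear, while a large malloc stores
  the rounded-up allocation size shifted left by one with the low bit set.
  free() then needs only a single bit test to tell the two cases apart and can
  recover the size without allocating a slab structure.

/*
 * Sketch of the size-in-slab-pointer encoding used for large mallocs
 * (simplified, stand-alone illustration -- not the committed kernel code).
 * Slab pointers are at least pointer-aligned, so their low bit is clear;
 * a large allocation instead stores (size << 1) | 1 in the same slot.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static void *
encode_large_size(size_t sz)
{

	/* Tag the rounded-up allocation size; the low bit marks "large". */
	return ((void *)(uintptr_t)((sz << 1) | 1));
}

static bool
is_large_slab(void *slab)
{

	return (((uintptr_t)slab & 1) != 0);
}

static size_t
large_slab_size(void *slab)
{

	return ((size_t)((uintptr_t)slab >> 1));
}

int
main(void)
{
	void *tag;

	tag = encode_large_size(64 * 1024);
	printf("large: %d, size: %zu\n", is_large_slab(tag),
	    large_slab_size(tag));
	return (0);
}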

Modified:
  head/sys/kern/kern_malloc.c
  head/sys/vm/memguard.c
  head/sys/vm/uma.h
  head/sys/vm/uma_core.c
  head/sys/vm/uma_int.h

Modified: head/sys/kern/kern_malloc.c
==============================================================================
--- head/sys/kern/kern_malloc.c Fri Nov 29 02:16:45 2019        (r355202)
+++ head/sys/kern/kern_malloc.c Fri Nov 29 03:14:10 2019        (r355203)
@@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/mutex.h>
 #include <sys/vmmeter.h>
 #include <sys/proc.h>
+#include <sys/queue.h>
 #include <sys/sbuf.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
@@ -78,6 +79,8 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_extern.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
+#include <vm/vm_phys.h>
+#include <vm/vm_pagequeue.h>
 #include <vm/uma.h>
 #include <vm/uma_int.h>
 #include <vm/uma_dbg.h>
@@ -552,6 +555,52 @@ malloc_dbg(caddr_t *vap, size_t *sizep, struct malloc_
 #endif
 
 /*
+ * Handle large allocations and frees by using kmem_malloc directly.
+ */
+static inline bool
+malloc_large_slab(uma_slab_t slab)
+{
+       uintptr_t va;
+
+       va = (uintptr_t)slab;
+       return ((va & 1) != 0);
+}
+
+static inline size_t
+malloc_large_size(uma_slab_t slab)
+{
+       uintptr_t va;
+
+       va = (uintptr_t)slab;
+       return (va >> 1);
+}
+
+static caddr_t
+malloc_large(size_t *size, struct domainset *policy, int flags)
+{
+       vm_offset_t va;
+       size_t sz;
+
+       sz = roundup(*size, PAGE_SIZE);
+       va = kmem_malloc_domainset(policy, sz, flags);
+       if (va != 0) {
+               /* The low bit is unused for slab pointers. */
+               vsetzoneslab(va, NULL, (void *)((sz << 1) | 1));
+               uma_total_inc(sz);
+               *size = sz;
+       }
+       return ((caddr_t)va);
+}
+
+static void
+free_large(void *addr, size_t size)
+{
+
+       kmem_free((vm_offset_t)addr, size);
+       uma_total_dec(size);
+}
+
+/*
  *     malloc:
  *
  *     Allocate a block of memory.
@@ -588,9 +637,7 @@ void *
                        size = zone->uz_size;
                malloc_type_zone_allocated(mtp, va == NULL ? 0 : size, indx);
        } else {
-               size = roundup(size, PAGE_SIZE);
-               zone = NULL;
-               va = uma_large_malloc(size, flags);
+               va = malloc_large(&size, DOMAINSET_RR(), flags);
                malloc_type_allocated(mtp, va == NULL ? 0 : size);
        }
        if (flags & M_WAITOK)
@@ -605,46 +652,27 @@ void *
 }
 
 static void *
-malloc_domain(size_t size, struct malloc_type *mtp, int domain, int flags)
+malloc_domain(size_t size, int *indxp, struct malloc_type *mtp, int domain,
+    int flags)
 {
        int indx;
        caddr_t va;
        uma_zone_t zone;
-#if defined(DEBUG_REDZONE)
-       unsigned long osize = size;
-#endif
 
-#ifdef MALLOC_DEBUG
-       va = NULL;
-       if (malloc_dbg(&va, &size, mtp, flags) != 0)
-               return (va);
-#endif
-       if (size <= kmem_zmax && (flags & M_EXEC) == 0) {
-               if (size & KMEM_ZMASK)
-                       size = (size & ~KMEM_ZMASK) + KMEM_ZBASE;
-               indx = kmemsize[size >> KMEM_ZSHIFT];
-               zone = kmemzones[indx].kz_zone[mtp_get_subzone(mtp)];
+       KASSERT(size <= kmem_zmax && (flags & M_EXEC) == 0,
+           ("malloc_domain: Called with bad flag / size combination."));
+       if (size & KMEM_ZMASK)
+               size = (size & ~KMEM_ZMASK) + KMEM_ZBASE;
+       indx = kmemsize[size >> KMEM_ZSHIFT];
+       zone = kmemzones[indx].kz_zone[mtp_get_subzone(mtp)];
 #ifdef MALLOC_PROFILE
-               krequests[size >> KMEM_ZSHIFT]++;
+       krequests[size >> KMEM_ZSHIFT]++;
 #endif
-               va = uma_zalloc_domain(zone, NULL, domain, flags);
-               if (va != NULL)
-                       size = zone->uz_size;
-               malloc_type_zone_allocated(mtp, va == NULL ? 0 : size, indx);
-       } else {
-               size = roundup(size, PAGE_SIZE);
-               zone = NULL;
-               va = uma_large_malloc_domain(size, domain, flags);
-               malloc_type_allocated(mtp, va == NULL ? 0 : size);
-       }
-       if (flags & M_WAITOK)
-               KASSERT(va != NULL, ("malloc(M_WAITOK) returned NULL"));
-       else if (va == NULL)
-               t_malloc_fail = time_uptime;
-#ifdef DEBUG_REDZONE
+       va = uma_zalloc_domain(zone, NULL, domain, flags);
        if (va != NULL)
-               va = redzone_setup(va, osize);
-#endif
+               size = zone->uz_size;
+       *indxp = indx;
+
        return ((void *) va);
 }
 
@@ -653,16 +681,39 @@ malloc_domainset(size_t size, struct malloc_type *mtp,
     int flags)
 {
        struct vm_domainset_iter di;
-       void *ret;
+       caddr_t ret;
        int domain;
+       int indx;
 
-       vm_domainset_iter_policy_init(&di, ds, &domain, &flags);
-       do {
-               ret = malloc_domain(size, mtp, domain, flags);
-               if (ret != NULL)
-                       break;
-       } while (vm_domainset_iter_policy(&di, &domain) == 0);
+#if defined(DEBUG_REDZONE)
+       unsigned long osize = size;
+#endif
+#ifdef MALLOC_DEBUG
+       ret= NULL;
+       if (malloc_dbg(&ret, &size, mtp, flags) != 0)
+               return (ret);
+#endif
+       if (size <= kmem_zmax && (flags & M_EXEC) == 0) {
+               vm_domainset_iter_policy_init(&di, ds, &domain, &flags);
+               do {
+                       ret = malloc_domain(size, &indx, mtp, domain, flags);
+               } while (ret == NULL &&
+                   vm_domainset_iter_policy(&di, &domain) == 0);
+               malloc_type_zone_allocated(mtp, ret == NULL ? 0 : size, indx);
+       } else {
+               /* Policy is handled by kmem. */
+               ret = malloc_large(&size, ds, flags);
+               malloc_type_allocated(mtp, ret == NULL ? 0 : size);
+       }
 
+       if (flags & M_WAITOK)
+               KASSERT(ret != NULL, ("malloc(M_WAITOK) returned NULL"));
+       else if (ret == NULL)
+               t_malloc_fail = time_uptime;
+#ifdef DEBUG_REDZONE
+       if (ret != NULL)
+               ret = redzone_setup(ret, osize);
+#endif
        return (ret);
 }
 
@@ -755,15 +806,15 @@ free(void *addr, struct malloc_type *mtp)
                panic("free: address %p(%p) has not been allocated.\n",
                    addr, (void *)((u_long)addr & (~UMA_SLAB_MASK)));
 
-       if (!(slab->us_flags & UMA_SLAB_MALLOC)) {
+       if (__predict_true(!malloc_large_slab(slab))) {
                size = zone->uz_size;
 #ifdef INVARIANTS
                free_save_type(addr, mtp, size);
 #endif
                uma_zfree_arg(zone, addr, slab);
        } else {
-               size = slab->us_size;
-               uma_large_free(slab);
+               size = malloc_large_size(slab);
+               free_large(addr, size);
        }
        malloc_type_freed(mtp, size);
 }
@@ -789,15 +840,15 @@ free_domain(void *addr, struct malloc_type *mtp)
                panic("free_domain: address %p(%p) has not been allocated.\n",
                    addr, (void *)((u_long)addr & (~UMA_SLAB_MASK)));
 
-       if (!(slab->us_flags & UMA_SLAB_MALLOC)) {
+       if (__predict_true(!malloc_large_slab(slab))) {
                size = zone->uz_size;
 #ifdef INVARIANTS
                free_save_type(addr, mtp, size);
 #endif
                uma_zfree_domain(zone, addr, slab);
        } else {
-               size = slab->us_size;
-               uma_large_free(slab);
+               size = malloc_large_size(slab);
+               free_large(addr, size);
        }
        malloc_type_freed(mtp, size);
 }
@@ -844,10 +895,10 @@ realloc(void *addr, size_t size, struct malloc_type *m
            ("realloc: address %p out of range", (void *)addr));
 
        /* Get the size of the original block */
-       if (!(slab->us_flags & UMA_SLAB_MALLOC))
+       if (!malloc_large_slab(slab))
                alloc = zone->uz_size;
        else
-               alloc = slab->us_size;
+               alloc = malloc_large_size(slab);
 
        /* Reuse the original block if appropriate */
        if (size <= alloc

Modified: head/sys/vm/memguard.c
==============================================================================
--- head/sys/vm/memguard.c      Fri Nov 29 02:16:45 2019        (r355202)
+++ head/sys/vm/memguard.c      Fri Nov 29 03:14:10 2019        (r355203)
@@ -311,7 +311,7 @@ memguard_alloc(unsigned long req_size, int flags)
         * When we pass our memory limit, reject sub-page allocations.
         * Page-size and larger allocations will use the same amount
         * of physical memory whether we allocate or hand off to
-        * uma_large_alloc(), so keep those.
+        * malloc_large(), so keep those.
         */
        if (vmem_size(memguard_arena, VMEM_ALLOC) >= memguard_physlimit &&
            req_size < PAGE_SIZE) {

Modified: head/sys/vm/uma.h
==============================================================================
--- head/sys/vm/uma.h   Fri Nov 29 02:16:45 2019        (r355202)
+++ head/sys/vm/uma.h   Fri Nov 29 03:14:10 2019        (r355203)
@@ -615,7 +615,6 @@ void uma_zone_set_freef(uma_zone_t zone, uma_free free
 #define UMA_SLAB_KERNEL        0x04            /* Slab alloced from kmem */
 #define UMA_SLAB_PRIV  0x08            /* Slab alloced from priv allocator */
 #define UMA_SLAB_OFFP  0x10            /* Slab is managed separately  */
-#define UMA_SLAB_MALLOC        0x20            /* Slab is a large malloc slab */
 /* 0x02, 0x40, and 0x80 are available */
 
 /*

Modified: head/sys/vm/uma_core.c
==============================================================================
--- head/sys/vm/uma_core.c      Fri Nov 29 02:16:45 2019        (r355202)
+++ head/sys/vm/uma_core.c      Fri Nov 29 03:14:10 2019        (r355203)
@@ -149,10 +149,10 @@ static struct sx uma_reclaim_lock;
  * kmem soft limit, initialized by uma_set_limit().  Ensure that early
  * allocations don't trigger a wakeup of the reclaim thread.
  */
-static unsigned long uma_kmem_limit = LONG_MAX;
+unsigned long uma_kmem_limit = LONG_MAX;
 SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_limit, CTLFLAG_RD, &uma_kmem_limit, 0,
     "UMA kernel memory soft limit");
-static unsigned long uma_kmem_total;
+unsigned long uma_kmem_total;
 SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_total, CTLFLAG_RD, &uma_kmem_total, 0,
     "UMA kernel memory usage");
 
@@ -326,22 +326,6 @@ static int zone_warnings = 1;
 SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0,
     "Warn when UMA zones becomes full");
 
-/* Adjust bytes under management by UMA. */
-static inline void
-uma_total_dec(unsigned long size)
-{
-
-       atomic_subtract_long(&uma_kmem_total, size);
-}
-
-static inline void
-uma_total_inc(unsigned long size)
-{
-
-       if (atomic_fetchadd_long(&uma_kmem_total, size) > uma_kmem_limit)
-               uma_reclaim_wakeup();
-}
-
 /*
  * This routine checks to see whether or not it's safe to enable buckets.
  */
@@ -4081,57 +4065,6 @@ int
 uma_zone_exhausted_nolock(uma_zone_t zone)
 {
        return (zone->uz_sleepers > 0);
-}
-
-void *
-uma_large_malloc_domain(vm_size_t size, int domain, int wait)
-{
-       struct domainset *policy;
-       vm_offset_t addr;
-       uma_slab_t slab;
-
-       if (domain != UMA_ANYDOMAIN) {
-               /* avoid allocs targeting empty domains */
-               if (VM_DOMAIN_EMPTY(domain))
-                       domain = UMA_ANYDOMAIN;
-       }
-       slab = zone_alloc_item(slabzone, NULL, domain, wait);
-       if (slab == NULL)
-               return (NULL);
-       policy = (domain == UMA_ANYDOMAIN) ? DOMAINSET_RR() :
-           DOMAINSET_FIXED(domain);
-       addr = kmem_malloc_domainset(policy, size, wait);
-       if (addr != 0) {
-               vsetzoneslab(addr, NULL, slab);
-               slab->us_data = (void *)addr;
-               slab->us_flags = UMA_SLAB_KERNEL | UMA_SLAB_MALLOC;
-               slab->us_size = size;
-               slab->us_domain = vm_phys_domain(PHYS_TO_VM_PAGE(
-                   pmap_kextract(addr)));
-               uma_total_inc(size);
-       } else {
-               zone_free_item(slabzone, slab, NULL, SKIP_NONE);
-       }
-
-       return ((void *)addr);
-}
-
-void *
-uma_large_malloc(vm_size_t size, int wait)
-{
-
-       return uma_large_malloc_domain(size, UMA_ANYDOMAIN, wait);
-}
-
-void
-uma_large_free(uma_slab_t slab)
-{
-
-       KASSERT((slab->us_flags & UMA_SLAB_KERNEL) != 0,
-           ("uma_large_free:  Memory not allocated with uma_large_malloc."));
-       kmem_free((vm_offset_t)slab->us_data, slab->us_size);
-       uma_total_dec(slab->us_size);
-       zone_free_item(slabzone, slab, NULL, SKIP_NONE);
 }
 
 static void

Modified: head/sys/vm/uma_int.h
==============================================================================
--- head/sys/vm/uma_int.h       Fri Nov 29 02:16:45 2019        (r355202)
+++ head/sys/vm/uma_int.h       Fri Nov 29 03:14:10 2019        (r355203)
@@ -281,10 +281,7 @@ BITSET_DEFINE(slabbits, SLAB_SETSIZE);
  * store and subdivides it into individually allocatable items.
  */
 struct uma_slab {
-       union {
-               LIST_ENTRY(uma_slab)    _us_link;       /* slabs in zone */
-               unsigned long   _us_size;       /* Size of allocation */
-       } us_type;
+       LIST_ENTRY(uma_slab)    us_link;        /* slabs in zone */
        SLIST_ENTRY(uma_slab)   us_hlink;       /* Link for hash table */
        uint8_t         *us_data;               /* First item */
        struct slabbits us_free;                /* Free bitmask. */
@@ -296,9 +293,6 @@ struct uma_slab {
        uint8_t         us_domain;              /* Backing NUMA domain. */
 };
 
-#define        us_link us_type._us_link
-#define        us_size us_type._us_size
-
 #if MAXMEMDOM >= 255
 #error "Slab domain type insufficient"
 #endif
@@ -402,9 +396,6 @@ struct uma_zone {
 #ifdef _KERNEL
 /* Internal prototypes */
 static __inline uma_slab_t hash_sfind(struct uma_hash *hash, uint8_t *data);
-void *uma_large_malloc(vm_size_t size, int wait);
-void *uma_large_malloc_domain(vm_size_t size, int domain, int wait);
-void uma_large_free(uma_slab_t slab);
 
 /* Lock Macros */
 
@@ -498,6 +489,25 @@ vsetzoneslab(vm_offset_t va, uma_zone_t zone, uma_slab
        p = PHYS_TO_VM_PAGE(pmap_kextract(va));
        p->plinks.uma.slab = slab;
        p->plinks.uma.zone = zone;
+}
+
+extern unsigned long uma_kmem_limit;
+extern unsigned long uma_kmem_total;
+
+/* Adjust bytes under management by UMA. */
+static inline void
+uma_total_dec(unsigned long size)
+{
+
+       atomic_subtract_long(&uma_kmem_total, size);
+}
+
+static inline void
+uma_total_inc(unsigned long size)
+{
+
+       if (atomic_fetchadd_long(&uma_kmem_total, size) > uma_kmem_limit)
+               uma_reclaim_wakeup();
 }
 
 /*