Author: glebius
Date: Mon Apr  8 19:10:45 2013
New Revision: 249264
URL: http://svnweb.freebsd.org/changeset/base/249264

Log:
  Merge from projects/counters: UMA_ZONE_PCPU zones.
  
    These zones have slab size == sizeof(struct pcpu), but request enough
  pages from the VM to fit (uk_slabsize * mp_ncpus) bytes. An item allocated
  from such a zone has a separate twin for each CPU in the system, and
  these twins lie at a distance of sizeof(struct pcpu) from each other.
  This particular stride will allow us to make some optimizations later.
  
    To address the private item for a CPU, simple pointer arithmetic is used:
  
    item = (type *)((char *)base + sizeof(struct pcpu) * curcpu)
  
    This arithmetic is available as the zpcpu_get() macro in pcpu.h.
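
    For illustration, a minimal sketch of such a macro, following the
  arithmetic above (the authoritative definition lives in pcpu.h, and
  curcpu is the current CPU id):

    #define zpcpu_get(base)                                             \
        ((__typeof(base))((char *)(base) + sizeof(struct pcpu) * curcpu))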
  
    To introduce non-page-sized slabs, a new field, uk_slabsize, has been
  added to uma_keg. This pushed some frequently used fields of uma_keg onto
  the fourth cache line on amd64. To mitigate this pessimization, the
  uma_keg fields were rearranged a bit, and the least frequently used
  uk_name and uk_link were moved down to the fourth cache line. All other,
  frequently dereferenced fields now fit into the first three cache lines.
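
    Purely as an illustration (not part of this commit), such a layout
  invariant could be verified at compile time with CTASSERT() from
  sys/systm.h and CACHE_LINE_SIZE from machine/param.h, assuming uk_flags
  is the last of the hot fields:

    /* Hot fields up to and including uk_flags must fit within the
     * first three cache lines of struct uma_keg. */
    CTASSERT(__offsetof(struct uma_keg, uk_flags) + sizeof(u_int32_t) <=
        3 * CACHE_LINE_SIZE);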
  
  Sponsored by: Nginx, Inc.

Modified:
  head/share/man/man9/zone.9
  head/sys/vm/uma.h
  head/sys/vm/uma_core.c
  head/sys/vm/uma_int.h

Modified: head/share/man/man9/zone.9
==============================================================================
--- head/share/man/man9/zone.9  Mon Apr  8 19:03:01 2013        (r249263)
+++ head/share/man/man9/zone.9  Mon Apr  8 19:10:45 2013        (r249264)
@@ -153,6 +153,23 @@ See
 .Fn uma_find_refcnt .
 .It Dv UMA_ZONE_NODUMP
 Pages belonging to the zone will not be included into mini-dumps.
+.It Dv UMA_ZONE_PCPU
+An allocation from the zone will have
+.Va mp_ncpus
+shadow copies, privately assigned to CPUs.
+A CPU can address its private copy using the base allocation address plus
+the current CPU id multiplied by
+.Fn sizeof "struct pcpu" :
+.Bd -literal -offset indent
+foo_zone = uma_zcreate(..., UMA_ZONE_PCPU);
+ ...
+foo_base = uma_zalloc(foo_zone, ...);
+ ...
+critical_enter();
+foo_pcpu = (foo_t *)zpcpu_get(foo_base);
+/* do something with foo_pcpu */
+critical_exit();
+.Ed
 .It Dv UMA_ZONE_OFFPAGE
 By default book-keeping of items within a slab is done in the slab page itself.
 This flag explicitly tells subsystem that book-keeping structure should be

Modified: head/sys/vm/uma.h
==============================================================================
--- head/sys/vm/uma.h   Mon Apr  8 19:03:01 2013        (r249263)
+++ head/sys/vm/uma.h   Mon Apr  8 19:10:45 2013        (r249264)
@@ -252,6 +252,10 @@ int uma_zsecond_add(uma_zone_t zone, uma
                                         * Zone's pages will not be included in
                                         * mini-dumps.
                                         */
+#define        UMA_ZONE_PCPU           0x8000  /*
+                                        * Allocates mp_ncpus slabs sized to
+                                        * sizeof(struct pcpu).
+                                        */
 
 /*
  * These flags are shared between the keg and zone.  In zones wishing to add
@@ -260,7 +264,7 @@ int uma_zsecond_add(uma_zone_t zone, uma
  */
 #define        UMA_ZONE_INHERIT                                        \
     (UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_NOFREE |            \
-    UMA_ZONE_HASH | UMA_ZONE_REFCNT | UMA_ZONE_VTOSLAB)
+    UMA_ZONE_HASH | UMA_ZONE_REFCNT | UMA_ZONE_VTOSLAB | UMA_ZONE_PCPU)
 
 /* Definitions for align */
 #define UMA_ALIGN_PTR  (sizeof(void *) - 1)    /* Alignment fit for ptr */

Modified: head/sys/vm/uma_core.c
==============================================================================
--- head/sys/vm/uma_core.c      Mon Apr  8 19:03:01 2013        (r249263)
+++ head/sys/vm/uma_core.c      Mon Apr  8 19:10:45 2013        (r249264)
@@ -765,9 +765,9 @@ finished:
                            SKIP_NONE, ZFREE_STATFREE);
 #ifdef UMA_DEBUG
                printf("%s: Returning %d bytes.\n",
-                   keg->uk_name, UMA_SLAB_SIZE * keg->uk_ppera);
+                   keg->uk_name, PAGE_SIZE * keg->uk_ppera);
 #endif
-               keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
+               keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
        }
 }
 
@@ -865,7 +865,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t
                wait |= M_NODUMP;
 
        /* zone is passed for legacy reasons. */
-       mem = allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, &flags, wait);
+       mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait);
        if (mem == NULL) {
                if (keg->uk_flags & UMA_ZONE_OFFPAGE)
                        zone_free_item(keg->uk_slabzone, slab, NULL,
@@ -927,7 +927,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t
                        if (keg->uk_flags & UMA_ZONE_OFFPAGE)
                                zone_free_item(keg->uk_slabzone, slab,
                                    NULL, SKIP_NONE, ZFREE_STATFREE);
-                       keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
+                       keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera,
                            flags);
                        KEG_LOCK(keg);
                        return (NULL);
@@ -1138,16 +1138,27 @@ keg_small_init(uma_keg_t keg)
        u_int wastedspace;
        u_int shsize;
 
-       KASSERT(keg != NULL, ("Keg is null in keg_small_init"));
+       if (keg->uk_flags & UMA_ZONE_PCPU) {
+               keg->uk_slabsize = sizeof(struct pcpu);
+               keg->uk_ppera = howmany(mp_ncpus * sizeof(struct pcpu),
+                   PAGE_SIZE);
+       } else {
+               keg->uk_slabsize = UMA_SLAB_SIZE;
+               keg->uk_ppera = 1;
+       }
+
        rsize = keg->uk_size;
 
-       if (rsize < UMA_SMALLEST_UNIT)
-               rsize = UMA_SMALLEST_UNIT;
        if (rsize & keg->uk_align)
                rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
+       if (rsize < keg->uk_slabsize / 256)
+               rsize = keg->uk_slabsize / 256;
 
        keg->uk_rsize = rsize;
-       keg->uk_ppera = 1;
+
+       KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
+           keg->uk_rsize < sizeof(struct pcpu),
+           ("%s: size %u too large", __func__, keg->uk_rsize));
 
        if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
                shsize = 0;
@@ -1159,10 +1170,12 @@ keg_small_init(uma_keg_t keg)
                shsize = sizeof(struct uma_slab);
        }
 
-       keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
-       KASSERT(keg->uk_ipers != 0, ("keg_small_init: ipers is 0"));
+       keg->uk_ipers = (keg->uk_slabsize - shsize) / rsize;
+       KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= 255,
+           ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
+
        memused = keg->uk_ipers * rsize + shsize;
-       wastedspace = UMA_SLAB_SIZE - memused;
+       wastedspace = keg->uk_slabsize - memused;
 
        /*
         * We can't do OFFPAGE if we're internal or if we've been
@@ -1175,24 +1188,26 @@ keg_small_init(uma_keg_t keg)
            (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
                return;
 
-       if ((wastedspace >= UMA_MAX_WASTE) &&
-           (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
-               keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
-               KASSERT(keg->uk_ipers <= 255,
-                   ("keg_small_init: keg->uk_ipers too high!"));
+       if ((wastedspace >= keg->uk_slabsize / UMA_MAX_WASTE) &&
+           (keg->uk_ipers < (keg->uk_slabsize / keg->uk_rsize))) {
+               keg->uk_ipers = keg->uk_slabsize / keg->uk_rsize;
+               KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= 255,
+                   ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
 #ifdef UMA_DEBUG
                printf("UMA decided we need offpage slab headers for "
                    "keg: %s, calculated wastedspace = %d, "
                    "maximum wasted space allowed = %d, "
                    "calculated ipers = %d, "
                    "new wasted space = %d\n", keg->uk_name, wastedspace,
-                   UMA_MAX_WASTE, keg->uk_ipers,
-                   UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
+                   keg->uk_slabsize / UMA_MAX_WASTE, keg->uk_ipers,
+                   keg->uk_slabsize - keg->uk_ipers * keg->uk_rsize);
 #endif
                keg->uk_flags |= UMA_ZONE_OFFPAGE;
-               if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
-                       keg->uk_flags |= UMA_ZONE_HASH;
        }
+
+       if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
+           (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
+               keg->uk_flags |= UMA_ZONE_HASH;
 }
 
 /*
@@ -1209,19 +1224,15 @@ keg_small_init(uma_keg_t keg)
 static void
 keg_large_init(uma_keg_t keg)
 {
-       int pages;
 
        KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
        KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
            ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
+       KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
+           ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
 
-       pages = keg->uk_size / UMA_SLAB_SIZE;
-
-       /* Account for remainder */
-       if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
-               pages++;
-
-       keg->uk_ppera = pages;
+       keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
+       keg->uk_slabsize = keg->uk_ppera * PAGE_SIZE;
        keg->uk_ipers = 1;
        keg->uk_rsize = keg->uk_size;
 
@@ -1242,6 +1253,9 @@ keg_cachespread_init(uma_keg_t keg)
        int pages;
        int rsize;
 
+       KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
+           ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
+
        alignsize = keg->uk_align + 1;
        rsize = keg->uk_size;
        /*
@@ -1259,6 +1273,7 @@ keg_cachespread_init(uma_keg_t keg)
        pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
        keg->uk_rsize = rsize;
        keg->uk_ppera = pages;
+       keg->uk_slabsize = UMA_SLAB_SIZE;
        keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
        keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
        KASSERT(keg->uk_ipers <= uma_max_ipers,
@@ -1308,6 +1323,13 @@ keg_ctor(void *mem, int size, void *udat
        if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC)
                keg->uk_flags |= UMA_ZONE_VTOSLAB;
 
+       if (arg->flags & UMA_ZONE_PCPU)
+#ifdef SMP
+               keg->uk_flags |= UMA_ZONE_OFFPAGE;
+#else
+               keg->uk_flags &= ~UMA_ZONE_PCPU;
+#endif
+
        /*
         * The +UMA_FRITM_SZ added to uk_size is to account for the
         * linkage that is added to the size in keg_small_init().  If
@@ -1385,7 +1407,7 @@ keg_ctor(void *mem, int size, void *udat
                if (totsize & UMA_ALIGN_PTR)
                        totsize = (totsize & ~UMA_ALIGN_PTR) +
                            (UMA_ALIGN_PTR + 1);
-               keg->uk_pgoff = (UMA_SLAB_SIZE * keg->uk_ppera) - totsize;
+               keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - totsize;
 
                if (keg->uk_flags & UMA_ZONE_REFCNT)
                        totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
@@ -1401,7 +1423,7 @@ keg_ctor(void *mem, int size, void *udat
                 * mathematically possible for all cases, so we make
                 * sure here anyway.
                 */
-               if (totsize > UMA_SLAB_SIZE * keg->uk_ppera) {
+               if (totsize > PAGE_SIZE * keg->uk_ppera) {
                        printf("zone %s ipers %d rsize %d size %d\n",
                            zone->uz_name, keg->uk_ipers, keg->uk_rsize,
                            keg->uk_size);
@@ -1676,7 +1698,8 @@ uma_startup(void *bootmem, int boot_page
         * that we need to go to offpage slab headers.  Or, if we do,
         * then we trap that condition below and panic in the INVARIANTS case.
         */
-       wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
+       wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) -
+           (UMA_SLAB_SIZE / UMA_MAX_WASTE);
        totsize = wsize;
        objsize = UMA_SMALLEST_UNIT;
        while (totsize >= wsize) {
@@ -1689,7 +1712,8 @@ uma_startup(void *bootmem, int boot_page
                objsize--;
        uma_max_ipers = MAX(UMA_SLAB_SIZE / objsize, 64);
 
-       wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
+       wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) -
+           (UMA_SLAB_SIZE / UMA_MAX_WASTE);
        totsize = wsize;
        objsize = UMA_SMALLEST_UNIT;
        while (totsize >= wsize) {

Modified: head/sys/vm/uma_int.h
==============================================================================
--- head/sys/vm/uma_int.h       Mon Apr  8 19:03:01 2013        (r249263)
+++ head/sys/vm/uma_int.h       Mon Apr  8 19:10:45 2013        (r249264)
@@ -120,8 +120,8 @@
 
 #define UMA_BOOT_PAGES         64      /* Pages allocated for startup */
 
-/* Max waste before going to off page slab management */
-#define UMA_MAX_WASTE  (UMA_SLAB_SIZE / 10)
+/* Max waste percentage before going to off page slab management */
+#define UMA_MAX_WASTE  10
 
 /*
  * I doubt there will be many cases where this is exceeded. This is the initial
@@ -197,12 +197,9 @@ typedef struct uma_cache * uma_cache_t;
  *
  */
 struct uma_keg {
-       LIST_ENTRY(uma_keg)     uk_link;        /* List of all kegs */
-
        struct mtx      uk_lock;        /* Lock for the keg */
        struct uma_hash uk_hash;
 
-       const char      *uk_name;               /* Name of creating zone. */
        LIST_HEAD(,uma_zone)    uk_zones;       /* Keg's zones */
        LIST_HEAD(,uma_slab)    uk_part_slab;   /* partially allocated slabs */
        LIST_HEAD(,uma_slab)    uk_free_slab;   /* empty slab list */
@@ -225,10 +222,15 @@ struct uma_keg {
        vm_offset_t     uk_kva;         /* Zone base KVA */
        uma_zone_t      uk_slabzone;    /* Slab zone backing us, if OFFPAGE */
 
+       u_int16_t       uk_slabsize;    /* Slab size for this keg */
        u_int16_t       uk_pgoff;       /* Offset to uma_slab struct */
        u_int16_t       uk_ppera;       /* pages per allocation from backend */
        u_int16_t       uk_ipers;       /* Items per slab */
        u_int32_t       uk_flags;       /* Internal flags */
+
+       /* Least used fields go to the last cache line. */
+       const char      *uk_name;               /* Name of creating zone. */
+       LIST_ENTRY(uma_keg)     uk_link;        /* List of all kegs */
 };
 typedef struct uma_keg * uma_keg_t;
 