Author: mav
Date: Sat Oct  3 07:50:15 2015
New Revision: 288562
URL: https://svnweb.freebsd.org/changeset/base/288562

Log:
  MFC r286625:
  5376 arc_kmem_reap_now() should not result in clearing arc_no_grow
  
  Reviewed by: Christopher Siden <christopher.si...@delphix.com>
  Reviewed by: George Wilson <george.wil...@delphix.com>
  Reviewed by: Steven Hartland <kill...@multiplay.co.uk>
  Reviewed by: Richard Elling <richard.ell...@richardelling.com>
  Approved by: Dan McDonald <dan...@omniti.com>
  Author: Matthew Ahrens <mahr...@delphix.com>
  
  illumos/illumos-gate@2ec99e3e987d8aa273f1e9ba2b983557d058198c

Modified:
  stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c      Sat Oct  3 07:49:16 2015        (r288561)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c      Sat Oct  3 07:50:15 2015        (r288562)
@@ -153,13 +153,7 @@ static kmutex_t            arc_reclaim_thr_lock;
 static kcondvar_t      arc_reclaim_thr_cv;     /* used to signal reclaim thr */
 static uint8_t         arc_thread_exit;
 
-#define        ARC_REDUCE_DNLC_PERCENT 3
-uint_t arc_reduce_dnlc_percent = ARC_REDUCE_DNLC_PERCENT;
-
-typedef enum arc_reclaim_strategy {
-       ARC_RECLAIM_AGGR,               /* Aggressive reclaim strategy */
-       ARC_RECLAIM_CONS                /* Conservative reclaim strategy */
-} arc_reclaim_strategy_t;
+uint_t arc_reduce_dnlc_percent = 3;
 
 /*
  * The number of iterations through arc_evict_*() before we
@@ -174,7 +168,19 @@ static int         arc_grow_retry = 60;
 static int             arc_p_min_shift = 4;
 
 /* log2(fraction of arc to reclaim) */
-static int             arc_shrink_shift = 5;
+static int             arc_shrink_shift = 7;
+
+/*
+ * log2(fraction of ARC which must be free to allow growing).
+ * I.e. If there is less than arc_c >> arc_no_grow_shift free memory,
+ * when reading a new block into the ARC, we will evict an equal-sized block
+ * from the ARC.
+ *
+ * This must be less than arc_shrink_shift, so that when we shrink the ARC,
+ * we will still not allow it to grow.
+ */
+int                    arc_no_grow_shift = 5;
+
 
 /*
  * minimum lifespan of a prefetch block in clock ticks
@@ -3058,13 +3064,10 @@ arc_flush(spa_t *spa)
 }
 
 void
-arc_shrink(void)
+arc_shrink(int64_t to_free)
 {
 
        if (arc_c > arc_c_min) {
-               uint64_t to_free;
-
-               to_free = arc_c >> arc_shrink_shift;
                DTRACE_PROBE4(arc__shrink, uint64_t, arc_c, uint64_t,
                        arc_c_min, uint64_t, arc_p, uint64_t, to_free);
                if (arc_c > arc_c_min + to_free)
@@ -3092,44 +3095,76 @@ arc_shrink(void)
        }
 }
 
-static int needfree = 0;
+static long needfree = 0;
 
-static int
-arc_reclaim_needed(void)
+typedef enum free_memory_reason_t {
+       FMR_UNKNOWN,
+       FMR_NEEDFREE,
+       FMR_LOTSFREE,
+       FMR_SWAPFS_MINFREE,
+       FMR_PAGES_PP_MAXIMUM,
+       FMR_HEAP_ARENA,
+       FMR_ZIO_ARENA,
+       FMR_ZIO_FRAG,
+} free_memory_reason_t;
+
+int64_t last_free_memory;
+free_memory_reason_t last_free_reason;
+
+/*
+ * Additional reserve of pages for pp_reserve.
+ */
+int64_t arc_pages_pp_reserve = 64;
+
+/*
+ * Additional reserve of pages for swapfs.
+ */
+int64_t arc_swapfs_reserve = 64;
+
+/*
+ * Return the amount of memory that can be consumed before reclaim will be
+ * needed.  Positive if there is sufficient free memory, negative indicates
+ * the amount of memory that needs to be freed up.
+ */
+static int64_t
+arc_available_memory(void)
 {
+       int64_t lowest = INT64_MAX;
+       int64_t n;
+       free_memory_reason_t r = FMR_UNKNOWN;
 
 #ifdef _KERNEL
-
-       if (needfree) {
-               DTRACE_PROBE(arc__reclaim_needfree);
-               return (1);
+       if (needfree > 0) {
+               n = PAGESIZE * (-needfree);
+               if (n < lowest) {
+                       lowest = n;
+                       r = FMR_NEEDFREE;
+               }
        }
 
        /*
         * Cooperate with pagedaemon when it's time for it to scan
         * and reclaim some pages.
         */
-       if (freemem < zfs_arc_free_target) {
-               DTRACE_PROBE2(arc__reclaim_freemem, uint64_t,
-                   freemem, uint64_t, zfs_arc_free_target);
-               return (1);
+       n = PAGESIZE * (int64_t)(freemem - zfs_arc_free_target);
+       if (n < lowest) {
+               lowest = n;
+               r = FMR_LOTSFREE;
        }
 
 #ifdef sun
        /*
-        * take 'desfree' extra pages, so we reclaim sooner, rather than later
-        */
-       extra = desfree;
-
-       /*
         * check that we're out of range of the pageout scanner.  It starts to
         * schedule paging if freemem is less than lotsfree and needfree.
         * lotsfree is the high-water mark for pageout, and needfree is the
         * number of needed free pages.  We add extra pages here to make sure
         * the scanner doesn't start up while we're freeing memory.
         */
-       if (freemem < lotsfree + needfree + extra)
-               return (1);
+       n = PAGESIZE * (freemem - lotsfree - needfree - desfree);
+       if (n < lowest) {
+               lowest = n;
+               r = FMR_LOTSFREE;
+       }
 
        /*
         * check to make sure that swapfs has enough space so that anon
@@ -3138,8 +3173,13 @@ arc_reclaim_needed(void)
         * swap pages.  We also add a bit of extra here just to prevent
         * circumstances from getting really dire.
         */
-       if (availrmem < swapfs_minfree + swapfs_reserve + extra)
-               return (1);
+       n = PAGESIZE * (availrmem - swapfs_minfree - swapfs_reserve -
+           desfree - arc_swapfs_reserve);
+       if (n < lowest) {
+               lowest = n;
+               r = FMR_SWAPFS_MINFREE;
+       }
+
 
        /*
         * Check that we have enough availrmem that memory locking (e.g., via
@@ -3148,8 +3188,12 @@ arc_reclaim_needed(void)
         * drops below pages_pp_maximum, page locking mechanisms such as
         * page_pp_lock() will fail.)
         */
-       if (availrmem <= pages_pp_maximum)
-               return (1);
+       n = PAGESIZE * (availrmem - pages_pp_maximum -
+           arc_pages_pp_reserve);
+       if (n < lowest) {
+               lowest = n;
+               r = FMR_PAGES_PP_MAXIMUM;
+       }
 
 #endif /* sun */
 #if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC)
@@ -3164,12 +3208,11 @@ arc_reclaim_needed(void)
         * heap is allocated.  (Or, in the calculation, if less than 1/4th is
         * free)
         */
-       if (vmem_size(heap_arena, VMEM_FREE) <
-           (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2)) {
-               DTRACE_PROBE2(arc__reclaim_used, uint64_t,
-                   vmem_size(heap_arena, VMEM_FREE), uint64_t,
-                   (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2);
-               return (1);
+       n = vmem_size(heap_arena, VMEM_FREE) -
+           (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2);
+       if (n < lowest) {
+               lowest = n;
+               r = FMR_HEAP_ARENA;
        }
 #define        zio_arena       NULL
 #else
@@ -3185,29 +3228,50 @@ arc_reclaim_needed(void)
         * to aggressively evict memory from the arc in order to avoid
         * memory fragmentation issues.
         */
-       if (zio_arena != NULL &&
-           vmem_size(zio_arena, VMEM_FREE) <
-           (vmem_size(zio_arena, VMEM_ALLOC) >> 4))
-               return (1);
+       if (zio_arena != NULL) {
+               n = vmem_size(zio_arena, VMEM_FREE) -
+                   (vmem_size(zio_arena, VMEM_ALLOC) >> 4);
+               if (n < lowest) {
+                       lowest = n;
+                       r = FMR_ZIO_ARENA;
+               }
+       }
 
        /*
         * Above limits know nothing about real level of KVA fragmentation.
         * Start aggressive reclamation if too little sequential KVA left.
         */
-       if (vmem_size(heap_arena, VMEM_MAXFREE) < zfs_max_recordsize) {
-               DTRACE_PROBE2(arc__reclaim_maxfree, uint64_t,
-                   vmem_size(heap_arena, VMEM_MAXFREE),
-                   uint64_t, zfs_max_recordsize);
-               return (1);
+       if (lowest > 0) {
+               n = (vmem_size(heap_arena, VMEM_MAXFREE) < zfs_max_recordsize) ?
+                   -(vmem_size(heap_arena, VMEM_ALLOC) >> 4) : INT64_MAX;
+               if (n < lowest) {
+                       lowest = n;
+                       r = FMR_ZIO_FRAG;
+               }
        }
 
 #else  /* _KERNEL */
+       /* Every 100 calls, free a small amount */
        if (spa_get_random(100) == 0)
-               return (1);
+               lowest = -1024;
 #endif /* _KERNEL */
-       DTRACE_PROBE(arc__reclaim_no);
 
-       return (0);
+       last_free_memory = lowest;
+       last_free_reason = r;
+       DTRACE_PROBE2(arc__available_memory, int64_t, lowest, int, r);
+       return (lowest);
+}
+
+
+/*
+ * Determine if the system is under memory pressure and is asking
+ * to reclaim memory. A return value of TRUE indicates that the system
+ * is under memory pressure and that the arc should adjust accordingly.
+ */
+static boolean_t
+arc_reclaim_needed(void)
+{
+       return (arc_available_memory() < 0);
 }
 
 extern kmem_cache_t    *zio_buf_cache[];
@@ -3215,7 +3279,7 @@ extern kmem_cache_t       *zio_data_buf_cache[
 extern kmem_cache_t    *range_seg_cache;
 
 static __noinline void
-arc_kmem_reap_now(arc_reclaim_strategy_t strat)
+arc_kmem_reap_now(void)
 {
        size_t                  i;
        kmem_cache_t            *prev_cache = NULL;
@@ -3238,13 +3302,6 @@ arc_kmem_reap_now(arc_reclaim_strategy_t
 #endif
 #endif
 
-       /*
-        * An aggressive reclamation will shrink the cache size as well as
-        * reap free buffers from the arc kmem caches.
-        */
-       if (strat == ARC_RECLAIM_AGGR)
-               arc_shrink();
-
        for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) {
                if (zio_buf_cache[i] != prev_cache) {
                        prev_cache = zio_buf_cache[i];
@@ -3261,12 +3318,13 @@ arc_kmem_reap_now(arc_reclaim_strategy_t
        kmem_cache_reap_now(range_seg_cache);
 
 #ifdef sun
-       /*
-        * Ask the vmem arena to reclaim unused memory from its
-        * quantum caches.
-        */
-       if (zio_arena != NULL && strat == ARC_RECLAIM_AGGR)
+       if (zio_arena != NULL) {
+               /*
+                * Ask the vmem arena to reclaim unused memory from its
+                * quantum caches.
+                */
                vmem_qcache_reap(zio_arena);
+       }
 #endif
        DTRACE_PROBE(arc__kmem_reap_end);
 }
@@ -3275,46 +3333,44 @@ static void
 arc_reclaim_thread(void *dummy __unused)
 {
        clock_t                 growtime = 0;
-       arc_reclaim_strategy_t  last_reclaim = ARC_RECLAIM_CONS;
        callb_cpr_t             cpr;
 
        CALLB_CPR_INIT(&cpr, &arc_reclaim_thr_lock, callb_generic_cpr, FTAG);
 
        mutex_enter(&arc_reclaim_thr_lock);
        while (arc_thread_exit == 0) {
-               if (arc_reclaim_needed()) {
+               int64_t free_memory = arc_available_memory();
+               if (free_memory < 0) {
 
-                       if (arc_no_grow) {
-                               if (last_reclaim == ARC_RECLAIM_CONS) {
-                                       DTRACE_PROBE(arc__reclaim_aggr_no_grow);
-                                       last_reclaim = ARC_RECLAIM_AGGR;
-                               } else {
-                                       last_reclaim = ARC_RECLAIM_CONS;
-                               }
-                       } else {
-                               arc_no_grow = TRUE;
-                               last_reclaim = ARC_RECLAIM_AGGR;
-                               DTRACE_PROBE(arc__reclaim_aggr);
-                               membar_producer();
-                       }
+                       arc_no_grow = B_TRUE;
+                       arc_warm = B_TRUE;
 
-                       /* reset the growth delay for every reclaim */
+                       /*
+                        * Wait at least zfs_grow_retry (default 60) seconds
+                        * before considering growing.
+                        */
                        growtime = ddi_get_lbolt() + (arc_grow_retry * hz);
 
-                       if (needfree && last_reclaim == ARC_RECLAIM_CONS) {
-                               /*
-                                * If needfree is TRUE our vm_lowmem hook
-                                * was called and in that case we must free some
-                                * memory, so switch to aggressive mode.
-                                */
-                               arc_no_grow = TRUE;
-                               last_reclaim = ARC_RECLAIM_AGGR;
-                       }
-                       arc_kmem_reap_now(last_reclaim);
-                       arc_warm = B_TRUE;
+                       arc_kmem_reap_now();
 
-               } else if (arc_no_grow && ddi_get_lbolt() >= growtime) {
-                       arc_no_grow = FALSE;
+                       /*
+                        * If we are still low on memory, shrink the ARC
+                        * so that we have arc_shrink_min free space.
+                        */
+                       free_memory = arc_available_memory();
+
+                       int64_t to_free =
+                           (arc_c >> arc_shrink_shift) - free_memory;
+                       if (to_free > 0) {
+#ifdef _KERNEL
+                               to_free = MAX(to_free, ptob(needfree));
+#endif
+                               arc_shrink(to_free);
+                       }
+               } else if (free_memory < arc_c >> arc_no_grow_shift) {
+                       arc_no_grow = B_TRUE;
+               } else if (ddi_get_lbolt() >= growtime) {
+                       arc_no_grow = B_FALSE;
                }
 
                arc_adjust();
@@ -4784,7 +4840,8 @@ arc_lowmem(void *arg __unused, int howto
 {
 
        mutex_enter(&arc_reclaim_thr_lock);
-       needfree = 1;
+       /* XXX: Memory deficit should be passed as argument. */
+       needfree = btoc(arc_c >> arc_shrink_shift);
        DTRACE_PROBE(arc__needfree);
        cv_signal(&arc_reclaim_thr_cv);
 
@@ -4868,6 +4925,12 @@ arc_init(void)
        if (zfs_arc_shrink_shift > 0)
                arc_shrink_shift = zfs_arc_shrink_shift;
 
+       /*
+        * Ensure that arc_no_grow_shift is less than arc_shrink_shift.
+        */
+       if (arc_no_grow_shift >= arc_shrink_shift)
+               arc_no_grow_shift = arc_shrink_shift - 1;
+
        if (zfs_arc_p_min_shift > 0)
                arc_p_min_shift = zfs_arc_p_min_shift;
 
_______________________________________________
svn-src-stable-10@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-stable-10
To unsubscribe, send any mail to "svn-src-stable-10-unsubscr...@freebsd.org"

Reply via email to