On 2010-Jul-12 19:38:18 +1000, Peter Jeremy <pe...@server.vk2pj.dyndns.org> wrote:
>I have been using the attached arc.patch1 based on a patch written by
>Artem Belevich <fbsdl...@src.cx> (see http://pastebin.com/ZCkzkWcs )
>for about a month.  I have had reasonable success with it (and junked
>my cronjob) but have managed to wedge my system a couple of times
>whilst doing zfs send|recv.  Whilst looking at that diff, I just
>noticed a nasty signed/unsigned bug that could bite in low memory
>conditions and have revised it to arc.patch2 (untested as yet).

Let me try actually attaching those patches... Sorry.

-- 
Peter Jeremy
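
For reference, the signed/unsigned problem mentioned above is in the
available_memory calculation near the end of each diff: the first diff
below (its +++ timestamp of 12 Jun 2010 suggests it is arc.patch1)
unconditionally subtracts ptoa(cnt.v_free_min) from an unsigned total, so
whenever free + cache pages drop below v_free_min the subtraction wraps
around to a huge value instead of going negative; the second diff
(12 Jul 2010, presumably arc.patch2) only subtracts after checking that
the total exceeds v_free_min.  Below is just a minimal userland sketch of
the two calculations, with made-up page counts standing in for the real
vm counters; it is not part of either patch.

/*
 * Standalone sketch only: local stand-ins for the kernel's ptoa() and the
 * vm counters, with hypothetical page counts (PAGE_SHIFT of 12 assumed).
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define ptoa(x)		((uintmax_t)(x) << PAGE_SHIFT)

int
main(void)
{
	/* Low-memory condition: free + cache pages are below v_free_min. */
	uint64_t v_free_count = 100;
	uint64_t v_cache_count = 50;
	uint64_t v_free_min = 1000;

	/* First diff: unconditional subtraction wraps the unsigned value. */
	uint64_t avail1 = ptoa(v_free_count) + ptoa(v_cache_count) -
	    ptoa(v_free_min);

	/* Second diff: only subtract when the total exceeds v_free_min. */
	uint64_t pages = v_free_count + v_cache_count;
	uint64_t avail2 = (pages > v_free_min) ?
	    ptoa(pages - v_free_min) : 0;

	printf("unguarded: %ju bytes\n", (uintmax_t)avail1);	/* huge */
	printf("guarded:   %ju bytes\n", (uintmax_t)avail2);	/* 0 */
	return (0);
}

Run standalone, the unguarded value comes out near UINT64_MAX while the
guarded one clamps to zero, which is the sort of thing that could bite in
low memory conditions, as noted above.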
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
===================================================================
RCS file: /usr/ncvs/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c,v
retrieving revision 1.22.2.6
diff -u -r1.22.2.6 arc.c
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c        24 May 2010 20:09:40 -0000      1.22.2.6
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c        12 Jun 2010 21:04:13 -0000
@@ -183,10 +183,15 @@
 int zfs_arc_shrink_shift = 0;
 int zfs_arc_p_min_shift = 0;
 
+uint64_t zfs_arc_bp_active;
+uint64_t zfs_arc_bp_inactive;
+
 TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max);
 TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min);
 TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit);
 TUNABLE_INT("vfs.zfs.mdcomp_disable", &zfs_mdcomp_disable);
+TUNABLE_QUAD("vfs.zfs.arc_bp_active", &zfs_arc_bp_active);
+TUNABLE_QUAD("vfs.zfs.arc_bp_inactive", &zfs_arc_bp_inactive);
 SYSCTL_DECL(_vfs_zfs);
 SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_max, CTLFLAG_RDTUN, &zfs_arc_max, 0,
     "Maximum ARC size");
@@ -195,6 +200,11 @@
 SYSCTL_INT(_vfs_zfs, OID_AUTO, mdcomp_disable, CTLFLAG_RDTUN,
     &zfs_mdcomp_disable, 0, "Disable metadata compression");
 
+SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_bp_active, CTLFLAG_RW|CTLFLAG_TUN, &zfs_arc_bp_active, 0,
+    "Start ARC backpressure if active memory is below this limit");
+SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_bp_inactive, CTLFLAG_RW|CTLFLAG_TUN, &zfs_arc_bp_inactive, 0,
+    "Start ARC backpressure if inactive memory is below this limit");
+
 /*
  * Note that buffers can be in one of 6 states:
  *     ARC_anon        - anonymous (discussed below)
@@ -2103,7 +2113,6 @@
 }
 
 static int needfree = 0;
-
 static int
 arc_reclaim_needed(void)
 {
@@ -2112,20 +2121,58 @@
 #endif
 
 #ifdef _KERNEL
-       if (needfree)
-               return (1);
+       /* We've grown too much. */
        if (arc_size > arc_c_max)
                return (1);
+
+       /* Pagedaemon is stuck, let's free something right away */
+       if (vm_pageout_pages_needed)
+               return 1;
+
+       /* Check if the inactive list has grown too much */
+       if ( zfs_arc_bp_inactive
+            && (ptoa((uintmax_t)cnt.v_inactive_count) > zfs_arc_bp_inactive)) {
+               /* tell the pager to reap half of the inactive queue */
+               atomic_add_int(&vm_pageout_deficit, cnt.v_inactive_count/2);
+               pagedaemon_wakeup();
+               return needfree;
+       }
+
+       /* Same for active list... */
+       if ( zfs_arc_bp_active
+            && (ptoa((uintmax_t)cnt.v_active_count) > zfs_arc_bp_active)) {
+               atomic_add_int(&vm_pageout_deficit, cnt.v_active_count/2);
+               pagedaemon_wakeup();
+               return needfree;
+       }
+
+       
+       /* Old style behavior -- ARC gives up memory whenever page daemon asks.. */
+       if (needfree)
+               return 1;
+
+       /*
+        * We got here either because the active/inactive lists are
+        * getting short or because we've been called during voluntary
+        * ARC size checks.  Kind of a gray area...
+        */
+
+       /* If we haven't reached our minimum yet, don't rush to give memory up. */
        if (arc_size <= arc_c_min)
                return (0);
 
+       /* If we're really short on memory now, give it up. */
+       if (vm_page_count_min()) {
+               return (1);
+       }
+       
        /*
-        * If pages are needed or we're within 2048 pages
-        * of needing to page need to reclaim
+        * If we're within 2048 pages of pagedaemon start, reclaim...
         */
-       if (vm_pages_needed || (vm_paging_target() > -2048))
+       if (vm_pages_needed && (vm_paging_target() > -2048))
                return (1);
 
+
 #if 0
        /*
         * take 'desfree' extra pages, so we reclaim sooner, rather than later
@@ -2169,8 +2216,6 @@
                return (1);
 #endif
 #else
-       if (kmem_used() > (kmem_size() * 3) / 4)
-               return (1);
 #endif
 
 #else
@@ -2279,7 +2324,7 @@
                if (arc_eviction_list != NULL)
                        arc_do_user_evicts();
 
-               if (arc_reclaim_needed()) {
+               if (needfree) {
                        needfree = 0;
 #ifdef _KERNEL
                        wakeup(&needfree);
@@ -3611,10 +3656,15 @@
 {
 #ifdef _KERNEL
        uint64_t inflight_data = arc_anon->arcs_size;
-       uint64_t available_memory = ptoa((uintmax_t)cnt.v_free_count);
+       uint64_t available_memory;
        static uint64_t page_load = 0;
        static uint64_t last_txg = 0;
 
+        /* How much memory is potentially available */
+        available_memory = ptoa((uintmax_t)cnt.v_free_count);
+        available_memory += ptoa((uintmax_t)cnt.v_cache_count);
+        available_memory -= ptoa((uintmax_t)cnt.v_free_min);
+        
 #if 0
 #if defined(__i386)
        available_memory =
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
===================================================================
RCS file: /usr/ncvs/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c,v
retrieving revision 1.22.2.6
diff -u -r1.22.2.6 arc.c
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c        24 May 2010 20:09:40 -0000      1.22.2.6
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c        12 Jul 2010 09:21:31 -0000
@@ -183,10 +183,15 @@
 int zfs_arc_shrink_shift = 0;
 int zfs_arc_p_min_shift = 0;
 
+uint64_t zfs_arc_bp_active;
+uint64_t zfs_arc_bp_inactive;
+
 TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max);
 TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min);
 TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit);
 TUNABLE_INT("vfs.zfs.mdcomp_disable", &zfs_mdcomp_disable);
+TUNABLE_QUAD("vfs.zfs.arc_bp_active", &zfs_arc_bp_active);
+TUNABLE_QUAD("vfs.zfs.arc_bp_inactive", &zfs_arc_bp_inactive);
 SYSCTL_DECL(_vfs_zfs);
 SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_max, CTLFLAG_RDTUN, &zfs_arc_max, 0,
     "Maximum ARC size");
@@ -195,6 +200,11 @@
 SYSCTL_INT(_vfs_zfs, OID_AUTO, mdcomp_disable, CTLFLAG_RDTUN,
     &zfs_mdcomp_disable, 0, "Disable metadata compression");
 
+SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_bp_active, CTLFLAG_RW|CTLFLAG_TUN, &zfs_arc_bp_active, 0,
+    "Start ARC backpressure if active memory is below this limit");
+SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_bp_inactive, CTLFLAG_RW|CTLFLAG_TUN, &zfs_arc_bp_inactive, 0,
+    "Start ARC backpressure if inactive memory is below this limit");
+
 /*
  * Note that buffers can be in one of 6 states:
  *     ARC_anon        - anonymous (discussed below)
@@ -2103,7 +2113,6 @@
 }
 
 static int needfree = 0;
-
 static int
 arc_reclaim_needed(void)
 {
@@ -2112,20 +2121,58 @@
 #endif
 
 #ifdef _KERNEL
-       if (needfree)
-               return (1);
+       /* We've grown too much. */
        if (arc_size > arc_c_max)
                return (1);
+
+       /* Pagedaemon is stuck, let's free something right away */
+       if (vm_pageout_pages_needed)
+               return 1;
+
+       /* Check if the inactive list has grown too much */
+       if ( zfs_arc_bp_inactive
+            && (ptoa((uintmax_t)cnt.v_inactive_count) > zfs_arc_bp_inactive)) {
+               /* tell the pager to reap half of the inactive queue */
+               atomic_add_int(&vm_pageout_deficit, cnt.v_inactive_count/2);
+               pagedaemon_wakeup();
+               return needfree;
+       }
+
+       /* Same for active list... */
+       if ( zfs_arc_bp_active
+            && (ptoa((uintmax_t)cnt.v_active_count) > zfs_arc_bp_active)) {
+               atomic_add_int(&vm_pageout_deficit, cnt.v_active_count/2);
+               pagedaemon_wakeup();
+               return needfree;
+       }
+
+       
+       /* Old style behavior -- ARC gives up memory whenever page daemon asks.. */
+       if (needfree)
+               return 1;
+
+       /*
+        * We got here either because the active/inactive lists are
+        * getting short or because we've been called during voluntary
+        * ARC size checks.  Kind of a gray area...
+        */
+
+       /* If we haven't reached our minimum yet, don't rush to give memory up. */
        if (arc_size <= arc_c_min)
                return (0);
 
+       /* If we're really short on memory now, give it up. */
+       if (vm_page_count_min()) {
+               return (1);
+       }
+       
        /*
-        * If pages are needed or we're within 2048 pages
-        * of needing to page need to reclaim
+        * If we're within 2048 pages of pagedaemon start, reclaim...
         */
-       if (vm_pages_needed || (vm_paging_target() > -2048))
+       if (vm_pages_needed && (vm_paging_target() > -2048))
                return (1);
 
+
 #if 0
        /*
         * take 'desfree' extra pages, so we reclaim sooner, rather than later
@@ -2169,8 +2216,6 @@
                return (1);
 #endif
 #else
-       if (kmem_used() > (kmem_size() * 3) / 4)
-               return (1);
 #endif
 
 #else
@@ -2279,7 +2324,7 @@
                if (arc_eviction_list != NULL)
                        arc_do_user_evicts();
 
-               if (arc_reclaim_needed()) {
+               if (needfree) {
                        needfree = 0;
 #ifdef _KERNEL
                        wakeup(&needfree);
@@ -3611,10 +3656,17 @@
 {
 #ifdef _KERNEL
        uint64_t inflight_data = arc_anon->arcs_size;
-       uint64_t available_memory = ptoa((uintmax_t)cnt.v_free_count);
+       uint64_t available_memory;
        static uint64_t page_load = 0;
        static uint64_t last_txg = 0;
 
+        /* How much memory is potentially available */
+       available_memory = (uint64_t)cnt.v_free_count + cnt.v_cache_count;
+       if (available_memory > cnt.v_free_min)
+               available_memory = ptoa(available_memory - cnt.v_free_min);
+       else
+               available_memory = 0;
+
 #if 0
 #if defined(__i386)
        available_memory =
