On 2010-Jul-12 19:38:18 +1000, Peter Jeremy <pe...@server.vk2pj.dyndns.org> wrote: >I have been using the attached arc.patch1 based on a patch written by >Artem Belevich <fbsdl...@src.cx> (see http://pastebin.com/ZCkzkWcs ) >for about a month. I have had reasonable success with it (and junked >my cronjob) but have managed to wedge my system a couple of times >whilst doing zfs send|recv. Whilst looking at that diff, I just >noticed a nasty signed/unsigned bug that could bite in low memory >conditions and have revised it to arc.patch2 (untested as yet).
Let me try actually attaching those patches... Sorry. -- Peter Jeremy
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c =================================================================== RCS file: /usr/ncvs/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c,v retrieving revision 1.22.2.6 diff -u -r1.22.2.6 arc.c --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c 24 May 2010 20:09:40 -0000 1.22.2.6 +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c 12 Jun 2010 21:04:13 -0000 @@ -183,10 +183,15 @@ int zfs_arc_shrink_shift = 0; int zfs_arc_p_min_shift = 0; +uint64_t zfs_arc_bp_active; +uint64_t zfs_arc_bp_inactive; + TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max); TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min); TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit); TUNABLE_INT("vfs.zfs.mdcomp_disable", &zfs_mdcomp_disable); +TUNABLE_QUAD("vfs.zfs.arc_bp_active", &zfs_arc_bp_active); +TUNABLE_QUAD("vfs.zfs.arc_bp_inactive", &zfs_arc_bp_inactive); SYSCTL_DECL(_vfs_zfs); SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_max, CTLFLAG_RDTUN, &zfs_arc_max, 0, "Maximum ARC size"); @@ -195,6 +200,11 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, mdcomp_disable, CTLFLAG_RDTUN, &zfs_mdcomp_disable, 0, "Disable metadata compression"); +SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_bp_active, CTLFLAG_RW|CTLFLAG_TUN, &zfs_arc_bp_active, 0, + "Start ARC backpressure if active memory is below this limit"); +SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_bp_inactive, CTLFLAG_RW|CTLFLAG_TUN, &zfs_arc_bp_inactive, 0, + "Start ARC backpressure if inactive memory is below this limit"); + /* * Note that buffers can be in one of 6 states: * ARC_anon - anonymous (discussed below) @@ -2103,7 +2113,6 @@ } static int needfree = 0; - static int arc_reclaim_needed(void) { @@ -2112,20 +2121,58 @@ #endif #ifdef _KERNEL - if (needfree) - return (1); + /* We've grown too much, */ if (arc_size > arc_c_max) return (1); + + /* Pagedaemon is stuck, let's free something right away */ + if (vm_pageout_pages_needed) + return 1; + + /* Check if inactive list have grown too much */ + if ( 
zfs_arc_bp_inactive + && (ptoa((uintmax_t)cnt.v_inactive_count) > zfs_arc_bp_inactive)) { + /* tell pager to reap 1/2th of inactive queue*/ + atomic_add_int(&vm_pageout_deficit, cnt.v_inactive_count/2); + pagedaemon_wakeup(); + return needfree; + } + + /* Same for active list... */ + if ( zfs_arc_bp_active + && (ptoa((uintmax_t)cnt.v_active_count) > zfs_arc_bp_active)) { + atomic_add_int(&vm_pageout_deficit, cnt.v_active_count/2); + pagedaemon_wakeup(); + return needfree; + } + + + /* Old style behavior -- ARC gives up memory whenever page daemon asks.. */ + if (needfree) + return 1; + + /* + We got here either because active/inactive lists are + getting short or because we've been called during voluntary + ARC size checks. Kind of gray area... + */ + + /* If we didn't reach our minimum yet, don't rush to give memory up..*/ if (arc_size <= arc_c_min) return (0); + /* If we're really short on memory now, give it up. */ + if (vm_page_count_min()) { + return (1); + } + /* - * If pages are needed or we're within 2048 pages - * of needing to page need to reclaim + * If we're within 2048 pages of pagedaemon start, reclaim... 
*/ - if (vm_pages_needed || (vm_paging_target() > -2048)) + if (vm_pages_needed && (vm_paging_target() > -2048)) return (1); + #if 0 /* * take 'desfree' extra pages, so we reclaim sooner, rather than later @@ -2169,8 +2216,6 @@ return (1); #endif #else - if (kmem_used() > (kmem_size() * 3) / 4) - return (1); #endif #else @@ -2279,7 +2324,7 @@ if (arc_eviction_list != NULL) arc_do_user_evicts(); - if (arc_reclaim_needed()) { + if (needfree) { needfree = 0; #ifdef _KERNEL wakeup(&needfree); @@ -3611,10 +3656,15 @@ { #ifdef _KERNEL uint64_t inflight_data = arc_anon->arcs_size; - uint64_t available_memory = ptoa((uintmax_t)cnt.v_free_count); + uint64_t available_memory; static uint64_t page_load = 0; static uint64_t last_txg = 0; + /* How much memory is potentially available */ + available_memory = ptoa((uintmax_t)cnt.v_free_count); + available_memory += ptoa((uintmax_t)cnt.v_cache_count); + available_memory -= ptoa((uintmax_t)cnt.v_free_min); + #if 0 #if defined(__i386) available_memory =
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c =================================================================== RCS file: /usr/ncvs/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c,v retrieving revision 1.22.2.6 diff -u -r1.22.2.6 arc.c --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c 24 May 2010 20:09:40 -0000 1.22.2.6 +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c 12 Jul 2010 09:21:31 -0000 @@ -183,10 +183,15 @@ int zfs_arc_shrink_shift = 0; int zfs_arc_p_min_shift = 0; +uint64_t zfs_arc_bp_active; +uint64_t zfs_arc_bp_inactive; + TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max); TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min); TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit); TUNABLE_INT("vfs.zfs.mdcomp_disable", &zfs_mdcomp_disable); +TUNABLE_QUAD("vfs.zfs.arc_bp_active", &zfs_arc_bp_active); +TUNABLE_QUAD("vfs.zfs.arc_bp_inactive", &zfs_arc_bp_inactive); SYSCTL_DECL(_vfs_zfs); SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_max, CTLFLAG_RDTUN, &zfs_arc_max, 0, "Maximum ARC size"); @@ -195,6 +200,11 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, mdcomp_disable, CTLFLAG_RDTUN, &zfs_mdcomp_disable, 0, "Disable metadata compression"); +SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_bp_active, CTLFLAG_RW|CTLFLAG_TUN, &zfs_arc_bp_active, 0, + "Start ARC backpressure if active memory is below this limit"); +SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_bp_inactive, CTLFLAG_RW|CTLFLAG_TUN, &zfs_arc_bp_inactive, 0, + "Start ARC backpressure if inactive memory is below this limit"); + /* * Note that buffers can be in one of 6 states: * ARC_anon - anonymous (discussed below) @@ -2103,7 +2113,6 @@ } static int needfree = 0; - static int arc_reclaim_needed(void) { @@ -2112,20 +2121,58 @@ #endif #ifdef _KERNEL - if (needfree) - return (1); + /* We've grown too much, */ if (arc_size > arc_c_max) return (1); + + /* Pagedaemon is stuck, let's free something right away */ + if (vm_pageout_pages_needed) + return 1; + + /* Check if inactive list have grown too much */ + if ( 
zfs_arc_bp_inactive + && (ptoa((uintmax_t)cnt.v_inactive_count) > zfs_arc_bp_inactive)) { + /* tell pager to reap 1/2th of inactive queue*/ + atomic_add_int(&vm_pageout_deficit, cnt.v_inactive_count/2); + pagedaemon_wakeup(); + return needfree; + } + + /* Same for active list... */ + if ( zfs_arc_bp_active + && (ptoa((uintmax_t)cnt.v_active_count) > zfs_arc_bp_active)) { + atomic_add_int(&vm_pageout_deficit, cnt.v_active_count/2); + pagedaemon_wakeup(); + return needfree; + } + + + /* Old style behavior -- ARC gives up memory whenever page daemon asks.. */ + if (needfree) + return 1; + + /* + We got here either because active/inactive lists are + getting short or because we've been called during voluntary + ARC size checks. Kind of gray area... + */ + + /* If we didn't reach our minimum yet, don't rush to give memory up..*/ if (arc_size <= arc_c_min) return (0); + /* If we're really short on memory now, give it up. */ + if (vm_page_count_min()) { + return (1); + } + /* - * If pages are needed or we're within 2048 pages - * of needing to page need to reclaim + * If we're within 2048 pages of pagedaemon start, reclaim... 
*/ - if (vm_pages_needed || (vm_paging_target() > -2048)) + if (vm_pages_needed && (vm_paging_target() > -2048)) return (1); + #if 0 /* * take 'desfree' extra pages, so we reclaim sooner, rather than later @@ -2169,8 +2216,6 @@ return (1); #endif #else - if (kmem_used() > (kmem_size() * 3) / 4) - return (1); #endif #else @@ -2279,7 +2324,7 @@ if (arc_eviction_list != NULL) arc_do_user_evicts(); - if (arc_reclaim_needed()) { + if (needfree) { needfree = 0; #ifdef _KERNEL wakeup(&needfree); @@ -3611,10 +3656,17 @@ { #ifdef _KERNEL uint64_t inflight_data = arc_anon->arcs_size; - uint64_t available_memory = ptoa((uintmax_t)cnt.v_free_count); + uint64_t available_memory; static uint64_t page_load = 0; static uint64_t last_txg = 0; + /* How much memory is potentially available */ + available_memory = (uint64_t)cnt.v_free_count + cnt.v_cache_count; + if (available_memory > cnt.v_free_min) + available_memory = ptoa(available_memory - cnt.v_free_min); + else + available_memory = 0; + #if 0 #if defined(__i386) available_memory =
pgp8gmNzycqYQ.pgp
Description: PGP signature