On 1/12/07, Linus Torvalds <[EMAIL PROTECTED]> wrote:
On Thu, 11 Jan 2007, Roy Huang wrote:
>
> On an embedded system, limiting the page cache can relieve memory
> fragmentation. There is a patch against 2.6.19 which limits every
> opened file's page cache and the total pagecache. When the limit is
> reached, it releases the page cache that overruns the limit.

I do think that something like this is probably a good idea, even on
non-embedded setups. We historically couldn't do this, because mapped
pages were too damn hard to remove, but that's obviously not much of a
problem any more.

However, the page-cache limit should NOT be some compile-time constant.
It should work the same way the "dirty page" limit works, and probably
just default to "feel free to use 90% of memory for page cache".

		Linus
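For reference, a runtime-tunable limit of the kind Linus describes would presumably be registered next to dirty_ratio in kernel/sysctl.c. A minimal sketch, modeled on the 2.6.19-era vm.dirty_ratio entry; the knob name "pagecache_ratio", its default, and the ctl_name handling are invented here for illustration and are not part of any posted patch:

#include <linux/sysctl.h>

/* Hypothetical sketch only -- modeled on the vm.dirty_ratio entry in
 * kernel/sysctl.c (2.6.19-era sysctl API).  The name "pagecache_ratio"
 * and its default are illustrative, not from the posted patch.
 */
int pagecache_ratio = 90;	/* % of memory the page cache may use */

static int zero;
static int one_hundred = 100;

static ctl_table pagecache_table[] = {
	{
		/* a real patch would claim a proper VM_* ctl_name number */
		.procname	= "pagecache_ratio",
		.data		= &pagecache_ratio,
		.maxlen		= sizeof(pagecache_ratio),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &one_hundred,
	},
	{ .ctl_name = 0 }
};

zone_watermark_ok() could then derive its page-cache headroom from pagecache_ratio and the zone size rather than a fixed constant, so the "90% of memory" default falls out naturally.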
The attached patch limits the page cache in a simple way:

1) When memory is requested for the page cache, set a flag to mark this
kind of allocation:

 static inline struct page *page_cache_alloc(struct address_space *x)
 {
-	return __page_cache_alloc(mapping_gfp_mask(x));
+	return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_PAGECACHE);
 }

2) Have zone_watermark_ok() enforce the limit:

+	if (alloc_flags & ALLOC_PAGECACHE) {
+		min = min + VFS_CACHE_LIMIT;
+	}
+
 	if (free_pages <= min + z->lowmem_reserve[classzone_idx])
 		return 0;

3) When __alloc_pages() is called for the page cache, pass
ALLOC_PAGECACHE into get_page_from_freelist() to trigger the
page-cache limit branch in zone_watermark_ok().

This approach works on my side. I'll make a new patch soon that makes
the limit tunable through procfs.

The following is the patch:

=====================================================
Index: mm/page_alloc.c
===================================================================
--- mm/page_alloc.c	(revision 2645)
+++ mm/page_alloc.c	(working copy)
@@ -892,6 +892,9 @@ failed:
 #define ALLOC_HARDER	0x10 /* try to alloc harder */
 #define ALLOC_HIGH	0x20 /* __GFP_HIGH set */
 #define ALLOC_CPUSET	0x40 /* check for correct cpuset */
+#define ALLOC_PAGECACHE	0x80 /* __GFP_PAGECACHE set */
+
+#define VFS_CACHE_LIMIT	0x400 /* limit VFS cache page */
 
 /*
  * Return 1 if free pages are above 'mark'. This takes into account the order
@@ -910,6 +913,10 @@ int zone_watermark_ok(struct zone *z, in
 	if (alloc_flags & ALLOC_HARDER)
 		min -= min / 4;
 
+	if (alloc_flags & ALLOC_PAGECACHE) {
+		min = min + VFS_CACHE_LIMIT;
+	}
+
 	if (free_pages <= min + z->lowmem_reserve[classzone_idx])
 		return 0;
 	for (o = 0; o < order; o++) {
@@ -1000,8 +1007,12 @@ restart:
 		return NULL;
 	}
 
-	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
-				zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET);
+	if (gfp_mask & __GFP_PAGECACHE)
+		page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+				zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_PAGECACHE);
+	else
+		page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+				zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET);
 	if (page)
 		goto got_pg;
 
@@ -1027,6 +1038,9 @@ restart:
 	if (wait)
 		alloc_flags |= ALLOC_CPUSET;
 
+	if (gfp_mask & __GFP_PAGECACHE)
+		alloc_flags |= ALLOC_PAGECACHE;
+
 	/*
 	 * Go through the zonelist again. Let __GFP_HIGH and allocations
 	 * coming from realtime tasks go deeper into reserves.
Index: include/linux/gfp.h
===================================================================
--- include/linux/gfp.h	(revision 2645)
+++ include/linux/gfp.h	(working copy)
@@ -46,6 +46,7 @@ struct vm_area_struct;
 #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
 #define __GFP_HARDWALL   ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
 #define __GFP_THISNODE	((__force gfp_t)0x40000u)/* No fallback, no policies */
+#define __GFP_PAGECACHE	((__force gfp_t)0x80000u) /* Is page cache allocation? */
 
 #define __GFP_BITS_SHIFT 20	/* Room for 20 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
Index: include/linux/pagemap.h
===================================================================
--- include/linux/pagemap.h	(revision 2645)
+++ include/linux/pagemap.h	(working copy)
@@ -62,7 +62,7 @@ static inline struct page *__page_cache_
 
 static inline struct page *page_cache_alloc(struct address_space *x)
 {
-	return __page_cache_alloc(mapping_gfp_mask(x));
+	return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_PAGECACHE);
 }
 
 static inline struct page *page_cache_alloc_cold(struct address_space *x)
=====================================================

Welcome any comments and suggestions.

Thanks,
-Aubrey
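To see what the VFS_CACHE_LIMIT headroom does, here is a small userspace C model of the modified check in zone_watermark_ok(). It is a toy under simplifying assumptions: the per-order loop, lowmem_reserve, and the ALLOC_HIGH/ALLOC_HARDER adjustments are all dropped, and the watermark of 128 pages is made up for the demonstration.

/* Toy userspace model of the zone_watermark_ok() change -- illustrative
 * only; real kernel behavior also involves per-order checks,
 * lowmem_reserve, and the reclaim path, all simplified away here.
 */
#include <stdio.h>

#define ALLOC_PAGECACHE	0x80	/* mirrors the patch */
#define VFS_CACHE_LIMIT	0x400	/* extra headroom, in pages */

/* Simplified watermark check: free pages must stay above 'min', and
 * page-cache allocations must additionally leave VFS_CACHE_LIMIT
 * pages free.
 */
static int watermark_ok(long free_pages, long min, int alloc_flags)
{
	if (alloc_flags & ALLOC_PAGECACHE)
		min += VFS_CACHE_LIMIT;
	return free_pages > min;
}

int main(void)
{
	long min = 128;		/* assumed zone watermark, in pages */
	long free_pages;

	for (free_pages = 2048; free_pages >= 0; free_pages -= 512)
		printf("free=%5ld  normal=%d  pagecache=%d\n",
		       free_pages,
		       watermark_ok(free_pages, min, 0),
		       watermark_ok(free_pages, min, ALLOC_PAGECACHE));
	return 0;
}

With these numbers, once free memory drops below min + VFS_CACHE_LIMIT (1152 pages here), page-cache allocations fail the fast watermark check and fall into the reclaim path while ordinary allocations still succeed at the normal watermark; that extra reclaim pressure is what bounds the cache.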