On an embedded system, limiting the page cache can relieve memory fragmentation. Below is a patch against 2.6.19 that limits both the page cache of each opened file and the total page cache. When a limit is reached, the pages that exceed it are released.
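In outline, the patch works like this: every address_space keeps its cached pages on a private list (page_head) and, once nrpages crosses the per-file pages_limit, the read and write paths call balance_cache() to drop roughly half of that file's pages; independently, kswapd is woken whenever the global count of file-backed pages exceeds total_pagecache_limit, even when the zone watermarks are still fine. A minimal sketch of the per-file check (simplified, using the names introduced by the patch below; not a drop-in hunk):

#ifdef CONFIG_LIMIT_PAGECACHE
        /*
         * Called at the end of do_generic_mapping_read() and of the
         * generic write path: if this file caches too many pages,
         * reclaim about half of them right away.
         */
        if (mapping->nrpages >= mapping->pages_limit)
                balance_cache(mapping);
#endif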
Index: include/linux/pagemap.h
===================================================================
--- include/linux/pagemap.h (revision 2628)
+++ include/linux/pagemap.h (working copy)
@@ -12,6 +12,7 @@
 #include <asm/uaccess.h>
 #include <linux/gfp.h>
 
+extern int total_pagecache_limit;
 /*
  * Bits in mapping->flags. The lower __GFP_BITS_SHIFT bits are the page
  * allocation mode flags.
Index: include/linux/fs.h
===================================================================
--- include/linux/fs.h (revision 2628)
+++ include/linux/fs.h (working copy)
@@ -444,6 +444,10 @@
         spinlock_t              private_lock;   /* for use by the address_space */
         struct list_head        private_list;   /* ditto */
         struct address_space    *assoc_mapping; /* ditto */
+#ifdef CONFIG_LIMIT_PAGECACHE
+        unsigned long           pages_limit;
+        struct list_head        page_head;
+#endif
 } __attribute__((aligned(sizeof(long))));
         /*
          * On most architectures that alignment is already the case; but
Index: include/linux/mm.h
===================================================================
--- include/linux/mm.h (revision 2628)
+++ include/linux/mm.h (working copy)
@@ -231,6 +231,9 @@
 #else
 #define VM_BUG_ON(condition) do { } while(0)
 #endif
+#ifdef CONFIG_LIMIT_PAGECACHE
+        struct list_head page_list;
+#endif
 
 /*
  * Methods to modify the page usage count.
@@ -1030,7 +1033,21 @@
 /* mm/page-writeback.c */
 int write_one_page(struct page *page, int wait);
 
+/* possible outcome of pageout() */
+typedef enum {
+        /* failed to write page out, page is locked */
+        PAGE_KEEP,
+        /* move page to the active list, page is locked */
+        PAGE_ACTIVATE,
+        /* page has been sent to the disk successfully, page is unlocked */
+        PAGE_SUCCESS,
+        /* page is clean and locked */
+        PAGE_CLEAN,
+} pageout_t;
+
+pageout_t pageout(struct page *page, struct address_space *mapping);
+
 /* readahead.c */
 #define VM_MAX_READAHEAD        128     /* kbytes */
 #define VM_MIN_READAHEAD        16      /* kbytes (includes current page) */
Index: init/Kconfig
===================================================================
--- init/Kconfig (revision 2628)
+++ init/Kconfig (working copy)
@@ -419,6 +419,19 @@
           option replaces shmem and tmpfs with the much simpler ramfs code,
           which may be appropriate on small systems without swap.
 
+config LIMIT_PAGECACHE
+        bool "Limit page caches" if EMBEDDED
+
+config PAGECACHE_LIMIT
+        int "Page cache limit for every file in page unit"
+        depends on LIMIT_PAGECACHE
+        default 32
+
+config PAGECACHE_LIMIT_TOTAL
+        int "Total page cache limit in MB unit"
+        depends on LIMIT_PAGECACHE
+        default 10
+
 choice
         prompt "Page frame management algorithm"
         default BUDDY
Index: fs/inode.c
===================================================================
--- fs/inode.c (revision 2628)
+++ fs/inode.c (working copy)
@@ -205,6 +205,10 @@
         INIT_LIST_HEAD(&inode->inotify_watches);
         mutex_init(&inode->inotify_mutex);
 #endif
+#ifdef CONFIG_LIMIT_PAGECACHE
+        INIT_LIST_HEAD(&inode->i_data.page_head);
+        inode->i_data.pages_limit = CONFIG_PAGECACHE_LIMIT;
+#endif
 }
 
 EXPORT_SYMBOL(inode_init_once);
Index: mm/filemap.c
===================================================================
--- mm/filemap.c (revision 2628)
+++ mm/filemap.c (working copy)
@@ -18,6 +18,7 @@
 #include <linux/capability.h>
 #include <linux/kernel_stat.h>
 #include <linux/mm.h>
+#include <linux/mm_inline.h>
 #include <linux/swap.h>
 #include <linux/mman.h>
 #include <linux/pagemap.h>
@@ -30,6 +31,9 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/cpuset.h>
+#include <linux/rmap.h>
+#include <linux/buffer_head.h>
+#include <linux/page-flags.h>
 #include "filemap.h"
 #include "internal.h"
 
@@ -119,6 +123,9 @@
         radix_tree_delete(&mapping->page_tree, page->index);
         page->mapping = NULL;
         mapping->nrpages--;
+#ifdef CONFIG_LIMIT_PAGECACHE
+        list_del_init(&page->page_list);
+#endif
         __dec_zone_page_state(page, NR_FILE_PAGES);
 }
 
@@ -169,6 +176,96 @@
         return 0;
 }
 
+#ifdef CONFIG_LIMIT_PAGECACHE
+static void balance_cache(struct address_space *mapping)
+{
+        /* Release half of the pages */
+        int count;
+        int nr_released = 0;
+        struct page *page;
+        struct zone *zone = NULL;
+        struct pagevec freed_pvec;
+        struct list_head ret_list;
+
+        count = mapping->nrpages / 2;
+        pagevec_init(&freed_pvec, 0);
+        INIT_LIST_HEAD(&ret_list);
+        lru_add_drain();
+        while (count-- > 0) {
+                page = list_entry(mapping->page_head.prev, struct page, page_list);
+                zone = page_zone(page);
+                TestClearPageLRU(page);
+                if (PageActive(page))
+                        del_page_from_active_list(zone, page);
+                else
+                        del_page_from_inactive_list(zone, page);
+
+                list_del_init(&page->page_list); /* Remove from current process's page list */
+                get_page(page);
+
+                if (TestSetPageLocked(page))
+                        goto __keep;
+                if (PageWriteback(page))
+                        goto __keep_locked;
+                if (page_referenced(page, 1))
+                        goto __keep_locked;
+                if (PageDirty(page)) {
+                        switch (pageout(page, mapping)) {
+                        case PAGE_KEEP:
+                        case PAGE_ACTIVATE:
+                                goto __keep_locked;
+                        case PAGE_SUCCESS:
+                                if (PageWriteback(page) || PageDirty(page))
+                                        goto __keep;
+                                if (TestSetPageLocked(page))
+                                        goto __keep;
+                                if (PageDirty(page) || PageWriteback(page))
+                                        goto __keep_locked;
+                        case PAGE_CLEAN:
+                                ;
+                        }
+                }
+
+                if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL))
+                        goto __keep_locked;
+                if (!remove_mapping(mapping, page))
+                        goto __keep_locked;
+
+                unlock_page(page);
+                nr_released++;
+                /* This page maybe in Active LRU */
+                ClearPageActive(page);
+                ClearPageUptodate(page);
+                if (!pagevec_add(&freed_pvec, page))
+                        __pagevec_release_nonlru(&freed_pvec);
+                continue;
+__keep_locked:
+                unlock_page(page);
+__keep:
+                SetPageLRU(page);
+                if (PageActive(page)) {
+                        add_page_to_active_list(zone, page);
+                } else {
+                        add_page_to_inactive_list(zone, page);
+                }
+
+                list_add(&page->page_list, &ret_list);
+        }
+        while (!list_empty(&ret_list)) {
+                page = list_entry(ret_list.prev, struct page, page_list);
+                list_move_tail(&page->page_list, &mapping->page_head);
+                put_page(page);
+        }
+        if (pagevec_count(&freed_pvec))
+                __pagevec_release_nonlru(&freed_pvec);
+
+        if (global_page_state(NR_FILE_PAGES) > total_pagecache_limit)
+                if (zone) {
+                        wakeup_kswapd(zone, 0);
+                }
+}
+#endif
+
 /**
  * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
  * @mapping:    address space structure to write
@@ -448,6 +545,10 @@
                 page->mapping = mapping;
                 page->index = offset;
                 mapping->nrpages++;
+#ifdef CONFIG_LIMIT_PAGECACHE
+                list_add(&page->page_list, &mapping->page_head);
+#endif
+
                 __inc_zone_page_state(page, NR_FILE_PAGES);
         }
         write_unlock_irq(&mapping->tree_lock);
@@ -1085,6 +1186,10 @@
                 page_cache_release(cached_page);
         if (filp)
                 file_accessed(filp);
+#ifdef CONFIG_LIMIT_PAGECACHE
+        if (mapping->nrpages >= mapping->pages_limit)
+                balance_cache(mapping);
+#endif
 }
 EXPORT_SYMBOL(do_generic_mapping_read);
 
@@ -2195,6 +2300,11 @@
         if (cached_page)
                 page_cache_release(cached_page);
 
+#ifdef CONFIG_LIMIT_PAGECACHE
+        if (mapping->nrpages >= mapping->pages_limit)
+                balance_cache(mapping);
+#endif
+
         /*
          * For now, when the user asks for O_SYNC, we'll actually give O_DSYNC
          */
Index: mm/vmscan.c
===================================================================
--- mm/vmscan.c (revision 2628)
+++ mm/vmscan.c (working copy)
@@ -116,6 +116,7 @@
 static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
 
+int total_pagecache_limit = CONFIG_PAGECACHE_LIMIT_TOTAL * 1024 / 4;
 /*
  * Add a shrinker callback to be called from the vm
@@ -292,23 +293,11 @@
         unlock_page(page);
 }
 
-/* possible outcome of pageout() */
-typedef enum {
-        /* failed to write page out, page is locked */
-        PAGE_KEEP,
-        /* move page to the active list, page is locked */
-        PAGE_ACTIVATE,
-        /* page has been sent to the disk successfully, page is unlocked */
-        PAGE_SUCCESS,
-        /* page is clean and locked */
-        PAGE_CLEAN,
-} pageout_t;
-
 /*
  * pageout is called by shrink_page_list() for each dirty page.
  * Calls ->writepage().
  */
-static pageout_t pageout(struct page *page, struct address_space *mapping)
+pageout_t pageout(struct page *page, struct address_space *mapping)
 {
         /*
          * If the page is dirty, only perform writeback if that write
@@ -1328,7 +1317,11 @@
                         order = pgdat->kswapd_max_order;
                 }
                 finish_wait(&pgdat->kswapd_wait, &wait);
-                balance_pgdat(pgdat, order);
+                if (global_page_state(NR_FILE_PAGES) >= total_pagecache_limit)
+                        balance_pgdat(pgdat, (global_page_state(NR_FILE_PAGES)
+                                        - total_pagecache_limit), order);
+                else
+                        balance_pgdat(pgdat, order);
         }
         return 0;
 }
@@ -1344,8 +1337,10 @@
                 return;
 
         pgdat = zone->zone_pgdat;
-        if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0))
-                return;
+        if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0)) {
+                if (global_page_state(NR_FILE_PAGES) < total_pagecache_limit)
+                        return;
+        }
         if (pgdat->kswapd_max_order < order)
                 pgdat->kswapd_max_order = order;
         if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
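For reference, CONFIG_PAGECACHE_LIMIT is given in pages per file and CONFIG_PAGECACHE_LIMIT_TOTAL in MB; total_pagecache_limit converts the latter to pages with a hard-coded 4 KB page size (the "* 1024 / 4" in the vmscan.c hunk). A quick arithmetic check of the defaults (a standalone illustration, not part of the patch, and it assumes PAGE_SIZE is 4 KB just as that conversion does):

#include <stdio.h>

int main(void)
{
        int total_mb = 10;       /* CONFIG_PAGECACHE_LIMIT_TOTAL default */
        int per_file_pages = 32; /* CONFIG_PAGECACHE_LIMIT default */
        int page_kb = 4;         /* assumed page size, matching the "/ 4" above */

        /* same arithmetic as "CONFIG_PAGECACHE_LIMIT_TOTAL * 1024 / 4" */
        printf("total page cache limit: %d pages\n", total_mb * 1024 / page_kb);
        printf("per-file limit: %d KB\n", per_file_pages * page_kb);
        return 0;
}

With the defaults this prints 2560 pages and 128 KB: a single file is trimmed back once it caches more than 128 KB, and kswapd starts reclaiming once file-backed pages exceed about 10 MB in total.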