On Sun, 4 Feb 2001, Alan Cox wrote:

> But try 2.4.1 before worrying too much. That fixed a lot of the
> block performance problems I was seeing (2.4.1 ruins the VM
> performance under paging loads but the I/O speed is fixed ;))

The patch below should fix the paging performance.

I haven't had a whole lot of time to test this (and I won't
have much time, due to yet another LVM bug making my workstation
unusable under 2.4 ;(), but it seems to curb the worst-case
behaviour under heavy swapping and IO load quite well.

On my home machine (64MB K6-2 500) I didn't manage to make my
mp3 playback skip while under both heavy paging and heavy IO
load (using a few of the "standard" benchmark programs).
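
In short: page_launder() now works against an explicit target
(free_shortage()) instead of looping until it runs out of dirty
pages, and a user process is capped at MAX_LAUNDER pages per call
while only kswapd may flush the whole shortage. A toy user-space
sketch of just that budgeting logic (invented names and numbers,
not the kernel code itself):

#include <stdio.h>

/*
 * Toy model of the page_launder() budget in the patch below:
 * kswapd (user == 0) may flush up to the whole free shortage,
 * while a user process (user == 1) is capped at MAX_LAUNDER
 * pages per call, so several processes laundering in parallel
 * don't cause a big IO storm.
 */
#define PAGE_CLUSTER    4                /* stand-in for page_cluster */
#define MAX_LAUNDER     (1 << PAGE_CLUSTER)

static int launder(int shortage, int user)
{
        int freed = 0, flushed = 0;
        int maxlaunder = user ? MAX_LAUNDER : shortage;

        /* stop when the shortage is covered or the budget is spent */
        while (freed + flushed < shortage && maxlaunder-- > 0)
                flushed++;      /* pretend every page needed writeout */

        return freed + flushed;
}

int main(void)
{
        printf("kswapd launders %d pages, a user process %d\n",
               launder(64, 0), launder(64, 1));
        return 0;
}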

regards,

Rik
--
Linux MM bugzilla: http://linux-mm.org/bugzilla.shtml

Virtual memory is like a game you can't win;
However, without VM there's truly nothing to lose...

                http://www.surriel.com/
http://www.conectiva.com/       http://distro.conectiva.com/



--- linux-2.4.1/fs/buffer.c.orig        Tue Jan 30 18:13:19 2001
+++ linux-2.4.1/fs/buffer.c     Mon Feb  5 09:59:37 2001
@@ -1052,16 +1052,6 @@
                return 0;
        }
 
-       /*
-        * If we are about to get low on free pages and
-        * cleaning the inactive_dirty pages would help
-        * fix this, wake up bdflush.
-        */
-       shortage = free_shortage();
-       if (shortage && nr_inactive_dirty_pages > shortage &&
-                       nr_inactive_dirty_pages > freepages.high)
-               return 0;
-
        return -1;
 }
 
--- linux-2.4.1/mm/filemap.c.orig       Tue Jan 30 17:02:23 2001
+++ linux-2.4.1/mm/filemap.c    Tue Jan 30 17:25:29 2001
@@ -286,6 +286,34 @@
        spin_unlock(&pagecache_lock);
 }
 
+/*
+ * This function is pretty much like __find_page_nolock(), but it only
+ * requires 2 arguments and doesn't mark the page as touched, making it
+ * ideal for ->writepage() clustering and other places where you don't
+ * want to mark the page referenced.
+ *
+ * The caller needs to hold the pagecache_lock.
+ */
+struct page * __find_page_simple(struct address_space *mapping, unsigned long index)
+{
+       struct page * page = *page_hash(mapping, index);
+       goto inside;
+
+       for (;;) {
+               page = page->next_hash;
+inside:
+               if (!page)
+                       goto not_found;
+               if (page->mapping != mapping)
+                       continue;
+               if (page->index == index)
+                       break;
+       }
+
+not_found:
+       return page;
+}
+
 static inline struct page * __find_page_nolock(struct address_space *mapping, unsigned long offset, struct page *page)
 {
        goto inside;
@@ -301,13 +329,14 @@
                        break;
        }
        /*
-        * Touching the page may move it to the active list.
-        * If we end up with too few inactive pages, we wake
-        * up kswapd.
+        * Mark the page referenced, moving inactive pages to the
+        * active list.
         */
-       age_page_up(page);
-       if (inactive_shortage() > inactive_target / 2 && free_shortage())
-                       wakeup_kswapd();
+       if (!PageActive(page))
+               activate_page(page);
+       else
+               SetPageReferenced(page);
+
 not_found:
        return page;
 }
@@ -735,7 +764,6 @@
 {
        struct inode *inode = file->f_dentry->d_inode;
        struct address_space *mapping = inode->i_mapping;
-       struct page **hash;
        struct page *page;
        unsigned long start;
 
@@ -756,8 +784,7 @@
         */
        spin_lock(&pagecache_lock);
        while (--index >= start) {
-               hash = page_hash(mapping, index);
-               page = __find_page_nolock(mapping, index, *hash);
+               page = __find_page_simple(mapping, index);
                if (!page)
                        break;
                deactivate_page(page);
--- linux-2.4.1/mm/page_alloc.c.orig    Tue Jan 30 17:02:23 2001
+++ linux-2.4.1/mm/page_alloc.c Tue Jan 30 17:54:10 2001
@@ -299,21 +299,6 @@
                        !(current->flags & PF_MEMALLOC))
                direct_reclaim = 1;
 
-       /*
-        * If we are about to get low on free pages and we also have
-        * an inactive page shortage, wake up kswapd.
-        */
-       if (inactive_shortage() > inactive_target / 2 && free_shortage())
-               wakeup_kswapd();
-       /*
-        * If we are about to get low on free pages and cleaning
-        * the inactive_dirty pages would fix the situation,
-        * wake up bdflush.
-        */
-       else if (free_shortage() && nr_inactive_dirty_pages > free_shortage()
-                       && nr_inactive_dirty_pages >= freepages.high)
-               wakeup_bdflush(0);
-
 try_again:
        /*
         * First, see if we have any zones with lots of free memory.
--- linux-2.4.1/mm/page_io.c.orig       Tue Jan 30 17:02:23 2001
+++ linux-2.4.1/mm/page_io.c    Tue Jan 30 18:44:11 2001
@@ -42,11 +42,6 @@
        int block_size;
        struct inode *swapf = 0;
 
-       /* Don't allow too many pending pages in flight.. */
-       if ((rw == WRITE) && atomic_read(&nr_async_pages) >
-                       pager_daemon.swap_cluster * (1 << page_cluster))
-               wait = 1;
-
        if (rw == READ) {
                ClearPageUptodate(page);
                kstat.pswpin++;
--- linux-2.4.1/mm/vmscan.c.orig        Tue Jan 30 17:02:23 2001
+++ linux-2.4.1/mm/vmscan.c     Mon Feb  5 12:20:12 2001
@@ -280,7 +280,7 @@
                retval = swap_out_mm(mm, swap_amount(mm));
 
        /* Then, look at the other mm's */
-       counter = mmlist_nr >> priority;
+       counter = (mmlist_nr << SWAP_SHIFT) >> priority;
        do {
                struct list_head *p;
 
@@ -412,14 +412,17 @@
  *
  * This code is heavily inspired by the FreeBSD source code. Thanks
  * go out to Matthew Dillon.
+ *
+ * XXX: restrict number of pageouts in flight...
  */
-#define MAX_LAUNDER            (4 * (1 << page_cluster))
-int page_launder(int gfp_mask, int sync)
+#define MAX_LAUNDER            (1 << page_cluster)
+int page_launder(int gfp_mask, int user)
 {
-       int launder_loop, maxscan, cleaned_pages, maxlaunder;
-       int can_get_io_locks;
+       int launder_loop, maxscan, flushed_pages, freed_pages, maxlaunder;
+       int can_get_io_locks, sync, target, shortage;
        struct list_head * page_lru;
        struct page * page;
+       struct zone_struct * zone;
 
        /*
         * We can only grab the IO locks (eg. for flushing dirty
@@ -427,9 +430,13 @@
         */
        can_get_io_locks = gfp_mask & __GFP_IO;
 
+       target = free_shortage();
+
+       sync = 0;
        launder_loop = 0;
        maxlaunder = 0;
-       cleaned_pages = 0;
+       flushed_pages = 0;
+       freed_pages = 0;
 
 dirty_page_rescan:
        spin_lock(&pagemap_lru_lock);
@@ -437,13 +444,14 @@
        while ((page_lru = inactive_dirty_list.prev) != &inactive_dirty_list &&
                                maxscan-- > 0) {
                page = list_entry(page_lru, struct page, lru);
+               zone = page->zone;
 
                /* Wrong page on list?! (list corruption, should not happen) */
                if (!PageInactiveDirty(page)) {
                        printk("VM: page_launder, wrong page on list.\n");
                        list_del(page_lru);
                        nr_inactive_dirty_pages--;
-                       page->zone->inactive_dirty_pages--;
+                       zone->inactive_dirty_pages--;
                        continue;
                }
 
@@ -457,10 +465,26 @@
                }
 
                /*
+                * Disk IO is really expensive, so we make sure we
+                * don't do more work than needed.
+                * Note that clean pages from zones with enough free
+                * pages still get recycled and dirty pages from these
+                * zones can get flushed due to IO clustering.
+                */
+               if (freed_pages + flushed_pages > target && !free_shortage())
+                       break;
+               if (launder_loop && !maxlaunder)
+                       break;
+               if (launder_loop && zone->inactive_clean_pages +
+                               zone->free_pages > zone->pages_high)
+                       goto skip_page;
+
+               /*
                 * The page is locked. IO in progress?
                 * Move it to the back of the list.
                 */
                if (TryLockPage(page)) {
+skip_page:
                        list_del(page_lru);
                        list_add(page_lru, &inactive_dirty_list);
                        continue;
@@ -490,6 +514,7 @@
                        spin_unlock(&pagemap_lru_lock);
 
                        writepage(page);
+                       flushed_pages++;
                        page_cache_release(page);
 
                        /* And re-start the thing.. */
@@ -508,7 +533,6 @@
                 */
                if (page->buffers) {
                        int wait, clearedbuf;
-                       int freed_page = 0;
                        /*
                         * Since we might be doing disk IO, we have to
                         * drop the spinlock and take an extra reference
@@ -539,12 +563,12 @@
                        /* The buffers were not freed. */
                        if (!clearedbuf) {
                                add_page_to_inactive_dirty_list(page);
+                               flushed_pages++;
 
                        /* The page was only in the buffer cache. */
                        } else if (!page->mapping) {
                                atomic_dec(&buffermem_pages);
-                               freed_page = 1;
-                               cleaned_pages++;
+                               freed_pages++;
 
                        /* The page has more users besides the cache and us. */
                        } else if (page_count(page) > 2) {
@@ -553,7 +577,7 @@
                        /* OK, we "created" a freeable page. */
                        } else /* page->mapping && page_count(page) == 2 */ {
                                add_page_to_inactive_clean_list(page);
-                               cleaned_pages++;
+                               freed_pages++;
                        }
 
                        /*
@@ -564,13 +588,6 @@
                        UnlockPage(page);
                        page_cache_release(page);
 
-                       /* 
-                        * If we're freeing buffer cache pages, stop when
-                        * we've got enough free memory.
-                        */
-                       if (freed_page && !free_shortage())
-                               break;
-                       continue;
                } else if (page->mapping && !PageDirty(page)) {
                        /*
                         * If a page had an extra reference in
@@ -581,7 +598,8 @@
                        del_page_from_inactive_dirty_list(page);
                        add_page_to_inactive_clean_list(page);
                        UnlockPage(page);
-                       cleaned_pages++;
+                       freed_pages++;
+
                } else {
 page_active:
                        /*
@@ -607,20 +625,49 @@
         * loads, flush out the dirty pages before we have to wait on
         * IO.
         */
-       if (can_get_io_locks && !launder_loop && free_shortage()) {
+       shortage = free_shortage();
+       if (can_get_io_locks && !launder_loop && shortage) {
                launder_loop = 1;
-               /* If we cleaned pages, never do synchronous IO. */
-               if (cleaned_pages)
-                       sync = 0;
-               /* We only do a few "out of order" flushes. */
-               maxlaunder = MAX_LAUNDER;
-               /* Kflushd takes care of the rest. */
-               wakeup_bdflush(0);
+
+               /* 
+                * User programs can run page_launder() in parallel so
+                * we only flush a few pages at a time to avoid big IO
+                * storms.  Kswapd, OTOH, is usually expected to keep up
+                * with the paging load in the system and doesn't have
+                * the IO storm problem, so it just flushes all pages
+                * needed to fix the free shortage.
+                *
+                * XXX: keep track of nr_async_pages like the old swap
+                * code did?
+                */
+               if (user)
+                       maxlaunder = MAX_LAUNDER;
+               else
+                       maxlaunder = shortage;
+
+               /*
+                * If we are called by a user program, we need to free
+                * some pages. If we haven't freed any yet, we do the
+                * last page IO synchronously to make sure we do.
+                */
+               if (user && !freed_pages)
+                       sync = 1;
+
                goto dirty_page_rescan;
        }
 
-       /* Return the number of pages moved to the inactive_clean list. */
-       return cleaned_pages;
+       /*
+        * We have to make sure the data is actually written to
+        * the disk now, otherwise we'll never get enough clean
+        * pages and the system will keep queueing dirty pages
+        * for flushing.
+        */
+       run_task_queue(&tq_disk);
+
+       /*
+        * Return the amount of pages we freed or made freeable.
+        */
+       return freed_pages + flushed_pages;
 }
 
 /**
@@ -631,12 +678,12 @@
  * This function will scan a portion of the active list to find
  * unused pages, those pages will then be moved to the inactive list.
  */
-int refill_inactive_scan(unsigned int priority, int oneshot)
+int refill_inactive_scan(int priority, int target)
 {
        struct list_head * page_lru;
        struct page * page;
        int maxscan, page_active = 0;
-       int ret = 0;
+       int nr_deactivated = 0;
 
        /* Take the lock while messing with the list... */
        spin_lock(&pagemap_lru_lock);
@@ -668,31 +715,38 @@
                         *
                         * SUBTLE: we can have buffer pages with count 1.
                         */
-                       if (page->age == 0 && page_count(page) <=
-                                               (page->buffers ? 2 : 1)) {
-                               deactivate_page_nolock(page);
-                               page_active = 0;
+                       if (page->age == 0) {
+                               int maxcount = (page->buffers ? 2 : 1);
+                               if (page_count(page) <= maxcount) {
+                                       deactivate_page_nolock(page);
+                                       page_active = 0;
+                               } else {
+                                       /* Page still in somebody's RSS? */
+                                       page_active = 1;
+                                       /* XXX: should we call swap_out() if
+                                        * this happens too often ? */
+                               }
                        } else {
                                page_active = 1;
                        }
                }
                /*
                 * If the page is still on the active list, move it
-                * to the other end of the list. Otherwise it was
-                * deactivated by age_page_down and we exit successfully.
+                * to the end of the list. Otherwise we successfully
+                * deactivated a page.
                 */
                if (page_active || PageActive(page)) {
                        list_del(page_lru);
                        list_add(page_lru, &active_list);
                } else {
-                       ret = 1;
-                       if (oneshot)
+                       nr_deactivated++;
+                       if (nr_deactivated >= target)
                                break;
                }
        }
        spin_unlock(&pagemap_lru_lock);
 
-       return ret;
+       return nr_deactivated;
 }
 
 /*
@@ -704,7 +758,7 @@
        pg_data_t *pgdat = pgdat_list;
        int sum = 0;
        int freeable = nr_free_pages() + nr_inactive_clean_pages();
-       int freetarget = freepages.high + inactive_target / 3;
+       int freetarget = freepages.high;
 
        /* Are we low on free pages globally? */
        if (freeable < freetarget)
@@ -772,38 +826,46 @@
 }
 
 /*
- * We need to make the locks finer granularity, but right
- * now we need this so that we can do page allocations
- * without holding the kernel lock etc.
+ * Refill_inactive is the function used to scan and age the pages
+ * of processes and of the active list, and to move little-used
+ * pages to the inactive list.
  *
- * We want to try to free "count" pages, and we want to 
- * cluster them so that we get good swap-out behaviour.
+ * When called by kswapd, we try to just deactivate as many pages as
+ * needed. This makes it easy for kswapd to keep up with memory
+ * demand.
  *
- * OTOH, if we're a user process (and not kswapd), we
- * really care about latency. In that case we don't try
- * to free too many pages.
+ * However, when we are called by a user process we have to limit the
+ * amount of work done. This way the process can do its allocation and
+ * continue with its real work sooner. It also helps balancing when we
+ * have multiple processes in try_to_free_pages simultaneously.
  */
 #define DEF_PRIORITY (6)
 static int refill_inactive(unsigned int gfp_mask, int user)
 {
        int count, start_count, maxtry;
 
-       count = inactive_shortage() + free_shortage();
-       if (user)
+       if (user) {
+               /* user process */
                count = (1 << page_cluster);
-       start_count = count;
+               maxtry = 6;
+       } else {
+               /* kswapd */
+               count = inactive_shortage() + free_shortage();
+               maxtry = 1 << DEF_PRIORITY;
+       }
 
-       maxtry = 6;
+       start_count = count;
        do {
                if (current->need_resched) {
                        __set_current_state(TASK_RUNNING);
                        schedule();
+                       if (!inactive_shortage())
+                               return 1;
                }
 
-               while (refill_inactive_scan(DEF_PRIORITY, 1)) {
-                       if (--count <= 0)
-                               goto done;
-               }
+               count -= refill_inactive_scan(DEF_PRIORITY, count);
+               if (count <= 0)
+                       goto done;
 
                /* If refill_inactive_scan failed, try to page stuff out.. */
                swap_out(DEF_PRIORITY, gfp_mask);
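
For completeness, here is how the new __find_page_simple() is
meant to be called (a hypothetical helper sketch, as it might sit
inside mm/filemap.c where pagecache_lock is visible; the
drop-behind loop in the filemap.c hunk above is the real in-tree
caller):

/*
 * Hypothetical example: look a page up without marking it
 * referenced, so the lookup itself can't promote the page back
 * to the active list.  As with __find_page_nolock(), the caller
 * must hold pagecache_lock; since no extra reference is taken,
 * the page is only stable while that lock is held.
 */
static void drop_behind_one(struct address_space *mapping, unsigned long index)
{
        struct page *page;

        spin_lock(&pagecache_lock);
        page = __find_page_simple(mapping, index);
        if (page)
                deactivate_page(page);
        spin_unlock(&pagecache_lock);
}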
