Hi,

Attempting to avoid doing I/O has been harmful to throughput here
ever since the queueing/elevator woes were fixed; since then, tossing
those avoidance attempts has improved throughput markedly.

IMHO, any patch that claims to improve throughput via code deletion
is worth a little eyeball time... and maybe even a test run ;-)
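
At its core, the page_alloc.c half just bounds the extra reclaim retries
by allocation order instead of retrying only for order 0, and skips the
__GFP_WAIT yield on the very first pass.  A quick userspace toy model of
that policy (illustration only; should_retry() is made up here and is
not in the patch):

#include <stdio.h>

/*
 * Toy model of the new retry policy: __alloc_pages() loops back into
 * the reclaim path while (!order || loop++ < (1 << order)), so order-0
 * keeps its old unconditional retry and an order-N allocation gets up
 * to 2^N passes before it stops looping back.
 */
static int should_retry(unsigned long order, int loop)
{
        return order == 0 || loop < (1 << order);
}

int main(void)
{
        unsigned long order;

        for (order = 1; order <= 4; order++) {
                int loop = 0;

                while (should_retry(order, loop))
                        loop++;
                printf("order %lu: up to %d reclaim retries\n", order, loop);
        }
        return 0;
}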

Comments welcome.

        -Mike

--- linux-2.4.2-ac4/mm/page_alloc.c.org Mon Feb 26 11:19:27 2001
+++ linux-2.4.2-ac4/mm/page_alloc.c     Tue Feb 27 10:31:10 2001
@@ -274,7 +274,7 @@
 struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order)
 {
        zone_t **zone;
-       int direct_reclaim = 0;
+       int direct_reclaim = 0, loop = 0;
        unsigned int gfp_mask = zonelist->gfp_mask;
        struct page * page;

@@ -366,7 +366,7 @@
         *   able to free some memory we can't free ourselves
         */
        wakeup_kswapd();
-       if (gfp_mask & __GFP_WAIT) {
+       if (gfp_mask & __GFP_WAIT && loop) {
                __set_current_state(TASK_RUNNING);
                current->policy |= SCHED_YIELD;
                schedule();
@@ -440,7 +440,7 @@
                        memory_pressure++;
                        try_to_free_pages(gfp_mask);
                        wakeup_bdflush(0);
-                       if (!order)
+                       if (!order || loop++ < (1 << order))
                                goto try_again;
                }
        }
--- linux-2.4.2-ac4/mm/vmscan.c.org     Mon Feb 26 09:31:46 2001
+++ linux-2.4.2-ac4/mm/vmscan.c Tue Feb 27 09:04:50 2001
@@ -278,6 +278,8 @@
        /* Always start by trying to penalize the process that is allocating memory */
        if (mm)
                retval = swap_out_mm(mm, swap_amount(mm));
+       if (retval)
+               return retval;

        /* Then, look at the other mm's */
        counter = (mmlist_nr << SWAP_SHIFT) >> priority;
@@ -418,8 +420,8 @@
 #define MAX_LAUNDER            (1 << page_cluster)
 int page_launder(int gfp_mask, int user)
 {
-       int launder_loop, maxscan, flushed_pages, freed_pages, maxlaunder;
-       int can_get_io_locks, sync, target, shortage;
+       int maxscan, flushed_pages, freed_pages, maxlaunder;
+       int can_get_io_locks;
        struct list_head * page_lru;
        struct page * page;
        struct zone_struct * zone;
@@ -430,15 +432,10 @@
         */
        can_get_io_locks = gfp_mask & __GFP_IO;

-       target = free_shortage();
-
-       sync = 0;
-       launder_loop = 0;
        maxlaunder = 0;
        flushed_pages = 0;
        freed_pages = 0;

-dirty_page_rescan:
        spin_lock(&pagemap_lru_lock);
        maxscan = nr_inactive_dirty_pages;
        while ((page_lru = inactive_dirty_list.prev) != &inactive_dirty_list &&
@@ -446,6 +443,9 @@
                page = list_entry(page_lru, struct page, lru);
                zone = page->zone;

+               if ((user && freed_pages + flushed_pages > MAX_LAUNDER)
+                               || !free_shortage())
+                       break;
                /* Wrong page on list?! (list corruption, should not happen) */
                if (!PageInactiveDirty(page)) {
                        printk("VM: page_launder, wrong page on list.\n");
@@ -464,18 +464,7 @@
                        continue;
                }

-               /*
-                * Disk IO is really expensive, so we make sure we
-                * don't do more work than needed.
-                * Note that clean pages from zones with enough free
-                * pages still get recycled and dirty pages from these
-                * zones can get flushed due to IO clustering.
-                */
-               if (freed_pages + flushed_pages > target && !free_shortage())
-                       break;
-               if (launder_loop && !maxlaunder)
-                       break;
-               if (launder_loop && zone->inactive_clean_pages +
+               if (zone->inactive_clean_pages +
                                zone->free_pages > zone->pages_high)
                        goto skip_page;

@@ -500,14 +489,6 @@
                        if (!writepage)
                                goto page_active;

-                       /* First time through? Move it to the back of the list */
-                       if (!launder_loop) {
-                               list_del(page_lru);
-                               list_add(page_lru, &inactive_dirty_list);
-                               UnlockPage(page);
-                               continue;
-                       }
-
                        /* OK, do a physical asynchronous write to swap.  */
                        ClearPageDirty(page);
                        page_cache_get(page);
@@ -517,7 +498,6 @@
                        /* XXX: all ->writepage()s should use nr_async_pages */
                        if (!PageSwapCache(page))
                                flushed_pages++;
-                       maxlaunder--;
                        page_cache_release(page);

                        /* And re-start the thing.. */
@@ -535,7 +515,7 @@
                 * buffer pages
                 */
                if (page->buffers) {
-                       int wait, clearedbuf;
+                       int clearedbuf;
                        /*
                         * Since we might be doing disk IO, we have to
                         * drop the spinlock and take an extra reference
@@ -545,16 +525,8 @@
                        page_cache_get(page);
                        spin_unlock(&pagemap_lru_lock);

-                       /* Will we do (asynchronous) IO? */
-                       if (launder_loop && maxlaunder == 0 && sync)
-                               wait = 2;       /* Synchrounous IO */
-                       else if (launder_loop && maxlaunder-- > 0)
-                               wait = 1;       /* Async IO */
-                       else
-                               wait = 0;       /* No IO */
-
                        /* Try to free the page buffers. */
-                       clearedbuf = try_to_free_buffers(page, wait);
+                       clearedbuf = try_to_free_buffers(page, can_get_io_locks);

                        /*
                         * Re-take the spinlock. Note that we cannot
@@ -566,7 +538,7 @@
                        /* The buffers were not freed. */
                        if (!clearedbuf) {
                                add_page_to_inactive_dirty_list(page);
-                               if (wait)
+                               if (can_get_io_locks)
                                        flushed_pages++;

                        /* The page was only in the buffer cache. */
@@ -619,61 +591,8 @@
        spin_unlock(&pagemap_lru_lock);

        /*
-        * If we don't have enough free pages, we loop back once
-        * to queue the dirty pages for writeout. When we were called
-        * by a user process (that /needs/ a free page) and we didn't
-        * free anything yet, we wait synchronously on the writeout of
-        * MAX_SYNC_LAUNDER pages.
-        *
-        * We also wake up bdflush, since bdflush should, under most
-        * loads, flush out the dirty pages before we have to wait on
-        * IO.
-        */
-       shortage = free_shortage();
-       if (can_get_io_locks && !launder_loop && shortage) {
-               launder_loop = 1;
-
-               /*
-                * User programs can run page_launder() in parallel so
-                * we only flush a few pages at a time to avoid big IO
-                * storms.   Kswapd, OTOH, is expected usually keep up
-                * with the paging load in the system and doesn't have
-                * the IO storm problem, so it just flushes all pages
-                * needed to fix the free shortage.
-                */
-               maxlaunder = shortage;
-               maxlaunder -= flushed_pages;
-               maxlaunder -= atomic_read(&nr_async_pages);
-
-               if (maxlaunder <= 0)
-                       goto out;
-
-               if (user && maxlaunder > MAX_LAUNDER)
-                       maxlaunder = MAX_LAUNDER;
-
-               /*
-                * If we are called by a user program, we need to free
-                * some pages. If we couldn't, we'll do the last page IO
-                * synchronously to be sure
-                */
-               if (user && !freed_pages)
-                       sync = 1;
-
-               goto dirty_page_rescan;
-       }
-
-       /*
-        * We have to make sure the data is actually written to
-        * the disk now, otherwise we'll never get enough clean
-        * pages and the system will keep queueing dirty pages
-        * for flushing.
-        */
-       run_task_queue(&tq_disk);
-
-       /*
         * Return the amount of pages we freed or made freeable.
         */
-out:
        return freed_pages + flushed_pages;
 }

@@ -846,7 +765,7 @@
  * continue with its real work sooner. It also helps balancing when we
  * have multiple processes in try_to_free_pages simultaneously.
  */
-#define DEF_PRIORITY (6)
+#define DEF_PRIORITY (2)
 static int refill_inactive(unsigned int gfp_mask, int user)
 {
        int count, start_count, maxtry;
@@ -981,14 +900,6 @@
                /* If needed, try to free some memory. */
                if (inactive_shortage() || free_shortage())
                        do_try_to_free_pages(GFP_KSWAPD, 0);
-
-               /*
-                * Do some (very minimal) background scanning. This
-                * will scan all pages on the active list once
-                * every minute. This clears old referenced bits
-                * and moves unused pages to the inactive list.
-                */
-               refill_inactive_scan(DEF_PRIORITY, 0);

                /* Once a second, recalculate some VM stats. */
                if (time_after(jiffies, recalc + HZ)) {
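
For anyone who would rather read the result than the diff, here is a rough
userspace sketch of the shape page_launder() takes after the patch: one
pass over the inactive dirty list, no launder_loop/sync machinery, async
writeout on that single pass, and an early break once a user caller has
done MAX_LAUNDER worth of work or the free shortage is gone.  The struct
and the *_stub() helpers below are stand-ins for illustration, not kernel
code, and locking/refcounting/swap cache details are all elided.

#include <stdio.h>

#define MAX_LAUNDER (1 << 4)            /* stand-in for (1 << page_cluster) */

struct toy_page {
        int dirty;                      /* PageDirty() */
        int has_buffers;                /* page->buffers */
        int zone_has_plenty;            /* inactive_clean + free > pages_high */
};

/* Stubs standing in for the real VM; pretend we are always short on memory. */
static int free_shortage_stub(void) { return 1; }
static void writepage_stub(struct toy_page *p) { p->dirty = 0; }
static int try_to_free_buffers_stub(struct toy_page *p, int can_do_io)
{
        if (!can_do_io)
                return 0;
        p->has_buffers = 0;
        return 1;
}

static int page_launder_sketch(struct toy_page *pages, int npages,
                               int can_get_io_locks, int user)
{
        int freed_pages = 0, flushed_pages = 0, i;

        for (i = 0; i < npages; i++) {
                struct toy_page *page = &pages[i];

                /* New top-of-loop break: stop once enough work is done. */
                if ((user && freed_pages + flushed_pages > MAX_LAUNDER)
                                || !free_shortage_stub())
                        break;

                /* Pages from zones with plenty of memory get skipped. */
                if (page->zone_has_plenty)
                        continue;

                if (page->dirty) {
                        /* Async writeout right away, no requeue-and-rescan. */
                        writepage_stub(page);
                        flushed_pages++;
                        continue;
                }

                if (page->has_buffers) {
                        /* Buffer IO is attempted whenever __GFP_IO allows it. */
                        if (try_to_free_buffers_stub(page, can_get_io_locks))
                                freed_pages++;
                        else if (can_get_io_locks)
                                flushed_pages++;
                        continue;
                }

                /* Clean page: reclaim it. */
                freed_pages++;
        }
        return freed_pages + flushed_pages;
}

int main(void)
{
        struct toy_page pages[4] = {
                { .dirty = 1 }, { .has_buffers = 1 },
                { .zone_has_plenty = 1 }, { 0 },
        };

        printf("laundered/freed: %d\n", page_launder_sketch(pages, 4, 1, 1));
        return 0;
}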
