Re: regression caused by cgroups optimization in 3.17-rc2

Johannes Weiner Fri, 05 Sep 2014 07:48:34 -0700

On Fri, Sep 05, 2014 at 11:25:37AM +0200, Michal Hocko wrote:
> @@ -900,10 +900,10 @@ void lru_add_drain_all(void)
>   * grabbed the page via the LRU.  If it did, give up: shrink_inactive_list()
>   * will free it.
>   */
> -void release_pages(struct page **pages, int nr, bool cold)
> +static void release_lru_pages(struct page **pages, int nr,
> +                           struct list_head *pages_to_free)
>  {
>       int i;
> -     LIST_HEAD(pages_to_free);
>       struct zone *zone = NULL;
>       struct lruvec *lruvec;
>       unsigned long uninitialized_var(flags);
> @@ -943,11 +943,26 @@ void release_pages(struct page **pages, int nr, bool cold)
>               /* Clear Active bit in case of parallel mark_page_accessed */
>               __ClearPageActive(page);
>  
> -             list_add(&page->lru, &pages_to_free);
> +             list_add(&page->lru, pages_to_free);
>       }
>       if (zone)
>               spin_unlock_irqrestore(&zone->lru_lock, flags);
> +}
> +/*
> + * Batched page_cache_release(). Frees and uncharges all given pages
> + * for which the reference count drops to 0.
> + */
> +void release_pages(struct page **pages, int nr, bool cold)
> +{
> +     LIST_HEAD(pages_to_free);
>  
> +     while (nr) {
> +             int batch = min(nr, PAGEVEC_SIZE);
> +
> +             release_lru_pages(pages, batch, &pages_to_free);
> +             pages += batch;
> +             nr -= batch;
> +     }


We might be able to process a lot more pages in one go if nobody else
needs the lock or the CPU.  Can't we just cycle the lock or reschedule
if necessary?

diff --git a/mm/swap.c b/mm/swap.c
index 6b2dc3897cd5..ee0cf21dd521 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -944,6 +944,15 @@ void release_pages(struct page **pages, int nr, bool cold)
                __ClearPageActive(page);
 
                list_add(&page->lru, &pages_to_free);
+
+               if (should_resched() ||
+                   (zone && spin_needbreak(&zone->lru_lock))) {
+                       if (zone) {
+                               spin_unlock_irqrestore(&zone->lru_lock, flags);
+                               zone = NULL;
+                       }
+                       cond_resched();
+               }
        }
        if (zone)
                spin_unlock_irqrestore(&zone->lru_lock, flags);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 3e0ec83d000c..c487ca4682a4 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -262,19 +262,12 @@ void free_page_and_swap_cache(struct page *page)
  */
 void free_pages_and_swap_cache(struct page **pages, int nr)
 {
-       struct page **pagep = pages;
+       int i;
 
        lru_add_drain();
-       while (nr) {
-               int todo = min(nr, PAGEVEC_SIZE);
-               int i;
-
-               for (i = 0; i < todo; i++)
-                       free_swap_cache(pagep[i]);
-               release_pages(pagep, todo, false);
-               pagep += todo;
-               nr -= todo;
-       }
+       for (i = 0; i < nr; i++)
+               free_swap_cache(pages[i]);
+       release_pages(pages, nr, false);
 }
 
 /*
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: regression caused by cgroups optimization in 3.17-rc2

Reply via email to