migration failing due to memcg limits

Wanpeng Li Thu, 12 Jul 2012 02:11:13 -0700

On Thu, Jul 12, 2012 at 04:54:07PM +0800, Wanpeng Li wrote:
>On Wed, Jul 11, 2012 at 07:02:13PM +0200, Johannes Weiner wrote:
>>Compaction (and page migration in general) can currently be hindered
>>through pages being owned by memory cgroups that are at their limits
>>and unreclaimable.
>>
>>The reason is that the replacement page is being charged against the
>>limit while the page being replaced is also still charged.  But this
>>seems unnecessary, given that only one of the two pages will still be
>>in use after migration finishes.
>>
>>This patch changes the memcg migration sequence so that the
>>replacement page is not charged.  Whatever page is still in use after
>>successful or failed migration gets to keep the charge of the page
>>that was going to be replaced.
>>
>>The replacement page will still show up temporarily in the rss/cache
>>statistics, this can be fixed in a later patch as it's less urgent.
>>
>
>So I want to know after this patch be merged if mem_cgroup_wait_acct_move
>still make sense, if the answer is no, I will send a patch to remove it.


And if this still make sense, I want to change check in
mem_cgroup_do_charge:

if (mem_cgroup_wait_acct_move(mem_over_limit))
        return CHARGE_RETRY;

=>

if (mem_cgroup_wait_acct_move(mem_over_limit) && 
                       mem_cgroup_margin(mem_over_limit) >= nr_pages)
        return CHARGE_RETRY;

Since mem_cgroup_relcaim can reclaim some pages, but in
mem_cgroup_reclaim function there are some exit condition:

total += try_to_free_mem_cgroup_pages(memcg, gfp_mask, noswap);
if(total && (flag & MEM_CGROUP_RECLAIM_SHRINK))
        break;

and 

if (mem_cgroup_margin(memcg))
        break;

So maybe mem_cgroup_reclaim not reclaim enough pages >= nr_pages, this
time we should go to mem_cgroup_handle_oom instead of return
CHARGE_RETRY.

Hopefully, you can verify if my idea make sense.


>>Reported-by: David Rientjes <rient...@google.com>
>>Signed-off-by: Johannes Weiner <han...@cmpxchg.org>
>>Acked-by: KAMEZAWA Hiroyuki <kamezawa.hir...@jp.fujitsu.com>
>>Acked-by: Michal Hocko <mho...@suse.cz>
>>---
>> include/linux/memcontrol.h |   11 +++----
>> mm/memcontrol.c            |   67 +++++++++++++++++++++++--------------------
>> mm/migrate.c               |   27 ++++--------------
>> 3 files changed, 47 insertions(+), 58 deletions(-)
>>
>>diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
>>index 5a3ee64..8d9489f 100644
>>--- a/include/linux/memcontrol.h
>>+++ b/include/linux/memcontrol.h
>>@@ -98,9 +98,9 @@ int mm_match_cgroup(const struct mm_struct *mm, const 
>>struct mem_cgroup *cgroup)
>> 
>> extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);
>> 
>>-extern int
>>-mem_cgroup_prepare_migration(struct page *page,
>>-     struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask);
>>+extern void
>>+mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
>>+                          struct mem_cgroup **memcgp);
>> extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
>>      struct page *oldpage, struct page *newpage, bool migration_ok);
>> 
>>@@ -276,11 +276,10 @@ static inline struct cgroup_subsys_state
>>      return NULL;
>> }
>> 
>>-static inline int
>>+static inline void
>> mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
>>-     struct mem_cgroup **memcgp, gfp_t gfp_mask)
>>+                          struct mem_cgroup **memcgp)
>> {
>>-     return 0;
>> }
>> 
>> static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
>>diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>>index e8ddc00..12ee2de 100644
>>--- a/mm/memcontrol.c
>>+++ b/mm/memcontrol.c
>>@@ -2977,7 +2977,8 @@ static void mem_cgroup_do_uncharge(struct mem_cgroup 
>>*memcg,
>>  * uncharge if !page_mapped(page)
>>  */
>> static struct mem_cgroup *
>>-__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
>>+__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
>>+                          bool end_migration)
>> {
>>      struct mem_cgroup *memcg = NULL;
>>      unsigned int nr_pages = 1;
>>@@ -3021,7 +3022,16 @@ __mem_cgroup_uncharge_common(struct page *page, enum 
>>charge_type ctype)
>>              /* fallthrough */
>>      case MEM_CGROUP_CHARGE_TYPE_DROP:
>>              /* See mem_cgroup_prepare_migration() */
>>-             if (page_mapped(page) || PageCgroupMigration(pc))
>>+             if (page_mapped(page))
>>+                     goto unlock_out;
>>+             /*
>>+              * Pages under migration may not be uncharged.  But
>>+              * end_migration() /must/ be the one uncharging the
>>+              * unused post-migration page and so it has to call
>>+              * here with the migration bit still set.  See the
>>+              * res_counter handling below.
>>+              */
>>+             if (!end_migration && PageCgroupMigration(pc))
>>                      goto unlock_out;
>>              break;
>>      case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
>>@@ -3055,7 +3065,12 @@ __mem_cgroup_uncharge_common(struct page *page, enum 
>>charge_type ctype)
>>              mem_cgroup_swap_statistics(memcg, true);
>>              mem_cgroup_get(memcg);
>>      }
>>-     if (!mem_cgroup_is_root(memcg))
>>+     /*
>>+      * Migration does not charge the res_counter for the
>>+      * replacement page, so leave it alone when phasing out the
>>+      * page that is unused after the migration.
>>+      */
>>+     if (!end_migration && !mem_cgroup_is_root(memcg))
>>              mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
>> 
>>      return memcg;
>>@@ -3071,14 +3086,14 @@ void mem_cgroup_uncharge_page(struct page *page)
>>      if (page_mapped(page))
>>              return;
>>      VM_BUG_ON(page->mapping && !PageAnon(page));
>>-     __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON);
>>+     __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
>> }
>> 
>> void mem_cgroup_uncharge_cache_page(struct page *page)
>> {
>>      VM_BUG_ON(page_mapped(page));
>>      VM_BUG_ON(page->mapping);
>>-     __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
>>+     __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false);
>> }
>> 
>> /*
>>@@ -3142,7 +3157,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, 
>>swp_entry_t ent, bool swapout)
>>      if (!swapout) /* this was a swap cache but the swap is unused ! */
>>              ctype = MEM_CGROUP_CHARGE_TYPE_DROP;
>> 
>>-     memcg = __mem_cgroup_uncharge_common(page, ctype);
>>+     memcg = __mem_cgroup_uncharge_common(page, ctype, false);
>> 
>>      /*
>>       * record memcg information,  if swapout && memcg != NULL,
>>@@ -3232,19 +3247,18 @@ static inline int 
>>mem_cgroup_move_swap_account(swp_entry_t entry,
>>  * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
>>  * page belongs to.
>>  */
>>-int mem_cgroup_prepare_migration(struct page *page,
>>-     struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask)
>>+void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
>>+                               struct mem_cgroup **memcgp)
>> {
>>      struct mem_cgroup *memcg = NULL;
>>      struct page_cgroup *pc;
>>      enum charge_type ctype;
>>-     int ret = 0;
>> 
>>      *memcgp = NULL;
>> 
>>      VM_BUG_ON(PageTransHuge(page));
>>      if (mem_cgroup_disabled())
>>-             return 0;
>>+             return;
>> 
>>      pc = lookup_page_cgroup(page);
>>      lock_page_cgroup(pc);
>>@@ -3289,24 +3303,9 @@ int mem_cgroup_prepare_migration(struct page *page,
>>       * we return here.
>>       */
>>      if (!memcg)
>>-             return 0;
>>+             return;
>> 
>>      *memcgp = memcg;
>>-     ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, memcgp, false);
>>-     css_put(&memcg->css);/* drop extra refcnt */
>>-     if (ret) {
>>-             if (PageAnon(page)) {
>>-                     lock_page_cgroup(pc);
>>-                     ClearPageCgroupMigration(pc);
>>-                     unlock_page_cgroup(pc);
>>-                     /*
>>-                      * The old page may be fully unmapped while we kept it.
>>-                      */
>>-                     mem_cgroup_uncharge_page(page);
>>-             }
>>-             /* we'll need to revisit this error code (we have -EINTR) */
>>-             return -ENOMEM;
>>-     }
>>      /*
>>       * We charge new page before it's used/mapped. So, even if unlock_page()
>>       * is called before end_migration, we can catch all events on this new
>>@@ -3319,8 +3318,12 @@ int mem_cgroup_prepare_migration(struct page *page,
>>              ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
>>      else
>>              ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
>>+     /*
>>+      * The page is committed to the memcg, but it's not actually
>>+      * charged to the res_counter since we plan on replacing the
>>+      * old one and only one page is going to be left afterwards.
>>+      */
>>      __mem_cgroup_commit_charge(memcg, newpage, 1, ctype, false);
>>-     return ret;
>> }
>> 
>> /* remove redundant charge if migration failed*/
>>@@ -3342,6 +3345,12 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
>>              used = newpage;
>>              unused = oldpage;
>>      }
>>+     anon = PageAnon(used);
>>+     __mem_cgroup_uncharge_common(unused,
>>+             anon ? MEM_CGROUP_CHARGE_TYPE_ANON
>>+                  : MEM_CGROUP_CHARGE_TYPE_CACHE,
>>+             true);
>>+     css_put(&memcg->css);
>>      /*
>>       * We disallowed uncharge of pages under migration because mapcount
>>       * of the page goes down to zero, temporarly.
>>@@ -3351,10 +3360,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
>>      lock_page_cgroup(pc);
>>      ClearPageCgroupMigration(pc);
>>      unlock_page_cgroup(pc);
>>-     anon = PageAnon(used);
>>-     __mem_cgroup_uncharge_common(unused,
>>-             anon ? MEM_CGROUP_CHARGE_TYPE_ANON
>>-                  : MEM_CGROUP_CHARGE_TYPE_CACHE);
>> 
>>      /*
>>       * If a page is a file cache, radix-tree replacement is very atomic
>>diff --git a/mm/migrate.c b/mm/migrate.c
>>index 8137aea..aa06bf4 100644
>>--- a/mm/migrate.c
>>+++ b/mm/migrate.c
>>@@ -687,7 +687,6 @@ static int __unmap_and_move(struct page *page, struct 
>>page *newpage,
>> {
>>      int rc = -EAGAIN;
>>      int remap_swapcache = 1;
>>-     int charge = 0;
>>      struct mem_cgroup *mem;
>>      struct anon_vma *anon_vma = NULL;
>> 
>>@@ -729,12 +728,7 @@ static int __unmap_and_move(struct page *page, struct 
>>page *newpage,
>>      }
>> 
>>      /* charge against new page */
>>-     charge = mem_cgroup_prepare_migration(page, newpage, &mem, GFP_KERNEL);
>>-     if (charge == -ENOMEM) {
>>-             rc = -ENOMEM;
>>-             goto unlock;
>>-     }
>>-     BUG_ON(charge);
>>+     mem_cgroup_prepare_migration(page, newpage, &mem);
>> 
>>      if (PageWriteback(page)) {
>>              /*
>>@@ -824,8 +818,7 @@ static int __unmap_and_move(struct page *page, struct 
>>page *newpage,
>>              put_anon_vma(anon_vma);
>> 
>> uncharge:
>>-     if (!charge)
>>-             mem_cgroup_end_migration(mem, page, newpage, rc == 0);
>>+     mem_cgroup_end_migration(mem, page, newpage, rc == 0);
>> unlock:
>>      unlock_page(page);
>> out:
>>@@ -1519,10 +1512,9 @@ migrate_misplaced_page(struct page *page, struct 
>>mm_struct *mm, int node)
>> {
>>      struct page *oldpage = page, *newpage;
>>      struct address_space *mapping = page_mapping(page);
>>-     struct mem_cgroup *mcg;
>>+     struct mem_cgroup *memcg;
>>      unsigned int gfp;
>>      int rc = 0;
>>-     int charge = -ENOMEM;
>> 
>>      VM_BUG_ON(!PageLocked(page));
>>      VM_BUG_ON(page_mapcount(page));
>>@@ -1556,12 +1548,7 @@ migrate_misplaced_page(struct page *page, struct 
>>mm_struct *mm, int node)
>>      if (!trylock_page(newpage))
>>              BUG();          /* new page should be unlocked!!! */
>> 
>>-     // XXX hnaz, is this right?
>>-     charge = mem_cgroup_prepare_migration(page, newpage, &mcg, gfp);
>>-     if (charge == -ENOMEM) {
>>-             rc = charge;
>>-             goto out;
>>-     }
>>+     mem_cgroup_prepare_migration(page, newpage, &memcg);
>> 
>>      newpage->index = page->index;
>>      newpage->mapping = page->mapping;
>>@@ -1581,11 +1568,9 @@ migrate_misplaced_page(struct page *page, struct 
>>mm_struct *mm, int node)
>>              page = newpage;
>>      }
>> 
>>+     mem_cgroup_end_migration(memcg, oldpage, newpage, !rc);
>> out:
>>-     if (!charge)
>>-             mem_cgroup_end_migration(mcg, oldpage, newpage, !rc);
>>-
>>-       if (oldpage != page)
>>+     if (oldpage != page)
>>                put_page(oldpage);
>> 
>>      if (rc) {
>>-- 
>>1.7.7.6
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [patch 01/10] mm: memcg: fix compaction/migration failing due to memcg limits

Reply via email to