On Mon 16-06-14 15:54:23, Johannes Weiner wrote:
> Transparent huge page charges prefer falling back to regular pages
> rather than spending a lot of time in direct reclaim.
> 
> Desired reclaim behavior is usually declared in the gfp mask, but THP
> charges use GFP_KERNEL and then rely on the fact that OOM is disabled
> for THP charges, and that OOM-disabled charges currently skip reclaim.
> Needless to say, this is anything but obvious and quite error prone.
> 
> Convert THP charges to use GFP_TRANSHUGE instead, which implies
> __GFP_NORETRY, to indicate the low-latency requirement.

Maybe we can go one step further and get rid of the oom parameter
altogether. Only the THP charge path (handled by this patch) and
mem_cgroup_do_precharge want OOM disabled explicitly.
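
For reference, assuming the current definitions (roughly) in
include/linux/gfp.h and include/linux/oom.h:

#define GFP_TRANSHUGE	(GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
			 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \
			 __GFP_NO_KSWAPD)

static inline bool oom_gfp_allowed(gfp_t gfp_mask)
{
	return (gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY);
}

so a GFP_TRANSHUGE charge already fails oom_gfp_allowed() thanks to
__GFP_NORETRY, and the gfp mask alone carries enough information to
decide whether to OOM.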

GFP_KERNEL | __GFP_NORETRY is ugly and something like GFP_NO_OOM
would read better, but this is just a quick scratch.
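
As a sketch only (GFP_NO_OOM below is hypothetical, nothing in the tree
defines it), the precharge path could then spell it as:

/* hypothetical alias: charge without ever invoking the OOM killer */
#define GFP_NO_OOM	(GFP_KERNEL | __GFP_NORETRY)

which keeps the intent readable while still letting oom_gfp_allowed()
make the call from the gfp mask.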

What do you think?
---
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 52550bbff1ef..5d247822b03a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2555,15 +2555,13 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
  * mem_cgroup_try_charge - try charging a memcg
  * @memcg: memcg to charge
  * @nr_pages: number of pages to charge
- * @oom: trigger OOM if reclaim fails
  *
  * Returns 0 if @memcg was charged successfully, -EINTR if the charge
  * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
  */
 static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
                                 gfp_t gfp_mask,
-                                unsigned int nr_pages,
-                                bool oom)
+                                unsigned int nr_pages)
 {
        unsigned int batch = max(CHARGE_BATCH, nr_pages);
        int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
@@ -2647,7 +2645,7 @@ retry:
        if (fatal_signal_pending(current))
                goto bypass;
 
-       if (!oom)
+       if (!oom_gfp_allowed(gfp_mask))
                goto nomem;
 
        mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(batch));
@@ -2675,15 +2673,14 @@ done:
  */
 static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
                                 gfp_t gfp_mask,
-                                unsigned int nr_pages,
-                                bool oom)
+                                unsigned int nr_pages)
 
 {
        struct mem_cgroup *memcg;
        int ret;
 
        memcg = get_mem_cgroup_from_mm(mm);
-       ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages, oom);
+       ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages);
        css_put(&memcg->css);
        if (ret == -EINTR)
                memcg = root_mem_cgroup;
@@ -2900,8 +2897,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
        if (ret)
                return ret;
 
-       ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT,
-                                   oom_gfp_allowed(gfp));
+       ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT);
        if (ret == -EINTR)  {
                /*
                 * mem_cgroup_try_charge() chosed to bypass to root due to
@@ -3650,7 +3646,6 @@ int mem_cgroup_charge_anon(struct page *page,
 {
        unsigned int nr_pages = 1;
        struct mem_cgroup *memcg;
-       bool oom = true;
 
        if (mem_cgroup_disabled())
                return 0;
@@ -3666,10 +3661,10 @@ int mem_cgroup_charge_anon(struct page *page,
                 * Never OOM-kill a process for a huge page.  The
                 * fault handler will fall back to regular pages.
                 */
-               oom = false;
+               VM_BUG_ON(oom_gfp_allowed(gfp_mask));
        }
 
-       memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages, oom);
+       memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages);
        if (!memcg)
                return -ENOMEM;
        __mem_cgroup_commit_charge(memcg, page, nr_pages,
@@ -3706,7 +3701,7 @@ static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
                memcg = try_get_mem_cgroup_from_page(page);
        if (!memcg)
                memcg = get_mem_cgroup_from_mm(mm);
-       ret = mem_cgroup_try_charge(memcg, mask, 1, true);
+       ret = mem_cgroup_try_charge(memcg, mask, 1);
        css_put(&memcg->css);
        if (ret == -EINTR)
                memcg = root_mem_cgroup;
@@ -3733,7 +3728,7 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
        if (!PageSwapCache(page)) {
                struct mem_cgroup *memcg;
 
-               memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
+               memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1);
                if (!memcg)
                        return -ENOMEM;
                *memcgp = memcg;
@@ -3802,7 +3797,7 @@ int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
                return 0;
        }
 
-       memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
+       memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1);
        if (!memcg)
                return -ENOMEM;
        __mem_cgroup_commit_charge(memcg, page, 1, type, false);
@@ -6414,7 +6409,8 @@ one_by_one:
                        batch_count = PRECHARGE_COUNT_AT_ONCE;
                        cond_resched();
                }
-               ret = mem_cgroup_try_charge(memcg, GFP_KERNEL, 1, false);
+               /* Do not trigger OOM killer from this path */
+               ret = mem_cgroup_try_charge(memcg, GFP_KERNEL | __GFP_NORETRY, 1);
                if (ret)
                        /* mem_cgroup_clear_mc() will do uncharge later */
                        return ret;
-- 
Michal Hocko
SUSE Labs