The recently added alloc_pages_before_oomkill gained a new caller with this
patchset, and I think it has grown enough to deserve a simpler code flow.
What do you think about this on top of the series?

---
>From f1f6035ea0df65e7619860b013f2fabdda65233e Mon Sep 17 00:00:00 2001
From: Michal Hocko <mho...@suse.com>
Date: Fri, 1 Dec 2017 10:05:25 +0100
Subject: [PATCH] mm, oom: simplify alloc_pages_before_oomkill handling

alloc_pages_before_oomkill is the last attempt to allocate memory before
we go and try to kill a process or a memcg. Its success path always has
to properly clean up the oc state (namely the victim reference count). Let's
pull this into alloc_pages_before_oomkill directly rather than risk
somebody forgetting to do it in the future. Also document that we _know_
alloc_pages_before_oomkill violates proper layering and that this is a
pragmatic decision.

Signed-off-by: Michal Hocko <mho...@suse.com>
---
 include/linux/oom.h |  2 +-
 mm/oom_kill.c       | 21 +++------------------
 mm/page_alloc.c     | 24 ++++++++++++++++++++++--
 3 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/include/linux/oom.h b/include/linux/oom.h
index 10f495c8454d..7052e0a20e13 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -121,7 +121,7 @@ extern void oom_killer_enable(void);
 
 extern struct task_struct *find_lock_task_mm(struct task_struct *p);
 
-extern struct page *alloc_pages_before_oomkill(const struct oom_control *oc);
+extern bool alloc_pages_before_oomkill(struct oom_control *oc);
 
 extern int oom_evaluate_task(struct task_struct *task, void *arg);
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 4678468bae17..5c2cd299757b 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -1102,8 +1102,7 @@ bool out_of_memory(struct oom_control *oc)
        if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task &&
            current->mm && !oom_unkillable_task(current, NULL, oc->nodemask) &&
            current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
-               oc->page = alloc_pages_before_oomkill(oc);
-               if (oc->page)
+               if (alloc_pages_before_oomkill(oc))
                        return true;
                get_task_struct(current);
                oc->chosen_task = current;
@@ -1112,13 +1111,8 @@ bool out_of_memory(struct oom_control *oc)
        }
 
        if (mem_cgroup_select_oom_victim(oc)) {
-               oc->page = alloc_pages_before_oomkill(oc);
-               if (oc->page) {
-                       if (oc->chosen_memcg &&
-                           oc->chosen_memcg != INFLIGHT_VICTIM)
-                               mem_cgroup_put(oc->chosen_memcg);
+               if (alloc_pages_before_oomkill(oc))
                        return true;
-               }
 
                if (oom_kill_memcg_victim(oc)) {
                        delay = true;
@@ -1127,17 +1121,8 @@ bool out_of_memory(struct oom_control *oc)
        }
 
        select_bad_process(oc);
-       /*
-        * Try really last second allocation attempt after we selected an OOM
-        * victim, for somebody might have managed to free memory while we were
-        * selecting an OOM victim which can take quite some time.
-        */
-       oc->page = alloc_pages_before_oomkill(oc);
-       if (oc->page) {
-               if (oc->chosen_task && oc->chosen_task != INFLIGHT_VICTIM)
-                       put_task_struct(oc->chosen_task);
+       if (alloc_pages_before_oomkill(oc))
                return true;
-       }
        /* Found nothing?!?! Either we hang forever, or we panic. */
        if (!oc->chosen_task && !is_sysrq_oom(oc) && !is_memcg_oom(oc)) {
                dump_header(oc, NULL);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0d518e9b2ee8..9e65fa06ee10 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4146,7 +4146,17 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int 
order,
        return page;
 }
 
-struct page *alloc_pages_before_oomkill(const struct oom_control *oc)
+/*
+ * Try really last second allocation attempt after we selected an OOM victim,
+ * for somebody might have managed to free memory while we were selecting an
+ * OOM victim which can take quite some time.
+ *
+ * This function is a blatant layer violation example because we cross the page
+ * allocator and reclaim (OOM killer) layers. Doing this right from the design
+ * POV would be much more complex though so let's close our eyes and pretend
+ * everything is all right.
+ */
+bool alloc_pages_before_oomkill(struct oom_control *oc)
 {
        /*
         * Go through the zonelist yet one more time, keep very high watermark
@@ -4167,7 +4177,17 @@ struct page *alloc_pages_before_oomkill(const struct 
oom_control *oc)
        reserve_flags = __gfp_pfmemalloc_flags(gfp_mask);
        if (reserve_flags)
                alloc_flags = reserve_flags;
-       return get_page_from_freelist(gfp_mask, oc->order, alloc_flags, oc->ac);
+       oc->page = get_page_from_freelist(gfp_mask, oc->order, alloc_flags, 
oc->ac);
+       if (!oc->page)
+               return false;
+
+       /*
+        * we are skipping the remaining oom steps so clean up the pending oc
+        * state
+        */
+       if (oc->chosen_task && oc->chosen_task != INFLIGHT_VICTIM)
+               put_task_struct(oc->chosen_task);
+       return true;
 }
 
 static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
-- 
2.15.0

-- 
Michal Hocko
SUSE Labs
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to