After the rebase to VZ8, the code which sets memcg->overdraft was moved
into mem_cgroup_scan_tasks(), which is wrong: in VZ7 this code lived in
oom_unlock(), so ->overdraft was always properly set everywhere in the
OOM path. As a result, ->overdraft is no longer refreshed properly in
two cases: in global OOM and in the berserker.

Let's fix this by splitting the refresh code out into a separate
function, refresh_mem_cgroup_overdraft(), and calling it where it is
really needed (where oom_badness() later in the call stack uses the
refreshed ->overdraft).

Fixes: c31dabeaf42d ("memcg: add oom_guarantee")
Signed-off-by: Pavel Tikhomirov <ptikhomi...@virtuozzo.com>

Feature: mm/oom: OOM guarantee feature
---
 include/linux/memcontrol.h |  6 ++++++
 mm/memcontrol.c            | 30 +++++++++++++++++++-----------
 mm/oom_kill.c              |  4 ++++
 3 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 4719eb84894f..9c2b8774639e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -22,6 +22,7 @@
 #include <linux/writeback.h>
 #include <linux/page-flags.h>
 #include <linux/virtinfo.h>
+#include <linux/oom.h>
 
 struct mem_cgroup;
 struct obj_cgroup;
@@ -1004,6 +1005,7 @@ void mem_cgroup_print_oom_context(struct mem_cgroup *memcg,
 
 void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg);
 unsigned long mem_cgroup_overdraft(struct mem_cgroup *memcg);
+void refresh_mem_cgroup_overdraft(struct oom_control *oc);
 
 static inline void mem_cgroup_enter_user_fault(void)
 {
@@ -1529,6 +1531,10 @@ static inline unsigned long mem_cgroup_overdraft(struct mem_cgroup *memcg)
        return 0;
 }
 
+static inline void refresh_mem_cgroup_overdraft(struct oom_control *oc)
+{
+}
+
 static inline void
 mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a0a7c3b4ed35..8ca27af8d902 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1308,17 +1308,6 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
        for_each_mem_cgroup_tree(iter, memcg) {
                struct css_task_iter it;
                struct task_struct *task;
-               struct mem_cgroup *parent;
-
-               /*
-                * Update overdraft of each cgroup under us. This
-                * information will be used in oom_badness.
-                */
-               iter->overdraft = mem_cgroup_overdraft(iter);
-               parent = parent_mem_cgroup(iter);
-               if (parent && iter != memcg)
-                       iter->overdraft = max(iter->overdraft,
-                                       parent->overdraft);
 
                css_task_iter_start(&iter->css, CSS_TASK_ITER_PROCS, &it);
                while (!ret && (task = css_task_iter_next(&it)))
@@ -1518,6 +1507,25 @@ unsigned long mem_cgroup_overdraft(struct mem_cgroup *memcg)
        return usage > guarantee ? (usage - guarantee) : 0;
 }
 
+void refresh_mem_cgroup_overdraft(struct oom_control *oc)
+{
+       struct mem_cgroup *iter;
+
+       for_each_mem_cgroup_tree(iter, oc->memcg) {
+               struct mem_cgroup *parent;
+
+               /*
+                * Update overdraft of each cgroup under us. This
+                * information will be used in oom_badness.
+                */
+               iter->overdraft = mem_cgroup_overdraft(iter);
+               parent = parent_mem_cgroup(iter);
+               if (parent && iter != oc->memcg)
+                       iter->overdraft = max(iter->overdraft,
+                                             parent->overdraft);
+       }
+}
+
 bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg, int vfs_cache_min_ratio)
 {
        unsigned long anon, file, dcache;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 1d44024d6c6f..d27e04295e7f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -378,6 +378,8 @@ static void select_bad_process(struct oom_control *oc)
 {
        oc->chosen_points = LONG_MIN;
 
+       refresh_mem_cgroup_overdraft(oc);
+
        if (is_memcg_oom(oc))
                mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc);
        else {
@@ -1073,6 +1075,8 @@ static void oom_berserker(struct oom_control *oc)
        if (rage < 0)
                return;
 
+       refresh_mem_cgroup_overdraft(oc);
+
        /*
         * So, we are in rage. Kill (1 << rage) youngest tasks that are
         * as bad as the victim.
-- 
2.43.0

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to