After the rebase to VZ8, the code that sets memcg->overdraft was moved into mem_cgroup_scan_tasks(), which is completely wrong (for instance, in VZ7 we had this code in oom_unlock(), so ->overdraft information was always properly set everywhere in oom). As a result, ->overdraft information is no longer refreshed properly in two cases: in global oom and in the berserker.
Let's fix this by splitting the refresh code out into a separate function, refresh_mem_cgroup_overdraft(), and calling it where it is really needed (i.e. where oom_badness, later in the call stack, uses the refreshed ->overdraft). https://virtuozzo.atlassian.net/browse/PSBM-154224 Fixes: c31dabeaf42d ("memcg: add oom_guarantee") Signed-off-by: Pavel Tikhomirov <ptikhomi...@virtuozzo.com> Feature: mm/oom: OOM guarantee feature --- include/linux/memcontrol.h | 6 ++++++ mm/memcontrol.c | 30 +++++++++++++++++++----------- mm/oom_kill.c | 4 ++++ 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 4719eb84894f..9c2b8774639e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -22,6 +22,7 @@ #include <linux/writeback.h> #include <linux/page-flags.h> #include <linux/virtinfo.h> +#include <linux/oom.h> struct mem_cgroup; struct obj_cgroup; @@ -1004,6 +1005,7 @@ void mem_cgroup_print_oom_context(struct mem_cgroup *memcg, void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg); unsigned long mem_cgroup_overdraft(struct mem_cgroup *memcg); +void refresh_mem_cgroup_overdraft(struct oom_control *oc); static inline void mem_cgroup_enter_user_fault(void) { @@ -1529,6 +1531,10 @@ static inline unsigned long mem_cgroup_overdraft(struct mem_cgroup *memcg) return 0; } +static inline void refresh_mem_cgroup_overdraft(struct oom_control *oc) +{ +} + static inline void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a0a7c3b4ed35..8ca27af8d902 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1308,17 +1308,6 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg, for_each_mem_cgroup_tree(iter, memcg) { struct css_task_iter it; struct task_struct *task; - struct mem_cgroup *parent; - - /* - * Update overdraft of each cgroup under us. This - * information will be used in oom_badness. 
- */ - iter->overdraft = mem_cgroup_overdraft(iter); - parent = parent_mem_cgroup(iter); - if (parent && iter != memcg) - iter->overdraft = max(iter->overdraft, - parent->overdraft); css_task_iter_start(&iter->css, CSS_TASK_ITER_PROCS, &it); while (!ret && (task = css_task_iter_next(&it))) @@ -1518,6 +1507,25 @@ unsigned long mem_cgroup_overdraft(struct mem_cgroup *memcg) return usage > guarantee ? (usage - guarantee) : 0; } +void refresh_mem_cgroup_overdraft(struct oom_control *oc) +{ + struct mem_cgroup *iter; + + for_each_mem_cgroup_tree(iter, oc->memcg) { + struct mem_cgroup *parent; + + /* + * Update overdraft of each cgroup under us. This + * information will be used in oom_badness. + */ + iter->overdraft = mem_cgroup_overdraft(iter); + parent = parent_mem_cgroup(iter); + if (parent && iter != oc->memcg) + iter->overdraft = max(iter->overdraft, + parent->overdraft); + } +} + bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg, int vfs_cache_min_ratio) { unsigned long anon, file, dcache; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 1d44024d6c6f..d27e04295e7f 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -378,6 +378,8 @@ static void select_bad_process(struct oom_control *oc) { oc->chosen_points = LONG_MIN; + refresh_mem_cgroup_overdraft(oc); + if (is_memcg_oom(oc)) mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc); else { @@ -1073,6 +1075,8 @@ static void oom_berserker(struct oom_control *oc) if (rage < 0) return; + refresh_mem_cgroup_overdraft(oc); + /* * So, we are in rage. Kill (1 << rage) youngest tasks that are * as bad as the victim. -- 2.43.0 _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel