We saw systemd reading /proc/<pid>/cgroup quiet a lot and we also see that cgroup_mutex is contended enough to make it's waiters wait for several seconds.
Assuming that cgroup_root_mutex should be less contended as roots change not so much let's switch /proc/<pid>/cgroup to it. https://jira.sw.ru/browse/PSBM-139206 Signed-off-by: Pavel Tikhomirov <ptikhomi...@virtuozzo.com> --- Let's build rpm for it, test it on customer test nodes and see if it helps first. --- kernel/cgroup.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 64f4227b2305..1f5d80130070 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -705,13 +705,14 @@ static inline bool css_has_host_cgroups(struct css_set *css_set) * Return the cgroup for "task" from the given hierarchy. Must be * called with cgroup_mutex held. */ -static struct cgroup *task_cgroup_from_root(struct task_struct *task, - struct cgroupfs_root *root) +static struct cgroup *__task_cgroup_from_root(struct task_struct *task, + struct cgroupfs_root *root, + int nocgmtx) { struct css_set *css; struct cgroup *res = NULL; - BUG_ON(!mutex_is_locked(&cgroup_mutex)); + BUG_ON(!nocgmtx && !mutex_is_locked(&cgroup_mutex)); read_lock(&css_set_lock); /* * No need to lock the task - since we hold cgroup_mutex the @@ -736,6 +737,12 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task, return res; } +static struct cgroup *task_cgroup_from_root(struct task_struct *task, + struct cgroupfs_root *root) +{ + return __task_cgroup_from_root(task, root, false); +} + /* * There is one global cgroup mutex. We also require taking * task_lock() when dereferencing a task's cgroup subsys pointers. @@ -5578,7 +5585,8 @@ int proc_cgroup_show(struct seq_file *m, void *v) retval = 0; - mutex_lock(&cgroup_mutex); + /* without cgroup_mutex we must protect roots list */ + mutex_lock(&cgroup_root_mutex); for_each_active_root(root) { struct cgroup_subsys *ss; @@ -5594,8 +5602,13 @@ int proc_cgroup_show(struct seq_file *m, void *v) seq_printf(m, "%sname=%s", count ? "," : "", root->name); seq_putc(m, ':'); - cgrp = task_cgroup_from_root(tsk, root); + + /* without cgroup_mutex we take rcu to make cgrp valid */ + rcu_read_lock(); + cgrp = __task_cgroup_from_root(tsk, root, true); retval = cgroup_path_ve(cgrp, buf, PAGE_SIZE); + rcu_read_unlock(); + if (retval < 0) goto out_unlock; seq_puts(m, buf); @@ -5603,7 +5616,7 @@ int proc_cgroup_show(struct seq_file *m, void *v) } out_unlock: - mutex_unlock(&cgroup_mutex); + mutex_unlock(&cgroup_root_mutex); put_task_struct(tsk); out_free: kfree(buf); -- 2.35.3 _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel