The commit is pushed to "branch-rh9-5.14.vz9.1.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh9-5.14.0-4.vz9.10.1 ------> commit 5697a105caf70bf9468025ca7192afebdbe80fbd Author: Valeriy Vdovin <valeriy.vdo...@virtuozzo.com> Date: Wed Oct 13 14:13:29 2021 +0300
ve/cgroup: Skip non-virtualized roots in cgroup_{,un}mark_ve_roots() During container start there might be a situation when not all cgroup hierarchies get virtualized by the container manager (like vzctl). By virtualizing a cgroup hierarchy I mean creation of a sub-directory within a particular mounted cgroup. When a container starts, it looks in the css set of its init process to list all affiliated cgroups and performs actions on each. But non-virtualized cgroups will also be present in init's css_set and they should not be touched from inside of any non-root ve. Signed-off-by: Valeriy Vdovin <valeriy.vdo...@virtuozzo.com> Reviewed-by: Kirill Tkhai <ktk...@virtuozzo.com> vz9 changes: we want to drop release agent virtualization, but let's still sanitize ve cgroup roots marking, all related patches are merged and reworked here. https://jira.sw.ru/browse/PSBM-134002 (cherry-picked from vz8 commit de090f989b240d1004540d2f4d775e66996b57e2) Signed-off-by: Pavel Tikhomirov <ptikhomi...@virtuozzo.com> --- include/linux/cgroup.h | 2 +- kernel/cgroup/cgroup.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++- kernel/ve/ve.c | 6 ++++- 3 files changed, 72 insertions(+), 3 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 99bd069a476d..4dc3f2f007f1 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -887,7 +887,7 @@ int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, struct cgroup_namespace *ns); #ifdef CONFIG_VE -extern void cgroup_mark_ve_root(struct ve_struct *ve); +extern int cgroup_mark_ve_root(struct ve_struct *ve); void cgroup_unmark_ve_roots(struct ve_struct *ve); #endif diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index a425f06eceea..fbe8483b7035 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1961,7 +1961,58 @@ struct ve_struct *get_curr_ve(void) return ve; } -void cgroup_mark_ve_root(struct ve_struct *ve) +/* + * Let's skip optional cgroups in Virtuozzo containers. 
Admin on host can + * do "mount -t cgroup cgroup -onone,name=namedcgroup /mnt", and this should + * not break containers. + */ +static inline bool is_virtualized_cgroup(struct cgroup *cgrp) +{ + /* Cgroup v2 */ + if (cgrp->root == &cgrp_dfl_root) + return false; + +#if IS_ENABLED(CONFIG_CGROUP_DEBUG) + if (cgrp->subsys[debug_cgrp_id]) + return false; +#endif + + if (cgrp->root->subsys_mask) + return true; + + if (!strcmp(cgrp->root->name, "systemd")) + return true; + + return false; +} + +/* + * Iterate all cgroups in a given css_set and for all obligatory Virtuozzo + * container cgroups check that container has its own cgroup subdirectory: + * non-host and non-intersecting with other container subdirectories. + */ +static inline bool ve_check_root_cgroups(struct css_set *cset) +{ + struct cgrp_cset_link *link; + + lockdep_assert_held(&css_set_lock); + + list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { + if (!is_virtualized_cgroup(link->cgrp)) + continue; + + /* Host cgroups not allowed */ + if (!link->cgrp->kn->parent) + return true; + + /* Nested CGRP_VE_ROOT not allowed */ + if (cgroup_get_ve_root1(link->cgrp)) + return true; + } + return false; +} + +int cgroup_mark_ve_root(struct ve_struct *ve) { struct cgrp_cset_link *link; struct css_set *cset; @@ -1977,13 +2028,23 @@ void cgroup_mark_ve_root(struct ve_struct *ve) cset = rcu_dereference_protected(ve->ve_ns, lockdep_is_held(&ve->op_sem))->cgroup_ns->root_cset; + if (ve_check_root_cgroups(cset)) { + spin_unlock_irq(&css_set_lock); + return -EINVAL; + } + list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { cgrp = link->cgrp; + + if (!is_virtualized_cgroup(cgrp)) + continue; + set_bit(CGRP_VE_ROOT, &cgrp->flags); } link_ve_root_cpu_cgroup(cset->subsys[cpu_cgrp_id]); spin_unlock_irq(&css_set_lock); + return 0; } void cgroup_unmark_ve_roots(struct ve_struct *ve) @@ -2004,6 +2065,10 @@ void cgroup_unmark_ve_roots(struct ve_struct *ve) list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { cgrp = 
link->cgrp; + + if (!is_virtualized_cgroup(cgrp)) + continue; + clear_bit(CGRP_VE_ROOT, &cgrp->flags); } diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c index f9aaf135f630..5acd2baa2a08 100644 --- a/kernel/ve/ve.c +++ b/kernel/ve/ve.c @@ -505,7 +505,9 @@ static int ve_start_container(struct ve_struct *ve) if (err < 0) goto err_iterate; - cgroup_mark_ve_root(ve); + err = cgroup_mark_ve_root(ve); + if (err) + goto err_mark_ve; ve->is_running = 1; @@ -515,6 +517,8 @@ static int ve_start_container(struct ve_struct *ve) return 0; +err_mark_ve: + ve_hook_iterate_fini(VE_SS_CHAIN, ve); err_iterate: ve_stop_umh(ve); err_umh: _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel