The commit is pushed to "branch-rh9-5.14.vz9.1.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after ark-5.14 ------> commit affacbc14c10da357be5130278d05caffea4f8c7 Author: Stanislav Kinsburskiy <skinsbur...@virtuozzo.com> Date: Tue Sep 28 14:42:33 2021 +0300
ve/kthreadd: emulate kthreadd by kworker thread, attached to CT This is the first version of kthreadd in a container (there will be some improvements on top). With this patch kthreadd won't be visible in a container, but its children will be. Signed-off-by: Stanislav Kinsburskiy <skinsbur...@virtuozzo.com> (cherry-picked from vz8 commit 1cc3a639c3fb ("ve/kthreadd: emulate kthreadd by kworker thread, attached to CT")) Signed-off-by: Nikita Yushchenko <nikita.yushche...@virtuozzo.com> --- include/linux/ve.h | 2 ++ kernel/ve/ve.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+) diff --git a/include/linux/ve.h b/include/linux/ve.h index 18d52c6c63cf..6f75bd084e2e 100644 --- a/include/linux/ve.h +++ b/include/linux/ve.h @@ -59,6 +59,8 @@ struct ve_struct { u64 _uevent_seqnum; int _randomize_va_space; + + struct kthread_worker *kthreadd_worker; }; extern int nr_ve; diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c index 75299258d677..da86dae1a5f4 100644 --- a/kernel/ve/ve.c +++ b/kernel/ve/ve.c @@ -20,6 +20,9 @@ #include <linux/mutex.h> #include <linux/kmapset.h> #include <linux/mm.h> +#include <linux/kthread.h> +#include <linux/nsproxy.h> +#include <linux/fs_struct.h> #include <uapi/linux/vzcalluser.h> #include <net/rtnetlink.h> @@ -262,6 +265,87 @@ static void ve_drop_context(struct ve_struct *ve) ve->init_cred = NULL; } +static void ve_stop_kthreadd(struct ve_struct *ve) +{ + kthread_destroy_worker(ve->kthreadd_worker); + ve->kthreadd_worker = NULL; +} + +struct kthread_attach_work { + struct kthread_work work; + struct completion done; + struct task_struct *target; + int result; +}; + +static void kthread_attach_fn(struct kthread_work *w) +{ + struct kthread_attach_work *work = container_of(w, + struct kthread_attach_work, work); + struct task_struct *target = work->target; + struct cred *cred; + int err; + + get_nsproxy(target->nsproxy); + switch_task_namespaces(current, target->nsproxy); + + err = unshare_fs_struct(); + if (err) 
+ goto out; + set_fs_root(current->fs, &target->fs->root); + set_fs_pwd(current->fs, &target->fs->root); + + err = -ENOMEM; + cred = prepare_kernel_cred(target); + if (!cred) + goto out; + err = commit_creds(cred); + if (err) + goto out; + + err = cgroup_attach_task_all(target, current); + if (err) + goto out; +out: + work->result = err; + complete(&work->done); +} + +static struct kthread_worker *ve_create_kworker(struct ve_struct *ve) +{ + struct kthread_worker *w; + struct kthread_attach_work attach = { + KTHREAD_WORK_INIT(attach.work, kthread_attach_fn), + COMPLETION_INITIALIZER_ONSTACK(attach.done), + .target = current, + }; + + w = kthread_create_worker(0, "worker/%s", ve_name(ve)); + if (IS_ERR(w)) + return w; + + kthread_queue_work(w, &attach.work); + wait_for_completion(&attach.done); + if (attach.result) { + kthread_destroy_worker(w); + return ERR_PTR(attach.result); + } + + return w; +} + +static int ve_start_kthreadd(struct ve_struct *ve) +{ + struct kthread_worker *w; + + w = ve_create_kworker(ve); + if (IS_ERR(w)) + return PTR_ERR(w); + + ve->kthreadd_worker = w; + return 0; +} + /* under ve->op_sem write-lock */ static int ve_start_container(struct ve_struct *ve) { @@ -307,6 +391,10 @@ static int ve_start_container(struct ve_struct *ve) if (err) goto err_list; + err = ve_start_kthreadd(ve); + if (err) + goto err_kthreadd; + err = ve_hook_iterate_init(VE_SS_CHAIN, ve); if (err < 0) goto err_iterate; @@ -322,6 +410,8 @@ static int ve_start_container(struct ve_struct *ve) return 0; err_iterate: + ve_stop_kthreadd(ve); +err_kthreadd: ve_list_del(ve); err_list: ve_drop_context(ve); @@ -352,6 +442,10 @@ void ve_stop_ns(struct pid_namespace *pid_ns) * anymore, setup it again if needed. */ ve->is_pseudosuper = 0; + /* + * Stop kthreads, or zap_pid_ns_processes() will wait them forever. 
+ */ + ve_stop_kthreadd(ve); unlock: up_write(&ve->op_sem); } _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel