From: Stanislav Kinsburskiy <skinsbur...@virtuozzo.com> Due to changes in RH8.4 we need to rework it. Actually, the logic becomes much simpler: we mount/umount a single tmpfs per VE on cgroup creation/removal, and all actual devtmpfs mount calls only increase a refcount on the corresponding VE's mount, as with the host's devtmpfs.
Original commit message: Previously, we implemented full-featured devtmpfs virtualization for VE: when a device is created in a VE "namespace", we send a signal to kdevtmpfs to create the devnode on the devtmpfs mount corresponding to the VE. This seems to be over-complicated: all this work can be done from userspace, because we only have a hardcoded list of devices created exclusively for VE on container start. Those are tty-related stuff and mem devices, and we only need the latter to create devtmpfs nodes. Moreover, it is buggy: ve_stop_ns, which destroys the VE devtmpfs mount, can be called before a VE tty device is unregistered, resulting in a KP: https://jira.sw.ru/browse/PSBM-35077 This patch therefore simplifies it. It makes the kernel only provide a single empty tmpfs mount per VE, which appears on an attempt to mount devtmpfs from inside a VE. The content of the fs is to be filled by the userspace on container start, which will be done in the scope of https://jira.sw.ru/browse/PSBM-35146 All this patch does is provide each VE with its own single empty tmpfs mount, which appears on an attempt to mount "devtmpfs". It's up to the userspace to populate this fs on container start; all kernel requests to create a device node inside a VE are ignored.
Signed-off-by: Vladimir Davydov <vdavy...@parallels.com> Signed-off-by: Stanislav Kinsburskiy <skinsbur...@virtuozzo.com> https://jira.sw.ru/browse/PSBM-131158 Signed-off-by: Pavel Tikhomirov <ptikhomi...@virtuozzo.com> v2 by khorenko@: s/FS_USERNS_MOUNT/FS_VE_MOUNT/ Signed-off-by: Kirill Tkhai <ktk...@virtuozzo.com> --- drivers/base/devtmpfs.c | 24 ++++++++++++++++++++++++ include/linux/device.h | 2 ++ include/linux/ve.h | 2 ++ kernel/ve/ve.c | 6 ++++++ 4 files changed, 34 insertions(+) diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 8be352ab4ddb..b3a3cbe65daa 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -27,6 +27,7 @@ #include <linux/kthread.h> #include <linux/init_syscalls.h> #include <uapi/linux/mount.h> +#include <linux/ve.h> #include "base.h" static struct task_struct *thread; @@ -59,6 +60,12 @@ static struct dentry *public_dev_mount(struct file_system_type *fs_type, int fla const char *dev_name, void *data) { struct super_block *s = mnt->mnt_sb; +#ifdef CONFIG_VE + struct ve_struct *ve = get_exec_env(); + + if (!ve_is_super(ve)) + s = ve->devtmpfs_mnt->mnt_sb; +#endif atomic_inc(&s->s_active); down_write(&s->s_umount); return dget(s->s_root); @@ -79,6 +86,7 @@ static struct file_system_type internal_fs_type = { static struct file_system_type dev_fs_type = { .name = "devtmpfs", .mount = public_dev_mount, + .fs_flags = FS_VIRTUALIZED | FS_VE_MOUNT, }; #ifdef CONFIG_BLOCK @@ -438,6 +446,22 @@ static int __ref devtmpfsd(void *p) return 0; } +int ve_mount_devtmpfs(struct ve_struct *ve) +{ + char opts[] = "mode=0755"; + struct vfsmount *mnt; + + mnt = vfs_kern_mount(&internal_fs_type, 0, "devtmpfs", opts); + if (IS_ERR(mnt)) { + printk(KERN_ERR "CT#%s: devtmpfs: unable to create devtmpfs %ld\n", + ve_name(ve), PTR_ERR(mnt)); + return PTR_ERR(mnt); + } + ve->devtmpfs_mnt = mnt; + + return 0; +} + /* * Create devtmpfs instance, driver-core devices will add their device * nodes here. 
diff --git a/include/linux/device.h b/include/linux/device.h index 65d84b67b024..8b1511b1af44 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -950,8 +950,10 @@ bool kill_device(struct device *dev); #ifdef CONFIG_DEVTMPFS int devtmpfs_mount(void); +extern int ve_mount_devtmpfs(struct ve_struct *ve); #else static inline int devtmpfs_mount(void) { return 0; } +static inline int ve_mount_devtmpfs(struct ve_struct *ve) { return 0; } #endif /* drivers/base/power/shutdown.c */ diff --git a/include/linux/ve.h b/include/linux/ve.h index ffe068ec5fe7..e8514c5a0afb 100644 --- a/include/linux/ve.h +++ b/include/linux/ve.h @@ -26,6 +26,7 @@ struct nsproxy; struct veip_struct; struct user_namespace; struct cn_private; +struct vfsmount; struct ve_struct { struct cgroup_subsys_state css; @@ -103,6 +104,7 @@ struct ve_struct { unsigned long aio_nr; unsigned long aio_max_nr; #endif + struct vfsmount *devtmpfs_mnt; }; struct ve_devmnt { diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c index 38ede55d65b7..af46a9b597df 100644 --- a/kernel/ve/ve.c +++ b/kernel/ve/ve.c @@ -32,6 +32,7 @@ #include <linux/ctype.h> #include <linux/tty.h> #include <linux/genhd.h> +#include <linux/device.h> #include <uapi/linux/vzcalluser.h> #include <net/rtnetlink.h> @@ -700,6 +701,10 @@ static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_ if (err) goto err_vdso; + err = ve_mount_devtmpfs(ve); + if (err) + goto err_vdso; /* The same as above, correct */ + do_init: init_rwsem(&ve->op_sem); INIT_LIST_HEAD(&ve->ve_list); @@ -792,6 +797,7 @@ static void ve_destroy(struct cgroup_subsys_state *css) kmapset_unlink(&ve->sysfs_perms_key, &sysfs_ve_perms_set); ve_log_destroy(ve); ve_free_vdso(ve); + mntput(ve->devtmpfs_mnt); #if IS_ENABLED(CONFIG_BINFMT_MISC) kfree(ve->binfmt_misc); #endif _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel