From: Stanislav Kinsburskiy <skinsbur...@virtuozzo.com> This patch adds lightweight proc virtualization: entries with the S_ISVTX bit set are visible from all proc instances, while entries without S_ISVTX are visible only via a proc mounted from the init pid namespace.
https://jira.sw.ru/browse/PSBM-18019 Plus fixes for : https://jira.sw.ru/browse/PSBM-18021 https://jira.sw.ru/browse/PSBM-40359 https://jira.sw.ru/browse/PSBM-58574 https://jira.sw.ru/browse/PSBM-55920 https://bugs.openvz.org/browse/OVZ-6834 Signed-off-by: Konstantin Khlebnikov <khlebni...@openvz.org> Signed-off-by: Vladimir Davydov <vdavy...@parallels.com> Signed-off-by: Stanislav Kinsburskiy <skinsbur...@virtuozzo.com> Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com> (cherry picked from commit 886a1c051dab5a162199ee4e0c995a9b112cdabe) Signed-off-by: Konstantin Khorenko <khore...@virtuozzo.com> +++ ve/proc/netfilter: Get rid of per-CT iptables mask https://jira.sw.ru/browse/PSBM-127787 Signed-off-by: Konstantin Khorenko <khore...@virtuozzo.com> (cherry picked from vz8 commit 1c6b6fe2023ba275ab9e6834b810d6e2c12be0a7) Signed-off-by: Pavel Tikhomirov <ptikhomi...@virtuozzo.com> --- fs/filesystems.c | 2 +- fs/locks.c | 2 +- fs/proc/base.c | 2 +- fs/proc/cmdline.c | 2 +- fs/proc/cpuinfo.c | 2 +- fs/proc/devices.c | 2 +- fs/proc/generic.c | 48 +++++++++++++++++++++++++++++++--------- fs/proc/inode.c | 4 ++-- fs/proc/loadavg.c | 2 +- fs/proc/meminfo.c | 2 +- fs/proc/proc_net.c | 2 +- fs/proc/proc_sysctl.c | 2 +- fs/proc/root.c | 7 +++--- fs/proc/self.c | 2 +- fs/proc/stat.c | 2 +- fs/proc/uptime.c | 2 +- fs/proc/version.c | 2 +- include/linux/proc_fs.h | 11 +++++++-- ipc/util.c | 2 +- kernel/cgroup/cgroup.c | 3 ++- kernel/module.c | 4 +--- mm/swapfile.c | 2 +- mm/vmstat.c | 2 +- net/netfilter/x_tables.c | 9 +++++--- 24 files changed, 79 insertions(+), 41 deletions(-) diff --git a/fs/filesystems.c b/fs/filesystems.c index 87336a9c8104..81680933d82f 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -260,7 +260,7 @@ static int filesystems_proc_show(struct seq_file *m, void *v) static int __init proc_filesystems_init(void) { - proc_create_single("filesystems", 0, NULL, filesystems_proc_show); + proc_create_single("filesystems", S_ISVTX, NULL, 
filesystems_proc_show); return 0; } module_init(proc_filesystems_init); diff --git a/fs/locks.c b/fs/locks.c index 3edcf30793d5..3330ba82ebdf 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -3044,7 +3044,7 @@ static const struct seq_operations locks_seq_operations = { static int __init proc_locks_init(void) { - proc_create_seq_private("locks", 0, NULL, &locks_seq_operations, + proc_net_create_seq_private("locks", 0, NULL, &locks_seq_operations, sizeof(struct locks_iterator), NULL); return 0; } diff --git a/fs/proc/base.c b/fs/proc/base.c index b0afbb1ab317..2c25b9039a4c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1086,7 +1086,7 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy) task_pid_nr(task)); } else { if ((short)oom_adj < task->signal->oom_score_adj_min && - !capable(CAP_SYS_RESOURCE)) { + !ve_capable(CAP_SYS_RESOURCE)) { err = -EACCES; goto err_unlock; } diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c index 0aa73950f4cb..607cc42865f6 100644 --- a/fs/proc/cmdline.c +++ b/fs/proc/cmdline.c @@ -16,7 +16,7 @@ static int cmdline_proc_show(struct seq_file *m, void *v) static int __init proc_cmdline_init(void) { - proc_create_single("cmdline", 0, NULL, cmdline_proc_show); + proc_net_create_single("cmdline", 0, NULL, cmdline_proc_show); return 0; } fs_initcall(proc_cmdline_init); diff --git a/fs/proc/cpuinfo.c b/fs/proc/cpuinfo.c index 419760fd77bd..f1793017a3dc 100644 --- a/fs/proc/cpuinfo.c +++ b/fs/proc/cpuinfo.c @@ -26,7 +26,7 @@ static const struct proc_ops cpuinfo_proc_ops = { static int __init proc_cpuinfo_init(void) { - proc_create("cpuinfo", 0, NULL, &cpuinfo_proc_ops); + proc_create("cpuinfo", S_ISVTX, NULL, &cpuinfo_proc_ops); return 0; } fs_initcall(proc_cpuinfo_init); diff --git a/fs/proc/devices.c b/fs/proc/devices.c index 837971e74109..7b8efb3d94d7 100644 --- a/fs/proc/devices.c +++ b/fs/proc/devices.c @@ -54,7 +54,7 @@ static const struct seq_operations devinfo_ops = { static int __init proc_devices_init(void) { - 
proc_create_seq("devices", 0, NULL, &devinfo_ops); + proc_net_create_seq("devices", 0, NULL, &devinfo_ops); return 0; } fs_initcall(proc_devices_init); diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 5b78739e60e4..24cdac695f33 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -115,6 +115,11 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir, return true; } +bool proc_in_container(struct super_block *sb) +{ + return !ve_is_super(get_exec_env()); +} + static int proc_notify_change(struct user_namespace *mnt_userns, struct dentry *dentry, struct iattr *iattr) { @@ -122,6 +127,10 @@ static int proc_notify_change(struct user_namespace *mnt_userns, struct proc_dir_entry *de = PDE(inode); int error; + if (proc_in_container(dentry->d_sb) && + (iattr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID))) + return -EPERM; + error = setattr_prepare(&init_user_ns, dentry, iattr); if (error) return error; @@ -129,8 +138,14 @@ static int proc_notify_change(struct user_namespace *mnt_userns, setattr_copy(&init_user_ns, inode, iattr); mark_inode_dirty(inode); - proc_set_user(de, inode->i_uid, inode->i_gid); - de->mode = inode->i_mode; + if (iattr->ia_valid & ATTR_UID) + de->uid = inode->i_uid; + if (iattr->ia_valid & ATTR_GID) + de->gid = inode->i_gid; + if (iattr->ia_valid & ATTR_MODE) + de->mode = (de->mode & ~S_IRWXUGO) | + (inode->i_mode & S_IRWXUGO); + return 0; } @@ -245,10 +260,15 @@ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry, struct proc_dir_entry *de) { struct inode *inode; + bool in_container = proc_in_container(dentry->d_sb); read_lock(&proc_subdir_lock); de = pde_subdir_find(de, dentry->d_name.name, dentry->d_name.len); if (de) { + if (in_container && !(de->mode & S_ISVTX)) { + read_unlock(&proc_subdir_lock); + return ERR_PTR(-ENOENT); + } pde_get(de); read_unlock(&proc_subdir_lock); inode = proc_get_inode(dir->i_sb, de); @@ -285,6 +305,7 @@ int proc_readdir_de(struct file *file, struct dir_context *ctx, struct proc_dir_entry 
*de) { int i; + bool in_container = proc_in_container(file->f_path.dentry->d_sb); if (!dir_emit_dots(file, ctx)) return 0; @@ -297,14 +318,22 @@ int proc_readdir_de(struct file *file, struct dir_context *ctx, read_unlock(&proc_subdir_lock); return 0; } - if (!i) - break; + if (!in_container || (de->mode & S_ISVTX)) { + if (!i) + break; + i--; + } de = pde_subdir_next(de); - i--; } do { struct proc_dir_entry *next; + + if (in_container && !(de->mode & S_ISVTX)) { + de = pde_subdir_next(de); + continue; + } + pde_get(de); read_unlock(&proc_subdir_lock); if (!dir_emit(ctx, de->name, de->namelen, @@ -453,13 +482,12 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, return ent; } -struct proc_dir_entry *proc_symlink(const char *name, +struct proc_dir_entry *proc_symlink_mode(const char *name, umode_t mode, struct proc_dir_entry *parent, const char *dest) { struct proc_dir_entry *ent; - ent = __proc_create(&parent, name, - (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1); + ent = __proc_create(&parent, name, S_IFLNK | mode, 1); if (ent) { ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL); @@ -474,7 +502,7 @@ struct proc_dir_entry *proc_symlink(const char *name, } return ent; } -EXPORT_SYMBOL(proc_symlink); +EXPORT_SYMBOL(proc_symlink_mode); struct proc_dir_entry *_proc_mkdir(const char *name, umode_t mode, struct proc_dir_entry *parent, void *data, bool force_lookup) @@ -542,7 +570,7 @@ struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode, if ((mode & S_IFMT) == 0) mode |= S_IFREG; - if ((mode & S_IALLUGO) == 0) + if ((mode & S_IRWXUGO) == 0) mode |= S_IRUGO; if (WARN_ON_ONCE(!S_ISREG(mode))) return NULL; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 599eb724ff2d..35699dc6248f 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -658,8 +658,8 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) return inode; } - if (de->mode) { - inode->i_mode = de->mode; + if (de->mode & (S_IFMT | 
S_IRWXUGO)) { + inode->i_mode = de->mode & (S_IFMT | S_IRWXUGO); inode->i_uid = de->uid; inode->i_gid = de->gid; } diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c index f32878d9a39f..c651c6a2d285 100644 --- a/fs/proc/loadavg.c +++ b/fs/proc/loadavg.c @@ -27,7 +27,7 @@ static int loadavg_proc_show(struct seq_file *m, void *v) static int __init proc_loadavg_init(void) { - proc_create_single("loadavg", 0, NULL, loadavg_proc_show); + proc_net_create_single("loadavg", 0, NULL, loadavg_proc_show); return 0; } fs_initcall(proc_loadavg_init); diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 90985676a9e7..8f7335f464c7 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -164,7 +164,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) static int __init proc_meminfo_init(void) { - proc_create_single("meminfo", 0, NULL, meminfo_proc_show); + proc_net_create_single("meminfo", 0, NULL, meminfo_proc_show); return 0; } fs_initcall(proc_meminfo_init); diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 15c2e55d2ed2..058b4ae6f07c 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -391,7 +391,7 @@ static struct pernet_operations __net_initdata proc_net_ns_ops = { int __init proc_net_init(void) { - proc_symlink("net", NULL, "self/net"); + proc_symlink_mode("net", S_ISVTX | S_IRWXUGO, NULL, "self/net"); return register_pernet_subsys(&proc_net_ns_ops); } diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index f6415add610e..842ee27f08a8 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1727,7 +1727,7 @@ int __init proc_sys_init(void) { struct proc_dir_entry *proc_sys_root; - proc_sys_root = proc_mkdir("sys", NULL); + proc_sys_root = proc_mkdir_mode("sys", S_ISVTX | S_IRUGO | S_IXUGO, NULL); proc_sys_root->proc_iops = &proc_sys_dir_operations; proc_sys_root->proc_dir_ops = &proc_sys_dir_file_operations; proc_sys_root->nlink = 0; diff --git a/fs/proc/root.c b/fs/proc/root.c index 02a4a3d631b0..5e5944a0daeb 100644 --- 
a/fs/proc/root.c +++ b/fs/proc/root.c @@ -291,12 +291,13 @@ void __init proc_root_init(void) set_proc_pid_nlink(); proc_self_init(); proc_thread_self_init(); - proc_symlink("mounts", NULL, "self/mounts"); + proc_symlink_mode("mounts", S_ISVTX | S_IRWXUGO, NULL, "self/mounts"); proc_net_init(); - proc_mkdir("fs", NULL); + proc_mkdir_mode("fs", S_ISVTX | S_IRUGO | S_IXUGO, NULL); proc_mkdir("driver", NULL); - proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */ + /* somewhere for the nfsd filesystem to be mounted */ + proc_mkdir_mode("fs/nfsd", S_ISVTX | S_IRUGO | S_IXUGO, NULL); #if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) /* just give it a mountpoint */ proc_create_mount_point("openprom"); diff --git a/fs/proc/self.c b/fs/proc/self.c index 72cd69bcaf4a..31644779dd6e 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -47,7 +47,7 @@ int proc_setup_self(struct super_block *s) if (inode) { inode->i_ino = self_inum; inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); - inode->i_mode = S_IFLNK | S_IRWXUGO; + inode->i_mode = S_IFLNK | S_IRWXUGO | S_ISVTX; inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; inode->i_op = &proc_self_inode_operations; diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 6561a06ef905..3f102d658b33 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -236,7 +236,7 @@ static const struct proc_ops stat_proc_ops = { static int __init proc_stat_init(void) { - proc_create("stat", 0, NULL, &stat_proc_ops); + proc_create("stat", S_ISVTX, NULL, &stat_proc_ops); return 0; } fs_initcall(proc_stat_init); diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 5a1b228964fb..1e51f5598586 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c @@ -35,7 +35,7 @@ static int uptime_proc_show(struct seq_file *m, void *v) static int __init proc_uptime_init(void) { - proc_create_single("uptime", 0, NULL, uptime_proc_show); + proc_net_create_single("uptime", 0, NULL, 
uptime_proc_show); return 0; } fs_initcall(proc_uptime_init); diff --git a/fs/proc/version.c b/fs/proc/version.c index b449f186577f..233801792fe8 100644 --- a/fs/proc/version.c +++ b/fs/proc/version.c @@ -17,7 +17,7 @@ static int version_proc_show(struct seq_file *m, void *v) static int __init proc_version_init(void) { - proc_create_single("version", 0, NULL, version_proc_show); + proc_net_create_single("version", 0, NULL, version_proc_show); return 0; } fs_initcall(proc_version_init); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 51504694bf41..aa59b35b9184 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -79,8 +79,13 @@ typedef int (*proc_write_t)(struct file *, char *, size_t); extern void proc_root_init(void); extern void proc_flush_pid(struct pid *); -extern struct proc_dir_entry *proc_symlink(const char *, - struct proc_dir_entry *, const char *); +extern struct proc_dir_entry *proc_symlink_mode(const char *name, umode_t mode, + struct proc_dir_entry *parent, const char *dest); +static inline struct proc_dir_entry *proc_symlink(const char *name, + struct proc_dir_entry *parent, const char *dest) +{ + return proc_symlink_mode(name, S_IRWXUGO, parent, dest); +} struct proc_dir_entry *_proc_mkdir(const char *, umode_t, struct proc_dir_entry *, void *, bool); extern struct proc_dir_entry *proc_mkdir(const char *, struct proc_dir_entry *); extern struct proc_dir_entry *proc_mkdir_data(const char *, umode_t, @@ -161,6 +166,8 @@ static inline void proc_flush_pid(struct pid *pid) static inline struct proc_dir_entry *proc_symlink(const char *name, struct proc_dir_entry *parent,const char *dest) { return NULL;} +static inline struct proc_dir_entry *proc_symlink_mode(const char *name, + umode_t m, struct proc_dir_entry *p, const char *d) { return NULL; } static inline struct proc_dir_entry *proc_mkdir(const char *name, struct proc_dir_entry *parent) {return NULL;} static inline struct proc_dir_entry 
*proc_create_mount_point(const char *name) { return NULL; } diff --git a/ipc/util.c b/ipc/util.c index 0027e47626b7..963945a9d2e9 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -150,7 +150,7 @@ void __init ipc_init_proc_interface(const char *path, const char *header, iface->show = show; pde = proc_create_data(path, - S_IRUGO, /* world readable */ + S_ISVTX | S_IRUGO, /* world readable */ NULL, /* parent dir */ &sysvipc_proc_ops, iface); diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index da0e69dae51b..111732c40ffc 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5942,7 +5942,8 @@ int __init cgroup_init(void) WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup")); WARN_ON(register_filesystem(&cgroup_fs_type)); WARN_ON(register_filesystem(&cgroup2_fs_type)); - WARN_ON(!proc_create_single("cgroups", 0, NULL, proc_cgroupstats_show)); + WARN_ON(!proc_net_create_single("cgroups", 0, NULL, + proc_cgroupstats_show)); #ifdef CONFIG_CPUSETS WARN_ON(register_filesystem(&cpuset_fs_type)); #endif diff --git a/kernel/module.c b/kernel/module.c index c402a6949394..c227b6d25a52 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -4532,8 +4532,6 @@ static char *module_flags(struct module *mod, char *buf) static void *m_start(struct seq_file *m, loff_t *pos) { mutex_lock(&module_mutex); - if (!ve_is_super(get_exec_env())) - return NULL; return seq_list_start(&modules, *pos); } @@ -4620,7 +4618,7 @@ static const struct proc_ops modules_proc_ops = { static int __init proc_modules_init(void) { - proc_create("modules", 0, NULL, &modules_proc_ops); + proc_create("modules", S_ISVTX, NULL, &modules_proc_ops); return 0; } module_init(proc_modules_init); diff --git a/mm/swapfile.c b/mm/swapfile.c index 1e07d1c776f2..6c3cdf079fb3 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2817,7 +2817,7 @@ static const struct proc_ops swaps_proc_ops = { static int __init procswaps_init(void) { - proc_create("swaps", 0, NULL, &swaps_proc_ops); + proc_create("swaps", 
S_ISVTX, NULL, &swaps_proc_ops); return 0; } __initcall(procswaps_init); diff --git a/mm/vmstat.c b/mm/vmstat.c index b0534e068166..44ce039d19c6 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -2046,7 +2046,7 @@ void __init init_mm_internals(void) #ifdef CONFIG_PROC_FS proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op); proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op); - proc_create_seq("vmstat", 0444, NULL, &vmstat_op); + proc_net_create_seq("vmstat", 0444, NULL, &vmstat_op); proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op); #endif } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index c507a7f8d2c0..2eedcaeeec45 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1756,6 +1756,7 @@ int xt_proto_init(struct net *net, u_int8_t af) struct proc_dir_entry *proc; kuid_t root_uid; kgid_t root_gid; + int mode; #endif if (af >= ARRAY_SIZE(xt_prefix)) @@ -1763,12 +1764,14 @@ int xt_proto_init(struct net *net, u_int8_t af) #ifdef CONFIG_PROC_FS + mode = 0440 | S_ISVTX; + root_uid = make_kuid(net->user_ns, 0); root_gid = make_kgid(net->user_ns, 0); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc = proc_net_create_net_data(buf, 0440, net->proc_net, &xt_table_seq_ops, + proc = proc_net_create_net_data(buf, mode, net->proc_net, &xt_table_seq_ops, sizeof(struct seq_net_private), (void *)(unsigned long)af); if (!proc) @@ -1778,7 +1781,7 @@ int xt_proto_init(struct net *net, u_int8_t af) strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc = proc_net_create_seq_private(buf, 0440, net->proc_net, + proc = proc_net_create_seq_private(buf, mode, net->proc_net, &xt_match_seq_ops, sizeof(struct nf_mttg_trav), (void *)(unsigned long)af); if (!proc) @@ -1788,7 +1791,7 @@ int xt_proto_init(struct net *net, u_int8_t af) strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); - proc = proc_net_create_seq_private(buf, 
0440, net->proc_net, + proc = proc_net_create_seq_private(buf, mode, net->proc_net, &xt_target_seq_ops, sizeof(struct nf_mttg_trav), (void *)(unsigned long)af); if (!proc) -- 2.31.1 _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel