From: Stanislav Kinsburskiy <skinsbur...@virtuozzo.com>

Intead of hardcoded "init" namespaces.

Signed-off-by: Stanislav Kinsburskiy <skinsbur...@virtuozzo.com>
Reviewed-by: Andrey Ryabinin <aryabi...@virtuozzo.com>

+++
drivers/connector: fix nullptr dereference ve->ve_ns->pid_ns

cn_proc_ack incorrectly assumes that ve->ve_ns is not NULL. Check it.
Also add rcu_dereference since ve->ve_ns is rcu-protected.
For various fill callbacks we can omit rcu_read_lock and rcu_dereference
since ve_ns can only be destroyed after cn_fini_ve, which unregisters
connector and frees its data.

Example of crash when someone tries to register to connector with
ve_ns == NULL

[   52.805823] BUG: unable to handle kernel NULL pointer dereference at 
0000000000000020
[   52.807174] PGD 0 P4D 0
[   52.807548] Oops: 0000 [#1] SMP PTI
[   52.808060] CPU: 0 PID: 311 Comm: a.out ve: 1 Not tainted 4.18.0.ovz.custom 
#79 custom
[   52.809192] Hardware name: Virtuozzo KVM, BIOS 1.11.0-2.vz7.2 04/01/2014
[   52.810183] RIP: 0010:cn_proc_mcast_ctl+0x73/0x1a0
[   52.810994] Code: 41 5c 41 5d c3 48 89 fb 49 89 f4 4c 8b ad c0 08 00 00 e8 
80 f6 95 ff 84 c0 74 cb 48 89 ef e8 f4 a5 8f ff 49 8b 95 98 01 00 00 <46
[   52.813662] RSP: 0018:ffffaadb003cbca0 EFLAGS: 00010206
[   52.814418] RAX: ffffffff980507a0 RBX: ffff8b99bb376a10 RCX: 0000000000000028
[   52.815448] RDX: 0000000000000000 RSI: ffff8b99b34c9628 RDI: ffff8b99bb300000
[   52.816509] RBP: ffff8b99bb300000 R08: ffffffff98225f80 R09: 0000000000000000
[   52.817645] R10: 0000000000000000 R11: 0000000000000028 R12: ffff8b99b34c9628
[   52.818664] R13: ffff8b99bb23b4d0 R14: ffff8b99b34c9628 R15: ffffaadb003cbe28
[   52.819716] FS:  00007f1555cc4540(0000) GS:ffff8b99bea00000(0000) 
knlGS:0000000000000000
[   52.820922] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   52.821746] CR2: 0000000000000020 CR3: 000000003d2d8000 CR4: 00000000000006f0
[   52.822767] Call Trace:
[   52.823183]  ? _cond_resched+0x15/0x30
[   52.823809]  ? __kmalloc_node_track_caller+0x1cb/0x280
[   52.824554]  ? __alloc_skb+0x82/0x1c0
[   52.825097]  ? __netlink_lookup+0xe6/0x150
[   52.825689]  cn_rx_skb+0xe1/0x120
[   52.826174]  netlink_unicast+0x21d/0x260
[   52.826739]  netlink_sendmsg+0x22e/0x400
[   52.827308]  sock_sendmsg+0x4c/0x50
[   52.827815]  __sys_sendto+0xee/0x160
[   52.828337]  ? __sys_bind+0xd2/0xf0
[   52.828845]  ? handle_mm_fault+0xc2/0x1d0
[   52.829428]  ? __do_page_fault+0x23a/0x4f0
[   52.830020]  __x64_sys_sendto+0x24/0x30
[   52.830576]  do_syscall_64+0x5b/0x1d0
[   52.831110]  entry_SYSCALL_64_after_hwframe+0x65/0xca
[   52.831836] RIP: 0033:0x7f1555bfd20c
[   52.832358] Code: 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 41 89 ca 64 8b 
04 25 18 00 00 00 85 c0 75 19 45 31 c9 45 31 c0 b8 2c 00 00 00 0f 05 <40
[   52.835010] RSP: 002b:00007fffc61bb878 EFLAGS: 00000246 ORIG_RAX: 
000000000000002c
[   52.836098] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f1555bfd20c
[   52.837115] RDX: 0000000000000028 RSI: 00007fffc61bb890 RDI: 0000000000000003
[   52.838137] RBP: 00007fffc61bb8c0 R08: 0000000000000000 R09: 0000000000000000
[   52.839158] R10: 0000000000000000 R11: 0000000000000246 R12: 00005630710cd120
[   52.840176] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[   52.841196] Modules linked in:
[   52.841639] Features: eBPF/cgroup
[   52.842120] CR2: 0000000000000020
[   52.842645] ---[ end trace 4508d0fc7c54945c ]---
[   52.843323] RIP: 0010:cn_proc_mcast_ctl+0x73/0x1a0
[   52.844018] Code: 41 5c 41 5d c3 48 89 fb 49 89 f4 4c 8b ad c0 08 00 00 e8 
80 f6 95 ff 84 c0 74 cb 48 89 ef e8 f4 a5 8f ff 49 8b 95 98 01 00 00 <46
[   52.846714] RSP: 0018:ffffaadb003cbca0 EFLAGS: 00010206
[   52.847481] RAX: ffffffff980507a0 RBX: ffff8b99bb376a10 RCX: 0000000000000028
[   52.848507] RDX: 0000000000000000 RSI: ffff8b99b34c9628 RDI: ffff8b99bb300000
[   52.849533] RBP: ffff8b99bb300000 R08: ffffffff98225f80 R09: 0000000000000000
[   52.850561] R10: 0000000000000000 R11: 0000000000000028 R12: ffff8b99b34c9628
[   52.851588] R13: ffff8b99bb23b4d0 R14: ffff8b99b34c9628 R15: ffffaadb003cbe28
[   52.852616] FS:  00007f1555cc4540(0000) GS:ffff8b99bea00000(0000) 
knlGS:0000000000000000
[   52.853778] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   52.854649] CR2: 0000000000000020 CR3: 000000003d2d8000 CR4: 00000000000006f0
[   52.855681] Kernel panic - not syncing: Fatal exception
[   52.856567] Kernel Offset: 0x15800000 from 0xffffffff81000000 (relocation 
range: 0xffffffff80000000-0xffffffffbfffffff)
[   52.858158] ---[ end Kernel panic - not syncing: Fatal exception ]---

https://jira.sw.ru/browse/PSBM-130894
Signed-off-by: Andrey Zhadchenko <andrey.zhadche...@virtuozzo.com>

v2: remove a lot of excessive rcu_dereference in connector

 drivers/connector/cn_proc.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

vz9 change: make fields parent_pid and parent_tgid also take VE pidns

(cherry picked from vz8 commit 6233cc739a7a6ee3d9e399d2df99c216b41e63f1)
Signed-off-by: Pavel Tikhomirov <ptikhomi...@virtuozzo.com>
---
 drivers/connector/cn_proc.c | 81 +++++++++++++++++++++++--------------
 1 file changed, 50 insertions(+), 31 deletions(-)

diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index 455c84125251..c763c62ee2c9 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -131,14 +131,15 @@ static bool fill_fork_event(struct proc_event *ev, struct 
ve_struct *ve,
                            struct task_struct *task, int unused)
 {
        struct task_struct *parent;
+       struct pid_namespace *pid_ns = ve->ve_ns->pid_ns_for_children;
 
        rcu_read_lock();
        parent = rcu_dereference(task->real_parent);
-       ev->event_data.fork.parent_pid = task_pid_nr_ns(parent, &init_pid_ns);
-       ev->event_data.fork.parent_tgid = task_tgid_nr_ns(parent, &init_pid_ns);
+       ev->event_data.fork.parent_pid = task_pid_nr_ns(parent, pid_ns);
+       ev->event_data.fork.parent_tgid = task_tgid_nr_ns(parent, pid_ns);
        rcu_read_unlock();
-       ev->event_data.fork.child_pid = task_pid_nr_ns(task, &init_pid_ns);
-       ev->event_data.fork.child_tgid = task_tgid_nr_ns(task, &init_pid_ns);
+       ev->event_data.fork.child_pid = task_pid_nr_ns(task, pid_ns);
+       ev->event_data.fork.child_tgid = task_tgid_nr_ns(task, pid_ns);
        return true;
 }
 
@@ -150,8 +151,10 @@ void proc_fork_connector(struct task_struct *task)
 static bool fill_exec_event(struct proc_event *ev, struct ve_struct *ve,
                            struct task_struct *task, int unused)
 {
-       ev->event_data.exec.process_pid = task_pid_nr_ns(task, &init_pid_ns);
-       ev->event_data.exec.process_tgid = task_tgid_nr_ns(task, &init_pid_ns);
+       struct pid_namespace *pid_ns = ve->ve_ns->pid_ns_for_children;
+
+       ev->event_data.exec.process_pid = task_pid_nr_ns(task, pid_ns);
+       ev->event_data.exec.process_tgid = task_tgid_nr_ns(task, pid_ns);
        return true;
 }
 
@@ -164,17 +167,19 @@ static bool fill_id_event(struct proc_event *ev, struct 
ve_struct *ve,
                          struct task_struct *task, int which_id)
 {
        const struct cred *cred;
+       struct pid_namespace *pid_ns = ve->ve_ns->pid_ns_for_children;
+       struct user_namespace *user_ns = ve->init_cred->user_ns;
 
-       ev->event_data.id.process_pid = task_pid_nr_ns(task, &init_pid_ns);
-       ev->event_data.id.process_tgid = task_tgid_nr_ns(task, &init_pid_ns);
+       ev->event_data.id.process_pid = task_pid_nr_ns(task, pid_ns);
+       ev->event_data.id.process_tgid = task_tgid_nr_ns(task, pid_ns);
        rcu_read_lock();
        cred = __task_cred(task);
        if (which_id == PROC_EVENT_UID) {
-               ev->event_data.id.r.ruid = from_kuid_munged(&init_user_ns, 
cred->uid);
-               ev->event_data.id.e.euid = from_kuid_munged(&init_user_ns, 
cred->euid);
+               ev->event_data.id.r.ruid = from_kuid_munged(user_ns, cred->uid);
+               ev->event_data.id.e.euid = from_kuid_munged(user_ns, 
cred->euid);
        } else if (which_id == PROC_EVENT_GID) {
-               ev->event_data.id.r.rgid = from_kgid_munged(&init_user_ns, 
cred->gid);
-               ev->event_data.id.e.egid = from_kgid_munged(&init_user_ns, 
cred->egid);
+               ev->event_data.id.r.rgid = from_kgid_munged(user_ns, cred->gid);
+               ev->event_data.id.e.egid = from_kgid_munged(user_ns, 
cred->egid);
        } else {
                rcu_read_unlock();
                return false;
@@ -191,8 +196,10 @@ void proc_id_connector(struct task_struct *task, int 
which_id)
 static bool fill_sid_event(struct proc_event *ev, struct ve_struct *ve,
                           struct task_struct *task, int unused)
 {
-       ev->event_data.sid.process_pid = task_pid_nr_ns(task, &init_pid_ns);
-       ev->event_data.sid.process_tgid = task_tgid_nr_ns(task, &init_pid_ns);
+       struct pid_namespace *pid_ns = ve->ve_ns->pid_ns_for_children;
+
+       ev->event_data.sid.process_pid = task_pid_nr_ns(task, pid_ns);
+       ev->event_data.sid.process_tgid = task_tgid_nr_ns(task, pid_ns);
        return true;
 }
 
@@ -204,11 +211,13 @@ void proc_sid_connector(struct task_struct *task)
 static bool fill_ptrace_event(struct proc_event *ev, struct ve_struct *ve,
                              struct task_struct *task, int ptrace_id)
 {
-       ev->event_data.ptrace.process_pid  = task_pid_nr_ns(task, &init_pid_ns);
-       ev->event_data.ptrace.process_tgid = task_tgid_nr_ns(task, 
&init_pid_ns);
+       struct pid_namespace *pid_ns = ve->ve_ns->pid_ns_for_children;
+
+       ev->event_data.ptrace.process_pid  = task_pid_nr_ns(task, pid_ns);
+       ev->event_data.ptrace.process_tgid = task_tgid_nr_ns(task, pid_ns);
        if (ptrace_id == PTRACE_ATTACH) {
-               ev->event_data.ptrace.tracer_pid  = task_pid_nr_ns(current, 
&init_pid_ns);
-               ev->event_data.ptrace.tracer_tgid = task_tgid_nr_ns(current, 
&init_pid_ns);
+               ev->event_data.ptrace.tracer_pid  = task_pid_nr_ns(current, 
pid_ns);
+               ev->event_data.ptrace.tracer_tgid = task_tgid_nr_ns(current, 
pid_ns);
        } else if (ptrace_id == PTRACE_DETACH) {
                ev->event_data.ptrace.tracer_pid  = 0;
                ev->event_data.ptrace.tracer_tgid = 0;
@@ -226,8 +235,10 @@ void proc_ptrace_connector(struct task_struct *task, int 
ptrace_id)
 static bool fill_comm_event(struct proc_event *ev, struct ve_struct *ve,
                            struct task_struct *task, int unused)
 {
-       ev->event_data.comm.process_pid  = task_pid_nr_ns(task, &init_pid_ns);
-       ev->event_data.comm.process_tgid = task_tgid_nr_ns(task, &init_pid_ns);
+       struct pid_namespace *pid_ns = ve->ve_ns->pid_ns_for_children;
+
+       ev->event_data.comm.process_pid  = task_pid_nr_ns(task, pid_ns);
+       ev->event_data.comm.process_tgid = task_tgid_nr_ns(task, pid_ns);
        get_task_comm(ev->event_data.comm.comm, task);
        return true;
 }
@@ -240,20 +251,21 @@ void proc_comm_connector(struct task_struct *task)
 static bool fill_coredump_event(struct proc_event *ev, struct ve_struct *ve,
                                struct task_struct *task, int unused)
 {
+       struct pid_namespace *pid_ns = ve->ve_ns->pid_ns_for_children;
        struct task_struct *parent;
 
        ev->event_data.coredump.process_pid =
-               task_pid_nr_ns(task, &init_pid_ns);
+               task_pid_nr_ns(task, pid_ns);
        ev->event_data.coredump.process_tgid =
-               task_tgid_nr_ns(task, &init_pid_ns);
+               task_tgid_nr_ns(task, pid_ns);
 
        rcu_read_lock();
        if (pid_alive(task)) {
                parent = rcu_dereference(task->real_parent);
                ev->event_data.coredump.parent_pid =
-                       task_pid_nr_ns(parent, &init_pid_ns);
+                       task_pid_nr_ns(parent, pid_ns);
                ev->event_data.coredump.parent_tgid =
-                       task_tgid_nr_ns(parent, &init_pid_ns);
+                       task_tgid_nr_ns(parent, pid_ns);
        }
        rcu_read_unlock();
        return true;
@@ -267,10 +279,11 @@ void proc_coredump_connector(struct task_struct *task)
 static bool fill_exit_event(struct proc_event *ev, struct ve_struct *ve,
                            struct task_struct *task, int unused)
 {
+       struct pid_namespace *pid_ns = ve->ve_ns->pid_ns_for_children;
        struct task_struct *parent;
 
-       ev->event_data.exit.process_pid = task_pid_nr_ns(task, &init_pid_ns);
-       ev->event_data.exit.process_tgid = task_tgid_nr_ns(task, &init_pid_ns);
+       ev->event_data.exit.process_pid = task_pid_nr_ns(task, pid_ns);
+       ev->event_data.exit.process_tgid = task_tgid_nr_ns(task, pid_ns);
        ev->event_data.exit.exit_code = task->exit_code;
        ev->event_data.exit.exit_signal = task->exit_signal;
 
@@ -278,9 +291,9 @@ static bool fill_exit_event(struct proc_event *ev, struct 
ve_struct *ve,
        if (pid_alive(task)) {
                parent = rcu_dereference(task->real_parent);
                ev->event_data.exit.parent_pid = task_pid_nr_ns(parent,
-                                                               &init_pid_ns);
+                                                               pid_ns);
                ev->event_data.exit.parent_tgid = task_tgid_nr_ns(parent,
-                                                                 &init_pid_ns);
+                                                                 pid_ns);
        }
        rcu_read_unlock();
        return true;
@@ -332,6 +345,7 @@ static void cn_proc_mcast_ctl(struct cn_msg *msg,
 {
        enum proc_cn_mcast_op *mc_op = NULL;
        struct ve_struct *ve = get_exec_env();
+       struct nsproxy *ve_ns;
        int err = 0;
 
        if (msg->len != sizeof(*mc_op))
@@ -342,12 +356,17 @@ static void cn_proc_mcast_ctl(struct cn_msg *msg,
         * and user namespaces so ignore requestors from
         * other namespaces.
         */
-       if ((current_user_ns() != &init_user_ns) ||
-           (task_active_pid_ns(current) != &init_pid_ns))
+       rcu_read_lock();
+       ve_ns = rcu_dereference(ve->ve_ns);
+       if (!current_user_ns_initial() || !ve_ns ||
+           (task_active_pid_ns(current) != ve_ns->pid_ns_for_children)) {
+               rcu_read_unlock();
                return;
+       }
+       rcu_read_unlock();
 
        /* Can only change if privileged. */
-       if (!__netlink_ns_capable(nsp, &init_user_ns, CAP_NET_ADMIN)) {
+       if (!__netlink_ns_capable(nsp, ve_init_user_ns(), CAP_NET_ADMIN)) {
                err = EPERM;
                goto out;
        }
-- 
2.31.1

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to