On 23.06.2021 17:07, Konstantin Khorenko wrote:
From: Pavel Tikhomirov <[email protected]>

This member represents the kernel.pid_max sysctl; it is vz-specific but
lives in the pid namespace. To be able to c/r from a libvzctl script it is
better to put pid_max in the ve cgroup; this way we do not need to enter
the container's root pid namespace to get/set this sysctl.

Note: we need to be able to set pid_max on running Container,
as we can't set pid_max before we have ve's pidns.

https://jira.sw.ru/browse/PSBM-48397

Signed-off-by: Pavel Tikhomirov <[email protected]>
Acked-by: Cyrill Gorcunov <[email protected]>

Cherry-picked from vz7 commit be980b3141ca ("ve/pid: Export
kernel.pid_max via ve cgroup")

v2 changes:
* vz8 note: read and write handlers do not need to get ve->op_sem,
   ve->ve_ns is rcu protected, so rcu_read_(un)lock() is enough.

   See ve_drop_context():
         rcu_assign_pointer(ve->ve_ns, NULL);
         synchronize_rcu();
         put_nsproxy(ve_ns);

* Also, the check for ve->is_running is redundant and has been removed.
   Regardless of the ve->is_running value (even if it's 0 already and the
   CT is being stopped), if we dereference ve->ve_ns under rcu and get
   !NULL, we are safe to write the pid_max value.

https://jira.sw.ru/browse/PSBM-102629

Reviewed-by: Pavel Tikhomirov <[email protected]>

Signed-off-by: Konstantin Khorenko <[email protected]>
---
  kernel/ve/ve.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
  1 file changed, 50 insertions(+)

diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 954aa8127d99..9667f9051c02 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -1337,6 +1337,50 @@ enum {
        VE_CF_CLOCK_BOOTBASED,
  };
+/*
+ * Read handler for the "pid_max" file of the ve cgroup.
+ *
+ * Returns pid_max of the pid namespace reached through ve->ve_ns, or 0
+ * when ve->ve_ns or its pid_ns_for_children is not set.
+ */
+static u64 ve_pid_max_read_u64(struct cgroup_subsys_state *css,
+                              struct cftype *cft)
+{
+       struct ve_struct *ve = css_to_ve(css);
+       struct nsproxy *nsp;
+       u64 ret;
+
+       /* ve->ve_ns is only dereferenced inside the RCU read-side section. */
+       rcu_read_lock();
+       nsp = rcu_dereference(ve->ve_ns);
+       if (!nsp || !nsp->pid_ns_for_children)
+               ret = 0;
+       else
+               ret = nsp->pid_ns_for_children->pid_max;
+       rcu_read_unlock();
+
+       return ret;
+}
+
+extern int pid_max_min, pid_max_max;
+
+/*
+ * Write handler for the "pid_max" file of the ve cgroup.
+ *
+ * Stores @val as pid_max of the pid namespace reached through ve->ve_ns.
+ *
+ * Returns 0 on success, -EPERM when ve_is_super(get_exec_env()) is false
+ * and ve->is_pseudosuper is not set, -EBUSY when ve->ve_ns or its
+ * pid_ns_for_children is not set, and -EINVAL when @val is outside
+ * [pid_max_min, pid_max_max].
+ */
+static int ve_pid_max_write_running_u64(struct cgroup_subsys_state *css,
+                                       struct cftype *cft, u64 val)
+{
+       struct ve_struct *ve = css_to_ve(css);
+       struct nsproxy *ve_ns;
+       int err = 0;
+
+       if (!ve_is_super(get_exec_env()) &&
+           !ve->is_pseudosuper)
+               return -EPERM;
+
+       rcu_read_lock();
+       ve_ns = rcu_dereference(ve->ve_ns);
+       if (!ve_ns || !ve_ns->pid_ns_for_children) {
+               err = -EBUSY;
+               goto out_unlock;
+       }
+       if (pid_max_min > val || pid_max_max < val) {
+               err = -EINVAL;
+               goto out_unlock;
+       }
+
+       /*
+        * Use the pointer obtained via rcu_dereference() above: re-reading
+        * ve->ve_ns here could observe NULL once ve_drop_context() has
+        * cleared it.
+        */
+       ve_ns->pid_ns_for_children->pid_max = val;
+
+out_unlock:
+       /* Every path that took rcu_read_lock() must drop it. */
+       rcu_read_unlock();
+
+       return err;
+}
+
  static int ve_ts_read(struct seq_file *sf, void *v)
  {
        struct ve_struct *ve = css_to_ve(seq_css(sf));
@@ -1735,6 +1779,12 @@ static struct cftype ve_cftypes[] = {
                .write                  = ve_ts_write,
                .private                = VE_CF_CLOCK_BOOTBASED,
        },
+       {
+               .name                   = "pid_max",
+               .flags                  = CFTYPE_NOT_ON_ROOT,
+               .read_u64               = ve_pid_max_read_u64,
+               .write_u64              = ve_pid_max_write_running_u64,
+       },
        {
                .name                   = "netns_max_nr",
                .flags                  = CFTYPE_NOT_ON_ROOT,


--
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.
_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to