With the addition of "cpuset.cpus.isolated", it makes sense to add the
restriction that load balancing can only be turned off if the CPUs in
the isolated cpuset are a subset of "cpuset.cpus.isolated".

Signed-off-by: Waiman Long <long...@redhat.com>
---
 Documentation/cgroup-v2.txt |  7 ++++---
 kernel/cgroup/cpuset.c      | 29 ++++++++++++++++++++++++++---
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt
index 8d89dc2..c4227ee 100644
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -1554,9 +1554,10 @@ Cpuset Interface Files
        and will not be moved to other CPUs.
 
        This flag is hierarchical and is inherited by child cpusets. It
-       can be turned off only when the CPUs in this cpuset aren't
-       listed in the cpuset.cpus of other sibling cgroups, and all
-       the child cpusets, if present, have this flag turned off.
+       can be explicitly turned off only when this cpuset is a direct
+       child of the root cgroup and its CPUs are a subset of the
+       root's "cpuset.cpus.isolated".  Moreover, the CPUs cannot be
+       listed in the "cpuset.cpus" of other sibling cgroups.
 
        Once it is off, it cannot be turned back on as long as the
        parent cgroup still has this flag in the off state.
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index c746b18..d05c4c8 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -511,6 +511,16 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
 
        par = parent_cs(cur);
 
+       /*
+        * On default hierarchy with sched_load_balance flag off, the cpu
+        * list must be a subset of the parent's isolated CPU list, if
+        * defined (root).
+        */
+       if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+           !is_sched_load_balance(trial) && par->isolation_count &&
+           !cpumask_subset(trial->cpus_allowed, par->isolated_cpus))
+               goto out;
+
        /* On legacy hierarchy, we must be a subset of our parent cpuset. */
        ret = -EACCES;
        if (!is_in_v2_mode() && !is_cpuset_subset(trial, par))
@@ -1431,10 +1441,16 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
        else
                clear_bit(bit, &trialcs->flags);
 
+       balance_flag_changed = (is_sched_load_balance(cs) !=
+                               is_sched_load_balance(trialcs));
+
        /*
         * On default hierarchy, turning off sched_load_balance flag implies
         * an implicit cpu_exclusive. Turning on sched_load_balance will
         * clear the cpu_exclusive flag.
+        *
+        * sched_load_balance can only be turned off if all the CPUs are
+        * in the parent's isolated CPU list.
         */
        if ((bit == CS_SCHED_LOAD_BALANCE) &&
            cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
@@ -1442,15 +1458,22 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
                        clear_bit(CS_CPU_EXCLUSIVE, &trialcs->flags);
                else
                        set_bit(CS_CPU_EXCLUSIVE, &trialcs->flags);
+
+               if (balance_flag_changed && !turning_on) {
+                       struct cpuset *parent = parent_cs(cs);
+
+                       err = -EBUSY;
+                       if (!parent->isolation_count ||
+                           !cpumask_subset(trialcs->cpus_allowed,
+                                           parent->cpus_allowed))
+                               goto out;
+               }
        }
 
        err = validate_change(cs, trialcs);
        if (err < 0)
                goto out;
 
-       balance_flag_changed = (is_sched_load_balance(cs) !=
-                               is_sched_load_balance(trialcs));
-
        spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
                        || (is_spread_page(cs) != is_spread_page(trialcs)));
 
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to