Before moving tasks out of empty cpusets, update_tasks_nodemask()
is called, which calls do_migrate_pages(xx, from, to). Then those
tasks are moved to an ancestor, and do_migrate_pages() is called
again.
The first time: from = node_to_be_offlined, to = empty.
The second time: from = empty, to = ancestor's nodemask.

So it looks like no pages will be migrated in either call.

Fix this by:

- Don't call update_tasks_nodemask() on empty cpusets.
- Pass cs->old_mems_allowed to do_migrate_pages().

Signed-off-by: Li Zefan <lize...@huawei.com>
---
 kernel/cpuset.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 9bb6a47..de7f6c1 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1574,9 +1574,16 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
                struct cpuset *mems_oldcs = effective_nodemask_cpuset(oldcs);
 
                mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
-               if (is_memory_migrate(cs))
-                       cpuset_migrate_mm(mm, &mems_oldcs->mems_allowed,
+               if (is_memory_migrate(cs)) {
+                       /*
+                        * old_mems_allowed is the same as mems_allowed,
+                        * except when this task is being moved automatically
+                        * due to hotplug; in that case mems_allowed is
+                        * empty and old_mems_allowed is the offlined node.
+                        */
+                       cpuset_migrate_mm(mm, &mems_oldcs->old_mems_allowed,
                                          &cpuset_attach_nodemask_to);
+               }
                mmput(mm);
        }
 
@@ -2168,7 +2175,7 @@ static void cpuset_propagate_hotplug_workfn(struct work_struct *work)
         * for empty cpuset to take on ancestor's cpumask
         */
        if ((sane && cpumask_empty(cs->cpus_allowed)) ||
-           !cpumask_empty(&off_cpus))
+           (!cpumask_empty(&off_cpus) && !cpumask_empty(cs->cpus_allowed)))
                update_tasks_cpumask(cs, NULL);
 
        mutex_lock(&callback_mutex);
@@ -2180,7 +2187,7 @@ static void cpuset_propagate_hotplug_workfn(struct work_struct *work)
         * for empty cpuset to take on ancestor's nodemask
         */
        if ((sane && nodes_empty(cs->mems_allowed)) ||
-           !nodes_empty(off_mems))
+           (!nodes_empty(off_mems) && !nodes_empty(cs->mems_allowed)))
                update_tasks_nodemask(cs, NULL);
 
        is_empty = cpumask_empty(cs->cpus_allowed) ||
-- 
1.8.0.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to