RT kernels need to ensure that all tasks which are not per CPU kthreads
have left the outgoing CPU to guarantee that no tasks are force migrated
within a migrate disabled section.

There is also some desire to (ab)use fine-grained CPU hotplug control to
clear a CPU from active state in order to force migrate tasks which are
not per CPU kthreads away for power control purposes.

Add a mechanism which waits, after the CPU active flag has been cleared,
until all tasks which have to leave the CPU have moved to a different
online CPU.
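In condensed form, the handshake added below looks roughly like this.
This is an illustrative sketch only: the helper names
wait_for_pushed_tasks() and wake_hotplug_waiter() are invented for this
summary; the rcuwait_*() calls and the locking mirror the actual hunks
in the diff.

#include <linux/rcuwait.h>      /* rcuwait_wait_event(), rcuwait_wake_up() */
#include "sched.h"              /* struct rq; helper names are illustrative */

/* Hotplug control thread side, after the CPU was marked inactive: */
static void wait_for_pushed_tasks(struct rq *rq)
{
        /* Sleep until only the hotplug control thread itself is left. */
        rcuwait_wait_event(&rq->hotplug_wait, rq->nr_running == 1,
                           TASK_UNINTERRUPTIBLE);
}

/* balance_push() side, on the outgoing CPU with rq->lock held: */
static void wake_hotplug_waiter(struct rq *rq)
{
        if (!rq->nr_running && rcuwait_active(&rq->hotplug_wait)) {
                /*
                 * rcuwait_wake_up() ends up in wake_up_process(), which
                 * takes runqueue locks itself, so rq->lock must be
                 * dropped around the wakeup.
                 */
                raw_spin_unlock(&rq->lock);
                rcuwait_wake_up(&rq->hotplug_wait);
                raw_spin_lock(&rq->lock);
        }
}

rcuwait is sufficient here because there is exactly one waiter and it is
pinned to the outgoing CPU, so the rcuwait_active() check in the wake-up
path cannot race with the waiter running on another CPU.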

Signed-off-by: Thomas Gleixner <t...@linutronix.de>
Cc: Ingo Molnar <mi...@kernel.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Juri Lelli <juri.le...@redhat.com>
Cc: Vincent Guittot <vincent.guit...@linaro.org>
Cc: Dietmar Eggemann <dietmar.eggem...@arm.com>
Cc: Steven Rostedt <rost...@goodmis.org>
Cc: Ben Segall <bseg...@google.com>
Cc: Mel Gorman <mgor...@suse.de>
Cc: Daniel Bristot de Oliveira <bris...@redhat.com>
Cc: Valentin Schneider <valentin.schnei...@arm.com>
---
 kernel/sched/core.c  |   44 +++++++++++++++++++++++++++++++++++++++++---
 kernel/sched/sched.h |    4 ++++
 2 files changed, 45 insertions(+), 3 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6892,11 +6892,24 @@ static bool balance_push(struct rq *rq)
        SCHED_WARN_ON(rq->cpu != smp_processor_id());
 
        /*
-        * Both the cpu-hotplug and stop task are in this case and are
+        * Both the cpu-hotplug and stop task are in this class and are
         * required to complete the hotplug process.
         */
-       if (is_per_cpu_kthread(push_task))
+       if (is_per_cpu_kthread(push_task)) {
+               /*
+                * If this is the idle task on the outgoing CPU try to wake
+                * up the hotplug control thread which might wait for the
+                * last task to vanish. The rcuwait_active() check is
+                * accurate here because the waiter is pinned on this CPU
+                * and thus obviously can't be running in parallel.
+                */
+               if (!rq->nr_running && rcuwait_active(&rq->hotplug_wait)) {
+                       raw_spin_unlock(&rq->lock);
+                       rcuwait_wake_up(&rq->hotplug_wait);
+                       raw_spin_lock(&rq->lock);
+               }
                return false;
+       }
 
        get_task_struct(push_task);
        /*
@@ -6929,13 +6942,31 @@ static void balance_push_set(int cpu, bo
        rq_unlock_irqrestore(rq, &rf);
 }
 
-#else
+/*
+ * Invoked from a CPU's hotplug control thread after the CPU has been marked
+ * inactive. All tasks which are not per CPU kernel threads are either
+ * pushed off this CPU now via balance_push() or placed on a different CPU
+ * during wakeup. Wait until the CPU is quiescent.
+ */
+static void balance_hotplug_wait(void)
+{
+       struct rq *rq = this_rq();
+
+       rcuwait_wait_event(&rq->hotplug_wait, rq->nr_running == 1,
+                          TASK_UNINTERRUPTIBLE);
+}
+
+#else /* !CONFIG_HOTPLUG_CPU */
+
+static inline void balance_push_set(int cpu, bool on) { }
 
 static inline bool balance_push(struct rq *rq)
 {
        return false;
 }
 
+static inline void balance_hotplug_wait(void) { }
+
 #endif /* CONFIG_HOTPLUG_CPU */
 
 void set_rq_online(struct rq *rq)
@@ -7090,6 +7121,10 @@ int sched_cpu_deactivate(unsigned int cp
                return ret;
        }
        sched_domains_numa_masks_clear(cpu);
+
+       /* Wait for all non per CPU kernel threads to vanish. */
+       balance_hotplug_wait();
+
        return 0;
 }
 
@@ -7330,6 +7365,9 @@ void __init sched_init(void)
 
                rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func);
 #endif
+#ifdef CONFIG_HOTPLUG_CPU
+               rcuwait_init(&rq->hotplug_wait);
+#endif
 #endif /* CONFIG_SMP */
                hrtick_rq_init(rq);
                atomic_set(&rq->nr_iowait, 0);
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1004,6 +1004,10 @@ struct rq {
 
        /* This is used to determine avg_idle's max value */
        u64                     max_idle_balance_cost;
+
+#ifdef CONFIG_HOTPLUG_CPU
+       struct rcuwait          hotplug_wait;
+#endif
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
