3.8-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Thomas Gleixner <t...@linutronix.de>

commit f2530dc71cf0822f90bb63ea4600caaef33a66bb upstream.

The smpboot threads rely on the park/unpark mechanism which binds per
cpu threads on a particular core. Though the functionality is racy:

CPU0                    CPU1                CPU2
unpark(T)                                   wake_up_process(T)
  clear(SHOULD_PARK)    T runs
                        leave parkme() due to !SHOULD_PARK
  bind_to(CPU2)         BUG_ON(wrong CPU)

We cannot let the tasks move themself to the target CPU as one of
those tasks is actually the migration thread itself, which requires
that it starts running on the target cpu right away.

The solution to this problem is to prevent wakeups in park mode which
are not from unpark(). That way we can guarantee that the association
of the task to the target cpu is working correctly.

Add a new task state (TASK_PARKED) which prevents other wakeups and
use this state explicitly for the unpark wakeup.

Peter noticed: Also, since the task state is visible to userspace and
all the parked tasks are still in the PID space, its a good hint in ps
and friends that these tasks aren't really there for the moment.

The migration thread has another related issue.

CPU0                     CPU1
Bring up CPU2
create_thread(T)
park(T)
 wait_for_completion()
                         parkme()
                         complete()
sched_set_stop_task()
                         schedule(TASK_PARKED)

The sched_set_stop_task() call is issued while the task is on the
runqueue of CPU1 and that confuses the hell out of the stop_task class
on that cpu. So we need the same synchronizaion before
sched_set_stop_task().

Reported-by: Dave Jones <da...@redhat.com>
Reported-and-tested-by: Dave Hansen <d...@sr71.net>
Reported-and-tested-by: Borislav Petkov <b...@alien8.de>
Acked-by: Peter Ziljstra <pet...@infradead.org>
Cc: Srivatsa S. Bhat <srivatsa.b...@linux.vnet.ibm.com>
Cc: dhi...@gmail.com
Cc: Ingo Molnar <mi...@kernel.org>
Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1304091635430.21884@ionos
Signed-off-by: Thomas Gleixner <t...@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gre...@linuxfoundation.org>

---
 fs/proc/array.c              |    1 
 include/linux/sched.h        |    5 ++--
 include/trace/events/sched.h |    2 -
 kernel/kthread.c             |   52 +++++++++++++++++++++++--------------------
 4 files changed, 33 insertions(+), 27 deletions(-)

--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -143,6 +143,7 @@ static const char * const task_state_arr
        "x (dead)",             /*  64 */
        "K (wakekill)",         /* 128 */
        "W (waking)",           /* 256 */
+       "P (parked)",           /* 512 */
 };
 
 static inline const char *get_task_state(struct task_struct *tsk)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -163,9 +163,10 @@ print_cfs_rq(struct seq_file *m, int cpu
 #define TASK_DEAD              64
 #define TASK_WAKEKILL          128
 #define TASK_WAKING            256
-#define TASK_STATE_MAX         512
+#define TASK_PARKED            512
+#define TASK_STATE_MAX         1024
 
-#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW"
+#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP"
 
 extern char ___assert_task_state[1 - 2*!!(
                sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -147,7 +147,7 @@ TRACE_EVENT(sched_switch,
                  __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
                                { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
                                { 16, "Z" }, { 32, "X" }, { 64, "x" },
-                               { 128, "W" }) : "R",
+                               { 128, "K" }, { 256, "W" }, { 512, "P" }) : "R",
                __entry->prev_state & TASK_STATE_MAX ? "+" : "",
                __entry->next_comm, __entry->next_pid, __entry->next_prio)
 );
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -124,12 +124,12 @@ void *kthread_data(struct task_struct *t
 
 static void __kthread_parkme(struct kthread *self)
 {
-       __set_current_state(TASK_INTERRUPTIBLE);
+       __set_current_state(TASK_PARKED);
        while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) {
                if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags))
                        complete(&self->parked);
                schedule();
-               __set_current_state(TASK_INTERRUPTIBLE);
+               __set_current_state(TASK_PARKED);
        }
        clear_bit(KTHREAD_IS_PARKED, &self->flags);
        __set_current_state(TASK_RUNNING);
@@ -256,8 +256,13 @@ struct task_struct *kthread_create_on_no
 }
 EXPORT_SYMBOL(kthread_create_on_node);
 
-static void __kthread_bind(struct task_struct *p, unsigned int cpu)
+static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state)
 {
+       /* Must have done schedule() in kthread() before we set_task_cpu */
+       if (!wait_task_inactive(p, state)) {
+               WARN_ON(1);
+               return;
+       }
        /* It's safe because the task is inactive. */
        do_set_cpus_allowed(p, cpumask_of(cpu));
        p->flags |= PF_THREAD_BOUND;
@@ -274,12 +279,7 @@ static void __kthread_bind(struct task_s
  */
 void kthread_bind(struct task_struct *p, unsigned int cpu)
 {
-       /* Must have done schedule() in kthread() before we set_task_cpu */
-       if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
-               WARN_ON(1);
-               return;
-       }
-       __kthread_bind(p, cpu);
+       __kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(kthread_bind);
 
@@ -324,6 +324,22 @@ static struct kthread *task_get_live_kth
        return NULL;
 }
 
+static void __kthread_unpark(struct task_struct *k, struct kthread *kthread)
+{
+       clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+       /*
+        * We clear the IS_PARKED bit here as we don't wait
+        * until the task has left the park code. So if we'd
+        * park before that happens we'd see the IS_PARKED bit
+        * which might be about to be cleared.
+        */
+       if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
+               if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
+                       __kthread_bind(k, kthread->cpu, TASK_PARKED);
+               wake_up_state(k, TASK_PARKED);
+       }
+}
+
 /**
  * kthread_unpark - unpark a thread created by kthread_create().
  * @k:         thread created by kthread_create().
@@ -336,20 +352,8 @@ void kthread_unpark(struct task_struct *
 {
        struct kthread *kthread = task_get_live_kthread(k);
 
-       if (kthread) {
-               clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
-               /*
-                * We clear the IS_PARKED bit here as we don't wait
-                * until the task has left the park code. So if we'd
-                * park before that happens we'd see the IS_PARKED bit
-                * which might be about to be cleared.
-                */
-               if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
-                       if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
-                               __kthread_bind(k, kthread->cpu);
-                       wake_up_process(k);
-               }
-       }
+       if (kthread)
+               __kthread_unpark(k, kthread);
        put_task_struct(k);
 }
 
@@ -407,7 +411,7 @@ int kthread_stop(struct task_struct *k)
        trace_sched_kthread_stop(k);
        if (kthread) {
                set_bit(KTHREAD_SHOULD_STOP, &kthread->flags);
-               clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+               __kthread_unpark(k, kthread);
                wake_up_process(k);
                wait_for_completion(&kthread->exited);
        }


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to