On Tue, Feb 17, 2015 at 02:33:58PM +0300, Kirill Tkhai wrote:

> So, we move task_rq_lock() to sched.h, and dl_task_timer() uses it?

Yep, like this. I've also reverted your earlier patch to dl_task_timer()
back to its original form.

---
Subject: sched: Make dl_task_timer() use task_rq_lock()
From: Peter Zijlstra <pet...@infradead.org>
Date: Tue Feb 17 13:22:25 CET 2015

Kirill reported that a dl task can be throttled and dequeued at the
same time. This happens when it becomes throttled in schedule(), which
is called when the task goes to sleep:

current->state = TASK_INTERRUPTIBLE;
schedule()
    deactivate_task()
        dequeue_task_dl()
            update_curr_dl()
                start_dl_timer()
            __dequeue_task_dl()
    prev->on_rq = 0;

This invalidates the assumption from commit 0f397f2c90ce ("sched/dl:
Fix race in dl_task_timer()"):

  "The only reason we don't strictly need ->pi_lock now is because
   we're guaranteed to have p->state == TASK_RUNNING here and are
   thus free of ttwu races".

And therefore we have to use the full task_rq_lock() here.
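
To illustrate (a sketch only, not part of the patch; the ttwu internals
are abbreviated), the interleaving that taking ->pi_lock excludes:

	/*
	 * After the dequeue above, p->on_rq == 0 and p->state ==
	 * TASK_INTERRUPTIBLE, so a concurrent wakeup can move the task
	 * while the timer holds only the old rq->lock:
	 *
	 *	dl_task_timer()			try_to_wake_up(p)
	 *
	 *	rq = task_rq(p);		raw_spin_lock(&p->pi_lock);
	 *	raw_spin_lock(&rq->lock);	(sees p->on_rq == 0)
	 *					cpu = select_task_rq(p, ...);
	 *					set_task_cpu(p, cpu);
	 *
	 * task_rq(p) != rq from here on; the timer operates on a stale
	 * rq. Holding p->pi_lock, as task_rq_lock() does, serializes
	 * against this wakeup path.
	 */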

This also fixes an earlier oversight: the rq lock loop in
dl_task_timer() was never updated for TASK_ON_RQ_MIGRATING, introduced
by commit cca26e8009d1 ("sched: Teach scheduler to understand
TASK_ON_RQ_MIGRATING state").
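
For reference, a sketch of the old retry loop in dl_task_timer()
(removed in the hunk below), annotated with the window it misses:

	again:
		rq = task_rq(p);
		raw_spin_lock(&rq->lock);
		if (rq != task_rq(p)) {
			/* Task was moved, retrying. */
			raw_spin_unlock(&rq->lock);
			goto again;
		}
		/*
		 * Insufficient: if move_queued_task() has done
		 * set_task_cpu() but not yet enqueued p on the new rq,
		 * the check above passes while p->on_rq is still
		 * TASK_ON_RQ_MIGRATING, and we proceed on a rq the task
		 * is not actually queued on.
		 */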

Reported-by: Kirill Tkhai <ktk...@parallels.com>
Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
---
 kernel/sched/core.c     |   76 ------------------------------------------------
 kernel/sched/deadline.c |   12 +------
 kernel/sched/sched.h    |   76 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 79 insertions(+), 85 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -307,82 +307,6 @@ __read_mostly int scheduler_running;
 int sysctl_sched_rt_runtime = 950000;
 
 /*
- * __task_rq_lock - lock the rq @p resides on.
- */
-static inline struct rq *__task_rq_lock(struct task_struct *p)
-       __acquires(rq->lock)
-{
-       struct rq *rq;
-
-       lockdep_assert_held(&p->pi_lock);
-
-       for (;;) {
-               rq = task_rq(p);
-               raw_spin_lock(&rq->lock);
-               if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
-                       return rq;
-               raw_spin_unlock(&rq->lock);
-
-               while (unlikely(task_on_rq_migrating(p)))
-                       cpu_relax();
-       }
-}
-
-/*
- * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
- */
-static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
-       __acquires(p->pi_lock)
-       __acquires(rq->lock)
-{
-       struct rq *rq;
-
-       for (;;) {
-               raw_spin_lock_irqsave(&p->pi_lock, *flags);
-               rq = task_rq(p);
-               raw_spin_lock(&rq->lock);
-               /*
-                *      move_queued_task()              task_rq_lock()
-                *
-                *      ACQUIRE (rq->lock)
-                *      [S] ->on_rq = MIGRATING         [L] rq = task_rq()
-                *      WMB (__set_task_cpu())          ACQUIRE (rq->lock);
-                *      [S] ->cpu = new_cpu             [L] task_rq()
-                *                                      [L] ->on_rq
-                *      RELEASE (rq->lock)
-                *
-                * If we observe the old cpu in task_rq_lock, the acquire of
-                * the old rq->lock will fully serialize against the stores.
-                *
-                * If we observe the new cpu in task_rq_lock, the acquire will
-                * pair with the WMB to ensure we must then also see migrating.
-                */
-               if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
-                       return rq;
-               raw_spin_unlock(&rq->lock);
-               raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
-
-               while (unlikely(task_on_rq_migrating(p)))
-                       cpu_relax();
-       }
-}
-
-static void __task_rq_unlock(struct rq *rq)
-       __releases(rq->lock)
-{
-       raw_spin_unlock(&rq->lock);
-}
-
-static inline void
-task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
-       __releases(rq->lock)
-       __releases(p->pi_lock)
-{
-       raw_spin_unlock(&rq->lock);
-       raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
-}
-
-/*
  * this_rq_lock - lock this runqueue and disable interrupts.
  */
 static struct rq *this_rq_lock(void)
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -511,16 +511,10 @@ static enum hrtimer_restart dl_task_time
                                                     struct sched_dl_entity,
                                                     dl_timer);
        struct task_struct *p = dl_task_of(dl_se);
+       unsigned long flags;
        struct rq *rq;
-again:
-       rq = task_rq(p);
-       raw_spin_lock(&rq->lock);
 
-       if (rq != task_rq(p)) {
-               /* Task was moved, retrying. */
-               raw_spin_unlock(&rq->lock);
-               goto again;
-       }
+       rq = task_rq_lock(p, &flags);
 
        /*
         * We need to take care of several possible races here:
@@ -555,7 +549,7 @@ static enum hrtimer_restart dl_task_time
                push_dl_task(rq);
 #endif
 unlock:
-       raw_spin_unlock(&rq->lock);
+       task_rq_unlock(rq, p, &flags);
 
        return HRTIMER_NORESTART;
 }
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1484,6 +1484,82 @@ static inline void double_raw_lock(raw_s
 }
 
 /*
+ * __task_rq_lock - lock the rq @p resides on.
+ */
+static inline struct rq *__task_rq_lock(struct task_struct *p)
+       __acquires(rq->lock)
+{
+       struct rq *rq;
+
+       lockdep_assert_held(&p->pi_lock);
+
+       for (;;) {
+               rq = task_rq(p);
+               raw_spin_lock(&rq->lock);
+               if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
+                       return rq;
+               raw_spin_unlock(&rq->lock);
+
+               while (unlikely(task_on_rq_migrating(p)))
+                       cpu_relax();
+       }
+}
+
+/*
+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
+ */
+static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
+       __acquires(p->pi_lock)
+       __acquires(rq->lock)
+{
+       struct rq *rq;
+
+       for (;;) {
+               raw_spin_lock_irqsave(&p->pi_lock, *flags);
+               rq = task_rq(p);
+               raw_spin_lock(&rq->lock);
+               /*
+                *      move_queued_task()              task_rq_lock()
+                *
+                *      ACQUIRE (rq->lock)
+                *      [S] ->on_rq = MIGRATING         [L] rq = task_rq()
+                *      WMB (__set_task_cpu())          ACQUIRE (rq->lock);
+                *      [S] ->cpu = new_cpu             [L] task_rq()
+                *                                      [L] ->on_rq
+                *      RELEASE (rq->lock)
+                *
+                * If we observe the old cpu in task_rq_lock, the acquire of
+                * the old rq->lock will fully serialize against the stores.
+                *
+                * If we observe the new cpu in task_rq_lock, the acquire will
+                * pair with the WMB to ensure we must then also see migrating.
+                */
+               if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
+                       return rq;
+               raw_spin_unlock(&rq->lock);
+               raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
+
+               while (unlikely(task_on_rq_migrating(p)))
+                       cpu_relax();
+       }
+}
+
+static inline void __task_rq_unlock(struct rq *rq)
+       __releases(rq->lock)
+{
+       raw_spin_unlock(&rq->lock);
+}
+
+static inline void
+task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
+       __releases(rq->lock)
+       __releases(p->pi_lock)
+{
+       raw_spin_unlock(&rq->lock);
+       raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
+}
+
+/*
  * double_rq_lock - safely lock two runqueues
  *
  * Note this does not disable interrupts like task_rq_lock,