We should deboost before waking the high-prio task such that
we don't run two tasks with the 'same' priority.

The patch fixed the logic, and introduced rt_mutex_postunlock()
to do some code refactor.

Suggested-by: Peter Zijlstra <pet...@infradead.org>
Signed-off-by: Xunlei Pang <xlp...@redhat.com>
---
 kernel/futex.c                  |  5 ++---
 kernel/locking/rtmutex.c        | 28 ++++++++++++++++++++++++----
 kernel/locking/rtmutex_common.h |  1 +
 3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 4e1a53e..4ae3523 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1524,9 +1524,8 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, 
struct futex_q *this,
         * scheduled away before the wake up can take place.
         */
        spin_unlock(&hb->lock);
-       wake_up_q(&wake_q);
-       if (deboost)
-               rt_mutex_adjust_prio(current);
+
+       rt_mutex_postunlock(&wake_q, deboost);
 
        return 0;
 }
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 3e74660..42bc59b 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1390,12 +1390,32 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
        } else {
                bool deboost = slowfn(lock, &wake_q);
 
-               wake_up_q(&wake_q);
+               rt_mutex_postunlock(&wake_q, deboost);
+       }
+}
+
 
-               /* Undo pi boosting if necessary: */
-               if (deboost)
-                       rt_mutex_adjust_prio(current);
+/*
+ * Undo pi boosting (if necessary) and wake top waiter.
+ */
+void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost)
+{
+       /*
+        * We should deboost before waking the high-prio task such that
+        * we don't run two tasks with the 'same' priority. This however
+        * can lead to prio-inversion if we would get preempted after
+        * the deboost but before waking our high-prio task, hence the
+        * preempt_disable.
+        */
+       if (deboost) {
+               preempt_disable();
+               rt_mutex_adjust_prio(current);
        }
+
+       wake_up_q(wake_q);
+
+       if (deboost)
+               preempt_enable();
 }
 
 /**
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index 4f5f83c..93b0924 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -111,6 +111,7 @@ extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
 extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct 
hrtimer_sleeper *to);
 extern bool rt_mutex_futex_unlock(struct rt_mutex *lock,
                                  struct wake_q_head *wqh);
+extern void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost);
 extern void rt_mutex_adjust_prio(struct task_struct *task);
 
 #ifdef CONFIG_DEBUG_RT_MUTEXES
-- 
1.8.3.1

Reply via email to