Dear RT folks!

I'm pleased to announce the v4.11.7-rt3 patch set. 

Changes since v4.11.7-rt2:

  - Clearing a swap slot took a sleeping lock inside a preempt-disabled
    region. Fixed by dropping the preempt-disable region.

  - The capability check code on arm64 took a mutex in an atomic section.
    The backport of a few patches from upstream made this visible, and the
    fix for it has now also been backported.

  - The removal of TASK_ALL in the last release uncovered a bug where we
    mixed normal wake-ups with wake-ups made for waiters of sleeping
    spinlocks (see the sketch after this list). Reported by Mike Galbraith.

  - Lock stealing for RTMutex wasn't working in v4.11. Reported and
    fixed by Mike Galbraith.

  - The code now compiles for RT + !CONFIG_POSIX_TIMERS. Reported by the
    kbuild test robot.
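
Here is a small userspace sketch of the idea behind the wake-up fix above
(illustration only, not the kernel code; all names in it are made up): each
task carries two independent queue nodes, one for regular wake-ups and one
for wake-ups of sleeping-spinlock waiters, so the two queues can never
clobber each other's list linkage.

  #include <stdbool.h>
  #include <stddef.h>
  #include <stdio.h>

  struct wake_node { struct wake_node *next; };

  struct task {
          const char *name;
          struct wake_node wake_q;          /* regular wake-ups */
          struct wake_node wake_q_sleeper;  /* sleeping-spinlock wake-ups */
  };

  #define WAKE_TAIL ((struct wake_node *)1)

  struct wake_head { struct wake_node *first, **lastp; };

  static void wake_head_init(struct wake_head *h)
  {
          h->first = WAKE_TAIL;
          h->lastp = &h->first;
  }

  static void wake_add(struct wake_head *h, struct task *t, bool sleeper)
  {
          /* each flavour of wake-up uses its own node inside the task */
          struct wake_node *n = sleeper ? &t->wake_q_sleeper : &t->wake_q;

          if (n->next)            /* already queued on this flavour's list */
                  return;
          n->next = WAKE_TAIL;
          *h->lastp = n;
          h->lastp = &n->next;
  }

  static void wake_all(struct wake_head *h, bool sleeper)
  {
          struct wake_node *n = h->first;

          while (n != WAKE_TAIL) {
                  /* container_of by hand: recover the task from its node */
                  size_t off = sleeper ? offsetof(struct task, wake_q_sleeper)
                                       : offsetof(struct task, wake_q);
                  struct task *t = (struct task *)((char *)n - off);

                  n = n->next;
                  if (sleeper)
                          t->wake_q_sleeper.next = NULL;
                  else
                          t->wake_q.next = NULL;
                  printf("wake %s (%s)\n", t->name,
                         sleeper ? "sleeper" : "normal");
          }
          wake_head_init(h);
  }

  int main(void)
  {
          struct task a = { .name = "A" }, b = { .name = "B" };
          struct wake_head normal, sleepers;

          wake_head_init(&normal);
          wake_head_init(&sleepers);
          wake_add(&normal, &a, false);
          wake_add(&sleepers, &a, true);  /* same task, independent node */
          wake_add(&sleepers, &b, true);
          wake_all(&normal, false);
          wake_all(&sleepers, true);
          return 0;
  }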

Known issues
        - CPU hotplug got a little better but can deadlock.

The delta patch against v4.11.7-rt2 is appended below and can be found here:

     https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.11/incr/patch-4.11.7-rt2-rt3.patch.xz

You can get this release via the git tree at:

    git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v4.11.7-rt3

The RT patch against v4.11.7 can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patch-4.11.7-rt3.patch.xz

The split quilt queue is available at:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.11/older/patches-4.11.7-rt3.tar.xz

Sebastian
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -115,6 +115,7 @@ struct arm64_cpu_capabilities {
 
 extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
+extern struct static_key_false arm64_const_caps_ready;
 
 bool this_cpu_has_cap(unsigned int cap);
 
@@ -124,7 +125,7 @@ static inline bool cpu_have_feature(unsigned int num)
 }
 
 /* System capability check for constant caps */
-static inline bool cpus_have_const_cap(int num)
+static inline bool __cpus_have_const_cap(int num)
 {
        if (num >= ARM64_NCAPS)
                return false;
@@ -138,6 +139,14 @@ static inline bool cpus_have_cap(unsigned int num)
        return test_bit(num, cpu_hwcaps);
 }
 
+static inline bool cpus_have_const_cap(int num)
+{
+       if (static_branch_likely(&arm64_const_caps_ready))
+               return __cpus_have_const_cap(num);
+       else
+               return cpus_have_cap(num);
+}
+
 static inline void cpus_set_cap(unsigned int num)
 {
        if (num >= ARM64_NCAPS) {
@@ -145,7 +154,6 @@ static inline void cpus_set_cap(unsigned int num)
                        num, ARM64_NCAPS);
        } else {
                __set_bit(num, cpu_hwcaps);
-               static_branch_enable(&cpu_hwcap_keys[num]);
        }
 }
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -24,6 +24,7 @@
 
 #include <linux/types.h>
 #include <linux/kvm_types.h>
+#include <asm/cpufeature.h>
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
@@ -356,9 +357,12 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
                                       unsigned long vector_ptr)
 {
        /*
-        * Call initialization code, and switch to the full blown
-        * HYP code.
+        * Call initialization code, and switch to the full blown HYP code.
+        * If the cpucaps haven't been finalized yet, something has gone very
+        * wrong, and hyp will crash and burn when it uses any
+        * cpus_have_const_cap() wrapper.
         */
+       BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
        __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
 }
 
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -975,8 +975,16 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
  */
 void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
 {
-       for (; caps->matches; caps++)
-               if (caps->enable && cpus_have_cap(caps->capability))
+       for (; caps->matches; caps++) {
+               unsigned int num = caps->capability;
+
+               if (!cpus_have_cap(num))
+                       continue;
+
+               /* Ensure cpus_have_const_cap(num) works */
+               static_branch_enable(&cpu_hwcap_keys[num]);
+
+               if (caps->enable) {
                        /*
                         * Use stop_machine() as it schedules the work allowing
                         * us to modify PSTATE, instead of on_each_cpu() which
@@ -984,6 +992,8 @@ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
                         * we return.
                         */
                        stop_machine(caps->enable, NULL, cpu_online_mask);
+               }
+       }
 }
 
 /*
@@ -1086,6 +1096,14 @@ static void __init setup_feature_capabilities(void)
        enable_cpu_capabilities(arm64_features);
 }
 
+DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready);
+EXPORT_SYMBOL(arm64_const_caps_ready);
+
+static void __init mark_const_caps_ready(void)
+{
+       static_branch_enable(&arm64_const_caps_ready);
+}
+
 /*
  * Check if the current CPU has a given feature capability.
  * Should be called from non-preemptible context.
@@ -1112,6 +1130,7 @@ void __init setup_cpu_features(void)
        /* Set the CPU feature capabilies */
        setup_feature_capabilities();
        enable_errata_workarounds();
+       mark_const_caps_ready();
        setup_elf_hwcaps(arm64_elf_hwcaps);
 
        if (system_supports_32bit_el0())
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -167,7 +167,7 @@ extern struct cred init_cred;
 # define INIT_PERF_EVENTS(tsk)
 #endif
 
-#ifdef CONFIG_PREEMPT_RT_BASE
+#if defined(CONFIG_POSIX_TIMERS) && defined(CONFIG_PREEMPT_RT_BASE)
 # define INIT_TIMER_LIST               .posix_timer_list = NULL,
 #else
 # define INIT_TIMER_LIST
diff --git a/include/linux/sched.h b/include/linux/sched.h
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -797,6 +797,7 @@ struct task_struct {
        raw_spinlock_t                  pi_lock;
 
        struct wake_q_node              wake_q;
+       struct wake_q_node              wake_q_sleeper;
 
 #ifdef CONFIG_RT_MUTEXES
        /* PI waiters blocked on a rt_mutex held by this task: */
diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h
--- a/include/linux/sched/wake_q.h
+++ b/include/linux/sched/wake_q.h
@@ -46,8 +46,20 @@ static inline void wake_q_init(struct wake_q_head *head)
        head->lastp = &head->first;
 }
 
-extern void wake_q_add(struct wake_q_head *head,
-                      struct task_struct *task);
+extern void __wake_q_add(struct wake_q_head *head,
+                        struct task_struct *task, bool sleeper);
+static inline void wake_q_add(struct wake_q_head *head,
+                             struct task_struct *task)
+{
+       __wake_q_add(head, task, false);
+}
+
+static inline void wake_q_add_sleeper(struct wake_q_head *head,
+                                     struct task_struct *task)
+{
+       __wake_q_add(head, task, true);
+}
+
 extern void __wake_up_q(struct wake_q_head *head, bool sleeper);
 static inline void wake_up_q(struct wake_q_head *head)
 {
diff --git a/kernel/fork.c b/kernel/fork.c
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -575,6 +575,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
        tsk->splice_pipe = NULL;
        tsk->task_frag.page = NULL;
        tsk->wake_q.next = NULL;
+       tsk->wake_q_sleeper.next = NULL;
 
        account_kernel_stack(tsk, 1);
 
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -236,26 +236,19 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
 }
 #endif
 
-#define STEAL_NORMAL  0
-#define STEAL_LATERAL 1
-
 /*
  * Only use with rt_mutex_waiter_{less,equal}()
  */
-#define task_to_waiter(p)      \
-       &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline }
+#define task_to_waiter(p) &(struct rt_mutex_waiter) \
+       { .prio = (p)->prio, .deadline = (p)->dl.deadline, .task = (p) }
 
 static inline int
 rt_mutex_waiter_less(struct rt_mutex_waiter *left,
-                    struct rt_mutex_waiter *right, int mode)
+                    struct rt_mutex_waiter *right)
 {
-       if (mode == STEAL_NORMAL) {
-               if (left->prio < right->prio)
-                       return 1;
-       } else {
-               if (left->prio <= right->prio)
-                       return 1;
-       }
+       if (left->prio < right->prio)
+               return 1;
+
        /*
         * If both waiters have dl_prio(), we check the deadlines of the
         * associated tasks.
@@ -287,6 +280,27 @@ rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
        return 1;
 }
 
+#define STEAL_NORMAL  0
+#define STEAL_LATERAL 1
+
+static inline int
+rt_mutex_steal(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, int mode)
+{
+       struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);
+
+       if (waiter == top_waiter || rt_mutex_waiter_less(waiter, top_waiter))
+               return 1;
+
+       /*
+        * Note that RT tasks are excluded from lateral-steals
+        * to prevent the introduction of an unbounded latency.
+        */
+       if (mode == STEAL_NORMAL || rt_task(waiter->task))
+               return 0;
+
+       return rt_mutex_waiter_equal(waiter, top_waiter);
+}
+
 static void
 rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
 {
@@ -298,7 +312,7 @@ rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
        while (*link) {
                parent = *link;
                entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry);
-               if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) {
+               if (rt_mutex_waiter_less(waiter, entry)) {
                        link = &parent->rb_left;
                } else {
                        link = &parent->rb_right;
@@ -337,7 +351,7 @@ rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
        while (*link) {
                parent = *link;
                entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry);
-               if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) {
+               if (rt_mutex_waiter_less(waiter, entry)) {
                        link = &parent->rb_left;
                } else {
                        link = &parent->rb_right;
@@ -847,6 +861,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
  * @task:   The task which wants to acquire the lock
  * @waiter: The waiter that is queued to the lock's wait tree if the
  *         callsite called task_blocked_on_lock(), otherwise NULL
+ * @mode:   Lock steal mode (STEAL_NORMAL, STEAL_LATERAL)
  */
 static int __try_to_take_rt_mutex(struct rt_mutex *lock,
                                  struct task_struct *task,
@@ -886,14 +901,11 @@ static int __try_to_take_rt_mutex(struct rt_mutex *lock,
         */
        if (waiter) {
                /*
-                * If waiter is not the highest priority waiter of
-                * @lock, give up.
+                * If waiter is not the highest priority waiter of @lock,
+                * or its peer when lateral steal is allowed, give up.
                 */
-               if (waiter != rt_mutex_top_waiter(lock)) {
-                       /* XXX rt_mutex_waiter_less() ? */
+               if (!rt_mutex_steal(lock, waiter, mode))
                        return 0;
-               }
-
                /*
                 * We can acquire the lock. Remove the waiter from the
                 * lock waiters tree.
@@ -910,25 +922,12 @@ static int __try_to_take_rt_mutex(struct rt_mutex *lock,
                 * not need to be dequeued.
                 */
                if (rt_mutex_has_waiters(lock)) {
-                       struct task_struct *pown = rt_mutex_top_waiter(lock)->task;
-
-                       if (task != pown)
-                               return 0;
-
                        /*
-                        * Note that RT tasks are excluded from lateral-steals
-                        * to prevent the introduction of an unbounded latency.
+                        * If @task->prio is greater than the top waiter
+                        * priority (kernel view), or equal to it when a
+                        * lateral steal is forbidden, @task lost.
                         */
-                       if (rt_task(task))
-                               mode = STEAL_NORMAL;
-                       /*
-                        * If @task->prio is greater than or equal to
-                        * the top waiter priority (kernel view),
-                        * @task lost.
-                        */
-                       if (!rt_mutex_waiter_less(task_to_waiter(task),
-                                                 rt_mutex_top_waiter(lock),
-                                                 mode))
+                       if (!rt_mutex_steal(lock, task_to_waiter(task), mode))
                                return 0;
                        /*
                         * The current top waiter stays enqueued. We
@@ -1507,7 +1506,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
         */
        preempt_disable();
        if (waiter->savestate)
-               wake_q_add(wake_sleeper_q, waiter->task);
+               wake_q_add_sleeper(wake_sleeper_q, waiter->task);
        else
                wake_q_add(wake_q, waiter->task);
        raw_spin_unlock(&current->pi_lock);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -437,9 +437,15 @@ static bool set_nr_if_polling(struct task_struct *p)
 #endif
 #endif
 
-void wake_q_add(struct wake_q_head *head, struct task_struct *task)
+void __wake_q_add(struct wake_q_head *head, struct task_struct *task,
+                 bool sleeper)
 {
-       struct wake_q_node *node = &task->wake_q;
+       struct wake_q_node *node;
+
+       if (sleeper)
+               node = &task->wake_q_sleeper;
+       else
+               node = &task->wake_q;
 
        /*
         * Atomically grab the task, if ->wake_q is !nil already it means
@@ -468,12 +474,17 @@ void __wake_up_q(struct wake_q_head *head, bool sleeper)
        while (node != WAKE_Q_TAIL) {
                struct task_struct *task;
 
-               task = container_of(node, struct task_struct, wake_q);
+               if (sleeper)
+                       task = container_of(node, struct task_struct, wake_q_sleeper);
+               else
+                       task = container_of(node, struct task_struct, wake_q);
                BUG_ON(!task);
                /* Task can safely be re-inserted now: */
                node = node->next;
-               task->wake_q.next = NULL;
-
+               if (sleeper)
+                       task->wake_q_sleeper.next = NULL;
+               else
+                       task->wake_q.next = NULL;
                /*
                 * wake_up_process() implies a wmb() to pair with the queueing
                 * in wake_q_add() so as not to miss wakeups.
diff --git a/localversion-rt b/localversion-rt
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt2
+-rt3
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -267,11 +267,11 @@ int free_swap_slot(swp_entry_t entry)
 {
        struct swap_slots_cache *cache;
 
-       cache = &get_cpu_var(swp_slots);
+       cache = raw_cpu_ptr(&swp_slots);
        if (use_swap_slot_cache && cache->slots_ret) {
                spin_lock_irq(&cache->free_lock);
                /* Swap slots cache may be deactivated before acquiring lock */
-               if (!use_swap_slot_cache) {
+               if (!use_swap_slot_cache || !cache->slots_ret) {
                        spin_unlock_irq(&cache->free_lock);
                        goto direct_free;
                }
@@ -291,7 +291,6 @@ int free_swap_slot(swp_entry_t entry)
 direct_free:
                swapcache_free_entries(&entry, 1);
        }
-       put_cpu_var(swp_slots);
 
        return 0;
 }
