On x86, sleeping while on an IST or irq stack has a surprisingly
good chance of working, but it can also fail dramatically.  Add an
arch hook to allow schedule and __might_sleep to catch sleeping on
the wrong stack.

This will also catch do_exit being called from the wrong stack (such as
an IST or irq stack), which could leave an IST stack shifted or an NMI
nesting count incremented.

Signed-off-by: Andy Lutomirski <l...@amacapital.net>
---
 arch/x86/Kconfig                   |  1 +
 arch/x86/include/asm/thread_info.h | 17 +++++++++++++++++
 arch/x86/kernel/irq_32.c           | 13 +++----------
 include/linux/thread_info.h        |  7 +++++++
 kernel/Kconfig.locks               |  3 +++
 kernel/sched/core.c                | 14 ++++++++++----
 6 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ded8a6774ac9..a811286636d2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -137,6 +137,7 @@ config X86
        select HAVE_ACPI_APEI_NMI if ACPI
        select ACPI_LEGACY_TABLES_LOOKUP if ACPI
        select X86_FEATURE_NAMES if PROC_FS
+       select HAVE_ARCH_SCHEDULE_ALLOWED
 
 config INSTRUCTION_DECODER
        def_bool y
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 547e344a6dc6..05701f132473 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -170,6 +170,23 @@ static inline struct thread_info *current_thread_info(void)
        return ti;
 }
 
+static inline unsigned long current_stack_pointer(void)
+{
+       unsigned long sp;
+#ifdef CONFIG_X86_64
+       asm("mov %%rsp,%0" : "=g" (sp));
+#else
+       asm("mov %%esp,%0" : "=g" (sp));
+#endif
+       return sp;
+}
+
+static inline bool arch_schedule_allowed(void)
+{
+       return ((current_stack_pointer() ^ this_cpu_read_stable(kernel_stack))
+               & ~(THREAD_SIZE - 1)) == 0;
+}
+
 #else /* !__ASSEMBLY__ */
 
 /* how to get the thread information struct from ASM */
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 63ce838e5a54..28d28f5eb8f4 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -69,16 +69,9 @@ static void call_on_stack(void *func, void *stack)
                     : "memory", "cc", "edx", "ecx", "eax");
 }
 
-/* how to get the current stack pointer from C */
-#define current_stack_pointer ({               \
-       unsigned long sp;                       \
-       asm("mov %%esp,%0" : "=g" (sp));        \
-       sp;                                     \
-})
-
 static inline void *current_stack(void)
 {
-       return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
+       return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
 }
 
 static inline int
@@ -103,7 +96,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
 
        /* Save the next esp at the bottom of the stack */
        prev_esp = (u32 *)irqstk;
-       *prev_esp = current_stack_pointer;
+       *prev_esp = current_stack_pointer();
 
        if (unlikely(overflow))
                call_on_stack(print_stack_overflow, isp);
@@ -156,7 +149,7 @@ void do_softirq_own_stack(void)
 
        /* Push the previous esp onto the stack */
        prev_esp = (u32 *)irqstk;
-       *prev_esp = current_stack_pointer;
+       *prev_esp = current_stack_pointer();
 
        call_on_stack(__do_softirq, isp);
 }
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index ff307b548ed3..6deaf7e97009 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -145,6 +145,13 @@ static inline bool test_and_clear_restore_sigmask(void)
 #error "no set_restore_sigmask() provided and default one won't work"
 #endif
 
+#ifndef CONFIG_HAVE_ARCH_SCHEDULE_ALLOWED
+static inline bool arch_schedule_allowed(void)
+{
+       return true;
+}
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_THREAD_INFO_H */
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 76768ee812b2..2714dc34695a 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -237,3 +237,6 @@ config ARCH_USE_QUEUE_RWLOCK
 config QUEUE_RWLOCK
        def_bool y if ARCH_USE_QUEUE_RWLOCK
        depends on SMP
+
+config HAVE_ARCH_SCHEDULE_ALLOWED
+       bool
\ No newline at end of file
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 240157c13ddc..e51ab65a9750 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2705,8 +2705,12 @@ static inline void schedule_debug(struct task_struct *prev)
         * Test if we are atomic. Since do_exit() needs to call into
         * schedule() atomically, we ignore that path. Otherwise whine
         * if we are scheduling when we should not.
+        *
+        * If architectural conditions for scheduling are not met,
+        * complain even if we are in do_exit.
         */
-       if (unlikely(in_atomic_preempt_off() && prev->state != TASK_DEAD))
+       if (unlikely((in_atomic_preempt_off() && prev->state != TASK_DEAD) ||
+                    !arch_schedule_allowed()))
                __schedule_bug(prev);
        rcu_sleep_check();
 
@@ -7200,10 +7204,12 @@ static inline int preempt_count_equals(int preempt_offset)
 void __might_sleep(const char *file, int line, int preempt_offset)
 {
        static unsigned long prev_jiffy;        /* ratelimiting */
+       bool arch_ok;
 
        rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
+       arch_ok = arch_schedule_allowed();
        if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
-            !is_idle_task(current)) ||
+            !is_idle_task(current) && arch_ok) ||
            system_state != SYSTEM_RUNNING || oops_in_progress)
                return;
        if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
@@ -7214,8 +7220,8 @@ void __might_sleep(const char *file, int line, int preempt_offset)
                "BUG: sleeping function called from invalid context at %s:%d\n",
                        file, line);
        printk(KERN_ERR
-               "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
-                       in_atomic(), irqs_disabled(),
+               "in_atomic(): %d, irqs_disabled(): %d, arch_schedule_allowed: %d, pid: %d, name: %s\n",
+                       in_atomic(), irqs_disabled(), (int)arch_ok,
                        current->pid, current->comm);
 
        debug_show_held_locks(current);
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to