This patch extends the idea of NMI per-cpu buffers to regions that may cause recursive printk() calls and possible deadlocks. Namely, printk() can't handle printk calls from schedule code or printk() calls from lock debugging code (spin_dump() for instance); because those may be called with `sem->lock' already taken or any other `critical' locks (p->pi_lock, etc.). An example of deadlock can be
vprintk_emit() console_unlock() up() << raw_spin_lock_irqsave(&sem->lock, flags); wake_up_process() try_to_wake_up() ttwu_queue() ttwu_activate() activate_task() enqueue_task() enqueue_task_fair() cfs_rq_of() task_of() WARN_ON_ONCE(!entity_is_task(se)) vprintk_emit() console_trylock() down_trylock() raw_spin_lock_irqsave(&sem->lock, flags) ^^^^ deadlock and some other cases. Just like in NMI implementation, the solution uses a per-cpu `printk_func' pointer to 'redirect' printk() calls to a 'safe' callback, that store messages in a per-cpu buffer and flushes them back to logbuf buffer later. Usage example: printk() printk_safe_enter(flags) // // any printk() call from here will endup in vprintk_safe(), // that stores messages in a special per-CPU buffer. // printk_safe_exit(flags) The 'redirection' mechanism, though, has been reworked, as suggested by Petr Mladek. Instead of using a per-cpu @print_func callback we now keep a per-cpu printk-context variable and call either default or nmi vprintk function depending on its value. printk_nmi_entrer/exit and printk_safe_enter/exit, thus, just set/celar corresponding bits in printk-context functions. The patch only adds printk_safe support, we don't use it yet. Signed-off-by: Sergey Senozhatsky <sergey.senozhat...@gmail.com> --- include/linux/printk.h | 21 +++++-- kernel/printk/Makefile | 3 +- kernel/printk/internal.h | 58 ++++++++++-------- kernel/printk/printk.c | 3 - kernel/printk/printk_safe.c | 140 ++++++++++++++++++++++++++++++++++---------- 5 files changed, 160 insertions(+), 65 deletions(-) diff --git a/include/linux/printk.h b/include/linux/printk.h index c9a3080..e8ede8f 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -132,17 +132,11 @@ void early_printk(const char *s, ...) { } #endif #ifdef CONFIG_PRINTK_NMI -extern void printk_safe_init(void); extern void printk_safe_nmi_enter(void); extern void printk_safe_nmi_exit(void); -extern void printk_safe_flush(void); -extern void printk_safe_flush_on_panic(void); #else -static inline void printk_safe_init(void) { } static inline void printk_safe_nmi_enter(void) { } static inline void printk_safe_nmi_exit(void) { } -static inline void printk_safe_flush(void) { } -static inline void printk_safe_flush_on_panic(void) { } #endif /* PRINTK_NMI */ #ifdef CONFIG_PRINTK @@ -194,6 +188,9 @@ void __init setup_log_buf(int early); __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); void show_regs_print_info(const char *log_lvl); +extern void printk_safe_init(void); +extern void printk_safe_flush(void); +extern void printk_safe_flush_on_panic(void); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -253,6 +250,18 @@ static inline void dump_stack_print_info(const char *log_lvl) static inline void show_regs_print_info(const char *log_lvl) { } + +static inline void printk_safe_init(void) +{ +} + +static inline void printk_safe_flush(void) +{ +} + +static inline void printk_safe_flush_on_panic(void) +{ +} #endif extern asmlinkage void dump_stack(void) __cold; diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile index 6079281..87ad14d 100644 --- a/kernel/printk/Makefile +++ b/kernel/printk/Makefile @@ -1,3 +1,2 @@ -obj-y = printk.o -obj-$(CONFIG_PRINTK_NMI) += printk_safe.o +obj-y = printk_safe.o printk.o obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 7fd2838..015f68f 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -16,26 +16,8 @@ */ #include <linux/percpu.h> -typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args); - -int __printf(1, 0) vprintk_default(const char *fmt, va_list args); - #ifdef CONFIG_PRINTK_NMI -extern raw_spinlock_t logbuf_lock; - -/* - * printk() could not take logbuf_lock in NMI context. Instead, - * it temporary stores the strings into a per-CPU buffer. - * The alternative implementation is chosen transparently - * via per-CPU variable. - */ -DECLARE_PER_CPU(printk_func_t, printk_func); -static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args) -{ - return this_cpu_read(printk_func)(fmt, args); -} - extern atomic_t nmi_message_lost; static inline int get_nmi_message_lost(void) { @@ -44,14 +26,44 @@ static inline int get_nmi_message_lost(void) #else /* CONFIG_PRINTK_NMI */ -static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args) -{ - return vprintk_default(fmt, args); -} - static inline int get_nmi_message_lost(void) { return 0; } #endif /* CONFIG_PRINTK_NMI */ + +#ifdef CONFIG_PRINTK + +#define PRINTK_SAFE_CONTEXT_MASK 0x7fffffff +#define PRINTK_SAFE_NMI_CONTEXT_MASK 0x80000000 + +extern raw_spinlock_t logbuf_lock; + +__printf(1, 0) int vprintk_default(const char *fmt, va_list args); +__printf(1, 0) int vprintk_func(const char *fmt, va_list args); +void __printk_safe_enter(void); +void __printk_safe_exit(void); + +#define printk_safe_enter(flags) \ + do { \ + local_irq_save(flags); \ + __printk_safe_enter(); \ + } while (0) + +#define printk_safe_exit(flags) \ + do { \ + __printk_safe_exit(); \ + local_irq_restore(flags); \ + } while (0) + +#else + +__printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; } +void __printk_safe_enter(void) { } +void __printk_safe_exit(void) { } + +#define printk_safe_enter(flags) +#define printk_safe_exit(flags) + +#endif /* CONFIG_PRINTK */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index be7f60b..58cfc5c 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2024,9 +2024,6 @@ static size_t msg_print_text(const struct printk_log *msg, enum log_flags prev, static size_t cont_print_text(char *text, size_t size) { return 0; } static bool suppress_message_printing(int level) { return false; } -/* Still needs to be defined for users */ -DEFINE_PER_CPU(printk_func_t, printk_func); - #endif /* CONFIG_PRINTK */ #ifdef CONFIG_EARLY_PRINTK diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index 1f66163..a2b0b99 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -1,5 +1,5 @@ /* - * printk_safe.c - Safe printk in NMI context + * printk_safe.c - Safe printk for printk-deadlock-prone contexts * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -32,15 +32,14 @@ * is later flushed into the main ring buffer via IRQ work. * * The alternative implementation is chosen transparently - * via @printk_func per-CPU variable. + * by examinig current printk() context mask stored in @printk_safe_context + * per-CPU variable. * * The implementation allows to flush the strings also from another CPU. * There are situations when we want to make sure that all buffers * were handled or when IRQs are blocked. */ -DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default; static int printk_safe_irq_ready; -atomic_t nmi_message_lost; #define SAFE_LOG_BUF_LEN ((1 << CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT) - \ sizeof(atomic_t) - sizeof(struct irq_work)) @@ -50,27 +49,26 @@ struct printk_safe_seq_buf { struct irq_work work; /* IRQ work that flushes the buffer */ unsigned char buffer[SAFE_LOG_BUF_LEN]; }; + +static DEFINE_PER_CPU(struct printk_safe_seq_buf, safe_print_seq); +static DEFINE_PER_CPU(int, printk_safe_context); + +#ifdef CONFIG_PRINTK_NMI static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq); +atomic_t nmi_message_lost; +#endif -/* - * Safe printk() for NMI context. It uses a per-CPU buffer to - * store the message. NMIs are not nested, so there is always only - * one writer running. But the buffer might get flushed from another - * CPU, so we need to be careful. - */ -static int vprintk_safe_nmi(const char *fmt, va_list args) +static int printk_safe_log_store(struct printk_safe_seq_buf *s, + const char *fmt, va_list args) { - struct printk_safe_seq_buf *s = this_cpu_ptr(&nmi_print_seq); - int add = 0; + int add; size_t len; again: len = atomic_read(&s->len); - if (len >= sizeof(s->buffer)) { - atomic_inc(&nmi_message_lost); - return 0; - } + if (len >= sizeof(s->buffer)) + return -E2BIG; /* * Make sure that all old data have been read before the buffer was @@ -110,7 +108,6 @@ static void printk_safe_flush_line(const char *text, int len) printk_deferred("%.*s", len, text); else printk("%.*s", len, text); - } /* @@ -126,7 +123,7 @@ static void printk_safe_flush_seq_line(struct printk_safe_seq_buf *s, } /* - * Flush data from the associated per_CPU buffer. The function + * Flush data from the associated per-CPU buffer. The function * can be called either via IRQ work or independently. */ static void __printk_safe_flush(struct irq_work *work) @@ -209,8 +206,12 @@ void printk_safe_flush(void) { int cpu; - for_each_possible_cpu(cpu) + for_each_possible_cpu(cpu) { +#ifdef CONFIG_PRINTK_NMI __printk_safe_flush(&per_cpu(nmi_print_seq, cpu).work); +#endif + __printk_safe_flush(&per_cpu(safe_print_seq, cpu).work); + } } /** @@ -240,14 +241,101 @@ void printk_safe_flush_on_panic(void) printk_safe_flush(); } +#ifdef CONFIG_PRINTK_NMI +/* + * Safe printk() for NMI context. It uses a per-CPU buffer to + * store the message. NMIs are not nested, so there is always only + * one writer running. But the buffer might get flushed from another + * CPU, so we need to be careful. + */ +static int vprintk_safe_nmi(const char *fmt, va_list args) +{ + struct printk_safe_seq_buf *s = this_cpu_ptr(&nmi_print_seq); + int add; + + add = printk_safe_log_store(s, fmt, args); + if (add == -E2BIG) { + atomic_inc(&nmi_message_lost); + add = 0; + } + + return add; +} + +void printk_safe_nmi_enter(void) +{ + this_cpu_or(printk_safe_context, PRINTK_SAFE_NMI_CONTEXT_MASK); +} + +void printk_safe_nmi_exit(void) +{ + this_cpu_and(printk_safe_context, ~PRINTK_SAFE_NMI_CONTEXT_MASK); +} + +#else + +static int vprintk_safe_nmi(const char *fmt, va_list args) +{ + return 0; +} + +#endif /* CONFIG_PRINTK_NMI */ + +/* + * Lockless printk(), to avoid deadlocks should the printk() recurse + * into itself. It uses a per-CPU buffer to store the message, just like + * NMI. + */ +static int vprintk_safe(const char *fmt, va_list args) +{ + struct printk_safe_seq_buf *s = this_cpu_ptr(&safe_print_seq); + + return printk_safe_log_store(s, fmt, args); +} + +/* + * Returns with local IRQs disabled. + * Can be preempted by NMI. + */ +void __printk_safe_enter(void) +{ + this_cpu_inc(printk_safe_context); +} + +/* + * Restores local IRQs state saved in printk_safe_enter(). + * Can be preempted by NMI. + */ +void __printk_safe_exit(void) +{ + this_cpu_dec(printk_safe_context); +} + +__printf(1, 0) int vprintk_func(const char *fmt, va_list args) +{ + if (this_cpu_read(printk_safe_context) & PRINTK_SAFE_NMI_CONTEXT_MASK) + return vprintk_safe_nmi(fmt, args); + + if (this_cpu_read(printk_safe_context) & PRINTK_SAFE_CONTEXT_MASK) + return vprintk_safe(fmt, args); + + return vprintk_default(fmt, args); +} + void __init printk_safe_init(void) { int cpu; for_each_possible_cpu(cpu) { - struct printk_safe_seq_buf *s = &per_cpu(nmi_print_seq, cpu); + struct printk_safe_seq_buf *s; + s = &per_cpu(safe_print_seq, cpu); init_irq_work(&s->work, __printk_safe_flush); + +#ifdef CONFIG_PRINTK_NMI + s = &per_cpu(nmi_print_seq, cpu); + init_irq_work(&s->work, __printk_safe_flush); +#endif } /* Make sure that IRQ works are initialized before enabling. */ @@ -257,13 +345,3 @@ void __init printk_safe_init(void) /* Flush pending messages that did not have scheduled IRQ works. */ printk_safe_flush(); } - -void printk_safe_nmi_enter(void) -{ - this_cpu_write(printk_func, vprintk_safe_nmi); -} - -void printk_safe_nmi_exit(void) -{ - this_cpu_write(printk_func, vprintk_default); -} -- 2.10.1.382.ga23ca1b