On Wed 2017-03-29 18:25:05, Sergey Senozhatsky wrote: > This patch introduces a dedicated printing kernel thread - printk_kthread. > The main purpose of this kthread is to offload printing to a non-atomic > and always scheduleable context, which eliminates 4) and makes 1)-3) less > critical. printk() now just appends log messages to the kernel log buffer > and wake_up()s printk_kthread instead of locking console_sem and calling > into potentially unsafe console_unlock(). > > diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c > index 2d07678e9ff9..ab6b3b2a68c6 100644 > --- a/kernel/printk/printk.c > +++ b/kernel/printk/printk.c > @@ -445,6 +447,42 @@ static char __log_buf[__LOG_BUF_LEN] > __aligned(LOG_ALIGN); > static char *log_buf = __log_buf; > static u32 log_buf_len = __LOG_BUF_LEN; > > +static struct task_struct *printk_kthread __read_mostly; > +/* > + * We can't call into the scheduler (wake_up() printk kthread) during > + * suspend/kexec/etc. This temporarily switches printk to old behaviour. > + */ > +static atomic_t printk_emergency __read_mostly; > +/* > + * Disable printk_kthread permanently. Unlike `oops_in_progress' > + * it doesn't go back to 0. > + */
The comment is no longer valid once we allow the variable to be modified using the sysfs knob. > @@ -1765,17 +1803,40 @@ asmlinkage int vprintk_emit(int facility, int level, > > printed_len += log_output(facility, level, lflags, dict, dictlen, text, > text_len); > > + /* > + * Emergency level indicates that the system is unstable and, thus, > + * we better stop relying on wake_up(printk_kthread) and try to do > + * a direct printing. > + */ > + if (level == LOGLEVEL_EMERG) > + printk_kthread_disabled = true; > + > + set_bit(PRINTK_PENDING_OUTPUT, &printk_pending); > logbuf_unlock_irqrestore(flags); > > /* If called from the scheduler, we can not call up(). */ > if (!in_sched) { > /* > - * Try to acquire and then immediately release the console > - * semaphore. The release will print out buffers and wake up > - * /dev/kmsg and syslog() users. > + * Under heavy printing load/slow serial console/etc > + * console_unlock() can stall CPUs, which can result in > + * soft/hard-lockups, lost interrupts, RCU stalls, etc. > + * Therefore we attempt to print the messages to console > + * from a dedicated printk_kthread, which always runs in > + * schedulable context. > */ > - if (console_trylock()) > - console_unlock(); > + if (printk_kthread_enabled()) { > + printk_safe_enter_irqsave(flags); > + wake_up_process(printk_kthread); > + printk_safe_exit_irqrestore(flags); I am really happy that we have the printk_safe stuff available! > + } else { > + /* > + * Try to acquire and then immediately release the > + * console semaphore. The release will print out > + * buffers and wake up /dev/kmsg and syslog() users. 
> + */ > + if (console_trylock()) > + console_unlock(); > + } > } > > return printed_len; > @@ -1882,6 +1943,9 @@ static size_t msg_print_text(const struct printk_log > *msg, > bool syslog, char *buf, size_t size) { return 0; } > static bool suppress_message_printing(int level) { return false; } > > +void printk_emergency_begin(void) {} > +void printk_emergency_end(void) {} > + > #endif /* CONFIG_PRINTK */ > > #ifdef CONFIG_EARLY_PRINTK > @@ -2164,6 +2228,13 @@ void console_unlock(void) > bool do_cond_resched, retry; > > if (console_suspended) { > + /* > + * Avoid an infinite loop in printk_kthread function > + * when console_unlock() cannot flush messages because > + * we suspended consoles. Someone else will print the > + * messages from resume_console(). > + */ > + clear_bit(PRINTK_PENDING_OUTPUT, &printk_pending); Great catch! > up_console_sem(); > return; > } > @@ -2182,6 +2253,7 @@ void console_unlock(void) > console_may_schedule = 0; > > again: > + clear_bit(PRINTK_PENDING_OUTPUT, &printk_pending); This will not help if new messages appear during call_console_drivers(). I would move this line after the for (;;) loop, so that the bit is cleared only once all messages have really been handled. Otherwise, it looks fine to me. Best Regards, Petr