On Wed, Mar 16, 2016 at 02:39:44PM +0900, Byungchul Park wrote:
> On Tue, Mar 15, 2016 at 11:07:38PM +0900, Sergey Senozhatsky wrote:
> > 
> > something like this (*minimally tested so far*).
> > 
> > -- move wake_up() and friends under the logbuf section; so we can detect
>                                           ^^^^^^^^^^^^^^
>                                         section protected by logbuf_lock?

I am afraid that I mis-interpreted the meaning of *under*. :-( You may mean
*below*, but I interpreted it as *within*. I am sorry for that caused by my
poor english. I absolutely agree with your mention, if it mean *below*.

> 
> >    printk() recursion from wake_up()
> 
> Excuse me, but I fear that it can cause an unnecessary deadlock.
> Furthermore it cannot be handled if it is caused by logbuf_lock. IMHO,
> nothing including synced printk can help us in this case. Is there
> something I missed? I'am sorry if I did, but could you let me know if so?
> 
> I mean that it would be better to keep the wake_up and friend out of the
> critical section by logbuf_lock.
> 
> Thanks,
> Byungchul
> 
> > 
> > -- in recursion_bug branch switch to sync printk. we don't know why did we
> >    recurse, may be because of wake_up()->spin_lock(). doing
> >    kthread_stop()->wake_up_process() can be unsafe, I guess, just set
> >    `printk_sync' to true.
> > 
> > 
> > Signed-off-by: Jan Kara <j...@suse.cz>
> > Signed-off-by: Sergey Senozhatsky <sergey.senozhat...@gmail.com>
> > ---
> >  Documentation/kernel-parameters.txt |  10 ++
> >  kernel/printk/printk.c              | 202 
> > +++++++++++++++++++++++++-----------
> >  2 files changed, 154 insertions(+), 58 deletions(-)
> > 
> > diff --git a/Documentation/kernel-parameters.txt 
> > b/Documentation/kernel-parameters.txt
> > index 1e58ae9..454999e 100644
> > --- a/Documentation/kernel-parameters.txt
> > +++ b/Documentation/kernel-parameters.txt
> > @@ -3114,6 +3114,16 @@ bytes respectively. Such letter suffixes can also be 
> > entirely omitted.
> >     printk.time=    Show timing data prefixed to each printk message line
> >                     Format: <bool>  (1/Y/y=enable, 0/N/n=disable)
> >  
> > +   printk.synchronous=
> > +                   By default kernel messages are printed to console
> > +                   asynchronously (except during early boot or when oops
> > +                   is happening). That avoids kernel stalling behind slow
> > +                   serial console and thus avoids softlockups, interrupt
> > +                   timeouts, or userspace timing out during heavy printing.
> > +                   However for debugging problems, printing messages to
> > +                   console immediately may be desirable. This option
> > +                   enables such behavior.
> > +
> >     processor.max_cstate=   [HW,ACPI]
> >                     Limit processor to maximum C-state
> >                     max_cstate=9 overrides any DMI blacklist limit.
> > diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> > index d5fd844..38baed1 100644
> > --- a/kernel/printk/printk.c
> > +++ b/kernel/printk/printk.c
> > @@ -46,6 +46,7 @@
> >  #include <linux/utsname.h>
> >  #include <linux/ctype.h>
> >  #include <linux/uio.h>
> > +#include <linux/kthread.h>
> >  
> >  #include <asm/uaccess.h>
> >  #include <asm-generic/sections.h>
> > @@ -284,6 +285,105 @@ static char __log_buf[__LOG_BUF_LEN] 
> > __aligned(LOG_ALIGN);
> >  static char *log_buf = __log_buf;
> >  static u32 log_buf_len = __LOG_BUF_LEN;
> >  
> > +/*
> > + * When true, printing to console will happen synchronously unless someone 
> > else
> > + * is already printing messages.
> > + *
> > + * The default value on UP systems is 'true'.
> > + */
> > +static bool __read_mostly printk_sync = !IS_ENABLED(CONFIG_SMP);
> > +module_param_named(synchronous, printk_sync, bool, S_IRUGO | S_IWUSR);
> > +MODULE_PARM_DESC(synchronous, "make printing to console synchronous");
> > +
> > +/* Printing kthread for async vprintk_emit() */
> > +static struct task_struct *printk_thread;
> > +/* Wait for printing wakeups from async vprintk_emit() */
> > +static DECLARE_WAIT_QUEUE_HEAD(printing_wait);
> > +
> > +static int printing_func(void *data)
> > +{
> > +   while (1) {
> > +           DECLARE_WAITQUEUE(wait, current);
> > +           set_current_state(TASK_INTERRUPTIBLE);
> > +           add_wait_queue(&printing_wait, &wait);
> > +
> > +           schedule();
> > +           remove_wait_queue(&printing_wait, &wait);
> > +
> > +           console_lock();
> > +           console_unlock();
> > +   }
> > +
> > +   return 0;
> > +}
> > +
> > +static int __init init_printk_thread(void)
> > +{
> > +   if (printk_sync)
> > +           return 0;
> > +
> > +   printk_thread = kthread_run(printing_func, NULL, "printk");
> > +   BUG_ON(IS_ERR(printk_thread));
> > +   return 0;
> > +}
> > +late_initcall(init_printk_thread);
> > +
> > +/*
> > + * Delayed printk version, for scheduler-internal messages:
> > + */
> > +#define PRINTK_PENDING_WAKEUP      (1<<0)
> > +#define PRINTK_PENDING_OUTPUT      (1<<1)
> > +
> > +static DEFINE_PER_CPU(int, printk_pending);
> > +
> > +static void wake_up_klogd_work_func(struct irq_work *irq_work)
> > +{
> > +   int pending = __this_cpu_xchg(printk_pending, 0);
> > +
> > +   if (pending & PRINTK_PENDING_OUTPUT) {
> > +           if (!printk_sync && printk_thread) {
> > +                   wake_up(&printing_wait);
> > +           } else {
> > +                   /*
> > +                    * If trylock fails, someone else is doing
> > +                    * the printing
> > +                    */
> > +                   if (console_trylock())
> > +                           console_unlock();
> > +           }
> > +   }
> > +
> > +   if (pending & PRINTK_PENDING_WAKEUP)
> > +           wake_up_interruptible(&log_wait);
> > +}
> > +
> > +static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
> > +   .func = wake_up_klogd_work_func,
> > +   .flags = IRQ_WORK_LAZY,
> > +};
> > +
> > +void wake_up_klogd(void)
> > +{
> > +   preempt_disable();
> > +   if (waitqueue_active(&log_wait)) {
> > +           this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
> > +           irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
> > +   }
> > +   preempt_enable();
> > +}
> > +
> > +int printk_deferred(const char *fmt, ...)
> > +{
> > +   va_list args;
> > +   int r;
> > +
> > +   va_start(args, fmt);
> > +   r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args);
> > +   va_end(args);
> > +
> > +   return r;
> > +}
> > +
> >  /* Return log buffer address */
> >  char *log_buf_addr_get(void)
> >  {
> > @@ -1609,6 +1709,8 @@ asmlinkage int vprintk_emit(int facility, int level,
> >                         const char *dict, size_t dictlen,
> >                         const char *fmt, va_list args)
> >  {
> > +   /* cpu currently holding logbuf_lock in this function */
> > +   static unsigned int logbuf_cpu = UINT_MAX;
> >     static bool recursion_bug;
> >     static char textbuf[LOG_LINE_MAX];
> >     char *text = textbuf;
> > @@ -1618,12 +1720,17 @@ asmlinkage int vprintk_emit(int facility, int level,
> >     int this_cpu;
> >     int printed_len = 0;
> >     bool in_sched = false;
> > -   /* cpu currently holding logbuf_lock in this function */
> > -   static unsigned int logbuf_cpu = UINT_MAX;
> > +   bool in_panic = console_loglevel == CONSOLE_LOGLEVEL_MOTORMOUTH;
> > +   bool sync_print = printk_sync;
> >  
> >     if (level == LOGLEVEL_SCHED) {
> >             level = LOGLEVEL_DEFAULT;
> > +           /*
> > +            * Deferred sched messages must not be printed
> > +            * synchronously regardless the @printk_sync or @in_panic.
> > +            */
> >             in_sched = true;
> > +           sync_print = false;
> >     }
> >  
> >     boot_delay_msec(level);
> > @@ -1660,6 +1767,7 @@ asmlinkage int vprintk_emit(int facility, int level,
> >             static const char recursion_msg[] =
> >                     "BUG: recent printk recursion!";
> >  
> > +           printk_sync = true;
> >             recursion_bug = false;
> >             /* emit KERN_CRIT message */
> >             printed_len += log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0,
> > @@ -1749,13 +1857,45 @@ asmlinkage int vprintk_emit(int facility, int level,
> >                                              dict, dictlen, text, text_len);
> >     }
> >  
> > +   /*
> > +    * By default we print message to console asynchronously so that kernel
> > +    * doesn't get stalled due to slow serial console. That can lead to
> > +    * softlockups, lost interrupts, or userspace timing out under heavy
> > +    * printing load.
> > +    *
> > +    * However we resort to synchronous printing of messages during early
> > +    * boot, when synchronous printing was explicitly requested by
> > +    * kernel parameter, or when console_verbose() was called to print
> > +    * everything during panic / oops.
> > +    */
> > +   if (!sync_print) {
> > +           if (in_sched) {
> > +                   /*
> > +                    * @in_sched messages may come too early, when we don't
> > +                    * yet have @printk_thread. We can't print deferred
> > +                    * messages directly, because this may deadlock, route
> > +                    * them via IRQ context.
> > +                    */
> > +                   __this_cpu_or(printk_pending,
> > +                                   PRINTK_PENDING_OUTPUT);
> > +                   irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
> > +           } else if (printk_thread && !in_panic) {
> > +                   /*
> > +                    * This will wakeup the printing kthread and offload
> > +                    * printing to a schedulable context.
> > +                    */
> > +                   wake_up(&printing_wait);
> > +           } else {
> > +                   sync_print = true;
> > +           }
> > +   }
> > +
> >     logbuf_cpu = UINT_MAX;
> >     raw_spin_unlock(&logbuf_lock);
> >     lockdep_on();
> >     local_irq_restore(flags);
> >  
> > -   /* If called from the scheduler, we can not call up(). */
> > -   if (!in_sched) {
> > +   if (sync_print) {
> >             lockdep_off();
> >             /*
> >              * Try to acquire and then immediately release the console
> > @@ -2724,60 +2864,6 @@ late_initcall(printk_late_init);
> >  
> >  #if defined CONFIG_PRINTK
> >  /*
> > - * Delayed printk version, for scheduler-internal messages:
> > - */
> > -#define PRINTK_PENDING_WAKEUP      0x01
> > -#define PRINTK_PENDING_OUTPUT      0x02
> > -
> > -static DEFINE_PER_CPU(int, printk_pending);
> > -
> > -static void wake_up_klogd_work_func(struct irq_work *irq_work)
> > -{
> > -   int pending = __this_cpu_xchg(printk_pending, 0);
> > -
> > -   if (pending & PRINTK_PENDING_OUTPUT) {
> > -           /* If trylock fails, someone else is doing the printing */
> > -           if (console_trylock())
> > -                   console_unlock();
> > -   }
> > -
> > -   if (pending & PRINTK_PENDING_WAKEUP)
> > -           wake_up_interruptible(&log_wait);
> > -}
> > -
> > -static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
> > -   .func = wake_up_klogd_work_func,
> > -   .flags = IRQ_WORK_LAZY,
> > -};
> > -
> > -void wake_up_klogd(void)
> > -{
> > -   preempt_disable();
> > -   if (waitqueue_active(&log_wait)) {
> > -           this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
> > -           irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
> > -   }
> > -   preempt_enable();
> > -}
> > -
> > -int printk_deferred(const char *fmt, ...)
> > -{
> > -   va_list args;
> > -   int r;
> > -
> > -   preempt_disable();
> > -   va_start(args, fmt);
> > -   r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args);
> > -   va_end(args);
> > -
> > -   __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);
> > -   irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
> > -   preempt_enable();
> > -
> > -   return r;
> > -}
> > -
> > -/*
> >   * printk rate limiting, lifted from the networking subsystem.
> >   *
> >   * This enforces a rate limit: not more than 10 kernel messages
> > -- 
> > 2.8.0.rc0

Reply via email to