kdb_trap_printk allows passing normal printk() messages to kdb via
vkdb_printf(). For example, it is used to get a backtrace using
the classic show_stack(), see kdb_show_stack().

vkdb_printf() tries to avoid a potential infinite loop by disabling
the trap. But this approach is racy, for example:

CPU1                                    CPU2

vkdb_printf()
  // assume that kdb_trap_printk == 0
  saved_trap_printk = kdb_trap_printk;
  kdb_trap_printk = 0;

                                        kdb_show_stack()
                                          kdb_trap_printk++;

Problem1: Now, a nested printk() on CPU1 calls vkdb_printf()
          even when it should have been disabled. It will not
          cause a deadlock but...

   // using the outdated saved value: 0
   kdb_trap_printk = saved_trap_printk;

                                          kdb_trap_printk--;

Problem2: Now, kdb_trap_printk == -1 and will stay like this.
   It means that all messages will get passed to kdb from
   now on.

This patch removes the racy saved_trap_printk handling. Instead,
the recursion is prevented by a check for the locked CPU.

The solution is still kind of racy. An unrelated printk(), from
another process, might get trapped by vkdb_printf(). And the wanted
printk() might not get trapped because kdb_printf_cpu is already
assigned. But this problem existed even with the original code.

A proper solution would be to get_cpu() before setting kdb_trap_printk
and trap messages only from this CPU. I am not sure if it is worth
the effort, though.

In fact, the race is very theoretical. When kdb is running any of
the commands that use kdb_trap_printk there is a single active CPU
and the other CPUs should be in a holding pen inside kgdb_cpu_enter().

The only time this is violated is when there is a timeout waiting
for the other CPUs to report to the holding pen.

Finally, note that the situation is a bit schizophrenic. vkdb_printf()
explicitly allows recursion but only from KDB code that calls
kdb_printf() directly. On the other hand, the generic printk()
recursion is not allowed because it might cause an infinite loop.
This is why we could not hide the decision inside vkdb_printf()
easily.

Signed-off-by: Petr Mladek <pmla...@suse.com>
---
 include/linux/kdb.h       | 1 +
 kernel/debug/kdb/kdb_io.c | 9 ++-------
 kernel/printk/printk.c    | 3 ++-
 3 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/include/linux/kdb.h b/include/linux/kdb.h
index eb706188dc23..68bd88223417 100644
--- a/include/linux/kdb.h
+++ b/include/linux/kdb.h
@@ -161,6 +161,7 @@ enum kdb_msgsrc {
 };
 
 extern int kdb_trap_printk;
+extern int kdb_printf_cpu;
 extern __printf(2, 0) int vkdb_printf(enum kdb_msgsrc src, const char *fmt,
                                      va_list args);
 extern __printf(1, 2) int kdb_printf(const char *, ...);
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index cf495c7a8519..351b4f785270 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -30,6 +30,7 @@
 char kdb_prompt_str[CMD_BUFLEN];
 
 int kdb_trap_printk;
+int kdb_printf_cpu = -1;
 
 static int kgdb_transition_check(char *buffer)
 {
@@ -554,24 +555,19 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
        int linecount;
        int colcount;
        int logging, saved_loglevel = 0;
-       int saved_trap_printk;
        int retlen = 0;
        int fnd, len;
        int this_cpu, old_cpu;
-       static int kdb_printf_cpu = -1;
        char *cp, *cp2, *cphold = NULL, replaced_byte = ' ';
        char *moreprompt = "more> ";
        struct console *c = console_drivers;
        unsigned long uninitialized_var(flags);
 
-       local_irq_save(flags);
-       saved_trap_printk = kdb_trap_printk;
-       kdb_trap_printk = 0;
-
        /* Serialize kdb_printf if multiple cpus try to write at once.
         * But if any cpu goes recursive in kdb, just print the output,
         * even if it is interleaved with any other text.
         */
+       local_irq_save(flags);
        this_cpu = smp_processor_id();
        for (;;) {
                old_cpu = cmpxchg(&kdb_printf_cpu, -1, this_cpu);
@@ -849,7 +845,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
                console_loglevel = saved_loglevel;
        /* kdb_printf_cpu locked the code above. */
        smp_store_release(&kdb_printf_cpu, old_cpu);
-       kdb_trap_printk = saved_trap_printk;
        local_irq_restore(flags);
        return retlen;
 }
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index f7a55e9ff2f7..2403800bb5f9 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1926,7 +1926,8 @@ int vprintk_default(const char *fmt, va_list args)
        int r;
 
 #ifdef CONFIG_KGDB_KDB
-       if (unlikely(kdb_trap_printk)) {
+       /* Allow to pass printk() to kdb but avoid a recursion. */
+       if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0)) {
                r = vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args);
                return r;
        }
-- 
1.8.5.6

Reply via email to