kexec disables (or "shoots down") all CPUs other than a crashing CPU before
entering the 2nd kernel. This disablement is done via NMI, and the crashing
CPU waits for its completion by spinning for at most 1 second.
However, there is a race window if this NMI handling doesn't complete within
that 1 second on some CPU, which causes a fragile situation where only a
portion of the online CPUs are responsive to MCE interrupts. If an MCE happens
during this race window, MCE synchronization always times out and results in a
kernel panic. So the user-visible effect of this bug is kdump failure.

Note that this race window has existed since the current MCE handler was
implemented around 2.6.32; commit 716079f66eac ("mce: Panic when a core has
reached a timeout") recently made it more visible by changing the default
behavior of the synchronization timeout from "ignore" to "panic".

This patch adds a flag indicating that the system is running kdump code, and
makes the MCE handler return early when the flag is set, in order to "turn
off" the MCE handling code in kdump context.

Signed-off-by: Naoya Horiguchi <n-horigu...@ah.jp.nec.com>
Cc: <sta...@vger.kernel.org>        [2.6.32+]
---
 arch/x86/include/asm/mce.h       |  1 +
 arch/x86/kernel/cpu/mcheck/mce.c | 13 +++++++++++++
 arch/x86/kernel/crash.c          |  8 ++++++++
 3 files changed, 22 insertions(+)

diff --git v3.19.orig/arch/x86/include/asm/mce.h v3.19/arch/x86/include/asm/mce.h
index 51b26e895933..7ae9927d781a 100644
--- v3.19.orig/arch/x86/include/asm/mce.h
+++ v3.19/arch/x86/include/asm/mce.h
@@ -175,6 +175,7 @@ static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
 #endif
 
 int mce_available(struct cpuinfo_x86 *c);
+void cpu_emergency_mce_disable(void);
 
 DECLARE_PER_CPU(unsigned, mce_exception_count);
 DECLARE_PER_CPU(unsigned, mce_poll_count);
diff --git v3.19.orig/arch/x86/kernel/cpu/mcheck/mce.c v3.19/arch/x86/kernel/cpu/mcheck/mce.c
index 3112b79ace8e..3a155b9e276e 100644
--- v3.19.orig/arch/x86/kernel/cpu/mcheck/mce.c
+++ v3.19/arch/x86/kernel/cpu/mcheck/mce.c
@@ -87,6 +87,8 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int                     cpu_missing;
 
+static int                     under_crashdumping;
+
 /* CMCI storm detection filter */
 static DEFINE_PER_CPU(unsigned long, mce_polled_error);
 
@@ -1085,6 +1087,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
        DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
        char *msg = "Unknown";
 
+       if (under_crashdumping) {
+               pr_err("CPU#%d: Machine Check ignored because crash dump is running\n",
+                      smp_processor_id());
+               return;
+       }
+
        this_cpu_inc(mce_exception_count);
 
        if (!cfg->banks)
@@ -2104,6 +2112,11 @@ static void mce_syscore_shutdown(void)
        mce_disable_error_reporting();
 }
 
+void cpu_emergency_mce_disable(void)
+{
+       under_crashdumping = 1;
+}
+
 /*
  * On resume clear all MCE state. Don't want to see leftovers from the BIOS.
  * Only one CPU is active at this time, the others get re-added later using
diff --git v3.19.orig/arch/x86/kernel/crash.c v3.19/arch/x86/kernel/crash.c
index aceb2f90c716..08c9eaaaa8cb 100644
--- v3.19.orig/arch/x86/kernel/crash.c
+++ v3.19/arch/x86/kernel/crash.c
@@ -34,6 +34,7 @@
 #include <asm/cpu.h>
 #include <asm/reboot.h>
 #include <asm/virtext.h>
+#include <asm/mce.h>
 
 /* Alignment required for elf header segment */
 #define ELF_CORE_HEADER_ALIGN   4096
@@ -157,6 +158,13 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
        /* The kernel is broken so disable interrupts */
        local_irq_disable();
 
+       /*
+        * MCE should be disabled in all CPUs together, because otherwise there
+        * exists a race window where a portion of online CPUs is responsive to
+        * MCE, which causes MCE synchronization timeout.
+        */
+       cpu_emergency_mce_disable();
+
        kdump_nmi_shootdown_cpus();
 
        /*
-- 
1.9.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to