On Fri, 15 Dec 2017 19:14:55 +1100 Balbir Singh <bsinghar...@gmail.com> wrote:
> Certain HMI's such as malfunction error propagate through > all threads/core on the system. If a thread was offline > prior to us crashing the system and jumping to the kdump > kernel, bad things happen when it wakes up due to an HMI > in the kdump kernel. > > There are several possible ways to solve this problem > > 1. Put the offline cores in a state such that they are > not woken up for machine check and HMI errors. This > does not work, since we might need to wake up offline > threads to handle TB errors > 2. Ignore HMI errors, setup HMEER to mask HMI errors, > but this still leads the window open for any MCEs > and masking them for the duration of the dump might > be a concern > 3. Wake up offline CPUs, as in send them to > crash_ipi_callback (not wake them up as in mark them > online as seen by the hotplug). kexec does a > wake_online_cpus() call, this patch does something > similar, but instead sends an IPI and forces them to > crash_ipi_callback() > > This patch takes approach #3. > > Care is taken to enable this only for powenv platforms > via crash_wake_offline (a global value set at setup > time). The crash code sends out IPI's to all CPU's > which then move to crash_ipi_callback and kexec_smp_wait(). > Reviewed-by: Nicholas Piggin <npig...@gmail.com> > Signed-off-by: Balbir Singh <bsinghar...@gmail.com> > --- > > Changelog v4 > - Handle the case for crash IPI's sent via non NMI > - Drop system reset via SCOM in non-power saving > mode > > arch/powerpc/include/asm/kexec.h | 2 ++ > arch/powerpc/kernel/crash.c | 13 ++++++++++++- > arch/powerpc/kernel/smp.c | 18 ++++++++++++++++++ > arch/powerpc/platforms/powernv/smp.c | 28 ++++++++++++++++++++++++++++ > 4 files changed, 60 insertions(+), 1 deletion(-) > > diff --git a/arch/powerpc/include/asm/kexec.h > b/arch/powerpc/include/asm/kexec.h > index 4419d435639a..9dcbfa6bbb91 100644 > --- a/arch/powerpc/include/asm/kexec.h > +++ b/arch/powerpc/include/asm/kexec.h > @@ -73,6 +73,8 @@ extern void kexec_smp_wait(void); /* get and clear naca > physid, wait for > master to copy new code to 0 */ > extern int crashing_cpu; > extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)); > +extern void crash_ipi_callback(struct pt_regs *); > +extern int crash_wake_offline; > > struct kimage; > struct pt_regs; > diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c > index 29c56ca2ddfd..00b215125d3e 100644 > --- a/arch/powerpc/kernel/crash.c > +++ b/arch/powerpc/kernel/crash.c > @@ -44,6 +44,14 @@ > #define REAL_MODE_TIMEOUT 10000 > > static int time_to_dump; > +/* > + * crash_wake_offline should be set to 1 by platforms that intend to wake > + * up offline cpus prior to jumping to a kdump kernel. Currently powernv > + * sets it to 1, since we want to avoid things from happening when an > + * offline CPU wakes up due to something like an HMI (malfunction error), > + * which propagates to all threads. > + */ > +int crash_wake_offline; > > #define CRASH_HANDLER_MAX 3 > /* List of shutdown handles */ > @@ -63,7 +71,7 @@ static int handle_fault(struct pt_regs *regs) > #ifdef CONFIG_SMP > > static atomic_t cpus_in_crash; > -static void crash_ipi_callback(struct pt_regs *regs) > +void crash_ipi_callback(struct pt_regs *regs) > { > static cpumask_t cpus_state_saved = CPU_MASK_NONE; > > @@ -106,6 +114,9 @@ static void crash_kexec_prepare_cpus(int cpu) > > printk(KERN_EMERG "Sending IPI to other CPUs\n"); > > + if (crash_wake_offline) > + ncpus = num_present_cpus() - 1; > + > crash_send_ipi(crash_ipi_callback); > smp_wmb(); > > diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c > index e0a4c1f82e25..bbe7634b3a43 100644 > --- a/arch/powerpc/kernel/smp.c > +++ b/arch/powerpc/kernel/smp.c > @@ -543,7 +543,25 @@ void smp_send_debugger_break(void) > #ifdef CONFIG_KEXEC_CORE > void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) > { > + int cpu; > + > smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000); > + if (kdump_in_progress() && crash_wake_offline) { > + for_each_present_cpu(cpu) { > + if (cpu_online(cpu)) > + continue; > + /* > + * crash_ipi_callback will wait for > + * all cpus, including offline CPUs. > + * We don't care about nmi_ipi_function. > + * Offline cpus will jump straight into > + * crash_ipi_callback, we can skip the > + * entire NMI dance and waiting for > + * cpus to clear pending mask, etc. > + */ > + do_smp_send_nmi_ipi(cpu); > + } > + } > } > #endif > > diff --git a/arch/powerpc/platforms/powernv/smp.c > b/arch/powerpc/platforms/powernv/smp.c > index ba030669eca1..9664c8461f03 100644 > --- a/arch/powerpc/platforms/powernv/smp.c > +++ b/arch/powerpc/platforms/powernv/smp.c > @@ -37,6 +37,8 @@ > #include <asm/kvm_ppc.h> > #include <asm/ppc-opcode.h> > #include <asm/cpuidle.h> > +#include <asm/kexec.h> > +#include <asm/reg.h> > > #include "powernv.h" > > @@ -209,9 +211,32 @@ static void pnv_smp_cpu_kill_self(void) > } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) { > unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); > asm volatile(PPC_MSGCLR(%0) : : "r" (msg)); > + } else if ((srr1 & wmask) == SRR1_WAKERESET) { > + irq_set_pending_from_srr1(srr1); > + /* Does not return */ > } > + > smp_mb(); > > + /* > + * For kdump kernels, we process the ipi and jump to > + * crash_ipi_callback > + */ > + if (kdump_in_progress()) { > + /* > + * If we got to this point, we've not used > + * NMI's, otherwise we would have gone > + * via the SRR1_WAKERESET path. We are > + * using regular IPI's for waking up offline > + * threads. > + */ > + struct pt_regs regs; > + > + ppc_save_regs(®s); > + crash_ipi_callback(®s); > + /* Does not return */ > + } > + > if (cpu_core_split_required()) > continue; > > @@ -371,5 +396,8 @@ void __init pnv_smp_init(void) > > #ifdef CONFIG_HOTPLUG_CPU > ppc_md.cpu_die = pnv_smp_cpu_kill_self; > +#ifdef CONFIG_KEXEC_CORE > + crash_wake_offline = 1; > +#endif > #endif > }