From: Reza Arbab <ar...@linux.ibm.com>

If a notifier returns NOTIFY_STOP, consider the MCE handled, just as we do when machine_check_early() returns 1.
Signed-off-by: Reza Arbab <ar...@linux.ibm.com> --- arch/powerpc/include/asm/asm-prototypes.h | 2 +- arch/powerpc/include/asm/mce.h | 3 +- arch/powerpc/kernel/exceptions-64s.S | 3 ++ arch/powerpc/kernel/mce.c | 37 ++++++++++++++++++----- 4 files changed, 35 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index f66f26ef3ce0..49ee8f08de2a 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -72,7 +72,7 @@ void machine_check_exception(struct pt_regs *regs); void emulation_assist_interrupt(struct pt_regs *regs); long do_slb_fault(struct pt_regs *regs, unsigned long ea); void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err); -void machine_check_notify(struct pt_regs *regs); +long machine_check_notify(struct pt_regs *regs); /* signals, syscalls and interrupts */ long sys_swapcontext(struct ucontext __user *old_ctx, diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 948bef579086..240dd1fdfe35 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -122,7 +122,8 @@ struct machine_check_event { enum MCE_UeErrorType ue_error_type:8; u8 effective_address_provided; u8 physical_address_provided; - u8 reserved_1[5]; + u8 process_event; + u8 reserved_1[4]; u64 effective_address; u64 physical_address; u8 reserved_2[8]; diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 2e56014fca21..c83e38a403fd 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -460,6 +460,9 @@ EXC_COMMON_BEGIN(machine_check_handle_early) addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_notify + ld r11,RESULT(r1) + or r3,r3,r11 + std r3,RESULT(r1) ld r12,_MSR(r1) BEGIN_FTR_SECTION diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 0ab171b41ede..4a37928ab30e 100644 --- a/arch/powerpc/kernel/mce.c 
+++ b/arch/powerpc/kernel/mce.c @@ -157,6 +157,8 @@ void save_mce_event(struct pt_regs *regs, long handled, mce->u.ue_error.physical_address_provided = true; mce->u.ue_error.physical_address = phys_addr; } + + mce->u.ue_error.process_event = true; } return; } @@ -241,6 +243,10 @@ void machine_check_queue_event(void) if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return; + if (evt.error_type == MCE_ERROR_TYPE_UE && + !evt.u.ue_error.process_event) + return; + index = __this_cpu_inc_return(mce_queue_count) - 1; /* If queue is full, just return for now. */ if (index >= MAX_MC_EVT) { @@ -647,16 +653,31 @@ long hmi_exception_realmode(struct pt_regs *regs) return 1; } -void machine_check_notify(struct pt_regs *regs) +long machine_check_notify(struct pt_regs *regs) { - struct machine_check_event evt; + int index = __this_cpu_read(mce_nest_count) - 1; + struct machine_check_event *evt; + int rc; - if (!get_mce_event(&evt, MCE_EVENT_DONTRELEASE)) - return; + if (index < 0 || index >= MAX_MC_EVT) + return 0; - blocking_notifier_call_chain(&mce_notifier_list, 0, &evt); + evt = this_cpu_ptr(&mce_event[index]); - if (evt.error_type == MCE_ERROR_TYPE_UE && - evt.u.ue_error.physical_address_provided) - machine_check_ue_event(&evt); + rc = blocking_notifier_call_chain(&mce_notifier_list, 0, evt); + if (rc & NOTIFY_STOP_MASK) { + if (evt->error_type == MCE_ERROR_TYPE_UE) + evt->u.ue_error.process_event = false; + + if ((rc & NOTIFY_STOP_MASK) && (regs->msr & MSR_RI)) + evt->disposition = MCE_DISPOSITION_RECOVERED; + + return 1; + } + + if (evt->error_type == MCE_ERROR_TYPE_UE && + evt->u.ue_error.physical_address_provided) + machine_check_ue_event(evt); + + return 0; } -- 2.20.1