From: Mahesh Salgaonkar <mah...@linux.vnet.ibm.com>

Once we get a high-level MCE error event from OPAL, process it and figure
out whether it is recoverable. If it is, take corrective action.

TODO:
- Rework the handling of asynchronous MCE errors.
  - Update opal_recover_mce() to ignore async errors.
- Update flush_and_reload_slb() to avoid SLB reload in radix mode.

Signed-off-by: Mahesh Salgaonkar <mah...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/mce.h        |    3 +++
 arch/powerpc/kernel/mce.c             |   26 +++++++++++++++++++++++
 arch/powerpc/kernel/mce_power.c       |   38 +++++++++++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/opal.c |    2 ++
 4 files changed, 69 insertions(+)

diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 36db6b0..69e4a42 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -88,9 +88,12 @@ extern void save_mce_event(struct pt_regs *regs, long handled,
                           struct mce_error_info *mce_err, uint64_t nip,
                           uint64_t addr);
 extern int get_mce_event(struct OpalMachineCheckEvent *mce, bool release);
+extern int set_mce_event(struct OpalMachineCheckEvent *mce);
 extern void release_mce_event(void);
 extern void machine_check_queue_event(void);
 extern void machine_check_print_event_info(struct OpalMachineCheckEvent *evt);
 extern uint64_t get_mce_fault_addr(struct OpalMachineCheckEvent *evt);
+extern long handle_mce_errors(struct pt_regs *regs,
+                                       struct OpalMachineCheckEvent *evt);
 
 #endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 51a7c64..36da14a3 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -166,6 +166,32 @@ int get_mce_event(struct OpalMachineCheckEvent *mce, bool release)
        return ret;
 }
 
+int set_mce_event(struct OpalMachineCheckEvent *mce)
+{
+       int index = __this_cpu_inc_return(mce_nest_count) - 1;
+       struct OpalMachineCheckEvent *mc_evt = this_cpu_ptr(&mce_event[index]);
+       int ret = 0;
+
+       /* Sanity check: report failure (0) on a negative slot index. */
+       if (index < 0)
+               return ret;
+
+       /* Store the event only if the slot index is below MAX_MC_EVT. */
+       if (index < MAX_MC_EVT) {
+               /* Copy the caller's event into this CPU's slot, if given. */
+               if (mce) {
+                       *mc_evt = *mce;
+                       /* Convert big-endian fields to CPU byte order. */
+                       mc_evt->srr0 = be64_to_cpu(mce->srr0);
+                       mc_evt->srr1 = be64_to_cpu(mce->srr1);
+                       mc_evt->u.ue_error.effective_address =
+                               be64_to_cpu(mce->u.ue_error.effective_address);
+               }
+               ret = 1;        /* slot available (set even if mce is NULL) */
+       }
+       return ret;
+}
+
 void release_mce_event(void)
 {
        get_mce_event(NULL, true);
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 7353991..91ed2ef 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -372,3 +372,41 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs)
        save_mce_event(regs, handled, &mce_error_info, nip, addr);
        return handled;
 }
+
+static long flush_tlb(void)
+{
+       long handled = 0;       /* stays 0 when no flush_tlb hook is set */
+
+       if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
+               cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
+               handled = 1;    /* the hook was invoked */
+       }
+       return handled;
+}
+
+long handle_mce_errors(struct pt_regs *regs, struct OpalMachineCheckEvent *evt)
+{
+       long handled = 1;
+
+       if (evt->disposition == MCE_DISPOSITION_RECOVERED)
+               return handled; /* already marked recovered: return 1 */
+
+       switch (evt->error_type) {
+       case MCE_ERROR_TYPE_UE:
+               handled = mce_handle_ue_error(regs);
+               break;
+       case MCE_ERROR_TYPE_SLB:
+       case MCE_ERROR_TYPE_ERAT:
+               flush_and_reload_slb();
+               handled = 1;    /* set unconditionally after the flush */
+               break;
+       case MCE_ERROR_TYPE_TLB:
+               handled = flush_tlb();
+               break;
+       default:
+               handled = 0;    /* no recovery action for other types */
+       }
+       if (handled)
+               evt->disposition = MCE_DISPOSITION_RECOVERED;
+       return handled;
+}
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 263c57e..f1115c4 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -501,6 +501,8 @@ int opal_machine_check_early(struct pt_regs *regs, long *handled)
        if (rc != OPAL_SUCCESS)
                return -1;
 
+       *handled = handle_mce_errors(regs, &evt);
+       set_mce_event(&evt);
        return 0;
 }
 

Reply via email to