From: Mahesh Salgaonkar <mah...@linux.vnet.ibm.com>

Extract the MCE error details from the RTAS extended error log and display
them on the console.

With this patch you should now see MCE logs like the ones below:

[  142.371818] Severe Machine check interrupt [Recovered]
[  142.371822]   NIP [d00000000ca301b8]: init_module+0x1b8/0x338 [bork_kernel]
[  142.371822]   Initiator: CPU
[  142.371823]   Error type: SLB [Multihit]
[  142.371824]     Effective address: d00000000ca70000

Signed-off-by: Mahesh Salgaonkar <mah...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/rtas.h      |    5 +
 arch/powerpc/platforms/pseries/ras.c |  131 ++++++++++++++++++++++++++++++++++
 2 files changed, 136 insertions(+)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index ceeed2dd489b..26bc3d5c4992 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -197,6 +197,11 @@ static inline uint8_t rtas_error_extended(const struct rtas_error_log *elog)
        return (elog->byte1 & 0x04) >> 2;
 }
 
+static inline uint8_t rtas_error_initiator(const struct rtas_error_log *elog)
+{
+       return (elog->byte2 & 0xf0) >> 4;       /* upper four bits of byte2 */
+}
+
 #define rtas_error_type(x)     ((x)->byte3)
 
 static inline
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 9aa7885e0148..7d4d2b8bc019 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -427,6 +427,135 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
        return 0; /* need to perform reset */
 }
 
+#define VAL_TO_STRING(ar, val) (((val) < ARRAY_SIZE(ar)) ? (ar)[val] : "Unknown")
+/* Print severity, NIP, initiator, error type and address of a machine check. */
+static void pseries_print_mce_info(struct pt_regs *regs,
+                                               struct rtas_error_log *errp)
+{
+       const char *level, *sevstr;
+       struct pseries_errorlog *pseries_log;
+       struct pseries_mc_errorlog *mce_log;
+       uint8_t error_type, err_sub_type;
+       uint64_t addr;
+       uint8_t initiator = rtas_error_initiator(errp);
+       int disposition = rtas_error_disposition(errp);
+       /* The tables below are indexed directly by values read from the log. */
+       static const char * const initiators[] = {
+               "Unknown",
+               "CPU",
+               "PCI",
+               "ISA",
+               "Memory",
+               "Power Mgmt",
+       };
+       static const char * const mc_err_types[] = {
+               "UE",
+               "SLB",
+               "ERAT",
+               "TLB",
+               "D-Cache",
+               "Unknown",
+               "I-Cache",
+       };
+       static const char * const mc_ue_types[] = {
+               "Indeterminate",
+               "Instruction fetch",
+               "Page table walk ifetch",
+               "Load/Store",
+               "Page table walk Load/Store",
+       };
+
+       /* Valid SLB sub-error values are 0x0, 0x1 and 0x2 */
+       static const char * const mc_slb_types[] = {
+               "Parity",
+               "Multihit",
+               "Indeterminate",
+       };
+
+       /* Valid TLB and ERAT sub-error values are 0x1, 0x2 and 0x3 */
+       static const char * const mc_soft_types[] = {
+               "Unknown",      /* 0x0: not a valid sub-error value */
+               "Parity",
+               "Multihit",
+               "Indeterminate",
+       };
+       /* Details are printed only if the extended log has an MCE section. */
+       if (!rtas_error_extended(errp)) {
+               pr_err("Machine check interrupt: Missing extended error log\n");
+               return;
+       }
+
+       pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
+       if (pseries_log == NULL)
+               return;
+
+       mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
+
+       error_type = rtas_mc_error_type(mce_log);
+       err_sub_type = rtas_mc_error_sub_type(mce_log);
+       /* Map the RTAS severity to a printk level and a message prefix. */
+       switch (rtas_error_severity(errp)) {
+       case RTAS_SEVERITY_NO_ERROR:
+               level = KERN_INFO;
+               sevstr = "Harmless";
+               break;
+       case RTAS_SEVERITY_WARNING:
+               level = KERN_WARNING;
+               sevstr = "";
+               break;
+       case RTAS_SEVERITY_ERROR:
+       case RTAS_SEVERITY_ERROR_SYNC:
+               level = KERN_ERR;
+               sevstr = "Severe";
+               break;
+       case RTAS_SEVERITY_FATAL:
+       default:        /* any other severity value is reported as fatal */
+               level = KERN_ERR;
+               sevstr = "Fatal";
+               break;
+       }
+       /* 'level' is emitted as the leading %s of every line printed below. */
+       printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
+               disposition == RTAS_DISP_FULLY_RECOVERED ?
+               "Recovered" : "Not recovered");
+       if (user_mode(regs)) {
+               printk("%s  NIP: [%016lx] PID: %d Comm: %s\n", level,
+                       regs->nip, current->pid, current->comm);
+       } else {
+               printk("%s  NIP [%016lx]: %pS\n", level, regs->nip,
+                       (void *)regs->nip);
+       }
+       printk("%s  Initiator: %s\n", level,
+                               VAL_TO_STRING(initiators, initiator));
+       /* The sub-type table depends on the error type; others print type only. */
+       switch (error_type) {
+       case PSERIES_MC_ERROR_TYPE_UE:
+               printk("%s  Error type: %s [%s]\n", level,
+                       VAL_TO_STRING(mc_err_types, error_type),
+                       VAL_TO_STRING(mc_ue_types, err_sub_type));
+               break;
+       case PSERIES_MC_ERROR_TYPE_SLB:
+               printk("%s  Error type: %s [%s]\n", level,
+                       VAL_TO_STRING(mc_err_types, error_type),
+                       VAL_TO_STRING(mc_slb_types, err_sub_type));
+               break;
+       case PSERIES_MC_ERROR_TYPE_ERAT:
+       case PSERIES_MC_ERROR_TYPE_TLB:
+               printk("%s  Error type: %s [%s]\n", level,
+                       VAL_TO_STRING(mc_err_types, error_type),
+                       VAL_TO_STRING(mc_soft_types, err_sub_type));
+               break;
+       default:
+               printk("%s  Error type: %s\n", level,
+                       VAL_TO_STRING(mc_err_types, error_type));
+               break;
+       }
+       /* A zero effective address is treated as "none" and is not printed. */
+       addr = rtas_mc_get_effective_addr(mce_log);
+       if (addr)
+               printk("%s    Effective address: %016llx\n", level, addr);
+}
+
 static int mce_handle_error(struct rtas_error_log *errp)
 {
        struct pseries_errorlog *pseries_log;
@@ -481,6 +610,8 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
        int recovered = 0;
        int disposition = rtas_error_disposition(err);
 
+       pseries_print_mce_info(regs, err);
+
        if (!(regs->msr & MSR_RI)) {
                /* If MSR_RI isn't set, we cannot recover */
                recovered = 0;

Reply via email to