On 06/08/2018 07:21 AM, Nicholas Piggin wrote: > On Thu, 07 Jun 2018 22:59:04 +0530 > Mahesh J Salgaonkar <mah...@linux.vnet.ibm.com> wrote: > >> From: Mahesh Salgaonkar <mah...@linux.vnet.ibm.com> >> >> Extract the MCE error details from RTAS extended log and display it to >> console. >> >> With this patch you should now see mce logs like below: >> >> [ 142.371818] Severe Machine check interrupt [Recovered] >> [ 142.371822] NIP [d00000000ca301b8]: init_module+0x1b8/0x338 >> [bork_kernel] >> [ 142.371822] Initiator: CPU >> [ 142.371823] Error type: SLB [Multihit] >> [ 142.371824] Effective address: d00000000ca70000 >> >> Signed-off-by: Mahesh Salgaonkar <mah...@linux.vnet.ibm.com> >> --- >> arch/powerpc/include/asm/rtas.h | 5 + >> arch/powerpc/platforms/pseries/ras.c | 128 >> +++++++++++++++++++++++++++++++++- >> 2 files changed, 131 insertions(+), 2 deletions(-) >> >> diff --git a/arch/powerpc/include/asm/rtas.h >> b/arch/powerpc/include/asm/rtas.h >> index 3f2fba7ef23b..8100a95c133a 100644 >> --- a/arch/powerpc/include/asm/rtas.h >> +++ b/arch/powerpc/include/asm/rtas.h >> @@ -190,6 +190,11 @@ static inline uint8_t rtas_error_extended(const struct >> rtas_error_log *elog) >> return (elog->byte1 & 0x04) >> 2; >> } >> >> +static inline uint8_t rtas_error_initiator(const struct rtas_error_log >> *elog) >> +{ >> + return (elog->byte2 & 0xf0) >> 4; >> +} >> + >> #define rtas_error_type(x) ((x)->byte3) >> >> static inline >> diff --git a/arch/powerpc/platforms/pseries/ras.c >> b/arch/powerpc/platforms/pseries/ras.c >> index e56759d92356..cd9446980092 100644 >> --- a/arch/powerpc/platforms/pseries/ras.c >> +++ b/arch/powerpc/platforms/pseries/ras.c >> @@ -422,7 +422,130 @@ int pSeries_system_reset_exception(struct pt_regs >> *regs) >> return 0; /* need to perform reset */ >> } >> >> -static int mce_handle_error(struct rtas_error_log *errp) >> +#define VAL_TO_STRING(ar, val) ((val < ARRAY_SIZE(ar)) ? 
ar[val] : >> "Unknown") >> + >> +static void pseries_print_mce_info(struct pt_regs *regs, >> + struct rtas_error_log *errp, int disposition) >> +{ >> + const char *level, *sevstr; >> + struct pseries_errorlog *pseries_log; >> + struct pseries_mc_errorlog *mce_log; >> + uint8_t error_type, err_sub_type; >> + uint8_t initiator = rtas_error_initiator(errp); >> + uint64_t addr; >> + >> + static const char * const initiators[] = { >> + "Unknown", >> + "CPU", >> + "PCI", >> + "ISA", >> + "Memory", >> + "Power Mgmt", >> + }; >> + static const char * const mc_err_types[] = { >> + "UE", >> + "SLB", >> + "ERAT", >> + "TLB", >> + "D-Cache", >> + "Unknown", >> + "I-Cache", >> + }; >> + static const char * const mc_ue_types[] = { >> + "Indeterminate", >> + "Instruction fetch", >> + "Page table walk ifetch", >> + "Load/Store", >> + "Page table walk Load/Store", >> + }; >> + >> + /* SLB sub errors valid values are 0x0, 0x1, 0x2 */ >> + static const char * const mc_slb_types[] = { >> + "Parity", >> + "Multihit", >> + "Indeterminate", >> + }; >> + >> + /* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */ >> + static const char * const mc_soft_types[] = { >> + "Unknown", >> + "Parity", >> + "Multihit", >> + "Indeterminate", >> + }; >> + >> + pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); >> + if (pseries_log == NULL) >> + return; >> + >> + mce_log = (struct pseries_mc_errorlog *)pseries_log->data; >> + >> + error_type = rtas_mc_error_type(mce_log); >> + err_sub_type = rtas_mc_error_sub_type(mce_log); >> + >> + switch (rtas_error_severity(errp)) { >> + case RTAS_SEVERITY_NO_ERROR: >> + level = KERN_INFO; >> + sevstr = "Harmless"; >> + break; >> + case RTAS_SEVERITY_WARNING: >> + level = KERN_WARNING; >> + sevstr = ""; >> + break; >> + case RTAS_SEVERITY_ERROR: >> + case RTAS_SEVERITY_ERROR_SYNC: >> + level = KERN_ERR; >> + sevstr = "Severe"; >> + break; >> + case RTAS_SEVERITY_FATAL: >> + default: >> + level = KERN_ERR; >> + sevstr = "Fatal"; >> + break; 
>> + } >> + >> + printk("%s%s Machine check interrupt [%s]\n", level, sevstr, >> + disposition == RTAS_DISP_FULLY_RECOVERED ? >> + "Recovered" : "Not recovered"); >> + if (user_mode(regs)) { >> + printk("%s NIP: [%016lx] PID: %d Comm: %s\n", level, >> + regs->nip, current->pid, current->comm); >> + } else { >> + printk("%s NIP [%016lx]: %pS\n", level, regs->nip, >> + (void *)regs->nip); >> + } > > I think it's probably still useful to print pid/comm for kernel mode > faults if !in_interrupt()... I see you're basically taking kernel/mce.c > and doing the same thing. > > Is there any reasonable way to share code here?
I did think of doing that, but I wanted to keep this patch series simple enough that it can be backported easily to very old kernels. I will work on consolidating the code as an enhancement later. Thanks, -Mahesh.