Parse the AER uncorrectable and correctable error status
registers to print the error type and severity.

Output looks like:
EEH:AER Uncorrectable Error
EEH:AER Error Type: Data Link Protocol [Fatal]

Signed-off-by: Ganesh Goudar <ganes...@linux.ibm.com>
---
 arch/powerpc/kernel/eeh.c | 84 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 83 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 83fe99861eb1..03e1e2eeb679 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -139,6 +139,49 @@ struct eeh_stats {
 
 static struct eeh_stats eeh_stats;
 
+static const char * const aer_uncor_errors[] = {
+       "Undefined",
+       "Undefined",
+       "Undefined",
+       "Undefined",
+       "Data Link Protocol",
+       "Surprise Down",
+       "Poisoned TLP",
+       "Flow Control Protocol",
+       "Completion Timeout",
+       "Completer Abort",
+       "Unexpected Completion",
+       "Receiver Overflow",
+       "Malformed TLP",
+       "ECRC Error",
+       "Unsupported Request",
+       "ACS Violation",
+       "Uncorrectable Internal Error",
+       "MC Blocked TLP",
+       "AtomicOp Egress Blocked",
+       "TLPPrefix Blocked",
+       "Poisoned TLP Egress Blocked"
+};
+
+/*
+ * Names of the AER correctable errors. The array index is the bit position
+ * tested in the Correctable Error Status value.
+ */
+static const char * const aer_cor_errors[] = {
+       [0]  = "Receiver Error",
+       [1]  = "Undefined", [2] = "Undefined", [3] = "Undefined",
+       [4]  = "Undefined", [5] = "Undefined",
+       [6]  = "Bad TLP",
+       [7]  = "Bad DLLP",
+       [8]  = "Replay Num Rollover",
+       [9]  = "Undefined", [10] = "Undefined", [11] = "Undefined",
+       [12] = "Replay Timer Timeout",
+       [13] = "Advisory Non-Fatal Error",
+       [14] = "Corrected Internal Error",
+       [15] = "Header Log Overflow",
+};
+
+
 static int __init eeh_setup(char *str)
 {
        if (!strcmp(str, "off"))
@@ -160,6 +203,43 @@ void eeh_show_enabled(void)
                pr_info("EEH: No capable adapters found: recovery disabled.\n");
 }
 
+/*
+ * Decode the AER status registers of @edev and log every asserted error.
+ * @cap is the config-space offset of the AER capability. Each uncorrectable
+ * error is tagged Fatal or Non-Fatal from the matching severity register bit.
+ */
+static void eeh_parse_aer_registers(struct eeh_dev *edev, int cap)
+{
+       /* Zeroed in case a read_config callback returns without storing a value. */
+       u32 uncor_status = 0, uncor_severity = 0, cor_status = 0;
+       const char *error_type;
+       unsigned int i;
+
+       eeh_ops->read_config(edev, cap + PCI_ERR_UNCOR_STATUS, 4, &uncor_status);
+       eeh_ops->read_config(edev, cap + PCI_ERR_UNCOR_SEVER, 4, &uncor_severity);
+       eeh_ops->read_config(edev, cap + PCI_ERR_COR_STATUS, 4, &cor_status);
+
+       if (uncor_status)
+               pr_err("EEH:AER Uncorrectable Error\n");
+       /* Scan all 32 bits so a set bit beyond the name table prints "Unknown". */
+       for (i = 0; uncor_status && i < 32; i++) {
+               if (!(uncor_status & (1U << i)))
+                       continue;
+               error_type = i < ARRAY_SIZE(aer_uncor_errors) ? aer_uncor_errors[i] : "Unknown";
+               pr_err("EEH:AER Error Type: %s [%s]\n", error_type,
+                      (uncor_severity & (1U << i)) ? "Fatal" : "Non-Fatal");
+       }
+
+       if (cor_status)
+               pr_err("EEH:AER Correctable Error\n");
+       for (i = 0; cor_status && i < 32; i++) {
+               if (!(cor_status & (1U << i)))
+                       continue;
+               error_type = i < ARRAY_SIZE(aer_cor_errors) ? aer_cor_errors[i] : "Unknown";
+               pr_err("EEH:AER Error Type: %s\n", error_type);
+       }
+}
+
 /*
  * This routine captures assorted PCI configuration space data
  * for the indicated PCI device, and puts them into a buffer
@@ -237,9 +317,11 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
                pr_warn("%s\n", buffer);
        }
 
-       /* If AER capable, dump it */
+       /* If AER capable, parse and dump it */
        cap = edev->aer_cap;
        if (cap) {
+               eeh_parse_aer_registers(edev, cap);
+
                n += scnprintf(buf+n, len-n, "pci-e AER:\n");
                pr_warn("EEH: PCI-E AER capability register set follows:\n");
 
-- 
2.48.1


Reply via email to