Add a trace event for hardware errors reported by the ARMv8.2
RAS extension registers.

Signed-off-by: Tyler Baicar <bai...@os.amperecomputing.com>
---
 arch/arm64/kernel/ras.c   |  3 +++
 drivers/acpi/arm64/aest.c |  4 ++++
 include/ras/ras_event.h   | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 53 insertions(+)

diff --git a/arch/arm64/kernel/ras.c b/arch/arm64/kernel/ras.c
index ca47efa..4e34d63 100644
--- a/arch/arm64/kernel/ras.c
+++ b/arch/arm64/kernel/ras.c
@@ -5,6 +5,7 @@
 #include <linux/smp.h>
 
 #include <asm/ras.h>
+#include <ras/ras_event.h>
 
 void arch_arm_ras_report_error(void)
 {
@@ -50,6 +51,8 @@ void arch_arm_ras_report_error(void)
                               regs.err_misc1);
                }
 
+               trace_arm_ras_ext_event(0, cpu_num, &regs);
+
                /*
                 * In the future, we will treat UER conditions as potentially
                 * recoverable.
diff --git a/drivers/acpi/arm64/aest.c b/drivers/acpi/arm64/aest.c
index fd4f3b5..21ec583 100644
--- a/drivers/acpi/arm64/aest.c
+++ b/drivers/acpi/arm64/aest.c
@@ -13,6 +13,7 @@
 #include <linux/ratelimit.h>
 
 #include <asm/ras.h>
+#include <ras/ras_event.h>
 
 #undef pr_fmt
 #define pr_fmt(fmt) "ACPI AEST: " fmt
@@ -102,6 +103,9 @@ static void aest_proc(struct aest_node_data *data)
 
                aest_print(data, regs, i);
 
+               trace_arm_ras_ext_event(data->node_type, data->data.proc.id,
+                                       &regs);
+
                if (regs.err_status & ERR_STATUS_UE)
                        fatal = true;
 
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 36c5c5e..8b76cb1 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -339,6 +339,52 @@
 );
 
 /*
+ * ARM RAS Extension Events Report
+ *
+ * Generated when an error reported by the ARM RAS extension hardware is
+ * detected; records the caller's type and id plus the ERR_* register values.
+ */
+
+#ifdef CONFIG_ARM64
+#include <asm/ras.h>
+TRACE_EVENT(arm_ras_ext_event,
+
+       TP_PROTO(u8 type, u32 id, const struct ras_ext_regs *regs),
+
+       TP_ARGS(type, id, regs),
+
+       TP_STRUCT__entry(
+               __field(u8,  type)      /* caller's node type (0 from arch code) */
+               __field(u32, id)        /* processor id / CPU number */
+               __field(u64, err_fr)    /* ERR_* values are copied from regs */
+               __field(u64, err_ctlr)
+               __field(u64, err_status)
+               __field(u64, err_addr)
+               __field(u64, err_misc0)
+               __field(u64, err_misc1)
+       ),
+
+       TP_fast_assign(
+               __entry->type = type;
+               __entry->id = id;
+               __entry->err_fr = regs->err_fr;
+               __entry->err_ctlr = regs->err_ctlr;
+               __entry->err_status = regs->err_status;
+               __entry->err_addr = regs->err_addr;
+               __entry->err_misc0 = regs->err_misc0;
+               __entry->err_misc1 = regs->err_misc1;
+       ),
+
+       TP_printk("type: %d; id: %u; ERR_FR: %llx; ERR_CTLR: %llx; "
+                 "ERR_STATUS: %llx; ERR_ADDR: %llx; ERR_MISC0: %llx; "
+                 "ERR_MISC1: %llx",
+                 __entry->type, __entry->id, __entry->err_fr,
+                 __entry->err_ctlr, __entry->err_status, __entry->err_addr,
+                 __entry->err_misc0, __entry->err_misc1)
+);
+#endif
+
+/*
  * memory-failure recovery action result event
  *
  * unsigned long pfn - Page Frame Number of the corrupted page
-- 
1.8.3.1

Reply via email to