From: Reza Arbab <ar...@linux.ibm.com>

Add an MCE notifier intended to service memcpy_mcsafe().

The notifier uses this heuristic: if an uncorrectable error (UE) occurs
when accessing device memory, and the faulting instruction has a fixup
entry, the callback returns NOTIFY_STOP.

This causes the notification mechanism to consider the MCE handled and
continue execution at the fixup address, which returns -EFAULT from the
memcpy_mcsafe() call.

Signed-off-by: Reza Arbab <ar...@linux.ibm.com>
---
 arch/powerpc/kernel/mce.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 0233c0ee45ab..a8348a9bea5b 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -56,6 +56,40 @@ int mce_unregister_notifier(struct notifier_block *nb)
        return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
 }
 
+/* MCE notifier: NOTIFY_STOP for a UE on device memory that has a fixup. */
+static int check_memcpy_mcsafe(struct notifier_block *nb, unsigned long val,
+                              void *data)
+{
+       struct machine_check_event *evt = data;
+       unsigned long pfn;
+
+       if (evt->error_type != MCE_ERROR_TYPE_UE ||
+           !evt->u.ue_error.physical_address_provided)
+               return NOTIFY_DONE;
+
+       /* pfn_to_page() never returns NULL; validate the pfn itself. */
+       pfn = evt->u.ue_error.physical_address >> PAGE_SHIFT;
+       if (!pfn_valid(pfn))
+               return NOTIFY_DONE;
+
+       /* HMM and PMEM */
+       if (is_zone_device_page(pfn_to_page(pfn)) &&
+           evt->u.ue_error.fixup_address_provided)
+               return NOTIFY_STOP;
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block memcpy_mcsafe_nb = {
+       .notifier_call = check_memcpy_mcsafe    /* callback on the MCE chain */
+};
+
+/* Register at arch_initcall; propagates mce_register_notifier()'s result. */
+static int __init mce_mcsafe_register(void)
+{
+       return mce_register_notifier(&memcpy_mcsafe_nb);
+}
+arch_initcall(mce_mcsafe_register);
+
 static void mce_set_error_info(struct machine_check_event *mce,
                               struct mce_error_info *mce_err)
 {
-- 
2.20.1

Reply via email to