From: Reza Arbab <ar...@linux.ibm.com>

Add an mce notifier intended to service memcpy_mcsafe().

The notifier uses this heuristic: if a UE occurs when accessing device
memory, and the faulting instruction had a fixup entry, the callback
will return NOTIFY_STOP. This causes the notification mechanism to
consider the MCE handled and continue execution at the fixup address,
which returns -EFAULT from the memcpy_mcsafe() call.

Signed-off-by: Reza Arbab <ar...@linux.ibm.com>
---
 arch/powerpc/kernel/mce.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 0233c0ee45ab..a8348a9bea5b 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -56,6 +56,57 @@ int mce_unregister_notifier(struct notifier_block *nb)
 	return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
 }
 
+/*
+ * MCE notifier callback servicing memcpy_mcsafe().
+ *
+ * @nb:   notifier block (unused)
+ * @val:  notifier action value (unused)
+ * @data: the struct machine_check_event being reported
+ *
+ * Returns NOTIFY_STOP for a UE whose reported physical address has a valid
+ * pfn, whose page satisfies is_zone_device_page(), and for which a fixup
+ * address was provided; NOTIFY_DONE in every other case.
+ */
+static int check_memcpy_mcsafe(struct notifier_block *nb, unsigned long val,
+			       void *data)
+{
+	struct machine_check_event *evt = data;
+	unsigned long pfn;
+	struct page *page;
+
+	if (evt->error_type != MCE_ERROR_TYPE_UE ||
+	    !evt->u.ue_error.physical_address_provided)
+		return NOTIFY_DONE;
+
+	/*
+	 * pfn_to_page() does not return NULL for an invalid pfn, so the pfn
+	 * itself must be validated before its struct page is inspected.
+	 */
+	pfn = evt->u.ue_error.physical_address >> PAGE_SHIFT;
+	if (!pfn_valid(pfn))
+		return NOTIFY_DONE;
+
+	page = pfn_to_page(pfn);
+
+	/* HMM and PMEM */
+	if (is_zone_device_page(page) && evt->u.ue_error.fixup_address_provided)
+		return NOTIFY_STOP;
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block memcpy_mcsafe_nb = {
+	.notifier_call = check_memcpy_mcsafe
+};
+
+/* Hook the memcpy_mcsafe() check into the MCE notifier chain (arch_initcall). */
+static int __init mce_mcsafe_register(void)
+{
+	mce_register_notifier(&memcpy_mcsafe_nb);
+	return 0;
+}
+arch_initcall(mce_mcsafe_register);
+
 static void mce_set_error_info(struct machine_check_event *mce,
 			       struct mce_error_info *mce_err)
 {
-- 
2.20.1