On Thu, May 09, 2024 at 01:39:07PM -0700, Axel Rasmussen wrote:
> For real MCEs, various architectures print log messages when poisoned
> memory is accessed (which results in a SIGBUS). These messages can be
> important for users to understand the issue.
> 
> On the other hand, we have the userfaultfd UFFDIO_POISON operation,
> which can "simulate" memory poisoning. That particular process will get

It also covers swapin errors, as we discussed before, so it is not always SIM.

I was thinking we should also do that report for swapin errors; however,
I then noticed that they weren't reported even before they were replaced
by pte markers in commit 15520a3f04 (2022):

@@ -3727,8 +3731,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
                        put_page(vmf->page);
                } else if (is_hwpoison_entry(entry)) {
                        ret = VM_FAULT_HWPOISON;
-               } else if (is_swapin_error_entry(entry)) {
-                       ret = VM_FAULT_SIGBUS;
                } else if (is_pte_marker_entry(entry)) {
                        ret = handle_pte_marker(vmf);
                } else {

So I am guessing it is fine to simply stop reporting this case to syslog.
That means the behavior goes back and forth, but hopefully this is even
rarer than hwpoison so nobody will notice.

With that, the idea looks valid to me, but perhaps a rename is needed.
Maybe _QUIESCE or _SILENT?

> SIGBUS on access to the memory, but this effect is tied to an MM, rather
> than being global like a real poison event. So, we don't want to log
> about this case to the global kernel log; instead, let the process
> itself log or whatever else it wants to do. This avoids spamming the
> kernel log, and avoids e.g. drowning out real events with simulated
> ones.
> 
> To identify this situation, add a new VM_FAULT_HWPOISON_SIM flag. This
> is expected to be set *in addition to* one of the existing
> VM_FAULT_HWPOISON or VM_FAULT_HWPOISON_LARGE flags (which are mutually
> exclusive).
> 
> Signed-off-by: Axel Rasmussen <axelrasmus...@google.com>
> ---
>  arch/parisc/mm/fault.c   | 7 +++++--
>  arch/powerpc/mm/fault.c  | 6 ++++--
>  arch/x86/mm/fault.c      | 6 ++++--
>  include/linux/mm_types.h | 5 +++++
>  mm/hugetlb.c             | 3 ++-
>  mm/memory.c              | 2 +-
>  6 files changed, 21 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
> index c39de84e98b0..e5370bcadf27 100644
> --- a/arch/parisc/mm/fault.c
> +++ b/arch/parisc/mm/fault.c
> @@ -400,9 +400,12 @@ void do_page_fault(struct pt_regs *regs, unsigned long 
> code,
>  #ifdef CONFIG_MEMORY_FAILURE
>               if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
>                       unsigned int lsb = 0;
> -                     printk(KERN_ERR
> +
> +                     if (!(fault & VM_FAULT_HWPOISON_SIM)) {
> +                             pr_err(
>       "MCE: Killing %s:%d due to hardware memory corruption fault at %08lx\n",
> -                     tsk->comm, tsk->pid, address);
> +                             tsk->comm, tsk->pid, address);
> +                     }
>                       /*
>                        * Either small page or large page may be poisoned.
>                        * In other words, VM_FAULT_HWPOISON_LARGE and
> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
> index 53335ae21a40..ac5e8a3c7fba 100644
> --- a/arch/powerpc/mm/fault.c
> +++ b/arch/powerpc/mm/fault.c
> @@ -140,8 +140,10 @@ static int do_sigbus(struct pt_regs *regs, unsigned long 
> address,
>       if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
>               unsigned int lsb = 0; /* shutup gcc */
>  
> -             pr_err("MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
> -                     current->comm, current->pid, address);
> +             if (!(fault & VM_FAULT_HWPOISON_SIM)) {
> +                     pr_err("MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
> +                             current->comm, current->pid, address);
> +             }
>  
>               if (fault & VM_FAULT_HWPOISON_LARGE)
>                       lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
> index e4f3c7721f45..16d077a3ad14 100644
> --- a/arch/x86/mm/fault.c
> +++ b/arch/x86/mm/fault.c
> @@ -928,9 +928,11 @@ do_sigbus(struct pt_regs *regs, unsigned long 
> error_code, unsigned long address,
>               struct task_struct *tsk = current;
>               unsigned lsb = 0;
>  
> -             pr_err_ratelimited(
> +             if (!(fault & VM_FAULT_HWPOISON_SIM)) {
> +                     pr_err_ratelimited(
>       "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
> -                     tsk->comm, tsk->pid, address);
> +                             tsk->comm, tsk->pid, address);
> +             }
>               if (fault & VM_FAULT_HWPOISON_LARGE)
>                       lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
>               if (fault & VM_FAULT_HWPOISON)
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 5240bd7bca33..7f8fc3efc5b2 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -1226,6 +1226,9 @@ typedef __bitwise unsigned int vm_fault_t;
>   * @VM_FAULT_HWPOISON_LARGE: Hit poisoned large page. Index encoded
>   *                           in upper bits
>   * @VM_FAULT_SIGSEGV:                segmentation fault
> + * @VM_FAULT_HWPOISON_SIM    Hit poisoned, PTE marker; this indicates a
> + *                           simulated poison (e.g. via userfaultfd's
> + *                              UFFDIO_POISON), not a "real" hwerror.
>   * @VM_FAULT_NOPAGE:         ->fault installed the pte, not return page
>   * @VM_FAULT_LOCKED:         ->fault locked the returned page
>   * @VM_FAULT_RETRY:          ->fault blocked, must retry
> @@ -1245,6 +1248,7 @@ enum vm_fault_reason {
>       VM_FAULT_HWPOISON       = (__force vm_fault_t)0x000010,
>       VM_FAULT_HWPOISON_LARGE = (__force vm_fault_t)0x000020,
>       VM_FAULT_SIGSEGV        = (__force vm_fault_t)0x000040,
> +     VM_FAULT_HWPOISON_SIM   = (__force vm_fault_t)0x000080,
>       VM_FAULT_NOPAGE         = (__force vm_fault_t)0x000100,
>       VM_FAULT_LOCKED         = (__force vm_fault_t)0x000200,
>       VM_FAULT_RETRY          = (__force vm_fault_t)0x000400,
> @@ -1270,6 +1274,7 @@ enum vm_fault_reason {
>       { VM_FAULT_HWPOISON,            "HWPOISON" },   \
>       { VM_FAULT_HWPOISON_LARGE,      "HWPOISON_LARGE" },     \
>       { VM_FAULT_SIGSEGV,             "SIGSEGV" },    \
> +     { VM_FAULT_HWPOISON_SIM,        "HWPOISON_SIM" },       \
>       { VM_FAULT_NOPAGE,              "NOPAGE" },     \
>       { VM_FAULT_LOCKED,              "LOCKED" },     \
>       { VM_FAULT_RETRY,               "RETRY" },      \
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 65456230cc71..2b4e0173e806 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -6485,7 +6485,8 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct 
> vm_area_struct *vma,
>                               pte_marker_get(pte_to_swp_entry(entry));
>  
>                       if (marker & PTE_MARKER_POISONED) {
> -                             ret = VM_FAULT_HWPOISON_LARGE |
> +                             ret = VM_FAULT_HWPOISON_SIM |
> +                                   VM_FAULT_HWPOISON_LARGE |
>                                     VM_FAULT_SET_HINDEX(hstate_index(h));
>                               goto out_mutex;
>                       }
> diff --git a/mm/memory.c b/mm/memory.c
> index d2155ced45f8..29a833b996ae 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -3910,7 +3910,7 @@ static vm_fault_t handle_pte_marker(struct vm_fault 
> *vmf)
>  
>       /* Higher priority than uffd-wp when data corrupted */
>       if (marker & PTE_MARKER_POISONED)
> -             return VM_FAULT_HWPOISON;
> +             return VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_SIM;
>  
>       if (pte_marker_entry_uffd_wp(entry))
>               return pte_marker_handle_uffd_wp(vmf);
> -- 
> 2.45.0.118.g7fe29c98d7-goog
> 

-- 
Peter Xu

Reply via email to