On 11/06/2015 01:01 PM, Tony Luck wrote:
Extend the severity checking code to add a new context IN_KERNEL_RECOV
which is used to indicate that the machine check was triggered by code
in the kernel with a fixup entry.

Add code to check for this situation and respond by altering the return
IP to the fixup address and changing the regs->ax so that the recovery
code knows the physical address of the error. Note that we also set bit
63 because 0x0 is a legal physical address.

Signed-off-by: Tony Luck <tony.l...@intel.com>
---
  arch/x86/kernel/cpu/mcheck/mce-severity.c | 19 +++++++++++++++++--
  arch/x86/kernel/cpu/mcheck/mce.c          | 13 ++++++++++---
  2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c 
b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 9c682c222071..1e83842310e8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -12,6 +12,7 @@
  #include <linux/kernel.h>
  #include <linux/seq_file.h>
  #include <linux/init.h>
+#include <linux/module.h>
  #include <linux/debugfs.h>
  #include <asm/mce.h>

@@ -29,7 +30,7 @@
   * panic situations)
   */

-enum context { IN_KERNEL = 1, IN_USER = 2 };
+enum context { IN_KERNEL = 1, IN_USER = 2, IN_KERNEL_RECOV = 3 };
  enum ser { SER_REQUIRED = 1, NO_SER = 2 };
  enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };

@@ -48,6 +49,7 @@ static struct severity {
  #define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
  #define  KERNEL               .context = IN_KERNEL
  #define  USER         .context = IN_USER
+#define  KERNEL_RECOV  .context = IN_KERNEL_RECOV
  #define  SER          .ser = SER_REQUIRED
  #define  NOSER                .ser = NO_SER
  #define  EXCP         .excp = EXCP_CONTEXT
@@ -87,6 +89,10 @@ static struct severity {
                EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
                ),
        MCESEV(
+               PANIC, "In kernel and no restart IP",
+               EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
+               ),
+       MCESEV(
                DEFERRED, "Deferred error",
                NOSER, 
MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
                ),
@@ -123,6 +129,11 @@ static struct severity {
                MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
                ),
        MCESEV(
+               AR, "Action required: data load error recoverable area of 
kernel",
+               SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, 
MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
+               KERNEL_RECOV
+               ),
+       MCESEV(
                AR, "Action required: data load error in a user process",
                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, 
MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
                USER
@@ -183,7 +194,11 @@ static struct severity {
   */
  static int error_context(struct mce *m)
  {
-       return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
+       if ((m->cs & 3) == 3)
+               return IN_USER;
+       if (search_mcexception_tables(m->ip))
+               return IN_KERNEL_RECOV;
+       return IN_KERNEL;
  }

  /*
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 9d014b82a124..472d11150b7a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -31,6 +31,7 @@
  #include <linux/types.h>
  #include <linux/slab.h>
  #include <linux/init.h>
+#include <linux/module.h>
  #include <linux/kmod.h>
  #include <linux/poll.h>
  #include <linux/nmi.h>
@@ -1132,9 +1133,15 @@ void do_machine_check(struct pt_regs *regs, long 
error_code)
                if (no_way_out)
                        mce_panic("Fatal machine check on current CPU", &m, 
msg);
                if (worst == MCE_AR_SEVERITY) {
-                       recover_paddr = m.addr;
-                       if (!(m.mcgstatus & MCG_STATUS_RIPV))
-                               flags |= MF_MUST_KILL;
+                       if ((m.cs & 3) == 3) {
+                               recover_paddr = m.addr;
+                               if (!(m.mcgstatus & MCG_STATUS_RIPV))
+                                       flags |= MF_MUST_KILL;
+                       } else if (fixup_mcexception(regs)) {
+                               regs->ax = BIT(63) | m.addr;
+                       } else
+                               mce_panic("Failed kernel mode recovery",
+                                         &m, NULL);

Maybe I'm misunderstanding this, but presumably you shouldn't call fixup_mcexception unless you've first verified RIPV (i.e. that the ip you're looking up in the table is valid).

Also... I find the general flow of this code very hard to follow. It's critical that an MCE hitting kernel mode not get as far as ist_begin_non_atomic. It was already hard enough to tell that the code follows that rule, and now it's even harder. Would it make sense to add clear assertions that m.cs == regs->cs and that user_mode(regs) when you get to the end? Simplifying the control flow might also be nice.

                } else if (kill_it) {
                        force_sig(SIGBUS, current);
                }


I would argue that this force_sig() call should happen in the non-atomic section. It's probably okay as long as we came from user mode, but it's more obviously safe in the non-atomic section.

--Andy

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to