On 11/7/2014 7:40 PM, Chen Yucong wrote:
Until now, the mce_severity mechanism can only identify the severity
of UCNA error as MCE_KEEP_SEVERITY. Meanwhile, it is not able to filter
out DEFERRED error for ADM platform.

This patch aims to extend the mce_severity mechanism for handling
UCNA/DEFERRED error. In order to do this, the patch introduces a new
severity level - MCE_UCNA/DEFERRED_SEVERITY.

In addition, mce_severity is specific to machine check exception,
and it will check MCIP/EIPV/RIPV bits. In order to use mce_severity
mechanism in non-exception context, the patch also introduces a new
argument (is_excp) for mce_severity. `is_excp' is used to explicitly
specify the calling context of mce_severity.

Signed-off-by: Chen Yucong <[email protected]>
---
  arch/x86/include/asm/mce.h                |    4 ++++
  arch/x86/kernel/cpu/mcheck/mce-internal.h |    4 +++-
  arch/x86/kernel/cpu/mcheck/mce-severity.c |   21 ++++++++++++++++-----
  arch/x86/kernel/cpu/mcheck/mce.c          |   14 ++++++++------
  drivers/edac/mce_amd.h                    |    3 ---
  5 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 276392f..51b26e89 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -34,6 +34,10 @@
  #define MCI_STATUS_S   (1ULL<<56)  /* Signaled machine check */
  #define MCI_STATUS_AR  (1ULL<<55)  /* Action required */
+/* AMD-specific bits */
+#define MCI_STATUS_DEFERRED    (1ULL<<44)  /* declare an uncorrected error */
+#define MCI_STATUS_POISON      (1ULL<<43)  /* access poisonous data */
+
  /*
   * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
   * bits 15:0.  But bit 12 is the 'F' bit, defined for corrected
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 09edd0b..10b4690 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -3,6 +3,8 @@
enum severity_level {
        MCE_NO_SEVERITY,
+       MCE_DEFERRED_SEVERITY,
+       MCE_UCNA_SEVERITY = MCE_DEFERRED_SEVERITY,
        MCE_KEEP_SEVERITY,
        MCE_SOME_SEVERITY,
        MCE_AO_SEVERITY,
@@ -21,7 +23,7 @@ struct mce_bank {
        char                    attrname[ATTR_LEN];     /* attribute name */
  };
-int mce_severity(struct mce *a, int tolerant, char **msg);
+int mce_severity(struct mce *a, int tolerant, char **msg, bool is_excp);
  struct dentry *mce_get_debugfs_dir(void);
extern struct mce_bank *mce_banks;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index c370e1c..c61feb3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -31,6 +31,7 @@
enum context { IN_KERNEL = 1, IN_USER = 2 };
  enum ser { SER_REQUIRED = 1, NO_SER = 2 };
+enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
static struct severity {
        u64 mask;
@@ -40,6 +41,7 @@ static struct severity {
        unsigned char mcgres;
        unsigned char ser;
        unsigned char context;
+       unsigned char excp;
        unsigned char covered;
        char *msg;
  } severities[] = {
@@ -48,6 +50,8 @@ static struct severity {
  #define  USER         .context = IN_USER
  #define  SER          .ser = SER_REQUIRED
  #define  NOSER                .ser = NO_SER
+#define  EXCP          .excp = EXCP_CONTEXT
+#define  NOEXCP                .excp = NO_EXCP
  #define  BITCLR(x)    .mask = x, .result = 0
  #define  BITSET(x)    .mask = x, .result = x
  #define  MCGMASK(x, y)        .mcgmask = x, .mcgres = y
@@ -71,16 +75,20 @@ static struct severity {
        /* When MCIP is not set something is very confused */
        MCESEV(
                PANIC, "MCIP not set in MCA handler",
-               MCGMASK(MCG_STATUS_MCIP, 0)
+               EXCP, MCGMASK(MCG_STATUS_MCIP, 0)
                ),
        /* Neither return not error IP -- no chance to recover -> PANIC */
        MCESEV(
                PANIC, "Neither restart nor error IP",
-               MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
+               EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
                ),
        MCESEV(
                PANIC, "In kernel and no restart IP",
-               KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
+               EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
+               ),
+       MCESEV(
+               DEFERRED, "Deferred error",
+               NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
                ),

We don't need MCI_STATUS_POISON in the MASK() here, because a deferred error is indicated by {UC = 0, Deferred = 1} alone.
(Older docs might be unclear on that.)

And the commit message still says "ADM" :)

- Aravind.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to