Initialization and handling for LMCE
- boot time option to disable LMCE for that boot instance
- Check for capability via IA32_MCG_CAP

Incorporated feedback from Boris from V1

See http://www.intel.com/sdm Volume 3 System Programming Guide, Chapter 15
for more information on MSR's and documentation on Local MCE.

Signed-off-by: Ashok Raj <ashok....@intel.com>
---
 Documentation/x86/x86_64/boot-options.txt |  3 ++
 arch/x86/include/asm/mce.h                |  5 +++
 arch/x86/kernel/cpu/mcheck/mce.c          |  3 ++
 arch/x86/kernel/cpu/mcheck/mce_intel.c    | 59 +++++++++++++++++++++++++++++++
 4 files changed, 70 insertions(+)

diff --git a/Documentation/x86/x86_64/boot-options.txt 
b/Documentation/x86/x86_64/boot-options.txt
index 5223479..79edee0 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -31,6 +31,9 @@ Machine check
                (e.g. BIOS or hardware monitoring applications), conflicting
                with OS's error handling, and you cannot deactivate the agent,
                then this option will be a help.
+   mce=no_lmce
+               Do not opt-in to Local MCE delivery. Use legacy method
+               to broadcast MCE's.
    mce=bootlog
                Enable logging of machine checks left over from booting.
                Disabled by default on AMD because some BIOS leave bogus ones.
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 677a408..8ba4d7a 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -109,6 +109,7 @@ struct mce_log {
 struct mca_config {
        bool dont_log_ce;
        bool cmci_disabled;
+       bool lmce_disabled;
        bool ignore_ce;
        bool disabled;
        bool ser;
@@ -173,12 +174,16 @@ void cmci_clear(void);
 void cmci_reenable(void);
 void cmci_rediscover(void);
 void cmci_recheck(void);
+void lmce_clear(void);
+void lmce_enable(void);
 #else
 static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { }
 static inline void cmci_clear(void) {}
 static inline void cmci_reenable(void) {}
 static inline void cmci_rediscover(void) {}
 static inline void cmci_recheck(void) {}
+static inline void lmce_clear(void) {}
+static inline void lmce_enable(void) {}
 #endif
 
 #ifdef CONFIG_X86_MCE_AMD
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index e535533..d10aada 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1976,6 +1976,7 @@ void mce_disable_bank(int bank)
 /*
  * mce=off Disables machine check
  * mce=no_cmci Disables CMCI
+ * mce=no_lmce Disables LMCE
  * mce=dont_log_ce Clears corrected events silently, no log created for CEs.
  * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared.
  * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above)
@@ -1999,6 +2000,8 @@ static int __init mcheck_enable(char *str)
                cfg->disabled = true;
        else if (!strcmp(str, "no_cmci"))
                cfg->cmci_disabled = true;
+       else if (!strcmp(str, "no_lmce"))
+               cfg->lmce_disabled = true;
        else if (!strcmp(str, "dont_log_ce"))
                cfg->dont_log_ce = true;
        else if (!strcmp(str, "ignore_ce"))
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c 
b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index b4a41cf..7d500b6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -91,6 +91,37 @@ static int cmci_supported(int *banks)
        return !!(cap & MCG_CMCI_P);
 }
 
+static bool lmce_supported(void)
+{
+       u64 cap, feature_control;
+
+       if (mca_cfg.lmce_disabled)
+               return false;
+
+       rdmsrl(MSR_IA32_MCG_CAP, cap);
+       /*
+        * LMCE depends on recovery support in the processor.
+        * Hence both MCG_SER_P and MCG_LMCE_P should be present in
+        * MCG_CAP
+        */
+       if (!((cap & (MCG_SER_P | MCG_LMCE_P)) == (MCG_SER_P | MCG_LMCE_P)))
+               return false;
+
+       /*
+        * BIOS should indicate support for LMCE by setting
+        * bit20 in IA32_FEATURE_CONTROL. without which touching
+        * MCG_EXT_CTL will generate #GP fault.
+        */
+       rdmsrl(MSR_IA32_FEATURE_CONTROL, feature_control);
+       if (((feature_control & (FEATURE_CONTROL_LOCKED |
+               FEATURE_CONTROL_LMCE)) == (FEATURE_CONTROL_LOCKED |
+                               FEATURE_CONTROL_LMCE)))
+               return true;
+       else
+               return false;
+
+}
+
 bool mce_intel_cmci_poll(void)
 {
        if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
@@ -405,6 +436,34 @@ static void intel_init_cmci(void)
        cmci_recheck();
 }
 
+void intel_init_lmce(void)
+{
+       u64 val;
+
+       if (!lmce_supported())
+               return;
+
+       rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
+       val |= MCG_EXT_CTL_LMCE_EN;
+       wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
+}
+
+/*
+ * Disable LMCE on this CPU for all banks it owns when it goes down.
+ * This allows other CPUs to claim the banks on rediscovery.
+ */
+void lmce_clear(void)
+{
+       u64 val;
+
+       if (!lmce_supported())
+               return;
+
+       rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
+       val &= ~MCG_EXT_CTL_LMCE_EN;
+       wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
+}
+
 void mce_intel_feature_init(struct cpuinfo_x86 *c)
 {
        intel_init_thermal(c);
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to