4.2-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Xie XiuQi <[email protected]>

commit 1b48465500611a2dc5e75800c61ac352e22d41c3 upstream.

Zhang Liguang reported the following issue:

1) System detects a CMCI storm on the current CPU.

2) Kernel disables the CMCI interrupt on banks owned by the
   current CPU and switches to poll mode

3) After the CMCI storm subsides, kernel switches back to
   interrupt mode

4) We expect the system to reenable the CMCI interrupt on banks
   owned by the current CPU

   mce_intel_adjust_timer
   |-> cmci_reenable
       |-> cmci_discover     # owned banks are ignored here

  static void cmci_discover(int banks)
        ...
        for (i = 0; i < banks; i++) {
                ...
                if (test_bit(i, owned)) # ownd banks is ignore here
                        continue;

So convert cmci_storm_disable_banks() to
cmci_toggle_interrupt_mode() which controls whether to enable or
disable CMCI interrupts with its argument.

NB: We cannot clear the owned bit because the banks won't be
polled, otherwise. See:

  27f6c573e0f7 ("x86, CMCI: Add proper detection of end of CMCI storms")

for more info.

Reported-by: Zhang Liguang <[email protected]>
Signed-off-by: Xie XiuQi <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: [email protected]
Cc: linux-edac <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>

---
 arch/x86/kernel/cpu/mcheck/mce_intel.c |   41 ++++++++++++++++++---------------
 1 file changed, 23 insertions(+), 18 deletions(-)

--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -146,6 +146,27 @@ void mce_intel_hcpu_update(unsigned long
        per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
 }
 
+static void cmci_toggle_interrupt_mode(bool on)
+{
+       unsigned long flags, *owned;
+       int bank;
+       u64 val;
+
+       raw_spin_lock_irqsave(&cmci_discover_lock, flags);
+       owned = this_cpu_ptr(mce_banks_owned);
+       for_each_set_bit(bank, owned, MAX_NR_BANKS) {
+               rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
+
+               if (on)
+                       val |= MCI_CTL2_CMCI_EN;
+               else
+                       val &= ~MCI_CTL2_CMCI_EN;
+
+               wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
+       }
+       raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
+}
+
 unsigned long cmci_intel_adjust_timer(unsigned long interval)
 {
        if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
@@ -175,7 +196,7 @@ unsigned long cmci_intel_adjust_timer(un
                 */
                if (!atomic_read(&cmci_storm_on_cpus)) {
                        __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
-                       cmci_reenable();
+                       cmci_toggle_interrupt_mode(true);
                        cmci_recheck();
                }
                return CMCI_POLL_INTERVAL;
@@ -186,22 +207,6 @@ unsigned long cmci_intel_adjust_timer(un
        }
 }
 
-static void cmci_storm_disable_banks(void)
-{
-       unsigned long flags, *owned;
-       int bank;
-       u64 val;
-
-       raw_spin_lock_irqsave(&cmci_discover_lock, flags);
-       owned = this_cpu_ptr(mce_banks_owned);
-       for_each_set_bit(bank, owned, MAX_NR_BANKS) {
-               rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
-               val &= ~MCI_CTL2_CMCI_EN;
-               wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
-       }
-       raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
-}
-
 static bool cmci_storm_detect(void)
 {
        unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
@@ -223,7 +228,7 @@ static bool cmci_storm_detect(void)
        if (cnt <= CMCI_STORM_THRESHOLD)
                return false;
 
-       cmci_storm_disable_banks();
+       cmci_toggle_interrupt_mode(false);
        __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
        r = atomic_add_return(1, &cmci_storm_on_cpus);
        mce_timer_kick(CMCI_STORM_INTERVAL);


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to