This patch disables GartTlbWlk errors on AMD Fam10h CPUs if
the BIOS forgets to do it (or is just too old). Leaving
these errors enabled can cause a sync-flood on the CPU,
causing a reboot.

This patch is the fix for

        https://bugzilla.kernel.org/show_bug.cgi?id=33012

on my machine.

Signed-off-by: Joerg Roedel <joerg.roedel at amd.com>
---
 arch/x86/include/asm/msr-index.h |    4 ++++
 arch/x86/kernel/cpu/amd.c        |   19 +++++++++++++++++++
 2 files changed, 23 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index fd5a1f3..3cce714 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -96,11 +96,15 @@
 #define MSR_IA32_MC0_ADDR              0x00000402
 #define MSR_IA32_MC0_MISC              0x00000403

+#define MSR_AMD64_MC0_MASK             0xc0010044
+
 #define MSR_IA32_MCx_CTL(x)            (MSR_IA32_MC0_CTL + 4*(x))
 #define MSR_IA32_MCx_STATUS(x)         (MSR_IA32_MC0_STATUS + 4*(x))
 #define MSR_IA32_MCx_ADDR(x)           (MSR_IA32_MC0_ADDR + 4*(x))
 #define MSR_IA32_MCx_MISC(x)           (MSR_IA32_MC0_MISC + 4*(x))

+#define MSR_AMD64_MCx_MASK(x)          (MSR_AMD64_MC0_MASK + (x))
+
 /* These are consecutive and not in the normal 4er MCE bank block */
 #define MSR_IA32_MC0_CTL2              0x00000280
 #define MSR_IA32_MCx_CTL2(x)           (MSR_IA32_MC0_CTL2 + (x))
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 3ecece0..3532d3b 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -615,6 +615,25 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
        /* As a rule processors have APIC timer running in deep C states */
        if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400))
                set_cpu_cap(c, X86_FEATURE_ARAT);
+
+       /*
+        * Disable GART TLB Walk Errors on Fam10h. We do this here
+        * because this is always needed when GART is enabled, even in a
+        * kernel which has no MCE support built in.
+        */
+       if (c->x86 == 0x10) {
+               /*
+                * BIOS should disable GartTlbWlk Errors itself. If
+                * it doesn't, do it here as suggested by the BKDG.
+                *
+                * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012
+                */
+               u64 mask;
+
+               rdmsrl(MSR_AMD64_MCx_MASK(4), mask);
+               mask |= (1 << 10);
+               wrmsrl(MSR_AMD64_MCx_MASK(4), mask);
+       }
 }

 #ifdef CONFIG_X86_32
-- 
1.7.1

Reply via email to