When mcheck_init() doesn't recognize the CPU vendor, it will undo the
all-banks allocation, and it will in particular not install the CPU
notifier. This way APs will pointlessly try to re-establish an
all-banks allocation, while then falling over NULL pointers due to the
notifier hot having run and hence not having allocated anything for
them.

Prevent both from happening, and additionally delay writing MCG_CTL
until no errors can occur anymore in mca_cap_init().

Fixes: 741367e77d6c ("mce: Clean-up mcheck_init handler")
Fixes: a5e1b534ac6f ("x86: mce cleanup for both Intel and AMD mce logic")
Fixes: 560cf418c845 ("x86/mcheck: allow varying bank counts per CPU")
Reported-by: Teddy Astie <teddy.as...@vates.tech>
Signed-off-by: Jan Beulich <jbeul...@suse.com>

--- a/xen/arch/x86/cpu/mcheck/mce.c
+++ b/xen/arch/x86/cpu/mcheck/mce.c
@@ -634,16 +634,13 @@ static void set_poll_bankmask(struct cpu
 }
 
 /* The perbank ctl/status init is platform specific because of AMD's quirk */
-static int mca_cap_init(void)
+static int mca_cap_init(bool bsp)
 {
     uint64_t msr_content;
     unsigned int nr, cpu = smp_processor_id();
 
     rdmsrl(MSR_IA32_MCG_CAP, msr_content);
 
-    if ( msr_content & MCG_CTL_P ) /* Control register present ? */
-        wrmsrl(MSR_IA32_MCG_CTL, 0xffffffffffffffffULL);
-
     per_cpu(nr_mce_banks, cpu) = nr = MASK_EXTR(msr_content, MCG_CAP_COUNT);
 
     if ( !nr )
@@ -654,8 +651,11 @@ static int mca_cap_init(void)
         return -ENODEV;
     }
 
+    if ( !bsp && !mca_allbanks )
+        return -ENODATA;
+
     /* mcabanks_alloc depends on nr_mce_banks */
-    if ( !mca_allbanks || nr > mca_allbanks->num )
+    if ( bsp || nr > mca_allbanks->num )
     {
         unsigned int i;
         struct mca_banks *all = mcabanks_alloc(nr);
@@ -667,6 +667,9 @@ static int mca_cap_init(void)
         mcabanks_free(xchg(&mca_allbanks, all));
     }
 
+    if ( msr_content & MCG_CTL_P ) /* Control register present ? */
+        wrmsrl(MSR_IA32_MCG_CTL, ~0ULL);
+
     return 0;
 }
 
@@ -751,7 +754,7 @@ void mcheck_init(struct cpuinfo_x86 *c,
     }
 
     /*Hardware Enable */
-    if ( mca_cap_init() )
+    if ( mca_cap_init(bsp) )
         return;
 
     if ( !bsp )

Reply via email to