If we hit the VM_BUG_ON(), we're detecting a genuinely bad situation,
but we're very unlikely to get a useful call trace.

Make it a warning instead.

Signed-off-by: Andy Lutomirski <l...@kernel.org>
---
 arch/x86/mm/tlb.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index dbbcfd59726a..f4e471dd1526 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -121,8 +121,28 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct 
mm_struct *next,
         * hypothetical buggy code that directly switches to swapper_pg_dir
         * without going through leave_mm() / switch_mm_irqs_off() or that
         * does something like write_cr3(read_cr3_pa()).
+        *
+        * Only do this check if CONFIG_DEBUG_VM=y because __read_cr3()
+        * isn't free.
         */
-       VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid));
+#ifdef CONFIG_DEBUG_VM
+       if (WARN_ON_ONCE(__read_cr3() !=
+                        (__sme_pa(real_prev->pgd) | prev_asid))) {
+               /*
+                * If we were to BUG here, we'd be very likely to kill
+                * the system so hard that we don't see the call trace.
+                * Try to recover instead by ignoring the error and doing
+                * a global flush to minimize the chance of corruption.
+                *
+                * (This is far from being a fully correct recovery.
+                *  Architecturally, the CPU could prefetch something
+                *  back into an incorrect ASID slot and leave it there
+                *  to cause trouble down the road.  It's better than
+                *  nothing, though.)
+                */
+               __flush_tlb_all();
+       }
+#endif
 
        if (real_prev == next) {
                VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
-- 
2.13.5

Reply via email to