Hi Dave,
Thanks for taking time to review!
On 2026/3/5 01:59, Dave Hansen wrote:
On 3/3/26 18:10, Lance Yang wrote:
...
+ if (pv_ops.mmu.flush_tlb_multi == native_flush_tlb_multi &&
+ !cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
+ pv_ops.mmu.flush_tlb_multi_implies_ipi_broadcast = true;
+ static_branch_enable(&tlb_ipi_broadcast_key);
+ }
+}
...
+#ifndef CONFIG_PARAVIRT
+void __init native_pv_tlb_init(void)
+{
+ /*
+ * For non-PARAVIRT builds, check if native TLB flush sends real IPIs
+ * (i.e., not using INVLPGB broadcast invalidation).
+ */
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ static_branch_enable(&tlb_ipi_broadcast_key);
+}
+#endif
I really despise duplicated logic. The X86_FEATURE_INVLPGB check is
small, but it is duplicated. You're also setting the static branch in a
*bunch* of different places.
Sorry for the mess :(
Can this be arranged so that the PV code just tells the core code that
it is compatible with flush_tlb_multi_implies_ipi_broadcast?
Yeah, much much better and cleaner!
void __init bool is_pv_ok(void)
{
/* This check is super sketchy an unexplained: */
if (pv_ops.mmu.flush_tlb_multi_implies_ipi_broadcast)
return true;
if (pv_ops.mmu.flush_tlb_multi != native_flush_tlb_multi)
return false;
pv_ops.mmu.flush_tlb_multi_implies_ipi_broadcast = true;
return true;
}
void __init tlb_init(void)
{
if (!is_pv_ok())
return;
if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
return;
static_branch_enable(&tlb_ipi_broadcast_key);
}
Isn't that like a billion times more readable? It has one
X86_FEATURE_INVLPGB check and one static_branch_enable() point and no
#ifdeffery other than defining a stub is_pv_ok().
Yep, absolutely ;) Will rework it for v7 as you suggested.
BTW, why is there even an early return for the case where
flush_tlb_multi_implies_ipi_broadcast is already set? Isn't this
decision made once on the boot CPU and then never touched again? Do any
PV instances actually set the bit?
Good point. No PV backend sets it today, so we don't need that check or
even the property. I'll drop them. If a PV backend ever needs to
indicate it sends real IPIs, we can add the property back then.
Does the following look reasonable?
---8<---
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 866ea78ba156..74292abe8852 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -5,11 +5,19 @@
#define tlb_flush tlb_flush
static inline void tlb_flush(struct mmu_gather *tlb);
+#define tlb_table_flush_implies_ipi_broadcast
tlb_table_flush_implies_ipi_broadcast
+static inline bool tlb_table_flush_implies_ipi_broadcast(void);
+
#include <asm-generic/tlb.h>
#include <linux/kernel.h>
#include <vdso/bits.h>
#include <vdso/page.h>
+static inline bool tlb_table_flush_implies_ipi_broadcast(void)
+{
+ return static_branch_likely(&tlb_ipi_broadcast_key);
+}
+
static inline void tlb_flush(struct mmu_gather *tlb)
{
unsigned long start = 0UL, end = TLB_FLUSH_ALL;
@@ -20,7 +28,12 @@ static inline void tlb_flush(struct mmu_gather *tlb)
end = tlb->end;
}
- flush_tlb_mm_range(tlb->mm, start, end, stride_shift,
tlb->freed_tables);
+ /*
+ * Pass both freed_tables and unshared_tables so that lazy-TLB CPUs
+ * also receive IPIs during unsharing page tables.
+ */
+ flush_tlb_mm_range(tlb->mm, start, end, stride_shift,
+ tlb->freed_tables || tlb->unshared_tables);
}
static inline void invlpg(unsigned long addr)
diff --git a/arch/x86/include/asm/tlbflush.h
b/arch/x86/include/asm/tlbflush.h
index 5a3cdc439e38..d086454eb760 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -5,6 +5,7 @@
#include <linux/mm_types.h>
#include <linux/mmu_notifier.h>
#include <linux/sched.h>
+#include <linux/jump_label.h>
#include <asm/barrier.h>
#include <asm/processor.h>
@@ -15,9 +16,35 @@
#include <asm/pti.h>
#include <asm/processor-flags.h>
#include <asm/pgtable.h>
+#include <asm/paravirt.h>
DECLARE_PER_CPU(u64, tlbstate_untag_mask);
+DECLARE_STATIC_KEY_FALSE(tlb_ipi_broadcast_key);
+
+#ifdef CONFIG_PARAVIRT
+static inline bool __init pv_tlb_is_native(void)
+{
+ return pv_ops.mmu.flush_tlb_multi == native_flush_tlb_multi;
+}
+#else
+static inline bool __init pv_tlb_is_native(void)
+{
+ return true;
+}
+#endif
+
+static inline void __init native_pv_tlb_init(void)
+{
+ if (!pv_tlb_is_native())
+ return;
+
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return;
+
+ static_branch_enable(&tlb_ipi_broadcast_key);
+}
+
void __flush_tlb_all(void);
#define TLB_FLUSH_ALL -1UL
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 5cd6950ab672..3cdb04162843 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1167,6 +1167,7 @@ void __init native_smp_prepare_boot_cpu(void)
switch_gdt_and_percpu_base(me);
native_pv_lock_init();
+ native_pv_tlb_init();
}
void __init native_smp_cpus_done(unsigned int max_cpus)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 621e09d049cb..43e60cca38f1 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -26,6 +26,8 @@
#include "mm_internal.h"
+DEFINE_STATIC_KEY_FALSE(tlb_ipi_broadcast_key);
+
#ifdef CONFIG_PARAVIRT
# define STATIC_NOPV
#else
---
Thanks,
Lance