Nicholas Piggin <npig...@gmail.com> writes: > From: Suraj Jitindar Singh <sjitindarsi...@gmail.com> > > The POWER9 vCPU TLB management code assumes all threads in a core share > a TLB, and that TLBIEL execued by one thread will invalidate TLBs for > all threads. This is not the case for SMT8 capable POWER9 and POWER10 > (big core) processors, where the TLB is split between groups of threads. > This results in TLB multi-hits, random data corruption, etc. > > Fix this by introducing cpu_first_tlb_thread_sibling etc., to determine > which siblings share TLBs, and use that in the guest TLB flushing code. > > [npig...@gmail.com: add changelog and comment] > > Signed-off-by: Paul Mackerras <pau...@ozlabs.org> > Signed-off-by: Nicholas Piggin <npig...@gmail.com>
Reviewed-by: Fabiano Rosas <faro...@linux.ibm.com> > --- > arch/powerpc/include/asm/cputhreads.h | 30 +++++++++++++++++++++++++++ > arch/powerpc/kvm/book3s_hv.c | 13 ++++++------ > arch/powerpc/kvm/book3s_hv_builtin.c | 2 +- > arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +- > 4 files changed, 39 insertions(+), 8 deletions(-) > > diff --git a/arch/powerpc/include/asm/cputhreads.h > b/arch/powerpc/include/asm/cputhreads.h > index 98c8bd155bf9..b167186aaee4 100644 > --- a/arch/powerpc/include/asm/cputhreads.h > +++ b/arch/powerpc/include/asm/cputhreads.h > @@ -98,6 +98,36 @@ static inline int cpu_last_thread_sibling(int cpu) > return cpu | (threads_per_core - 1); > } > > +/* > + * tlb_thread_siblings are siblings which share a TLB. This is not > + * architected, is not something a hypervisor could emulate and a future > + * CPU may change behaviour even in compat mode, so this should only be > + * used on PowerNV, and only with care. > + */ > +static inline int cpu_first_tlb_thread_sibling(int cpu) > +{ > + if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8)) > + return cpu & ~0x6; /* Big Core */ > + else > + return cpu_first_thread_sibling(cpu); > +} > + > +static inline int cpu_last_tlb_thread_sibling(int cpu) > +{ > + if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8)) > + return cpu | 0x6; /* Big Core */ > + else > + return cpu_last_thread_sibling(cpu); > +} > + > +static inline int cpu_tlb_thread_sibling_step(void) > +{ > + if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8)) > + return 2; /* Big Core */ > + else > + return 1; > +} > + > static inline u32 get_tensr(void) > { > #ifdef CONFIG_BOOKE > diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c > index 28a80d240b76..0a8398a63f80 100644 > --- a/arch/powerpc/kvm/book3s_hv.c > +++ b/arch/powerpc/kvm/book3s_hv.c > @@ -2657,7 +2657,7 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, > struct kvm_vcpu *vcpu) > cpumask_t *cpu_in_guest; > int i; > > - cpu = cpu_first_thread_sibling(cpu); > + cpu = cpu_first_tlb_thread_sibling(cpu); > if (nested) { > cpumask_set_cpu(cpu, &nested->need_tlb_flush); > cpu_in_guest = &nested->cpu_in_guest; > @@ -2671,9 +2671,10 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, > struct kvm_vcpu *vcpu) > * the other side is the first smp_mb() in kvmppc_run_core(). > */ > smp_mb(); > - for (i = 0; i < threads_per_core; ++i) > - if (cpumask_test_cpu(cpu + i, cpu_in_guest)) > - smp_call_function_single(cpu + i, do_nothing, NULL, 1); > + for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu); > + i += cpu_tlb_thread_sibling_step()) > + if (cpumask_test_cpu(i, cpu_in_guest)) > + smp_call_function_single(i, do_nothing, NULL, 1); > } > > static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) > @@ -2704,8 +2705,8 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu > *vcpu, int pcpu) > */ > if (prev_cpu != pcpu) { > if (prev_cpu >= 0 && > - cpu_first_thread_sibling(prev_cpu) != > - cpu_first_thread_sibling(pcpu)) > + cpu_first_tlb_thread_sibling(prev_cpu) != > + cpu_first_tlb_thread_sibling(pcpu)) > radix_flush_cpu(kvm, prev_cpu, vcpu); > if (nested) > nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu; > diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c > b/arch/powerpc/kvm/book3s_hv_builtin.c > index 7a0e33a9c980..3edc25c89092 100644 > --- a/arch/powerpc/kvm/book3s_hv_builtin.c > +++ b/arch/powerpc/kvm/book3s_hv_builtin.c > @@ -800,7 +800,7 @@ void kvmppc_check_need_tlb_flush(struct kvm *kvm, int > pcpu, > * Thus we make all 4 threads use the same bit. > */ > if (cpu_has_feature(CPU_FTR_ARCH_300)) > - pcpu = cpu_first_thread_sibling(pcpu); > + pcpu = cpu_first_tlb_thread_sibling(pcpu); > > if (nested) > need_tlb_flush = &nested->need_tlb_flush; > diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c > b/arch/powerpc/kvm/book3s_hv_rm_mmu.c > index 7af7c70f1468..407dbf21bcbc 100644 > --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c > +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c > @@ -67,7 +67,7 @@ static int global_invalidates(struct kvm *kvm) > * so use the bit for the first thread to represent the core. > */ > if (cpu_has_feature(CPU_FTR_ARCH_300)) > - cpu = cpu_first_thread_sibling(cpu); > + cpu = cpu_first_tlb_thread_sibling(cpu); > cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush); > }