On 3/24/2025 11:01 PM, Mingwei Zhang wrote:
> From: Dapeng Mi <dapeng1...@linux.intel.com>
> 
> Check whether rdpmc needs to be intercepted for the mediated vPMU. Simply
> put, if the guest owns all PMU counters in the mediated vPMU, rdpmc
> interception should be disabled to mitigate the performance impact;
> otherwise rdpmc has to be intercepted to prevent the guest from reading
> the host counters' data via the rdpmc instruction.
> 
> Co-developed-by: Mingwei Zhang <mizh...@google.com>
> Signed-off-by: Mingwei Zhang <mizh...@google.com>
> Co-developed-by: Sandipan Das <sandipan....@amd.com>
> Signed-off-by: Sandipan Das <sandipan....@amd.com>
> Signed-off-by: Dapeng Mi <dapeng1...@linux.intel.com>
> ---
>  arch/x86/include/asm/msr-index.h |  1 +
>  arch/x86/kvm/pmu.c               | 34 ++++++++++++++++++++++++++++++++
>  arch/x86/kvm/pmu.h               | 19 ++++++++++++++++++
>  arch/x86/kvm/svm/pmu.c           | 14 ++++++++++++-
>  arch/x86/kvm/vmx/pmu_intel.c     | 18 ++++++++---------
>  5 files changed, 76 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index ca70846ffd55..337f4b0a2998 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -312,6 +312,7 @@
>  #define PERF_CAP_PEBS_FORMAT         0xf00
>  #define PERF_CAP_FW_WRITES           BIT_ULL(13)
>  #define PERF_CAP_PEBS_BASELINE               BIT_ULL(14)
> +#define PERF_CAP_PERF_METRICS                BIT_ULL(15)
>  #define PERF_CAP_PEBS_MASK           (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
>                                        PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE)
>  
> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> index 92c742ead663..6ad71752be4b 100644
> --- a/arch/x86/kvm/pmu.c
> +++ b/arch/x86/kvm/pmu.c
> @@ -604,6 +604,40 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
>       return 0;
>  }
>  
> +inline bool kvm_rdpmc_in_guest(struct kvm_vcpu *vcpu)
> +{
> +     struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> +
> +     if (!kvm_mediated_pmu_enabled(vcpu))
> +             return false;
> +
> +     /*
> +      * VMware allows access to these Pseudo-PMCs even when read via RDPMC
> +      * in Ring3 when CR4.PCE=0.
> +      */
> +     if (enable_vmware_backdoor)
> +             return false;
> +
> +     /*
> +      * FIXME: In theory, perf metrics is always combined with fixed
> +      *        counter 3, so it would be enough to compare the guest and
> +      *        host fixed counter numbers without checking perf metrics
> +      *        explicitly. However, kvm_pmu_cap.num_counters_fixed is
> +      *        limited to KVM_MAX_NR_FIXED_COUNTERS (3) since fixed
> +      *        counter 3 is not supported yet, so perf metrics still
> +      *        needs to be checked explicitly here. Once fixed counter 3
> +      *        is supported, the perf metrics check can be removed.
> +      */
> +     return pmu->nr_arch_gp_counters == kvm_pmu_cap.num_counters_gp &&
> +            pmu->nr_arch_fixed_counters == kvm_pmu_cap.num_counters_fixed &&
> +            vcpu_has_perf_metrics(vcpu) == kvm_host_has_perf_metrics() &&
> +            pmu->counter_bitmask[KVM_PMC_GP] ==
> +                             (BIT_ULL(kvm_pmu_cap.bit_width_gp) - 1) &&
> +            pmu->counter_bitmask[KVM_PMC_FIXED] ==
> +                             (BIT_ULL(kvm_pmu_cap.bit_width_fixed) - 1);
> +}
> +EXPORT_SYMBOL_GPL(kvm_rdpmc_in_guest);
> +
>  void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
>  {
>       if (lapic_in_kernel(vcpu)) {
> diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
> index e1d0096f249b..509c995b7871 100644
> --- a/arch/x86/kvm/pmu.h
> +++ b/arch/x86/kvm/pmu.h
> @@ -271,6 +271,24 @@ static inline bool pmc_is_globally_enabled(struct kvm_pmc *pmc)
>       return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
>  }
>  
> +static inline u64 vcpu_get_perf_capabilities(struct kvm_vcpu *vcpu)
> +{
> +     if (!guest_cpu_cap_has(vcpu, X86_FEATURE_PDCM))
> +             return 0;
> +
> +     return vcpu->arch.perf_capabilities;
> +}
> +
> +static inline bool vcpu_has_perf_metrics(struct kvm_vcpu *vcpu)
> +{
> +     return !!(vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PERF_METRICS);
> +}
> +
> +static inline bool kvm_host_has_perf_metrics(void)
> +{
> +     return !!(kvm_host.perf_capabilities & PERF_CAP_PERF_METRICS);
> +}
> +
>  void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
>  void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
>  int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
> @@ -287,6 +305,7 @@ void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel);
>  bool vcpu_pmu_can_enable(struct kvm_vcpu *vcpu);
>  
>  bool is_vmware_backdoor_pmc(u32 pmc_idx);
> +bool kvm_rdpmc_in_guest(struct kvm_vcpu *vcpu);
>  
>  extern struct kvm_pmu_ops intel_pmu_ops;
>  extern struct kvm_pmu_ops amd_pmu_ops;
> diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
> index c8b9fd9b5350..153972e944eb 100644
> --- a/arch/x86/kvm/svm/pmu.c
> +++ b/arch/x86/kvm/svm/pmu.c
> @@ -173,7 +173,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>       return 1;
>  }
>  
> -static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
> +static void __amd_pmu_refresh(struct kvm_vcpu *vcpu)
>  {
>       struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>       union cpuid_0x80000022_ebx ebx;
> @@ -212,6 +212,18 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
>       bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters);
>  }
>  
> +static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
> +{
> +     struct vcpu_svm *svm = to_svm(vcpu);
> +
> +     __amd_pmu_refresh(vcpu);
> +
> +     if (kvm_rdpmc_in_guest(vcpu))
> +             svm_clr_intercept(svm, INTERCEPT_RDPMC);
> +     else
> +             svm_set_intercept(svm, INTERCEPT_RDPMC);
> +}
> +

After putting kprobes on kvm_pmu_rdpmc(), I noticed that RDPMC instructions
were getting intercepted for the secondary vCPUs. This happens because, when
secondary vCPUs come up, kvm_vcpu_reset() gets called after guest CPUID has
been updated. While RDPMC interception is initially disabled in the
kvm_pmu_refresh() path, it gets re-enabled in the kvm_vcpu_reset() path as
svm_vcpu_reset() calls init_vmcb(). We should consider adding the following
change to avoid that (a minimal sketch of the probe I used appears after the
diff).

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 6f9142063cc4..1c9c183092f3 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1354,7 +1354,6 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
                svm_set_intercept(svm, INTERCEPT_SMI);

        svm_set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
-       svm_set_intercept(svm, INTERCEPT_RDPMC);
        svm_set_intercept(svm, INTERCEPT_CPUID);
        svm_set_intercept(svm, INTERCEPT_INVD);
        svm_set_intercept(svm, INTERCEPT_INVLPG);
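
For reference, a minimal sketch of the kind of kprobe module I used for the
observation above. The module boilerplate and print format here are mine and
only illustrative; the probed symbol is the real kvm_pmu_rdpmc() handler:

#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/smp.h>

static struct kprobe kp = {
	.symbol_name = "kvm_pmu_rdpmc",
};

/* Fires whenever an intercepted RDPMC reaches KVM's common handler. */
static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("kvm_pmu_rdpmc hit on CPU %d\n", smp_processor_id());
	return 0;
}

static int __init rdpmc_probe_init(void)
{
	kp.pre_handler = handler_pre;
	return register_kprobe(&kp);
}

static void __exit rdpmc_probe_exit(void)
{
	unregister_kprobe(&kp);
}

module_init(rdpmc_probe_init);
module_exit(rdpmc_probe_exit);
MODULE_LICENSE("GPL");

With the init_vmcb() change above applied, the probe should stop firing on
the secondary vCPUs once the guest owns all counters and interception stays
disabled.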

>  static void amd_pmu_init(struct kvm_vcpu *vcpu)
>  {
>       struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index fc017e9a6a0c..2a5f79206b02 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -108,14 +108,6 @@ static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
>       return &counters[array_index_nospec(idx, num_counters)];
>  }
>  
> -static inline u64 vcpu_get_perf_capabilities(struct kvm_vcpu *vcpu)
> -{
> -     if (!guest_cpu_cap_has(vcpu, X86_FEATURE_PDCM))
> -             return 0;
> -
> -     return vcpu->arch.perf_capabilities;
> -}
> -
>  static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu)
>  {
>       return (vcpu_get_perf_capabilities(vcpu) & PERF_CAP_FW_WRITES) != 0;
> @@ -456,7 +448,7 @@ static void intel_pmu_enable_fixed_counter_bits(struct kvm_pmu *pmu, u64 bits)
>               pmu->fixed_ctr_ctrl_rsvd &= ~intel_fixed_bits_by_idx(i, bits);
>  }
>  
> -static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
> +static void __intel_pmu_refresh(struct kvm_vcpu *vcpu)
>  {
>       struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
>       struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
> @@ -564,6 +556,14 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
>       }
>  }
>  
> +static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
> +{
> +     __intel_pmu_refresh(vcpu);
> +
> +     exec_controls_changebit(to_vmx(vcpu), CPU_BASED_RDPMC_EXITING,
> +                             !kvm_rdpmc_in_guest(vcpu));
> +}
> +
>  static void intel_pmu_init(struct kvm_vcpu *vcpu)
>  {
>       int i;

