From: Dapeng Mi <dapeng1...@linux.intel.com>

Introduce enable_mediated_pmu global parameter to control if mediated
vPMU can be enabled on KVM level. Even enable_mediated_pmu is set to
true in KVM, user space hypervisor still need to enable mediated vPMU
explicitly by calling KVM_CAP_PMU_CAPABILITY ioctl. This gives
hypervisor flexibility to enable or disable mediated vPMU for each VM.

Mediated vPMU depends on some PMU features on higher PMU version, like
PERF_GLOBAL_STATUS_SET MSR in v4+ for Intel PMU. Thus introduce a
pmu_ops variable MIN_MEDIATED_PMU_VERSION to indicates the minimum host
PMU version which mediated vPMU needs.

Currently enable_mediated_pmu is not exposed to user space as a module
parameter until all mediated vPMU code are in place.

Suggested-by: Sean Christopherson <sea...@google.com>
Co-developed-by: Mingwei Zhang <mizh...@google.com>
Signed-off-by: Mingwei Zhang <mizh...@google.com>
Signed-off-by: Dapeng Mi <dapeng1...@linux.intel.com>
---
 arch/x86/kvm/pmu.c              |  3 ++-
 arch/x86/kvm/pmu.h              | 11 +++++++++
 arch/x86/kvm/svm/pmu.c          |  1 +
 arch/x86/kvm/vmx/capabilities.h |  3 ++-
 arch/x86/kvm/vmx/pmu_intel.c    |  5 ++++
 arch/x86/kvm/vmx/vmx.c          |  3 ++-
 arch/x86/kvm/x86.c              | 44 ++++++++++++++++++++++++++++++---
 arch/x86/kvm/x86.h              |  1 +
 8 files changed, 64 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 75e9cfc689f8..4f455afe4009 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -775,7 +775,8 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
        pmu->pebs_data_cfg_rsvd = ~0ull;
        bitmap_zero(pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
 
-       if (!vcpu->kvm->arch.enable_pmu)
+       if (!vcpu->kvm->arch.enable_pmu ||
+           (!lapic_in_kernel(vcpu) && enable_mediated_pmu))
                return;
 
        kvm_pmu_call(refresh)(vcpu);
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index ad89d0bd6005..dd45a0c6be74 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -45,6 +45,7 @@ struct kvm_pmu_ops {
        const u64 EVENTSEL_EVENT;
        const int MAX_NR_GP_COUNTERS;
        const int MIN_NR_GP_COUNTERS;
+       const int MIN_MEDIATED_PMU_VERSION;
 };
 
 void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops);
@@ -63,6 +64,12 @@ static inline bool kvm_pmu_has_perf_global_ctrl(struct 
kvm_pmu *pmu)
        return pmu->version > 1;
 }
 
+static inline bool kvm_mediated_pmu_enabled(struct kvm_vcpu *vcpu)
+{
+       return vcpu->kvm->arch.enable_pmu &&
+              enable_mediated_pmu && vcpu_to_pmu(vcpu)->version;
+}
+
 /*
  * KVM tracks all counters in 64-bit bitmaps, with general purpose counters
  * mapped to bits 31:0 and fixed counters mapped to 63:32, e.g. fixed counter 0
@@ -210,6 +217,10 @@ static inline void kvm_init_pmu_capability(const struct 
kvm_pmu_ops *pmu_ops)
                        enable_pmu = false;
        }
 
+       if (!enable_pmu || !kvm_pmu_cap.mediated ||
+           pmu_ops->MIN_MEDIATED_PMU_VERSION > kvm_pmu_cap.version)
+               enable_mediated_pmu = false;
+
        if (!enable_pmu) {
                memset(&kvm_pmu_cap, 0, sizeof(kvm_pmu_cap));
                return;
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 288f7f2a46f2..c8b9fd9b5350 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -239,4 +239,5 @@ struct kvm_pmu_ops amd_pmu_ops __initdata = {
        .EVENTSEL_EVENT = AMD64_EVENTSEL_EVENT,
        .MAX_NR_GP_COUNTERS = KVM_MAX_NR_AMD_GP_COUNTERS,
        .MIN_NR_GP_COUNTERS = AMD64_NUM_COUNTERS,
+       .MIN_MEDIATED_PMU_VERSION = 2,
 };
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index cb6588238f46..fac2c80ddbab 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -390,7 +390,8 @@ static inline bool vmx_pt_mode_is_host_guest(void)
 
 static inline bool vmx_pebs_supported(void)
 {
-       return boot_cpu_has(X86_FEATURE_PEBS) && kvm_pmu_cap.pebs_ept;
+       return boot_cpu_has(X86_FEATURE_PEBS) &&
+              !enable_mediated_pmu && kvm_pmu_cap.pebs_ept;
 }
 
 static inline bool cpu_has_notify_vmexit(void)
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 77012b2eca0e..425e93d4b1c6 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -739,4 +739,9 @@ struct kvm_pmu_ops intel_pmu_ops __initdata = {
        .EVENTSEL_EVENT = ARCH_PERFMON_EVENTSEL_EVENT,
        .MAX_NR_GP_COUNTERS = KVM_MAX_NR_INTEL_GP_COUNTERS,
        .MIN_NR_GP_COUNTERS = 1,
+       /*
+        * Intel mediated vPMU support depends on
+        * MSR_CORE_PERF_GLOBAL_STATUS_SET which is supported from 4+.
+        */
+       .MIN_MEDIATED_PMU_VERSION = 4,
 };
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 00ac94535c21..a4b5b6455c7b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7916,7 +7916,8 @@ static __init u64 vmx_get_perf_capabilities(void)
        if (boot_cpu_has(X86_FEATURE_PDCM))
                rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
 
-       if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) {
+       if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR) &&
+           !enable_mediated_pmu) {
                x86_perf_get_lbr(&vmx_lbr_caps);
 
                /*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 72995952978a..1ebe169b88b6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -188,6 +188,14 @@ bool __read_mostly enable_pmu = true;
 EXPORT_SYMBOL_GPL(enable_pmu);
 module_param(enable_pmu, bool, 0444);
 
+/*
+ * Enable/disable mediated passthrough PMU virtualization.
+ * Don't expose it to userspace as a module paramerter until
+ * all mediated vPMU code is in place.
+ */
+bool __read_mostly enable_mediated_pmu;
+EXPORT_SYMBOL_GPL(enable_mediated_pmu);
+
 bool __read_mostly eager_page_split = true;
 module_param(eager_page_split, bool, 0644);
 
@@ -6643,9 +6651,28 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
                        break;
 
                mutex_lock(&kvm->lock);
-               if (!kvm->created_vcpus) {
-                       kvm->arch.enable_pmu = !(cap->args[0] & 
KVM_PMU_CAP_DISABLE);
-                       r = 0;
+               /*
+                * To keep PMU configuration "simple", setting vPMU support is
+                * disallowed if vCPUs are created, or if mediated PMU support
+                * was already enabled for the VM.
+                */
+               if (!kvm->created_vcpus &&
+                   (!enable_mediated_pmu || !kvm->arch.enable_pmu)) {
+                       bool pmu_enable = !(cap->args[0] & KVM_PMU_CAP_DISABLE);
+
+                       if (enable_mediated_pmu && pmu_enable) {
+                               char *err_msg = "Fail to enable mediated vPMU, 
" \
+                                       "please disable system wide perf events 
or nmi_watchdog " \
+                                       "(echo 0 > 
/proc/sys/kernel/nmi_watchdog).\n";
+
+                               r = perf_get_mediated_pmu();
+                               if (r)
+                                       kvm_err("%s", err_msg);
+                       } else
+                               r = 0;
+
+                       if (!r)
+                               kvm->arch.enable_pmu = pmu_enable;
                }
                mutex_unlock(&kvm->lock);
                break;
@@ -12723,7 +12750,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long 
type)
        kvm->arch.default_tsc_khz = max_tsc_khz ? : tsc_khz;
        kvm->arch.apic_bus_cycle_ns = APIC_BUS_CYCLE_NS_DEFAULT;
        kvm->arch.guest_can_read_msr_platform_info = true;
-       kvm->arch.enable_pmu = enable_pmu;
+
+       /*
+        * PMU virtualization is opt-in when mediated PMU support is enabled.
+        * KVM_CAP_PMU_CAPABILITY ioctl must be called explicitly to enable
+        * mediated vPMU. For legacy perf-based vPMU, its behavior isn't 
changed,
+        * KVM_CAP_PMU_CAPABILITY ioctl is optional.
+        */
+       kvm->arch.enable_pmu = enable_pmu && !enable_mediated_pmu;
 
 #if IS_ENABLED(CONFIG_HYPERV)
        spin_lock_init(&kvm->arch.hv_root_tdp_lock);
@@ -12876,6 +12910,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
                __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
                mutex_unlock(&kvm->slots_lock);
        }
+       if (kvm->arch.enable_pmu && enable_mediated_pmu)
+               perf_put_mediated_pmu();
        kvm_unload_vcpu_mmus(kvm);
        kvm_x86_call(vm_destroy)(kvm);
        kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, 
&kvm->srcu, 1));
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 91e50a513100..dbf9973b3d09 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -391,6 +391,7 @@ extern struct kvm_caps kvm_caps;
 extern struct kvm_host_values kvm_host;
 
 extern bool enable_pmu;
+extern bool enable_mediated_pmu;
 
 /*
  * Get a filtered version of KVM's supported XCR0 that strips out dynamic
-- 
2.49.0.395.g12beb8f557-goog


Reply via email to