New VMX controls bits for Arch LBR are added. When bit 21 in vmentry_ctrl
is set, VM entry will write the value from the "Guest IA32_LBR_CTL" guest
state field to IA32_LBR_CTL. When bit 26 in vmexit_ctrl is set, VM exit
will clear IA32_LBR_CTL after the value has been saved to the "Guest
IA32_LBR_CTL" guest state field.

To enable guest Arch LBR, KVM should set both the "Load Guest IA32_LBR_CTL"
entry control and the "Clear IA32_LBR_CTL" exit control bits. If these two
conditions cannot be met, KVM will clear the LBR_FMT bits and will not
expose the Arch LBR feature.

If Arch LBR is exposed on KVM, the guest should set both the ARCH_LBR CPUID
and the same LBR_FMT value as the host via MSR_IA32_PERF_CAPABILITIES to
enable guest Arch LBR.

KVM will bypass the host/guest x86 cpu model check and the records msrs can
still be pass-through to guest as usual and work like a model-specific LBR.
KVM is consistent with the host and does not support the LER entry.

Signed-off-by: Like Xu <like...@linux.intel.com>
---
 arch/x86/include/asm/vmx.h      |  2 ++
 arch/x86/kvm/vmx/capabilities.h | 25 +++++++++++++++++--------
 arch/x86/kvm/vmx/pmu_intel.c    | 27 ++++++++++++++++++++++-----
 arch/x86/kvm/vmx/vmx.c          |  9 +++++++--
 4 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 8ec7bc24b37a..c8186ec46fca 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -95,6 +95,7 @@
 #define VM_EXIT_CLEAR_BNDCFGS                   0x00800000
 #define VM_EXIT_PT_CONCEAL_PIP                 0x01000000
 #define VM_EXIT_CLEAR_IA32_RTIT_CTL            0x02000000
+#define VM_EXIT_CLEAR_IA32_LBR_CTL             0x04000000
 
 #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR      0x00036dff
 
@@ -108,6 +109,7 @@
 #define VM_ENTRY_LOAD_BNDCFGS                   0x00010000
 #define VM_ENTRY_PT_CONCEAL_PIP                        0x00020000
 #define VM_ENTRY_LOAD_IA32_RTIT_CTL            0x00040000
+#define VM_ENTRY_LOAD_IA32_LBR_CTL                     0x00200000
 
 #define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR     0x000011ff
 
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index d1d77985e889..73fceb534c7c 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -378,20 +378,29 @@ static inline bool vmx_pt_mode_is_host_guest(void)
        return pt_mode == PT_MODE_HOST_GUEST;
 }
 
-static inline u64 vmx_get_perf_capabilities(void)
+static inline bool cpu_has_vmx_arch_lbr(void)
 {
-       u64 perf_cap = 0;
-
-       if (boot_cpu_has(X86_FEATURE_PDCM))
-               rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_cap);
-
-       perf_cap &= PMU_CAP_LBR_FMT;
+       return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_IA32_LBR_CTL) &&
+               (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_LBR_CTL);
+}
 
+static inline u64 vmx_get_perf_capabilities(void)
+{
        /*
         * Since counters are virtualized, KVM would support full
         * width counting unconditionally, even if the host lacks it.
         */
-       return PMU_CAP_FW_WRITES | perf_cap;
+       u64 perf_cap = PMU_CAP_FW_WRITES;
+       u64 host_perf_cap = 0;
+
+       if (boot_cpu_has(X86_FEATURE_PDCM))
+               rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
+
+       perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
+       if (boot_cpu_has(X86_FEATURE_ARCH_LBR) && !cpu_has_vmx_arch_lbr())
+               perf_cap &= ~PMU_CAP_LBR_FMT;
+
+       return perf_cap;
 }
 
 static inline u64 vmx_supported_debugctl(void)
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index d14a14eb712d..48a817be60ab 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -176,12 +176,16 @@ static inline struct kvm_pmc *get_fw_gp_pmc(struct 
kvm_pmu *pmu, u32 msr)
 
 bool intel_pmu_lbr_is_compatible(struct kvm_vcpu *vcpu)
 {
+       if (kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR))
+               return guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR);
+
        /*
         * As a first step, a guest could only enable LBR feature if its
         * cpu model is the same as the host because the LBR registers
         * would be pass-through to the guest and they're model specific.
         */
-       return boot_cpu_data.x86_model == guest_cpuid_model(vcpu);
+       return !boot_cpu_has(X86_FEATURE_ARCH_LBR) &&
+               boot_cpu_data.x86_model == guest_cpuid_model(vcpu);
 }
 
 bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu)
@@ -199,8 +203,11 @@ static bool intel_pmu_is_valid_lbr_msr(struct kvm_vcpu 
*vcpu, u32 index)
        if (!intel_pmu_lbr_is_enabled(vcpu))
                return ret;
 
-       ret = (index == MSR_LBR_SELECT) || (index == MSR_LBR_TOS) ||
-               (index >= records->from && index < records->from + records->nr) 
||
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR))
+               ret = (index == MSR_LBR_SELECT) || (index == MSR_LBR_TOS);
+
+       if (!ret)
+               ret = (index >= records->from && index < records->from + 
records->nr) ||
                (index >= records->to && index < records->to + records->nr);
 
        if (!ret && records->info)
@@ -692,6 +699,9 @@ static void vmx_update_intercept_for_lbr_msrs(struct 
kvm_vcpu *vcpu, bool set)
                        vmx_set_intercept_for_msr(vcpu, lbr->info + i, 
MSR_TYPE_RW, set);
        }
 
+       if (guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR))
+               return;
+
        vmx_set_intercept_for_msr(vcpu, MSR_LBR_SELECT, MSR_TYPE_RW, set);
        vmx_set_intercept_for_msr(vcpu, MSR_LBR_TOS, MSR_TYPE_RW, set);
 }
@@ -735,10 +745,13 @@ void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu)
 {
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
+       bool lbr_enable = guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR) ?
+               (vmcs_read64(GUEST_IA32_LBR_CTL) & ARCH_LBR_CTL_LBREN) :
+               (vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR);
 
        if (!lbr_desc->event) {
                vmx_disable_lbr_msrs_passthrough(vcpu);
-               if (vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR)
+               if (lbr_enable)
                        goto warn;
                if (test_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use))
                        goto warn;
@@ -761,7 +774,11 @@ void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu)
 
 static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
 {
-       if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR))
+       bool lbr_enable = guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR) ?
+               (vmcs_read64(GUEST_IA32_LBR_CTL) & ARCH_LBR_CTL_LBREN) :
+               (vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR);
+
+       if (!lbr_enable)
                intel_pmu_release_guest_lbr_event(vcpu);
 }
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index a0660b9934c6..2f307689a14b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -666,6 +666,9 @@ static bool is_valid_passthrough_msr(u32 msr)
        case MSR_LBR_NHM_TO ... MSR_LBR_NHM_TO + 31:
        case MSR_LBR_CORE_FROM ... MSR_LBR_CORE_FROM + 8:
        case MSR_LBR_CORE_TO ... MSR_LBR_CORE_TO + 8:
+       case MSR_ARCH_LBR_FROM_0 ... MSR_ARCH_LBR_FROM_0 + 31:
+       case MSR_ARCH_LBR_TO_0 ... MSR_ARCH_LBR_TO_0 + 31:
+       case MSR_ARCH_LBR_INFO_0 ... MSR_ARCH_LBR_INFO_0 + 31:
                /* LBR MSRs. These are handled in 
vmx_update_intercept_for_lbr_msrs() */
                return true;
        }
@@ -2529,7 +2532,8 @@ static __init int setup_vmcs_config(struct vmcs_config 
*vmcs_conf,
              VM_EXIT_LOAD_IA32_EFER |
              VM_EXIT_CLEAR_BNDCFGS |
              VM_EXIT_PT_CONCEAL_PIP |
-             VM_EXIT_CLEAR_IA32_RTIT_CTL;
+             VM_EXIT_CLEAR_IA32_RTIT_CTL |
+             VM_EXIT_CLEAR_IA32_LBR_CTL;
        if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
                                &_vmexit_control) < 0)
                return -EIO;
@@ -2553,7 +2557,8 @@ static __init int setup_vmcs_config(struct vmcs_config 
*vmcs_conf,
              VM_ENTRY_LOAD_IA32_EFER |
              VM_ENTRY_LOAD_BNDCFGS |
              VM_ENTRY_PT_CONCEAL_PIP |
-             VM_ENTRY_LOAD_IA32_RTIT_CTL;
+             VM_ENTRY_LOAD_IA32_RTIT_CTL |
+             VM_ENTRY_LOAD_IA32_LBR_CTL;
        if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
                                &_vmentry_control) < 0)
                return -EIO;
-- 
2.29.2

Reply via email to