On 23/11/2016 02:14, David Matlack wrote:
> The VMX capability MSRs advertise the set of features the KVM virtual
> CPU can support. This set of features varies across different host CPUs
> and KVM versions. This patch aims to address both sources of
> differences, allowing VMs to be migrated across CPUs and KVM versions
> without guest-visible changes to these MSRs. Note that cross-KVM-
> version migration is only supported from this point forward.
> 
> When the VMX capability MSRs are restored, they are audited to check
> that the set of features advertised are a subset of what KVM and the
> CPU support.
> 
> Since the VMX capability MSRs are read-only, they do not need to be on
> the default MSR save/restore lists. The userspace hypervisor can set
> the values of these MSRs or read them from KVM at VCPU creation time,
> and restore the same value after every save/restore.
> 
> Signed-off-by: David Matlack <dmatl...@google.com>
> ---
>  arch/x86/include/asm/vmx.h |  31 +++++
>  arch/x86/kvm/vmx.c         | 317 
> +++++++++++++++++++++++++++++++++++++++++----
>  2 files changed, 324 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index a002b07..a4ca897 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -25,6 +25,7 @@
>  #define VMX_H
>  
>  
> +#include <linux/bitops.h>
>  #include <linux/types.h>
>  #include <uapi/asm/vmx.h>
>  
> @@ -110,6 +111,36 @@
>  #define VMX_MISC_SAVE_EFER_LMA                       0x00000020
>  #define VMX_MISC_ACTIVITY_HLT                        0x00000040
>  
> +static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
> +{
> +     return vmx_basic & GENMASK_ULL(30, 0);
> +}
> +
> +static inline u32 vmx_basic_vmcs_size(u64 vmx_basic)
> +{
> +     return (vmx_basic & GENMASK_ULL(44, 32)) >> 32;
> +}
> +
> +static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc)
> +{
> +     return vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
> +}
> +
> +static inline int vmx_misc_cr3_count(u64 vmx_misc)
> +{
> +     return (vmx_misc & GENMASK_ULL(24, 16)) >> 16;
> +}
> +
> +static inline int vmx_misc_max_msr(u64 vmx_misc)
> +{
> +     return (vmx_misc & GENMASK_ULL(27, 25)) >> 25;
> +}
> +
> +static inline int vmx_misc_mseg_revid(u64 vmx_misc)
> +{
> +     return (vmx_misc & GENMASK_ULL(63, 32)) >> 32;
> +}
> +
>  /* VMCS Encodings */
>  enum vmcs_field {
>       VIRTUAL_PROCESSOR_ID            = 0x00000000,
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 5382b82..6ec3832 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -463,6 +463,12 @@ struct nested_vmx {
>       u32 nested_vmx_misc_high;
>       u32 nested_vmx_ept_caps;

> +/*
> + * Called when userspace is restoring VMX MSRs.
> + *
> + * Returns 0 on success, non-0 otherwise.
> + */
> +static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
>  {
>       struct vcpu_vmx *vmx = to_vmx(vcpu);
>  
>       switch (msr_index) {
>       case MSR_IA32_VMX_BASIC:
> +             return vmx_restore_vmx_basic(vmx, data);
> +     case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
> +     case MSR_IA32_VMX_PINBASED_CTLS:
> +     case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
> +     case MSR_IA32_VMX_PROCBASED_CTLS:
> +     case MSR_IA32_VMX_TRUE_EXIT_CTLS:
> +     case MSR_IA32_VMX_EXIT_CTLS:
> +     case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
> +     case MSR_IA32_VMX_ENTRY_CTLS:

PINBASED_CTLS, PROCBASED_CTLS, EXIT_CTLS and ENTRY_CTLS can be derived
from their "true" counterparts, so I think it's better to remove the
"non-true" ones from struct nested_vmx (and/or add the "true" ones when
missing) and make them entirely computed.  But it can be done on top.

Paolo

> +     case MSR_IA32_VMX_PROCBASED_CTLS2:
> +             return vmx_restore_control_msr(vmx, msr_index, data);
> +     case MSR_IA32_VMX_MISC:
> +             return vmx_restore_vmx_misc(vmx, data);
> +     case MSR_IA32_VMX_CR0_FIXED0:
> +     case MSR_IA32_VMX_CR4_FIXED0:
> +             return vmx_restore_fixed0_msr(vmx, msr_index, data);
> +     case MSR_IA32_VMX_CR0_FIXED1:
> +     case MSR_IA32_VMX_CR4_FIXED1:
> +             return vmx_restore_fixed1_msr(vmx, msr_index, data);
> +     case MSR_IA32_VMX_EPT_VPID_CAP:
> +             return vmx_restore_vmx_ept_vpid_cap(vmx, data);
> +     case MSR_IA32_VMX_VMCS_ENUM:
> +             vmx->nested.nested_vmx_vmcs_enum = data;
> +             return 0;
> +     default:
>               /*
> -              * This MSR reports some information about VMX support. We
> -              * should return information about the VMX we emulate for the
> -              * guest, and the VMCS structure we give it - not about the
> -              * VMX support of the underlying hardware.
> +              * The rest of the VMX capability MSRs do not support restore.
>                */
> -             *pdata = VMCS12_REVISION | VMX_BASIC_TRUE_CTLS |
> -                        ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
> -                        (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
> -             if (cpu_has_vmx_basic_inout())
> -                     *pdata |= VMX_BASIC_INOUT;
> +             return -EINVAL;
> +     }
> +
> +     return 0;
> +}
> +
> +/* Returns 0 on success, non-0 otherwise. */
> +static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
> +{
> +     struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> +     switch (msr_index) {
> +     case MSR_IA32_VMX_BASIC:
> +             *pdata = vmx->nested.nested_vmx_basic;
>               break;
>       case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
>       case MSR_IA32_VMX_PINBASED_CTLS:
> @@ -2904,27 +3176,20 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 
> msr_index, u64 *pdata)
>                       vmx->nested.nested_vmx_misc_low,
>                       vmx->nested.nested_vmx_misc_high);
>               break;
> -     /*
> -      * These MSRs specify bits which the guest must keep fixed (on or off)
> -      * while L1 is in VMXON mode (in L1's root mode, or running an L2).
> -      * We picked the standard core2 setting.
> -      */
> -#define VMXON_CR0_ALWAYSON   (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
> -#define VMXON_CR4_ALWAYSON   X86_CR4_VMXE
>       case MSR_IA32_VMX_CR0_FIXED0:
> -             *pdata = VMXON_CR0_ALWAYSON;
> +             *pdata = vmx->nested.nested_vmx_cr0_fixed0;
>               break;
>       case MSR_IA32_VMX_CR0_FIXED1:
> -             *pdata = -1ULL;
> +             *pdata = vmx->nested.nested_vmx_cr0_fixed1;
>               break;
>       case MSR_IA32_VMX_CR4_FIXED0:
> -             *pdata = VMXON_CR4_ALWAYSON;
> +             *pdata = vmx->nested.nested_vmx_cr4_fixed0;
>               break;
>       case MSR_IA32_VMX_CR4_FIXED1:
> -             *pdata = -1ULL;
> +             *pdata = vmx->nested.nested_vmx_cr4_fixed1;
>               break;
>       case MSR_IA32_VMX_VMCS_ENUM:
> -             *pdata = 0x2e; /* highest index: VMX_PREEMPTION_TIMER_VALUE */
> +             *pdata = vmx->nested.nested_vmx_vmcs_enum;
>               break;
>       case MSR_IA32_VMX_PROCBASED_CTLS2:
>               *pdata = vmx_control_msr(
> @@ -3107,7 +3372,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct 
> msr_data *msr_info)
>                       vmx_leave_nested(vcpu);
>               break;
>       case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
> -             return 1; /* they are read-only */
> +             if (!msr_info->host_initiated)
> +                     return 1; /* they are read-only */
> +             if (!nested_vmx_allowed(vcpu))
> +                     return 1;
> +             return vmx_set_vmx_msr(vcpu, msr_index, data);
>       case MSR_IA32_XSS:
>               if (!vmx_xsaves_supported())
>                       return 1;
> 

Reply via email to