You can just use the same scheme as your patch 88/102:

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 685b8448d6e2..bd8cc9055fe2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6740,6 +6740,12 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
        return 1;
 }
 
+static int handle_invept(struct kvm_vcpu *vcpu)
+{
+       kvm_queue_exception(vcpu, UD_VECTOR);
+       return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -6785,6 +6791,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
        [EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
        [EXIT_REASON_MWAIT_INSTRUCTION]       = handle_invalid_op,
        [EXIT_REASON_MONITOR_INSTRUCTION]     = handle_invalid_op,
+       [EXIT_REASON_INVEPT]                  = handle_invept,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -7020,6 +7027,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
        case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
        case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
        case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
+       case EXIT_REASON_INVEPT:
                /*
                 * VMX instructions trap unconditionally. This allows L1 to
                 * emulate them for its L2 guest, i.e., allows 3-level nesting!
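
This works because vmx_handle_exit simply dispatches through that table:
once the INVEPT entry exists, an L1 INVEPT gets #UD queued into the guest
instead of falling through to the unknown-exit path. Roughly (a simplified
sketch, not the exact 3.2 code; unhandled_exit stands in for the real
fallback that reports KVM_EXIT_UNKNOWN to userspace):

	static int dispatch_exit(struct kvm_vcpu *vcpu, u32 exit_reason)
	{
		/* Look up the handler registered for this VM-exit reason. */
		if (exit_reason < kvm_vmx_max_exit_handlers &&
		    kvm_vmx_exit_handlers[exit_reason])
			return kvm_vmx_exit_handlers[exit_reason](vcpu);

		/* No handler registered: report an unknown exit. */
		return unhandled_exit(vcpu, exit_reason);
	}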


Paolo

On 01/11/2014 23:28, Ben Hutchings wrote:
> 3.2.64-rc1 review patch.  If anyone has any objections, please let me know.
> 
> ------------------
> 
> From: Nadav Har'El <[email protected]>
> 
> commit bfd0a56b90005f8c8a004baf407ad90045c2b11e upstream.
> 
> If we let L1 use EPT, we should probably also support the INVEPT instruction.
> 
> In our current nested EPT implementation, when L1 changes its EPT table
> for L2 (i.e., EPT12), L0 modifies the shadow EPT table (EPT02), and in
> the course of this modification already calls INVEPT. But if the last level
> of the shadow page is unsync, not all of L1's changes to EPT12 are intercepted,
> which means roots need to be synced when L1 calls INVEPT. Global INVEPT
> should not be different since roots are synced by kvm_mmu_load() each
> time EPTP02 changes.
> 
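
(A side note on that last paragraph: in the patch below, both the context
and the global invalidation types reduce to the same two calls, which is the
"roots need to be synced" sentence in code form. Illustrative pairing only;
nested_invept_sync is a made-up name for it:

	static void nested_invept_sync(struct kvm_vcpu *vcpu)
	{
		/* Push any unsynced EPT12 changes into the shadow EPT02 roots. */
		kvm_mmu_sync_roots(vcpu);
		/* Then request a TLB flush so stale translations are dropped. */
		kvm_mmu_flush_tlb(vcpu);
	}

Both helpers are exported by the mmu.c hunk below precisely so that vmx.c
can call them.)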
> Reviewed-by: Xiao Guangrong <[email protected]>
> Signed-off-by: Nadav Har'El <[email protected]>
> Signed-off-by: Jun Nakajima <[email protected]>
> Signed-off-by: Xinhao Xu <[email protected]>
> Signed-off-by: Yang Zhang <[email protected]>
> Signed-off-by: Gleb Natapov <[email protected]>
> Signed-off-by: Paolo Bonzini <[email protected]>
> [bwh: Backported to 3.2:
>  - Adjust context, filename
>  - Add definition of nested_ept_get_cr3(), added upstream by commit
>    155a97a3d7c7 ("nEPT: MMU context for nested EPT")]
> Signed-off-by: Ben Hutchings <[email protected]>
> ---
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -279,6 +279,7 @@ enum vmcs_field {
>  #define EXIT_REASON_APIC_ACCESS         44
>  #define EXIT_REASON_EPT_VIOLATION       48
>  #define EXIT_REASON_EPT_MISCONFIG       49
> +#define EXIT_REASON_INVEPT              50
>  #define EXIT_REASON_WBINVD           54
>  #define EXIT_REASON_XSETBV           55
>  
> @@ -397,6 +398,7 @@ enum vmcs_field {
>  #define VMX_EPT_EXTENT_INDIVIDUAL_ADDR               0
>  #define VMX_EPT_EXTENT_CONTEXT                       1
>  #define VMX_EPT_EXTENT_GLOBAL                        2
> +#define VMX_EPT_EXTENT_SHIFT                 24
>  
>  #define VMX_EPT_EXECUTE_ONLY_BIT             (1ull)
>  #define VMX_EPT_PAGE_WALK_4_BIT                      (1ull << 6)
> @@ -404,6 +406,7 @@ enum vmcs_field {
>  #define VMX_EPTP_WB_BIT                              (1ull << 14)
>  #define VMX_EPT_2MB_PAGE_BIT                 (1ull << 16)
>  #define VMX_EPT_1GB_PAGE_BIT                 (1ull << 17)
> +#define VMX_EPT_INVEPT_BIT                   (1ull << 20)
>  #define VMX_EPT_EXTENT_INDIVIDUAL_BIT                (1ull << 24)
>  #define VMX_EPT_EXTENT_CONTEXT_BIT           (1ull << 25)
>  #define VMX_EPT_EXTENT_GLOBAL_BIT            (1ull << 26)
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -2869,6 +2869,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu
>       mmu_sync_roots(vcpu);
>       spin_unlock(&vcpu->kvm->mmu_lock);
>  }
> +EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots);
>  
>  static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
>                                 u32 access, struct x86_exception *exception)
> @@ -3131,6 +3132,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *
>       ++vcpu->stat.tlb_flush;
>       kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
>  }
> +EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb);
>  
>  static void paging_new_cr3(struct kvm_vcpu *vcpu)
>  {
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -602,6 +602,7 @@ static void nested_release_page_clean(st
>       kvm_release_page_clean(page);
>  }
>  
> +static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
>  static u64 construct_eptp(unsigned long root_hpa);
>  static void kvm_cpu_vmxon(u64 addr);
>  static void kvm_cpu_vmxoff(void);
> @@ -1899,6 +1900,7 @@ static u32 nested_vmx_secondary_ctls_low
>  static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high;
>  static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high;
>  static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high;
> +static u32 nested_vmx_ept_caps;
>  static __init void nested_vmx_setup_ctls_msrs(void)
>  {
>       /*
> @@ -5550,6 +5552,74 @@ static int handle_vmptrst(struct kvm_vcp
>       return 1;
>  }
>  
> +/* Emulate the INVEPT instruction */
> +static int handle_invept(struct kvm_vcpu *vcpu)
> +{
> +     u32 vmx_instruction_info, types;
> +     unsigned long type;
> +     gva_t gva;
> +     struct x86_exception e;
> +     struct {
> +             u64 eptp, gpa;
> +     } operand;
> +     u64 eptp_mask = ((1ull << 51) - 1) & PAGE_MASK;
> +
> +     if (!(nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) ||
> +         !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) {
> +             kvm_queue_exception(vcpu, UD_VECTOR);
> +             return 1;
> +     }
> +
> +     if (!nested_vmx_check_permission(vcpu))
> +             return 1;
> +
> +     if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) {
> +             kvm_queue_exception(vcpu, UD_VECTOR);
> +             return 1;
> +     }
> +
> +     vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
> +     type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
> +
> +     types = (nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
> +
> +     if (!(types & (1UL << type))) {
> +             nested_vmx_failValid(vcpu,
> +                             VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
> +             return 1;
> +     }
> +
> +     /* According to the Intel VMX instruction reference, the memory
> +      * operand is read even if it isn't needed (e.g., for type==global)
> +      */
> +     if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
> +                     vmx_instruction_info, &gva))
> +             return 1;
> +     if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
> +                             sizeof(operand), &e)) {
> +             kvm_inject_page_fault(vcpu, &e);
> +             return 1;
> +     }
> +
> +     switch (type) {
> +     case VMX_EPT_EXTENT_CONTEXT:
> +             if ((operand.eptp & eptp_mask) !=
> +                             (nested_ept_get_cr3(vcpu) & eptp_mask))
> +                     break;
> +     case VMX_EPT_EXTENT_GLOBAL:
> +             kvm_mmu_sync_roots(vcpu);
> +             kvm_mmu_flush_tlb(vcpu);
> +             nested_vmx_succeed(vcpu);
> +             break;
> +     default:
> +             BUG_ON(1);
> +             break;
> +     }
> +
> +     skip_emulated_instruction(vcpu);
> +     return 1;
> +}
> +
>  /*
>   * The exit handlers return 1 if the exit was handled fully and guest execution
>   * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
> @@ -5591,6 +5661,7 @@ static int (*kvm_vmx_exit_handlers[])(st
>       [EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
>       [EXIT_REASON_MWAIT_INSTRUCTION]       = handle_invalid_op,
>       [EXIT_REASON_MONITOR_INSTRUCTION]     = handle_invalid_op,
> +     [EXIT_REASON_INVEPT]                  = handle_invept,
>  };
>  
>  static const int kvm_vmx_max_exit_handlers =
> @@ -5775,6 +5846,7 @@ static bool nested_vmx_exit_handled(stru
>       case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
>       case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
>       case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
> +     case EXIT_REASON_INVEPT:
>               /*
>                * VMX instructions trap unconditionally. This allows L1 to
>                * emulate them for its L2 guest, i.e., allows 3-level nesting!
> @@ -6436,6 +6508,12 @@ static void vmx_set_supported_cpuid(u32
>               entry->ecx |= bit(X86_FEATURE_VMX);
>  }
>  
> +static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
> +{
> +     /* return the page table to be shadowed - in our case, EPT12 */
> +     return get_vmcs12(vcpu)->ept_pointer;
> +}
> +
>  /*
>   * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
>   * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
> 
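
One more note on the backported handle_invept, since the types check looks
cryptic: bits 25 and 26 of the IA32_VMX_EPT_VPID_CAP MSR advertise support
for the single-context and global invalidation types, so shifting the caps
right by VMX_EPT_EXTENT_SHIFT (24) and masking with 6 leaves a bitmask
indexed directly by the INVEPT type. A standalone, compilable sketch with a
made-up caps value, just to show the arithmetic:

	#include <stdint.h>
	#include <stdio.h>

	#define VMX_EPT_EXTENT_SHIFT		24
	#define VMX_EPT_EXTENT_CONTEXT_BIT	(1ull << 25)
	#define VMX_EPT_EXTENT_GLOBAL_BIT	(1ull << 26)

	int main(void)
	{
		/* Hypothetical caps: both invalidation types supported. */
		uint64_t ept_caps = VMX_EPT_EXTENT_CONTEXT_BIT |
				    VMX_EPT_EXTENT_GLOBAL_BIT;
		uint32_t types = (ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;

		/* Types 1 (context) and 2 (global) pass; 0 and 3 are rejected. */
		for (unsigned long type = 0; type < 4; type++)
			printf("type %lu: %s\n", type,
			       (types & (1UL << type)) ? "supported" : "rejected");
		return 0;
	}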