The following diff adds in formalization around mmio assists for nested page/ept faults on Intel and AMD vmm(4) hosts. It provides what little information is available to userland in terms of either the instruction bytes (on AMD) or the instruction length (on Intel).
vmd is updated to intercept these vm exit events, but will currently log the issue and cause the vm process to exit. This is the same behavior folks experience currently when a guest attempts to read/write guest physical addresses in the mmio-reserved ranges, but now with an explanation of the reason and current {e,r,}ip value. This is the foundation I'll build upon while implementing instruction decode and emulation support in userland. No noticeable change should occur for existing guests that don't trigger mmio assist events. ok? -dv diff refs/heads/master refs/heads/mmio commit - 10e026163f31687dba11fb4655500afb4e616258 commit + 7d92e26b51c3fd520807dbcd5233f14b76bc611e blob - 84da19438b74377276b16b4b4f7db45ae9ec6be2 blob + a89a4dc0fbe7b31a47390de363109092b76ffa22 --- sys/arch/amd64/amd64/vmm.c +++ sys/arch/amd64/amd64/vmm.c @@ -4891,11 +4891,20 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params * vcpu->vc_gueststate.vg_rax = vcpu->vc_exit.vei.vei_data; break; + case VMX_EXIT_EPT_VIOLATION: + ret = vcpu_writeregs_vmx(vcpu, VM_RWREGS_GPRS, 0, + &vcpu->vc_exit.vrs); + if (ret) { + printf("%s: vm %d vcpu %d failed to update " + "registers\n", __func__, + vcpu->vc_parent->vm_id, vcpu->vc_id); + return (EINVAL); + } + break; case VM_EXIT_NONE: case VMX_EXIT_HLT: case VMX_EXIT_INT_WINDOW: case VMX_EXIT_EXTINT: - case VMX_EXIT_EPT_VIOLATION: case VMX_EXIT_CPUID: case VMX_EXIT_XSETBV: break; @@ -4927,6 +4936,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params * break; #endif /* VMM_DEBUG */ } + memset(&vcpu->vc_exit, 0, sizeof(vcpu->vc_exit)); } setregion(&gdt, ci->ci_gdt, GDT_SIZE - 1); @@ -5658,7 +5668,7 @@ vmm_get_guest_memtype(struct vm *vm, paddr_t gpa) if (gpa >= VMM_PCI_MMIO_BAR_BASE && gpa <= VMM_PCI_MMIO_BAR_END) { DPRINTF("guest mmio access @ 0x%llx\n", (uint64_t)gpa); - return (VMM_MEM_TYPE_REGULAR); + return (VMM_MEM_TYPE_MMIO); } /* XXX Use binary search? */ @@ -5782,18 +5792,28 @@ int svm_handle_np_fault(struct vcpu *vcpu) { uint64_t gpa; - int gpa_memtype, ret; + int gpa_memtype, ret = 0; struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; + struct vm_exit_eptviolation *vee = &vcpu->vc_exit.vee; - ret = 0; + memset(vee, 0, sizeof(*vee)); + vee->vee_fault_type = VEE_FAULT_INVALID; gpa = vmcb->v_exitinfo2; gpa_memtype = vmm_get_guest_memtype(vcpu->vc_parent, gpa); switch (gpa_memtype) { case VMM_MEM_TYPE_REGULAR: + vee->vee_fault_type = VEE_FAULT_HANDLED; ret = svm_fault_page(vcpu, gpa); break; + case VMM_MEM_TYPE_MMIO: + vee->vee_fault_type = VEE_FAULT_MMIO_ASSIST; + vee->vee_insn_len = vmcb->v_n_bytes_fetched; + memcpy(&vee->vee_insn_bytes, vmcb->v_guest_ins_bytes, + sizeof(vee->vee_insn_bytes)); + ret = EAGAIN; + break; default: printf("unknown memory type %d for GPA 0x%llx\n", gpa_memtype, gpa); @@ -5862,10 +5882,13 @@ vmx_fault_page(struct vcpu *vcpu, paddr_t gpa) int vmx_handle_np_fault(struct vcpu *vcpu) { - uint64_t gpa; - int gpa_memtype, ret; + uint64_t insn_len = 0, gpa; + int gpa_memtype, ret = 0; + struct vm_exit_eptviolation *vee = &vcpu->vc_exit.vee; - ret = 0; + memset(vee, 0, sizeof(*vee)); + vee->vee_fault_type = VEE_FAULT_INVALID; + if (vmread(VMCS_GUEST_PHYSICAL_ADDRESS, &gpa)) { printf("%s: cannot extract faulting pa\n", __func__); return (EINVAL); @@ -5874,8 +5897,21 @@ vmx_handle_np_fault(struct vcpu *vcpu) gpa_memtype = vmm_get_guest_memtype(vcpu->vc_parent, gpa); switch (gpa_memtype) { case VMM_MEM_TYPE_REGULAR: + vee->vee_fault_type = VEE_FAULT_HANDLED; ret = vmx_fault_page(vcpu, gpa); break; + case VMM_MEM_TYPE_MMIO: + vee->vee_fault_type = VEE_FAULT_MMIO_ASSIST; + if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_len) || + insn_len == 0 || insn_len > 15) { + printf("%s: cannot extract instruction length\n", + __func__); + ret = EINVAL; + } else { + vee->vee_insn_len = (uint32_t)insn_len; + ret = EAGAIN; + } + break; default: printf("unknown memory type %d for GPA 0x%llx\n", gpa_memtype, gpa); @@ -7321,7 +7357,19 @@ vcpu_run_svm(struct vcpu *vcpu, struct vm_run_params * vcpu->vc_exit.vei.vei_data; vmcb->v_rax = vcpu->vc_gueststate.vg_rax; } + break; + case SVM_VMEXIT_NPF: + ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_GPRS, + &vcpu->vc_exit.vrs); + if (ret) { + printf("%s: vm %d vcpu %d failed to update " + "registers\n", __func__, + vcpu->vc_parent->vm_id, vcpu->vc_id); + return (EINVAL); + } + break; } + memset(&vcpu->vc_exit, 0, sizeof(vcpu->vc_exit)); } while (ret == 0) { blob - 6fbb5e1bdc22666010e1c7484035d5d09cd7955b blob + aa1628405717bda486de225055c89c5db91b01c5 --- sys/arch/amd64/include/vmmvar.h +++ sys/arch/amd64/include/vmmvar.h @@ -324,7 +324,9 @@ enum { }; enum { + VEE_FAULT_INVALID, VEE_FAULT_HANDLED, + VEE_FAULT_MMIO_ASSIST, VEE_FAULT_PROTECT, }; @@ -362,6 +366,8 @@ struct vm_exit_inout { */ struct vm_exit_eptviolation { uint8_t vee_fault_type; + uint8_t vee_insn_len; + uint8_t vee_insn_bytes[15]; }; /* @@ -709,6 +715,7 @@ enum { enum { VMM_MEM_TYPE_REGULAR, + VMM_MEM_TYPE_MMIO, VMM_MEM_TYPE_UNKNOWN }; blob - eac6616ea918716c2986bab0a05724b31bcce2ec blob + b244d72264ce9694378af83df076231b62801aae --- usr.sbin/vmd/vm.c +++ usr.sbin/vmd/vm.c @@ -1652,26 +1653,36 @@ vcpu_exit_inout(struct vm_run_params *vrp) * * Return values: * 0: no action required - * EAGAIN: a protection fault occured, kill the vm. + * EFAULT: a protection fault occured, kill the vm. */ int vcpu_exit_eptviolation(struct vm_run_params *vrp) { + int ret = 0; + uint8_t fault_type; struct vm_exit *ve = vrp->vrp_exit; - /* - * vmd may be exiting to vmd to handle a pending interrupt - * but last exit type may have been VMX_EXIT_EPT_VIOLATION, - * check the fault_type to ensure we really are processing - * a VMX_EXIT_EPT_VIOLATION. - */ - if (ve->vee.vee_fault_type == VEE_FAULT_PROTECT) { - log_debug("%s: EPT Violation: rip=0x%llx", - __progname, vrp->vrp_exit->vrs.vrs_gprs[VCPU_REGS_RIP]); - return (EAGAIN); + fault_type = ve->vee.vee_fault_type; + switch (fault_type) { + case VEE_FAULT_HANDLED: + log_debug("%s: fault already handled", __func__); + break; + case VEE_FAULT_PROTECT: + log_debug("%s: EPT Violation: rip=0x%llx", __progname, + ve->vrs.vrs_gprs[VCPU_REGS_RIP]); + ret = EFAULT; + break; + case VEE_FAULT_MMIO_ASSIST: + log_warnx("%s: mmio assist required: rip=0x%llx", __progname, + ve->vrs.vrs_gprs[VCPU_REGS_RIP]); + ret = EFAULT; + break; + default: + fatalx("%s: invalid fault_type %d", __progname, fault_type); + /* UNREACHED */ } - return (0); + return (ret); } /* @@ -1704,7 +1715,6 @@ vcpu_exit(struct vm_run_params *vrp) case VMX_EXIT_CPUID: case VMX_EXIT_EXTINT: case SVM_VMEXIT_INTR: - case SVM_VMEXIT_NPF: case SVM_VMEXIT_MSR: case SVM_VMEXIT_CPUID: /* @@ -1715,11 +1725,11 @@ vcpu_exit(struct vm_run_params *vrp) * in more vmd log spam). */ break; + case SVM_VMEXIT_NPF: case VMX_EXIT_EPT_VIOLATION: ret = vcpu_exit_eptviolation(vrp); if (ret) return (ret); - break; case VMX_EXIT_IO: case SVM_VMEXIT_IOIO: