The branch stable/14 has been updated by aokblast: URL: https://cgit.FreeBSD.org/src/commit/?id=a34f42b083dd323314d385bc7b00323d336ea28b
commit a34f42b083dd323314d385bc7b00323d336ea28b Author: ShengYi Hung <[email protected]> AuthorDate: 2025-07-11 08:52:01 +0000 Commit: ShengYi Hung <[email protected]> CommitDate: 2026-03-04 14:35:40 +0000 vmm: Support INOUT manual decode. The inout instruction in AMD SVM requires DecodeAssist feature to decode the segment override prefix. However, without that feature, we are still able to decode by fetching the instruction directly. Approved by: markj (mentor) MFC after: 2 weeks Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D51256 (cherry picked from commit c18c521c79b6160ce43bb2ca4c2eb42ccf7e6e57) --- sys/amd64/include/vmm.h | 2 + sys/amd64/include/vmm_instruction_emul.h | 25 +++++++++ sys/amd64/vmm/amd/svm.c | 90 +++++++++++++++++++------------- sys/amd64/vmm/intel/vmx.c | 2 + sys/amd64/vmm/vmm_instruction_emul.c | 34 ++++-------- sys/amd64/vmm/vmm_ioport.c | 40 ++++++++++++++ 6 files changed, 132 insertions(+), 61 deletions(-) diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index 09f2ea7cb0e5..262fbd6449a0 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -674,6 +674,8 @@ struct vm_inout_str { int addrsize; enum vm_reg_name seg_name; struct seg_desc seg_desc; + int cs_d; + uint64_t cs_base; }; enum task_switch_reason { diff --git a/sys/amd64/include/vmm_instruction_emul.h b/sys/amd64/include/vmm_instruction_emul.h index a21754ac777f..e57ad81c3e60 100644 --- a/sys/amd64/include/vmm_instruction_emul.h +++ b/sys/amd64/include/vmm_instruction_emul.h @@ -31,6 +31,31 @@ #include <sys/mman.h> +/* struct vie_op.op_type */ +enum { + VIE_OP_TYPE_NONE = 0, + VIE_OP_TYPE_MOV, + VIE_OP_TYPE_MOVSX, + VIE_OP_TYPE_MOVZX, + VIE_OP_TYPE_AND, + VIE_OP_TYPE_OR, + VIE_OP_TYPE_SUB, + VIE_OP_TYPE_TWO_BYTE, + VIE_OP_TYPE_PUSH, + VIE_OP_TYPE_CMP, + VIE_OP_TYPE_POP, + VIE_OP_TYPE_MOVS, + VIE_OP_TYPE_GROUP1, + VIE_OP_TYPE_STOS, + VIE_OP_TYPE_BITTEST, + VIE_OP_TYPE_TWOB_GRP15, + VIE_OP_TYPE_ADD, + VIE_OP_TYPE_TEST, + VIE_OP_TYPE_BEXTR, + VIE_OP_TYPE_OUTS, + VIE_OP_TYPE_LAST +}; + /* * Callback functions to read and write memory regions. */ diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c index 3fda9454090b..b85a3e0fcc3e 100644 --- a/sys/amd64/vmm/amd/svm.c +++ b/sys/amd64/vmm/amd/svm.c @@ -309,6 +309,33 @@ svm_set_tsc_offset(struct svm_vcpu *vcpu, uint64_t offset) #define MSR_AMD7TH_START 0xC0010000UL #define MSR_AMD7TH_END 0xC0011FFFUL +static void +svm_get_cs_info(struct vmcb *vmcb, struct vm_guest_paging *paging, int *cs_d, + uint64_t *base) +{ + struct vmcb_segment seg; + int error __diagused; + + error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg); + KASSERT(error == 0, ("%s: vmcb_seg error %d", __func__, error)); + + switch (paging->cpu_mode) { + case CPU_MODE_REAL: + *base = seg.base; + *cs_d = 0; + break; + case CPU_MODE_PROTECTED: + case CPU_MODE_COMPATIBILITY: + *cs_d = !!(seg.attrib & VMCB_CS_ATTRIB_D); + *base = seg.base; + break; + default: + *base = 0; + *cs_d = 0; + break; + } +} + /* * Get the index and bit position for a MSR in permission bitmap. * Two bits are used for each MSR: lower bit for read and higher bit for write. @@ -727,10 +754,29 @@ svm_inout_str_seginfo(struct svm_vcpu *vcpu, int64_t info1, int in, if (in) { vis->seg_name = VM_REG_GUEST_ES; - } else { - /* The segment field has standard encoding */ + } else if (decode_assist()) { + /* + * The effective segment number in EXITINFO1[12:10] is populated + * only if the processor has the DecodeAssist capability. + * + * XXX this is not specified explicitly in APMv2 but can be + * verified empirically. + */ s = (info1 >> 10) & 0x7; + + /* The segment field has standard encoding */ vis->seg_name = vm_segment_name(s); + } else { + /* + * The segment register need to be manually decoded by fetching + * the instructions near ip. However, we are unable to fetch it + * while the interrupts are disabled. Therefore, we leave the + * value unset until the generic ins/outs handler runs. + */ + vis->seg_name = VM_REG_LAST; + svm_get_cs_info(vcpu->vmcb, &vis->paging, &vis->cs_d, + &vis->cs_base); + return; } error = svm_getdesc(vcpu, vis->seg_name, &vis->seg_desc); @@ -790,16 +836,6 @@ svm_handle_io(struct svm_vcpu *vcpu, struct vm_exit *vmexit) info1 = ctrl->exitinfo1; inout_string = info1 & BIT(2) ? 1 : 0; - /* - * The effective segment number in EXITINFO1[12:10] is populated - * only if the processor has the DecodeAssist capability. - * - * XXX this is not specified explicitly in APMv2 but can be verified - * empirically. - */ - if (inout_string && !decode_assist()) - return (UNHANDLED); - vmexit->exitcode = VM_EXITCODE_INOUT; vmexit->u.inout.in = (info1 & BIT(0)) ? 1 : 0; vmexit->u.inout.string = inout_string; @@ -817,6 +853,8 @@ svm_handle_io(struct svm_vcpu *vcpu, struct vm_exit *vmexit) vis->index = svm_inout_str_index(regs, vmexit->u.inout.in); vis->count = svm_inout_str_count(regs, vmexit->u.inout.rep); vis->addrsize = svm_inout_str_addrsize(info1); + vis->cs_d = 0; + vis->cs_base = 0; svm_inout_str_seginfo(vcpu, info1, vmexit->u.inout.in, vis); } @@ -858,10 +896,9 @@ static void svm_handle_inst_emul(struct vmcb *vmcb, uint64_t gpa, struct vm_exit *vmexit) { struct vm_guest_paging *paging; - struct vmcb_segment seg; struct vmcb_ctrl *ctrl; char *inst_bytes; - int error __diagused, inst_len; + int inst_len; ctrl = &vmcb->ctrl; paging = &vmexit->u.inst_emul.paging; @@ -871,29 +908,8 @@ svm_handle_inst_emul(struct vmcb *vmcb, uint64_t gpa, struct vm_exit *vmexit) vmexit->u.inst_emul.gla = VIE_INVALID_GLA; svm_paging_info(vmcb, paging); - error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg); - KASSERT(error == 0, ("%s: vmcb_seg(CS) error %d", __func__, error)); - - switch(paging->cpu_mode) { - case CPU_MODE_REAL: - vmexit->u.inst_emul.cs_base = seg.base; - vmexit->u.inst_emul.cs_d = 0; - break; - case CPU_MODE_PROTECTED: - case CPU_MODE_COMPATIBILITY: - vmexit->u.inst_emul.cs_base = seg.base; - - /* - * Section 4.8.1 of APM2, Default Operand Size or D bit. - */ - vmexit->u.inst_emul.cs_d = (seg.attrib & VMCB_CS_ATTRIB_D) ? - 1 : 0; - break; - default: - vmexit->u.inst_emul.cs_base = 0; - vmexit->u.inst_emul.cs_d = 0; - break; - } + svm_get_cs_info(vmcb, paging, &vmexit->u.inst_emul.cs_d, + &vmexit->u.inst_emul.cs_base); /* * Copy the instruction bytes into 'vie' if available. diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c index 3fe20986ec8d..daa4ef56316f 100644 --- a/sys/amd64/vmm/intel/vmx.c +++ b/sys/amd64/vmm/intel/vmx.c @@ -2648,6 +2648,8 @@ vmx_exit_process(struct vmx *vmx, struct vmx_vcpu *vcpu, struct vm_exit *vmexit) vis->index = inout_str_index(vcpu, in); vis->count = inout_str_count(vcpu, vis->inout.rep); vis->addrsize = inout_str_addrsize(inst_info); + vis->cs_d = 0; + vis->cs_base = 0; inout_str_seginfo(vcpu, inst_info, in, vis); } SDT_PROBE3(vmm, vmx, exit, inout, vmx, vcpuid, vmexit); diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c index 7626082ff88d..12b7413e33de 100644 --- a/sys/amd64/vmm/vmm_instruction_emul.c +++ b/sys/amd64/vmm/vmm_instruction_emul.c @@ -63,30 +63,6 @@ #include <x86/psl.h> #include <x86/specialreg.h> -/* struct vie_op.op_type */ -enum { - VIE_OP_TYPE_NONE = 0, - VIE_OP_TYPE_MOV, - VIE_OP_TYPE_MOVSX, - VIE_OP_TYPE_MOVZX, - VIE_OP_TYPE_AND, - VIE_OP_TYPE_OR, - VIE_OP_TYPE_SUB, - VIE_OP_TYPE_TWO_BYTE, - VIE_OP_TYPE_PUSH, - VIE_OP_TYPE_CMP, - VIE_OP_TYPE_POP, - VIE_OP_TYPE_MOVS, - VIE_OP_TYPE_GROUP1, - VIE_OP_TYPE_STOS, - VIE_OP_TYPE_BITTEST, - VIE_OP_TYPE_TWOB_GRP15, - VIE_OP_TYPE_ADD, - VIE_OP_TYPE_TEST, - VIE_OP_TYPE_BEXTR, - VIE_OP_TYPE_LAST -}; - /* struct vie_op.op_flags */ #define VIE_OP_F_IMM (1 << 0) /* 16/32-bit immediate operand */ #define VIE_OP_F_IMM8 (1 << 1) /* 8-bit immediate operand */ @@ -150,6 +126,16 @@ static const struct vie_op one_byte_opcodes[256] = { .op_byte = 0x3B, .op_type = VIE_OP_TYPE_CMP, }, + [0x6E] = { + .op_byte = 0x6E, + .op_type = VIE_OP_TYPE_OUTS, + .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION, + }, + [0x6F] = { + .op_byte = 0x6F, + .op_type = VIE_OP_TYPE_OUTS, + .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION, + }, [0x88] = { .op_byte = 0x88, .op_type = VIE_OP_TYPE_MOV, diff --git a/sys/amd64/vmm/vmm_ioport.c b/sys/amd64/vmm/vmm_ioport.c index 3caea7deac16..c8f777a82c14 100644 --- a/sys/amd64/vmm/vmm_ioport.c +++ b/sys/amd64/vmm/vmm_ioport.c @@ -144,10 +144,50 @@ emulate_inout_port(struct vcpu *vcpu, struct vm_exit *vmexit, bool *retu) return (0); } +static int +decode_segment(struct vcpu *vcpu, enum vm_reg_name *segment) +{ + struct vm_guest_paging *paging; + struct vie vie; + struct vm_exit *vme; + int err; + int fault; + + vme = vm_exitinfo(vcpu); + paging = &vme->u.inout_str.paging; + + vie_init(&vie, NULL, 0); + err = vmm_fetch_instruction(vcpu, paging, + vme->rip + vme->u.inout_str.cs_base, VIE_INST_SIZE, &vie, &fault); + if (err || fault) + return (err); + + err = vmm_decode_instruction(vcpu, VIE_INVALID_GLA, paging->cpu_mode, + vme->u.inout_str.cs_d, &vie); + + if (err || vie.op.op_type != VIE_OP_TYPE_OUTS) + return (EINVAL); + if (vie.segment_override) + *segment = vie.segment_register; + else + *segment = VM_REG_GUEST_DS; + + return (0); +} + static int emulate_inout_str(struct vcpu *vcpu, struct vm_exit *vmexit, bool *retu) { + int err; + *retu = true; + if (vmexit->u.inout_str.seg_name == VM_REG_LAST) { + err = decode_segment(vcpu, &vmexit->u.inout_str.seg_name); + if (err) + return (err); + return (vm_get_seg_desc(vcpu, vmexit->u.inout_str.seg_name, + &vmexit->u.inout_str.seg_desc)); + } return (0); /* Return to userspace to finish emulation */ }
