The branch main has been updated by aokblast: URL: https://cgit.FreeBSD.org/src/commit/?id=c18c521c79b6160ce43bb2ca4c2eb42ccf7e6e57
commit c18c521c79b6160ce43bb2ca4c2eb42ccf7e6e57 Author: ShengYi Hung <aokbl...@freebsd.org> AuthorDate: 2025-07-11 08:52:01 +0000 Commit: ShengYi Hung <aokbl...@freebsd.org> CommitDate: 2025-07-24 10:09:50 +0000 vmm: Support INOUT manual decode. The inout instruction in AMD SVM requires the DecodeAssist feature to decode the segment override prefix. However, without that feature, we are still able to decode it by fetching the instruction directly. Approved by: markj (mentor) MFC after: 2 weeks Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D51256 --- sys/amd64/include/vmm.h | 2 + sys/amd64/include/vmm_instruction_emul.h | 25 +++++++++ sys/amd64/vmm/amd/svm.c | 90 +++++++++++++++++++------------- sys/amd64/vmm/intel/vmx.c | 2 + sys/amd64/vmm/vmm_instruction_emul.c | 34 ++++-------- sys/amd64/vmm/vmm_ioport.c | 40 ++++++++++++++ 6 files changed, 132 insertions(+), 61 deletions(-) diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index a9c73b75213b..0b3daed4f69e 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -649,6 +649,8 @@ struct vm_inout_str { int addrsize; enum vm_reg_name seg_name; struct seg_desc seg_desc; + int cs_d; + uint64_t cs_base; }; enum task_switch_reason { diff --git a/sys/amd64/include/vmm_instruction_emul.h b/sys/amd64/include/vmm_instruction_emul.h index d5f0363cfb41..1fb0f97682a7 100644 --- a/sys/amd64/include/vmm_instruction_emul.h +++ b/sys/amd64/include/vmm_instruction_emul.h @@ -31,6 +31,31 @@ #include <sys/mman.h> +/* struct vie_op.op_type */ +enum { + VIE_OP_TYPE_NONE = 0, + VIE_OP_TYPE_MOV, + VIE_OP_TYPE_MOVSX, + VIE_OP_TYPE_MOVZX, + VIE_OP_TYPE_AND, + VIE_OP_TYPE_OR, + VIE_OP_TYPE_SUB, + VIE_OP_TYPE_TWO_BYTE, + VIE_OP_TYPE_PUSH, + VIE_OP_TYPE_CMP, + VIE_OP_TYPE_POP, + VIE_OP_TYPE_MOVS, + VIE_OP_TYPE_GROUP1, + VIE_OP_TYPE_STOS, + VIE_OP_TYPE_BITTEST, + VIE_OP_TYPE_TWOB_GRP15, + VIE_OP_TYPE_ADD, + VIE_OP_TYPE_TEST, + VIE_OP_TYPE_BEXTR, + VIE_OP_TYPE_OUTS, + VIE_OP_TYPE_LAST 
+}; + /* * Callback functions to read and write memory regions. */ diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c index 6c16daaa47c2..2fe6a5bc3584 100644 --- a/sys/amd64/vmm/amd/svm.c +++ b/sys/amd64/vmm/amd/svm.c @@ -317,6 +317,33 @@ svm_set_tsc_offset(struct svm_vcpu *vcpu, uint64_t offset) #define MSR_AMD7TH_START 0xC0010000UL #define MSR_AMD7TH_END 0xC0011FFFUL +static void +svm_get_cs_info(struct vmcb *vmcb, struct vm_guest_paging *paging, int *cs_d, + uint64_t *base) +{ + struct vmcb_segment seg; + int error __diagused; + + error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg); + KASSERT(error == 0, ("%s: vmcb_seg error %d", __func__, error)); + + switch (paging->cpu_mode) { + case CPU_MODE_REAL: + *base = seg.base; + *cs_d = 0; + break; + case CPU_MODE_PROTECTED: + case CPU_MODE_COMPATIBILITY: + *cs_d = !!(seg.attrib & VMCB_CS_ATTRIB_D); + *base = seg.base; + break; + default: + *base = 0; + *cs_d = 0; + break; + } +} + /* * Get the index and bit position for a MSR in permission bitmap. * Two bits are used for each MSR: lower bit for read and higher bit for write. @@ -735,10 +762,29 @@ svm_inout_str_seginfo(struct svm_vcpu *vcpu, int64_t info1, int in, if (in) { vis->seg_name = VM_REG_GUEST_ES; - } else { - /* The segment field has standard encoding */ + } else if (decode_assist()) { + /* + * The effective segment number in EXITINFO1[12:10] is populated + * only if the processor has the DecodeAssist capability. + * + * XXX this is not specified explicitly in APMv2 but can be + * verified empirically. + */ s = (info1 >> 10) & 0x7; + + /* The segment field has standard encoding */ vis->seg_name = vm_segment_name(s); + } else { + /* + * The segment register need to be manually decoded by fetching + * the instructions near ip. However, we are unable to fetch it + * while the interrupts are disabled. Therefore, we leave the + * value unset until the generic ins/outs handler runs. 
+ */ + vis->seg_name = VM_REG_LAST; + svm_get_cs_info(vcpu->vmcb, &vis->paging, &vis->cs_d, + &vis->cs_base); + return; } error = svm_getdesc(vcpu, vis->seg_name, &vis->seg_desc); @@ -798,16 +844,6 @@ svm_handle_io(struct svm_vcpu *vcpu, struct vm_exit *vmexit) info1 = ctrl->exitinfo1; inout_string = info1 & BIT(2) ? 1 : 0; - /* - * The effective segment number in EXITINFO1[12:10] is populated - * only if the processor has the DecodeAssist capability. - * - * XXX this is not specified explicitly in APMv2 but can be verified - * empirically. - */ - if (inout_string && !decode_assist()) - return (UNHANDLED); - vmexit->exitcode = VM_EXITCODE_INOUT; vmexit->u.inout.in = (info1 & BIT(0)) ? 1 : 0; vmexit->u.inout.string = inout_string; @@ -825,6 +861,8 @@ svm_handle_io(struct svm_vcpu *vcpu, struct vm_exit *vmexit) vis->index = svm_inout_str_index(regs, vmexit->u.inout.in); vis->count = svm_inout_str_count(regs, vmexit->u.inout.rep); vis->addrsize = svm_inout_str_addrsize(info1); + vis->cs_d = 0; + vis->cs_base = 0; svm_inout_str_seginfo(vcpu, info1, vmexit->u.inout.in, vis); } @@ -866,10 +904,9 @@ static void svm_handle_inst_emul(struct vmcb *vmcb, uint64_t gpa, struct vm_exit *vmexit) { struct vm_guest_paging *paging; - struct vmcb_segment seg; struct vmcb_ctrl *ctrl; char *inst_bytes; - int error __diagused, inst_len; + int inst_len; ctrl = &vmcb->ctrl; paging = &vmexit->u.inst_emul.paging; @@ -879,29 +916,8 @@ svm_handle_inst_emul(struct vmcb *vmcb, uint64_t gpa, struct vm_exit *vmexit) vmexit->u.inst_emul.gla = VIE_INVALID_GLA; svm_paging_info(vmcb, paging); - error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg); - KASSERT(error == 0, ("%s: vmcb_seg(CS) error %d", __func__, error)); - - switch(paging->cpu_mode) { - case CPU_MODE_REAL: - vmexit->u.inst_emul.cs_base = seg.base; - vmexit->u.inst_emul.cs_d = 0; - break; - case CPU_MODE_PROTECTED: - case CPU_MODE_COMPATIBILITY: - vmexit->u.inst_emul.cs_base = seg.base; - - /* - * Section 4.8.1 of APM2, Default Operand Size or D 
bit. - */ - vmexit->u.inst_emul.cs_d = (seg.attrib & VMCB_CS_ATTRIB_D) ? - 1 : 0; - break; - default: - vmexit->u.inst_emul.cs_base = 0; - vmexit->u.inst_emul.cs_d = 0; - break; - } + svm_get_cs_info(vmcb, paging, &vmexit->u.inst_emul.cs_d, + &vmexit->u.inst_emul.cs_base); /* * Copy the instruction bytes into 'vie' if available. diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c index 957217ab2258..842281ab862e 100644 --- a/sys/amd64/vmm/intel/vmx.c +++ b/sys/amd64/vmm/intel/vmx.c @@ -2659,6 +2659,8 @@ vmx_exit_process(struct vmx *vmx, struct vmx_vcpu *vcpu, struct vm_exit *vmexit) vis->index = inout_str_index(vcpu, in); vis->count = inout_str_count(vcpu, vis->inout.rep); vis->addrsize = inout_str_addrsize(inst_info); + vis->cs_d = 0; + vis->cs_base = 0; inout_str_seginfo(vcpu, inst_info, in, vis); } SDT_PROBE3(vmm, vmx, exit, inout, vmx, vcpuid, vmexit); diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c index c53e32889000..c54b6e6d0074 100644 --- a/sys/amd64/vmm/vmm_instruction_emul.c +++ b/sys/amd64/vmm/vmm_instruction_emul.c @@ -65,30 +65,6 @@ #include <x86/psl.h> #include <x86/specialreg.h> -/* struct vie_op.op_type */ -enum { - VIE_OP_TYPE_NONE = 0, - VIE_OP_TYPE_MOV, - VIE_OP_TYPE_MOVSX, - VIE_OP_TYPE_MOVZX, - VIE_OP_TYPE_AND, - VIE_OP_TYPE_OR, - VIE_OP_TYPE_SUB, - VIE_OP_TYPE_TWO_BYTE, - VIE_OP_TYPE_PUSH, - VIE_OP_TYPE_CMP, - VIE_OP_TYPE_POP, - VIE_OP_TYPE_MOVS, - VIE_OP_TYPE_GROUP1, - VIE_OP_TYPE_STOS, - VIE_OP_TYPE_BITTEST, - VIE_OP_TYPE_TWOB_GRP15, - VIE_OP_TYPE_ADD, - VIE_OP_TYPE_TEST, - VIE_OP_TYPE_BEXTR, - VIE_OP_TYPE_LAST -}; - /* struct vie_op.op_flags */ #define VIE_OP_F_IMM (1 << 0) /* 16/32-bit immediate operand */ #define VIE_OP_F_IMM8 (1 << 1) /* 8-bit immediate operand */ @@ -152,6 +128,16 @@ static const struct vie_op one_byte_opcodes[256] = { .op_byte = 0x3B, .op_type = VIE_OP_TYPE_CMP, }, + [0x6E] = { + .op_byte = 0x6E, + .op_type = VIE_OP_TYPE_OUTS, + .op_flags = VIE_OP_F_NO_MODRM | 
VIE_OP_F_NO_GLA_VERIFICATION, + }, + [0x6F] = { + .op_byte = 0x6F, + .op_type = VIE_OP_TYPE_OUTS, + .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION, + }, [0x88] = { .op_byte = 0x88, .op_type = VIE_OP_TYPE_MOV, diff --git a/sys/amd64/vmm/vmm_ioport.c b/sys/amd64/vmm/vmm_ioport.c index fc1ecab9f209..8aab28f5e68e 100644 --- a/sys/amd64/vmm/vmm_ioport.c +++ b/sys/amd64/vmm/vmm_ioport.c @@ -144,10 +144,50 @@ emulate_inout_port(struct vcpu *vcpu, struct vm_exit *vmexit, bool *retu) return (0); } +static int +decode_segment(struct vcpu *vcpu, enum vm_reg_name *segment) +{ + struct vm_guest_paging *paging; + struct vie vie; + struct vm_exit *vme; + int err; + int fault; + + vme = vm_exitinfo(vcpu); + paging = &vme->u.inout_str.paging; + + vie_init(&vie, NULL, 0); + err = vmm_fetch_instruction(vcpu, paging, + vme->rip + vme->u.inout_str.cs_base, VIE_INST_SIZE, &vie, &fault); + if (err || fault) + return (err); + + err = vmm_decode_instruction(vcpu, VIE_INVALID_GLA, paging->cpu_mode, + vme->u.inout_str.cs_d, &vie); + + if (err || vie.op.op_type != VIE_OP_TYPE_OUTS) + return (EINVAL); + if (vie.segment_override) + *segment = vie.segment_register; + else + *segment = VM_REG_GUEST_DS; + + return (0); +} + static int emulate_inout_str(struct vcpu *vcpu, struct vm_exit *vmexit, bool *retu) { + int err; + *retu = true; + if (vmexit->u.inout_str.seg_name == VM_REG_LAST) { + err = decode_segment(vcpu, &vmexit->u.inout_str.seg_name); + if (err) + return (err); + return (vm_get_seg_desc(vcpu, vmexit->u.inout_str.seg_name, + &vmexit->u.inout_str.seg_desc)); + } return (0); /* Return to userspace to finish emulation */ }