> On 12 Jun 2025, at 5:33 PM, Ani Sinha <anisi...@redhat.com> wrote: > > > >> On 27 Feb 2025, at 7:59 PM, Roy Hopkins <roy.hopk...@randomman.co.uk> wrote: >> >> When an SEV guest is started, the reset vector and state are >> extracted from metadata that is contained in the firmware volume. >> >> In preparation for using IGVM to setup the initial CPU state, >> the code has been refactored to populate vmcb_save_area for each >> CPU which is then applied during guest startup and CPU reset. >> >> Signed-off-by: Roy Hopkins <roy.hopk...@randomman.co.uk> >> Acked-by: Michael S. Tsirkin <m...@redhat.com> >> Acked-by: Stefano Garzarella <sgarz...@redhat.com> > > I am not sure if I am responding to the latest version of this patch series, > but ... > >> --- >> target/i386/sev.c | 323 +++++++++++++++++++++++++++++++++++++++++----- >> target/i386/sev.h | 110 ++++++++++++++++ >> 2 files changed, 400 insertions(+), 33 deletions(-) >> >> diff --git a/target/i386/sev.c b/target/i386/sev.c >> index 7d91985f41..1d1e36e3de 100644 >> --- a/target/i386/sev.c >> +++ b/target/i386/sev.c >> @@ -49,6 +49,12 @@ OBJECT_DECLARE_TYPE(SevSnpGuestState, >> SevCommonStateClass, SEV_SNP_GUEST) >> /* hard code sha256 digest size */ >> #define HASH_SIZE 32 >> >> +/* Convert between SEV-ES VMSA and SegmentCache flags/attributes */ >> +#define FLAGS_VMSA_TO_SEGCACHE(flags) \ >> + ((((flags) & 0xff00) << 12) | (((flags) & 0xff) << 8)) >> +#define FLAGS_SEGCACHE_TO_VMSA(flags) \ >> + ((((flags) & 0xff00) >> 8) | (((flags) & 0xf00000) >> 12)) >> + >> typedef struct QEMU_PACKED SevHashTableEntry { >> QemuUUID guid; >> uint16_t len; >> @@ -88,6 +94,14 @@ typedef struct QEMU_PACKED SevHashTableDescriptor { >> uint32_t size; >> } SevHashTableDescriptor; >> >> +typedef struct SevLaunchVmsa { >> + QTAILQ_ENTRY(SevLaunchVmsa) next; >> + >> + uint16_t cpu_index; >> + uint64_t gpa; >> + struct sev_es_save_area vmsa; >> +} SevLaunchVmsa; >> + >> struct SevCommonState { >> X86ConfidentialGuest parent_obj; >> >> @@ -106,9 +120,7 @@ struct SevCommonState { >> int sev_fd; >> SevState state; >> >> - uint32_t reset_cs; >> - uint32_t reset_ip; >> - bool reset_data_valid; >> + QTAILQ_HEAD(, SevLaunchVmsa) launch_vmsa; >> }; >> >> struct SevCommonStateClass { >> @@ -371,6 +383,172 @@ static struct RAMBlockNotifier sev_ram_notifier = { >> .ram_block_removed = sev_ram_block_removed, >> }; >> >> +static void sev_apply_cpu_context(CPUState *cpu) >> +{ >> + SevCommonState *sev_common = >> SEV_COMMON(MACHINE(qdev_get_machine())->cgs); >> + X86CPU *x86; >> + CPUX86State *env; >> + struct SevLaunchVmsa *launch_vmsa; >> + >> + /* See if an initial VMSA has been provided for this CPU */ >> + QTAILQ_FOREACH(launch_vmsa, &sev_common->launch_vmsa, next) >> + { >> + if (cpu->cpu_index == launch_vmsa->cpu_index) { >> + x86 = X86_CPU(cpu); >> + env = &x86->env; >> + >> + /* >> + * Ideally we would provide the VMSA directly to kvm which would >> + * ensure that the resulting initial VMSA measurement which is >> + * calculated during KVM_SEV_LAUNCH_UPDATE_VMSA is calculated >> from >> + * exactly what we provide here. Currently this is not possible >> so >> + * we need to copy the parts of the VMSA structure that we >> currently >> + * support into the CPU state. >> + */ >> + cpu_load_efer(env, launch_vmsa->vmsa.efer); >> + cpu_x86_update_cr4(env, launch_vmsa->vmsa.cr4); >> + cpu_x86_update_cr0(env, launch_vmsa->vmsa.cr0); >> + cpu_x86_update_cr3(env, launch_vmsa->vmsa.cr3); >> + env->xcr0 = launch_vmsa->vmsa.xcr0; >> + env->pat = launch_vmsa->vmsa.g_pat; >> + >> + cpu_x86_load_seg_cache( >> + env, R_CS, launch_vmsa->vmsa.cs.selector, >> + launch_vmsa->vmsa.cs.base, launch_vmsa->vmsa.cs.limit, >> + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.cs.attrib)); >> + cpu_x86_load_seg_cache( >> + env, R_DS, launch_vmsa->vmsa.ds.selector, >> + launch_vmsa->vmsa.ds.base, launch_vmsa->vmsa.ds.limit, >> + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ds.attrib)); >> + cpu_x86_load_seg_cache( >> + env, R_ES, launch_vmsa->vmsa.es.selector, >> + launch_vmsa->vmsa.es.base, launch_vmsa->vmsa.es.limit, >> + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.es.attrib)); >> + cpu_x86_load_seg_cache( >> + env, R_FS, launch_vmsa->vmsa.fs.selector, >> + launch_vmsa->vmsa.fs.base, launch_vmsa->vmsa.fs.limit, >> + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.fs.attrib)); >> + cpu_x86_load_seg_cache( >> + env, R_GS, launch_vmsa->vmsa.gs.selector, >> + launch_vmsa->vmsa.gs.base, launch_vmsa->vmsa.gs.limit, >> + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.gs.attrib)); >> + cpu_x86_load_seg_cache( >> + env, R_SS, launch_vmsa->vmsa.ss.selector, >> + launch_vmsa->vmsa.ss.base, launch_vmsa->vmsa.ss.limit, >> + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ss.attrib)); >> + >> + env->gdt.base = launch_vmsa->vmsa.gdtr.base; >> + env->gdt.limit = launch_vmsa->vmsa.gdtr.limit; >> + env->gdt.flags = >> + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.gdtr.attrib); >> + env->idt.base = launch_vmsa->vmsa.idtr.base; >> + env->idt.limit = launch_vmsa->vmsa.idtr.limit; >> + env->idt.flags = >> + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.idtr.attrib); >> + >> + cpu_x86_load_seg_cache( >> + env, R_LDTR, launch_vmsa->vmsa.ldtr.selector, >> + launch_vmsa->vmsa.ldtr.base, launch_vmsa->vmsa.ldtr.limit, >> + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.ldtr.attrib)); >> + cpu_x86_load_seg_cache( >> + env, R_TR, launch_vmsa->vmsa.tr.selector, >> + launch_vmsa->vmsa.ldtr.base, launch_vmsa->vmsa.tr.limit, >> + FLAGS_VMSA_TO_SEGCACHE(launch_vmsa->vmsa.tr.attrib)); >> + >> + env->dr[6] = launch_vmsa->vmsa.dr6; >> + env->dr[7] = launch_vmsa->vmsa.dr7; >> + >> + env->regs[R_EAX] = launch_vmsa->vmsa.rax; >> + env->regs[R_ECX] = launch_vmsa->vmsa.rcx; >> + env->regs[R_EDX] = launch_vmsa->vmsa.rdx; >> + env->regs[R_EBX] = launch_vmsa->vmsa.rbx; >> + env->regs[R_ESP] = launch_vmsa->vmsa.rsp; >> + env->regs[R_EBP] = launch_vmsa->vmsa.rbp; >> + env->regs[R_ESI] = launch_vmsa->vmsa.rsi; >> + env->regs[R_EDI] = launch_vmsa->vmsa.rdi; >> +#ifdef TARGET_X86_64 >> + env->regs[R_R8] = launch_vmsa->vmsa.r8; >> + env->regs[R_R9] = launch_vmsa->vmsa.r9; >> + env->regs[R_R10] = launch_vmsa->vmsa.r10; >> + env->regs[R_R11] = launch_vmsa->vmsa.r11; >> + env->regs[R_R12] = launch_vmsa->vmsa.r12; >> + env->regs[R_R13] = launch_vmsa->vmsa.r13; >> + env->regs[R_R14] = launch_vmsa->vmsa.r14; >> + env->regs[R_R15] = launch_vmsa->vmsa.r15; >> +#endif >> + env->eip = launch_vmsa->vmsa.rip; >> + env->eflags = launch_vmsa->vmsa.rflags; >> + >> + cpu_set_fpuc(env, launch_vmsa->vmsa.x87_fcw); >> + env->mxcsr = launch_vmsa->vmsa.mxcsr; >> + >> + break; >> + } >> + } >> +} >> + >> +static int sev_set_cpu_context(uint16_t cpu_index, const void *ctx, >> + uint32_t ctx_len, hwaddr gpa, Error **errp) >> +{ >> + SevCommonState *sev_common = >> SEV_COMMON(MACHINE(qdev_get_machine())->cgs); >> + SevLaunchVmsa *launch_vmsa; >> + CPUState *cpu; >> + bool exists = false; >> + >> + /* >> + * Setting the CPU context is only supported for SEV-ES and SEV-SNP. The >> + * context buffer will contain a sev_es_save_area from the Linux kernel >> + * which is defined by "Table B-4. VMSA Layout, State Save Area for >> SEV-ES" >> + * in the AMD64 APM, Volume 2. >> + */ >> + >> + if (!sev_es_enabled()) { >> + error_setg(errp, "SEV: unable to set CPU context: Not supported"); >> + return -1; >> + } >> + >> + if (ctx_len < sizeof(struct sev_es_save_area)) { >> + error_setg(errp, "SEV: unable to set CPU context: " >> + "Invalid context provided"); >> + return -1; >> + } >> + >> + cpu = qemu_get_cpu(cpu_index); >> + if (!cpu) { >> + error_setg(errp, "SEV: unable to set CPU context for out of bounds " >> + "CPU index %d", cpu_index); >> + return -1; >> + } >> + >> + /* >> + * If the context of this VP has already been set then replace it with >> the >> + * new context. >> + */ >> + QTAILQ_FOREACH(launch_vmsa, &sev_common->launch_vmsa, next) >> + { >> + if (cpu_index == launch_vmsa->cpu_index) { >> + launch_vmsa->gpa = gpa; >> + memcpy(&launch_vmsa->vmsa, ctx, sizeof(launch_vmsa->vmsa)); >> + exists = true; >> + break; >> + } >> + } >> + >> + if (!exists) { >> + /* New VP context */ >> + launch_vmsa = g_new0(SevLaunchVmsa, 1); >> + memcpy(&launch_vmsa->vmsa, ctx, sizeof(launch_vmsa->vmsa)); >> + launch_vmsa->cpu_index = cpu_index; >> + launch_vmsa->gpa = gpa; >> + QTAILQ_INSERT_TAIL(&sev_common->launch_vmsa, launch_vmsa, next); >> + } >> + >> + /* Synchronise the VMSA with the current CPU state */ >> + sev_apply_cpu_context(cpu); > > Ok so here you are not only saving the vmsa but also applying it. > >> + >> + return 0; >> +} >> + >> bool >> sev_enabled(void) >> { >> @@ -1005,6 +1183,16 @@ static int >> sev_launch_update_vmsa(SevGuestState *sev_guest) >> { >> int ret, fw_error; >> + CPUState *cpu; >> + >> + /* >> + * The initial CPU state is measured as part of >> KVM_SEV_LAUNCH_UPDATE_VMSA. >> + * Synchronise the CPU state to any provided launch VMSA structures. >> + */ >> + CPU_FOREACH(cpu) { >> + sev_apply_cpu_context(cpu); >> + } >> + >> >> ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, >> NULL, &fw_error); >> @@ -1787,40 +1975,110 @@ sev_es_find_reset_vector(void *flash_ptr, uint64_t >> flash_size, >> return sev_es_parse_reset_block(info, addr); >> } >> >> -void sev_es_set_reset_vector(CPUState *cpu) >> + >> +static void seg_to_vmsa(const SegmentCache *cpu_seg, struct vmcb_seg >> *vmsa_seg) >> { >> - X86CPU *x86; >> - CPUX86State *env; >> - ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; >> - SevCommonState *sev_common = SEV_COMMON( >> - object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON)); >> + vmsa_seg->selector = cpu_seg->selector; >> + vmsa_seg->base = cpu_seg->base; >> + vmsa_seg->limit = cpu_seg->limit; >> + vmsa_seg->attrib = FLAGS_SEGCACHE_TO_VMSA(cpu_seg->flags); >> +} >> >> - /* Only update if we have valid reset information */ >> - if (!sev_common || !sev_common->reset_data_valid) { >> - return; >> - } >> +static void initialize_vmsa(const CPUState *cpu, struct sev_es_save_area >> *vmsa) >> +{ >> + const X86CPU *x86 = X86_CPU(cpu); >> + const CPUX86State *env = &x86->env; >> >> - /* Do not update the BSP reset state */ >> - if (cpu->cpu_index == 0) { >> - return; >> + /* >> + * Initialize the SEV-ES save area from the current state of >> + * the CPU. The entire state does not need to be copied, only the state >> + * that is copied back to the CPUState in sev_apply_cpu_context. >> + */ >> + memset(vmsa, 0, sizeof(struct sev_es_save_area)); >> + vmsa->efer = env->efer; >> + vmsa->cr0 = env->cr[0]; >> + vmsa->cr3 = env->cr[3]; >> + vmsa->cr4 = env->cr[4]; >> + vmsa->xcr0 = env->xcr0; >> + vmsa->g_pat = env->pat; >> + >> + seg_to_vmsa(&env->segs[R_CS], &vmsa->cs); >> + seg_to_vmsa(&env->segs[R_DS], &vmsa->ds); >> + seg_to_vmsa(&env->segs[R_ES], &vmsa->es); >> + seg_to_vmsa(&env->segs[R_FS], &vmsa->fs); >> + seg_to_vmsa(&env->segs[R_GS], &vmsa->gs); >> + seg_to_vmsa(&env->segs[R_SS], &vmsa->ss); >> + >> + seg_to_vmsa(&env->gdt, &vmsa->gdtr); >> + seg_to_vmsa(&env->idt, &vmsa->idtr); >> + seg_to_vmsa(&env->ldt, &vmsa->ldtr); >> + seg_to_vmsa(&env->tr, &vmsa->tr); >> + >> + vmsa->dr6 = env->dr[6]; >> + vmsa->dr7 = env->dr[7]; >> + >> + vmsa->rax = env->regs[R_EAX]; >> + vmsa->rcx = env->regs[R_ECX]; >> + vmsa->rdx = env->regs[R_EDX]; >> + vmsa->rbx = env->regs[R_EBX]; >> + vmsa->rsp = env->regs[R_ESP]; >> + vmsa->rbp = env->regs[R_EBP]; >> + vmsa->rsi = env->regs[R_ESI]; >> + vmsa->rdi = env->regs[R_EDI]; >> + >> +#ifdef TARGET_X86_64 >> + vmsa->r8 = env->regs[R_R8]; >> + vmsa->r9 = env->regs[R_R9]; >> + vmsa->r10 = env->regs[R_R10]; >> + vmsa->r11 = env->regs[R_R11]; >> + vmsa->r12 = env->regs[R_R12]; >> + vmsa->r13 = env->regs[R_R13]; >> + vmsa->r14 = env->regs[R_R14]; >> + vmsa->r15 = env->regs[R_R15]; >> +#endif >> + >> + vmsa->rip = env->eip; >> + vmsa->rflags = env->eflags; >> +} >> + >> +static void sev_es_set_ap_context(uint32_t reset_addr) >> +{ >> + CPUState *cpu; >> + struct sev_es_save_area vmsa; >> + SegmentCache cs; >> + >> + cs.selector = 0xf000; >> + cs.base = reset_addr & 0xffff0000; >> + cs.limit = 0xffff; >> + cs.flags = DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK | DESC_R_MASK | >> + DESC_A_MASK; >> + >> + CPU_FOREACH(cpu) { >> + if (cpu->cpu_index == 0) { >> + /* Do not update the BSP reset state */ >> + continue; >> + } >> + initialize_vmsa(cpu, &vmsa); >> + seg_to_vmsa(&cs, &vmsa.cs); >> + vmsa.rip = reset_addr & 0x0000ffff; >> + sev_set_cpu_context(cpu->cpu_index, &vmsa, >> + sizeof(struct sev_es_save_area), >> + 0, &error_fatal); >> + sev_apply_cpu_context(cpu); > > Is the above call ^^^^^^ sev_apply_cpu_context() needed? Since in > sev_set_cpu_context() you already call it? It seems I would have the same question for v8. > >> }