On Wed Sep 6, 2023 at 2:33 PM AEST, Harsh Prateek Bora wrote: > Once the L1 has created a nested guest and its associated VCPU, it can > request for the execution of nested guest by setting its initial state > which can be done either using the h_guest_set_state or using the input > buffers along with the call to h_guest_run_vcpu(). On guest exit, L0 > uses output buffers to convey the exit cause to the L1. L0 takes care of > switching context from L1 to L2 during guest entry and restores L1 context > on guest exit. > > Unlike nested-hv, L2 (nested) guest's entire state is retained with > L0 after guest exit and restored on next entry in case of nested-papr. > > Signed-off-by: Michael Neuling <mi...@neuling.org> > Signed-off-by: Kautuk Consul <kcon...@linux.vnet.ibm.com> > Signed-off-by: Harsh Prateek Bora <hars...@linux.ibm.com> > --- > hw/ppc/spapr_nested.c | 471 +++++++++++++++++++++++++++----- > include/hw/ppc/spapr_cpu_core.h | 7 +- > include/hw/ppc/spapr_nested.h | 6 + > 3 files changed, 408 insertions(+), 76 deletions(-) > > diff --git a/hw/ppc/spapr_nested.c b/hw/ppc/spapr_nested.c > index 67e389a762..3605f27115 100644 > --- a/hw/ppc/spapr_nested.c > +++ b/hw/ppc/spapr_nested.c > @@ -12,6 +12,17 @@ > #ifdef CONFIG_TCG > #define PRTS_MASK 0x1f > > +static void exit_nested_restore_vcpu(PowerPCCPU *cpu, int excp, > + SpaprMachineStateNestedGuestVcpu *vcpu); > +static void exit_process_output_buffer(PowerPCCPU *cpu, > + SpaprMachineStateNestedGuest *guest, > + target_ulong vcpuid, > + target_ulong *r3); > +static void restore_common_regs(CPUPPCState *dst, CPUPPCState *src); > +static bool vcpu_check(SpaprMachineStateNestedGuest *guest, > + target_ulong vcpuid, > + bool inoutbuf); > + > static target_ulong h_set_ptbl(PowerPCCPU *cpu, > SpaprMachineState *spapr, > target_ulong opcode, > @@ -187,21 +198,21 @@ static target_ulong h_enter_nested(PowerPCCPU *cpu, > return H_PARAMETER; > } > > - spapr_cpu->nested_host_state = g_try_new(struct nested_ppc_state, 1); > - 
if (!spapr_cpu->nested_host_state) { > + spapr_cpu->nested_hv_host = g_try_new(struct nested_ppc_state, 1); > + if (!spapr_cpu->nested_hv_host) { > return H_NO_MEM; > }
Don't rename existing thing in the same patch as adding new thing. > > assert(env->spr[SPR_LPIDR] == 0); > assert(env->spr[SPR_DPDES] == 0); > - nested_save_state(spapr_cpu->nested_host_state, cpu); > + nested_save_state(spapr_cpu->nested_hv_host, cpu); > > len = sizeof(*regs); > regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, false, > MEMTXATTRS_UNSPECIFIED); > if (!regs || len != sizeof(*regs)) { > address_space_unmap(CPU(cpu)->as, regs, len, 0, false); > - g_free(spapr_cpu->nested_host_state); > + g_free(spapr_cpu->nested_hv_host); > return H_P2; > } > > @@ -276,105 +287,146 @@ static target_ulong h_enter_nested(PowerPCCPU *cpu, > > void spapr_exit_nested(PowerPCCPU *cpu, int excp) > { > + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > + CPUState *cs = CPU(cpu); I think it would be worth seeing how it looks to split these into original and papr functions rather than try mash them together. > CPUPPCState *env = &cpu->env; > SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); > + target_ulong r3_return = env->excp_vectors[excp]; /* hcall return value > */ > struct nested_ppc_state l2_state; > - target_ulong hv_ptr = spapr_cpu->nested_host_state->gpr[4]; > - target_ulong regs_ptr = spapr_cpu->nested_host_state->gpr[5]; > - target_ulong hsrr0, hsrr1, hdar, asdr, hdsisr; > + target_ulong hv_ptr, regs_ptr; > + target_ulong hsrr0 = 0, hsrr1 = 0, hdar = 0, asdr = 0, hdsisr = 0; > struct kvmppc_hv_guest_state *hvstate; > struct kvmppc_pt_regs *regs; > hwaddr len; > + target_ulong lpid = 0, vcpuid = 0; > + struct SpaprMachineStateNestedGuestVcpu *vcpu = NULL; > + struct SpaprMachineStateNestedGuest *guest = NULL; > > assert(spapr_cpu->in_nested); > - > - nested_save_state(&l2_state, cpu); > - hsrr0 = env->spr[SPR_HSRR0]; > - hsrr1 = env->spr[SPR_HSRR1]; > - hdar = env->spr[SPR_HDAR]; > - hdsisr = env->spr[SPR_HDSISR]; > - asdr = env->spr[SPR_ASDR]; > + if (spapr->nested.api == NESTED_API_KVM_HV) { > + nested_save_state(&l2_state, cpu); > + hsrr0 = 
env->spr[SPR_HSRR0]; > + hsrr1 = env->spr[SPR_HSRR1]; > + hdar = env->spr[SPR_HDAR]; > + hdsisr = env->spr[SPR_HDSISR]; > + asdr = env->spr[SPR_ASDR]; > + } else if (spapr->nested.api == NESTED_API_PAPR) { > + lpid = spapr_cpu->nested_papr_host->gpr[5]; > + vcpuid = spapr_cpu->nested_papr_host->gpr[6]; > + guest = spapr_get_nested_guest(spapr, lpid); > + assert(guest); > + vcpu_check(guest, vcpuid, false); > + vcpu = &guest->vcpu[vcpuid]; > + > + exit_nested_restore_vcpu(cpu, excp, vcpu); > + /* do the output buffer for run_vcpu*/ > + exit_process_output_buffer(cpu, guest, vcpuid, &r3_return); > + } else > + g_assert_not_reached(); > > /* > * Switch back to the host environment (including for any error). > */ > assert(env->spr[SPR_LPIDR] != 0); > - nested_load_state(cpu, spapr_cpu->nested_host_state); > - env->gpr[3] = env->excp_vectors[excp]; /* hcall return value */ > > - cpu_ppc_hdecr_exit(env); > + if (spapr->nested.api == NESTED_API_KVM_HV) { > + nested_load_state(cpu, spapr_cpu->nested_hv_host); > + env->gpr[3] = r3_return; > + } else if (spapr->nested.api == NESTED_API_PAPR) { > + restore_common_regs(env, spapr_cpu->nested_papr_host); > + env->tb_env->tb_offset -= vcpu->tb_offset; > + env->gpr[3] = H_SUCCESS; > + env->gpr[4] = r3_return; > + hreg_compute_hflags(env); > + ppc_maybe_interrupt(env); > + tlb_flush(cs); > + env->reserve_addr = -1; /* Reset the reservation */ There's a bunch of stuff that's getting duplicated anyway, so it's actually not clear that this maze of if statements makes it simpler to see that nothing is missed. 
> + } > > - spapr_cpu->in_nested = false; > + cpu_ppc_hdecr_exit(env); > > - g_free(spapr_cpu->nested_host_state); > - spapr_cpu->nested_host_state = NULL; > + if (spapr->nested.api == NESTED_API_KVM_HV) { > + hv_ptr = spapr_cpu->nested_hv_host->gpr[4]; > + regs_ptr = spapr_cpu->nested_hv_host->gpr[5]; > + > + len = sizeof(*hvstate); > + hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, true, > + MEMTXATTRS_UNSPECIFIED); > + if (len != sizeof(*hvstate)) { > + address_space_unmap(CPU(cpu)->as, hvstate, len, 0, true); > + env->gpr[3] = H_PARAMETER; > + return; > + } > > - len = sizeof(*hvstate); > - hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, true, > - MEMTXATTRS_UNSPECIFIED); > - if (len != sizeof(*hvstate)) { > - address_space_unmap(CPU(cpu)->as, hvstate, len, 0, true); > - env->gpr[3] = H_PARAMETER; > - return; > - } > + hvstate->cfar = l2_state.cfar; > + hvstate->lpcr = l2_state.lpcr; > + hvstate->pcr = l2_state.pcr; > + hvstate->dpdes = l2_state.dpdes; > + hvstate->hfscr = l2_state.hfscr; > + > + if (excp == POWERPC_EXCP_HDSI) { > + hvstate->hdar = hdar; > + hvstate->hdsisr = hdsisr; > + hvstate->asdr = asdr; > + } else if (excp == POWERPC_EXCP_HISI) { > + hvstate->asdr = asdr; > + } > > - hvstate->cfar = l2_state.cfar; > - hvstate->lpcr = l2_state.lpcr; > - hvstate->pcr = l2_state.pcr; > - hvstate->dpdes = l2_state.dpdes; > - hvstate->hfscr = l2_state.hfscr; > + /* HEIR should be implemented for HV mode and saved here. */ > + hvstate->srr0 = l2_state.srr0; > + hvstate->srr1 = l2_state.srr1; > + hvstate->sprg[0] = l2_state.sprg0; > + hvstate->sprg[1] = l2_state.sprg1; > + hvstate->sprg[2] = l2_state.sprg2; > + hvstate->sprg[3] = l2_state.sprg3; > + hvstate->pidr = l2_state.pidr; > + hvstate->ppr = l2_state.ppr; > + > + /* Is it okay to specify write len larger than actual data written? 
> */ > + address_space_unmap(CPU(cpu)->as, hvstate, len, len, true); > + > + len = sizeof(*regs); > + regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, true, > + MEMTXATTRS_UNSPECIFIED); > + if (!regs || len != sizeof(*regs)) { > + address_space_unmap(CPU(cpu)->as, regs, len, 0, true); > + env->gpr[3] = H_P2; > + return; > + } > > - if (excp == POWERPC_EXCP_HDSI) { > - hvstate->hdar = hdar; > - hvstate->hdsisr = hdsisr; > - hvstate->asdr = asdr; > - } else if (excp == POWERPC_EXCP_HISI) { > - hvstate->asdr = asdr; > - } > + len = sizeof(env->gpr); > + assert(len == sizeof(regs->gpr)); > + memcpy(regs->gpr, l2_state.gpr, len); > > - /* HEIR should be implemented for HV mode and saved here. */ > - hvstate->srr0 = l2_state.srr0; > - hvstate->srr1 = l2_state.srr1; > - hvstate->sprg[0] = l2_state.sprg0; > - hvstate->sprg[1] = l2_state.sprg1; > - hvstate->sprg[2] = l2_state.sprg2; > - hvstate->sprg[3] = l2_state.sprg3; > - hvstate->pidr = l2_state.pidr; > - hvstate->ppr = l2_state.ppr; > + regs->link = l2_state.lr; > + regs->ctr = l2_state.ctr; > + regs->xer = l2_state.xer; > + regs->ccr = l2_state.cr; > > - /* Is it okay to specify write length larger than actual data written? */ > - address_space_unmap(CPU(cpu)->as, hvstate, len, len, true); > + if (excp == POWERPC_EXCP_MCHECK || > + excp == POWERPC_EXCP_RESET || > + excp == POWERPC_EXCP_SYSCALL) { > + regs->nip = l2_state.srr0; > + regs->msr = l2_state.srr1 & env->msr_mask; > + } else { > + regs->nip = hsrr0; > + regs->msr = hsrr1 & env->msr_mask; > + } > > - len = sizeof(*regs); > - regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, true, > - MEMTXATTRS_UNSPECIFIED); > - if (!regs || len != sizeof(*regs)) { > - address_space_unmap(CPU(cpu)->as, regs, len, 0, true); > - env->gpr[3] = H_P2; > - return; > + /* Is it okay to specify write len larger than actual data written? 
> */ > + address_space_unmap(CPU(cpu)->as, regs, len, len, true); > } > > - len = sizeof(env->gpr); > - assert(len == sizeof(regs->gpr)); > - memcpy(regs->gpr, l2_state.gpr, len); > - > - regs->link = l2_state.lr; > - regs->ctr = l2_state.ctr; > - regs->xer = l2_state.xer; > - regs->ccr = l2_state.cr; > + spapr_cpu->in_nested = false; > > - if (excp == POWERPC_EXCP_MCHECK || > - excp == POWERPC_EXCP_RESET || > - excp == POWERPC_EXCP_SYSCALL) { > - regs->nip = l2_state.srr0; > - regs->msr = l2_state.srr1 & env->msr_mask; > + if (spapr->nested.api == NESTED_API_KVM_HV) { > + g_free(spapr_cpu->nested_hv_host); > + spapr_cpu->nested_hv_host = NULL; > } else { > - regs->nip = hsrr0; > - regs->msr = hsrr1 & env->msr_mask; > + g_free(spapr_cpu->nested_papr_host); > + spapr_cpu->nested_papr_host = NULL; > } > > - /* Is it okay to specify write length larger than actual data written? */ > - address_space_unmap(CPU(cpu)->as, regs, len, len, true); > } > > SpaprMachineStateNestedGuest *spapr_get_nested_guest(SpaprMachineState > *spapr, > @@ -1372,6 +1424,274 @@ static target_ulong h_guest_get_state(PowerPCCPU *cpu, > return h_guest_getset_state(cpu, spapr, args, false); > } > > +static void restore_common_regs(CPUPPCState *dst, CPUPPCState *src) > +{ > + memcpy(dst->gpr, src->gpr, sizeof(dst->gpr)); > + memcpy(dst->crf, src->crf, sizeof(dst->crf)); > + memcpy(dst->vsr, src->vsr, sizeof(dst->vsr)); > + dst->nip = src->nip; > + dst->msr = src->msr; > + dst->lr = src->lr; > + dst->ctr = src->ctr; > + dst->cfar = src->cfar; > + cpu_write_xer(dst, src->xer); > + ppc_store_vscr(dst, ppc_get_vscr(src)); > + ppc_store_fpscr(dst, src->fpscr); > + memcpy(dst->spr, src->spr, sizeof(dst->spr)); > +} > + > +static void restore_l2_state(PowerPCCPU *cpu, > + CPUPPCState *env, > + struct SpaprMachineStateNestedGuestVcpu *vcpu, > + target_ulong now) > +{ > + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); > + target_ulong 
lpcr, lpcr_mask, hdec; > + lpcr_mask = LPCR_DPFD | LPCR_ILE | LPCR_AIL | LPCR_LD | LPCR_MER; > + > + if (spapr->nested.api == NESTED_API_PAPR) { > + assert(vcpu); > + assert(sizeof(env->gpr) == sizeof(vcpu->env.gpr)); > + restore_common_regs(env, &vcpu->env); > + lpcr = (env->spr[SPR_LPCR] & ~lpcr_mask) | > + (vcpu->env.spr[SPR_LPCR] & lpcr_mask); > + lpcr |= LPCR_HR | LPCR_UPRT | LPCR_GTSE | LPCR_HVICE | LPCR_HDICE; > + lpcr &= ~LPCR_LPES0; > + env->spr[SPR_LPCR] = lpcr & pcc->lpcr_mask; > + > + hdec = vcpu->env.tb_env->hdecr_expiry_tb - now; > + cpu_ppc_store_decr(env, vcpu->dec_expiry_tb - now); > + cpu_ppc_hdecr_init(env); > + cpu_ppc_store_hdecr(env, hdec); > + > + env->tb_env->tb_offset += vcpu->tb_offset; > + } > +} > + > +static void enter_nested(PowerPCCPU *cpu, > + uint64_t lpid, > + struct SpaprMachineStateNestedGuestVcpu *vcpu) That's not good since we have h_enter_nested for the old API. Really have to be a bit more consistent with using papr_ for naming I think. And you don't have to call this enter_nested anyway, papr_run_vcpu is okay too since that matches the API call. Can just add a comment /* Enter the L2 VCPU, equivalent to h_enter_nested */ if you think that's needed. 
> +{ > + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > + CPUState *cs = CPU(cpu); > + CPUPPCState *env = &cpu->env; > + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); > + target_ulong now = cpu_ppc_load_tbl(env); > + > + assert(env->spr[SPR_LPIDR] == 0); > + assert(spapr->nested.api); /* ensure API version is initialized */ > + spapr_cpu->nested_papr_host = g_try_new(CPUPPCState, 1); > + assert(spapr_cpu->nested_papr_host); > + memcpy(spapr_cpu->nested_papr_host, env, sizeof(CPUPPCState)); > + > + restore_l2_state(cpu, env, vcpu, now); > + env->spr[SPR_LPIDR] = lpid; /* post restore_l2_state */ > + > + spapr_cpu->in_nested = true; > + > + hreg_compute_hflags(env); > + ppc_maybe_interrupt(env); > + tlb_flush(cs); > + env->reserve_addr = -1; /* Reset the reservation */ ^^^ This is the kind of block that could be pulled into a common helper function. There's 3-4 copies now? > + > +} > + > +static target_ulong h_guest_run_vcpu(PowerPCCPU *cpu, > + SpaprMachineState *spapr, > + target_ulong opcode, > + target_ulong *args) > +{ > + CPUPPCState *env = &cpu->env; > + target_ulong flags = args[0]; > + target_ulong lpid = args[1]; > + target_ulong vcpuid = args[2]; > + struct SpaprMachineStateNestedGuestVcpu *vcpu; > + struct guest_state_request gsr; > + SpaprMachineStateNestedGuest *guest; > + > + if (flags) /* don't handle any flags for now */ > + return H_PARAMETER; > + > + guest = spapr_get_nested_guest(spapr, lpid); > + if (!guest) { > + return H_P2; > + } > + if (!vcpu_check(guest, vcpuid, true)) { > + return H_P3; > + } > + > + if (guest->parttbl[0] == 0) { > + /* At least need a partition scoped radix tree */ > + return H_NOT_AVAILABLE; > + } > + > + vcpu = &guest->vcpu[vcpuid]; > + > + /* Read run_vcpu input buffer to update state */ > + gsr.buf = vcpu->runbufin.addr; > + gsr.len = vcpu->runbufin.size; > + gsr.flags = GUEST_STATE_REQUEST_SET; /* Thread wide + writing */ > + if (!map_and_getset_state(cpu, guest, vcpuid, &gsr)) { > + 
enter_nested(cpu, lpid, vcpu); > + } > + > + return env->gpr[3]; > +} > + > +struct run_vcpu_exit_cause run_vcpu_exit_causes[] = { > + { .nia = 0x980, > + .count = 0, > + }, > + { .nia = 0xc00, > + .count = 10, > + .ids = { > + GSB_VCPU_GPR3, > + GSB_VCPU_GPR4, > + GSB_VCPU_GPR5, > + GSB_VCPU_GPR6, > + GSB_VCPU_GPR7, > + GSB_VCPU_GPR8, > + GSB_VCPU_GPR9, > + GSB_VCPU_GPR10, > + GSB_VCPU_GPR11, > + GSB_VCPU_GPR12, > + }, > + }, > + { .nia = 0xe00, > + .count = 5, > + .ids = { > + GSB_VCPU_SPR_HDAR, > + GSB_VCPU_SPR_HDSISR, > + GSB_VCPU_SPR_ASDR, > + GSB_VCPU_SPR_NIA, > + GSB_VCPU_SPR_MSR, > + }, > + }, > + { .nia = 0xe20, > + .count = 4, > + .ids = { > + GSB_VCPU_SPR_HDAR, > + GSB_VCPU_SPR_ASDR, > + GSB_VCPU_SPR_NIA, > + GSB_VCPU_SPR_MSR, > + }, > + }, > + { .nia = 0xe40, > + .count = 3, > + .ids = { > + GSB_VCPU_SPR_HEIR, > + GSB_VCPU_SPR_NIA, > + GSB_VCPU_SPR_MSR, > + }, > + }, > + { .nia = 0xea0, > + .count = 0, > + }, > + { .nia = 0xf80, > + .count = 3, > + .ids = { > + GSB_VCPU_SPR_HFSCR, > + GSB_VCPU_SPR_NIA, > + GSB_VCPU_SPR_MSR, > + }, > + }, > +}; > + > +static struct run_vcpu_exit_cause *find_exit_cause(uint64_t srr0) > +{ > + int i; > + > + for (i = 0; i < ARRAY_SIZE(run_vcpu_exit_causes); i++) > + if (srr0 == run_vcpu_exit_causes[i].nia) { > + return &run_vcpu_exit_causes[i]; > + } > + > + printf("%s: srr0:0x%016lx\n", __func__, srr0); > + return NULL; > +} This is another weird control flow thing, also unclear why it's used here. Here: 52 lines vs 76, no new struct, simpler for the compiler to understand and optimise. 
int get_exit_ids(uint64_t srr0, uint16_t ids[16])
{
    int nr;

    switch (srr0) {
    case 0xc00:
        nr = 10;
        ids[0] = GSB_VCPU_GPR3;
        ids[1] = GSB_VCPU_GPR4;
        ids[2] = GSB_VCPU_GPR5;
        ids[3] = GSB_VCPU_GPR6;
        ids[4] = GSB_VCPU_GPR7;
        ids[5] = GSB_VCPU_GPR8;
        ids[6] = GSB_VCPU_GPR9;
        ids[7] = GSB_VCPU_GPR10;
        ids[8] = GSB_VCPU_GPR11;
        ids[9] = GSB_VCPU_GPR12;
        break;
    case 0xe00:
        nr = 5;
        ids[0] = GSB_VCPU_SPR_HDAR;
        ids[1] = GSB_VCPU_SPR_HDSISR;
        ids[2] = GSB_VCPU_SPR_ASDR;
        ids[3] = GSB_VCPU_SPR_NIA;
        ids[4] = GSB_VCPU_SPR_MSR;
        break;
    case 0xe20:
        nr = 4;
        ids[0] = GSB_VCPU_SPR_HDAR;
        ids[1] = GSB_VCPU_SPR_ASDR;
        ids[2] = GSB_VCPU_SPR_NIA;
        ids[3] = GSB_VCPU_SPR_MSR;
        break;
    case 0xe40:
        nr = 3;
        ids[0] = GSB_VCPU_SPR_HEIR;
        ids[1] = GSB_VCPU_SPR_NIA;
        ids[2] = GSB_VCPU_SPR_MSR;
        break;
    case 0xf80:
        nr = 3;
        ids[0] = GSB_VCPU_SPR_HFSCR;
        ids[1] = GSB_VCPU_SPR_NIA;
        ids[2] = GSB_VCPU_SPR_MSR;
        break;
    default:
        nr = 0;
        break;
    }

    return nr;
}
> + > +static void exit_nested_restore_vcpu(PowerPCCPU *cpu, int excp, > + SpaprMachineStateNestedGuestVcpu *vcpu) > +{ > + CPUPPCState *env = &cpu->env; > + target_ulong now, hdar, hdsisr, asdr; > + > + assert(sizeof(env->gpr) == sizeof(vcpu->env.gpr)); /* sanity check */ > + > + now = cpu_ppc_load_tbl(env); /* L2 timebase */ > + now -= vcpu->tb_offset; /* L1 timebase */ > + vcpu->dec_expiry_tb = now - cpu_ppc_load_decr(env); > + /* backup hdar, hdsisr, asdr if reqd later below */ > + hdar = vcpu->env.spr[SPR_HDAR]; > + hdsisr = vcpu->env.spr[SPR_HDSISR]; > + asdr = vcpu->env.spr[SPR_ASDR]; > + > + restore_common_regs(&vcpu->env, env); > + > + if (excp == POWERPC_EXCP_MCHECK || > + excp == POWERPC_EXCP_RESET || > + excp == POWERPC_EXCP_SYSCALL) { > + vcpu->env.nip = env->spr[SPR_SRR0]; > + vcpu->env.msr = env->spr[SPR_SRR1] & env->msr_mask; > + } else { > + vcpu->env.nip = env->spr[SPR_HSRR0]; > + vcpu->env.msr = env->spr[SPR_HSRR1] & env->msr_mask; > + } > + > + /* hdar, hdsisr, asdr should be retained unless certain exceptions */ > + if ((excp != POWERPC_EXCP_HDSI) && (excp != POWERPC_EXCP_HISI)) { > + vcpu->env.spr[SPR_ASDR] =
asdr; > + } else if (excp != POWERPC_EXCP_HDSI) { > + vcpu->env.spr[SPR_HDAR] = hdar; > + vcpu->env.spr[SPR_HDSISR] = hdsisr; > + } > +} > + > +static void exit_process_output_buffer(PowerPCCPU *cpu, > + SpaprMachineStateNestedGuest *guest, > + target_ulong vcpuid, > + target_ulong *r3) > +{ > + SpaprMachineStateNestedGuestVcpu *vcpu = &guest->vcpu[vcpuid]; > + struct guest_state_request gsr; > + struct guest_state_buffer *gsb; > + struct guest_state_element *element; > + struct guest_state_element_type *type; > + struct run_vcpu_exit_cause *exit_cause; > + hwaddr len; > + int i; > + > + len = vcpu->runbufout.size; > + gsb = address_space_map(CPU(cpu)->as, vcpu->runbufout.addr, &len, true, > + MEMTXATTRS_UNSPECIFIED); > + if (!gsb || len != vcpu->runbufout.size) { > + address_space_unmap(CPU(cpu)->as, gsb, len, 0, true); > + *r3 = H_P2; > + return; > + } > + > + exit_cause = find_exit_cause(*r3); > + > + /* Create a buffer of elements to send back */ > + gsb->num_elements = cpu_to_be32(exit_cause->count); > + element = gsb->elements; > + for (i = 0; i < exit_cause->count; i++) { > + type = guest_state_element_type_find(exit_cause->ids[i]); > + assert(type); > + element->id = cpu_to_be16(exit_cause->ids[i]); > + element->size = cpu_to_be16(type->size); > + element = guest_state_element_next(element, NULL, NULL); > + } > + gsr.gsb = gsb; > + gsr.len = VCPU_OUT_BUF_MIN_SZ; > + gsr.flags = 0; /* get + never guest wide */ > + getset_state(guest, vcpuid, &gsr); > + > + address_space_unmap(CPU(cpu)->as, gsb, len, len, true); > + return; > +} > + > void spapr_register_nested(void) > { > spapr_register_hypercall(KVMPPC_H_SET_PARTITION_TABLE, h_set_ptbl); > @@ -1388,6 +1708,7 @@ void spapr_register_nested_phyp(void) > spapr_register_hypercall(H_GUEST_CREATE_VCPU , h_guest_create_vcpu); > spapr_register_hypercall(H_GUEST_SET_STATE , h_guest_set_state); > spapr_register_hypercall(H_GUEST_GET_STATE , h_guest_get_state); > + spapr_register_hypercall(H_GUEST_RUN_VCPU , 
h_guest_run_vcpu); > } > > #else > diff --git a/include/hw/ppc/spapr_cpu_core.h b/include/hw/ppc/spapr_cpu_core.h > index 69a52e39b8..09855f69aa 100644 > --- a/include/hw/ppc/spapr_cpu_core.h > +++ b/include/hw/ppc/spapr_cpu_core.h > @@ -53,7 +53,12 @@ typedef struct SpaprCpuState { > > /* Fields for nested-HV support */ > bool in_nested; /* true while the L2 is executing */ > - struct nested_ppc_state *nested_host_state; /* holds the L1 state while > L2 executes */ > + union { > + /* nested-hv needs minimal set of regs as L1 stores L2 state */ > + struct nested_ppc_state *nested_hv_host; > + /* In nested-papr, L0 retains entire L2 state, so keep it all safe. > */ > + CPUPPCState *nested_papr_host; > + }; This IMO still shouldn't be a CPUPPCState, but extending nested_ppc_state. Differences between nested APIs should not be here either, but inside the nested_ppc_state structure. Thanks, Nick > } SpaprCpuState; > > static inline SpaprCpuState *spapr_cpu_state(PowerPCCPU *cpu) > diff --git a/include/hw/ppc/spapr_nested.h b/include/hw/ppc/spapr_nested.h > index eaee624b87..ca5d28c06e 100644 > --- a/include/hw/ppc/spapr_nested.h > +++ b/include/hw/ppc/spapr_nested.h > @@ -358,6 +358,12 @@ struct guest_state_request { > uint16_t flags; > }; > > +struct run_vcpu_exit_cause { > + uint64_t nia; > + uint64_t count; > + uint16_t ids[10]; /* max ids supported by run_vcpu_exit_causes */ > +}; > + > /* > * Register state for entering a nested guest with H_ENTER_NESTED. > * New member must be added at the end.