On 09.03.20 12:21, Janosch Frank wrote: > The unpack facility provides the means to setup a protected guest. A > protected guest can not be introspected by the hypervisor or any > user/administrator of the machine it is running on. > > Protected guests are encrypted at rest and need a special boot > mechanism via diag308 subcode 8 and 10. > > Code 8 sets the PV specific IPLB which is retained seperately from > those set via code 5. > > Code 10 is used to unpack the VM into protected memory, verify its > integrity and start it. > > Signed-off-by: Janosch Frank <fran...@linux.ibm.com> > Signed-off-by: Christian Borntraeger <borntrae...@de.ibm.com> [Changes > to machine]
As you signed this patch off, maybe this should rather be a Co-developed-by: [...] > { > S390IPLState *ipl = get_ipl_device(); > @@ -561,7 +581,8 @@ void s390_ipl_reset_request(CPUState *cs, enum s390_reset > reset_type) > { > S390IPLState *ipl = get_ipl_device(); > > - if (reset_type == S390_RESET_EXTERNAL || reset_type == S390_RESET_REIPL) > { > + if (reset_type == S390_RESET_EXTERNAL || reset_type == S390_RESET_REIPL > || > + reset_type == S390_RESET_PV) { > /* use CPU 0 for full resets */ > ipl->reset_cpu_index = 0; This looks wrong. In case of an error, you modify the registers of a theoretically unrelated CPU. > } else { > @@ -635,6 +656,38 @@ static void s390_ipl_prepare_qipl(S390CPU *cpu) > cpu_physical_memory_unmap(addr, len, 1, len); > } > > +int s390_ipl_prepare_pv_header(void) > +{ > + S390IPLState *ipl = get_ipl_device(); > + IPLBlockPV *ipib_pv = &ipl->iplb_pv.pv; > + void *hdr = g_malloc(ipib_pv->pv_header_len); Should there be an upper limit? The guest can allocate quite some memory this way and theoretically crash the VM. > + int rc; > + > + cpu_physical_memory_read(ipib_pv->pv_header_addr, hdr, > + ipib_pv->pv_header_len); Shouldn't we validate if this memory is accessible at all? > + rc = s390_pv_set_sec_parms((uint64_t)hdr, > + ipib_pv->pv_header_len); > + g_free(hdr); > + return rc; > +} > + > +int s390_ipl_pv_unpack(void) > +{ > + int i, rc = 0; NIT: These declarations should go last. > + S390IPLState *ipl = get_ipl_device(); > + IPLBlockPV *ipib_pv = &ipl->iplb_pv.pv; Use s390_ipl_get_iplb_pv() and assert that we don't get NULL? 
> + > + for (i = 0; i < ipib_pv->num_comp; i++) { > + rc = s390_pv_unpack(ipib_pv->components[i].addr, > + TARGET_PAGE_ALIGN(ipib_pv->components[i].size), > + ipib_pv->components[i].tweak_pref); > + if (rc) { > + break; > + } > + } > + return rc; > +} > + > void s390_ipl_prepare_cpu(S390CPU *cpu) > { > S390IPLState *ipl = get_ipl_device(); > diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h > index d4813105db..b2ccdd9dae 100644 > --- a/hw/s390x/ipl.h > +++ b/hw/s390x/ipl.h > @@ -15,6 +15,24 @@ > #include "cpu.h" > #include "hw/qdev-core.h" > > +struct IPLBlockPVComp { > + uint64_t tweak_pref; > + uint64_t addr; > + uint64_t size; > +} QEMU_PACKED; > +typedef struct IPLBlockPVComp IPLBlockPVComp; > + > +struct IPLBlockPV { > + uint8_t reserved18[87]; /* 0x18 */ > + uint8_t version; /* 0x6f */ > + uint32_t reserved70; /* 0x70 */ > + uint32_t num_comp; /* 0x70 */ > + uint64_t pv_header_addr; /* 0x74 */ > + uint64_t pv_header_len; /* 0x7c */ > + struct IPLBlockPVComp components[]; > +} QEMU_PACKED; > +typedef struct IPLBlockPV IPLBlockPV; > + > struct IplBlockCcw { > uint8_t reserved0[85]; > uint8_t ssid; > @@ -71,6 +89,7 @@ union IplParameterBlock { > union { > IplBlockCcw ccw; > IplBlockFcp fcp; > + IPLBlockPV pv; > IplBlockQemuScsi scsi; > }; > } QEMU_PACKED; > @@ -85,8 +104,11 @@ typedef union IplParameterBlock IplParameterBlock; > > int s390_ipl_set_loadparm(uint8_t *loadparm); > void s390_ipl_update_diag308(IplParameterBlock *iplb); > +int s390_ipl_prepare_pv_header(void); > +int s390_ipl_pv_unpack(void); > void s390_ipl_prepare_cpu(S390CPU *cpu); > IplParameterBlock *s390_ipl_get_iplb(void); > +IplParameterBlock *s390_ipl_get_iplb_pv(void); > > enum s390_reset { > /* default is a reset not triggered by a CPU e.g. 
issued by QMP */ > @@ -94,6 +116,7 @@ enum s390_reset { > S390_RESET_REIPL, > S390_RESET_MODIFIED_CLEAR, > S390_RESET_LOAD_NORMAL, > + S390_RESET_PV, > }; > void s390_ipl_reset_request(CPUState *cs, enum s390_reset reset_type); > void s390_ipl_get_reset_request(CPUState **cs, enum s390_reset *reset_type); > @@ -133,6 +156,7 @@ struct S390IPLState { > /*< private >*/ > DeviceState parent_obj; > IplParameterBlock iplb; > + IplParameterBlock iplb_pv; > QemuIplParameters qipl; > uint64_t start_addr; > uint64_t compat_start_addr; > @@ -140,6 +164,7 @@ struct S390IPLState { > uint64_t compat_bios_start_addr; > bool enforce_bios; > bool iplb_valid; > + bool iplb_valid_pv; > bool netboot; > /* reset related properties don't have to be migrated or reset */ > enum s390_reset reset_type; > @@ -161,9 +186,11 @@ QEMU_BUILD_BUG_MSG(offsetof(S390IPLState, iplb) & 3, > "alignment of iplb wrong"); > > #define S390_IPL_TYPE_FCP 0x00 > #define S390_IPL_TYPE_CCW 0x02 > +#define S390_IPL_TYPE_PV 0x05 > #define S390_IPL_TYPE_QEMU_SCSI 0xff > > #define S390_IPLB_HEADER_LEN 8 > +#define S390_IPLB_MIN_PV_LEN 148 > #define S390_IPLB_MIN_CCW_LEN 200 > #define S390_IPLB_MIN_FCP_LEN 384 > #define S390_IPLB_MIN_QEMU_SCSI_LEN 200 > @@ -173,16 +200,49 @@ static inline bool iplb_valid_len(IplParameterBlock > *iplb) > return be32_to_cpu(iplb->len) <= sizeof(IplParameterBlock); > } > > -static inline bool iplb_valid_ccw(IplParameterBlock *iplb) > +static inline bool s390_ipl_pv_check_components(IplParameterBlock *iplb) Still don't like the function name :) ipl_valid_pv_components() ? 
> { > - return be32_to_cpu(iplb->len) >= S390_IPLB_MIN_CCW_LEN && > - iplb->pbt == S390_IPL_TYPE_CCW; > + IPLBlockPV *ipib_pv = &iplb->pv; > + int i; > + > + if (ipib_pv->num_comp == 0) { > + return false; > + } > + > + for (i = 0; i < ipib_pv->num_comp; i++) { > + /* Addr must be 4k aligned */ > + if (ipib_pv->components[i].addr & ~TARGET_PAGE_MASK) { > + return false; > + } > + > + /* Tweak prefix is monotonically increasing with each component */ > + if (i < ipib_pv->num_comp - 1 && > + ipib_pv->components[i].tweak_pref >= > + ipib_pv->components[i + 1].tweak_pref) { > + return false; > + } > + } > + return true; > } > > -static inline bool iplb_valid_fcp(IplParameterBlock *iplb) > +static inline bool iplb_valid(IplParameterBlock *iplb) > { > - return be32_to_cpu(iplb->len) >= S390_IPLB_MIN_FCP_LEN && > - iplb->pbt == S390_IPL_TYPE_FCP; > + switch (iplb->pbt) { > + case S390_IPL_TYPE_FCP: > + return (be32_to_cpu(iplb->len) >= S390_IPLB_MIN_FCP_LEN && > + iplb->pbt == S390_IPL_TYPE_FCP); > + case S390_IPL_TYPE_CCW: > + return (be32_to_cpu(iplb->len) >= S390_IPLB_MIN_CCW_LEN && > + iplb->pbt == S390_IPL_TYPE_CCW); That's a refactoring you could have split out. > + case S390_IPL_TYPE_PV: > + if(be32_to_cpu(iplb->len) < S390_IPLB_MIN_PV_LEN || > + iplb->pbt != S390_IPL_TYPE_PV) { > + return false; > + } > + return s390_ipl_pv_check_components(iplb); > + default: > + return false; > + } > } > > #endif > diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c > new file mode 100644 > index 0000000000..ba6409246e > --- /dev/null > +++ b/hw/s390x/pv.c > @@ -0,0 +1,104 @@ > +/* > + * Secure execution functions > + * > + * Copyright IBM Corp. 2020 > + * Author(s): > + * Janosch Frank <fran...@linux.ibm.com> > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or (at > + * your option) any later version. See the COPYING file in the top-level > + * directory. 
> + */ > +#include "qemu/osdep.h" > + > +#include <linux/kvm.h> > + > +#include "qemu/error-report.h" > +#include "sysemu/kvm.h" > +#include "pv.h" > + > +const char *cmd_names[] = { > + "VM_ENABLE", > + "VM_DISABLE", > + "VM_SET_SEC_PARAMS", > + "VM_UNPACK", > + "VM_VERIFY", > + "VM_PREP_RESET", > + "VM_UNSHARE_ALL", > +}; > + > +static int s390_pv_cmd(uint32_t cmd, void *data) > +{ > + int rc; > + struct kvm_pv_cmd pv_cmd = { > + .cmd = cmd, > + .data = (uint64_t)data, > + }; > + > + rc = kvm_vm_ioctl(kvm_state, KVM_S390_PV_COMMAND, &pv_cmd); > + if (rc) { > + error_report("KVM PV command %d (%s) failed: header rc %x rrc %x " > + "IOCTL rc: %d", cmd, cmd_names[cmd], pv_cmd.rc, > pv_cmd.rrc, > + rc); > + } > + return rc; > +} > + > +static void s390_pv_cmd_exit(uint32_t cmd, void *data) > +{ > + int rc; > + > + rc = s390_pv_cmd(cmd, data); > + if (rc) { > + exit(1); > + } > +} > + > +int s390_pv_vm_enable(void) > +{ > + return s390_pv_cmd(KVM_PV_ENABLE, NULL); > +} > + > +void s390_pv_vm_disable(void) > +{ > + s390_pv_cmd_exit(KVM_PV_DISABLE, NULL); > +} > + > +int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) > +{ > + struct kvm_s390_pv_sec_parm args = { > + .origin = origin, > + .length = length, > + }; > + > + return s390_pv_cmd(KVM_PV_VM_SET_SEC_PARMS, &args); > +} > + > +/* > + * Called for each component in the SE type IPL parameter block 0. 
> + */ > +int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) > +{ > + struct kvm_s390_pv_unp args = { > + .addr = addr, > + .size = size, > + .tweak = tweak, > + }; > + > + return s390_pv_cmd(KVM_PV_VM_UNPACK, &args); > +} > + > +void s390_pv_perf_clear_reset(void) > +{ > + s390_pv_cmd_exit(KVM_PV_VM_PREP_RESET, NULL); > +} > + > +int s390_pv_verify(void) > +{ > + return s390_pv_cmd(KVM_PV_VM_VERIFY, NULL); > +} > + > +void s390_pv_unshare(void) > +{ > + s390_pv_cmd_exit(KVM_PV_VM_UNSHARE_ALL, NULL); > +} > diff --git a/hw/s390x/pv.h b/hw/s390x/pv.h > new file mode 100644 > index 0000000000..e58fbca96a > --- /dev/null > +++ b/hw/s390x/pv.h > @@ -0,0 +1,34 @@ > +/* > + * Protected Virtualization header > + * > + * Copyright IBM Corp. 2020 > + * Author(s): > + * Janosch Frank <fran...@linux.ibm.com> > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or (at > + * your option) any later version. See the COPYING file in the top-level > + * directory. > + */ > + > +#ifndef HW_S390_PV_H > +#define HW_S390_PV_H > + > +#ifdef CONFIG_KVM > +int s390_pv_vm_enable(void); > +void s390_pv_vm_disable(void); > +int s390_pv_set_sec_parms(uint64_t origin, uint64_t length); > +int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak); > +void s390_pv_perf_clear_reset(void); > +int s390_pv_verify(void); > +void s390_pv_unshare(void); > +#else > +static inline int s390_pv_vm_enable(void) { return 0; } > +static inline void s390_pv_vm_disable(void) {} > +static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { > return 0; } > +static inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t > tweak) { return 0; } > +static inline void s390_pv_perf_clear_reset(void) {} > +static inline int s390_pv_verify(void) { return 0; } > +static inline void s390_pv_unshare(void) {} > +#endif > + > +#endif /* HW_S390_PV_H */ > diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c > index a89cf4c129..f718cfc591 
100644 > --- a/hw/s390x/s390-virtio-ccw.c > +++ b/hw/s390x/s390-virtio-ccw.c > @@ -41,6 +41,8 @@ > #include "hw/qdev-properties.h" > #include "hw/s390x/tod.h" > #include "sysemu/sysemu.h" > +#include "hw/s390x/pv.h" > +#include <linux/kvm.h> > > S390CPU *s390_cpu_addr2state(uint16_t cpu_addr) > { > @@ -238,9 +240,11 @@ static void s390_create_sclpconsole(const char *type, > Chardev *chardev) > static void ccw_init(MachineState *machine) > { > int ret; > + S390CcwMachineState *ms = S390_CCW_MACHINE(machine); > VirtualCssBus *css_bus; > DeviceState *dev; > > + ms->pv = false; > s390_sclp_init(); > /* init memory + setup max page size. Required for the CPU model */ > s390_memory_init(machine->ram); > @@ -316,10 +320,90 @@ static inline void s390_do_cpu_ipl(CPUState *cs, > run_on_cpu_data arg) > s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu); > } > > +static void s390_machine_unprotect(S390CcwMachineState *ms) > +{ > + CPUState *t; > + > + if (ms->pv) { This is always the case. The check for ms->pv can be dropped (and if other patches require this, it is to be handled in the caller). 
> + s390_pv_vm_disable(); > + CPU_FOREACH(t) { > + S390_CPU(t)->env.pv = false; > + } > + ms->pv = false; > + } > +} > + > +static int s390_machine_protect(S390CcwMachineState *ms) > +{ > + CPUState *t; > + int rc; > + > + /* Create SE VM */ > + rc = s390_pv_vm_enable(); > + if (rc) { > + return rc; > + } > + > + CPU_FOREACH(t) { > + S390_CPU(t)->env.pv = true; > + } > + ms->pv = true; > + > + /* Set SE header and unpack */ > + rc = s390_ipl_prepare_pv_header(); > + if (rc) { > + goto out_err; > + } > + > + /* Decrypt image */ > + rc = s390_ipl_pv_unpack(); > + if (rc) { > + goto out_err; > + } > + > + /* Verify integrity */ > + rc = s390_pv_verify(); > + if (rc) { > + goto out_err; > + } > + return rc; > + > +out_err: > + s390_machine_unprotect(ms); > + return rc; > +} > + > +#define DIAG_308_RC_INVAL_FOR_PV 0x0a02 > +static void s390_machine_inject_pv_error(CPUState *cs) > +{ > + int r1 = (cs->kvm_run->s390_sieic.ipa & 0x00f0) >> 4; > + CPUS390XState *env = &S390_CPU(cs)->env; > + > + /* Report that we are unable to enter protected mode */ > + env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV; > +} > + > +static void s390_pv_prepare_reset(CPUS390XState *env) > +{ > + CPUState *cs; > + > + if (!env->pv) { > + return; > + } > + CPU_FOREACH(cs) { > + s390_cpu_set_state(S390_CPU_STATE_STOPPED, S390_CPU(cs)); > + } This is done by the CPU reset in all cases? Why is that needed? (->comment) > + s390_pv_unshare(); > + s390_pv_perf_clear_reset(); > +} > + > static void s390_machine_reset(MachineState *machine) > { > enum s390_reset reset_type; > CPUState *cs, *t; > + S390CPU *cpu; > + S390CcwMachineState *ms = S390_CCW_MACHINE(machine); Nit: Move this to the very top. 
> + CPUS390XState *env; > > /* get the reset parameters, reset them once done */ > s390_ipl_get_reset_request(&cs, &reset_type); > @@ -327,9 +411,16 @@ static void s390_machine_reset(MachineState *machine) > /* all CPUs are paused and synchronized at this point */ > s390_cmma_reset(); > > + cpu = S390_CPU(cs); > + env = &cpu->env; Can you just pass "cpu" to s390_pv_prepare_reset() and handle it in there? > + > switch (reset_type) { > case S390_RESET_EXTERNAL: > case S390_RESET_REIPL: > + if (ms->pv) { > + s390_machine_unprotect(ms); > + } > + > qemu_devices_reset(); > s390_crypto_reset(); > > @@ -337,22 +428,52 @@ static void s390_machine_reset(MachineState *machine) > run_on_cpu(cs, s390_do_cpu_ipl, RUN_ON_CPU_NULL); > break; > case S390_RESET_MODIFIED_CLEAR: > + /* > + * Susbsystem reset needs to be done before we unshare memory > + * and loose access to VIRTIO structures in guest memory. > + */ > + subsystem_reset(); > + s390_crypto_reset(); > + s390_pv_prepare_reset(env); > CPU_FOREACH(t) { > run_on_cpu(t, s390_do_cpu_full_reset, RUN_ON_CPU_NULL); > } > - subsystem_reset(); > - s390_crypto_reset(); > run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); > break; > case S390_RESET_LOAD_NORMAL: > + /* > + * Susbsystem reset needs to be done before we unshare memory > + * and loose access to VIRTIO structures in guest memory. 
> + */ > + subsystem_reset(); > + s390_pv_prepare_reset(env); > CPU_FOREACH(t) { > if (t == cs) { > continue; > } > run_on_cpu(t, s390_do_cpu_reset, RUN_ON_CPU_NULL); > } > - subsystem_reset(); > run_on_cpu(cs, s390_do_cpu_initial_reset, RUN_ON_CPU_NULL); > + run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); > + break; > + case S390_RESET_PV: /* Subcode 10 */ > + subsystem_reset(); > + s390_crypto_reset(); > + > + CPU_FOREACH(t) { > + if (t == cs) { > + continue; > + } > + run_on_cpu(t, s390_do_cpu_full_reset, RUN_ON_CPU_NULL); > + } > + run_on_cpu(cs, s390_do_cpu_reset, RUN_ON_CPU_NULL); > + > + if (s390_machine_protect(ms)) { > + s390_machine_inject_pv_error(cs); > + s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu); > + return; > + } > + > run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); [...] > > +#if !defined(CONFIG_USER_ONLY) > +static bool machine_is_pv(MachineState *ms) > +{ > + Object *obj; > + > + /* we have to bail out for the "none" machine */ > + obj = object_dynamic_cast(OBJECT(ms), TYPE_S390_CCW_MACHINE); > + if (!obj) { > + return false; > + } > + return S390_CCW_MACHINE(obj)->pv; Maybe you want to cache the machine, so you can avoid the lookup+conversion on every new CPU. > +} > +#endif [...] > static int diag308_parm_check(CPUS390XState *env, uint64_t r1, uint64_t addr, > uintptr_t ra, bool write) > { > + /* Handled by the Ultravisor */ > + if (env->pv) { > + return 0; > + } > if ((r1 & 1) || (addr & ~TARGET_PAGE_MASK)) { > s390_program_interrupt(env, PGM_SPECIFICATION, ra); > return -1; > @@ -93,6 +101,11 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, > uint64_t r3, uintptr_t ra) > return; > } > > + if (subcode > 7 && !s390_has_feat(S390_FEAT_UNPACK)) { >= DIAG308_PV_SET > + s390_program_interrupt(env, PGM_SPECIFICATION, ra); > + return; > + } > + -- Thanks, David / dhildenb