On Friday 30 August 2019 10:38 PM, Greg Kurz wrote:
> On Fri, 30 Aug 2019 14:44:07 +0530
> Aravinda Prasad <aravi...@linux.vnet.ibm.com> wrote:
>
>> This patch adds support in QEMU to handle "ibm,nmi-register"
>> and "ibm,nmi-interlock" RTAS calls and sets the default
>> value of SPAPR_CAP_FWNMI_MCE to SPAPR_CAP_ON for machine
>> type 4.2.
>>
>> The machine check notification address is saved when the
>> OS issues "ibm,nmi-register" RTAS call.
>>
>> This patch also handles the case when multiple processors
>> experience machine check at or about the same time by
>> handling "ibm,nmi-interlock" call. In such cases, as per
>> PAPR, subsequent processors serialize waiting for the first
>> processor to issue the "ibm,nmi-interlock" call. The second
>> processor that also received a machine check error waits
>> till the first processor is done reading the error log.
>> The first processor issues "ibm,nmi-interlock" call
>> when the error log is consumed.
>>
>> Signed-off-by: Aravinda Prasad <aravi...@linux.vnet.ibm.com>
>> ---
>> hw/ppc/spapr.c | 12 +++++++++++-
>> hw/ppc/spapr_rtas.c | 50
>> ++++++++++++++++++++++++++++++++++++++++++++++++
>> include/hw/ppc/spapr.h | 5 ++++-
>> 3 files changed, 65 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 9f2e5d2..1c0908e 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -2941,6 +2941,15 @@ static void spapr_machine_init(MachineState *machine)
>>
>> /* Resize rtas blob to accommodate error log */
>> spapr->rtas_size = RTAS_ERROR_LOG_MAX;
>> +
>> + /* Set fwnmi capability in KVM */
>> + if (kvmppc_set_fwnmi() < 0) {
>> + error_report("Could not enable FWNMI capability");
>> + exit(1);
>> + }
>
> Hmm... shouldn't this be performed only when the guest
> calls "ibm,nmi-register" ?
The above code runs only when SPAPR_CAP_FWNMI_MCE is set. Please see
my reply to your comments on patch 2, where I explain why we don't
call it during "ibm,nmi-register".
>
>> +
>> + /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
>> + spapr_fwnmi_register();
>> }
>>
>> spapr->rtas_blob = g_malloc(spapr->rtas_size);
>> @@ -4508,7 +4517,7 @@ static void spapr_machine_class_init(ObjectClass *oc,
>> void *data)
>> smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>> smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>> smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
>> - smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
>> + smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
>> spapr_caps_add_properties(smc, &error_abort);
>> smc->irq = &spapr_irq_dual;
>> smc->dr_phb_enabled = true;
>> @@ -4582,6 +4591,7 @@ static void
>> spapr_machine_4_1_class_options(MachineClass *mc)
>> smc->linux_pci_probe = false;
>> compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len);
>> compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
>> + smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
>> }
>>
>> DEFINE_SPAPR_MACHINE(4_1, "4.1", false);
>> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
>> index d8fb8a8..d892583 100644
>> --- a/hw/ppc/spapr_rtas.c
>> +++ b/hw/ppc/spapr_rtas.c
>> @@ -400,6 +400,48 @@ static void rtas_get_power_level(PowerPCCPU *cpu,
>> SpaprMachineState *spapr,
>> rtas_st(rets, 1, 100);
>> }
>>
>> +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
>> + SpaprMachineState *spapr,
>> + uint32_t token, uint32_t nargs,
>> + target_ulong args,
>> + uint32_t nret, target_ulong rets)
>> +{
>> + hwaddr rtas_addr = spapr_get_rtas_addr();
>> +
>> + if (!rtas_addr) {
>> + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>> + return;
>> + }
>> +
>> + if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
>> + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>> + return;
>> + }
>> +
>> + spapr->guest_machine_check_addr = rtas_ld(args, 1);
>
> Doesn't this need some sanity checks ? At least error out on -1
> which has a special meaning in the code and cannot really be used
> as a valid instruction address.
>
> Also PAPR+ says:
>
> R1–7.3.14–6. For the FWNMI option: The Real/Logical address of the
> registered OS Machine Check and System Reset routines must be in the
> first 32 MB of the OS’s memory address space.
Yes, we can add a sanity check, but I feel it is not required, since a
guest can still provide a wrong address that lies within the first 32 MB
of its address space.
>
>
> And only at this point you may enable the cap in KVM since the
> guest has decided to use FWNMI.
>
> My concern is: what happens when the guest reboots ? We set
> guest_machine_check_addr back to -1 during machine reset but
> KVM still assumes the guest has enabled FWNMI... I see that
> enabling FWNMI boils down to setting a kvm->arch.fwnmi_enabled
> to true... what about providing a way to disable FWNMI ?
As per my understanding, FWNMI remains enabled in KVM when the guest
reboots, so a machine check will still cause a guest exit. If, after the
reboot, the guest does not call "ibm,nmi-register", we invoke the guest's
0x200 interrupt vector upon a machine check (because
guest_machine_check_addr is reset to -1). If the guest does call
"ibm,nmi-register", the registered handler is invoked instead.
Unfortunately, KVM does not support disabling a capability.
Regards,
Aravinda
>
>> + rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>> +}
>> +
>> +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
>> + SpaprMachineState *spapr,
>> + uint32_t token, uint32_t nargs,
>> + target_ulong args,
>> + uint32_t nret, target_ulong rets)
>> +{
>> + if (spapr->guest_machine_check_addr == -1) {
>> + /* NMI register not called */
>> + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
>> + } else {
>> + /*
>> + * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
>> + * hence unset mc_status.
>> + */
>> + spapr->mc_status = -1;
>> + qemu_cond_signal(&spapr->mc_delivery_cond);
>> + rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>> + }
>> +}
>> +
>> static struct rtas_call {
>> const char *name;
>> spapr_rtas_fn fn;
>> @@ -544,6 +586,14 @@ hwaddr spapr_get_rtas_addr(void)
>> return (hwaddr)fdt32_to_cpu(*rtas_data);
>> }
>>
>> +void spapr_fwnmi_register(void)
>> +{
>> + spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
>> + rtas_ibm_nmi_register);
>> + spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
>> + rtas_ibm_nmi_interlock);
>> +}
>> +
>> static void core_rtas_register_types(void)
>> {
>> spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index ffefde7..dada821 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -655,8 +655,10 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu,
>> target_ulong opcode,
>> #define RTAS_IBM_REMOVE_PE_DMA_WINDOW (RTAS_TOKEN_BASE + 0x28)
>> #define RTAS_IBM_RESET_PE_DMA_WINDOW (RTAS_TOKEN_BASE + 0x29)
>> #define RTAS_IBM_SUSPEND_ME (RTAS_TOKEN_BASE + 0x2A)
>> +#define RTAS_IBM_NMI_REGISTER (RTAS_TOKEN_BASE + 0x2B)
>> +#define RTAS_IBM_NMI_INTERLOCK (RTAS_TOKEN_BASE + 0x2C)
>>
>> -#define RTAS_TOKEN_MAX (RTAS_TOKEN_BASE + 0x2B)
>> +#define RTAS_TOKEN_MAX (RTAS_TOKEN_BASE + 0x2D)
>>
>> /* RTAS ibm,get-system-parameter token values */
>> #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS 20
>> @@ -908,4 +910,5 @@ void spapr_check_pagesize(SpaprMachineState *spapr,
>> hwaddr pagesize,
>>
>> void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
>> hwaddr spapr_get_rtas_addr(void);
>> +void spapr_fwnmi_register(void);
>> #endif /* HW_SPAPR_H */
>>
>
--
Regards,
Aravinda