On Monday 03 June 2019 09:10 PM, Greg Kurz wrote:
> On Wed, 29 May 2019 11:10:57 +0530
> Aravinda Prasad <aravi...@linux.vnet.ibm.com> wrote:
>
>> This patch includes migration support for machine check
>> handling. Specifically, this patch blocks VM migration
>> requests until the machine check error handling is
>> complete, as (i) these errors are specific to the source
>> hardware and are irrelevant on the target hardware, and
>> (ii) these errors cause data corruption and should
>> be handled before migration.
>>
>> Signed-off-by: Aravinda Prasad <aravi...@linux.vnet.ibm.com>
>> ---
>
> LGTM, just one issue: machine reset should del and free the blocker as well,
> otherwise QEMU would crash if spapr_mce_req_event() is called again.
Sure.
>
>> hw/ppc/spapr.c | 20 ++++++++++++++++++++
>> hw/ppc/spapr_events.c | 17 +++++++++++++++++
>> hw/ppc/spapr_rtas.c | 4 ++++
>> include/hw/ppc/spapr.h | 2 ++
>> 4 files changed, 43 insertions(+)
>>
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index e8a77636..31c4850 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -2104,6 +2104,25 @@ static const VMStateDescription vmstate_spapr_dtb = {
>> },
>> };
>>
>> +static bool spapr_fwnmi_needed(void *opaque)
>> +{
>> + SpaprMachineState *spapr = (SpaprMachineState *)opaque;
>> +
>> + return (spapr->guest_machine_check_addr == -1) ? 0 : 1;
>> +}
>> +
>> +static const VMStateDescription vmstate_spapr_machine_check = {
>> + .name = "spapr_machine_check",
>> + .version_id = 1,
>> + .minimum_version_id = 1,
>> + .needed = spapr_fwnmi_needed,
>> + .fields = (VMStateField[]) {
>> + VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
>> + VMSTATE_INT32(mc_status, SpaprMachineState),
>> + VMSTATE_END_OF_LIST()
>> + },
>> +};
>> +
>> static const VMStateDescription vmstate_spapr = {
>> .name = "spapr",
>> .version_id = 3,
>> @@ -2137,6 +2156,7 @@ static const VMStateDescription vmstate_spapr = {
>> &vmstate_spapr_dtb,
>> &vmstate_spapr_cap_large_decr,
>> &vmstate_spapr_cap_ccf_assist,
>> + &vmstate_spapr_machine_check,
>> NULL
>> }
>> };
>> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
>> index 573c0b7..35e21e4 100644
>> --- a/hw/ppc/spapr_events.c
>> +++ b/hw/ppc/spapr_events.c
>> @@ -41,6 +41,7 @@
>> #include "qemu/bcd.h"
>> #include "hw/ppc/spapr_ovec.h"
>> #include <libfdt.h>
>> +#include "migration/blocker.h"
>>
>> #define RTAS_LOG_VERSION_MASK 0xff000000
>> #define RTAS_LOG_VERSION_6 0x06000000
>> @@ -855,6 +856,22 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered)
>> void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
>> {
>> SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>> + int ret;
>> + Error *local_err = NULL;
>> +
>> + error_setg(&spapr->fwnmi_migration_blocker,
>> + "Live migration not supported during machine check handling");
>> + ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, &local_err);
>> + if (ret < 0) {
>> + /*
>> + * We don't want to abort, so we let the migration continue. In a
>> + * rare case, the machine check handler will run on the target
>> + * hardware. Though this is not preferable, it is better than aborting
>> + * the migration or killing the VM.
>> + */
>> + error_free(spapr->fwnmi_migration_blocker);
>> + warn_report_err(local_err);
>> + }
>>
>> while (spapr->mc_status != -1) {
>> /*
>> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
>> index 91a7ab9..c849223 100644
>> --- a/hw/ppc/spapr_rtas.c
>> +++ b/hw/ppc/spapr_rtas.c
>> @@ -50,6 +50,7 @@
>> #include "target/ppc/mmu-hash64.h"
>> #include "target/ppc/mmu-book3s-v3.h"
>> #include "kvm_ppc.h"
>> +#include "migration/blocker.h"
>>
>> static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
>> uint32_t token, uint32_t nargs,
>> @@ -404,6 +405,9 @@ static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
>> spapr->mc_status = -1;
>> qemu_cond_signal(&spapr->mc_delivery_cond);
>> rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>> + migrate_del_blocker(spapr->fwnmi_migration_blocker);
>> + error_free(spapr->fwnmi_migration_blocker);
>> + spapr->fwnmi_migration_blocker = NULL;
>> }
>> }
>>
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index bd75d4b..6c0cfd8 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -214,6 +214,8 @@ struct SpaprMachineState {
>> SpaprCapabilities def, eff, mig;
>>
>> unsigned gpu_numa_id;
>> +
>> + Error *fwnmi_migration_blocker;
>> };
>>
>> #define H_SUCCESS 0
>>
>>
>
--
Regards,
Aravinda