Re: [Qemu-devel] [Qemu-ppc] [PATCH v2 3/4] target-ppc: Handle NMI guest exit

Aravinda Prasad Wed, 16 Dec 2015 21:03:03 -0800


On Thursday 17 December 2015 09:30 AM, David Gibson wrote:
> On Wed, Dec 16, 2015 at 11:38:37AM +0530, Aravinda Prasad wrote:
>> Memory error such as bit flips that cannot be corrected
>> by hardware are passed on to the kernel for handling.
>> If the memory address in error belongs to guest then
>> guest kernel is responsible for taking suitable action.
>> Patch [1] enhances KVM to exit guest with exit reason
>> set to KVM_EXIT_NMI in such cases.
>>
>> This patch handles KVM_EXIT_NMI exit. If the guest OS
>> has registered the machine check handling routine by
>> calling "ibm,nmi-register", then the handler builds
>> the error log and invokes the registered handler else
>> invokes the handler at 0x200.
>>
>> [1] http://marc.info/?l=kvm&m=145024538523497
>>
>> Signed-off-by: Aravinda Prasad <aravi...@linux.vnet.ibm.com>
>> ---
>>  cpus.c                   |    5 +++
>>  hw/ppc/spapr.c           |    6 +++
>>  include/qemu/main-loop.h |    8 ++++
>>  target-ppc/kvm.c         |   86 ++++++++++++++++++++++++++++++++++++++++++++++
>>  target-ppc/kvm_ppc.h     |   81 +++++++++++++++++++++++++++++++++++++++++++
>>  5 files changed, 186 insertions(+)
>>
>> diff --git a/cpus.c b/cpus.c
>> index dddd056..7b7dd0f 100644
>> --- a/cpus.c
>> +++ b/cpus.c
>> @@ -1154,6 +1154,11 @@ void qemu_mutex_unlock_iothread(void)
>>      qemu_mutex_unlock(&qemu_global_mutex);
>>  }
>>  
>> +void qemu_cond_wait_iothread(QemuCond *cond)
>> +{
>> +    qemu_cond_wait(cond, &qemu_global_mutex);
>> +}
>> +
> 
> Even though it's trivial, this generic change should probably go in a
> separate patch from the papr specific pieces.
> 
> Speaking of which, I think it's sufficiently trivial you could just
> inline it in the header.


I originally had it in a separate patch, but later folded it into this
one because it is trivial.

I will move it back into a separate patch.

> 
>>  static int all_vcpus_paused(void)
>>  {
>>      CPUState *cpu;
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 05926a3..501dd70 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -1556,6 +1556,12 @@ static void ppc_spapr_init(MachineState *machine)
>>          exit(1);
>>      }
>>      spapr->rtas_size = get_image_size(filename);
>> +
>> +    assert(spapr->rtas_size < RTAS_ERRLOG_OFFSET);
>> +
>> +    /* Resize blob to accommodate error log. */
>> +    spapr->rtas_size = RTAS_ERRLOG_OFFSET + sizeof(struct RtasMCELog);
>> +
>>      spapr->rtas_blob = g_malloc(spapr->rtas_size);
>>      if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) {
>>          error_report("Could not load LPAR rtas '%s'", filename);
>> diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
>> index 9976909..c4d4446 100644
>> --- a/include/qemu/main-loop.h
>> +++ b/include/qemu/main-loop.h
>> @@ -263,6 +263,14 @@ void qemu_mutex_lock_iothread(void);
>>   */
>>  void qemu_mutex_unlock_iothread(void);
>>  
>> +/**
>> + * qemu_cond_wait_iothread: Wait on condition for the main loop mutex
>> + *
>> + * This function atomically releases the main loop mutex and causes
>> + * the calling thread to block on the condition.
>> + */
>> +void qemu_cond_wait_iothread(QemuCond *cond);
>> +
>>  /* internal interfaces */
>>  
>>  void qemu_fd_register(int fd);
>> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
>> index 110436d..2bbb46d 100644
>> --- a/target-ppc/kvm.c
>> +++ b/target-ppc/kvm.c
>> @@ -1665,6 +1665,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
>>          ret = 0;
>>          break;
>>  
>> +    case KVM_EXIT_NMI:
>> +        DPRINTF("handle NMI exception\n");
>> +        ret = kvm_handle_nmi(cpu);
>> +        break;
>> +
>>      default:
>>          fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
>>          ret = -1;
>> @@ -2484,3 +2489,84 @@ int kvm_arch_msi_data_to_gsi(uint32_t data)
>>  {
>>      return data & 0xffff;
>>  }
>> +
>> +int kvm_handle_nmi(PowerPCCPU *cpu)
>> +{
>> +    struct RtasMCELog mc_log;
>> +    CPUPPCState *env = &cpu->env;
> 
> You go from cpu to env here..
> 
>> +    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
>> +    target_ulong msr = 0;
>> +
>> +    cpu_synchronize_state(CPU(ppc_env_get_cpu(env)));
> 
> Then back again awkwardly, although you still have the cpu variable.

Ah, I overlooked that.

> 
>> +
>> +    /*
>> +     * Properly set bits in MSR before we invoke the handler.
>> +     * SRR0/1, DAR and DSISR are properly set by KVM
>> +     */
>> +    if (!(*pcc->interrupts_big_endian)(cpu)) {
>> +        msr |= (1ULL << MSR_LE);
>> +    }
>> +
>> +    if (env->msr && (1ULL << MSR_SF)) {
>> +        msr |= (1ULL << MSR_SF);
>> +    }
>> +
>> +    msr |= (1ULL << MSR_ME);
> 
> Based on earlier discussions, sounds like assert(msr & (1ULL <<
> MSR_ME)) would actually be correct here.

Based on the discussion at
http://lists.nongnu.org/archive/html/qemu-ppc/2015-11/msg00306.html, I
always set MSR_ME here rather than asserting that it is already set. Or
am I missing something?

Regards,
Aravinda

> 
>> +    env->msr = msr;
>> +
>> +    if (!spapr->guest_machine_check_addr) {
>> +        /*
>> +         * If OS has not registered with "ibm,nmi-register"
>> +         * jump to 0x200
>> +         */
>> +        env->nip = 0x200;
>> +        return 0;
>> +    }
>> +
>> +    while (spapr->mc_in_progress) {
>> +        /*
>> +         * Check whether the same CPU got machine check error
>> +         * while still handling the mc error (i.e., before
>> +         * that CPU called "ibm,nmi-interlock"
>> +         */
>> +        if (spapr->mc_cpu == cpu->cpu_dt_id) {
>> +            qemu_system_guest_panicked();
>> +        }
>> +        qemu_cond_wait_iothread(&spapr->mc_delivery_cond);
>> +    }
>> +    spapr->mc_in_progress = true;
>> +    spapr->mc_cpu = cpu->cpu_dt_id;
>> +
>> +    /* Set error log fields */
>> +    mc_log.r3 = env->gpr[3];
>> +    mc_log.err_log.byte0 = 0;
>> +    mc_log.err_log.byte1 =
>> +        (RTAS_SEVERITY_ERROR_SYNC << RTAS_ELOG_SEVERITY_SHIFT);
>> +    mc_log.err_log.byte1 |=
>> +        (RTAS_DISP_NOT_RECOVERED << RTAS_ELOG_DISPOSITION_SHIFT);
>> +    mc_log.err_log.byte2 =
>> +        (RTAS_INITIATOR_MEMORY << RTAS_ELOG_INITIATOR_SHIFT);
>> +    mc_log.err_log.byte2 |= RTAS_TARGET_MEMORY;
>> +
>> +    if (env->spr[SPR_DSISR] & P7_DSISR_MC_UE) {
>> +        mc_log.err_log.byte3 = RTAS_TYPE_ECC_UNCORR;
>> +    } else {
>> +        mc_log.err_log.byte3 = 0;
>> +    }
>> +
>> +    /* Handle all Host/Guest LE/BE combinations */
>> +    if (env->msr & (1ULL << MSR_LE)) {
>> +        mc_log.r3 = cpu_to_le64(mc_log.r3);
>> +    } else {
>> +        mc_log.r3 = cpu_to_be64(mc_log.r3);
>> +    }
>> +
>> +    cpu_physical_memory_write(spapr->rtas_addr + RTAS_ERRLOG_OFFSET,
>> +                              &mc_log, sizeof(mc_log));
>> +
>> +    env->nip = spapr->guest_machine_check_addr;
>> +    env->gpr[3] = spapr->rtas_addr + RTAS_ERRLOG_OFFSET;
>> +
>> +    return 0;
>> +}
>> diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
>> index 5c1d334..ea3345b 100644
>> --- a/target-ppc/kvm_ppc.h
>> +++ b/target-ppc/kvm_ppc.h
>> @@ -53,6 +53,87 @@ void kvmppc_hash64_free_pteg(uint64_t token);
>>  void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
>>                               target_ulong pte0, target_ulong pte1);
>>  bool kvmppc_has_cap_fixup_hcalls(void);
>> +int kvm_handle_nmi(PowerPCCPU *cpu);
>> +
>> +/* Offset from rtas-base where error log is placed */
>> +#define RTAS_ERRLOG_OFFSET       0x200
>> +
>> +#define RTAS_ELOG_SEVERITY_SHIFT         0x5
>> +#define RTAS_ELOG_DISPOSITION_SHIFT      0x3
>> +#define RTAS_ELOG_INITIATOR_SHIFT        0x4
>> +
>> +/*
>> + * Only required RTAS event severity, disposition, initiator
>> + * target and type are copied from arch/powerpc/include/asm/rtas.h
>> + */
>> +
>> +/* RTAS event severity */
>> +#define RTAS_SEVERITY_ERROR_SYNC    0x3
>> +
>> +/* RTAS event disposition */
>> +#define RTAS_DISP_NOT_RECOVERED     0x2
>> +
>> +/* RTAS event initiator */
>> +#define RTAS_INITIATOR_MEMORY       0x4
>> +
>> +/* RTAS event target */
>> +#define RTAS_TARGET_MEMORY          0x4
>> +
>> +/* RTAS event type */
>> +#define RTAS_TYPE_ECC_UNCORR        0x09
>> +
>> +/*
>> + * Currently KVM only passes on the uncorrected machine
>> + * check memory error to guest. Other machine check errors
>> + * such as SLB multi-hit and TLB multi-hit are recovered
>> + * in KVM and are not passed on to guest.
>> + *
>> + * DSISR Bit for uncorrected machine check error. Based
>> + * on arch/powerpc/include/asm/mce.h
>> + */
>> +#define PPC_BIT(bit)                (0x8000000000000000ULL >> bit)
>> +#define P7_DSISR_MC_UE              (PPC_BIT(48))  /* P8 too */
>> +
>> +/* Adopted from kernel source arch/powerpc/include/asm/rtas.h */
>> +struct rtas_error_log {
>> +    /* Byte 0 */
>> +    uint8_t     byte0;          /* Architectural version */
>> +
>> +    /* Byte 1 */
>> +    uint8_t     byte1;
>> +    /* XXXXXXXX
>> +     * XXX      3: Severity level of error
>> +     *    XX    2: Degree of recovery
>> +     *      X   1: Extended log present?
>> +     *       XX 2: Reserved
>> +     */
>> +
>> +    /* Byte 2 */
>> +    uint8_t     byte2;
>> +    /* XXXXXXXX
>> +     * XXXX     4: Initiator of event
>> +     *     XXXX 4: Target of failed operation
>> +     */
>> +    uint8_t     byte3;          /* General event or error*/
>> +    __be32      extended_log_length;    /* length in bytes */
>> +    unsigned char   buffer[1];      /* Start of extended log */
>> +                                /* Variable length.      */
>> +};
>> +
>> +/*
>> + * Data format in RTAS-Blob
>> + *
>> + * This structure contains error information related to Machine
>> + * Check exception. This is filled up and copied to rtas-blob
>> + * upon machine check exception. The address of rtas-blob is
>> + * passed on to OS registered machine check notification
>> + * routines upon machine check exception
>> + */
>> +struct RtasMCELog {
>> +    target_ulong r3;
>> +    struct rtas_error_log err_log;
>> +};
>> +
>>  
>>  #else
>>  
>>
> 

-- 
Regards,
Aravinda

Re: [Qemu-devel] [Qemu-ppc] [PATCH v2 3/4] target-ppc: Handle NMI guest exit

Reply via email to