On Wednesday 20 February 2019 06:35 AM, Paul Mackerras wrote:
> This makes the handling of machine check interrupts that occur inside
> a guest simpler and more robust, with less done in assembler code and
> in real mode.
> 
> Now, when a machine check occurs inside a guest, we always get the
> machine check event struct and put a copy in the vcpu struct for the
> vcpu where the machine check occurred.  We no longer call
> machine_check_queue_event() from kvmppc_realmode_mc_power7(), because
> on POWER8, when a vcpu is running on an offline secondary thread and
> we call machine_check_queue_event(), that calls irq_work_queue(),
> which doesn't work because the CPU is offline, but instead triggers
> the WARN_ON(lazy_irq_pending()) in pnv_smp_cpu_kill_self() (which
> fires again and again because nothing clears the condition).
> 
> All that machine_check_queue_event() actually does is to cause the
> event to be printed to the console.  For a machine check occurring in
> the guest, we now print the event in kvmppc_handle_exit_hv()
> instead.
> 
> The assembly code at label machine_check_realmode now just calls C
> code and then continues exiting the guest.  We no longer either
> synthesize a machine check for the guest in assembly code or return
> to the guest without a machine check.
> 
> The code in kvmppc_handle_exit_hv() is extended to handle the case
> where the guest is not FWNMI-capable.  In that case we now always
> synthesize a machine check interrupt for the guest.  Previously, if
> the host thinks it has recovered the machine check fully, it would
> return to the guest without any notification that the machine check
> had occurred.  If the machine check was caused by some action of the
> guest (such as creating duplicate SLB entries), it is much better to
> tell the guest that it has caused a problem.  Therefore we now always
> generate a machine check interrupt for guests that are not
> FWNMI-capable.
> 
> Signed-off-by: Paul Mackerras <pau...@ozlabs.org>
> ---
>  arch/powerpc/include/asm/kvm_ppc.h      |  3 +-
>  arch/powerpc/kvm/book3s.c               |  7 +++++
>  arch/powerpc/kvm/book3s_hv.c            | 18 +++++++++--
>  arch/powerpc/kvm/book3s_hv_ras.c        | 56 
> +++++++++------------------------
>  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 40 ++---------------------
>  5 files changed, 42 insertions(+), 82 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
> b/arch/powerpc/include/asm/kvm_ppc.h
> index b3bf4f6..d283d31 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -143,6 +143,7 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
> 
>  extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
>  extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
> +extern void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong 
> flags);
>  extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
>  extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu);
>  extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu);
> @@ -646,7 +647,7 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int 
> target,
>                              unsigned int yield_count);
>  long kvmppc_h_random(struct kvm_vcpu *vcpu);
>  void kvmhv_commence_exit(int trap);
> -long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
> +void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
>  void kvmppc_subcore_enter_guest(void);
>  void kvmppc_subcore_exit_guest(void);
>  long kvmppc_realmode_hmi_handler(void);
> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
> index 22a46c6..10c5579 100644
> --- a/arch/powerpc/kvm/book3s.c
> +++ b/arch/powerpc/kvm/book3s.c
> @@ -195,6 +195,13 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, 
> unsigned int vec)
>  }
>  EXPORT_SYMBOL_GPL(kvmppc_book3s_queue_irqprio);
> 
> +void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags)
> +{
> +     /* might as well deliver this straight away */
> +     kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_MACHINE_CHECK, flags);
> +}
> +EXPORT_SYMBOL_GPL(kvmppc_core_queue_machine_check);
> +
>  void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
>  {
>       /* might as well deliver this straight away */
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 1860c0b..d8bf05a 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -1215,6 +1215,22 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, 
> struct kvm_vcpu *vcpu,
>               r = RESUME_GUEST;
>               break;
>       case BOOK3S_INTERRUPT_MACHINE_CHECK:
> +             /* Print the MCE event to host console. */
> +             machine_check_print_event_info(&vcpu->arch.mce_evt, false);
> +
> +             /*
> +              * If the guest can do FWNMI, exit to userspace so it can
> +              * deliver a FWNMI to the guest.
> +              * Otherwise we synthesize a machine check for the guest
> +              * so that it knows that the machine check occurred.
> +              */
> +             if (!vcpu->kvm->arch.fwnmi_enabled) {
> +                     ulong flags = vcpu->arch.shregs.msr & 0x083c0000;
> +                     kvmppc_core_queue_machine_check(vcpu, flags);
> +                     r = RESUME_GUEST;
> +                     break;
> +             }
> +
>               /* Exit to guest with KVM_EXIT_NMI as exit reason */
>               run->exit_reason = KVM_EXIT_NMI;
>               run->hw.hardware_exit_reason = vcpu->arch.trap;
> @@ -1227,8 +1243,6 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, 
> struct kvm_vcpu *vcpu,
>                       run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV;
> 
>               r = RESUME_HOST;
> -             /* Print the MCE event to host console. */
> -             machine_check_print_event_info(&vcpu->arch.mce_evt, false);
>               break;
>       case BOOK3S_INTERRUPT_PROGRAM:
>       {
> diff --git a/arch/powerpc/kvm/book3s_hv_ras.c 
> b/arch/powerpc/kvm/book3s_hv_ras.c
> index 0787f12..9aa10b1 100644
> --- a/arch/powerpc/kvm/book3s_hv_ras.c
> +++ b/arch/powerpc/kvm/book3s_hv_ras.c
> @@ -69,7 +69,7 @@ static void reload_slb(struct kvm_vcpu *vcpu)
>   *
>   * Returns: 0 => exit guest, 1 => deliver machine check to guest

The comment above ("Returns: 0 => exit guest, 1 => deliver machine check to guest") should be removed or updated, since the function no longer returns a value.

Reviewed-by: Aravinda Prasad <aravi...@linux.vnet.ibm.com>

Regards,
Aravinda

>   */
> -static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
> +static void kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
>  {
>       unsigned long srr1 = vcpu->arch.shregs.msr;
>       struct machine_check_event mce_evt;
> @@ -111,52 +111,24 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu 
> *vcpu)
>       }
> 
>       /*
> -      * See if we have already handled the condition in the linux host.
> -      * We assume that if the condition is recovered then linux host
> -      * will have generated an error log event that we will pick
> -      * up and log later.
> -      * Don't release mce event now. We will queue up the event so that
> -      * we can log the MCE event info on host console.
> +      * Now get the event and stash it in the vcpu struct so it can
> +      * be handled by the primary thread in virtual mode.  We can't
> +      * call machine_check_queue_event() here if we are running on
> +      * an offline secondary thread.
>        */
> -     if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE))
> -             goto out;
> -
> -     if (mce_evt.version == MCE_V1 &&
> -         (mce_evt.severity == MCE_SEV_NO_ERROR ||
> -          mce_evt.disposition == MCE_DISPOSITION_RECOVERED))
> -             handled = 1;
> -
> -out:
> -     /*
> -      * For guest that supports FWNMI capability, hook the MCE event into
> -      * vcpu structure. We are going to exit the guest with KVM_EXIT_NMI
> -      * exit reason. On our way to exit we will pull this event from vcpu
> -      * structure and print it from thread 0 of the core/subcore.
> -      *
> -      * For guest that does not support FWNMI capability (old QEMU):
> -      * We are now going enter guest either through machine check
> -      * interrupt (for unhandled errors) or will continue from
> -      * current HSRR0 (for handled errors) in guest. Hence
> -      * queue up the event so that we can log it from host console later.
> -      */
> -     if (vcpu->kvm->arch.fwnmi_enabled) {
> -             /*
> -              * Hook up the mce event on to vcpu structure.
> -              * First clear the old event.
> -              */
> -             memset(&vcpu->arch.mce_evt, 0, sizeof(vcpu->arch.mce_evt));
> -             if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
> -                     vcpu->arch.mce_evt = mce_evt;
> -             }
> -     } else
> -             machine_check_queue_event();
> +     if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
> +             if (handled && mce_evt.version == MCE_V1)
> +                     mce_evt.disposition = MCE_DISPOSITION_RECOVERED;
> +     } else {
> +             memset(&mce_evt, 0, sizeof(mce_evt));
> +     }
> 
> -     return handled;
> +     vcpu->arch.mce_evt = mce_evt;
>  }
> 
> -long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
> +void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
>  {
> -     return kvmppc_realmode_mc_power7(vcpu);
> +     kvmppc_realmode_mc_power7(vcpu);
>  }
> 
>  /* Check if dynamic split is in force and return subcore size accordingly. */
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
> b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index 9b8d50a..f24f6a2 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -2826,49 +2826,15 @@ kvm_cede_exit:
>  #endif /* CONFIG_KVM_XICS */
>  3:   b       guest_exit_cont
> 
> -     /* Try to handle a machine check in real mode */
> +     /* Try to do machine check recovery in real mode */
>  machine_check_realmode:
>       mr      r3, r9          /* get vcpu pointer */
>       bl      kvmppc_realmode_machine_check
>       nop
> +     /* all machine checks go to virtual mode for further handling */
>       ld      r9, HSTATE_KVM_VCPU(r13)
>       li      r12, BOOK3S_INTERRUPT_MACHINE_CHECK
> -     /*
> -      * For the guest that is FWNMI capable, deliver all the MCE errors
> -      * (handled/unhandled) by exiting the guest with KVM_EXIT_NMI exit
> -      * reason. This new approach injects machine check errors in guest
> -      * address space to guest with additional information in the form
> -      * of RTAS event, thus enabling guest kernel to suitably handle
> -      * such errors.
> -      *
> -      * For the guest that is not FWNMI capable (old QEMU) fallback
> -      * to old behaviour for backward compatibility:
> -      * Deliver unhandled/fatal (e.g. UE) MCE errors to guest either
> -      * through machine check interrupt (set HSRR0 to 0x200).
> -      * For handled errors (no-fatal), just go back to guest execution
> -      * with current HSRR0.
> -      * if we receive machine check with MSR(RI=0) then deliver it to
> -      * guest as machine check causing guest to crash.
> -      */
> -     ld      r11, VCPU_MSR(r9)
> -     rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
> -     bne     guest_exit_cont         /* if so, exit to host */
> -     /* Check if guest is capable of handling NMI exit */
> -     ld      r10, VCPU_KVM(r9)
> -     lbz     r10, KVM_FWNMI(r10)
> -     cmpdi   r10, 1                  /* FWNMI capable? */
> -     beq     guest_exit_cont         /* if so, exit with KVM_EXIT_NMI. */
> -
> -     /* if not, fall through for backward compatibility. */
> -     andi.   r10, r11, MSR_RI        /* check for unrecoverable exception */
> -     beq     1f                      /* Deliver a machine check to guest */
> -     ld      r10, VCPU_PC(r9)
> -     cmpdi   r3, 0           /* Did we handle MCE ? */
> -     bne     2f      /* Continue guest execution. */
> -     /* If not, deliver a machine check.  SRR0/1 are already set */
> -1:   li      r10, BOOK3S_INTERRUPT_MACHINE_CHECK
> -     bl      kvmppc_msr_interrupt
> -2:   b       fast_interrupt_c_return
> +     b       guest_exit_cont
> 
>  /*
>   * Call C code to handle a HMI in real mode.
> 

-- 
Regards,
Aravinda

Reply via email to