Hello,

On Wed, 04 Jul 2018 23:28:21 +0530 "Mahesh J Salgaonkar" <mah...@linux.vnet.ibm.com> wrote:
> From: Mahesh Salgaonkar <mah...@linux.vnet.ibm.com> > > On pseries, as of today system crashes if we get a machine check > exceptions due to SLB errors. These are soft errors and can be fixed > by flushing the SLBs so the kernel can continue to function instead of > system crash. We do this in real mode before turning on MMU. Otherwise > we would run into nested machine checks. This patch now fetches the > rtas error log in real mode and flushes the SLBs on SLB errors. > > Signed-off-by: Mahesh Salgaonkar <mah...@linux.vnet.ibm.com> > --- > arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 > arch/powerpc/include/asm/machdep.h | 1 > arch/powerpc/kernel/exceptions-64s.S | 42 > +++++++++++++++++++++ arch/powerpc/kernel/mce.c > | 16 +++++++- arch/powerpc/mm/slb.c | 6 > +++ arch/powerpc/platforms/pseries/pseries.h | 1 > arch/powerpc/platforms/pseries/ras.c | 51 > +++++++++++++++++++++++++ > arch/powerpc/platforms/pseries/setup.c | 1 8 files changed, > 116 insertions(+), 3 deletions(-) > > diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h > b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index > 50ed64fba4ae..cc00a7088cf3 100644 --- > a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ > b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -487,6 +487,7 @@ > extern void hpte_init_native(void); > extern void slb_initialize(void); > extern void slb_flush_and_rebolt(void); > +extern void slb_flush_and_rebolt_realmode(void); > > extern void slb_vmalloc_update(void); > extern void slb_set_size(u16 size); > diff --git a/arch/powerpc/include/asm/machdep.h > b/arch/powerpc/include/asm/machdep.h index ffe7c71e1132..fe447e0d4140 > 100644 --- a/arch/powerpc/include/asm/machdep.h > +++ b/arch/powerpc/include/asm/machdep.h > @@ -108,6 +108,7 @@ struct machdep_calls { > > /* Early exception handlers called in realmode */ > int (*hmi_exception_early)(struct pt_regs > *regs); > + int (*machine_check_early)(struct pt_regs > *regs); > /* Called during machine check exception to 
retrive fixup > address. */ bool (*mce_check_early_recovery)(struct > pt_regs *regs); diff --git a/arch/powerpc/kernel/exceptions-64s.S > b/arch/powerpc/kernel/exceptions-64s.S index > f283958129f2..0038596b7906 100644 --- > a/arch/powerpc/kernel/exceptions-64s.S +++ > b/arch/powerpc/kernel/exceptions-64s.S @@ -332,6 +332,9 @@ > TRAMP_REAL_BEGIN(machine_check_pSeries) machine_check_fwnmi: > SET_SCRATCH0(r13) /* save r13 */ > EXCEPTION_PROLOG_0(PACA_EXMC) > +BEGIN_FTR_SECTION > + b machine_check_pSeries_early > +END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE) > machine_check_pSeries_0: > EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200) > /* > @@ -343,6 +346,45 @@ machine_check_pSeries_0: > > TRAMP_KVM_SKIP(PACA_EXMC, 0x200) > > +TRAMP_REAL_BEGIN(machine_check_pSeries_early) > +BEGIN_FTR_SECTION > + EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200) > + mr r10,r1 /* Save r1 */ > + ld r1,PACAMCEMERGSP(r13) /* Use MC emergency > stack */ > + subi r1,r1,INT_FRAME_SIZE /* alloc stack > frame */ > + mfspr r11,SPRN_SRR0 /* Save SRR0 */ > + mfspr r12,SPRN_SRR1 /* Save SRR1 */ > + EXCEPTION_PROLOG_COMMON_1() > + EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) > + EXCEPTION_PROLOG_COMMON_3(0x200) > + addi r3,r1,STACK_FRAME_OVERHEAD > + BRANCH_LINK_TO_FAR(machine_check_early) /* Function call ABI > */ + > + /* Move original SRR0 and SRR1 into the respective regs */ > + ld r9,_MSR(r1) > + mtspr SPRN_SRR1,r9 > + ld r3,_NIP(r1) > + mtspr SPRN_SRR0,r3 > + ld r9,_CTR(r1) > + mtctr r9 > + ld r9,_XER(r1) > + mtxer r9 > + ld r9,_LINK(r1) > + mtlr r9 > + REST_GPR(0, r1) > + REST_8GPRS(2, r1) > + REST_GPR(10, r1) > + ld r11,_CCR(r1) > + mtcr r11 > + REST_GPR(11, r1) > + REST_2GPRS(12, r1) > + /* restore original r1. 
*/ > + ld r1,GPR1(r1) > + SET_SCRATCH0(r13) /* save r13 */ > + EXCEPTION_PROLOG_0(PACA_EXMC) > + b machine_check_pSeries_0 > +END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE) > + > EXC_COMMON_BEGIN(machine_check_common) > /* > * Machine check is different because we use a different > diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c > index efdd16a79075..221271c96a57 100644 > --- a/arch/powerpc/kernel/mce.c > +++ b/arch/powerpc/kernel/mce.c > @@ -488,9 +488,21 @@ long machine_check_early(struct pt_regs *regs) > { > long handled = 0; > > - __this_cpu_inc(irq_stat.mce_exceptions); > + /* > + * For pSeries we count mce when we go into virtual mode > machine > + * check handler. Hence skip it. Also, We can't access per > cpu > + * variables in real mode for LPAR. > + */ > + if (early_cpu_has_feature(CPU_FTR_HVMODE)) > + __this_cpu_inc(irq_stat.mce_exceptions);

Maybe we could instead simplify the condition

    if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !cpu_has_feature(CPU_FTR_HVMODE))

in machine_check?

Thanks,
Michal