On Wed, Nov 11, 2015 at 11:27:16AM +1100, Benjamin Herrenschmidt wrote: > On ppc64 especially, we flush the tlb on any slbie or tlbie instruction. > > However, those instructions often come in bursts of 3 or more (context > switch will favor a series of slbie's for example to an slbia if the > SLB has less than a certain number of entries in it, and tlbie's can > happen in a series, with PAPR, H_BULK_REMOVE can remove up to 4 entries > at a time. > > Doing a tlb_flush() each time is a waste of time. We end up doing a memset > of the whole TLB, reloading it for the next instruction, memset'ing again, > etc... > > Those instructions don't have to take effect immediately. For slbie, they > can wait for the next context synchronizing event. For tlbie, the next > tlbsync. > > This implements batching by keeping a flag that indicates that we have a > TLB in need of flushing. We check it on interrupts, rfi's, isync's and > tlbsync and flush the TLB if needed. > > This reduces the number of tlb_flush() on a boot to a ubuntu installer > first dialog screen from roughly 360K down to 36K. > > Signed-off-by: Benjamin Herrenschmidt <b...@kernel.crashing.org> > --- > hw/ppc/spapr_hcall.c | 12 +++++++++--- > target-ppc/cpu.h | 2 ++ > target-ppc/excp_helper.c | 9 +++++++++ > target-ppc/helper.h | 1 + > target-ppc/helper_regs.h | 13 +++++++++++++ > target-ppc/mmu-hash64.c | 12 +++--------- > target-ppc/mmu_helper.c | 9 ++++++++- > target-ppc/translate.c | 39 ++++++++++++++++++++++++++++++++++++--- > 8 files changed, 81 insertions(+), 16 deletions(-) > > diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c > index cebceea..7e2cb4b 100644 > --- a/hw/ppc/spapr_hcall.c > +++ b/hw/ppc/spapr_hcall.c > @@ -220,6 +220,7 @@ static target_ulong h_remove(PowerPCCPU *cpu, > sPAPRMachineState *spapr, > > switch (ret) { > case REMOVE_SUCCESS: > + check_tlb_flush(env); > return H_SUCCESS; > > case REMOVE_NOT_FOUND: > @@ -257,6 +258,7 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu, > sPAPRMachineState *spapr, > target_ulong opcode, target_ulong *args) > { > CPUPPCState *env = &cpu->env; > + target_ulong rc = H_SUCCESS; > int i; > > for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) { > @@ -290,14 +292,18 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu, > sPAPRMachineState *spapr, > break; > > case REMOVE_PARM: > - return H_PARAMETER; > + rc = H_PARAMETER; > + goto exit; > > case REMOVE_HW: > - return H_HARDWARE; > + rc = H_HARDWARE; > + goto exit; > } > } > + exit: > + check_tlb_flush(env); > > - return H_SUCCESS; > + return rc; > } > > static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr, > diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h > index aaa7117..e6c43f9 100644 > --- a/target-ppc/cpu.h > +++ b/target-ppc/cpu.h > @@ -1013,6 +1013,8 @@ struct CPUPPCState { > /* PowerPC 64 SLB area */ > ppc_slb_t slb[MAX_SLB_ENTRIES]; > int32_t slb_nr; > + /* tcg TLB needs flush (deferred slb inval instruction typically) */ > + uint32_t tlb_need_flush; > #endif > /* segment registers */ > hwaddr htab_base; > diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c > index 3e39098..c1d6605 100644 > --- a/target-ppc/excp_helper.c > +++ b/target-ppc/excp_helper.c > @@ -671,6 +671,11 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int > excp_model, int excp) > /* Reset exception state */ > cs->exception_index = POWERPC_EXCP_NONE; > env->error_code = 0; > + > + /* Any interrupt is context synchronizing, check if TCG TLB > + * needs a delayed flush on ppc64 > + */ > + check_tlb_flush(env); > } > > void ppc_cpu_do_interrupt(CPUState *cs) > @@ -692,6 +697,7 @@ static void ppc_hw_interrupt(CPUPPCState *env) > __func__, env, env->pending_interrupts, > cs->interrupt_request, (int)msr_me, (int)msr_ee); > #endif > + > /* External reset */ > if (env->pending_interrupts & (1 << PPC_INTERRUPT_RESET)) { > env->pending_interrupts &= ~(1 << PPC_INTERRUPT_RESET); > @@ -896,6 +902,9 @@ static inline void do_rfi(CPUPPCState *env, target_ulong > nip, target_ulong msr, > * as rfi is always the last insn of a TB > */ > cs->interrupt_request |= CPU_INTERRUPT_EXITTB; > + > + /* Context synchronizing: check if TCG TLB needs flush */ > + check_tlb_flush(env); > } > > void helper_rfi(CPUPPCState *env) > diff --git a/target-ppc/helper.h b/target-ppc/helper.h > index 869be15..ff2d50b 100644 > --- a/target-ppc/helper.h > +++ b/target-ppc/helper.h > @@ -16,6 +16,7 @@ DEF_HELPER_1(rfmci, void, env) > DEF_HELPER_1(rfid, void, env) > DEF_HELPER_1(hrfid, void, env) > #endif > +DEF_HELPER_1(check_tlb_flush, void, env) > #endif > > DEF_HELPER_3(lmw, void, env, tl, i32) > diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h > index f7edd5b..57da931 100644 > --- a/target-ppc/helper_regs.h > +++ b/target-ppc/helper_regs.h > @@ -151,4 +151,17 @@ static inline int hreg_store_msr(CPUPPCState *env, > target_ulong value, > return excp; > } > > +#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64) > +static inline void check_tlb_flush(CPUPPCState *env) > +{ > + CPUState *cs = CPU(ppc_env_get_cpu(env)); > + if (env->tlb_need_flush) { > + env->tlb_need_flush = 0; > + tlb_flush(cs, 1); > + } > +} > +#else > +static inline void check_tlb_flush(CPUPPCState *env) { } > +#endif > + > #endif /* !defined(__HELPER_REGS_H__) */ > diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c > index 7df6ede..71e1d14 100644 > --- a/target-ppc/mmu-hash64.c > +++ b/target-ppc/mmu-hash64.c > @@ -97,10 +97,8 @@ void dump_slb(FILE *f, fprintf_function cpu_fprintf, > CPUPPCState *env) > > void helper_slbia(CPUPPCState *env) > { > - PowerPCCPU *cpu = ppc_env_get_cpu(env); > - int n, do_invalidate; > + int n; > > - do_invalidate = 0; > /* XXX: Warning: slbia never invalidates the first segment */ > for (n = 1; n < env->slb_nr; n++) { > ppc_slb_t *slb = &env->slb[n]; > @@ -111,17 +109,13 @@ void helper_slbia(CPUPPCState *env) > * and we still don't have a tlb_flush_mask(env, n, mask) > * in QEMU, we just invalidate all TLBs > */ > - do_invalidate = 1; > + env->tlb_need_flush = true; > } > } > - if (do_invalidate) { > - tlb_flush(CPU(cpu), 1); > - } > } > > void helper_slbie(CPUPPCState *env, target_ulong addr) > { > - PowerPCCPU *cpu = ppc_env_get_cpu(env); > ppc_slb_t *slb; > > slb = slb_lookup(env, addr); > @@ -136,7 +130,7 @@ void helper_slbie(CPUPPCState *env, target_ulong addr) > * and we still don't have a tlb_flush_mask(env, n, mask) > * in QEMU, we just invalidate all TLBs > */ > - tlb_flush(CPU(cpu), 1); > + env->tlb_need_flush = true; > } > } > > diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c > index e52d0e5..54bc5d1 100644 > --- a/target-ppc/mmu_helper.c > +++ b/target-ppc/mmu_helper.c > @@ -23,6 +23,7 @@ > #include "mmu-hash64.h" > #include "mmu-hash32.h" > #include "exec/cpu_ldst.h" > +#include "helper_regs.h" > > //#define DEBUG_MMU > //#define DEBUG_BATS > @@ -1940,6 +1941,7 @@ void ppc_tlb_invalidate_all(CPUPPCState *env) > case POWERPC_MMU_2_03: > case POWERPC_MMU_2_06: > case POWERPC_MMU_2_07: > + env->tlb_need_flush = 0; > #endif /* defined(TARGET_PPC64) */ > tlb_flush(CPU(cpu), 1); > break;
Any particular reason you're leaving this one as an immediate rather than deferred flush? > @@ -2019,7 +2021,7 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, > target_ulong addr) > * and we still don't have a tlb_flush_mask(env, n, mask) in > QEMU, > * we just invalidate all TLBs > */ > - tlb_flush(CPU(cpu), 1); > + env->tlb_need_flush = 1; > break; > #endif /* defined(TARGET_PPC64) */ > default: > @@ -2904,6 +2906,11 @@ void helper_booke206_tlbflush(CPUPPCState *env, > target_ulong type) > } > > > +void helper_check_tlb_flush(CPUPPCState *env) > +{ > + check_tlb_flush(env); > +} > + > > /*****************************************************************************/ > > /* try to fill the TLB and return an exception if error. If retaddr is > diff --git a/target-ppc/translate.c b/target-ppc/translate.c > index 6d9f252..e18d204 100644 > --- a/target-ppc/translate.c > +++ b/target-ppc/translate.c > @@ -3299,9 +3299,32 @@ static void gen_eieio(DisasContext *ctx) > { > } > > +#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64) > +static inline void gen_check_tlb_flush(DisasContext *ctx) > +{ > + TCGv_i32 t = tcg_temp_new_i32(); > + TCGLabel *l = gen_new_label(); > + > + tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush)); > + tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l); > + gen_helper_check_tlb_flush(cpu_env); > + gen_set_label(l); > + tcg_temp_free_i32(t); > +} > +#else > +static inline void gen_check_tlb_flush(DisasContext *ctx) { } > +#endif > + > /* isync */ > static void gen_isync(DisasContext *ctx) > { > + /* > + * We need to check for a pending TLB flush. This can only happen in > + * kernel mode however so check MSR_PR > + */ > + if (!ctx->pr) { > + gen_check_tlb_flush(ctx); > + } > gen_stop_exception(ctx); > } > > @@ -3458,6 +3481,15 @@ STCX(stqcx_, 16); > /* sync */ > static void gen_sync(DisasContext *ctx) > { > + uint32_t l = (ctx->opcode >> 21) & 3; > + > + /* > + * For l == 2, it's a ptesync, We need to check for a pending TLB flush. > + * This can only happen in kernel mode however so check MSR_PR as well. > + */ > + if (l == 2 && !ctx->pr) { > + gen_check_tlb_flush(ctx); > + } > } > > /* wait */ > @@ -4851,10 +4883,11 @@ static void gen_tlbsync(DisasContext *ctx) > gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC); > return; > } > - /* This has no effect: it should ensure that all previous > - * tlbie have completed > + /* tlbsync is a nop for server, ptesync handles delayed tlb flush, > + * embedded however needs to deal with tlbsync. We don't try to be > + * fancy and swallow the overhead of checking for both. > */ > - gen_stop_exception(ctx); > + gen_check_tlb_flush(ctx); > #endif > } > Should you be clearing the pending flush flag cpu_reset()? -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
signature.asc
Description: PGP signature