On 5/23/2023 8:45 AM, Richard Henderson wrote: > On 5/18/23 06:57, Fei Wu wrote: >> +void HELPER(inc_exec_freq)(void *ptr) >> +{ >> + TBStatistics *stats = (TBStatistics *) ptr; >> + tcg_debug_assert(stats); >> + ++stats->executions.normal; >> +} > ... >> +static inline void gen_tb_exec_count(TranslationBlock *tb) >> +{ >> + if (tb_stats_enabled(tb, TB_EXEC_STATS)) { >> + TCGv_ptr ptr = tcg_temp_new_ptr(); >> + tcg_gen_movi_ptr(ptr, (intptr_t)tb->tb_stats); >> + gen_helper_inc_exec_freq(ptr); >> + } >> +} > > This is 3 host instructions, easily expanded inline: > > --- a/accel/tcg/translator.c > +++ b/accel/tcg/translator.c > @@ -11,6 +11,7 @@ > #include "qemu/error-report.h" > #include "tcg/tcg.h" > #include "tcg/tcg-op.h" > +#include "tcg/tcg-temp-internal.h" > #include "exec/exec-all.h" > #include "exec/gen-icount.h" > #include "exec/log.h" > @@ -18,6 +19,30 @@ > #include "exec/plugin-gen.h" > #include "exec/replay-core.h" > > + > +static void gen_tb_exec_count(TranslationBlock *tb) > +{ > + if (tb_stats_enabled(tb, TB_EXEC_STATS)) { > + TCGv_ptr ptr = tcg_temp_ebb_new_ptr(); > + > + tcg_gen_movi_ptr(ptr, (intptr_t)&tb->tb_stats->executions.normal); > + if (sizeof(tb->tb_stats->executions.normal) == 4) { > + TCGv_i32 t = tcg_temp_ebb_new_i32(); > + tcg_gen_ld_i32(t, ptr, 0); > + tcg_gen_addi_i32(t, t, 1); > + tcg_gen_st_i32(t, ptr, 0); > + tcg_temp_free_i32(t); > + } else { > + TCGv_i64 t = tcg_temp_ebb_new_i64(); > + tcg_gen_ld_i64(t, ptr, 0); > + tcg_gen_addi_i64(t, t, 1); > + tcg_gen_st_i64(t, ptr, 0); > + tcg_temp_free_i64(t); > + } > + tcg_temp_free_ptr(ptr); > + } > +} > + > bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest) > { > /* Suppress goto_tb if requested. */ > > > I'm not especially keen on embedding the TBStatistics pointer directly > like this; for most hosts we will have to put this constant into the > constant pool. 
Whereas the pointer already exists at tb->tb_stats, and > tb is at a constant displacement prior to the code, so we already have > mechanisms for generating pc-relative addresses. > > However, that's premature optimization. Let's get it working first. > Richard, have you reviewed the whole series? I will integrate your change into the next version.
Thanks, Fei. > > r~ >