vandersonmr <vanderson...@gmail.com> writes:
> Filling other tb statistics such as number of times the > tb is compiled, its number of guest/host/IR instructions... > > Signed-off-by: vandersonmr <vanderson...@gmail.com> > --- > accel/tcg/translate-all.c | 14 +++++ > accel/tcg/translator.c | 4 ++ > disas.c | 107 ++++++++++++++++++++++++++++++++++++++ > include/disas/disas.h | 1 + > tcg/tcg.c | 8 +++ > 5 files changed, 134 insertions(+) > > diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c > index d05803a142..9ee7232bb8 100644 > --- a/accel/tcg/translate-all.c > +++ b/accel/tcg/translate-all.c > @@ -1865,6 +1865,17 @@ TranslationBlock *tb_gen_code(CPUState *cpu, > atomic_set(&prof->search_out_len, prof->search_out_len + search_size); > #endif > > + if (qemu_loglevel_mask(CPU_LOG_HOT_TBS) && > qemu_log_in_addr_range(tb->pc)) { This should be a different flag - CPU_LOG_JIT_STATS? Also enable it on TBStats creation and check the per-TB flag (tb_stats_enabled(tb, JIT_STATS)). > + size_t code_size = gen_code_size; > + if (tcg_ctx->data_gen_ptr) { > + code_size = tcg_ctx->data_gen_ptr - tb->tc.ptr; > + } > + qemu_log_lock(); Wrong lock, and not needed if you are using atomics. If you did want a lock, you could move get_num_insts out of the locked region, as it doesn't need protection, and thereby reduce contention on the lock. > + atomic_set(&tb->tb_stats->code.num_host_inst, > + get_num_insts(tb->tc.ptr, code_size)); atomic_add here, and then later, when we present the data, divide by the number of translations? 
> + qemu_log_unlock(); > + } > + > #ifdef DEBUG_DISAS > if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) && > qemu_log_in_addr_range(tb->pc)) { > @@ -1922,6 +1933,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu, > phys_page2 = -1; > if ((pc & TARGET_PAGE_MASK) != virt_page2) { > phys_page2 = get_page_addr_code(env, virt_page2); > + if (tb->tb_stats) { > + atomic_inc(&tb->tb_stats->translations.spanning); > + } > } > /* > * No explicit memory barrier is required -- tb_link_page() makes the > diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c > index cc06070e7e..d2529ca97d 100644 > --- a/accel/tcg/translator.c > +++ b/accel/tcg/translator.c > @@ -117,6 +117,10 @@ void translator_loop(const TranslatorOps *ops, > DisasContextBase *db, > db->tb->size = db->pc_next - db->pc_first; > db->tb->icount = db->num_insns; > > + if (qemu_loglevel_mask(CPU_LOG_HOT_TBS) && > qemu_log_in_addr_range(tb->pc)) { > + db->tb->tb_stats->code.num_guest_inst = db->num_insns; > + } Leave the in_addr range check to the TBStats creation, use tb_stats_enabled(tb, JIT_STATS). > + > #ifdef DEBUG_DISAS > if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) > && qemu_log_in_addr_range(db->pc_first)) { > diff --git a/disas.c b/disas.c > index 3e2bfa572b..f5ae9c009a 100644 > --- a/disas.c > +++ b/disas.c > @@ -475,6 +475,113 @@ void target_disas(FILE *out, CPUState *cpu, > target_ulong code, > } > } > > + > +static int fprintf_fake(struct _IO_FILE *a, const char *b, ...) > +{ > + return 1; > +} > + > +/* > + * This is a work around to get the number of host instructions with > + * a small effort. It reuses the disas function with a fake printf to > + * print nothing but count the number of instructions. 
> + * > + */ > +unsigned get_num_insts(void *code, unsigned long size) > +{ > + uintptr_t pc; > + int count; > + CPUDebug s; > + int (*print_insn)(bfd_vma pc, disassemble_info *info) = NULL; > + > + INIT_DISASSEMBLE_INFO(s.info, NULL, fprintf_fake); > + s.info.print_address_func = generic_print_host_address; > + > + s.info.buffer = code; > + s.info.buffer_vma = (uintptr_t)code; > + s.info.buffer_length = size; > + s.info.cap_arch = -1; > + s.info.cap_mode = 0; > + s.info.cap_insn_unit = 4; > + s.info.cap_insn_split = 4; > + > +#ifdef HOST_WORDS_BIGENDIAN > + s.info.endian = BFD_ENDIAN_BIG; > +#else > + s.info.endian = BFD_ENDIAN_LITTLE; > +#endif > +#if defined(CONFIG_TCG_INTERPRETER) > + print_insn = print_insn_tci; > +#elif defined(__i386__) > + s.info.mach = bfd_mach_i386_i386; > + print_insn = print_insn_i386; > + s.info.cap_arch = CS_ARCH_X86; > + s.info.cap_mode = CS_MODE_32; > + s.info.cap_insn_unit = 1; > + s.info.cap_insn_split = 8; > +#elif defined(__x86_64__) > + s.info.mach = bfd_mach_x86_64; > + print_insn = print_insn_i386; > + s.info.cap_arch = CS_ARCH_X86; > + s.info.cap_mode = CS_MODE_64; > + s.info.cap_insn_unit = 1; > + s.info.cap_insn_split = 8; > +#elif defined(_ARCH_PPC) > + s.info.disassembler_options = (char *)"any"; > + print_insn = print_insn_ppc; > + s.info.cap_arch = CS_ARCH_PPC; > +# ifdef _ARCH_PPC64 > + s.info.cap_mode = CS_MODE_64; > +# endif > +#elif defined(__riscv) && defined(CONFIG_RISCV_DIS) > +#if defined(_ILP32) || (__riscv_xlen == 32) > + print_insn = print_insn_riscv32; > +#elif defined(_LP64) > + print_insn = print_insn_riscv64; > +#else > +#error unsupported RISC-V ABI > +#endif > +#elif defined(__aarch64__) && defined(CONFIG_ARM_A64_DIS) > + print_insn = print_insn_arm_a64; > + s.info.cap_arch = CS_ARCH_ARM64; > +#elif defined(__alpha__) > + print_insn = print_insn_alpha; > +#elif defined(__sparc__) > + print_insn = print_insn_sparc; > + s.info.mach = bfd_mach_sparc_v9b; > +#elif defined(__arm__) > + print_insn = 
print_insn_arm; > + s.info.cap_arch = CS_ARCH_ARM; > + /* TCG only generates code for arm mode. */ > +#elif defined(__MIPSEB__) > + print_insn = print_insn_big_mips; > +#elif defined(__MIPSEL__) > + print_insn = print_insn_little_mips; > +#elif defined(__m68k__) > + print_insn = print_insn_m68k; > +#elif defined(__s390__) > + print_insn = print_insn_s390; > +#elif defined(__hppa__) > + print_insn = print_insn_hppa; > +#endif > + > + if (print_insn == NULL) { > + print_insn = print_insn_od_host; > + } I feel the above should probably be wrapped in a function and called from the various disas functions to initialise the data. DRY. > + > + s.info.fprintf_func = fprintf_fake; > + unsigned num_insts = 0; > + for (pc = (uintptr_t)code; size > 0; pc += count, size -= count) { > + num_insts++; > + count = print_insn(pc, &s.info); > + if (count < 0) { > + break; > + } > + } > + return num_insts; > +} > + > + > /* Disassemble this for me please... (debugging). */ > void disas(FILE *out, void *code, unsigned long size) > { > diff --git a/include/disas/disas.h b/include/disas/disas.h > index 15da511f49..9797ae7cfa 100644 > --- a/include/disas/disas.h > +++ b/include/disas/disas.h > @@ -7,6 +7,7 @@ > > /* Disassemble this for me please... (debugging). */ > void disas(FILE *out, void *code, unsigned long size); > +unsigned get_num_insts(void *code, unsigned long size); > void target_disas(FILE *out, CPUState *cpu, target_ulong code, > target_ulong size); > > diff --git a/tcg/tcg.c b/tcg/tcg.c > index 02a2680169..bd57bb642b 100644 > --- a/tcg/tcg.c > +++ b/tcg/tcg.c > @@ -4072,6 +4072,14 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) > atomic_set(&prof->la_time, prof->la_time + profile_getclock()); > #endif > > + if (qemu_loglevel_mask(CPU_LOG_HOT_TBS) && > qemu_log_in_addr_range(tb->pc)) { local tb flag check > + int n = 0; > + QTAILQ_FOREACH(op, &s->ops, link) { > + n++; > + } > + tb->tb_stats->code.num_tcg_inst = n; atomic_add and maybe num_tcg_ops? 
It would probably be worth checking the op count before and after optimisation. > + } > + > #ifdef DEBUG_DISAS > if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) > && qemu_log_in_addr_range(tb->pc))) { -- Alex Bennée