From: "Vanderson M. do Rosario" <vanderson...@gmail.com> Replace all others CONFIG_PROFILER statistics and migrate it to TBStatistics system. However, TCGProfiler still exists and can be use to store global statistics and times. All TB related statistics goes to TBStatistics.
Signed-off-by: Vanderson M. do Rosario <vanderson...@gmail.com> Message-Id: <20190829173437.5926-6-vanderson...@gmail.com> [AJB: fix authorship] Signed-off-by: Alex Bennée <alex.ben...@linaro.org> --- accel/tcg/tb-stats.c | 98 +++++++++++++++++++++--------- accel/tcg/translate-all.c | 46 ++++++++------- configure | 3 - cpus.c | 14 ++--- include/exec/tb-stats.h | 15 +++++ include/qemu/timer.h | 5 +- monitor/misc.c | 28 ++------- tcg/tcg.c | 121 ++++++++++++-------------------------- tcg/tcg.h | 2 +- vl.c | 8 +-- 10 files changed, 159 insertions(+), 181 deletions(-) diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c index 0e64c176b3..f431159fd2 100644 --- a/accel/tcg/tb-stats.c +++ b/accel/tcg/tb-stats.c @@ -17,11 +17,18 @@ #include "exec/tb-stats.h" /* TBStatistic collection controls */ -enum TBStatsStatus { TB_STATS_RUNNING, TB_STATS_PAUSED, TB_STATS_STOPPED }; +enum TBStatsStatus { + TB_STATS_DISABLED = 0, + TB_STATS_RUNNING, + TB_STATS_PAUSED, + TB_STATS_STOPPED +}; static enum TBStatsStatus tcg_collect_tb_stats; static uint32_t default_tbstats_flag; +uint64_t dev_time; + struct jit_profile_info { uint64_t translations; uint64_t aborted; @@ -33,6 +40,13 @@ struct jit_profile_info { uint64_t host; uint64_t guest; uint64_t search_data; + + uint64_t interm_time; + uint64_t code_time; + uint64_t restore_count; + uint64_t restore_time; + uint64_t opt_time; + uint64_t la_time; }; /* accumulate the statistics from all TBs */ @@ -54,6 +68,29 @@ static void collect_jit_profile_info(void *p, uint32_t hash, void *userp) jpi->host += tbs->code.out_len; jpi->guest += tbs->code.in_len; jpi->search_data += tbs->code.search_out_len; + + jpi->interm_time += stat_per_translation(tbs, time.interm); + jpi->code_time += stat_per_translation(tbs, time.code); + jpi->opt_time += stat_per_translation(tbs, time.opt); + jpi->la_time += stat_per_translation(tbs, time.la); + jpi->restore_time += tbs->time.restore; + jpi->restore_count += tbs->time.restore_count; +} + +void dump_jit_exec_time_info(uint64_t dev_time) +{ + static uint64_t last_cpu_exec_time; + uint64_t cpu_exec_time; + uint64_t delta; + + cpu_exec_time = tcg_cpu_exec_time(); + delta = cpu_exec_time - last_cpu_exec_time; + + qemu_printf("async time %" PRId64 " (%0.3f)\n", + dev_time, dev_time / (double) NANOSECONDS_PER_SECOND); + qemu_printf("qemu time %" PRId64 " (%0.3f)\n", + delta, delta / (double) NANOSECONDS_PER_SECOND); + last_cpu_exec_time = cpu_exec_time; } /* dump JIT statisticis using TCGProfile and TBStats */ @@ -80,34 +117,39 @@ void dump_jit_profile_info(TCGProfile *s) qemu_printf("avg search data/TB %0.1f\n", jpi->search_data / (double) jpi->translations); + uint64_t tot = jpi->interm_time + jpi->code_time; + + qemu_printf("JIT cycles %" PRId64 " (%0.3fs at 2.4 GHz)\n", + tot, tot / 2.4e9); + qemu_printf(" cycles/op %0.1f\n", + jpi->ops ? (double)tot / jpi->ops : 0); + qemu_printf(" cycles/in byte %0.1f\n", + jpi->guest ? (double)tot / jpi->guest : 0); + qemu_printf(" cycles/out byte %0.1f\n", + jpi->host ? (double)tot / jpi->host : 0); + qemu_printf(" cycles/search byte %0.1f\n", + jpi->search_data ? (double)tot / jpi->search_data : 0); + if (tot == 0) { + tot = 1; + } + + qemu_printf(" gen_interm time %0.1f%%\n", + (double)jpi->interm_time / tot * 100.0); + qemu_printf(" gen_code time %0.1f%%\n", + (double)jpi->code_time / tot * 100.0); + + qemu_printf(" optim./code time %0.1f%%\n", + (double)jpi->opt_time / (jpi->code_time ? jpi->code_time : 1) * 100.0); + qemu_printf(" liveness/code time %0.1f%%\n", + (double)jpi->la_time / (jpi->code_time ? jpi->code_time : 1) * 100.0); + + qemu_printf("cpu_restore count %" PRId64 "\n", jpi->restore_count); + qemu_printf(" avg cycles %0.1f\n", + jpi->restore_count ? (double)jpi->restore_time / jpi->restore_count : 0); + if (s) { - int64_t tot = s->interm_time + s->code_time; - qemu_printf("JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n", - tot, tot / 2.4e9); - qemu_printf("cycles/op %0.1f\n", - jpi->ops ? (double)tot / jpi->ops : 0); - qemu_printf("cycles/in byte %0.1f\n", - jpi->guest ? (double)tot / jpi->guest : 0); - qemu_printf("cycles/out byte %0.1f\n", - jpi->host ? (double)tot / jpi->host : 0); - qemu_printf("cycles/search byte %0.1f\n", - jpi->search_data ? (double)tot / jpi->search_data : 0); - if (tot == 0) { - tot = 1; - } - qemu_printf(" gen_interm time %0.1f%%\n", - (double)s->interm_time / tot * 100.0); - qemu_printf(" gen_code time %0.1f%%\n", - (double)s->code_time / tot * 100.0); - qemu_printf("optim./code time %0.1f%%\n", - (double)s->opt_time / (s->code_time ? s->code_time : 1) - * 100.0); - qemu_printf("liveness/code time %0.1f%%\n", - (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0); - qemu_printf("cpu_restore count %" PRId64 "\n", - s->restore_count); - qemu_printf(" avg cycles %0.1f\n", - s->restore_count ? (double)s->restore_time / s->restore_count : 0); + qemu_printf("cpu exec time %" PRId64 " (%0.3fs)\n", + s->cpu_exec_time, s->cpu_exec_time / (double) NANOSECONDS_PER_SECOND); } } g_free(jpi); diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 84f759a829..396e63c3e7 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -337,10 +337,11 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, CPUArchState *env = cpu->env_ptr; uint8_t *p = tb->tc.ptr + tb->tc.size; int i, j, num_insns = tb->icount; -#ifdef CONFIG_PROFILER - TCGProfile *prof = &tcg_ctx->prof; - int64_t ti = profile_getclock(); -#endif + uint64_t ti = 0; + + if (tb_stats_enabled(tb, TB_JIT_TIME)) { + ti = profile_getclock(); + } searched_pc -= GETPC_ADJ; @@ -370,11 +371,11 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, } restore_state_to_opc(env, tb, data); -#ifdef CONFIG_PROFILER - atomic_set(&prof->restore_time, - prof->restore_time + profile_getclock() - ti); - atomic_set(&prof->restore_count, prof->restore_count + 1); -#endif + if (tb_stats_enabled(tb, TB_JIT_TIME)) { + atomic_add(&tb->tb_stats->time.restore, profile_getclock() - ti); + atomic_inc(&tb->tb_stats->time.restore_count); + } + return 0; } @@ -1731,9 +1732,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tcg_insn_unit *gen_code_buf; int gen_code_size, search_size, max_insns; TCGProfile *prof = &tcg_ctx->prof; -#ifdef CONFIG_PROFILER - int64_t ti; -#endif + uint64_t ti = 0; + assert_memory_lock(); phys_pc = get_page_addr_code(env, pc); @@ -1779,9 +1779,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tcg_ctx->tb_cflags = cflags; tb_overflow: -#ifdef CONFIG_PROFILER - ti = profile_getclock(); -#endif /* * We want to fetch the stats structure before we start code @@ -1801,6 +1798,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu, if (flag & TB_JIT_STATS) { tb->tb_stats->stats_enabled |= TB_JIT_STATS; } + + if (flag & TB_JIT_TIME) { + tb->tb_stats->stats_enabled |= TB_JIT_TIME; + ti = profile_getclock(); + } } else { tb->tb_stats = NULL; } @@ -1826,10 +1828,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg; } -#ifdef CONFIG_PROFILER - atomic_set(&prof->interm_time, prof->interm_time + profile_getclock() - ti); - ti = profile_getclock(); -#endif + if (tb_stats_enabled(tb, TB_JIT_TIME)) { + atomic_add(&tb->tb_stats->time.interm, profile_getclock() - ti); + ti = profile_getclock(); + } gen_code_size = tcg_gen_code(tcg_ctx, tb); if (unlikely(gen_code_size < 0)) { @@ -1871,9 +1873,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu, } tb->tc.size = gen_code_size; -#ifdef CONFIG_PROFILER - atomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti); -#endif + if (tb_stats_enabled(tb, TB_JIT_TIME)) { + atomic_add(&tb->tb_stats->time.code, profile_getclock() - ti); + } #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) && diff --git a/configure b/configure index 8f8446f52b..eedeb9016e 100755 --- a/configure +++ b/configure @@ -6566,9 +6566,6 @@ fi if test "$static" = "yes" ; then echo "CONFIG_STATIC=y" >> $config_host_mak fi -if test "$profiler" = "yes" ; then - echo "CONFIG_PROFILER=y" >> $config_host_mak -fi if test "$want_tools" = "yes" ; then echo "CONFIG_TOOLS=y" >> $config_host_mak fi diff --git a/cpus.c b/cpus.c index d2c61ff155..3c4bce0e56 100644 --- a/cpus.c +++ b/cpus.c @@ -1442,21 +1442,17 @@ static void process_icount_data(CPUState *cpu) static int tcg_cpu_exec(CPUState *cpu) { int ret; -#ifdef CONFIG_PROFILER - int64_t ti; -#endif + uint64_t ti; assert(tcg_enabled()); -#ifdef CONFIG_PROFILER ti = profile_getclock(); -#endif + cpu_exec_start(cpu); ret = cpu_exec(cpu); cpu_exec_end(cpu); -#ifdef CONFIG_PROFILER - atomic_set(&tcg_ctx->prof.cpu_exec_time, - tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti); -#endif + + atomic_add(&tcg_ctx->prof.cpu_exec_time, profile_getclock() - ti); + return ret; } diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h index c779d6a183..a142972960 100644 --- a/include/exec/tb-stats.h +++ b/include/exec/tb-stats.h @@ -90,10 +90,24 @@ struct TBStatistics { * this TBStats structure. Has to be reset on a tb_flush. */ GPtrArray *tbs; + + /* end of jit_stats_lock */ + + /* These are accessed with atomic operations */ + struct { + int64_t restore; + uint64_t restore_count; + int64_t interm; + int64_t code; + int64_t opt; + int64_t la; + } time; }; bool tb_stats_cmp(const void *ap, const void *bp); +void dump_jit_exec_time_info(uint64_t dev_time); + void init_tb_stats_htable_if_not(void); void dump_jit_profile_info(TCGProfile *s); @@ -101,6 +115,7 @@ void dump_jit_profile_info(TCGProfile *s); #define TB_NOTHING (1 << 0) #define TB_EXEC_STATS (1 << 1) #define TB_JIT_STATS (1 << 2) +#define TB_JIT_TIME (1 << 3) void enable_collect_tb_stats(void); void disable_collect_tb_stats(void); diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 85bc6eb00b..641ca3ddd0 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -1006,13 +1006,10 @@ static inline int64_t cpu_get_host_ticks(void) } #endif -#ifdef CONFIG_PROFILER static inline int64_t profile_getclock(void) { return get_clock(); } -extern int64_t dev_time; -#endif - +extern uint64_t dev_time; #endif diff --git a/monitor/misc.c b/monitor/misc.c index aef16f6cfb..ac4ff58d96 100644 --- a/monitor/misc.c +++ b/monitor/misc.c @@ -472,6 +472,11 @@ static void hmp_info_jit(Monitor *mon, const QDict *qdict) static void hmp_info_opcount(Monitor *mon, const QDict *qdict) { + if (!tb_stats_collection_enabled()) { + error_report("TB information not being recorded."); + return; + } + dump_opcount_info(); } #endif @@ -1082,32 +1087,11 @@ static void hmp_info_mtree(Monitor *mon, const QDict *qdict) mtree_info(flatview, dispatch_tree, owner); } -#ifdef CONFIG_PROFILER - -int64_t dev_time; - static void hmp_info_profile(Monitor *mon, const QDict *qdict) { - static int64_t last_cpu_exec_time; - int64_t cpu_exec_time; - int64_t delta; - - cpu_exec_time = tcg_cpu_exec_time(); - delta = cpu_exec_time - last_cpu_exec_time; - - monitor_printf(mon, "async time %" PRId64 " (%0.3f)\n", - dev_time, dev_time / (double)NANOSECONDS_PER_SECOND); - monitor_printf(mon, "qemu time %" PRId64 " (%0.3f)\n", - delta, delta / (double)NANOSECONDS_PER_SECOND); - last_cpu_exec_time = cpu_exec_time; + dump_jit_exec_time_info(dev_time); dev_time = 0; } -#else -static void hmp_info_profile(Monitor *mon, const QDict *qdict) -{ - monitor_printf(mon, "Internal profiler not compiled\n"); -} -#endif /* Capture support */ static QLIST_HEAD (capture_list_head, CaptureState) capture_head; diff --git a/tcg/tcg.c b/tcg/tcg.c index 9c199c8a40..e1438fa0d9 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -3881,82 +3881,34 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) } } -#ifdef CONFIG_PROFILER - /* avoid copy/paste errors */ #define PROF_ADD(to, from, field) \ do { \ (to)->field += atomic_read(&((from)->field)); \ } while (0) -#define PROF_MAX(to, from, field) \ - do { \ - typeof((from)->field) val__ = atomic_read(&((from)->field)); \ - if (val__ > (to)->field) { \ - (to)->field = val__; \ - } \ - } while (0) - -/* Pass in a zero'ed @prof */ -static inline -void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) +static void collect_tcg_profiler(TCGProfile *prof) { + int i; unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); - unsigned int i; for (i = 0; i < n_ctxs; i++) { TCGContext *s = atomic_read(&tcg_ctxs[i]); const TCGProfile *orig = &s->prof; - if (counters) { - PROF_ADD(prof, orig, cpu_exec_time); - PROF_ADD(prof, orig, interm_time); - PROF_ADD(prof, orig, code_time); - PROF_ADD(prof, orig, la_time); - PROF_ADD(prof, orig, opt_time); - PROF_ADD(prof, orig, restore_count); - PROF_ADD(prof, orig, restore_time); - } - if (table) { - int i; + PROF_ADD(prof, orig, cpu_exec_time); - for (i = 0; i < NB_OPS; i++) { - PROF_ADD(prof, orig, table_op_count[i]); - } + for (i = 0; i < NB_OPS; i++) { + PROF_ADD(prof, orig, table_op_count[i]); } } } -#undef PROF_ADD -#undef PROF_MAX - -static void tcg_profile_snapshot_counters(TCGProfile *prof) -{ - tcg_profile_snapshot(prof, true, false); -} - -static void tcg_profile_snapshot_table(TCGProfile *prof) -{ - tcg_profile_snapshot(prof, false, true); -} - -void tcg_dump_op_count(void) -{ - TCGProfile prof = {}; - int i; - - tcg_profile_snapshot_table(&prof); - for (i = 0; i < NB_OPS; i++) { - qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name, - prof.table_op_count[i]); - } -} - -int64_t tcg_cpu_exec_time(void) +uint64_t tcg_cpu_exec_time(void) { unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); unsigned int i; - int64_t ret = 0; + uint64_t ret = 0; for (i = 0; i < n_ctxs; i++) { const TCGContext *s = atomic_read(&tcg_ctxs[i]); @@ -3966,19 +3918,6 @@ int64_t tcg_cpu_exec_time(void) } return ret; } -#else -void tcg_dump_op_count(void) -{ - qemu_printf("[TCG profiler not compiled]\n"); -} - -int64_t tcg_cpu_exec_time(void) -{ - error_report("%s: TCG profiler not compiled", __func__); - exit(EXIT_FAILURE); -} -#endif - int tcg_gen_code(TCGContext *s, TranslationBlock *tb) { @@ -4020,18 +3959,18 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) } #endif -#ifdef CONFIG_PROFILER - atomic_set(&prof->opt_time, prof->opt_time - profile_getclock()); -#endif + if (tb_stats_enabled(tb, TB_JIT_TIME)) { + atomic_add(&tb->tb_stats->time.opt, -profile_getclock()); + } #ifdef USE_TCG_OPTIMIZATIONS tcg_optimize(s); #endif -#ifdef CONFIG_PROFILER - atomic_set(&prof->opt_time, prof->opt_time + profile_getclock()); - atomic_set(&prof->la_time, prof->la_time - profile_getclock()); -#endif + if (tb_stats_enabled(tb, TB_JIT_TIME)) { + atomic_add(&tb->tb_stats->time.opt, profile_getclock()); + atomic_add(&tb->tb_stats->time.la, -profile_getclock()); + } reachable_code_pass(s); liveness_pass_1(s); @@ -4054,9 +3993,9 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) } } -#ifdef CONFIG_PROFILER - atomic_set(&prof->la_time, prof->la_time + profile_getclock()); -#endif + if (tb_stats_enabled(tb, TB_JIT_TIME)) { + atomic_add(&tb->tb_stats->time.la, profile_getclock()); + } #ifdef DEBUG_DISAS if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) @@ -4081,14 +4020,17 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) s->pool_labels = NULL; #endif + if (!tb_stats_collection_enabled()) { + QTAILQ_FOREACH(op, &s->ops, link) { + TCGOpcode opc = op->opc; + atomic_add(&s->prof.table_op_count[opc], 1); + } + } + num_insns = -1; QTAILQ_FOREACH(op, &s->ops, link) { TCGOpcode opc = op->opc; -#ifdef CONFIG_PROFILER - atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1); -#endif - switch (opc) { case INDEX_op_mov_i32: case INDEX_op_mov_i64: @@ -4181,14 +4123,23 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) return tcg_current_code_size(s); } +void tcg_dump_op_count(void) +{ + TCGProfile prof = {}; + int i; + + collect_tcg_profiler(&prof); + for (i = 0; i < NB_OPS; i++) { + qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name, prof.table_op_count[i]); + } +} + void tcg_dump_info(void) { TCGProfile *s = NULL; -#ifdef CONFIG_PROFILER TCGProfile prof = {}; - tcg_profile_snapshot_counters(&prof); s = &prof; -#endif + collect_tcg_profiler(s); dump_jit_profile_info(s); } diff --git a/tcg/tcg.h b/tcg/tcg.h index 5296f7c075..6dcbbf79e7 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -948,7 +948,7 @@ int tcg_check_temp_count(void); #define tcg_check_temp_count() 0 #endif -int64_t tcg_cpu_exec_time(void); +uint64_t tcg_cpu_exec_time(void); void tcg_dump_info(void); void tcg_dump_op_count(void); diff --git a/vl.c b/vl.c index 002bf4919e..f91bc6fb4c 100644 --- a/vl.c +++ b/vl.c @@ -1796,17 +1796,11 @@ static bool main_loop_should_exit(void) static void main_loop(void) { -#ifdef CONFIG_PROFILER - int64_t ti; -#endif + uint64_t ti; while (!main_loop_should_exit()) { -#ifdef CONFIG_PROFILER ti = profile_getclock(); -#endif main_loop_wait(false); -#ifdef CONFIG_PROFILER dev_time += profile_getclock() - ti; -#endif } } -- 2.20.1