To store statistics for each TB we created a TBStatistics structure which is linked with the TBs. The TBStatistics can stay alive after tb_flush and be relinked to a regenerated TB. So the statistics can be accumulated even through flushes.
TBStatistics will be also referred to as TBS or tbstats. Signed-off-by: Vanderson M. do Rosario <vanderson...@gmail.com> --- accel/tcg/translate-all.c | 57 +++++++++++++++++++++++++++++++++++++++ include/exec/exec-all.h | 15 +++-------- include/exec/tb-context.h | 12 +++++++++ include/exec/tb-hash.h | 7 +++++ include/exec/tb-stats.h | 29 ++++++++++++++++++++ include/qemu/log.h | 6 +++++ util/log.c | 28 +++++++++++++++++++ 7 files changed, 143 insertions(+), 11 deletions(-) create mode 100644 include/exec/tb-stats.h diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 5d1e08b169..a574890a80 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1118,6 +1118,23 @@ static inline void code_gen_alloc(size_t tb_size) } } +/* + * This is the more or less the same compare, but the data persists + * over tb_flush. We also aggregate the various variations of cflags + * under one record and ignore the details of page overlap (although + * we can count it). + */ +bool tb_stats_cmp(const void *ap, const void *bp) +{ + const TBStatistics *a = ap; + const TBStatistics *b = bp; + + return a->phys_pc == b->phys_pc && + a->pc == b->pc && + a->cs_base == b->cs_base && + a->flags == b->flags; +} + static bool tb_cmp(const void *ap, const void *bp) { const TranslationBlock *a = ap; @@ -1137,6 +1154,9 @@ static void tb_htable_init(void) unsigned int mode = QHT_MODE_AUTO_RESIZE; qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode); + if (tb_stats_collection_enabled()) { + qht_init(&tb_ctx.tb_stats, tb_stats_cmp, CODE_GEN_HTABLE_SIZE, mode); + } } /* Must be called before using the QEMU cpus. 'tb_size' is the size @@ -1666,6 +1686,32 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, return tb; } +static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc, + target_ulong cs_base, uint32_t flags) +{ + TBStatistics *new_stats = g_new0(TBStatistics, 1); + uint32_t hash = tb_stats_hash_func(phys_pc, pc, flags); + void *existing_stats = NULL; + new_stats->phys_pc = phys_pc; + new_stats->pc = pc; + new_stats->cs_base = cs_base; + new_stats->flags = flags; + + qht_insert(&tb_ctx.tb_stats, new_stats, hash, &existing_stats); + + if (unlikely(existing_stats)) { + /* + * If there is already a TBStatistic for this TB from a previous flush + * then just make the new TB point to the older TBStatistic + */ + g_free(new_stats); + return existing_stats; + } else { + return new_stats; + } +} + + /* Called with mmap_lock held for user mode emulation. */ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, target_ulong cs_base, @@ -1732,6 +1778,17 @@ TranslationBlock *tb_gen_code(CPUState *cpu, ti = profile_getclock(); #endif + /* + * We want to fetch the stats structure before we start code + * generation so we can count interesting things about this + * generation. + */ + if (tb_stats_collection_enabled()) { + tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags); + } else { + tb->tb_stats = NULL; + } + tcg_func_start(tcg_ctx); tcg_ctx->cpu = env_cpu(env); diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 16034ee651..24bd6a0a0c 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -22,21 +22,11 @@ #include "exec/tb-context.h" #include "sysemu/cpus.h" +#include "exec/tb-stats.h" /* allow to see translation results - the slowdown should be negligible, so we leave it */ #define DEBUG_DISAS -/* Page tracking code uses ram addresses in system mode, and virtual - addresses in userspace mode. Define tb_page_addr_t to be an appropriate - type. */ -#if defined(CONFIG_USER_ONLY) -typedef abi_ulong tb_page_addr_t; -#define TB_PAGE_ADDR_FMT TARGET_ABI_FMT_lx -#else -typedef ram_addr_t tb_page_addr_t; -#define TB_PAGE_ADDR_FMT RAM_ADDR_FMT -#endif - #include "qemu/log.h" void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns); @@ -403,6 +393,9 @@ struct TranslationBlock { uintptr_t jmp_list_head; uintptr_t jmp_list_next[2]; uintptr_t jmp_dest[2]; + + /* Pointer to a struct where statistics from the TB is stored */ + TBStatistics *tb_stats; }; extern bool parallel_cpus; diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h index feb585e0a7..3cfb62a338 100644 --- a/include/exec/tb-context.h +++ b/include/exec/tb-context.h @@ -23,6 +23,17 @@ #include "qemu/thread.h" #include "qemu/qht.h" +/* Page tracking code uses ram addresses in system mode, and virtual + addresses in userspace mode. Define tb_page_addr_t to be an appropriate + type. */ +#if defined(CONFIG_USER_ONLY) +typedef abi_ulong tb_page_addr_t; +#define TB_PAGE_ADDR_FMT TARGET_ABI_FMT_lx +#else +typedef ram_addr_t tb_page_addr_t; +#define TB_PAGE_ADDR_FMT RAM_ADDR_FMT +#endif + #define CODE_GEN_HTABLE_BITS 15 #define CODE_GEN_HTABLE_SIZE (1 << CODE_GEN_HTABLE_BITS) @@ -35,6 +46,7 @@ struct TBContext { /* statistics */ unsigned tb_flush_count; + struct qht tb_stats; }; extern TBContext tb_ctx; diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h index 4f3a37d927..54c477fe79 100644 --- a/include/exec/tb-hash.h +++ b/include/exec/tb-hash.h @@ -64,4 +64,11 @@ uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags, return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate); } +static inline +uint32_t tb_stats_hash_func(tb_page_addr_t phys_pc, target_ulong pc, + uint32_t flags) +{ + return qemu_xxhash5(phys_pc, pc, flags); +} + #endif diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h new file mode 100644 index 0000000000..0913155ec3 --- /dev/null +++ b/include/exec/tb-stats.h @@ -0,0 +1,29 @@ +#ifndef TB_STATS_H + +#define TB_STATS_H + +#include "exec/cpu-common.h" +#include "exec/tb-context.h" +#include "tcg.h" + +typedef struct TBStatistics TBStatistics; + +/* + * This struct stores statistics such as execution count of the + * TranslationBlocks. Each sets of TBs for a given phys_pc/pc/flags + * has its own TBStatistics which will persist over tb_flush. + * + * We include additional counters to track number of translations as + * well as variants for compile flags. + */ +struct TBStatistics { + tb_page_addr_t phys_pc; + target_ulong pc; + uint32_t flags; + /* cs_base isn't included in the hash but we do check for matches */ + target_ulong cs_base; +}; + +bool tb_stats_cmp(const void *ap, const void *bp); + +#endif diff --git a/include/qemu/log.h b/include/qemu/log.h index b097a6cae1..e175d4d5d0 100644 --- a/include/qemu/log.h +++ b/include/qemu/log.h @@ -129,4 +129,10 @@ void qemu_log_flush(void); /* Close the log file */ void qemu_log_close(void); +void enable_collect_tb_stats(void); +void disable_collect_tb_stats(void); +void pause_collect_tb_stats(void); +bool tb_stats_collection_enabled(void); +bool tb_stats_collection_paused(void); + #endif diff --git a/util/log.c b/util/log.c index 1d1b33f7d9..ab73fdc100 100644 --- a/util/log.c +++ b/util/log.c @@ -326,3 +326,31 @@ void qemu_print_log_usage(FILE *f) fprintf(f, "\nUse \"-d trace:help\" to get a list of trace events.\n\n"); #endif } + +int tcg_collect_tb_stats; + +void enable_collect_tb_stats(void) +{ + tcg_collect_tb_stats = 1; +} + +void disable_collect_tb_stats(void) +{ + tcg_collect_tb_stats = 0; +} + +void pause_collect_tb_stats(void) +{ + tcg_collect_tb_stats = 2; +} + +bool tb_stats_collection_enabled(void) +{ + return tcg_collect_tb_stats; +} + +bool tb_stats_collection_paused(void) +{ + return tcg_collect_tb_stats == 2; +} + -- 2.22.0