We want to store statistics for each TB even after flushes. We do not want to modify or grow the TB struct. So we create a new struct to contain this statistics and we link one of it to each TB as they are generated.
Signed-off-by: Vanderson M. do Rosario <vanderson...@gmail.com> --- accel/tcg/translate-all.c | 60 +++++++++++++++++++++++++++++++++++++++ include/exec/exec-all.h | 46 ++++++++++++++++++++++++++++++ include/exec/tb-context.h | 1 + include/exec/tb-hash.h | 7 +++++ include/qemu/log.h | 1 + 5 files changed, 115 insertions(+) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 5d1e08b169..d05803a142 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1132,11 +1132,31 @@ static bool tb_cmp(const void *ap, const void *bp) a->page_addr[1] == b->page_addr[1]; } +/* + * This is the more or less the same compare, but the data persists + * over tb_flush. We also aggregate the various variations of cflags + * under one record and ignore the details of page overlap (although + * we can count it). + */ +bool tb_stats_cmp(const void *ap, const void *bp) +{ + const TBStatistics *a = ap; + const TBStatistics *b = bp; + + return a->phys_pc == b->phys_pc && + a->pc == b->pc && + a->cs_base == b->cs_base && + a->flags == b->flags; +} + static void tb_htable_init(void) { unsigned int mode = QHT_MODE_AUTO_RESIZE; qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode); + if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) { + qht_init(&tb_ctx.tb_stats, tb_stats_cmp, CODE_GEN_HTABLE_SIZE, mode); + } } /* Must be called before using the QEMU cpus. 'tb_size' is the size @@ -1586,6 +1606,31 @@ static inline void tb_page_add(PageDesc *p, TranslationBlock *tb, #endif } +static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc, + target_ulong cs_base, uint32_t flags) +{ + TBStatistics *new_stats = g_new0(TBStatistics, 1); + uint32_t hash = tb_stats_hash_func(phys_pc, pc, flags); + void *existing_stats = NULL; + new_stats->phys_pc = phys_pc; + new_stats->pc = pc; + new_stats->cs_base = cs_base; + new_stats->flags = flags; + + qht_insert(&tb_ctx.tb_stats, new_stats, hash, &existing_stats); + + if (unlikely(existing_stats)) { + /* + * If there is already a TBStatistic for this TB from a previous flush + * then just make the new TB point to the older TBStatistic + */ + g_free(new_stats); + return existing_stats; + } else { + return new_stats; + } +} + /* add a new TB and link it to the physical page tables. phys_page2 is * (-1) to indicate that only one page contains the TB. * @@ -1732,6 +1777,21 @@ TranslationBlock *tb_gen_code(CPUState *cpu, ti = profile_getclock(); #endif + /* + * We want to fetch the stats structure before we start code + * generation so we can count interesting things about this + * generation. + * + * XXX: using loglevel is fugly - we should have a general flag + */ + if (qemu_loglevel_mask(CPU_LOG_HOT_TBS) && qemu_log_in_addr_range(tb->pc)) { + tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags); + /* XXX: should we lock and update in bulk? */ + atomic_inc(&tb->tb_stats->translations.total); + } else { + tb->tb_stats = NULL; + } + tcg_func_start(tcg_ctx); tcg_ctx->cpu = env_cpu(env); diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 16034ee651..a4bcd9b6df 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -324,6 +324,49 @@ static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, #define CODE_GEN_AVG_BLOCK_SIZE 150 #endif +typedef struct TBStatistics TBStatistics; + +/* + * This struct stores statistics such as execution count of the + * TranslationBlocks. Each sets of TBs for a given phys_pc/pc/flags + * has its own TBStatistics which will persist over tb_flush. + * + * We include additional counters to track number of translations as + * well as variants for compile flags. + */ +struct TBStatistics { + tb_page_addr_t phys_pc; + target_ulong pc; + uint32_t flags; + /* cs_base isn't included in the hash but we do check for matches */ + target_ulong cs_base; + + /* Translation stats */ + struct { + unsigned long total; + unsigned long uncached; + unsigned long spanning; + /* XXX: count invalidation? */ + } translations; + + /* Execution stats */ + struct { + unsigned long total; + unsigned long atomic; + } executions; + + struct { + unsigned num_guest_inst; + unsigned num_host_inst; + unsigned num_tcg_inst; + } code; + + /* HMP information - used for referring to previous search */ + int display_id; +}; + +bool tb_stats_cmp(const void *ap, const void *bp); + /* * Translation Cache-related fields of a TB. * This struct exists just for convenience; we keep track of TB's in a binary @@ -403,6 +446,9 @@ struct TranslationBlock { uintptr_t jmp_list_head; uintptr_t jmp_list_next[2]; uintptr_t jmp_dest[2]; + + /* Pointer to a struct where statistics from the TB is stored */ + TBStatistics *tb_stats; }; extern bool parallel_cpus; diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h index feb585e0a7..a4decb5d1f 100644 --- a/include/exec/tb-context.h +++ b/include/exec/tb-context.h @@ -35,6 +35,7 @@ struct TBContext { /* statistics */ unsigned tb_flush_count; + struct qht tb_stats; }; extern TBContext tb_ctx; diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h index 4f3a37d927..54c477fe79 100644 --- a/include/exec/tb-hash.h +++ b/include/exec/tb-hash.h @@ -64,4 +64,11 @@ uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags, return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate); } +static inline +uint32_t tb_stats_hash_func(tb_page_addr_t phys_pc, target_ulong pc, + uint32_t flags) +{ + return qemu_xxhash5(phys_pc, pc, flags); +} + #endif diff --git a/include/qemu/log.h b/include/qemu/log.h index b097a6cae1..2fca65dd01 100644 --- a/include/qemu/log.h +++ b/include/qemu/log.h @@ -45,6 +45,7 @@ static inline bool qemu_log_separate(void) /* LOG_TRACE (1 << 15) is defined in log-for-trace.h */ #define CPU_LOG_TB_OP_IND (1 << 16) #define CPU_LOG_TB_FPU (1 << 17) +#define CPU_LOG_HOT_TBS (1 << 18) /* Lock output for a series of related logs. Since this is not needed * for a single qemu_log / qemu_log_mask / qemu_log_mask_and_addr, we -- 2.22.0