From: Alex Bennée <alex.ben...@linaro.org> This allows the perf tool to map samples to individual translation blocks. This could be expanded for user space, but currently it gives enough information to find any hot blocks by other means. --- qemu-options.hx | 10 ++++++++++ tcg/tcg.c | 21 +++++++++++++++++++++ vl.c | 6 ++++++ 3 files changed, 37 insertions(+)
diff --git a/qemu-options.hx b/qemu-options.hx index c5577be..09fb1d0 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2683,6 +2683,16 @@ Will dump output for any code in the 0x1000 sized block starting at 0x8000 and the 0x200 sized block starting at 0xffffffc000080000. ETEXI +DEF("perfmap", 0, QEMU_OPTION_PERFMAP, \ + "-perfmap generate a /tmp/perf-${pid}.map file for perf\n", + QEMU_ARCH_ALL) +STEXI +@item -perfmap +@findex -perfmap +This will cause QEMU to generate a map file for Linux perf tools that will allow +basic profiling information to be broken down into basic blocks. +ETEXI + DEF("L", HAS_ARG, QEMU_OPTION_L, \ "-L path set the directory for the BIOS, VGA BIOS and keymaps\n", QEMU_ARCH_ALL) diff --git a/tcg/tcg.c b/tcg/tcg.c index 57d2b82..a24f581 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -27,6 +27,8 @@ #define USE_TCG_OPTIMIZATIONS #include "config.h" +#include <glib.h> +#include <glib/gstdio.h> /* Define to jump the ELF file used to communicate with GDB. */ #undef DEBUG_JIT @@ -106,6 +108,8 @@ static int tcg_target_const_match(tcg_target_long val, static void tcg_out_tb_init(TCGContext *s); static void tcg_out_tb_finalize(TCGContext *s); +static void tcg_write_perfmap(uint8_t *start, uint64_t size, uint64_t target_pc); +void qemu_tcg_enable_perfmap(void); TCGOpDef tcg_op_defs[] = { #define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags }, @@ -2575,6 +2579,8 @@ static inline int tcg_gen_code_common(TCGContext *s, uint64_t target_pc, the_end: /* Generate TB finalization at the end of block */ tcg_out_tb_finalize(s); + + tcg_write_perfmap(gen_code_buf, s->code_ptr - gen_code_buf, target_pc); return -1; } @@ -2666,6 +2672,21 @@ void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf) } #endif +static FILE *tcg_perfmap = NULL; +void qemu_tcg_enable_perfmap(void) { + gchar * map_file = g_strdup_printf("/tmp/perf-%d.map", getpid()); + tcg_perfmap = g_fopen(map_file, "w"); + g_free(map_file); +} + +static 
void tcg_write_perfmap(uint8_t *start, uint64_t size, uint64_t target_pc) +{ + if (tcg_perfmap) { + g_fprintf(tcg_perfmap, "%lx %lx subject-0x%lx\n", + (uint64_t) start, size, target_pc); + } +} + #ifdef ELF_HOST_MACHINE /* In order to use this feature, the backend needs to do three things: diff --git a/vl.c b/vl.c index c036367..f1c3c3d 100644 --- a/vl.c +++ b/vl.c @@ -123,6 +123,9 @@ int main(int argc, char **argv) #define MAX_VIRTIO_CONSOLES 1 #define MAX_SCLP_CONSOLES 1 +/* seems better than pulling in all the tcg headers? */ +extern void qemu_tcg_enable_perfmap(void); + static const char *data_dir[16]; static int data_dir_idx; const char *bios_name = NULL; @@ -3345,6 +3348,9 @@ int main(int argc, char **argv, char **envp) case QEMU_OPTION_DFILTER: qemu_set_dfilter_ranges(optarg); break; + case QEMU_OPTION_PERFMAP: + qemu_tcg_enable_perfmap(); + break; case QEMU_OPTION_s: add_device_config(DEV_GDB, "tcp::" DEFAULT_GDBSTUB_PORT); break; -- 1.9.1