When icount is enabled and we recompile an MMIO access we end up double counting the instruction execution. To avoid this we introduce the CF_NOINSTR cflag which disables instrumentation for the next TB. As this is part of the hashed compile flags we will only execute the generated TB while coming out of a cpu_io_recompile.
While we are at it delete the old TODO. We might as well keep the translation handy as it's likely you will repeatedly hit it on each MMIO access. Reported-by: Aaron Lindsay <aa...@os.amperecomputing.com> Signed-off-by: Alex Bennée <alex.ben...@linaro.org> --- include/exec/exec-all.h | 3 ++- accel/tcg/translate-all.c | 17 ++++++++--------- accel/tcg/translator.c | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index e08179de34..ebf015e22d 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -454,6 +454,7 @@ struct TranslationBlock { uint32_t cflags; /* compile flags */ #define CF_COUNT_MASK 0x00007fff #define CF_LAST_IO 0x00008000 /* Last insn may be an IO access. */ +#define CF_NOINSTR 0x00010000 /* Disable instrumentation of TB */ #define CF_USE_ICOUNT 0x00020000 #define CF_INVALID 0x00040000 /* TB is stale. Set with @jmp_lock held */ #define CF_PARALLEL 0x00080000 /* Generate code for a parallel context */ @@ -461,7 +462,7 @@ struct TranslationBlock { #define CF_CLUSTER_SHIFT 24 /* cflags' mask for hashing/comparison */ #define CF_HASH_MASK \ - (CF_COUNT_MASK | CF_LAST_IO | CF_USE_ICOUNT | CF_PARALLEL | CF_CLUSTER_MASK) + (CF_COUNT_MASK | CF_LAST_IO | CF_NOINSTR | CF_USE_ICOUNT | CF_PARALLEL | CF_CLUSTER_MASK) /* Per-vCPU dynamic tracing state used to generate this TB */ uint32_t trace_vcpu_dstate; diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 27b3042f1d..3dee698457 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -2398,7 +2398,8 @@ void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr) } #ifndef CONFIG_USER_ONLY -/* in deterministic execution mode, instructions doing device I/Os +/* + * In deterministic execution mode, instructions doing device I/Os * must be at the end of the TB. * * Called by softmmu_template.h, with iothread mutex not held. @@ -2429,19 +2430,17 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) n = 2; } - /* Generate a new TB executing the I/O insn. */ - cpu->cflags_next_tb = curr_cflags() | CF_LAST_IO | n; + /* + * Exit the loop and potentially generate a new TB executing the + * just the I/O insns. We also disable instrumentation so we don't + * double count the instruction. + */ + cpu->cflags_next_tb = curr_cflags() | CF_NOINSTR | CF_LAST_IO | n; qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc, "cpu_io_recompile: rewound execution of TB to " TARGET_FMT_lx "\n", tb->pc); - /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not - * the first in the TB) then we end up generating a whole new TB and - * repeating the fault, which is horribly inefficient. - * Better would be to execute just this insn uncached, or generate a - * second new TB. - */ cpu_loop_exit_noexc(cpu); } diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c index a49a794065..14d1ea795d 100644 --- a/accel/tcg/translator.c +++ b/accel/tcg/translator.c @@ -58,7 +58,7 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, ops->tb_start(db, cpu); tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ - plugin_enabled = plugin_gen_tb_start(cpu, tb); + plugin_enabled = !(tb_cflags(db->tb) & CF_NOINSTR) && plugin_gen_tb_start(cpu, tb); while (true) { db->num_insns++; -- 2.20.1