This patch adds instruction counting to the target-specific part of the i386/x86_64 simulator. In record/replay mode it inserts replay function calls and instruction counter increments into the translated code.
Signed-off-by: Pavel Dovgalyuk <pavel.dovga...@ispras.ru> --- target-i386/Makefile.objs | 1 + target-i386/cpu.h | 7 ++++ target-i386/helper.h | 2 + target-i386/replay_helper.c | 33 ++++++++++++++++++ target-i386/translate.c | 81 +++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 120 insertions(+), 4 deletions(-) create mode 100755 target-i386/replay_helper.c diff --git a/target-i386/Makefile.objs b/target-i386/Makefile.objs index 027b94e..09a6e8a 100644 --- a/target-i386/Makefile.objs +++ b/target-i386/Makefile.objs @@ -2,6 +2,7 @@ obj-y += translate.o helper.o cpu.o obj-y += excp_helper.o fpu_helper.o cc_helper.o int_helper.o svm_helper.o obj-y += smm_helper.o misc_helper.o mem_helper.o seg_helper.o obj-y += gdbstub.o +obj-y += replay_helper.o obj-$(CONFIG_SOFTMMU) += machine.o arch_memory_mapping.o arch_dump.o obj-$(CONFIG_KVM) += kvm.o obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 42bda46..c9b92af 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -28,6 +28,13 @@ #define TARGET_LONG_BITS 32 #endif +/* Maximum instruction code size */ +#ifdef TARGET_X86_64 +#define TARGET_MAX_INSN_SIZE 16 +#else +#define TARGET_MAX_INSN_SIZE 16 +#endif + /* target supports implicit self modifying code */ #define TARGET_HAS_SMC /* support for self modifying code even if the modified instruction is diff --git a/target-i386/helper.h b/target-i386/helper.h index 8eb0145..058302b 100644 --- a/target-i386/helper.h +++ b/target-i386/helper.h @@ -217,3 +217,5 @@ DEF_HELPER_3(rcrl, tl, env, tl, tl) DEF_HELPER_3(rclq, tl, env, tl, tl) DEF_HELPER_3(rcrq, tl, env, tl, tl) #endif + +DEF_HELPER_1(replay_instruction, i32, env) diff --git a/target-i386/replay_helper.c b/target-i386/replay_helper.c new file mode 100755 index 0000000..7e70c78 --- /dev/null +++ b/target-i386/replay_helper.c @@ -0,0 +1,33 @@ +/* + * replay_helper.c + * + * Copyright (c) 2010-2014 Institute for System Programming + * of the Russian Academy of 
Sciences. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "cpu.h" +#include "exec/helper-proto.h" +#include "replay/replay.h" + +uint32_t helper_replay_instruction(CPUX86State *env) +{ + CPUState *cpu = ENV_GET_CPU(env); + if (replay_mode == REPLAY_MODE_PLAY + && !replay_has_instruction()) { + cpu->exception_index = EXCP_REPLAY; + return 1; + } + + if (cpu->exit_request) { + cpu->exception_index = EXCP_REPLAY; + return 1; + } + + int timer = replay_has_async_request(); + replay_instruction(timer); + return timer; +} diff --git a/target-i386/translate.c b/target-i386/translate.c index 6fcd824..1843b46 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -28,6 +28,7 @@ #include "disas/disas.h" #include "tcg-op.h" #include "exec/cpu_ldst.h" +#include "replay/replay.h" #include "exec/helper-proto.h" #include "exec/helper-gen.h" @@ -112,6 +113,7 @@ typedef struct DisasContext { int tf; /* TF cpu flag */ int singlestep_enabled; /* "hardware" single step enabled */ int jmp_opt; /* use direct block chaining for direct jumps */ + int repz_opt; /* optimize jumps within repz instructions */ int mem_index; /* select memory access functions */ uint64_t flags; /* all execution flags */ struct TranslationBlock *tb; @@ -1212,8 +1214,9 @@ static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot, \ gen_op_add_reg_im(s->aflag, R_ECX, -1); \ /* a loop would cause two single step exceptions if ECX = 1 \ before rep string_insn */ \ - if (!s->jmp_opt) \ + if (!s->repz_opt) { \ gen_op_jz_ecx(s->aflag, l2); \ + } \ gen_jmp(s, cur_eip); \ } @@ -1230,8 +1233,9 @@ static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot, \ gen_op_add_reg_im(s->aflag, R_ECX, -1); \ gen_update_cc_op(s); \ gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2); \ - if (!s->jmp_opt) \ + if (!s->repz_opt) { \ gen_op_jz_ecx(s->aflag, l2); \ + } \ gen_jmp(s, cur_eip); \ } @@ -7887,6 +7891,32 
@@ void optimize_flags_init(void) } } +static void gen_instr_replay(DisasContext *s, target_ulong pc_ptr) +{ + int l1 = gen_new_label(); + + gen_helper_replay_instruction(cpu_tmp2_i32, cpu_env); + tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_tmp2_i32, 0, l1); + + /* Don't reset dirty flag */ + if (s->cc_op_dirty) { + tcg_gen_movi_i32(cpu_cc_op, s->cc_op); + } + gen_jmp_im(pc_ptr - s->cs_base); + tcg_gen_exit_tb(0); + + gen_set_label(l1); +} + +static void gen_instructions_count(void) +{ + tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, + offsetof(CPUState, instructions_count) - ENV_OFFSET); + tcg_gen_addi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 1); + tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, + offsetof(CPUState, instructions_count) - ENV_OFFSET); +} + /* generate intermediate code in gen_opc_buf and gen_opparam_buf for basic block 'tb'. If search_pc is TRUE, also generate PC information for each intermediate instruction. */ @@ -7948,6 +7978,19 @@ static inline void gen_intermediate_code_internal(X86CPU *cpu, || (flags & HF_SOFTMMU_MASK) #endif ); + dc->repz_opt = dc->jmp_opt + /* Do not optimize repz jumps at all in replay mode, because + rep movsS instructions are executed with different paths + in repz_opt and !repz_opt modes. The first one was used + always except single step mode. And this setting + disables jumps optimization and control paths become + equivalent in run and single step modes. + Now there will be no jump optimization for repz in + trace and replay modes and there will always be an + additional step for ecx=0. 
+ */ + || replay_mode != REPLAY_MODE_NONE + ; #if 0 /* check addseg logic */ if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32)) @@ -8000,8 +8043,22 @@ static inline void gen_intermediate_code_internal(X86CPU *cpu, tcg_ctx.gen_opc_instr_start[lj] = 1; tcg_ctx.gen_opc_icount[lj] = num_insns; } - if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) + if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO) + && replay_mode == REPLAY_MODE_NONE) { gen_io_start(); + } + /* generate instruction counter code for replay */ + if (replay_mode != REPLAY_MODE_NONE) { + /* In PLAY mode check timer event at every instruction, + not only at the beginning of the block. This is needed, + when replaying has changed the bounds of translation blocks. + */ + if (pc_ptr == pc_start || replay_mode == REPLAY_MODE_PLAY) { + gen_instr_replay(dc, pc_ptr); + } else { + gen_instructions_count(); + } + } pc_ptr = disas_insn(env, dc, pc_ptr); num_insns++; @@ -8019,6 +8076,20 @@ static inline void gen_intermediate_code_internal(X86CPU *cpu, gen_eob(dc); break; } + /* In replay mode do not cross the boundary of the pages, + it can cause an exception. Do it only when boundary is + crossed by the first instruction in the block. + If current instruction already crossed the bound - it's ok, + because an exception hasn't stopped this code. 
+ */ + if (replay_mode != REPLAY_MODE_NONE + && ((pc_ptr & TARGET_PAGE_MASK) + != ((pc_ptr + TARGET_MAX_INSN_SIZE - 1) & TARGET_PAGE_MASK) + || (pc_ptr & ~TARGET_PAGE_MASK) == 0)) { + gen_jmp_im(pc_ptr - dc->cs_base); + gen_eob(dc); + break; + } /* if too long translation, stop generation too */ if (tcg_ctx.gen_opc_ptr >= gen_opc_end || (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32) || @@ -8033,8 +8104,10 @@ static inline void gen_intermediate_code_internal(X86CPU *cpu, break; } } - if (tb->cflags & CF_LAST_IO) + if ((tb->cflags & CF_LAST_IO) + && replay_mode == REPLAY_MODE_NONE) { gen_io_end(); + } gen_tb_end(tb, num_insns); *tcg_ctx.gen_opc_ptr = INDEX_op_end; /* we don't forget to fill the last values */