On Wed, Jul 27, 2022 at 9:24 AM Richard Henderson <richard.hender...@linaro.org> wrote: > > On 7/26/22 20:25, Anup Patel wrote: > > We should call decode_save_opc() for every decoded instruction > > because generating transformed instruction upon guest page faults > > expects opcode to be available. Without this, hypervisor sees > > transformed instruction as zero in htinst CSR for guest MMIO > > emulation which makes MMIO emulation in hypervisor slow and > > also breaks nested virtualization. > > Then just add decode_save_opc to load/store insns, not everything including > plain > arithmetic...
We will have to add for float load/store, atomics, and HLV/HSV as well. Basically we end-up adding for everything except integer and float arithmetic. I see that decode_save_opc() only saves opcode in an array through tcg_set_insn_start_param(). Which brings me to the question about how much are we saving by distributing decode_save_opc() calls ? If we distribute decode_save_opc() calls then the code becomes fragile for future changes and we will miss adding decode_save_opc() for some new extensions. Regards, Anup > > > r~ > > > > > > Fixes: a9814e3e08d2 ("target/riscv: Minimize the calls to decode_save_opc") > > Signed-off-by: Anup Patel <apa...@ventanamicro.com> > > --- > > target/riscv/insn_trans/trans_privileged.c.inc | 4 ---- > > target/riscv/insn_trans/trans_rvh.c.inc | 2 -- > > target/riscv/insn_trans/trans_rvi.c.inc | 2 -- > > target/riscv/translate.c | 10 ++++------ > > 4 files changed, 4 insertions(+), 14 deletions(-) > > > > diff --git a/target/riscv/insn_trans/trans_privileged.c.inc > > b/target/riscv/insn_trans/trans_privileged.c.inc > > index 46f96ad74d..53613682e8 100644 > > --- a/target/riscv/insn_trans/trans_privileged.c.inc > > +++ b/target/riscv/insn_trans/trans_privileged.c.inc > > @@ -75,7 +75,6 @@ static bool trans_sret(DisasContext *ctx, arg_sret *a) > > { > > #ifndef CONFIG_USER_ONLY > > if (has_ext(ctx, RVS)) { > > - decode_save_opc(ctx); > > gen_helper_sret(cpu_pc, cpu_env); > > tcg_gen_exit_tb(NULL, 0); /* no chaining */ > > ctx->base.is_jmp = DISAS_NORETURN; > > @@ -91,7 +90,6 @@ static bool trans_sret(DisasContext *ctx, arg_sret *a) > > static bool trans_mret(DisasContext *ctx, arg_mret *a) > > { > > #ifndef CONFIG_USER_ONLY > > - decode_save_opc(ctx); > > gen_helper_mret(cpu_pc, cpu_env); > > tcg_gen_exit_tb(NULL, 0); /* no chaining */ > > ctx->base.is_jmp = DISAS_NORETURN; > > @@ -104,7 +102,6 @@ static bool trans_mret(DisasContext *ctx, arg_mret *a) > > static bool trans_wfi(DisasContext *ctx, arg_wfi *a) > > { > > #ifndef CONFIG_USER_ONLY > > - decode_save_opc(ctx); > > gen_set_pc_imm(ctx, ctx->pc_succ_insn); > > gen_helper_wfi(cpu_env); > > return true; > > @@ -116,7 +113,6 @@ static bool trans_wfi(DisasContext *ctx, arg_wfi *a) > > static bool trans_sfence_vma(DisasContext *ctx, arg_sfence_vma *a) > > { > > #ifndef CONFIG_USER_ONLY > > - decode_save_opc(ctx); > > gen_helper_tlb_flush(cpu_env); > > return true; > > #endif > > diff --git a/target/riscv/insn_trans/trans_rvh.c.inc > > b/target/riscv/insn_trans/trans_rvh.c.inc > > index 4f8aecddc7..cebcb3f8f6 100644 > > --- a/target/riscv/insn_trans/trans_rvh.c.inc > > +++ b/target/riscv/insn_trans/trans_rvh.c.inc > > @@ -169,7 +169,6 @@ static bool trans_hfence_gvma(DisasContext *ctx, > > arg_sfence_vma *a) > > { > > REQUIRE_EXT(ctx, RVH); > > #ifndef CONFIG_USER_ONLY > > - decode_save_opc(ctx); > > gen_helper_hyp_gvma_tlb_flush(cpu_env); > > return true; > > #endif > > @@ -180,7 +179,6 @@ static bool trans_hfence_vvma(DisasContext *ctx, > > arg_sfence_vma *a) > > { > > REQUIRE_EXT(ctx, RVH); > > #ifndef CONFIG_USER_ONLY > > - decode_save_opc(ctx); > > gen_helper_hyp_tlb_flush(cpu_env); > > return true; > > #endif > > diff --git a/target/riscv/insn_trans/trans_rvi.c.inc > > b/target/riscv/insn_trans/trans_rvi.c.inc > > index c49dbec0eb..1f318ffbef 100644 > > --- a/target/riscv/insn_trans/trans_rvi.c.inc > > +++ b/target/riscv/insn_trans/trans_rvi.c.inc > > @@ -834,8 +834,6 @@ static bool trans_fence_i(DisasContext *ctx, > > arg_fence_i *a) > > > > static bool do_csr_post(DisasContext *ctx) > > { > > - /* The helper may raise ILLEGAL_INSN -- record binv for unwind. */ > > - decode_save_opc(ctx); > > /* We may have changed important cpu state -- exit to main loop. */ > > gen_set_pc_imm(ctx, ctx->pc_succ_insn); > > tcg_gen_exit_tb(NULL, 0); > > diff --git a/target/riscv/translate.c b/target/riscv/translate.c > > index a79d0cd95b..5425d19846 100644 > > --- a/target/riscv/translate.c > > +++ b/target/riscv/translate.c > > @@ -207,10 +207,10 @@ static void gen_check_nanbox_s(TCGv_i64 out, TCGv_i64 > > in) > > tcg_gen_movcond_i64(TCG_COND_GEU, out, in, t_max, in, t_nan); > > } > > > > -static void decode_save_opc(DisasContext *ctx) > > +static void decode_save_opc(DisasContext *ctx, target_ulong opc) > > { > > assert(ctx->insn_start != NULL); > > - tcg_set_insn_start_param(ctx->insn_start, 1, ctx->opcode); > > + tcg_set_insn_start_param(ctx->insn_start, 1, opc); > > ctx->insn_start = NULL; > > } > > > > @@ -240,8 +240,6 @@ static void generate_exception(DisasContext *ctx, int > > excp) > > > > static void gen_exception_illegal(DisasContext *ctx) > > { > > - tcg_gen_st_i32(tcg_constant_i32(ctx->opcode), cpu_env, > > - offsetof(CPURISCVState, bins)); > > generate_exception(ctx, RISCV_EXCP_ILLEGAL_INST); > > } > > > > @@ -643,8 +641,6 @@ static void gen_set_rm(DisasContext *ctx, int rm) > > return; > > } > > > > - /* The helper may raise ILLEGAL_INSN -- record binv for unwind. */ > > - decode_save_opc(ctx); > > gen_helper_set_rounding_mode(cpu_env, tcg_constant_i32(rm)); > > } > > > > @@ -1055,6 +1051,7 @@ static void decode_opc(CPURISCVState *env, > > DisasContext *ctx, uint16_t opcode) > > > > /* Check for compressed insn */ > > if (extract16(opcode, 0, 2) != 3) { > > + decode_save_opc(ctx, opcode); > > if (!has_ext(ctx, RVC)) { > > gen_exception_illegal(ctx); > > } else { > > @@ -1071,6 +1068,7 @@ static void decode_opc(CPURISCVState *env, > > DisasContext *ctx, uint16_t opcode) > > ctx->base.pc_next + 2)); > > ctx->opcode = opcode32; > > ctx->pc_succ_insn = ctx->base.pc_next + 4; > > + decode_save_opc(ctx, opcode32); > > > > for (size_t i = 0; i < ARRAY_SIZE(decoders); ++i) { > > if (decoders[i].guard_func(ctx) && >