Hi Richard, Thank you very much for adding atomic support to translator_ld(). It has been a big help.
Hi Alistair, I can rebase the Ziccif patch when Richard's patch has been merged. Jim Shu On Fri, Apr 4, 2025 at 12:41 PM Alistair Francis <alistai...@gmail.com> wrote: > > On Fri, Jun 7, 2024 at 8:15 PM Jim Shu <jim....@sifive.com> wrote: > > > > Support 4-byte atomic instruction fetch when instruction is natural > > aligned. > > > > Current implementation is not atomic because it loads instruction twice > > for first and last 2 bytes. We load 4 bytes at once to keep the > > atomicity. This instruction preload method only applys when instruction > > is 4-byte aligned. If instruction is unaligned, it could be across pages > > so that preload will trigger additional page fault. > > > > We encounter this issue when doing pressure test of enabling & disabling > > Linux kernel ftrace. Ftrace with kernel preemption requires concurrent > > modification and execution of instruction, so non-atomic instruction > > fetch will cause the race condition. We may fetch the wrong instruction > > which is the mixing of 2 instructions. > > > > Also, RISC-V Profile wants to provide this feature by HW. RVA20U64 > > Ziccif protects the atomicity of instruction fetch when it is > > natural aligned. > > > > Signed-off-by: Jim Shu <jim....@sifive.com> > > Reviewed-by: Frank Chang <frank.ch...@sifive.com> > > Once https://patchwork.kernel.org/project/qemu-devel/list/?series=945333 > (specifically > https://patchwork.kernel.org/project/qemu-devel/patch/20250318213209.2579218-12-richard.hender...@linaro.org/) > is merged this should be good to go in as well. > > Alistair > > > --- > > target/riscv/translate.c | 45 ++++++++++++++++++++++++++++++---------- > > 1 file changed, 34 insertions(+), 11 deletions(-) > > > > diff --git a/target/riscv/translate.c b/target/riscv/translate.c > > index 0569224e53..2be8ef63e6 100644 > > --- a/target/riscv/translate.c > > +++ b/target/riscv/translate.c > > @@ -1133,13 +1133,37 @@ const RISCVDecoder decoder_table[] = { > > > > const size_t decoder_table_size = ARRAY_SIZE(decoder_table); > > > > -static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t > > opcode) > > +static void decode_opc(CPURISCVState *env, DisasContext *ctx) > > { > > ctx->virt_inst_excp = false; > > + > > + uint32_t opcode; > > + bool is_4byte_align = false; > > + > > + if ((ctx->base.pc_next % 4) == 0) { > > + /* > > + * Load 4 bytes at once to make instruction fetch atomically. > > + * > > + * Note: When pc is 4-byte aligned, 4-byte instruction wouldn't be > > + * across pages. We could preload 4 bytes instruction no matter > > + * real one is 2 or 4 bytes. Instruction preload wouldn't trigger > > + * additional page fault. > > + */ > > + opcode = translator_ldl(env, &ctx->base, ctx->base.pc_next); > > + is_4byte_align = true; > > + } else { > > + /* > > + * For unaligned pc, instruction preload may trigger additional > > + * page fault so we only load 2 bytes here. > > + */ > > + opcode = (uint32_t) translator_lduw(env, &ctx->base, > > ctx->base.pc_next); > > + } > > + ctx->ol = ctx->xl; > > + > > ctx->cur_insn_len = insn_len(opcode); > > /* Check for compressed insn */ > > if (ctx->cur_insn_len == 2) { > > - ctx->opcode = opcode; > > + ctx->opcode = (uint16_t)opcode; > > /* > > * The Zca extension is added as way to refer to instructions in > > the C > > * extension that do not include the floating-point loads and > > stores > > @@ -1149,15 +1173,16 @@ static void decode_opc(CPURISCVState *env, > > DisasContext *ctx, uint16_t opcode) > > return; > > } > > } else { > > - uint32_t opcode32 = opcode; > > - opcode32 = deposit32(opcode32, 16, 16, > > - translator_lduw(env, &ctx->base, > > - ctx->base.pc_next + 2)); > > - ctx->opcode = opcode32; > > + if (!is_4byte_align) { > > + /* Load last 2 bytes of instruction here */ > > + opcode = deposit32(opcode, 16, 16, > > + translator_lduw(env, &ctx->base, > > + ctx->base.pc_next + 2)); > > + } > > > > for (guint i = 0; i < ctx->decoders->len; ++i) { > > riscv_cpu_decode_fn func = g_ptr_array_index(ctx->decoders, i); > > - if (func(ctx, opcode32)) { > > + if (func(ctx, opcode)) { > > return; > > } > > } > > @@ -1226,10 +1251,8 @@ static void riscv_tr_translate_insn(DisasContextBase > > *dcbase, CPUState *cpu) > > { > > DisasContext *ctx = container_of(dcbase, DisasContext, base); > > CPURISCVState *env = cpu_env(cpu); > > - uint16_t opcode16 = translator_lduw(env, &ctx->base, > > ctx->base.pc_next); > > > > - ctx->ol = ctx->xl; > > - decode_opc(env, ctx, opcode16); > > + decode_opc(env, ctx); > > ctx->base.pc_next += ctx->cur_insn_len; > > > > /* Only the first insn within a TB is allowed to cross a page > > boundary. */ > > -- > > 2.17.1 > > > >