On Wed, Aug 12, 2020 at 7:14 PM LIU Zhiwei <zhiwei_...@c-sky.com> wrote:
>
>
> On 2020/8/13 6:30, Alistair Francis wrote:
> > From: Richard Henderson <richard.hender...@linaro.org>
> >
> > If a 32-bit input is not properly nanboxed, then the input is replaced
> > with the default qnan. The only inline expansion is for the sign-changing
> > set of instructions: FSGNJ.S, FSGNJX.S, FSGNJN.S.
> >
> > Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
> > Reviewed-by: LIU Zhiwei <zhiwei_...@c-sky.com>
> > Message-Id: <20200724002807.441147-6-richard.hender...@linaro.org>
> > Signed-off-by: Alistair Francis <alistair.fran...@wdc.com>
> > ---
> >  target/riscv/insn_trans/trans_rvf.inc.c | 71 +++++++++++++++++++------
> >  target/riscv/translate.c                | 18 +++++++
> >  2 files changed, 73 insertions(+), 16 deletions(-)
> >
> > diff --git a/target/riscv/insn_trans/trans_rvf.inc.c b/target/riscv/insn_trans/trans_rvf.inc.c
> > index 264d3139f1..f9a9e0643a 100644
> > --- a/target/riscv/insn_trans/trans_rvf.inc.c
> > +++ b/target/riscv/insn_trans/trans_rvf.inc.c
> > @@ -161,47 +161,86 @@ static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a)
> >  {
> >      REQUIRE_FPU;
> >      REQUIRE_EXT(ctx, RVF);
> > +
> >      if (a->rs1 == a->rs2) { /* FMOV */
> > -        tcg_gen_mov_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
> > +        gen_check_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
> >      } else { /* FSGNJ */
> > -        tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rs2], cpu_fpr[a->rs1],
> > -                            0, 31);
> > +        TCGv_i64 rs1 = tcg_temp_new_i64();
> > +        TCGv_i64 rs2 = tcg_temp_new_i64();
> > +
> > +        gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
> > +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
> > +
> > +        /* This formulation retains the nanboxing of rs2. */
> > +        tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 31);
> > +        tcg_temp_free_i64(rs1);
> > +        tcg_temp_free_i64(rs2);
> >      }
> > -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
> >      mark_fs_dirty(ctx);
> >      return true;
> >  }
> >
> >  static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)
> >  {
> > +    TCGv_i64 rs1, rs2, mask;
> > +
> >      REQUIRE_FPU;
> >      REQUIRE_EXT(ctx, RVF);
> > +
> > +    rs1 = tcg_temp_new_i64();
> > +    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
> > +
> >      if (a->rs1 == a->rs2) { /* FNEG */
> > -        tcg_gen_xori_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], INT32_MIN);
> > +        tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(31, 1));
> >      } else {
> > -        TCGv_i64 t0 = tcg_temp_new_i64();
> > -        tcg_gen_not_i64(t0, cpu_fpr[a->rs2]);
> > -        tcg_gen_deposit_i64(cpu_fpr[a->rd], t0, cpu_fpr[a->rs1], 0, 31);
> > -        tcg_temp_free_i64(t0);
> > +        rs2 = tcg_temp_new_i64();
> > +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
> > +
> > +        /*
> > +         * Replace bit 31 in rs1 with inverse in rs2.
> > +         * This formulation retains the nanboxing of rs1.
> > +         */
> > +        mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));
> > +        tcg_gen_andc_i64(rs2, mask, rs2);
> Hi Alistair,
>
> As Chih-Min said, it's wrong here. He has given the correct patch code:
> https://www.mail-archive.com/qemu-devel@nongnu.org/msg728540.html
>
> We can either squash the code into this patch or add a separate patch
> later. I prefer the former.
> Thanks very much.
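[For readers following the bit math before the fix below: tcg_gen_andc_i64(ret, a, b) computes a & ~b, so the flagged tcg_gen_andc_i64(rs2, mask, rs2) produces mask & ~rs2, the complement of rs2 with bit 31 cleared, rather than the intended isolated inverted sign bit ~rs2 & ~mask. A minimal host-side C sketch of that arithmetic with illustrative operand values; this is a standalone illustration, not QEMU code.]

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t rs1  = 0xffffffff3f800000ull; /* nanboxed 1.0f  */
    uint64_t rs2  = 0xffffffff80000000ull; /* nanboxed -0.0f */
    uint64_t mask = ~(UINT64_C(1) << 31);  /* ~MAKE_64BIT_MASK(31, 1) */

    /* Buggy: andc(mask, rs2) == mask & ~rs2 is the complement of rs2
     * with bit 31 cleared, not the isolated inverted sign bit. */
    uint64_t bad  = (mask & ~rs2) | (rs1 & mask);

    /* Fixed (as in the squash below): invert rs2 first, then andc with
     * mask keeps only the inverted sign bit, i.e. ~rs2 & ~mask. */
    uint64_t good = (~rs2 & ~mask) | (rs1 & mask);

    /* FSGNJN.S wants |rs1| with sign = !sign(rs2): nanboxed +1.0f. */
    printf("bad  = 0x%016" PRIx64 "\n", bad);  /* 0xffffffff7fffffff */
    printf("good = 0x%016" PRIx64 "\n", good); /* 0xffffffff3f800000 */
    return 0;
}

[With these inputs the buggy form yields a nanboxed 0x7fffffff, a NaN pattern, instead of +1.0f; that is the failure the squash below corrects.]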
Richard, are you OK if I squash this diff into the patch and send a PR v2?

diff --git a/target/riscv/insn_trans/trans_rvf.inc.c b/target/riscv/insn_trans/trans_rvf.inc.c
index f9a9e0643a..76f281d275 100644
--- a/target/riscv/insn_trans/trans_rvf.inc.c
+++ b/target/riscv/insn_trans/trans_rvf.inc.c
@@ -201,7 +201,8 @@ static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)
          * This formulation retains the nanboxing of rs1.
          */
         mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));
-        tcg_gen_andc_i64(rs2, mask, rs2);
+        tcg_gen_not_i64(rs2, rs2);        /* forgot to invert rs2 */
+        tcg_gen_andc_i64(rs2, rs2, mask); /* mask needs to be inverted, hence andc */
         tcg_gen_and_i64(rs1, mask, rs1);
         tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2);

Alistair

>
> Zhiwei
> > +        tcg_gen_and_i64(rs1, mask, rs1);
> > +        tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2);
> > +
> > +        tcg_temp_free_i64(mask);
> > +        tcg_temp_free_i64(rs2);
> >      }
> > -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
> > +    tcg_temp_free_i64(rs1);
> > +
> >      mark_fs_dirty(ctx);
> >      return true;
> >  }
> >
> >  static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a)
> >  {
> > +    TCGv_i64 rs1, rs2;
> > +
> >      REQUIRE_FPU;
> >      REQUIRE_EXT(ctx, RVF);
> > +
> > +    rs1 = tcg_temp_new_i64();
> > +    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
> > +
> >      if (a->rs1 == a->rs2) { /* FABS */
> > -        tcg_gen_andi_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], ~INT32_MIN);
> > +        tcg_gen_andi_i64(cpu_fpr[a->rd], rs1, ~MAKE_64BIT_MASK(31, 1));
> >      } else {
> > -        TCGv_i64 t0 = tcg_temp_new_i64();
> > -        tcg_gen_andi_i64(t0, cpu_fpr[a->rs2], INT32_MIN);
> > -        tcg_gen_xor_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], t0);
> > -        tcg_temp_free_i64(t0);
> > +        rs2 = tcg_temp_new_i64();
> > +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
> > +
> > +        /*
> > +         * Xor bit 31 in rs1 with that in rs2.
> > +         * This formulation retains the nanboxing of rs1.
> > +         */
> > +        tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(31, 1));
> > +        tcg_gen_xor_i64(cpu_fpr[a->rd], rs1, rs2);
> > +
> > +        tcg_temp_free_i64(rs2);
> >      }
> > -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
> > +    tcg_temp_free_i64(rs1);
> > +
> >      mark_fs_dirty(ctx);
> >      return true;
> >  }
> >
> > diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> > index 12a746da97..bf35182776 100644
> > --- a/target/riscv/translate.c
> > +++ b/target/riscv/translate.c
> > @@ -101,6 +101,24 @@ static void gen_nanbox_s(TCGv_i64 out, TCGv_i64 in)
> >      tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(32, 32));
> >  }
> >
> > +/*
> > + * A narrow n-bit operation, where n < FLEN, checks that input operands
> > + * are correctly Nan-boxed, i.e., all upper FLEN - n bits are 1.
> > + * If so, the least-significant bits of the input are used, otherwise the
> > + * input value is treated as an n-bit canonical NaN (v2.2 section 9.2).
> > + *
> > + * Here, the result is always nan-boxed, even the canonical nan.
> > + */
> > +static void gen_check_nanbox_s(TCGv_i64 out, TCGv_i64 in)
> > +{
> > +    TCGv_i64 t_max = tcg_const_i64(0xffffffff00000000ull);
> > +    TCGv_i64 t_nan = tcg_const_i64(0xffffffff7fc00000ull);
> > +
> > +    tcg_gen_movcond_i64(TCG_COND_GEU, out, in, t_max, in, t_nan);
> > +    tcg_temp_free_i64(t_max);
> > +    tcg_temp_free_i64(t_nan);
> > +}
> > +
> >  static void generate_exception(DisasContext *ctx, int excp)
> >  {
> >      tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);
> >
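[As a footnote on gen_check_nanbox_s quoted above, a host-side C sketch of the run-time value the generated code computes; check_nanbox_s is a hypothetical standalone helper, not a QEMU function. An input whose upper 32 bits are all ones passes through unchanged, anything else becomes the nanboxed single-precision default qNaN; the unsigned >= below is the TCG_COND_GEU movcond.]

#include <assert.h>
#include <stdint.h>

static uint64_t check_nanbox_s(uint64_t in)
{
    const uint64_t t_max = 0xffffffff00000000ull; /* smallest valid nanbox */
    const uint64_t t_nan = 0xffffffff7fc00000ull; /* nanboxed default qNaN */

    /* Unsigned in >= t_max holds iff the top 32 bits of in are all 1. */
    return in >= t_max ? in : t_nan;
}

int main(void)
{
    /* A properly nanboxed 1.0f is used as-is... */
    assert(check_nanbox_s(0xffffffff3f800000ull) == 0xffffffff3f800000ull);
    /* ...while a stale double-precision pattern becomes the default NaN. */
    assert(check_nanbox_s(0x3ff0000000000000ull) == 0xffffffff7fc00000ull);
    return 0;
}

[The GEU comparison works because the 64-bit values that are unsigned-greater-or-equal to 0xffffffff00000000 are exactly those whose upper half is all ones.]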