On Fri, Jul 24, 2020 at 8:28 AM Richard Henderson <richard.hender...@linaro.org> wrote:
> If a 32-bit input is not properly nanboxed, then the input is replaced
> with the default qnan. The only inline expansion is for the sign-changing
> set of instructions: FSGNJ.S, FSGNJX.S, FSGNJN.S.
>
> Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
> ---
>  target/riscv/insn_trans/trans_rvf.inc.c | 71 +++++++++++++++++++------
>  target/riscv/translate.c                | 18 +++++++
>  2 files changed, 73 insertions(+), 16 deletions(-)
>
> diff --git a/target/riscv/insn_trans/trans_rvf.inc.c b/target/riscv/insn_trans/trans_rvf.inc.c
> index 264d3139f1..f9a9e0643a 100644
> --- a/target/riscv/insn_trans/trans_rvf.inc.c
> +++ b/target/riscv/insn_trans/trans_rvf.inc.c
> @@ -161,47 +161,86 @@ static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a)
>  {
>      REQUIRE_FPU;
>      REQUIRE_EXT(ctx, RVF);
> +
>      if (a->rs1 == a->rs2) { /* FMOV */
> -        tcg_gen_mov_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
> +        gen_check_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
>      } else { /* FSGNJ */
> -        tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rs2], cpu_fpr[a->rs1],
> -                            0, 31);
> +        TCGv_i64 rs1 = tcg_temp_new_i64();
> +        TCGv_i64 rs2 = tcg_temp_new_i64();
> +
> +        gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
> +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
> +
> +        /* This formulation retains the nanboxing of rs2. */
> +        tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 31);
> +        tcg_temp_free_i64(rs1);
> +        tcg_temp_free_i64(rs2);
>      }
> -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
>      mark_fs_dirty(ctx);
>      return true;
>  }
>
>  static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)
>  {
> +    TCGv_i64 rs1, rs2, mask;
> +
>      REQUIRE_FPU;
>      REQUIRE_EXT(ctx, RVF);
> +
> +    rs1 = tcg_temp_new_i64();
> +    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
> +
>      if (a->rs1 == a->rs2) { /* FNEG */
> -        tcg_gen_xori_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], INT32_MIN);
> +        tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(31, 1));
>      } else {
> -        TCGv_i64 t0 = tcg_temp_new_i64();
> -        tcg_gen_not_i64(t0, cpu_fpr[a->rs2]);
> -        tcg_gen_deposit_i64(cpu_fpr[a->rd], t0, cpu_fpr[a->rs1], 0, 31);
> -        tcg_temp_free_i64(t0);
> +        rs2 = tcg_temp_new_i64();
> +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
> +
> +        /*
> +         * Replace bit 31 in rs1 with inverse in rs2.
> +         * This formulation retains the nanboxing of rs1.
> +         */
> +        mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));
> +        tcg_gen_andc_i64(rs2, mask, rs2);
> +        tcg_gen_and_i64(rs1, mask, rs1);
> +        tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2);
> +
> +        tcg_temp_free_i64(mask);
> +        tcg_temp_free_i64(rs2);
>      }
> -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
> +    tcg_temp_free_i64(rs1);
> +
>      mark_fs_dirty(ctx);
>      return true;
>  }
>
>  static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a)
>  {
> +    TCGv_i64 rs1, rs2;
> +
>      REQUIRE_FPU;
>      REQUIRE_EXT(ctx, RVF);
> +
> +    rs1 = tcg_temp_new_i64();
> +    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
> +
>      if (a->rs1 == a->rs2) { /* FABS */
> -        tcg_gen_andi_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], ~INT32_MIN);
> +        tcg_gen_andi_i64(cpu_fpr[a->rd], rs1, ~MAKE_64BIT_MASK(31, 1));
>      } else {
> -        TCGv_i64 t0 = tcg_temp_new_i64();
> -        tcg_gen_andi_i64(t0, cpu_fpr[a->rs2], INT32_MIN);
> -        tcg_gen_xor_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], t0);
> -        tcg_temp_free_i64(t0);
> +        rs2 = tcg_temp_new_i64();
> +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
> +
> +        /*
> +         * Xor bit 31 in rs1 with that in rs2.
> +         * This formulation retains the nanboxing of rs1.
> +         */
> +        tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(31, 1));
> +        tcg_gen_xor_i64(cpu_fpr[a->rd], rs1, rs2);
> +
> +        tcg_temp_free_i64(rs2);
>      }
> -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
> +    tcg_temp_free_i64(rs1);
> +
>      mark_fs_dirty(ctx);
>      return true;
>  }
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index 12a746da97..bf35182776 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -101,6 +101,24 @@ static void gen_nanbox_s(TCGv_i64 out, TCGv_i64 in)
>      tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(32, 32));
>  }
>
> +/*
> + * A narrow n-bit operation, where n < FLEN, checks that input operands
> + * are correctly Nan-boxed, i.e., all upper FLEN - n bits are 1.
> + * If so, the least-significant bits of the input are used, otherwise the
> + * input value is treated as an n-bit canonical NaN (v2.2 section 9.2).
> + *
> + * Here, the result is always nan-boxed, even the canonical nan.
> + */
> +static void gen_check_nanbox_s(TCGv_i64 out, TCGv_i64 in)
> +{
> +    TCGv_i64 t_max = tcg_const_i64(0xffffffff00000000ull);
> +    TCGv_i64 t_nan = tcg_const_i64(0xffffffff7fc00000ull);
> +
> +    tcg_gen_movcond_i64(TCG_COND_GEU, out, in, t_max, in, t_nan);
> +    tcg_temp_free_i64(t_max);
> +    tcg_temp_free_i64(t_nan);
> +}
> +
>  static void generate_exception(DisasContext *ctx, int excp)
>  {
>      tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);
> --
> 2.25.1
>
>

Reviewed-by: Chih-Min Chao <chihmin.c...@sifive.com>

Chih-Min Chao
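P.S. For anyone skimming the TCG above, the run-time effect of gen_check_nanbox_s() can be sketched in plain C roughly as follows. This is an illustration only; check_nanbox_s() below is a hypothetical stand-in for the generated movcond, not code from the patch:

    #include <stdint.h>

    /*
     * A single-precision value lives in the low 32 bits of a 64-bit FP
     * register and is "nan-boxed" when the upper 32 bits are all ones.
     * The unsigned comparison against 0xffffffff00000000 mirrors the
     * TCG_COND_GEU movcond in the patch: anything not properly boxed
     * is replaced with the nan-boxed single-precision default qNaN.
     */
    static uint64_t check_nanbox_s(uint64_t in)
    {
        if (in >= 0xffffffff00000000ull) {
            return in;                      /* properly nan-boxed: use as-is */
        }
        return 0xffffffff7fc00000ull;       /* canonical qNaN, still boxed */
    }

With the inputs checked this way, the FSGNJ/FSGNJN/FSGNJX expansions only have to combine bit 31 of one checked operand with the low 31 bits of the other, so the all-ones upper half is preserved and the final gen_nanbox_s() is no longer needed.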