On Tue, Apr 27, 2021 at 12:18 AM Levy Hsu <ad...@levyhsu.com> wrote:
>
> From: LevyHsu <ad...@levyhsu.com>
>
> Added implementation for builtin overflow detection; the new patterns
> are listed below.
>
> ---------------------------------------------------------------
> Addition:
>
> signed addition (SImode with RV32 || DImode with RV64):
>     add     t0, t1, t2
>     slti    t3, t2, 0
>     slt     t4, t0, t1
>     bne     t3, t4, overflow
>
> signed addition (SImode with RV64):
>     add     t0, t1, t2
>     sext.w  t3, t0
>     bne     t0, t3, overflow

The following version has the same instruction count but offers more ILP:

    add     t0, t1, t2
    addw    t3, t1, t2
    bne     t0, t3, overflow
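Just to spell out the invariant both signed-addition sequences rely on, here
it is in C.  This is only an illustrative sketch of mine, not code from the
patch, and the function name and types are made up:

    #include <stdbool.h>
    #include <stdint.h>

    /* Wrapping add, then detect signed overflow: for r = a + b in two's
       complement, overflow happened iff (b < 0) != (r < a).  That is the
       slti/slt/bne triple; the RV64 SImode variant instead compares the
       sum with the sign-extension of its low 32 bits.  */
    bool
    add_overflows (int64_t a, int64_t b, int64_t *r)
    {
      uint64_t u = (uint64_t) a + (uint64_t) b;   /* add  t0, t1, t2 */
      *r = (int64_t) u;
      bool rhs_negative = b < 0;                  /* slti t3, t2, 0 */
      bool wrapped_down = *r < a;                 /* slt  t4, t0, t1 */
      return rhs_negative != wrapped_down;        /* bne  t3, t4, overflow */
    }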
>
> unsigned addition (SImode with RV32 || DImode with RV64):
>     add     t0, t1, t2
>     bltu    t0, t1, overflow
>
> unsigned addition (SImode with RV64):
>     sext.w  t3, t1
>     addw    t0, t1, t2
>     bltu    t0, t3, overflow

I think you can do this in two instructions, similar to the previous pattern:

    addw    t0, t1, t2
    bltu    t0, t1, overflow

> ---------------------------------------------------------------
> Subtraction:
>
> signed subtraction (SImode with RV32 || DImode with RV64):
>     sub     t0, t1, t2
>     slti    t3, t2, 0
>     slt     t4, t1, t0
>     bne     t3, t4, overflow
>
> signed subtraction (SImode with RV64):
>     sub     t0, t1, t2
>     sext.w  t3, t0
>     bne     t0, t3, overflow

See analogous addition comment.

>
> unsigned subtraction (SImode with RV32 || DImode with RV64):
>     sub     t0, t1, t2
>     bltu    t1, t0, overflow
>
> unsigned subtraction (SImode with RV64):
>     sext.w  t3, t1
>     subw    t0, t1, t2
>     bltu    t0, t3, overflow

See analogous addition comment.

> ---------------------------------------------------------------
> Multiplication:
>
> signed multiplication (SImode with RV32 || DImode with RV64):
>     mulh    t4, t1, t2
>     mul     t0, t1, t2
>     srai    t5, t0, 31/63 (RV32/RV64)
>     bne     t4, t5, overflow
>
> signed multiplication (SImode with RV64):
>     mul     t0, t1, t2
>     sext.w  t3, t0
>     bne     t0, t3, overflow
>
> unsigned multiplication (SImode with RV32 || DImode with RV64):
>     mulhu   t4, t1, t2
>     mul     t0, t1, t2
>     bnez    t4, overflow
>
> unsigned multiplication (SImode with RV64):
>     slli    t0, t0, 32
>     slli    t1, t1, 32
>     srli    t0, t0, 32
>     srli    t1, t1, 32
>     mul     t0, t0, t1
>     srai    t5, t0, 32
>     bnez    t5, overflow

I think you can eliminate the first two right shifts by replacing mul with
mulhu... something like:

    slli    rx, rx, 32
    slli    ry, ry, 32
    mulhu   rz, rx, ry
    srli    rt, rz, 32
    bnez    rt, overflow
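To spell out why the mulhu variant works, here is a sketch of mine in C
(not code from the patch; the helper name is made up, and unsigned __int128
is the usual GCC extension):

    #include <stdbool.h>
    #include <stdint.h>

    /* (a << 32) * (b << 32) == (a * b) * 2^64, so the high 64-bit half of
       the 128-bit product is exactly a * b.  mulhu therefore yields the
       full 32x32 product, and overflow is just "bits 63:32 nonzero".  */
    bool
    umulsi_overflows (uint32_t a, uint32_t b, uint32_t *r)
    {
      uint64_t x = (uint64_t) a << 32;                  /* slli  rx, rx, 32 */
      uint64_t y = (uint64_t) b << 32;                  /* slli  ry, ry, 32 */
      uint64_t z = (uint64_t)
        (((unsigned __int128) x * y) >> 64);            /* mulhu rz, rx, ry */
      *r = (uint32_t) z;
      return (z >> 32) != 0;                            /* srli + bnez */
    }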
>
> ---------------------------------------------------------------
> ---
>  gcc/config/riscv/riscv.c  |   8 ++
>  gcc/config/riscv/riscv.h  |   5 +
>  gcc/config/riscv/riscv.md | 240 ++++++++++++++++++++++++++++++++++++++
>  3 files changed, 253 insertions(+)
>
> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> index d489717b2a5..cf94f5c9658 100644
> --- a/gcc/config/riscv/riscv.c
> +++ b/gcc/config/riscv/riscv.c
> @@ -351,6 +351,14 @@ static const struct riscv_tune_info riscv_tune_info_table[] = {
>    { "size", generic, &optimize_size_tune_info },
>  };
>
> +/* Implement TARGET_MIN_ARITHMETIC_PRECISION.  */
> +
> +static unsigned int
> +riscv_min_arithmetic_precision (void)
> +{
> +  return 32;
> +}
> +
>  /* Return the riscv_tune_info entry for the given name string.  */
>
>  static const struct riscv_tune_info *
> diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
> index 172c7ca7c98..a6f451b97e3 100644
> --- a/gcc/config/riscv/riscv.h
> +++ b/gcc/config/riscv/riscv.h
> @@ -121,6 +121,11 @@ extern const char *riscv_default_mtune (int argc, const char **argv);
>  #define MIN_UNITS_PER_WORD 4
>  #endif
>
> +/* Allows SImode op in builtin overflow pattern, see internal-fn.c.  */
> +
> +#undef TARGET_MIN_ARITHMETIC_PRECISION
> +#define TARGET_MIN_ARITHMETIC_PRECISION riscv_min_arithmetic_precision
> +
>  /* The `Q' extension is not yet supported.  */
>  #define UNITS_PER_FP_REG (TARGET_DOUBLE_FLOAT ? 8 : 4)
>
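For context (my reading of the hook, not something stated in the patch
beyond the comment above): returning 32 here lets the internal-fn.c overflow
expanders use the new SImode patterns on RV64 for 32-bit checks instead of
widening everything to word_mode, so a plain builtin call like the following
can reach them directly.  The function name is just an example:

    #include <stdint.h>

    /* On RV64 this should now expand through the SImode addv/uaddv
       patterns below; returns nonzero when the addition overflows.  */
    int
    add32_checked (int32_t a, int32_t b, int32_t *res)
    {
      return __builtin_add_overflow (a, b, res);
    }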
> diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> index 36012ad1f77..c82017a4bce 100644
> --- a/gcc/config/riscv/riscv.md
> +++ b/gcc/config/riscv/riscv.md
> @@ -462,6 +462,81 @@
>    [(set_attr "type" "arith")
>     (set_attr "mode" "DI")])
>
> +(define_expand "addv<mode>4"
> +  [(set (match_operand:GPR 0 "register_operand" "=r,r")
> +        (plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
> +                  (match_operand:GPR 2 "arith_operand"    " r,I")))
> +   (label_ref (match_operand 3 "" ""))]
> +  ""
> +{
> +  if (TARGET_64BIT && <MODE>mode == SImode)
> +    {
> +      rtx t3 = gen_reg_rtx (DImode);
> +      rtx t4 = gen_reg_rtx (DImode);
> +      rtx t5 = gen_reg_rtx (DImode);
> +      rtx t6 = gen_reg_rtx (DImode);
> +
> +      if (GET_CODE (operands[1]) != CONST_INT)
> +        emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
> +      else
> +        t4 = operands[1];
> +      if (GET_CODE (operands[2]) != CONST_INT)
> +        emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
> +      else
> +        t5 = operands[2];
> +      emit_insn (gen_adddi3 (t3, t4, t5));
> +
> +      emit_move_insn (operands[0], gen_lowpart (SImode, t3));
> +      emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
> +
> +      riscv_expand_conditional_branch (operands[3], NE, t6, t3);
> +    }
> +  else
> +    {
> +      rtx t3 = gen_reg_rtx (<MODE>mode);
> +      rtx t4 = gen_reg_rtx (<MODE>mode);
> +
> +      emit_insn (gen_add3_insn (operands[0], operands[1], operands[2]));
> +      rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx);
> +      emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx));
> +      rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[0], operands[1]);
> +
> +      emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[0], operands[1]));
> +      riscv_expand_conditional_branch (operands[3], NE, t3, t4);
> +    }
> +  DONE;
> +})
> +
> +(define_expand "uaddv<mode>4"
> +  [(set (match_operand:GPR 0 "register_operand" "=r,r")
> +        (plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
> +                  (match_operand:GPR 2 "arith_operand"    " r,I")))
> +   (label_ref (match_operand 3 "" ""))]
> +  ""
> +{
> +  if (TARGET_64BIT && <MODE>mode == SImode)
> +    {
> +      rtx t3 = gen_reg_rtx (DImode);
> +      rtx t4 = gen_reg_rtx (DImode);
> +
> +      if (GET_CODE (operands[1]) != CONST_INT)
> +        emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
> +      else
> +        t3 = operands[1];
> +      emit_insn (gen_addsi3 (operands[0], operands[1], operands[2]));
> +      emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
> +
> +      riscv_expand_conditional_branch (operands[3], LTU, t4, t3);
> +    }
> +  else
> +    {
> +      emit_insn (gen_add3_insn (operands[0], operands[1], operands[2]));
> +      riscv_expand_conditional_branch (operands[3], LTU, operands[0], operands[1]);
> +    }
> +
> +  DONE;
> +})
> +
>  (define_insn "*addsi3_extended"
>    [(set (match_operand:DI 0 "register_operand" "=r,r")
>         (sign_extend:DI
> @@ -518,6 +593,85 @@
>    [(set_attr "type" "arith")
>     (set_attr "mode" "SI")])
>
> +(define_expand "subv<mode>4"
> +  [(set (match_operand:GPR 0 "register_operand" "= r")
> +        (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ")
> +                   (match_operand:GPR 2 "register_operand" "  r")))
> +   (label_ref (match_operand 3 "" ""))]
> +  ""
> +{
> +
> +  if (TARGET_64BIT && <MODE>mode == SImode)
> +    {
> +      rtx t3 = gen_reg_rtx (DImode);
> +      rtx t4 = gen_reg_rtx (DImode);
> +      rtx t5 = gen_reg_rtx (DImode);
> +      rtx t6 = gen_reg_rtx (DImode);
> +
> +      if (GET_CODE (operands[1]) != CONST_INT)
> +        emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
> +      else
> +        t4 = operands[1];
> +      if (GET_CODE (operands[2]) != CONST_INT)
> +        emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
> +      else
> +        t5 = operands[2];
> +      emit_insn (gen_subdi3 (t3, t4, t5));
> +
> +      emit_move_insn (operands[0], gen_lowpart (SImode, t3));
> +      emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
> +
> +      riscv_expand_conditional_branch (operands[3], NE, t6, t3);
> +    }
> +  else
> +    {
> +      rtx t3 = gen_reg_rtx (<MODE>mode);
> +      rtx t4 = gen_reg_rtx (<MODE>mode);
> +
> +      emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2]));
> +
> +      rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx);
> +      emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx));
> +
> +      rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[1], operands[0]);
> +      emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[1], operands[0]));
> +
> +      riscv_expand_conditional_branch (operands[3], NE, t3, t4);
> +    }
> +
> +  DONE;
> +})
> +
> +(define_expand "usubv<mode>4"
> +  [(set (match_operand:GPR 0 "register_operand" "= r")
> +        (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ")
> +                   (match_operand:GPR 2 "register_operand" "  r")))
> +   (label_ref (match_operand 3 "" ""))]
> +  ""
> +{
> +  if (TARGET_64BIT && <MODE>mode == SImode)
> +    {
> +      rtx t3 = gen_reg_rtx (DImode);
> +      rtx t4 = gen_reg_rtx (DImode);
> +
> +      if (GET_CODE (operands[1]) != CONST_INT)
> +        emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
> +      else
> +        t3 = operands[1];
> +      emit_insn (gen_subsi3 (operands[0], operands[1], operands[2]));
> +      emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
> +
> +      riscv_expand_conditional_branch (operands[3], LTU, t3, t4);
> +    }
> +  else
> +    {
> +      emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2]));
> +      riscv_expand_conditional_branch (operands[3], LTU, operands[1], operands[0]);
> +    }
> +
> +  DONE;
> +})
> +
>  (define_insn "*subsi3_extended"
>    [(set (match_operand:DI 0 "register_operand" "= r")
>         (sign_extend:DI
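The subtraction check mirrors the addition one; written out in C (again my
own sketch, not part of the patch, with a made-up function name):

    #include <stdbool.h>
    #include <stdint.h>

    /* The check behind sub/slti/slt/bne: for r = a - b in two's complement,
       signed overflow happened iff (b < 0) != (a < r).  The SImode-on-RV64
       arm instead subtracts in DImode and branches when the 64-bit result
       differs from the sign-extension of its low 32 bits.  */
    bool
    sub_overflows (int64_t a, int64_t b, int64_t *r)
    {
      *r = (int64_t) ((uint64_t) a - (uint64_t) b);   /* sub  t0, t1, t2 */
      bool rhs_negative = b < 0;                      /* slti t3, t2, 0 */
      bool grew = a < *r;                             /* slt  t4, t1, t0 */
      return rhs_negative != grew;                    /* bne  t3, t4, overflow */
    }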
"=r") > + (mult:GPR (match_operand:GPR 1 "register_operand" " r") > + (match_operand:GPR 2 "register_operand" " r"))) > + (label_ref (match_operand 3 "" ""))] > + "TARGET_MUL" > +{ > + if (TARGET_64BIT && <MODE>mode == SImode) > + { > + rtx t3 = gen_reg_rtx (DImode); > + rtx t4 = gen_reg_rtx (DImode); > + rtx t5 = gen_reg_rtx (DImode); > + rtx t6 = gen_reg_rtx (DImode); > + > + if (GET_CODE (operands[1]) != CONST_INT) > + emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 1)); > + else > + t4 = operands[1]; > + if (GET_CODE (operands[2]) != CONST_INT) > + emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 1)); > + else > + t5 = operands[2]; > + emit_insn (gen_muldi3 (t3, t4, t5)); > + > + emit_move_insn (operands[0], gen_lowpart (SImode, t3)); > + emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 1)); > + > + riscv_expand_conditional_branch (operands[3], NE, t6, t3); > + } > + else > + { > + rtx hp = gen_reg_rtx (<MODE>mode); > + > + emit_insn (gen_umul<mode>3_highpart (hp, operands[1], operands[2])); > + emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2])); > + > + riscv_expand_conditional_branch (operands[3], NE, hp, const0_rtx); > + } > + > + DONE; > +}) > + > (define_insn "*mulsi3_extended" > [(set (match_operand:DI 0 "register_operand" "=r") > (sign_extend:DI > -- > 2.30.1 >