On Tue, Apr 27, 2021 at 12:18 AM Levy Hsu <[email protected]> wrote:
>
> From: LevyHsu <[email protected]>
>
> Added implementation for builtin overflow detection, new patterns are listed
> below.
>
> ---------------------------------------------------------------
> Addition:
>
> signed addition (SImode with RV32 || DImode with RV64):
> add t0, t1, t2
> slti t3, t2, 0
> slt t4, t0, t1
> bne t3, t4, overflow
>
> signed addition (SImode with RV64):
> add t0, t1, t2
> sext.w t3, t0
> bne t0, t3, overflow
The following version has the same instruction count but offers more ILP:
add t0, t1, t2
addw t3, t1, t2
bne t0, t3, overflow
>
> unsigned addition (SImode with RV32 || DImode with RV64):
> add t0, t1, t2
> bltu t0, t1, overflow
>
> unsigned addition (SImode with RV64):
> sext.w t3, t1
> addw t0, t1, t2
> bltu t0, t3, overflow
I think you can do this in two instructions, similar to the previous pattern:
addw t0, t1, t2
bltu t0, t1, overflow
> ---------------------------------------------------------------
> Subtraction:
>
> signed subtraction (SImode with RV32 || DImode with RV64):
> sub t0, t1, t2
> slti t3, t2, 0
> slt t4, t1, t0
> bne t3, t4, overflow
>
> signed subtraction (SImode with RV64):
> sub t0, t1, t2
> sext.w t3, t0
> bne t0, t3, overflow
See analogous addition comment.
>
> unsigned subtraction (SImode with RV32 || DImode with RV64):
> sub t0, t1, t2
> bltu t1, t0, overflow
>
> unsigned subtraction (SImode with RV64):
> sext.w t3, t1
> subw t0, t1, t2
> bltu t0, t3, overflow
See analogous addition comment.
> ---------------------------------------------------------------
> Multiplication:
>
> signed multiplication (SImode with RV32 || DImode with RV64):
> mulh t4, t1, t2
> mul t0, t1, t2
> srai t5, t0, 31/63 (RV32/64)
> bne t4, t5, overflow
>
> signed multiplication (SImode with RV64):
> mul t0, t1, t2
> sext.w t3, t0
> bne t0, t3, overflow
>
> unsigned multiplication (SImode with RV32 || DImode with RV64 ):
> mulhu t4, t1, t2
> mul t0, t1, t2
> bnez t4, overflow
>
> unsigned multiplication (SImode with RV64):
> slli t0,t0,32
> slli t1,t1,32
> srli t0,t0,32
> srli t1,t1,32
> mul t0,t0,t1
> srai t5,t0,32
> bnez t5, overflow
I think you can eliminate the first two right shifts by replacing mul
with mulhu... something like:
slli rx, rx, 32
slli ry, ry, 32
mulhu rz, rx, ry
srli rt, rz, 32
bnez rt, overflow
>
> ---------------------------------------------------------------
> ---
> gcc/config/riscv/riscv.c | 8 ++
> gcc/config/riscv/riscv.h | 5 +
> gcc/config/riscv/riscv.md | 240 ++++++++++++++++++++++++++++++++++++++
> 3 files changed, 253 insertions(+)
>
> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> index d489717b2a5..cf94f5c9658 100644
> --- a/gcc/config/riscv/riscv.c
> +++ b/gcc/config/riscv/riscv.c
> @@ -351,6 +351,14 @@ static const struct riscv_tune_info
> riscv_tune_info_table[] = {
> { "size", generic, &optimize_size_tune_info },
> };
>
> +/* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
> +
> +static unsigned int
> +riscv_min_arithmetic_precision (void)
> +{
> + return 32;
> +}
> +
> /* Return the riscv_tune_info entry for the given name string. */
>
> static const struct riscv_tune_info *
> diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
> index 172c7ca7c98..a6f451b97e3 100644
> --- a/gcc/config/riscv/riscv.h
> +++ b/gcc/config/riscv/riscv.h
> @@ -121,6 +121,11 @@ extern const char *riscv_default_mtune (int argc, const
> char **argv);
> #define MIN_UNITS_PER_WORD 4
> #endif
>
> +/* Allows SImode op in builtin overflow pattern, see internal-fn.c. */
> +
> +#undef TARGET_MIN_ARITHMETIC_PRECISION
> +#define TARGET_MIN_ARITHMETIC_PRECISION riscv_min_arithmetic_precision
> +
> /* The `Q' extension is not yet supported. */
> #define UNITS_PER_FP_REG (TARGET_DOUBLE_FLOAT ? 8 : 4)
>
> diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> index 36012ad1f77..c82017a4bce 100644
> --- a/gcc/config/riscv/riscv.md
> +++ b/gcc/config/riscv/riscv.md
> @@ -462,6 +462,81 @@
> [(set_attr "type" "arith")
> (set_attr "mode" "DI")])
>
> +(define_expand "addv<mode>4"
> + [(set (match_operand:GPR 0 "register_operand" "=r,r")
> + (plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
> + (match_operand:GPR 2 "arith_operand" " r,I")))
> + (label_ref (match_operand 3 "" ""))]
> + ""
> +{
> + if (TARGET_64BIT && <MODE>mode == SImode)
> + {
> + rtx t3 = gen_reg_rtx (DImode);
> + rtx t4 = gen_reg_rtx (DImode);
> + rtx t5 = gen_reg_rtx (DImode);
> + rtx t6 = gen_reg_rtx (DImode);
> +
> + if (GET_CODE (operands[1]) != CONST_INT)
> + emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
> + else
> + t4 = operands[1];
> + if (GET_CODE (operands[2]) != CONST_INT)
> + emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
> + else
> + t5 = operands[2];
> + emit_insn (gen_adddi3 (t3, t4, t5));
> +
> + emit_move_insn (operands[0], gen_lowpart (SImode, t3));
> + emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
> +
> + riscv_expand_conditional_branch (operands[3], NE, t6, t3);
> + }
> + else
> + {
> + rtx t3 = gen_reg_rtx (<MODE>mode);
> + rtx t4 = gen_reg_rtx (<MODE>mode);
> +
> + emit_insn (gen_add3_insn (operands[0], operands[1], operands[2]));
> + rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx);
> + emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx));
> + rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[0], operands[1]);
> +
> + emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[0], operands[1]));
> + riscv_expand_conditional_branch (operands[3], NE, t3, t4);
> + }
> + DONE;
> +})
> +
> +(define_expand "uaddv<mode>4"
> + [(set (match_operand:GPR 0 "register_operand" "=r,r")
> + (plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
> + (match_operand:GPR 2 "arith_operand" " r,I")))
> + (label_ref (match_operand 3 "" ""))]
> + ""
> +{
> + if (TARGET_64BIT && <MODE>mode == SImode)
> + {
> + rtx t3 = gen_reg_rtx (DImode);
> + rtx t4 = gen_reg_rtx (DImode);
> +
> + if (GET_CODE (operands[1]) != CONST_INT)
> + emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
> + else
> + t3 = operands[1];
> + emit_insn (gen_addsi3 (operands[0], operands[1], operands[2]));
> + emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
> +
> + riscv_expand_conditional_branch (operands[3], LTU, t4, t3);
> + }
> + else
> + {
> + emit_insn (gen_add3_insn (operands[0], operands[1], operands[2]));
> + riscv_expand_conditional_branch (operands[3], LTU, operands[0],
> operands[1]);
> + }
> +
> + DONE;
> +})
> +
> (define_insn "*addsi3_extended"
> [(set (match_operand:DI 0 "register_operand" "=r,r")
> (sign_extend:DI
> @@ -518,6 +593,85 @@
> [(set_attr "type" "arith")
> (set_attr "mode" "SI")])
>
> +(define_expand "subv<mode>4"
> + [(set (match_operand:GPR 0 "register_operand" "= r")
> + (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ")
> + (match_operand:GPR 2 "register_operand" " r")))
> + (label_ref (match_operand 3 "" ""))]
> + ""
> +{
> +
> + if (TARGET_64BIT && <MODE>mode == SImode)
> + {
> + rtx t3 = gen_reg_rtx (DImode);
> + rtx t4 = gen_reg_rtx (DImode);
> + rtx t5 = gen_reg_rtx (DImode);
> + rtx t6 = gen_reg_rtx (DImode);
> +
> + if (GET_CODE (operands[1]) != CONST_INT)
> + emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
> + else
> + t4 = operands[1];
> + if (GET_CODE (operands[2]) != CONST_INT)
> + emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
> + else
> + t5 = operands[2];
> + emit_insn (gen_subdi3 (t3, t4, t5));
> +
> + emit_move_insn (operands[0], gen_lowpart (SImode, t3));
> + emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
> +
> + riscv_expand_conditional_branch (operands[3], NE, t6, t3);
> + }
> + else
> + {
> + rtx t3 = gen_reg_rtx (<MODE>mode);
> + rtx t4 = gen_reg_rtx (<MODE>mode);
> +
> + emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2]));
> +
> + rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx);
> + emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx));
> +
> + rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[1], operands[0]);
> + emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[1], operands[0]));
> +
> + riscv_expand_conditional_branch (operands[3], NE, t3, t4);
> + }
> +
> + DONE;
> +})
> +
> +(define_expand "usubv<mode>4"
> + [(set (match_operand:GPR 0 "register_operand" "= r")
> + (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ")
> + (match_operand:GPR 2 "register_operand" " r")))
> + (label_ref (match_operand 3 "" ""))]
> + ""
> +{
> + if (TARGET_64BIT && <MODE>mode == SImode)
> + {
> + rtx t3 = gen_reg_rtx (DImode);
> + rtx t4 = gen_reg_rtx (DImode);
> +
> + if (GET_CODE (operands[1]) != CONST_INT)
> + emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
> + else
> + t3 = operands[1];
> + emit_insn (gen_subsi3 (operands[0], operands[1], operands[2]));
> + emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
> +
> + riscv_expand_conditional_branch (operands[3], LTU, t3, t4);
> + }
> + else
> + {
> + emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2]));
> + riscv_expand_conditional_branch (operands[3], LTU, operands[1],
> operands[0]);
> + }
> +
> + DONE;
> +})
> +
> (define_insn "*subsi3_extended"
> [(set (match_operand:DI 0 "register_operand" "= r")
> (sign_extend:DI
> @@ -609,6 +763,92 @@
> [(set_attr "type" "imul")
> (set_attr "mode" "DI")])
>
> +(define_expand "mulv<mode>4"
> + [(set (match_operand:GPR 0 "register_operand" "=r")
> + (mult:GPR (match_operand:GPR 1 "register_operand" " r")
> + (match_operand:GPR 2 "register_operand" " r")))
> + (label_ref (match_operand 3 "" ""))]
> + "TARGET_MUL"
> +{
> + if (TARGET_64BIT && <MODE>mode == SImode)
> + {
> + rtx t3 = gen_reg_rtx (DImode);
> + rtx t4 = gen_reg_rtx (DImode);
> + rtx t5 = gen_reg_rtx (DImode);
> + rtx t6 = gen_reg_rtx (DImode);
> +
> + if (GET_CODE (operands[1]) != CONST_INT)
> + emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
> + else
> + t4 = operands[1];
> + if (GET_CODE (operands[2]) != CONST_INT)
> + emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
> + else
> + t5 = operands[2];
> + emit_insn (gen_muldi3 (t3, t4, t5));
> +
> + emit_move_insn (operands[0], gen_lowpart (SImode, t3));
> + emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
> +
> + riscv_expand_conditional_branch (operands[3], NE, t6, t3);
> + }
> + else
> + {
> + rtx hp = gen_reg_rtx (<MODE>mode);
> + rtx lp = gen_reg_rtx (<MODE>mode);
> +
> + emit_insn (gen_mul<mode>3_highpart (hp, operands[1], operands[2]));
> + emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2]));
> + emit_insn (gen_ashr<mode>3 (lp, operands[0], GEN_INT (BITS_PER_WORD -
> 1)));
> +
> + riscv_expand_conditional_branch (operands[3], NE, hp, lp);
> + }
> +
> + DONE;
> +})
> +
> +(define_expand "umulv<mode>4"
> + [(set (match_operand:GPR 0 "register_operand" "=r")
> + (mult:GPR (match_operand:GPR 1 "register_operand" " r")
> + (match_operand:GPR 2 "register_operand" " r")))
> + (label_ref (match_operand 3 "" ""))]
> + "TARGET_MUL"
> +{
> + if (TARGET_64BIT && <MODE>mode == SImode)
> + {
> + rtx t3 = gen_reg_rtx (DImode);
> + rtx t4 = gen_reg_rtx (DImode);
> + rtx t5 = gen_reg_rtx (DImode);
> + rtx t6 = gen_reg_rtx (DImode);
> +
> + if (GET_CODE (operands[1]) != CONST_INT)
> + emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 1));
> + else
> + t4 = operands[1];
> + if (GET_CODE (operands[2]) != CONST_INT)
> + emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 1));
> + else
> + t5 = operands[2];
> + emit_insn (gen_muldi3 (t3, t4, t5));
> +
> + emit_move_insn (operands[0], gen_lowpart (SImode, t3));
> + emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 1));
> +
> + riscv_expand_conditional_branch (operands[3], NE, t6, t3);
> + }
> + else
> + {
> + rtx hp = gen_reg_rtx (<MODE>mode);
> +
> + emit_insn (gen_umul<mode>3_highpart (hp, operands[1], operands[2]));
> + emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2]));
> +
> + riscv_expand_conditional_branch (operands[3], NE, hp, const0_rtx);
> + }
> +
> + DONE;
> +})
> +
> (define_insn "*mulsi3_extended"
> [(set (match_operand:DI 0 "register_operand" "=r")
> (sign_extend:DI
> --
> 2.30.1
>