On Tue, Apr 27, 2021 at 12:18 AM Levy Hsu <[email protected]> wrote:
>
> From: LevyHsu <[email protected]>
>
> Added implementation for builtin overflow detection, new patterns are listed
> below.
>
> ---------------------------------------------------------------
> Addition:
>
> signed addition (SImode with RV32 || DImode with RV64):
> add t0, t1, t2
> slti t3, t2, 0
> slt t4, t0, t1
> bne t3, t4, overflow
>
> signed addition (SImode with RV64):
> add t0, t1, t2
> sext.w t3, t0
> bne t0, t3, overflow
The following version has the same instruction count but offers more ILP:
add t0, t1, t2
addw t3, t1, t2
bne t0, t3, overflow
>
> unsigned addition (SImode with RV32 || DImode with RV64):
> add t0, t1, t2
> bltu t0, t1, overflow
>
> unsigned addition (SImode with RV64):
> sext.w t3, t1
> addw t0, t1, t2
> bltu t0, t3, overflow
I think you can do this in two instructions, similar to the previous pattern:
addw t0, t1, t2
bltu t0, t1, overflow
> ---------------------------------------------------------------
> Subtraction:
>
> signed subtraction (SImode with RV32 || DImode with RV64):
> sub t0, t1, t2
> slti t3, t2, 0
> slt t4, t1, t0
> bne t3, t4, overflow
>
> signed subtraction (SImode with RV64):
> sub t0, t1, t2
> sext.w t3, t0
> bne t0, t3, overflow
See analogous addition comment.
>
> unsigned subtraction (SImode with RV32 || DImode with RV64):
> sub t0, t1, t2
> bltu t1, t0, overflow
>
> unsigned subtraction (SImode with RV64):
> sext.w t3, t1
> subw t0, t1, t2
> bltu t0, t3, overflow
See analogous addition comment.
> ---------------------------------------------------------------
> Multiplication:
>
> signed multiplication (SImode with RV32 || DImode with RV64):
> mulh t4, t1, t2
> mul t0, t1, t2
> srai t5, t0, 31/63 (RV32/64)
> bne t4, t5, overflow
>
> signed multiplication (SImode with RV64):
> mul t0, t1, t2
> sext.w t3, t0
> bne t0, t3, overflow
>
> unsigned multiplication (SImode with RV32 || DImode with RV64 ):
> mulhu t4, t1, t2
> mul t0, t1, t2
> bnez t4, overflow
>
> unsigned multiplication (SImode with RV64):
> slli t0,t0,32
> slli t1,t1,32
> srli t0,t0,32
> srli t1,t1,32
> mul t0,t0,t1
> srai t5,t0,32
> bnez t5, overflow
I think you can eliminate the first two right shifts by replacing mul
with mulhu... something like:
slli rx, rx, 32
slli ry, ry, 32
mulhu rz, rx, ry
srli rt, rz, 32
bnez rt, overflow
>
> ---------------------------------------------------------------
> ---
> gcc/config/riscv/riscv.c | 8 ++
> gcc/config/riscv/riscv.h | 5 +
> gcc/config/riscv/riscv.md | 240 ++++++++++++++++++++++++++++++++++++++
> 3 files changed, 253 insertions(+)
>
> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> index d489717b2a5..cf94f5c9658 100644
> --- a/gcc/config/riscv/riscv.c
> +++ b/gcc/config/riscv/riscv.c
> @@ -351,6 +351,14 @@ static const struct riscv_tune_info
> riscv_tune_info_table[] = {
> { "size", generic, &optimize_size_tune_info },
> };
>
> +/* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
> +
> +static unsigned int
> +riscv_min_arithmetic_precision (void)
> +{
> + return 32;
> +}
> +
> /* Return the riscv_tune_info entry for the given name string. */
>
> static const struct riscv_tune_info *
> diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
> index 172c7ca7c98..a6f451b97e3 100644
> --- a/gcc/config/riscv/riscv.h
> +++ b/gcc/config/riscv/riscv.h
> @@ -121,6 +121,11 @@ extern const char *riscv_default_mtune (int argc, const
> char **argv);
> #define MIN_UNITS_PER_WORD 4
> #endif
>
> +/* Allows SImode op in builtin overflow pattern, see internal-fn.c. */
> +
> +#undef TARGET_MIN_ARITHMETIC_PRECISION
> +#define TARGET_MIN_ARITHMETIC_PRECISION riscv_min_arithmetic_precision
> +
> /* The `Q' extension is not yet supported. */
> #define UNITS_PER_FP_REG (TARGET_DOUBLE_FLOAT ? 8 : 4)
>
> diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> index 36012ad1f77..c82017a4bce 100644
> --- a/gcc/config/riscv/riscv.md
> +++ b/gcc/config/riscv/riscv.md
> @@ -462,6 +462,81 @@
> [(set_attr "type" "arith")
> (set_attr "mode" "DI")])
>
> +(define_expand "addv<mode>4"
> + [(set (match_operand:GPR 0 "register_operand" "=r,r")
> + (plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
> + (match_operand:GPR 2 "arith_operand" " r,I")))
> + (label_ref (match_operand 3 "" ""))]
> + ""
> +{
> + if (TARGET_64BIT && <MODE>mode == SImode)
> + {
> + rtx t3 = gen_reg_rtx (DImode);
> + rtx t4 = gen_reg_rtx (DImode);
> + rtx t5 = gen_reg_rtx (DImode);
> + rtx t6 = gen_reg_rtx (DImode);
> +
> + if (GET_CODE (operands[1]) != CONST_INT)
> + emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
> + else
> + t4 = operands[1];
> + if (GET_CODE (operands[2]) != CONST_INT)
> + emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
> + else
> + t5 = operands[2];
> + emit_insn (gen_adddi3 (t3, t4, t5));
> +
> + emit_move_insn (operands[0], gen_lowpart (SImode, t3));
> + emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
> +
> + riscv_expand_conditional_branch (operands[3], NE, t6, t3);
> + }
> + else
> + {
> + rtx t3 = gen_reg_rtx (<MODE>mode);
> + rtx t4 = gen_reg_rtx (<MODE>mode);
> +
> + emit_insn (gen_add3_insn (operands[0], operands[1], operands[2]));
> + rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx);
> + emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx));
> + rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[0], operands[1]);
> +
> + emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[0], operands[1]));
> + riscv_expand_conditional_branch (operands[3], NE, t3, t4);
> + }
> + DONE;
> +})
> +
> +(define_expand "uaddv<mode>4"
> + [(set (match_operand:GPR 0 "register_operand" "=r,r")
> + (plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
> + (match_operand:GPR 2 "arith_operand" " r,I")))
> + (label_ref (match_operand 3 "" ""))]
> + ""
> +{
> + if (TARGET_64BIT && <MODE>mode == SImode)
> + {
> + rtx t3 = gen_reg_rtx (DImode);
> + rtx t4 = gen_reg_rtx (DImode);
> +
> + if (GET_CODE (operands[1]) != CONST_INT)
> + emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
> + else
> + t3 = operands[1];
> + emit_insn (gen_addsi3 (operands[0], operands[1], operands[2]));
> + emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
> +
> + riscv_expand_conditional_branch (operands[3], LTU, t4, t3);
> + }
> + else
> + {
> + emit_insn (gen_add3_insn (operands[0], operands[1], operands[2]));
> + riscv_expand_conditional_branch (operands[3], LTU, operands[0],
> operands[1]);
> + }
> +
> + DONE;
> +})
> +
> (define_insn "*addsi3_extended"
> [(set (match_operand:DI 0 "register_operand" "=r,r")
> (sign_extend:DI
> @@ -518,6 +593,85 @@
> [(set_attr "type" "arith")
> (set_attr "mode" "SI")])
>
> +(define_expand "subv<mode>4"
> + [(set (match_operand:GPR 0 "register_operand" "= r")
> + (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ")
> + (match_operand:GPR 2 "register_operand" " r")))
> + (label_ref (match_operand 3 "" ""))]
> + ""
> +{
> +
> + if (TARGET_64BIT && <MODE>mode == SImode)
> + {
> + rtx t3 = gen_reg_rtx (DImode);
> + rtx t4 = gen_reg_rtx (DImode);
> + rtx t5 = gen_reg_rtx (DImode);
> + rtx t6 = gen_reg_rtx (DImode);
> +
> + if (GET_CODE (operands[1]) != CONST_INT)
> + emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
> + else
> + t4 = operands[1];
> + if (GET_CODE (operands[2]) != CONST_INT)
> + emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
> + else
> + t5 = operands[2];
> + emit_insn (gen_subdi3 (t3, t4, t5));
> +
> + emit_move_insn (operands[0], gen_lowpart (SImode, t3));
> + emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
> +
> + riscv_expand_conditional_branch (operands[3], NE, t6, t3);
> + }
> + else
> + {
> + rtx t3 = gen_reg_rtx (<MODE>mode);
> + rtx t4 = gen_reg_rtx (<MODE>mode);
> +
> + emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2]));
> +
> + rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx);
> + emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx));
> +
> + rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[1], operands[0]);
> + emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[1], operands[0]));
> +
> + riscv_expand_conditional_branch (operands[3], NE, t3, t4);
> + }
> +
> + DONE;
> +})
> +
> +(define_expand "usubv<mode>4"
> + [(set (match_operand:GPR 0 "register_operand" "= r")
> + (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ")
> + (match_operand:GPR 2 "register_operand" " r")))
> + (label_ref (match_operand 3 "" ""))]
> + ""
> +{
> + if (TARGET_64BIT && <MODE>mode == SImode)
> + {
> + rtx t3 = gen_reg_rtx (DImode);
> + rtx t4 = gen_reg_rtx (DImode);
> +
> + if (GET_CODE (operands[1]) != CONST_INT)
> + emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
> + else
> + t3 = operands[1];
> + emit_insn (gen_subsi3 (operands[0], operands[1], operands[2]));
> + emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
> +
> + riscv_expand_conditional_branch (operands[3], LTU, t3, t4);
> + }
> + else
> + {
> + emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2]));
> + riscv_expand_conditional_branch (operands[3], LTU, operands[1],
> operands[0]);
> + }
> +
> + DONE;
> +})
> +
> (define_insn "*subsi3_extended"
> [(set (match_operand:DI 0 "register_operand" "= r")
> (sign_extend:DI
> @@ -609,6 +763,92 @@
> [(set_attr "type" "imul")
> (set_attr "mode" "DI")])
>
> +(define_expand "mulv<mode>4"
> + [(set (match_operand:GPR 0 "register_operand" "=r")
> + (mult:GPR (match_operand:GPR 1 "register_operand" " r")
> + (match_operand:GPR 2 "register_operand" " r")))
> + (label_ref (match_operand 3 "" ""))]
> + "TARGET_MUL"
> +{
> + if (TARGET_64BIT && <MODE>mode == SImode)
> + {
> + rtx t3 = gen_reg_rtx (DImode);
> + rtx t4 = gen_reg_rtx (DImode);
> + rtx t5 = gen_reg_rtx (DImode);
> + rtx t6 = gen_reg_rtx (DImode);
> +
> + if (GET_CODE (operands[1]) != CONST_INT)
> + emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
> + else
> + t4 = operands[1];
> + if (GET_CODE (operands[2]) != CONST_INT)
> + emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
> + else
> + t5 = operands[2];
> + emit_insn (gen_muldi3 (t3, t4, t5));
> +
> + emit_move_insn (operands[0], gen_lowpart (SImode, t3));
> + emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
> +
> + riscv_expand_conditional_branch (operands[3], NE, t6, t3);
> + }
> + else
> + {
> + rtx hp = gen_reg_rtx (<MODE>mode);
> + rtx lp = gen_reg_rtx (<MODE>mode);
> +
> + emit_insn (gen_mul<mode>3_highpart (hp, operands[1], operands[2]));
> + emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2]));
> + emit_insn (gen_ashr<mode>3 (lp, operands[0], GEN_INT (BITS_PER_WORD -
> 1)));
> +
> + riscv_expand_conditional_branch (operands[3], NE, hp, lp);
> + }
> +
> + DONE;
> +})
> +
> +(define_expand "umulv<mode>4"
> + [(set (match_operand:GPR 0 "register_operand" "=r")
> + (mult:GPR (match_operand:GPR 1 "register_operand" " r")
> + (match_operand:GPR 2 "register_operand" " r")))
> + (label_ref (match_operand 3 "" ""))]
> + "TARGET_MUL"
> +{
> + if (TARGET_64BIT && <MODE>mode == SImode)
> + {
> + rtx t3 = gen_reg_rtx (DImode);
> + rtx t4 = gen_reg_rtx (DImode);
> + rtx t5 = gen_reg_rtx (DImode);
> + rtx t6 = gen_reg_rtx (DImode);
> +
> + if (GET_CODE (operands[1]) != CONST_INT)
> + emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 1));
> + else
> + t4 = operands[1];
> + if (GET_CODE (operands[2]) != CONST_INT)
> + emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 1));
> + else
> + t5 = operands[2];
> + emit_insn (gen_muldi3 (t3, t4, t5));
> +
> + emit_move_insn (operands[0], gen_lowpart (SImode, t3));
> + emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 1));
> +
> + riscv_expand_conditional_branch (operands[3], NE, t6, t3);
> + }
> + else
> + {
> + rtx hp = gen_reg_rtx (<MODE>mode);
> +
> + emit_insn (gen_umul<mode>3_highpart (hp, operands[1], operands[2]));
> + emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2]));
> +
> + riscv_expand_conditional_branch (operands[3], NE, hp, const0_rtx);
> + }
> +
> + DONE;
> +})
> +
> (define_insn "*mulsi3_extended"
> [(set (match_operand:DI 0 "register_operand" "=r")
> (sign_extend:DI
> --
> 2.30.1
>