YunQiang Su <yunqiang...@cipunited.com> 于2023年8月3日周四 11:18写道:
>
> PR #104914
>
> On TRULY_NOOP_TRUNCATION_MODES_P (DImode, SImode)) == true platforms,
> zero_extract (SI, SI) can be sign-extended.  So, if a zero_extract (DI,
> DI) following with an sign_extend(SI, DI) can be merged to a single
> zero_extract (SI, SI).
>

The RTL is like:

(insn 10 49 11 2 (set (zero_extract:DI (reg/v:DI 200 [ val ])
            (const_int 8 [0x8])
            (const_int 0 [0]))
        (subreg:DI (reg:QI 202 [ *buf_8(D) ]) 0)) "xx.c":4:29 281 {*insvdi}
     (expr_list:REG_DEAD (reg:QI 202 [ *buf_8(D) ])
        (nil)))
(insn 11 10 12 2 (set (reg/v:DI 200 [ val ])
        (sign_extend:DI (subreg:SI (reg/v:DI 200 [ val ]) 0)))
"xx.c":4:29 238 {extendsidi2}
     (nil))

------->

(note 10 49 11 2 NOTE_INSN_DELETED)
(insn 11 10 12 2 (set (zero_extract:SI (subreg:SI (reg/v:DI 200 [ val ]) 0)
            (const_int 8 [0x8])
            (const_int 0 [0]))
        (subreg:SI (reg:QI 202 [ *buf_8(D) ]) 0)) "xx.c":4:29 280 {*insvsi}
     (expr_list:REG_DEAD (reg:QI 202 [ *buf_8(D) ])
        (nil)))


This is another method to solve #104914.
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104914

Another method is here:
https://gcc.gnu.org/pipermail/gcc-patches/2023-July/624856.html
aka when generate RTL for zero_extract, we can determine whether it
is SImode. So we can generate the correct zero_extract at the first time,
aka in the expand pass.

Any idea about which method is better?

> gcc/ChangeLog:
>         PR: 104914.
>         * combine.cc (try_combine): Combine zero_extract (DI, DI) and
>           following sign_extend (DI, SI) for
>           TRULY_NOOP_TRUNCATION_MODES_P (DImode, SImode)) == true.
>           (subst): Allow replacing reg(DI) with subreg(SI (reg DI))
>           if to is SImode and from is DImode for
>           TRULY_NOOP_TRUNCATION_MODES_P (DImode, SImode)) == true.
>
> gcc/testsuite/ChangeLog:
>         PR: 104914.
>         * gcc.target/mips/pr104914.c: New testcase.
> ---
>  gcc/combine.cc                           | 88 ++++++++++++++++++++----
>  gcc/testsuite/gcc.target/mips/pr104914.c | 17 +++++
>  2 files changed, 90 insertions(+), 15 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/mips/pr104914.c
>
> diff --git a/gcc/combine.cc b/gcc/combine.cc
> index e46d202d0a7..701b7c33b17 100644
> --- a/gcc/combine.cc
> +++ b/gcc/combine.cc
> @@ -3294,15 +3294,64 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn 
> *i1, rtx_insn *i0,
>        n_occurrences = 0;               /* `subst' counts here */
>        subst_low_luid = DF_INSN_LUID (i2);
>
> -      /* If I1 feeds into I2 and I1DEST is in I1SRC, we need to make a unique
> -        copy of I2SRC each time we substitute it, in order to avoid creating
> -        self-referential RTL when we will be substituting I1SRC for I1DEST
> -        later.  Likewise if I0 feeds into I2, either directly or indirectly
> -        through I1, and I0DEST is in I0SRC.  */
> -      newpat = subst (PATTERN (i3), i2dest, i2src, false, false,
> -                     (i1_feeds_i2_n && i1dest_in_i1src)
> -                     || ((i0_feeds_i2_n || (i0_feeds_i1_n && i1_feeds_i2_n))
> -                         && i0dest_in_i0src));
> +      /* Try to combine zero_extract (DImode) and sign_extend (SImode to 
> DImode)
> +        for TARGET_TRULY_NOOP_TRUNCATION.  The RTL may look like:
> +
> +        (insn 10 49 11 2 (set (zero_extract:DI (reg/v:DI 200 [ val ])
> +               (const_int 8 [0x8])
> +               (const_int 0 [0]))
> +            (subreg:DI (reg:QI 202 [ *buf_8(D) ]) 0)) "xx.c":4:29 278 
> {*insvdi}
> +            (expr_list:REG_DEAD (reg:QI 202 [ *buf_8(D) ]) (nil)))
> +        (insn 11 10 12 2 (set (reg/v:DI 200 [ val ])
> +
> +        (sign_extend:DI (subreg:SI (reg/v:DI 200 [ val ]) 0))) 238 
> {extendsidi2}
> +            (nil))
> +
> +        Since these architectures (MIPS64 as an example), the 32bit operation
> +        instructions will sign-extend the reuslt to 64bit.  The result can 
> be:
> +
> +        (insn 10 49 11 2 (set (zero_extract:SI (subreg:SI (reg/v:DI 200 [ 
> val ]) 0)
> +              (const_int 8 [0x8])
> +              (const_int 0 [0]))
> +            (subreg:SI (reg:QI 202 [ *buf_8(D) ]) 0)) "xx.c":4:29 280 
> {*insvsi}
> +            (expr_list:REG_DEAD (reg:QI 202 [ *buf_8(D) ]) (nil)))
> +       */
> +      if (i0 == 0 && i1 == 0 && i3 != 0 && i2 != 0 && GET_CODE (i2) == INSN
> +         && GET_CODE (i3) == INSN && GET_CODE (PATTERN (i2)) == SET
> +         && GET_CODE (PATTERN (i3)) == SET
> +         && GET_CODE (SET_DEST (single_set (i2))) == ZERO_EXTRACT
> +         && GET_CODE (SET_SRC (single_set (i3))) == SIGN_EXTEND
> +         && SUBREG_P (XEXP (SET_SRC (single_set (i3)), 0))
> +         && REGNO (SUBREG_REG (XEXP (SET_SRC (single_set (i3)), 0)))
> +                == REGNO (SET_DEST (single_set (i3)))
> +         && REGNO (XEXP (SET_DEST (single_set (i2)), 0))
> +                == REGNO (SET_DEST (single_set (i3)))
> +         && GET_MODE (SET_DEST (single_set (i2))) == DImode
> +         && GET_MODE (SET_DEST (single_set (i3))) == DImode
> +         && GET_MODE (XEXP (SET_SRC (single_set (i3)), 0)) == SImode
> +         && TRULY_NOOP_TRUNCATION_MODES_P (DImode, SImode))
> +       {
> +         newpat = copy_rtx (PATTERN (i2));
> +         PUT_MODE (SET_DEST (newpat), SImode);
> +         PUT_MODE (SET_SRC (newpat), SImode);
> +
> +         rtx i2dest_r = XEXP (SET_DEST (newpat), 0);
> +         rtx i3src_r = XEXP (SET_SRC (single_set (i3)), 0);
> +         newpat = subst (newpat, i2dest_r, i3src_r, false, false, false);
> +       }
> +      else
> +       {
> +         /* If I1 feeds into I2 and I1DEST is in I1SRC, we need to make a
> +            unique copy of I2SRC each time we substitute it, in order to
> +            avoid creating self-referential RTL when we will be substituting
> +            I1SRC for I1DEST later.  Likewise if I0 feeds into I2, either
> +            directly or indirectly through I1, and I0DEST is in I0SRC.  */
> +         newpat = subst (
> +             PATTERN (i3), i2dest, i2src, false, false,
> +             (i1_feeds_i2_n && i1dest_in_i1src)
> +                 || ((i0_feeds_i2_n || (i0_feeds_i1_n && i1_feeds_i2_n))
> +                     && i0dest_in_i0src));
> +       }
>        substed_i2 = true;
>
>        /* Record whether I2's body now appears within I3's body.  */
> @@ -5482,13 +5531,22 @@ subst (rtx x, rtx from, rtx to, bool in_dest, bool 
> in_cond, bool unique_copy)
>             }
>           else if (fmt[i] == 'e')
>             {
> -             /* If this is a register being set, ignore it.  */
>               new_rtx = XEXP (x, i);
> -             if (in_dest
> -                 && i == 0
> -                 && (((code == SUBREG || code == ZERO_EXTRACT)
> -                      && REG_P (new_rtx))
> -                     || code == STRICT_LOW_PART))
> +             /* Allow replacing reg with subreg if it is sign extension.  */
> +             if (in_dest && (code == SUBREG || code == ZERO_EXTRACT)
> +                 && TRULY_NOOP_TRUNCATION_MODES_P (DImode, SImode)
> +                 && GET_MODE (from) == DImode && GET_MODE (to) == SImode
> +                 && i == 0)
> +               {
> +                 new_rtx
> +                     = (unique_copy && n_occurrences ? copy_rtx (to) : to);
> +                 n_occurrences++;
> +               }
> +             /* If this is a register being set, ignore it.  */
> +             else if (in_dest && i == 0
> +                      && (((code == SUBREG || code == ZERO_EXTRACT)
> +                           && REG_P (new_rtx))
> +                          || code == STRICT_LOW_PART))
>                 ;
>
>               else if (COMBINE_RTX_EQUAL_P (XEXP (x, i), from))
> diff --git a/gcc/testsuite/gcc.target/mips/pr104914.c 
> b/gcc/testsuite/gcc.target/mips/pr104914.c
> new file mode 100644
> index 00000000000..fd6ef6af446
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/mips/pr104914.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=mips64r2 -mabi=64" } */
> +
> +/* { dg-final { scan-assembler-not "\tdins\t" } } */
> +
> +NOMIPS16 int test (const unsigned char *buf)
> +{
> +  int val;
> +  ((unsigned char*)&val)[0] = *buf++;
> +  ((unsigned char*)&val)[1] = *buf++;
> +  ((unsigned char*)&val)[2] = *buf++;
> +  ((unsigned char*)&val)[3] = *buf++;
> +  if(val > 0)
> +    return 1;
> +  else
> +    return 0;
> +}
> --
> 2.30.2
>


-- 
YunQiang Su

Reply via email to