YunQiang Su <yunqiang...@cipunited.com> 于2023年8月3日周四 11:18写道: > > PR #104914 > > On platforms where TRULY_NOOP_TRUNCATION_MODES_P (DImode, SImode) is true, > the result of a zero_extract (SI, SI) is sign-extended. So a zero_extract (DI, > DI) followed by a sign_extend (SI, DI) can be merged into a single > zero_extract (SI, SI). >
The RTL looks like this: (insn 10 49 11 2 (set (zero_extract:DI (reg/v:DI 200 [ val ]) (const_int 8 [0x8]) (const_int 0 [0])) (subreg:DI (reg:QI 202 [ *buf_8(D) ]) 0)) "xx.c":4:29 281 {*insvdi} (expr_list:REG_DEAD (reg:QI 202 [ *buf_8(D) ]) (nil))) (insn 11 10 12 2 (set (reg/v:DI 200 [ val ]) (sign_extend:DI (subreg:SI (reg/v:DI 200 [ val ]) 0))) "xx.c":4:29 238 {extendsidi2} (nil)) -------> (note 10 49 11 2 NOTE_INSN_DELETED) (insn 11 10 12 2 (set (zero_extract:SI (subreg:SI (reg/v:DI 200 [ val ]) 0) (const_int 8 [0x8]) (const_int 0 [0])) (subreg:SI (reg:QI 202 [ *buf_8(D) ]) 0)) "xx.c":4:29 280 {*insvsi} (expr_list:REG_DEAD (reg:QI 202 [ *buf_8(D) ]) (nil))) This is another way to solve PR #104914: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104914 The other approach is here: https://gcc.gnu.org/pipermail/gcc-patches/2023-July/624856.html That is, when generating the RTL for zero_extract, we can determine whether it is SImode, so we can generate the correct zero_extract the first time, i.e. in the expand pass. Any thoughts on which method is better? > gcc/ChangeLog: > PR: 104914. > * combine.cc (try_combine): Combine zero_extract (DI, DI) and > following sign_extend (DI, SI) for > TRULY_NOOP_TRUNCATION_MODES_P (DImode, SImode)) == true. > (subst): Allow replacing reg(DI) with subreg(SI (reg DI)) > if to is SImode and from is DImode for > TRULY_NOOP_TRUNCATION_MODES_P (DImode, SImode)) == true. > > gcc/testsuite/ChangeLog: > PR: 104914. > * gcc.target/mips/pr104914.c: New testcase. 
> --- > gcc/combine.cc | 88 ++++++++++++++++++++---- > gcc/testsuite/gcc.target/mips/pr104914.c | 17 +++++ > 2 files changed, 90 insertions(+), 15 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/mips/pr104914.c > > diff --git a/gcc/combine.cc b/gcc/combine.cc > index e46d202d0a7..701b7c33b17 100644 > --- a/gcc/combine.cc > +++ b/gcc/combine.cc > @@ -3294,15 +3294,64 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn > *i1, rtx_insn *i0, > n_occurrences = 0; /* `subst' counts here */ > subst_low_luid = DF_INSN_LUID (i2); > > - /* If I1 feeds into I2 and I1DEST is in I1SRC, we need to make a unique > - copy of I2SRC each time we substitute it, in order to avoid creating > - self-referential RTL when we will be substituting I1SRC for I1DEST > - later. Likewise if I0 feeds into I2, either directly or indirectly > - through I1, and I0DEST is in I0SRC. */ > - newpat = subst (PATTERN (i3), i2dest, i2src, false, false, > - (i1_feeds_i2_n && i1dest_in_i1src) > - || ((i0_feeds_i2_n || (i0_feeds_i1_n && i1_feeds_i2_n)) > - && i0dest_in_i0src)); > + /* Try to combine zero_extract (DImode) and sign_extend (SImode to > DImode) > + for TARGET_TRULY_NOOP_TRUNCATION. The RTL may look like: > + > + (insn 10 49 11 2 (set (zero_extract:DI (reg/v:DI 200 [ val ]) > + (const_int 8 [0x8]) > + (const_int 0 [0])) > + (subreg:DI (reg:QI 202 [ *buf_8(D) ]) 0)) "xx.c":4:29 278 > {*insvdi} > + (expr_list:REG_DEAD (reg:QI 202 [ *buf_8(D) ]) (nil))) > + (insn 11 10 12 2 (set (reg/v:DI 200 [ val ]) > + > + (sign_extend:DI (subreg:SI (reg/v:DI 200 [ val ]) 0))) 238 > {extendsidi2} > + (nil)) > + > + On these architectures (MIPS64 as an example), the 32bit operation > + instructions will sign-extend the result to 64bit. 
The result can > be: > + > + (insn 10 49 11 2 (set (zero_extract:SI (subreg:SI (reg/v:DI 200 [ > val ]) 0) > + (const_int 8 [0x8]) > + (const_int 0 [0])) > + (subreg:SI (reg:QI 202 [ *buf_8(D) ]) 0)) "xx.c":4:29 280 > {*insvsi} > + (expr_list:REG_DEAD (reg:QI 202 [ *buf_8(D) ]) (nil))) > + */ > + if (i0 == 0 && i1 == 0 && i3 != 0 && i2 != 0 && GET_CODE (i2) == INSN > + && GET_CODE (i3) == INSN && GET_CODE (PATTERN (i2)) == SET > + && GET_CODE (PATTERN (i3)) == SET > + && GET_CODE (SET_DEST (single_set (i2))) == ZERO_EXTRACT > + && GET_CODE (SET_SRC (single_set (i3))) == SIGN_EXTEND > + && SUBREG_P (XEXP (SET_SRC (single_set (i3)), 0)) > + && REGNO (SUBREG_REG (XEXP (SET_SRC (single_set (i3)), 0))) > + == REGNO (SET_DEST (single_set (i3))) > + && REGNO (XEXP (SET_DEST (single_set (i2)), 0)) > + == REGNO (SET_DEST (single_set (i3))) > + && GET_MODE (SET_DEST (single_set (i2))) == DImode > + && GET_MODE (SET_DEST (single_set (i3))) == DImode > + && GET_MODE (XEXP (SET_SRC (single_set (i3)), 0)) == SImode > + && TRULY_NOOP_TRUNCATION_MODES_P (DImode, SImode)) > + { > + newpat = copy_rtx (PATTERN (i2)); > + PUT_MODE (SET_DEST (newpat), SImode); > + PUT_MODE (SET_SRC (newpat), SImode); > + > + rtx i2dest_r = XEXP (SET_DEST (newpat), 0); > + rtx i3src_r = XEXP (SET_SRC (single_set (i3)), 0); > + newpat = subst (newpat, i2dest_r, i3src_r, false, false, false); > + } > + else > + { > + /* If I1 feeds into I2 and I1DEST is in I1SRC, we need to make a > + unique copy of I2SRC each time we substitute it, in order to > + avoid creating self-referential RTL when we will be substituting > + I1SRC for I1DEST later. Likewise if I0 feeds into I2, either > + directly or indirectly through I1, and I0DEST is in I0SRC. 
*/ > + newpat = subst ( > + PATTERN (i3), i2dest, i2src, false, false, > + (i1_feeds_i2_n && i1dest_in_i1src) > + || ((i0_feeds_i2_n || (i0_feeds_i1_n && i1_feeds_i2_n)) > + && i0dest_in_i0src)); > + } > substed_i2 = true; > > /* Record whether I2's body now appears within I3's body. */ > @@ -5482,13 +5531,22 @@ subst (rtx x, rtx from, rtx to, bool in_dest, bool > in_cond, bool unique_copy) > } > else if (fmt[i] == 'e') > { > - /* If this is a register being set, ignore it. */ > new_rtx = XEXP (x, i); > - if (in_dest > - && i == 0 > - && (((code == SUBREG || code == ZERO_EXTRACT) > - && REG_P (new_rtx)) > - || code == STRICT_LOW_PART)) > + /* Allow replacing reg with subreg if it is sign extension. */ > + if (in_dest && (code == SUBREG || code == ZERO_EXTRACT) > + && TRULY_NOOP_TRUNCATION_MODES_P (DImode, SImode) > + && GET_MODE (from) == DImode && GET_MODE (to) == SImode > + && i == 0) > + { > + new_rtx > + = (unique_copy && n_occurrences ? copy_rtx (to) : to); > + n_occurrences++; > + } > + /* If this is a register being set, ignore it. */ > + else if (in_dest && i == 0 > + && (((code == SUBREG || code == ZERO_EXTRACT) > + && REG_P (new_rtx)) > + || code == STRICT_LOW_PART)) > ; > > else if (COMBINE_RTX_EQUAL_P (XEXP (x, i), from)) > diff --git a/gcc/testsuite/gcc.target/mips/pr104914.c > b/gcc/testsuite/gcc.target/mips/pr104914.c > new file mode 100644 > index 00000000000..fd6ef6af446 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/mips/pr104914.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=mips64r2 -mabi=64" } */ > + > +/* { dg-final { scan-assembler-not "\tdins\t" } } */ > + > +NOMIPS16 int test (const unsigned char *buf) > +{ > + int val; > + ((unsigned char*)&val)[0] = *buf++; > + ((unsigned char*)&val)[1] = *buf++; > + ((unsigned char*)&val)[2] = *buf++; > + ((unsigned char*)&val)[3] = *buf++; > + if(val > 0) > + return 1; > + else > + return 0; > +} > -- > 2.30.2 > -- YunQiang Su