https://gcc.gnu.org/g:343e1083eb9f57e05c0caba195f118ef2e95cc40
commit r15-7227-g343e1083eb9f57e05c0caba195f118ef2e95cc40
Author: Richard Biener <rguent...@suse.de>
Date: Mon Jan 27 10:49:51 2025 +0100

    rtl-optimization/118662 - wrong combination of vector sign-extends

    The following fixes an issue in the RTL combiner where we correctly
    combine two vector sign-extends with a vector load

    Trying 7, 9 -> 10:
        7: r106:V4QI=[r119:DI]
          REG_DEAD r119:DI
        9: r108:V4HI=sign_extend(vec_select(r106:V4QI#0,parallel))
       10: r109:V4SI=sign_extend(vec_select(r108:V4HI#0,parallel))
          REG_DEAD r108:V4HI

    to

    modifying insn i2 9: r109:V4SI=sign_extend([r119:DI])

    but since r106 is used we wrongly materialize it using a subreg:

    modifying insn i3 10: r106:V4QI=r109:V4SI#0

    which of course does not work for modes with more than one component,
    specifically vector and complex modes.

            PR rtl-optimization/118662
            * combine.cc (try_combine): When re-materializing a load
            from an extended reg by a lowpart subreg make sure we're
            not dealing with vector or complex modes.

            * gcc.dg/torture/pr118662.c: New testcase.

Diff:
---
 gcc/combine.cc                          |  5 +++++
 gcc/testsuite/gcc.dg/torture/pr118662.c | 18 ++++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/gcc/combine.cc b/gcc/combine.cc
index a2d4387cebe8..b0159b23d869 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -3904,6 +3904,9 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
      copy. This saves at least one insn, more if register allocation can
      eliminate the copy.
 
+     We cannot do this if the involved modes have more than one elements,
+     like for vector or complex modes.
+
      We cannot do this if the destination of the first assignment is a
      condition code register. We eliminate this case by making sure
      the SET_DEST and SET_SRC have the same mode.
@@ -3919,6 +3922,8 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
       && GET_CODE (SET_SRC (XVECEXP (newpat, 0, 0))) == SIGN_EXTEND
       && (GET_MODE (SET_DEST (XVECEXP (newpat, 0, 0)))
           == GET_MODE (SET_SRC (XVECEXP (newpat, 0, 0))))
+      && ! VECTOR_MODE_P (GET_MODE (SET_DEST (XVECEXP (newpat, 0, 0))))
+      && ! COMPLEX_MODE_P (GET_MODE (SET_DEST (XVECEXP (newpat, 0, 0))))
       && GET_CODE (XVECEXP (newpat, 0, 1)) == SET
       && rtx_equal_p (SET_SRC (XVECEXP (newpat, 0, 1)),
                       XEXP (SET_SRC (XVECEXP (newpat, 0, 0)), 0))
diff --git a/gcc/testsuite/gcc.dg/torture/pr118662.c b/gcc/testsuite/gcc.dg/torture/pr118662.c
new file mode 100644
index 000000000000..b9e8cca0aeb5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr118662.c
@@ -0,0 +1,18 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-slp-vectorize -fno-vect-cost-model" } */
+/* { dg-additional-options "-msse4" { target sse4_runtime} } */
+
+int __attribute__((noipa)) addup(signed char *num) {
+  int val = num[0] + num[1] + num[2] + num[3];
+  if (num[3] >= 0)
+    val++;
+  return val;
+}
+
+int main(int, char *[])
+{
+  signed char num[4] = {1, 1, 1, -1};
+  if (addup(num) != 2)
+    __builtin_abort();
+  return 0;
+}