https://gcc.gnu.org/g:343e1083eb9f57e05c0caba195f118ef2e95cc40

commit r15-7227-g343e1083eb9f57e05c0caba195f118ef2e95cc40
Author: Richard Biener <rguent...@suse.de>
Date:   Mon Jan 27 10:49:51 2025 +0100

    rtl-optimization/118662 - wrong combination of vector sign-extends
    
    The following fixes an issue in the RTL combiner where we correctly
    combine two vector sign-extends with a vector load
    
    Trying 7, 9 -> 10:
        7: r106:V4QI=[r119:DI]
          REG_DEAD r119:DI
        9: r108:V4HI=sign_extend(vec_select(r106:V4QI#0,parallel))
       10: r109:V4SI=sign_extend(vec_select(r108:V4HI#0,parallel))
          REG_DEAD r108:V4HI
    
    to
    
    modifying insn i2     9: r109:V4SI=sign_extend([r119:DI])
    
    but since r106 is used we wrongly materialize it using a subreg:
    
    modifying insn i3    10: r106:V4QI=r109:V4SI#0
    
    which of course does not work for modes with more than one component,
    specifically vector and complex modes.
    
            PR rtl-optimization/118662
            * combine.cc (try_combine): When re-materializing a load
            from an extended reg by a lowpart subreg make sure we're
            not dealing with vector or complex modes.
    
            * gcc.dg/torture/pr118662.c: New testcase.

Diff:
---
 gcc/combine.cc                          |  5 +++++
 gcc/testsuite/gcc.dg/torture/pr118662.c | 18 ++++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/gcc/combine.cc b/gcc/combine.cc
index a2d4387cebe8..b0159b23d869 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -3904,6 +3904,9 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
      copy.  This saves at least one insn, more if register allocation can
      eliminate the copy.
 
+     We cannot do this if the involved modes have more than one element,
+     as is the case for vector or complex modes.
+
      We cannot do this if the destination of the first assignment is a
      condition code register.  We eliminate this case by making sure
      the SET_DEST and SET_SRC have the same mode.
@@ -3919,6 +3922,8 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
           && GET_CODE (SET_SRC (XVECEXP (newpat, 0, 0))) == SIGN_EXTEND
           && (GET_MODE (SET_DEST (XVECEXP (newpat, 0, 0)))
               == GET_MODE (SET_SRC (XVECEXP (newpat, 0, 0))))
+          && ! VECTOR_MODE_P (GET_MODE (SET_DEST (XVECEXP (newpat, 0, 0))))
+          && ! COMPLEX_MODE_P (GET_MODE (SET_DEST (XVECEXP (newpat, 0, 0))))
           && GET_CODE (XVECEXP (newpat, 0, 1)) == SET
           && rtx_equal_p (SET_SRC (XVECEXP (newpat, 0, 1)),
                           XEXP (SET_SRC (XVECEXP (newpat, 0, 0)), 0))
diff --git a/gcc/testsuite/gcc.dg/torture/pr118662.c b/gcc/testsuite/gcc.dg/torture/pr118662.c
new file mode 100644
index 000000000000..b9e8cca0aeb5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr118662.c
@@ -0,0 +1,18 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-slp-vectorize -fno-vect-cost-model" } */
+/* { dg-additional-options "-msse4" { target sse4_runtime} } */
+
+int __attribute__((noipa)) addup(signed char *num) {
+  int val = num[0] + num[1] + num[2] + num[3];
+  if (num[3] >= 0)
+    val++;
+  return val;
+}
+
+int main(int, char *[])
+{
+  signed char num[4] = {1, 1, 1, -1};
+  if (addup(num) != 2)
+    __builtin_abort();
+  return 0;
+}

Reply via email to