From: Richard Sandiford <[email protected]>

The problem seems to be with a packing permutation:

    op0[4] op0[5] op0[6] op0[7]

and with the identity_offset parameter to vect_add_slp_permutation.

Both the repeating_p and !repeating_p paths correctly realise that this
permutation reduces to an identity.  But the !repeating_p path ends up with
first_node and second_node both set to the second VEC_PERM_EXPR operand
(since that path works elementwise, and since no elements are taken from
the first input).  Therefore, the call:

      vect_add_slp_permutation (vinfo, gsi, node, first_def,
                                second_def, mask_vec, mask[0]);

works regardless of whether vect_add_slp_permutation picks first_def or
second_def.  In that sense, the parameters to vect_add_slp_permutation are
already “canonical”.

The repeating_p path instead passes vector 2N as first_def and vector 2N+1
as second_def, with mask[0] indicating the position of the identity within
the concatenation of first_def and second_def.  However,
vect_add_slp_permutation doesn't expect this and instead ignores the
identity_offset parameter.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

        PR tree-optimization/122793
        * tree-vect-slp.cc (vect_add_slp_permutation): Document the existing
        identity_offset parameter.  Handle identities that take from the
        second input rather than the first.

        * gcc.dg/vect/vect-pr122793.c: New testcase.

Co-authored-by: Richard Biener <[email protected]>
---
 gcc/testsuite/gcc.dg/vect/vect-pr122793.c | 28 ++++++++++
 gcc/tree-vect-slp.cc                      | 67 ++++++++++++++---------
 2 files changed, 69 insertions(+), 26 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-pr122793.c

diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr122793.c 
b/gcc/testsuite/gcc.dg/vect/vect-pr122793.c
new file mode 100644
index 00000000000..18af3235b84
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-pr122793.c
@@ -0,0 +1,28 @@
+/* { dg-additional-options "-msse4" { target sse4_runtime } } */
+
+#include "tree-vect.h"
+
+static void
+foo (unsigned char *d, unsigned char *s, int e, int f)
+{
+  for (int i = 0; i < 4; i++)
+    {
+      d[0] = s[-2];
+      d[5] = (s[5] + s[6]) * 2 - (s[4] + s[7]);
+      d[6] = (s[6] + s[7]) * 2 - (s[5] + s[8]);
+      d[7] = (s[7] + s[8]) * 2 - (s[6] + s[9]);
+      d += e;
+      s += f;
+    }
+}
+
+unsigned char s[128] = { 2 }, d[128];
+
+int
+main ()
+{
+  check_vect ();
+  foo (d, s + 2, 16, 16);
+  if (d[5] != 0)
+    __builtin_abort ();
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 4e01adf4cd3..e4e0320c678 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -11365,7 +11365,11 @@ vect_transform_slp_perm_load (vec_info *vinfo,
 
    otherwise add:
 
-      <new SSA name> = FIRST_DEF.  */
+      <new SSA name> = VEC_PERM_EXPR <FIRST_DEF, SECOND_DEF,
+                                     { N, N+1, N+2, ... }>
+
+   where N == IDENTITY_OFFSET which is either zero or equal to the
+   number of elements of the result.  */
 
 static void
 vect_add_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
@@ -11405,36 +11409,47 @@ vect_add_slp_permutation (vec_info *vinfo, 
gimple_stmt_iterator *gsi,
                                       first_def, second_def,
                                       mask_vec);
     }
-  else if (!types_compatible_p (TREE_TYPE (first_def), vectype))
+  else
     {
-      /* For identity permutes we still need to handle the case
-        of offsetted extracts or concats.  */
-      unsigned HOST_WIDE_INT c;
-      auto first_def_nunits
-       = TYPE_VECTOR_SUBPARTS (TREE_TYPE (first_def));
-      if (known_le (TYPE_VECTOR_SUBPARTS (vectype), first_def_nunits))
+      auto def_nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (first_def));
+      unsigned HOST_WIDE_INT vecno;
+      poly_uint64 eltno;
+      if (!can_div_trunc_p (poly_uint64 (identity_offset), def_nunits,
+                           &vecno, &eltno))
+       gcc_unreachable ();
+      tree def = vecno & 1 ? second_def : first_def;
+      if (!types_compatible_p (TREE_TYPE (def), vectype))
        {
-         unsigned HOST_WIDE_INT elsz
-           = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (TREE_TYPE (first_def))));
-         tree lowpart = build3 (BIT_FIELD_REF, vectype, first_def,
-                                TYPE_SIZE (vectype),
-                                bitsize_int (identity_offset * elsz));
-         perm_stmt = gimple_build_assign (perm_dest, lowpart);
+         /* For identity permutes we still need to handle the case
+            of offsetted extracts or concats.  */
+         unsigned HOST_WIDE_INT c;
+         if (known_le (TYPE_VECTOR_SUBPARTS (vectype), def_nunits))
+           {
+             unsigned HOST_WIDE_INT elsz
+               = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (TREE_TYPE (def))));
+             tree lowpart = build3 (BIT_FIELD_REF, vectype, def,
+                                    TYPE_SIZE (vectype),
+                                    bitsize_int (eltno * elsz));
+             perm_stmt = gimple_build_assign (perm_dest, lowpart);
+           }
+         else if (constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype),
+                                       def_nunits, &c) && c == 2)
+           {
+             gcc_assert (known_eq (identity_offset, 0U));
+             tree ctor = build_constructor_va (vectype, 2,
+                                               NULL_TREE, first_def,
+                                               NULL_TREE, second_def);
+             perm_stmt = gimple_build_assign (perm_dest, ctor);
+           }
+         else
+           gcc_unreachable ();
        }
-      else if (constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype),
-                                   first_def_nunits, &c) && c == 2)
+      else
        {
-         tree ctor = build_constructor_va (vectype, 2, NULL_TREE, first_def,
-                                           NULL_TREE, second_def);
-         perm_stmt = gimple_build_assign (perm_dest, ctor);
+         /* We need a copy here in case the def was external.  */
+         gcc_assert (known_eq (eltno, 0U));
+         perm_stmt = gimple_build_assign (perm_dest, def);
        }
-      else
-       gcc_unreachable ();
-    }
-  else
-    {
-      /* We need a copy here in case the def was external.  */
-      perm_stmt = gimple_build_assign (perm_dest, first_def);
     }
   vect_finish_stmt_generation (vinfo, NULL, perm_stmt, gsi);
   /* Store the vector statement in NODE.  */
-- 
2.51.0

Reply via email to