When we do SLP discovery of a .MASK_LOAD for a dataref group with gaps,
the discovery for the mask will have gaps as well, and this was
unexpected in a few places.  The following re-organizes things
slightly to accommodate this.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

        PR tree-optimization/117484
        * tree-vect-slp.cc (vect_build_slp_tree_2): Handle gaps in
        mask discovery.  Fix condition to release the load permutation.
        (vect_lower_load_permutations): Assert we get no load
        permutation for the unpermuted node.
        * tree-vect-slp-patterns.cc (linear_loads_p): Properly identify
        loads (without permutation).
        (compatible_complex_nodes_p): Likewise.

        * gcc.dg/vect/pr117484-1.c: New testcase.
        * gcc.dg/vect/pr117484-2.c: Likewise.
---
 gcc/testsuite/gcc.dg/vect/pr117484-1.c | 13 +++++++++++++
 gcc/testsuite/gcc.dg/vect/pr117484-2.c | 16 ++++++++++++++++
 gcc/tree-vect-slp-patterns.cc          | 14 ++++++++++----
 gcc/tree-vect-slp.cc                   | 22 +++++++++++++---------
 4 files changed, 52 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr117484-1.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr117484-2.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr117484-1.c b/gcc/testsuite/gcc.dg/vect/pr117484-1.c
new file mode 100644
index 00000000000..453556c50f9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr117484-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+
+extern int a;
+extern short b[];
+extern signed char c[], d[];
+int main()
+{
+  for (long j = 3; j < 1024; j += 3)
+    if (c[j] ? b[j] : 0) {
+      b[j] = d[j - 2];
+      a = d[j];
+    }
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr117484-2.c b/gcc/testsuite/gcc.dg/vect/pr117484-2.c
new file mode 100644
index 00000000000..baffe7597ba
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr117484-2.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+
+int a;
+extern int d[];
+extern int b[];
+extern _Bool c[];
+extern char h[];
+int main()
+{
+  for (int i = 0; i < 1024; i += 4)
+    if (h[i] || c[i])
+      {
+       a = d[i];
+       b[i] = d[i - 3];
+      }
+}
diff --git a/gcc/tree-vect-slp-patterns.cc b/gcc/tree-vect-slp-patterns.cc
index 8adae8a6ec0..d62682be43c 100644
--- a/gcc/tree-vect-slp-patterns.cc
+++ b/gcc/tree-vect-slp-patterns.cc
@@ -221,9 +221,15 @@ linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root)
   perm_cache->put (root, retval);
 
   /* If it's a load node, then just read the load permute.  */
-  if (SLP_TREE_LOAD_PERMUTATION (root).exists ())
+  if (SLP_TREE_DEF_TYPE (root) == vect_internal_def
+      && SLP_TREE_CODE (root) != VEC_PERM_EXPR
+      && STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (root))
+      && DR_IS_READ (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (root))))
     {
-      retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root));
+      if (SLP_TREE_LOAD_PERMUTATION (root).exists ())
+       retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root));
+      else
+       retval = PERM_EVENODD;
       perm_cache->put (root, retval);
       return retval;
     }
@@ -798,8 +804,8 @@ compatible_complex_nodes_p (slp_compat_nodes_map_t *compat_cache,
        return false;
     }
 
-  if (!SLP_TREE_LOAD_PERMUTATION (a).exists ()
-      || !SLP_TREE_LOAD_PERMUTATION (b).exists ())
+  if (!STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (a))
+      || !STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (b)))
     {
       for (unsigned i = 0; i < gimple_num_args (a_stmt); i++)
        {
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index ffe9e718575..8e4ad05e2a4 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -2004,14 +2004,15 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
            = STMT_VINFO_GROUPED_ACCESS (stmt_info)
              ? DR_GROUP_FIRST_ELEMENT (stmt_info) : stmt_info;
          bool any_permute = false;
-         bool any_null = false;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
            {
              int load_place;
              if (! load_info)
                {
-                 load_place = j;
-                 any_null = true;
+                 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
+                   load_place = j;
+                 else
+                   load_place = 0;
                }
              else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
                load_place = vect_get_place_in_interleaving_chain
@@ -2022,11 +2023,6 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
              any_permute |= load_place != j;
              load_permutation.quick_push (load_place);
            }
-         if (any_null)
-           {
-             gcc_assert (!any_permute);
-             load_permutation.release ();
-           }
 
          if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
            {
@@ -2081,6 +2077,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
                     followed by 'node' being the desired final permutation.  */
                  if (unperm_load)
                    {
+                     gcc_assert
+                       (!SLP_TREE_LOAD_PERMUTATION (unperm_load).exists ());
                      lane_permutation_t lperm;
                      lperm.create (group_size);
                      for (unsigned j = 0; j < load_permutation.length (); ++j)
@@ -2101,6 +2099,10 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
            }
          else
            {
+             if (!any_permute
+                 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
+                 && group_size == DR_GROUP_SIZE (first_stmt_info))
+               load_permutation.release ();
              SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
              return node;
            }
@@ -2675,7 +2677,8 @@ out:
              tree op0;
              tree uniform_val = op0 = oprnd_info->ops[0];
              for (j = 1; j < oprnd_info->ops.length (); ++j)
-               if (!operand_equal_p (uniform_val, oprnd_info->ops[j]))
+               if (oprnd_info->ops[j]
+                   && !operand_equal_p (uniform_val, oprnd_info->ops[j]))
                  {
                    uniform_val = NULL_TREE;
                    break;
@@ -4510,6 +4513,7 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
                                         group_lanes,
                                         &max_nunits, matches, &limit,
                                         &tree_size, bst_map);
+      gcc_assert (!SLP_TREE_LOAD_PERMUTATION (l0).exists ());
 
       if (ld_lanes_lanes != 0)
        {
-- 
2.43.0

Reply via email to