https://gcc.gnu.org/g:4f2a35a76cca503749c696e7772d2e8eadc77ba5

commit r13-8727-g4f2a35a76cca503749c696e7772d2e8eadc77ba5
Author: Richard Biener <rguent...@suse.de>
Date:   Mon Mar 18 12:39:03 2024 +0100

    tree-optimization/114375 - disallow SLP discovery of permuted mask loads
    
    We cannot currently handle permutations of mask loads in code generation
    or permute optimization, yet we simply drop any such permutation on the
    floor.  The following therefore rejects the SLP build rather than
    producing wrong code.  I've also made sure to reject them in
    vectorizable_load for completeness.
    
            PR tree-optimization/114375
            * tree-vect-slp.cc (vect_build_slp_tree_2): Compute the
            load permutation for masked loads but reject it when any
            such is necessary.
            * tree-vect-stmts.cc (vectorizable_load): Reject masked
            VMAT_ELEMENTWISE and VMAT_STRIDED_SLP as those are not
            supported.
    
            * gcc.dg/vect/vect-pr114375.c: New testcase.
    
    (cherry picked from commit 94c3508c5a14d1948fe3bffa9e16c6f3d9c2836a)
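
For context, a minimal sketch of the kind of source that runs into the
problem (names and indices are illustrative, not taken from the patch):
after if-conversion each guarded read becomes an IFN_MASK_LOAD from the
interleaved group, and consuming the lanes in swapped order would require
a load permutation that masked-load code generation cannot honor:

    void f (int *restrict out, int *restrict p, int *restrict m)
    {
      for (int i = 0; i < 64; ++i)
        {
          /* Two guarded reads from the interleaved group p[2*i],
             p[2*i + 1]; if-conversion turns each into a masked load.  */
          int x = m[2*i] ? p[2*i] : 0;
          int y = m[2*i + 1] ? p[2*i + 1] : 0;
          /* The lanes are consumed in swapped order, so the SLP load
             node would need load permutation { 1, 0 }.  */
          out[2*i] = y;
          out[2*i + 1] = x;
        }
    }

Before the fix, SLP discovery dropped that { 1, 0 } permutation on the
floor and the vectorized loop read the lanes in the wrong order.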

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect-pr114375.c | 44 +++++++++++++++++++++++++++++++
 gcc/tree-vect-slp.cc                      | 34 +++++++++++++++++++-----
 gcc/tree-vect-stmts.cc                    |  8 ++++++
 3 files changed, 79 insertions(+), 7 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr114375.c b/gcc/testsuite/gcc.dg/vect/vect-pr114375.c
new file mode 100644
index 000000000000..1e1cb0123d07
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-pr114375.c
@@ -0,0 +1,44 @@
+/* { dg-additional-options "-mavx2" { target avx2_runtime } } */
+
+#include "tree-vect.h"
+
+int a[512];
+int b[512];
+int c[512];
+
+void __attribute__((noipa))
+foo(int * __restrict p)
+{
+  for (int i = 0; i < 64; ++i)
+    {
+      int tem = 2, tem2 = 2;
+      if (a[4*i + 1])
+        tem = p[4*i];
+      if (a[4*i])
+        tem2 = p[4*i + 2];
+      b[2*i] = tem2;
+      b[2*i+1] = tem;
+      if (a[4*i + 2])
+        tem = p[4*i + 1];
+      if (a[4*i + 3])
+        tem2 = p[4*i + 3];
+      c[2*i] = tem2;
+      c[2*i+1] = tem;
+    }
+}
+int main()
+{
+  check_vect ();
+
+  for (int i = 0; i < 512; ++i)
+    a[i] = (i >> 1) & 1;
+
+  foo (a);
+
+  if (c[0] != 1 || c[1] != 0 || c[2] != 1 || c[3] != 0
+      || b[0] != 2 || b[1] != 2 || b[2] != 2 || b[3] != 2)
+    abort ();
+
+  return 0;
+}
+
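
To see why the expected values in main hold: with a[i] = (i >> 1) & 1 the
array begins 0, 0, 1, 1, 0, 0, 1, 1, ...  For i == 0 the guards a[1] and
a[0] are zero, so tem and tem2 keep their default 2 and b[0] == b[1] == 2
(iteration i == 1 likewise leaves b[2] == b[3] == 2), while a[2] and a[3]
are one, so tem = p[1] = 0 and tem2 = p[3] = 1, giving c[0] == 1 and
c[1] == 0; i == 1 repeats the pattern for c[2] == 1 and c[3] == 0.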
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index bbc05fac65ec..c01dc02afff6 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -1780,10 +1780,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
   if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
       && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
     {
-      if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
-       gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
-                   || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
-                   || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD));
+      if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+       gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
       else
        {
          *max_nunits = this_max_nunits;
@@ -1799,15 +1797,37 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
          load_permutation.create (group_size);
          stmt_vec_info first_stmt_info
            = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
+         bool any_permute = false;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
            {
              int load_place = vect_get_place_in_interleaving_chain
                  (load_info, first_stmt_info);
              gcc_assert (load_place != -1);
-             load_permutation.safe_push (load_place);
+             any_permute |= load_place != j;
+             load_permutation.quick_push (load_place);
+           }
+
+         if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
+           {
+             gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
+                         || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
+                         || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD));
+             load_permutation.release ();
+             /* We cannot handle permuted masked loads, see PR114375.  */
+             if (any_permute
+                 || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
+                     && DR_GROUP_SIZE (first_stmt_info) != group_size)
+                 || STMT_VINFO_STRIDED_P (stmt_info))
+               {
+                 matches[0] = false;
+                 return NULL;
+               }
+           }
+         else
+           {
+             SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
+             return node;
            }
-         SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
-         return node;
        }
     }
   else if (gimple_assign_single_p (stmt_info->stmt)
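
The shape of the tree-vect-slp.cc change: the lane placement is now
computed for all grouped loads so that any_permute records whether the
order deviates from the identity.  Plain loads keep attaching the
permutation to the SLP node as before, while for the internal-call loads
(IFN_MASK_LOAD and the gather variants) the permutation is released and
the build is failed via matches[0] = false whenever a non-identity order,
a group-size mismatch, or a strided access would require one.  The switch
from safe_push to quick_push is fine because load_permutation.create
(group_size) already reserved room for every lane.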
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 6b7dbfd4a231..e3dea33e04a7 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -9121,6 +9121,14 @@ vectorizable_load (vec_info *vinfo,
                             "unsupported masked emulated gather.\n");
          return false;
        }
+      else if (memory_access_type == VMAT_ELEMENTWISE
+              || memory_access_type == VMAT_STRIDED_SLP)
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "unsupported masked strided access.\n");
+         return false;
+       }
     }
 
   if (!vec_stmt) /* transformation not required.  */
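
The vectorizable_load hunk is a second line of defense: VMAT_ELEMENTWISE
and VMAT_STRIDED_SLP are the fallback strategies that assemble vectors
from individual or strided scalar accesses, and masking is not supported
for them, so a masked load that ends up classified that way is now
rejected early with a missed-optimization note instead of falling through
to unsupported code paths.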
