https://gcc.gnu.org/g:32b1be7eb434addefe203249746dc6bbdc185403

commit r16-2785-g32b1be7eb434addefe203249746dc6bbdc185403
Author: Richard Biener <rguent...@suse.de>
Date:   Mon Aug 4 14:45:53 2025 +0200

    tree-optimization/121395 - SLP of SIMD calls w/o LHS
    
    The following records the alternate SLP instance entries coming from
    stmts with stores that have no SSA def, like OMP SIMD calls without an
    LHS.  There's a bit of fallout from having an SLP tree with a NULL
    vectype, but nothing too gross.
    
            PR tree-optimization/121395
            * tree-vectorizer.h (_loop_vec_info::alternate_defs): New member.
            (LOOP_VINFO_ALTERNATE_DEFS): New.
            * tree-vect-stmts.cc (vect_stmt_relevant_p): Populate it.
            (vectorizable_simd_clone_call): Do not register a SLP def
            when there is none.
            * tree-vect-slp.cc (vect_build_slp_tree_1): Allow a NULL
            vectype when there's no LHS.  Allow all calls w/o LHS.
            (vect_analyze_slp): Process LOOP_VINFO_ALTERNATE_DEFS as
            SLP graph entries.
            (vect_make_slp_decision): Handle a NULL SLP_TREE_VECTYPE.
            (vect_slp_analyze_node_operations_1): Likewise.
            (vect_schedule_slp_node): Likewise.
    
            * gcc.dg/vect/pr59984.c: Adjust.

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr59984.c |  4 ++++
 gcc/tree-vect-slp.cc                | 37 ++++++++++++++++++++++++++++---------
 gcc/tree-vect-stmts.cc              |  6 +++++-
 gcc/tree-vectorizer.h               |  5 +++++
 4 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr59984.c b/gcc/testsuite/gcc.dg/vect/pr59984.c
index c00c22671586..8ca446ea67c6 100644
--- a/gcc/testsuite/gcc.dg/vect/pr59984.c
+++ b/gcc/testsuite/gcc.dg/vect/pr59984.c
@@ -64,3 +64,7 @@ main ()
   return 0;
 }
 
+/* { dg-final { scan-tree-dump "31:17: optimized: loop vectorized" "vect" } } */
+/* { dg-final { scan-tree-dump "37:7: optimized: loop vectorized" "vect" } } */
+/* { dg-final { scan-tree-dump "44:17: optimized: loop vectorized" "vect" } } */
+/* { dg-final { scan-tree-dump "50:7: optimized: loop vectorized" "vect" } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index b46989505e04..2530216e9f18 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -1140,7 +1140,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
       soft_fail_nunits_vectype = nunits_vectype;
     }
 
-  gcc_assert (vectype);
+  gcc_assert (vectype || !gimple_get_lhs (first_stmt_info->stmt));
   *node_vectype = vectype;
 
   /* For every stmt in NODE find its def stmt/s.  */
@@ -1187,10 +1187,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
 
       gcall *call_stmt = dyn_cast <gcall *> (stmt);
       tree lhs = gimple_get_lhs (stmt);
-      if (lhs == NULL_TREE
-         && (!call_stmt
-             || !gimple_call_internal_p (stmt)
-             || !internal_store_fn_p (gimple_call_internal_fn (stmt))))
+      if (lhs == NULL_TREE && !call_stmt)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -4917,6 +4914,22 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
              return opt_result::failure_at (vect_location,
                                             "SLP build failed.\n");
          }
+
+      stmt_vec_info stmt_info;
+      FOR_EACH_VEC_ELT (LOOP_VINFO_ALTERNATE_DEFS (loop_vinfo), i, stmt_info)
+       {
+         vec<stmt_vec_info> stmts;
+         vec<stmt_vec_info> roots = vNULL;
+         vec<tree> remain = vNULL;
+         stmts.create (1);
+         stmts.quick_push (stmt_info);
+         if (! vect_build_slp_instance (vinfo, slp_inst_kind_store,
+                                        stmts, roots, remain, max_tree_size,
+                                        &limit, bst_map, NULL,
+                                        force_single_lane))
+           return opt_result::failure_at (vect_location,
+                                          "SLP build failed.\n");
+       }
     }
 
   if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo))
@@ -7634,7 +7647,8 @@ vect_make_slp_decision (loop_vec_info loop_vinfo)
       /* If all instances ended up with vector(1) T roots make sure to
         not vectorize.  RVV for example relies on loop vectorization
         when some instances are essentially kept scalar.  See PR121048.  */
-      if (known_gt (TYPE_VECTOR_SUBPARTS (SLP_TREE_VECTYPE (root)), 1U))
+      if (SLP_TREE_VECTYPE (root)
+         && known_gt (TYPE_VECTOR_SUBPARTS (SLP_TREE_VECTYPE (root)), 1U))
        decided_to_slp++;
     }
 
@@ -7961,7 +7975,10 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
      elements in a vector.  For single-defuse-cycle, lane-reducing op, and
      PHI statement that starts reduction comprised of only lane-reducing ops,
      the number is more than effective vector statements actually required.  */
-  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vect_get_num_copies (vinfo, node);
+  if (SLP_TREE_VECTYPE (node))
+    SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vect_get_num_copies (vinfo, node);
+  else
+    SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
 
   /* Handle purely internal nodes.  */
   if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
@@ -11318,8 +11335,10 @@ vect_schedule_slp_node (vec_info *vinfo,
 
   stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
 
-  gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
-  SLP_TREE_VEC_DEFS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
+  gcc_assert (!SLP_TREE_VECTYPE (node)
+             || SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
+  if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0)
+    SLP_TREE_VEC_DEFS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
 
   if (SLP_TREE_CODE (node) != VEC_PERM_EXPR
       && STMT_VINFO_DATA_REF (stmt_info))
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 6a6f2ccbbcef..f7a052b6660e 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -386,6 +386,9 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
          dump_printf_loc (MSG_NOTE, vect_location,
                            "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
+       if (! STMT_VINFO_DATA_REF (stmt_info)
+           && zero_ssa_operands (stmt_info->stmt, SSA_OP_DEF))
+         LOOP_VINFO_ALTERNATE_DEFS (loop_vinfo).safe_push (stmt_info);
       }
 
   /* uses outside the loop.  */
@@ -4752,7 +4755,8 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
            }
        }
 
-      SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
+      if (gimple_get_lhs (new_stmt))
+       SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
     }
 
   for (i = 0; i < nargs; ++i)
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index c8482c2b4a67..5b0eed3c46d0 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -947,6 +947,10 @@ public:
      stmt in the chain.  */
   auto_vec<stmt_vec_info> reduction_chains;
 
+  /* Defs that could not be analyzed such as OMP SIMD calls without
+     a LHS.  */
+  auto_vec<stmt_vec_info> alternate_defs;
+
   /* Cost vector for a single scalar iteration.  */
   auto_vec<stmt_info_for_cost> scalar_cost_vec;
 
@@ -1186,6 +1190,7 @@ public:
 #define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor
 #define LOOP_VINFO_INV_PATTERN_DEF_SEQ(L)  (L)->inv_pattern_def_seq
 #define LOOP_VINFO_DRS_ADVANCED_BY(L)      (L)->drs_advanced_by
+#define LOOP_VINFO_ALTERNATE_DEFS(L)       (L)->alternate_defs
 
 #define LOOP_VINFO_FULLY_MASKED_P(L)           \
   (LOOP_VINFO_USING_PARTIAL_VECTORS_P (L)      \

Reply via email to