This makes sure to use splats early when facing uniform internal
operands in BB SLP discovery rather than relying on the late
heuristics re-building nodes from scratch.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2020-10-27  Richard Biener  <rguent...@suse.de>

        * tree-vect-slp.c (vect_build_slp_tree_2): When vectorizing
        BBs splat uniform operands and stop SLP discovery.

        * gcc.target/i386/pr95866-1.c: Adjust.
---
 gcc/testsuite/gcc.target/i386/pr95866-1.c |  2 +-
 gcc/tree-vect-slp.c                       | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr95866-1.c 
b/gcc/testsuite/gcc.target/i386/pr95866-1.c
index 991370cf669..553d415eed8 100644
--- a/gcc/testsuite/gcc.target/i386/pr95866-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr95866-1.c
@@ -13,6 +13,6 @@ void foo(int i)
 
 /* We should not use vector operations for i + 1 and (i + 1) & 31 but
    instead use { j, j, j, j }.  */ 
-/* { dg-final { scan-tree-dump-times "Building parent vector operands from 
scalars" 2 "slp2" } } */
+/* { dg-final { scan-tree-dump-times "Using a splat of the uniform operand" 2 
"slp2" } } */
 /* { dg-final { scan-tree-dump-not " = \{i_" "slp2" } } */
 /* { dg-final { scan-tree-dump-times " = \{j_" 1 "slp2" } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 894f045c0fe..85865dae1ab 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1486,6 +1486,28 @@ vect_build_slp_tree_2 (vec_info *vinfo,
          continue;
        }
 
+      if (is_a <bb_vec_info> (vinfo)
+         && oprnd_info->first_dt == vect_internal_def)
+       {
+         /* For BB vectorization, if all defs are the same do not
+            bother to continue the build along the single-lane
+            graph but use a splat of the scalar value.  */
+         stmt_vec_info first_def = oprnd_info->def_stmts[0];
+         for (j = 1; j < group_size; ++j)
+           if (oprnd_info->def_stmts[j] != first_def)
+             break;
+         if (j == group_size
+             /* But avoid doing this for loads where we may be
+                able to CSE things.  */
+             && !gimple_vuse (first_def->stmt))
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_NOTE, vect_location,
+                                "Using a splat of the uniform operand\n");
+             oprnd_info->first_dt = vect_external_def;
+           }
+       }
+
       if (oprnd_info->first_dt != vect_internal_def
          && oprnd_info->first_dt != vect_reduction_def
          && oprnd_info->first_dt != vect_induction_def)
-- 
2.26.2

Reply via email to