The fold-left reduction transform relies on preserving the scalar
cycle PHI. The following rewrites how we connect this to the
involved stmt-infos instead of relying on the (actually bogus for
reduction chains) scalar stmts in SLP nodes more than absolutely
necessary. This also makes sure to not re-associate to form a
reduction chain when a fold-left reduction is required.
Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.
PR tree-optimization/122371
* tree-vect-loop.cc (vectorize_fold_left_reduction): Get
to the scalar def to replace via the scalar PHI backedge def.
* tree-vect-slp.cc (vect_analyze_slp_reduc_chain): Do not
re-associate to form a reduction chain if a fold-left
reduction is required.
* gcc.dg/vect/vect-pr122371.c: New testcase.
---
gcc/testsuite/gcc.dg/vect/vect-pr122371.c | 20 ++++++++++++++++++++
gcc/tree-vect-loop.cc | 17 ++++++++++-------
gcc/tree-vect-slp.cc | 6 +++++-
3 files changed, 35 insertions(+), 8 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-pr122371.c
diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr122371.c
b/gcc/testsuite/gcc.dg/vect/vect-pr122371.c
new file mode 100644
index 00000000000..fd03b846a9e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-pr122371.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+
+struct {
+ double lsum;
+} AnalyzeSamples_rgData;
+
+float *AnalyzeSamples_curleft;
+float AnalyzeSamples_sum_l;
+int AnalyzeSamples_i;
+
+void AnalyzeSamples() {
+ while (AnalyzeSamples_i--) {
+ float l1 = AnalyzeSamples_curleft[1] * AnalyzeSamples_curleft[1],
+ l3 = AnalyzeSamples_curleft[3] * AnalyzeSamples_curleft[3],
+ sl = l1 + l3;
+ AnalyzeSamples_sum_l += sl;
+ AnalyzeSamples_curleft += 4;
+ }
+ AnalyzeSamples_rgData.lsum += AnalyzeSamples_sum_l;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 15cb22023fc..617018f5aaf 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -6405,27 +6405,30 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
would also allow generalizing this for reduction paths of length > 1
and/or SLP reductions. */
slp_tree reduc_node = SLP_TREE_CHILDREN (slp_node)[reduc_index];
- tree reduc_var = vect_get_slp_scalar_def (reduc_node, 0);
+ stmt_vec_info reduc_var_def = SLP_TREE_SCALAR_STMTS (reduc_node)[0];
+ tree reduc_var = gimple_get_lhs (STMT_VINFO_STMT (reduc_var_def));
/* The operands either come from a binary operation or an IFN_COND operation.
The former is a gimple assign with binary rhs and the latter is a
gimple call with four arguments. */
gcc_assert (num_ops == 2 || num_ops == 4);
- int group_size = 1;
- stmt_vec_info scalar_dest_def_info;
auto_vec<tree> vec_oprnds0, vec_opmask;
vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[(is_cond_op ? 2 : 0)
+ (1 - reduc_index)],
&vec_oprnds0);
- group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
- scalar_dest_def_info = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
/* For an IFN_COND_OP we also need the vector mask operand. */
if (is_cond_op)
vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], &vec_opmask);
- gimple *sdef = vect_orig_stmt (scalar_dest_def_info)->stmt;
- tree scalar_dest = gimple_get_lhs (sdef);
+ /* The transform below relies on preserving the original scalar PHI
+ and its latch def which we replace. So work backwards from there. */
+ tree scalar_dest
+ = gimple_phi_arg_def_from_edge (as_a <gphi *> (STMT_VINFO_STMT
+ (reduc_var_def)),
+ loop_latch_edge (loop));
+ stmt_vec_info scalar_dest_def_info
+ = vect_stmt_to_vectorize (loop_vinfo->lookup_def (scalar_dest));
tree scalar_type = TREE_TYPE (scalar_dest);
int vec_num = vec_oprnds0.length ();
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 9d8e64b2712..9698709f567 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -4237,7 +4237,11 @@ vect_analyze_slp_reduc_chain (loop_vec_info vinfo,
reduction chain try to linearize an associative operation manually. */
if (scalar_stmts.length () == 1
&& code.is_tree_code ()
- && associative_tree_code ((tree_code)code))
+ && associative_tree_code ((tree_code)code)
+ /* We may not associate if a fold-left reduction is required. */
+ && !needs_fold_left_reduction_p (TREE_TYPE (gimple_get_lhs
+ (scalar_stmt->stmt)),
+ code))
{
auto_vec<chain_op_t> chain;
auto_vec<std::pair<tree_code, gimple *> > worklist;
--
2.51.0