The following handles detection of a reduction chain wrapped in a
conversion. This does not yet try to combine operands with different
signedness, but signed integer accumulation into either a signed or
an unsigned accumulator should now be handled fine.
Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
PR tree-optimization/122364
* tree-vect-slp.cc (vect_analyze_slp_reduc_chain): Re-try
linearization on a conversion source.
* gcc.dg/vect/vect-reduc-chain-5.c: New testcase.
---
.../gcc.dg/vect/vect-reduc-chain-5.c | 31 +++++++++
gcc/tree-vect-slp.cc | 64 +++++++++++++++++--
2 files changed, 90 insertions(+), 5 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-reduc-chain-5.c
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-chain-5.c
b/gcc/testsuite/gcc.dg/vect/vect-reduc-chain-5.c
new file mode 100644
index 00000000000..1566e5f3faf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-chain-5.c
@@ -0,0 +1,31 @@
+#include "tree-vect.h"
+
+int q[32];
+
+unsigned __attribute__((noipa)) /* Sum q[0..31], four elements per iteration.  */
+foo ()
+{
+ unsigned res = 0; /* Unsigned accumulator for the signed int addends.  */
+ for (int i = 0; i < 8; ++i)
+ res += q[4*i] + q[4*i+1] + q[4*i+2] + q[4*i+3]; /* Addends summed as int.  */
+ return res;
+}
+
+int main()
+{
+ check_vect ();
+
+ unsigned sum = 0; /* Reference result accumulated by the loop below.  */
+#pragma GCC novector
+ for (int i = 0; i < 32; ++i) /* Fill q and compute the expected total.  */
+ {
+ q[i] = i;
+ sum += i;
+ }
+
+ if (foo () != sum) /* foo must agree with the reference sum.  */
+ abort ();
+}
+
+/* { dg-final { scan-tree-dump "vectorizing a reduction chain" "vect" } } */
+/* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 53fe643be2c..9d8e64b2712 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -4233,6 +4233,8 @@ vect_analyze_slp_reduc_chain (loop_vec_info vinfo,
if (fail)
return false;
+ /* When the SSA def chain through reduc-idx does not form a natural
+ reduction chain try to linearize an associative operation manually. */
if (scalar_stmts.length () == 1
&& code.is_tree_code ()
&& associative_tree_code ((tree_code)code))
@@ -4243,11 +4245,6 @@ vect_analyze_slp_reduc_chain (loop_vec_info vinfo,
vect_slp_linearize_chain (vinfo, worklist, chain, (tree_code)code,
scalar_stmts[0]->stmt, op_stmt, other_op_stmt,
NULL);
- if (chain.length () < 3)
- {
- scalar_stmts.release ();
- return false;
- }
scalar_stmts.truncate (0);
stmt_vec_info tail = NULL;
@@ -4272,6 +4269,63 @@ vect_analyze_slp_reduc_chain (loop_vec_info vinfo,
}
gcc_assert (tail);
+ /* When this linearization didn't produce a chain see if stripping
+ a wrapping sign conversion produces one. */
+ if (scalar_stmts.length () == 1)
+ {
+ gimple *stmt = scalar_stmts[0]->stmt;
+ if (!is_gimple_assign (stmt)
+ || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
+ || TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME
+ || !tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (stmt)),
+ TREE_TYPE (gimple_assign_rhs1 (stmt))))
+ {
+ scalar_stmts.release ();
+ return false;
+ }
+ stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
+ if (!is_gimple_assign (stmt)
+ || gimple_assign_rhs_code (stmt) != (tree_code)code)
+ {
+ scalar_stmts.release ();
+ return false;
+ }
+ chain.truncate (0);
+ vect_slp_linearize_chain (vinfo, worklist, chain, (tree_code)code,
+ stmt, op_stmt, other_op_stmt, NULL);
+
+ scalar_stmts.truncate (0);
+ tail = NULL;
+ for (auto el : chain)
+ {
+ if (el.dt == vect_external_def
+ || el.dt == vect_constant_def
+ || el.code != (tree_code) code)
+ {
+ scalar_stmts.release ();
+ return false;
+ }
+ stmt_vec_info stmt = vinfo->lookup_def (el.op);
+ if (STMT_VINFO_REDUC_IDX (stmt) != -1
+ || STMT_VINFO_REDUC_DEF (stmt))
+ {
+ gcc_assert (tail == NULL);
+ tail = stmt;
+ continue;
+ }
+ scalar_stmts.safe_push (stmt);
+ }
+ /* Unlike the above this does not include the reduction SSA
+ cycle. */
+ gcc_assert (!tail);
+ }
+
+ if (scalar_stmts.length () < 2)
+ {
+ scalar_stmts.release ();
+ return false;
+ }
+
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
--
2.51.0