This refines the fix for PR102226: the mode conversion (e.g. from V2DI to VNx2DI) is now done separately from the sign conversion, so that the saved accumulator retains the signedness it had before the original fix.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2021-09-15  Richard Biener  <rguent...@suse.de>

	PR tree-optimization/102318
	* tree-vect-loop.c (vect_transform_cycle_phi): Revert
	previous change and do the mode conversion separately from
	the sign conversion.

	* gcc.dg/vect/pr102318.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr102318.c | 21 +++++++++++++++++++++
 gcc/tree-vect-loop.c                 | 13 +++++++++++--
 2 files changed, 32 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr102318.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr102318.c b/gcc/testsuite/gcc.dg/vect/pr102318.c
new file mode 100644
index 00000000000..cc58efacecd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr102318.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+
+void
+vec_slp_int16_t (short int *restrict a, short int *restrict b, int n)
+{
+  short int x0 = b[0];
+  short int x1 = b[1];
+  short int x2 = b[2];
+  short int x3 = b[3];
+  for (int i = 0; i < n; ++i)
+    {
+      x0 += a[i * 4];
+      x1 += a[i * 4 + 1];
+      x2 += a[i * 4 + 2];
+      x3 += a[i * 4 + 3];
+    }
+  b[0] = x0;
+  b[1] = x1;
+  b[2] = x2;
+  b[3] = x3;
+}
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index c9dcc647d2c..5a5b8da2e77 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -7755,11 +7755,20 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
 						  (reduc_info),
 						&stmts);
 	    }
-	  if (!useless_type_conversion_p (vectype_out, TREE_TYPE (def)))
-	    def = gimple_convert (&stmts, vectype_out, def);
+	  /* The epilogue loop might use a different vector mode, like
+	     VNx2DI vs. V2DI.  */
+	  if (TYPE_MODE (vectype_out) != TYPE_MODE (TREE_TYPE (def)))
+	    {
+	      tree reduc_type = build_vector_type_for_mode
+		(TREE_TYPE (TREE_TYPE (def)), TYPE_MODE (vectype_out));
+	      def = gimple_convert (&stmts, reduc_type, def);
+	    }
 	  /* Adjust the input so we pick up the partially reduced value
 	     for the skip edge in vect_create_epilog_for_reduction.  */
 	  accumulator->reduc_input = def;
+	  /* And the reduction could be carried out using a different sign.  */
+	  if (!useless_type_conversion_p (vectype_out, TREE_TYPE (def)))
+	    def = gimple_convert (&stmts, vectype_out, def);
 	  if (loop_vinfo->main_loop_edge)
 	    {
 	      /* While we'd like to insert on the edge this will split
-- 
2.31.1