The first reassoc pass is supposed to strictly linearize expressions but re-propagation of negates can break this in case it is faced with (-a + -b) + c which it turns into c - (a + b). The root-cause is swap_ops_for_binary_stmt which prematurely associates two negates together. The following patch avoids this and allows the two new testcases to be SLP vectorized - previously reassoc made the expression trees not match up for SLP discovery.
This fixes the reassoc part of the PR. Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. OK? Thanks, Richard. 2020-11-17 Richard Biener <rguent...@suse.de> PR tree-optimization/97832 * tree-ssa-reassoc.c (op_negated_p): New function. (swap_ops_for_binary_stmt): Avoid swapping two negated operands together. (get_rank): Simplify. * gcc.dg/vect/pr97832-1.c: New testcase. * gcc.dg/vect/pr97832-2.c: Likewise. --- gcc/testsuite/gcc.dg/vect/pr97832-1.c | 17 +++++++++++++++ gcc/testsuite/gcc.dg/vect/pr97832-2.c | 29 ++++++++++++++++++++++++++ gcc/tree-ssa-reassoc.c | 30 ++++++++++++++++++++++----- 3 files changed, 71 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr97832-1.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr97832-2.c diff --git a/gcc/testsuite/gcc.dg/vect/pr97832-1.c b/gcc/testsuite/gcc.dg/vect/pr97832-1.c new file mode 100644 index 00000000000..063fc7bd717 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr97832-1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-Ofast" } */ +/* { dg-require-effective-target vect_double } */ + +double a[1024], b[1024], c[1024]; + +void foo() +{ + for (int i = 0; i < 256; ++i) + { + a[2*i] = a[2*i] + b[2*i] - c[2*i]; + a[2*i+1] = a[2*i+1] - b[2*i+1] - c[2*i+1]; + } +} + +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ +/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr97832-2.c b/gcc/testsuite/gcc.dg/vect/pr97832-2.c new file mode 100644 index 00000000000..4f0578120ee --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr97832-2.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-Ofast" } */ +/* { dg-require-effective-target vect_double } */ + +void foo1x1(double* restrict y, const double* restrict x, int clen) +{ + int xi = clen & 2; + double f_re = x[0+xi+0]; + double f_im = x[4+xi+0]; + int clen2 = (clen+xi) * 2; +#pragma GCC unroll 0 + for (int c = 0; c < clen2; c += 8) { + // y[c] = y[c] - x[c]*conj(f); +#pragma GCC unroll 4 + for (int k = 0; k < 4; ++k) { + double x_re = x[c+0+k]; + double x_im = x[c+4+k]; + double y_re = y[c+0+k]; + double y_im = y[c+4+k]; + y_re = y_re - x_re * f_re - x_im * f_im;; + y_im = y_im + x_re * f_im - x_im * f_re; + y[c+0+k] = y_re; + y[c+4+k] = y_im; + } + } +} + +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ +/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */ diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c index a2ca1713d4b..4b728ee69de 100644 --- a/gcc/tree-ssa-reassoc.c +++ b/gcc/tree-ssa-reassoc.c @@ -450,16 +450,18 @@ get_rank (tree e) FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE) rank = propagate_rank (rank, op); + rank += 1; + if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, "Rank for "); print_generic_expr (dump_file, e); - fprintf (dump_file, " is %ld\n", (rank + 1)); + fprintf (dump_file, " is %ld\n", rank); } /* Note the rank in the hashtable so we don't recompute it. */ - insert_operand_rank (e, (rank + 1)); - return (rank + 1); + insert_operand_rank (e, rank); + return rank; } /* Constants, globals, etc., are rank 0 */ @@ -4890,6 +4892,18 @@ remove_visited_stmt_chain (tree var) } } +/* Return true if OE is negated. */ + +static bool +op_negated_p (operand_entry *oe) +{ + if (TREE_CODE (oe->op) != SSA_NAME) + return false; + if (gassign *ass = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (oe->op))) + return gimple_assign_rhs_code (ass) == NEGATE_EXPR; + return false; +} + /* This function checks three consequtive operands in passed operands vector OPS starting from OPINDEX and swaps two operands if it is profitable for binary operation @@ -4921,13 +4935,19 @@ swap_ops_for_binary_stmt (vec<operand_entry *> ops, oe3 = ops[opindex + 2]; if ((oe1->rank == oe2->rank - && oe2->rank != oe3->rank) + && oe2->rank != oe3->rank + /* Avoid associating two negated operands together, this + undoes linearization during negate repropagation. */ + && (!(op_negated_p (oe1) && op_negated_p (oe2)) + || op_negated_p (oe3))) || (stmt && is_phi_for_stmt (stmt, oe3->op) && !is_phi_for_stmt (stmt, oe1->op) && !is_phi_for_stmt (stmt, oe2->op))) std::swap (*oe1, *oe3); else if ((oe1->rank == oe3->rank - && oe2->rank != oe3->rank) + && oe2->rank != oe3->rank + && (!(op_negated_p (oe1) && op_negated_p (oe3)) + || op_negated_p (oe2))) || (stmt && is_phi_for_stmt (stmt, oe2->op) && !is_phi_for_stmt (stmt, oe1->op) && !is_phi_for_stmt (stmt, oe3->op))) -- 2.26.2