Bootstrapped and regtested x86_64-redhat-linux, ppc64le-redhat-linux and
s390x-redhat-linux.  I also ran SPEC 2006 and 2017 on these platforms,
and the only measurable regression was 3% in 520.omnetpp_r on ppc, which
went away after inserting a single nop at the beginning of
cDynamicExpression::evaluate.

OK for master?

---

PR tree-optimization/49749 introduced code that shortens dependency
chains containing loop accumulators by placing them last on operand
lists of associative operations.

456.hmmer benchmark on s390 could benefit from this, however, the code
that needs it modifies loop accumulator before using it, and since only
so-called loop-carried phis are treated as loop accumulators, the
code in the present form doesn't really help.  According to Bill
Schmidt - the original author - such a conservative approach was chosen
so as to avoid unnecessarily swapping operands, which might cause
unpredictable effects.  However, giving special treatment to forms of
loop accumulators is acceptable.

The definition of loop-carried phi is: it's a single-use phi, which is
used in the same innermost loop it's defined in, at least one argument
of which is defined in the same innermost loop as the phi itself.
Given this, it seems natural to treat single uses of such phis as phis
themselves.

gcc/ChangeLog:

2020-05-06  Ilya Leoshkevich  <i...@linux.ibm.com>

        * passes.def (pass_reassoc): Rename parameter to early_p.
        * tree-ssa-reassoc.c (reassoc_bias_loop_carried_phi_ranks_p):
        New variable.
        (phi_rank): Don't bias loop-carried phi ranks
        before vectorization pass.
        (loop_carried_phi): Remove (superseded by
        operand_rank::biased_p).
        (propagate_rank): Propagate bias along single uses.
        (get_rank): Pass stmt to propagate_rank.
        (execute_reassoc): Add bias_loop_carried_phi_ranks_p parameter.
        (pass_reassoc::pass_reassoc): Add bias_loop_carried_phi_ranks_p
        initializer.
        (pass_reassoc::set_param): Set bias_loop_carried_phi_ranks_p
        value.
        (pass_reassoc::execute): Pass bias_loop_carried_phi_ranks_p to
        execute_reassoc.
        (pass_reassoc::bias_loop_carried_phi_ranks_p): New member.

gcc/testsuite/ChangeLog:

2020-05-06  Ilya Leoshkevich  <i...@linux.ibm.com>

        * gcc.target/s390/reassoc-1.c: New test.
        * gcc.target/s390/reassoc-2.c: New test.
        * gcc.target/s390/reassoc-3.c: New test.
        * gcc.target/s390/reassoc.h: New test.
---
 gcc/passes.def                            |  4 +-
 gcc/testsuite/gcc.target/s390/reassoc-1.c |  6 ++
 gcc/testsuite/gcc.target/s390/reassoc-2.c |  7 ++
 gcc/testsuite/gcc.target/s390/reassoc-3.c |  8 ++
 gcc/testsuite/gcc.target/s390/reassoc.h   | 22 +++++
 gcc/tree-ssa-reassoc.c                    | 97 ++++++++++++++---------
 6 files changed, 105 insertions(+), 39 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/reassoc-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/reassoc-2.c
 create mode 100644 gcc/testsuite/gcc.target/s390/reassoc-3.c
 create mode 100644 gcc/testsuite/gcc.target/s390/reassoc.h

diff --git a/gcc/passes.def b/gcc/passes.def
index 2b1e09fdda3..6864f583f20 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -235,7 +235,7 @@ along with GCC; see the file COPYING3.  If not see
         program and isolate those paths.  */
       NEXT_PASS (pass_isolate_erroneous_paths);
       NEXT_PASS (pass_dse);
-      NEXT_PASS (pass_reassoc, true /* insert_powi_p */);
+      NEXT_PASS (pass_reassoc, true /* early_p */);
       NEXT_PASS (pass_dce);
       NEXT_PASS (pass_forwprop);
       NEXT_PASS (pass_phiopt, false /* early_p */);
@@ -312,7 +312,7 @@ along with GCC; see the file COPYING3.  If not see
       NEXT_PASS (pass_lower_vector_ssa);
       NEXT_PASS (pass_lower_switch);
       NEXT_PASS (pass_cse_reciprocals);
-      NEXT_PASS (pass_reassoc, false /* insert_powi_p */);
+      NEXT_PASS (pass_reassoc, false /* early_p */);
       NEXT_PASS (pass_strength_reduction);
       NEXT_PASS (pass_split_paths);
       NEXT_PASS (pass_tracer);
diff --git a/gcc/testsuite/gcc.target/s390/reassoc-1.c 
b/gcc/testsuite/gcc.target/s390/reassoc-1.c
new file mode 100644
index 00000000000..8343f1cd4b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/reassoc-1.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include "reassoc.h"
+
+/* { dg-final { scan-assembler 
{(?n)\n\tl\t(%r\d+),.+(\n.*)*\n\ta\t(\1),.+(\n.*)*\n\tar\t(%r\d+),(\1)} } } */
diff --git a/gcc/testsuite/gcc.target/s390/reassoc-2.c 
b/gcc/testsuite/gcc.target/s390/reassoc-2.c
new file mode 100644
index 00000000000..5e393ed4937
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/reassoc-2.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#define MODIFY
+#include "reassoc.h"
+
+/* { dg-final { scan-assembler 
{(?n)\n\tl\t(%r\d+),.+(\n.*)*\n\ta\t(\1),.+(\n.*)*\n\tar\t(%r\d+),(\1)} } } */
diff --git a/gcc/testsuite/gcc.target/s390/reassoc-3.c 
b/gcc/testsuite/gcc.target/s390/reassoc-3.c
new file mode 100644
index 00000000000..ccbb627be7a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/reassoc-3.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#define MODIFY
+#define STORE_MODIFIED
+#include "reassoc.h"
+
+/* { dg-final { scan-assembler 
{(?n)\n\tl\t(%r\d+),.+(\n.*)*\n\ta\t(\1),.+(\n.*)*\n\tar\t(%r\d+),(\1)} } } */
diff --git a/gcc/testsuite/gcc.target/s390/reassoc.h 
b/gcc/testsuite/gcc.target/s390/reassoc.h
new file mode 100644
index 00000000000..b0e2b41202a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/reassoc.h
@@ -0,0 +1,22 @@
+#define M 1024
+unsigned int arr1[M];
+unsigned int arr2[M];
+volatile unsigned int sink;
+
+unsigned int
+test ()
+{
+  unsigned int sum = 0;
+  for (int i = 0; i < M; i++)
+    {
+#ifdef MODIFY
+      sum |= 1;
+#ifdef STORE_MODIFIED
+      sink = sum;
+#endif
+#endif
+      sum += arr1[i];
+      sum += arr2[i];
+    }
+  return sum;
+}
diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
index 2e67987f6c6..67be5312f42 100644
--- a/gcc/tree-ssa-reassoc.c
+++ b/gcc/tree-ssa-reassoc.c
@@ -177,6 +177,10 @@ along with GCC; see the file COPYING3.  If not see
    point 3a in the pass header comment.  */
 static bool reassoc_insert_powi_p;
 
+/* Enable biasing ranks of loop accumulators.  We don't want this before
+   vectorization, since it interferes with reduction chains.  */
+static bool reassoc_bias_loop_carried_phi_ranks_p;
+
 /* Statistics */
 static struct
 {
@@ -275,6 +279,9 @@ phi_rank (gimple *stmt)
   use_operand_p use;
   gimple *use_stmt;
 
+  if (!reassoc_bias_loop_carried_phi_ranks_p)
+    return bb_rank[bb->index];
+
   /* We only care about real loops (those with a latch).  */
   if (!father->latch)
     return bb_rank[bb->index];
@@ -312,48 +319,59 @@ phi_rank (gimple *stmt)
   return bb_rank[bb->index];
 }
 
-/* If EXP is an SSA_NAME defined by a PHI statement that represents a
-   loop-carried dependence of an innermost loop, return TRUE; else
-   return FALSE.  */
-static bool
-loop_carried_phi (tree exp)
-{
-  gimple *phi_stmt;
-  long block_rank;
-
-  if (TREE_CODE (exp) != SSA_NAME
-      || SSA_NAME_IS_DEFAULT_DEF (exp))
-    return false;
-
-  phi_stmt = SSA_NAME_DEF_STMT (exp);
-
-  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
-    return false;
-
-  /* Non-loop-carried phis have block rank.  Loop-carried phis have
-     an additional bias added in.  If this phi doesn't have block rank,
-     it's biased and should not be propagated.  */
-  block_rank = bb_rank[gimple_bb (phi_stmt)->index];
-
-  if (phi_rank (phi_stmt) != block_rank)
-    return true;
-
-  return false;
-}
-
 /* Return the maximum of RANK and the rank that should be propagated
    from expression OP.  For most operands, this is just the rank of OP.
-   For loop-carried phis, the value is zero to avoid undoing the bias
+   For loop-carried phis, the value is block rank to avoid undoing the bias
    in favor of the phi.  */
 static long
-propagate_rank (long rank, tree op)
+propagate_rank (long rank, tree op, gimple *stmt)
 {
   long op_rank;
 
-  if (loop_carried_phi (op))
-    return rank;
-
   op_rank = get_rank (op);
+  if (op_rank & PHI_LOOP_BIAS)
+    {
+      /* Treat loop-carried phis and elements of innermost-loop-internal
+        single-use chains originating from them identically: propagate ranks
+        (which are always biased) only along innermost-loop-internal
+        single-use chains.  Treat only GIMPLE_ASSIGNs to SSA_NAMEs as uses,
+        because only these participate in rank propagation.  This way in the
+        following code:
+
+          x_1 = phi(x_0, x_2)
+          q = x_1 & 1
+          .MEM = q
+          b = a + q
+          c = b + d
+          x_2 = c + e
+
+        the rank of q, which is essentially a form of loop accumulator, will
+        be biased.  Ranks of b and c will also be biased, which seems
+        unnecessary at first glance, however, this will have no consequences,
+        since b and c are intermediate results, and their ranks will be
+        discarded when creating + operand lists.  */
+      use_operand_p use;
+      imm_use_iterator use_iter;
+      gimple *use_stmt = NULL;
+      FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
+      if (is_gimple_assign (USE_STMT (use))
+         && TREE_CODE (gimple_assign_lhs (USE_STMT (use))) == SSA_NAME)
+       {
+         if (use_stmt == NULL)
+           {
+             use_stmt = USE_STMT (use);
+           }
+         else
+           {
+             use_stmt = NULL;
+             break;
+           }
+       }
+      if (use_stmt == NULL
+         || gimple_bb (stmt)->loop_father
+                != gimple_bb (use_stmt)->loop_father)
+       return bb_rank[gimple_bb (stmt)->index];
+    }
 
   return MAX (rank, op_rank);
 }
@@ -448,7 +466,7 @@ get_rank (tree e)
         thus have rank 0.  */
       rank = 0;
       FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
-       rank = propagate_rank (rank, op);
+       rank = propagate_rank (rank, op, stmt);
 
       if (dump_file && (dump_flags & TDF_DETAILS))
        {
@@ -6662,9 +6680,10 @@ fini_reassoc (void)
    optimization of a gimple conditional.  Otherwise returns zero.  */
 
 static unsigned int
-execute_reassoc (bool insert_powi_p)
+execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
 {
   reassoc_insert_powi_p = insert_powi_p;
+  reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;
 
   init_reassoc ();
 
@@ -6705,15 +6724,19 @@ public:
     {
       gcc_assert (n == 0);
       insert_powi_p = param;
+      bias_loop_carried_phi_ranks_p = !param;
     }
   virtual bool gate (function *) { return flag_tree_reassoc != 0; }
   virtual unsigned int execute (function *)
-    { return execute_reassoc (insert_powi_p); }
+  {
+    return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
+  }
 
  private:
   /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
      point 3a in the pass header comment.  */
   bool insert_powi_p;
+  bool bias_loop_carried_phi_ranks_p;
 }; // class pass_reassoc
 
 } // anon namespace
-- 
2.25.4

Reply via email to