From 4050630efd5b3dd534bef15589cb224ae497d586 Mon Sep 17 00:00:00 2001
From: "Bin Cheng" <bin.cheng@linux.alibaba.com>
Date: Mon, 14 Sep 2020 21:08:39 +0800
Subject: [PATCH] Skip output dependence if values stored are bytewise the
 same.

If two references store bytewise-identical values, the order of the two
references is not important.  In other words, the output dependence
can be skipped in various cases like loop distribution.  This patch
adds an interface for this check and uses it in loop distribution.

gcc/ChangeLog:

	PR tree-optimization/93334
	* tree-data-ref.c (trivial_output_dependence): New function.
	* tree-data-ref.h (trivial_output_dependence): New declaration.
	* tree-loop-distribution.c (pg_add_dependence_edges): Use above.
	* tree-loop-distribution.c (const_with_all_bytes_same): Move to
	* tree.c (const_with_all_bytes_same): ...here.
	* tree.h (const_with_all_bytes_same): New declaration.

gcc/testsuite/ChangeLog:

	PR tree-optimization/93334
	* gcc.dg/tree-ssa/pr93334.c: New test.
---
 gcc/testsuite/gcc.dg/tree-ssa/pr93334.c | 11 +++++
 gcc/tree-data-ref.c                     | 39 +++++++++++++++
 gcc/tree-data-ref.h                     |  3 ++
 gcc/tree-loop-distribution.c            | 65 ++-----------------------
 gcc/tree.c                              | 61 +++++++++++++++++++++++
 gcc/tree.h                              |  3 ++
 6 files changed, 121 insertions(+), 61 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr93334.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr93334.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93334.c
new file mode 100644
index 00000000000..74815a02cda
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93334.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-details" } */
+
+void test_simple_code(long l, double* mem, long ofs2) {
+  for (long k=0; k<l; k++) {
+      mem[k] = 0.0;
+      mem[ofs2 +k] = 0.0;
+  }
+}
+/* { dg-final { scan-tree-dump-not "Version loop \.\.\. with runtime alias check" "ldist" } } */
+/* { dg-final { scan-tree-dump-times "split to 0 loops and 2 library calls" 1 "ldist" } } */
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index 5505ba46778..b9a5a8b7f24 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -6053,3 +6053,42 @@ dr_known_forward_stride_p (struct data_reference *dr)
 				   ssize_int (0));
   return neg_step_val && integer_zerop (neg_step_val);
 }
+
+/* If A and B are two store data references, they could form an output
+   dependence; however, if A and B store the same constant value bytewise or
+   w.r.t. alignment of references, the order of A and B is not important.
+   Return true if that's the case.  Note it's caller's responsibility making
+   sure skipping the output dependence is safe.  */
+
+bool
+trivial_output_dependence (struct data_reference *a, struct data_reference *b)
+{
+  gimple *stmt1 = DR_STMT (a), *stmt2 = DR_STMT (b);
+  if (!is_gimple_assign (stmt1) || !is_gimple_assign (stmt2))
+    return false;
+
+  if (!gimple_assign_single_p (stmt1) || !gimple_assign_single_p (stmt2))
+    return false;
+
+  tree rhs1 = gimple_assign_rhs1 (stmt1), rhs2 = gimple_assign_rhs1 (stmt2);
+
+  if (!operand_equal_p (rhs1, rhs2, 0) || !CONSTANT_CLASS_P (rhs1))
+    return false;
+
+  /* RHS1 equals RHS2 here, so one query covers both.  -1 means the bytes
+     differ and must not count as a match, else we'd always return true.  */
+  if (const_with_all_bytes_same (rhs1) != -1)
+    return true;
+
+  HOST_WIDE_INT size = int_cst_value (TYPE_SIZE (TREE_TYPE (rhs1)));
+  unsigned HOST_WIDE_INT align = get_object_alignment (DR_REF (a));
+  if (align != get_object_alignment (DR_REF (b)))
+    return false;
+  /* Such stores either coincide exactly or do not overlap at all.  */
+  if (align == (unsigned HOST_WIDE_INT) size)
+    return true;
+
+  /* TODO: we can further check if diff of address of the two references is
+     multiple of size of stored constant.  */
+  return false;
+}
diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h
index 771d20fbbc3..1e3bde9981c 100644
--- a/gcc/tree-data-ref.h
+++ b/gcc/tree-data-ref.h
@@ -615,6 +615,9 @@ same_data_refs (data_reference_p a, data_reference_p b)
   return true;
 }
 
+bool
+trivial_output_dependence (struct data_reference*, struct data_reference*);
+
 /* Returns true when all the dependences are computable.  */
 
 inline bool
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index 888af48946f..83616e9ecbb 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -1073,67 +1073,6 @@ generate_loops_for_partition (class loop *loop, partition *partition,
   free (bbs);
 }
 
-/* If VAL memory representation contains the same value in all bytes,
-   return that value, otherwise return -1.
-   E.g. for 0x24242424 return 0x24, for IEEE double
-   747708026454360457216.0 return 0x44, etc.  */
-
-static int
-const_with_all_bytes_same (tree val)
-{
-  unsigned char buf[64];
-  int i, len;
-
-  if (integer_zerop (val)
-      || (TREE_CODE (val) == CONSTRUCTOR
-          && !TREE_CLOBBER_P (val)
-          && CONSTRUCTOR_NELTS (val) == 0))
-    return 0;
-
-  if (real_zerop (val))
-    {
-      /* Only return 0 for +0.0, not for -0.0, which doesn't have
-	 an all bytes same memory representation.  Don't transform
-	 -0.0 stores into +0.0 even for !HONOR_SIGNED_ZEROS.  */
-      switch (TREE_CODE (val))
-	{
-	case REAL_CST:
-	  if (!real_isneg (TREE_REAL_CST_PTR (val)))
-	    return 0;
-	  break;
-	case COMPLEX_CST:
-	  if (!const_with_all_bytes_same (TREE_REALPART (val))
-	      && !const_with_all_bytes_same (TREE_IMAGPART (val)))
-	    return 0;
-	  break;
-	case VECTOR_CST:
-	  {
-	    unsigned int count = vector_cst_encoded_nelts (val);
-	    unsigned int j;
-	    for (j = 0; j < count; ++j)
-	      if (const_with_all_bytes_same (VECTOR_CST_ENCODED_ELT (val, j)))
-		break;
-	    if (j == count)
-	      return 0;
-	    break;
-	  }
-	default:
-	  break;
-	}
-    }
-
-  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
-    return -1;
-
-  len = native_encode_expr (val, buf, sizeof (buf));
-  if (len == 0)
-    return -1;
-  for (i = 1; i < len; i++)
-    if (buf[i] != buf[0])
-      return -1;
-  return buf[0];
-}
-
 /* Generate a call to memset for PARTITION in LOOP.  */
 
 static void
@@ -2042,6 +1981,10 @@ loop_distribution::pg_add_dependence_edges (struct graph *rdg, int dir,
 	  if (DR_IS_READ (dr1) && DR_IS_READ (dr2))
 	    continue;
 
+	  if (DR_IS_WRITE (dr1) && DR_IS_WRITE (dr2)
+	      && trivial_output_dependence (dr1, dr2))
+	    continue;
+
 	  saved_dr1 = dr1;
 	  /* Re-shuffle data-refs to be in topological order.  */
 	  if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
diff --git a/gcc/tree.c b/gcc/tree.c
index 45aacadbe2d..f7edd6ef143 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -11874,6 +11874,67 @@ num_ending_zeros (const_tree x)
   return build_int_cst (TREE_TYPE (x), wi::ctz (wi::to_wide (x)));
 }
 
+/* If the memory representation of VAL has the same value in every byte,
+   return that byte value; return -1 if the bytes differ, if VAL cannot
+   be encoded by native_encode_expr, or if bytes are not 8 bits wide.
+   E.g. 0x24 for 0x24242424, 0x44 for IEEE double 747708026454360457216.0.  */
+
+int
+const_with_all_bytes_same (tree val)
+{
+  unsigned char buf[64];
+  int i, len;
+
+  if (integer_zerop (val)
+      || (TREE_CODE (val) == CONSTRUCTOR
+          && !TREE_CLOBBER_P (val)
+          && CONSTRUCTOR_NELTS (val) == 0))
+    return 0;
+
+  if (real_zerop (val))
+    {
+      /* Only return 0 for +0.0, not for -0.0, which doesn't have
+	 an all bytes same memory representation.  Don't transform
+	 -0.0 stores into +0.0 even for !HONOR_SIGNED_ZEROS.  */
+      switch (TREE_CODE (val))
+	{
+	case REAL_CST:
+	  if (!real_isneg (TREE_REAL_CST_PTR (val)))
+	    return 0;
+	  break;
+	case COMPLEX_CST:
+	  if (!const_with_all_bytes_same (TREE_REALPART (val))
+	      && !const_with_all_bytes_same (TREE_IMAGPART (val)))
+	    return 0;
+	  break;
+	case VECTOR_CST:
+	  {
+	    unsigned int count = vector_cst_encoded_nelts (val);
+	    unsigned int j;
+	    for (j = 0; j < count; ++j)
+	      if (const_with_all_bytes_same (VECTOR_CST_ENCODED_ELT (val, j)))
+		break;
+	    if (j == count)
+	      return 0;
+	    break;
+	  }
+	default:
+	  break;
+	}
+    }
+
+  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
+    return -1;
+
+  len = native_encode_expr (val, buf, sizeof (buf));
+  if (len == 0)
+    return -1;
+  for (i = 1; i < len; i++)
+    if (buf[i] != buf[0])
+      return -1;
+  return buf[0];
+}
+
 
 #define WALK_SUBTREE(NODE)				\
   do							\
diff --git a/gcc/tree.h b/gcc/tree.h
index 9ec24a3008b..65c769c8e43 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4782,6 +4782,9 @@ extern bool integer_truep (const_tree);
 extern bool cst_and_fits_in_hwi (const_tree);
 extern tree num_ending_zeros (const_tree);
 
+extern int
+const_with_all_bytes_same (tree);
+
 /* fixed_zerop (tree x) is nonzero if X is a fixed-point constant of
    value 0.  */
 
-- 
2.19.1.6.gb485710b

