On 06/07/2017 02:23 AM, Richard Biener wrote:
On Wed, Jun 7, 2017 at 5:26 AM, Martin Sebor <mse...@gmail.com> wrote:
Note I'd be _much_ more sympathetic to simply canonicalizing all of
bzero and bcopy
to memset / memmove and be done with all the above complexity.


Attached is an updated patch along these lines.  Please let me
know if it matches your expectations.

I think you attached the wrong patch.

Yes I did, sorry.  The correct one is attached.

Martin


Richard.

FWIW, although I don't feel too strongly about bzero et al. I'm
not sure that this approach is the right one in general.  It might
(slightly) simplify GCC itself, but other than the incidental code
generation improvement, it offers no benefit to users.  In some
cases, it even degrades user experience by causing GCC to issue
diagnostics that refer to functions that don't appear in the source
code, such as for:

  char d[1];

  void* f (const void *p)
  {
    bzero (d, 7);
  }

  warning: ‘__builtin_memset’ writing 7 bytes into a region of size 1
overflows the destination [-Wstringop-overflow=]

For some functions like mempcpy it might even result in worse code
overall (slower and bigger).

In other cases (like profiling) it loses interesting information.

I think these types of transformations would be justified if they
were done based on measurably improved efficiency of the generated
code, but I'm uneasy about swapping calls to one function for another
solely because it simplifies the implementation.  Not least because
it doesn't seem like a viable general approach to simplifying the
implementation.

Martin

PS I stopped short of simplifying GCC to remove the existing special
handling of these three built-ins.  If the patch is approved I'm
willing to do the cleanup in a subsequent pass.

PR tree-optimization/80934 - bzero should be assumed not to escape pointer argument
PR tree-optimization/80933 - redundant bzero/bcopy calls not eliminated

gcc/ChangeLog:

	PR tree-optimization/80933
	PR tree-optimization/80934
	* builtins.c (fold_builtin_bcmp, fold_builtin_bcopy): New functions.
	(fold_builtin_bzero): Likewise.
	(fold_builtin_2): Handle bzero.
	(fold_builtin_3): Handle bcmp and bcopy.

gcc/testsuite/ChangeLog:

	PR tree-optimization/80933
	PR tree-optimization/80934
	* gcc.dg/fold-bcopy.c: New test.
	* gcc.dg/tree-ssa/ssa-dse-30.c: Likewise.
	* gcc.dg/tree-ssa/alias-36.c: Likewise.
	* gcc.dg/pr79214.c: Adjust.

diff --git a/gcc/builtins.c b/gcc/builtins.c
index 30462ad..c6a2ec5 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -145,6 +145,9 @@ static rtx expand_builtin_unop (machine_mode, tree, rtx, rtx, optab);
 static rtx expand_builtin_frame_address (tree, tree);
 static tree stabilize_va_list_loc (location_t, tree, int);
 static rtx expand_builtin_expect (tree, rtx);
+static tree fold_builtin_bcmp (location_t, tree, tree, tree);
+static tree fold_builtin_bcopy (location_t, tree, tree, tree);
+static tree fold_builtin_bzero (location_t, tree, tree);
 static tree fold_builtin_constant_p (tree);
 static tree fold_builtin_classify_type (tree);
 static tree fold_builtin_strlen (location_t, tree, tree);
@@ -7982,6 +7985,53 @@ fold_builtin_sincos (location_t loc,
 			 fold_build1_loc (loc, REALPART_EXPR, type, call)));
 }
 
+/* Fold a call to built-in bzero with arguments DST and LEN into a call to
+   built-in memset (DST, 0, LEN).  Return NULL_TREE if validate_arg fails.  */
+
+static tree
+fold_builtin_bzero (location_t loc, tree dst, tree len)
+{
+  if (!validate_arg (dst, POINTER_TYPE)
+      || !validate_arg (len, INTEGER_TYPE))
+    return NULL_TREE;
+
+  tree fn = builtin_decl_implicit (BUILT_IN_MEMSET);
+  return build_call_expr_loc (loc, fn, 3, dst, integer_zero_node, len);
+}
+
+/* Fold bcmp (ARG1, ARG2, LEN): return fold_builtin_memcmp's result if any,
+   else memcmp (ARG1, ARG2, LEN); NULL_TREE if validate_arg fails.  */
+
+static tree
+fold_builtin_bcmp (location_t loc, tree arg1, tree arg2, tree len)
+{
+  if (tree ret = fold_builtin_memcmp (loc, arg1, arg2, len))
+    return ret;
+
+  if (!validate_arg (arg1, POINTER_TYPE)
+      || !validate_arg (arg2, POINTER_TYPE)
+      || !validate_arg (len, INTEGER_TYPE))
+    return NULL_TREE;
+
+  tree fn = builtin_decl_implicit (BUILT_IN_MEMCMP);
+  return build_call_expr_loc (loc, fn, 3, arg1, arg2, len);
+}
+
+/* Fold bcopy (SRC, DST, LEN) into memmove (DST, SRC, LEN); memmove, not
+   memcpy, because bcopy must handle overlap.  NULL_TREE on invalid args.  */
+
+static tree
+fold_builtin_bcopy (location_t loc, tree src, tree dst, tree len)
+{
+  if (!validate_arg (src, POINTER_TYPE)
+      || !validate_arg (dst, POINTER_TYPE)
+      || !validate_arg (len, INTEGER_TYPE))
+    return NULL_TREE;
+
+  tree fn = builtin_decl_implicit (BUILT_IN_MEMMOVE);
+  return build_call_expr_loc (loc, fn, 3, dst, src, len);
+}
+
 /* Fold function call to builtin memcmp with arguments ARG1 and ARG2.
    Return NULL_TREE if no simplification can be made.  */
 
@@ -8947,6 +8997,9 @@ fold_builtin_2 (location_t loc, tree fndecl, tree arg0, tree arg1)
     CASE_FLT_FN (BUILT_IN_MODF):
       return fold_builtin_modf (loc, arg0, arg1, type);
 
+    case BUILT_IN_BZERO:
+      return fold_builtin_bzero (loc, arg0, arg1);
+
     case BUILT_IN_STRSPN:
       return fold_builtin_strspn (loc, arg0, arg1);
 
@@ -9034,7 +9087,12 @@ fold_builtin_3 (location_t loc, tree fndecl,
 	return do_mpfr_remquo (arg0, arg1, arg2);
     break;
 
+    case BUILT_IN_BCOPY:
+      return fold_builtin_bcopy (loc, arg0, arg1, arg2);;
+
     case BUILT_IN_BCMP:
+      return fold_builtin_bcmp (loc, arg0, arg1, arg2);;
+
     case BUILT_IN_MEMCMP:
       return fold_builtin_memcmp (loc, arg0, arg1, arg2);;
 
diff --git a/gcc/testsuite/gcc.dg/fold-bcopy.c b/gcc/testsuite/gcc.dg/fold-bcopy.c
new file mode 100644
index 0000000..bf6a143
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-bcopy.c
@@ -0,0 +1,21 @@
+/* PR tree-optimization/80933 - redundant bzero/bcopy calls not eliminated
+   { dg-do compile }
+   { dg-options "-O0 -fdump-tree-gimple" } */
+
+void f0 (void *p, const void *q, unsigned n)
+{
+  __builtin_bcopy (q, p, n);	/* bcopy returns void: nothing to return.  */
+}
+
+int f1 (const void *p, const void *q, unsigned n)
+{
+  return __builtin_bcmp (q, p, n);
+}
+
+void f2 (void *p, unsigned n)
+{
+  __builtin_bzero (p, n);
+}
+
+/* { dg-final { scan-tree-dump-not "bcmp|bcopy|bzero" "gimple" } } */
+/* { dg-final { scan-tree-dump-times "memcpy|memmove|memcmp|memset" 3 "gimple" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/alias-36.c b/gcc/testsuite/gcc.dg/tree-ssa/alias-36.c
new file mode 100644
index 0000000..61b601a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/alias-36.c
@@ -0,0 +1,28 @@
+/* PR tree-optimization/80934 - bzero should be assumed not to escape
+   pointer argument
+   { dg-do compile }
+   { dg-options "-O2 -fdump-tree-alias" } */
+
+void foobar (void);
+
+void f (void);
+
+void g (void)
+{
+  char d[32];
+  __builtin_memset (d, 0, sizeof d);	/* Reference case: zero D via memset.  */
+  f ();
+  if (*d != 0)
+    foobar ();	/* dg-final expects this call to be eliminated.  */
+}
+
+void h (void)
+{
+  char d[32];
+  __builtin_bzero (d, sizeof d);	/* Same as g, but zeroing D via bzero.  */
+  f ();
+  if (*d != 0)
+    foobar ();	/* dg-final expects this call to be eliminated.  */
+}
+
+/* { dg-final { scan-tree-dump-not "memset|foobar|bzero" "alias" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-30.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-30.c
new file mode 100644
index 0000000..9d2c920
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-30.c
@@ -0,0 +1,30 @@
+/* PR tree-optimization/80933 - redundant bzero/bcopy calls not eliminated
+   { dg-do compile }
+   { dg-options "-O2 -fdump-tree-dse1" } */
+
+void sink (void*);
+
+void test_bcopy (const void *s)
+{
+  char d[33];
+
+  /* The bcopy calls are expanded inline in EVRP, before DSE runs,
+     so this test doesn't actually verify that DSE does its job.  */
+  __builtin_bcopy (s, d, sizeof d);
+  __builtin_bcopy (s, d, sizeof d);
+
+  sink (d);
+}
+
+void test_bzero (void)
+{
+  char d[33];
+
+  __builtin_bzero (d, sizeof d);	/* Dead: overwritten by the next call.  */
+  __builtin_bzero (d, sizeof d);	/* Folded to memset before DSE runs.  */
+
+  sink (d);
+}
+
+/* { dg-final { scan-tree-dump-times "builtin_memset" 1 "dse1" } } */
+/* { dg-final { scan-tree-dump-not "builtin_bcopy|builtin_bzero" "dse1" } } */

Reply via email to