On Fri, 25 Nov 2016, Jakub Jelinek wrote:

> Hi!
> 
> This patch optimizes a = {}; b = a; into a = {}; b = {};
> and similarly for memset instead of the first stmt and/or memcpy
> instead of the second one.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2016-11-25  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR c/78408
>       * tree-ssa-ccp.c: Include tree-dfa.h.
>       (optimize_memcpy): New function.
>       (pass_fold_builtins::execute): Use it.  Remove useless conditional
>       break after BUILT_IN_VA_*.
> 
>       * gcc.dg/pr78408.c: New test.
> 
> --- gcc/tree-ssa-ccp.c.jj     2016-11-18 20:04:27.000000000 +0100
> +++ gcc/tree-ssa-ccp.c        2016-11-25 17:54:26.862166658 +0100
> @@ -143,6 +143,7 @@ along with GCC; see the file COPYING3.
>  #include "stor-layout.h"
>  #include "optabs-query.h"
>  #include "tree-ssa-ccp.h"
> +#include "tree-dfa.h"
>  
>  /* Possible lattice values.  */
>  typedef enum
> @@ -2928,6 +2929,113 @@ optimize_atomic_bit_test_and (gimple_stm
>    release_ssa_name (lhs);
>  }
>  
> +/* Optimize
> +   a = {};
> +   b = a;
> +   into
> +   a = {};
> +   b = {};
> +   Similarly for memset (&a, ..., sizeof (a)); instead of a = {};
> +   and/or memcpy (&b, &a, sizeof (a)); instead of b = a;  */
> +
> +static void
> +optimize_memcpy (gimple_stmt_iterator *gsip, tree dest, tree src)
> +{
> +  gimple *stmt = gsi_stmt (*gsip);
> +  if (gimple_has_volatile_ops (stmt)
> +      || TREE_THIS_VOLATILE (dest)
> +      || TREE_THIS_VOLATILE (src))
> +    return;
> +
> +  tree vuse = gimple_vuse (stmt);
> +  if (vuse == NULL)
> +    return;
> +
> +  gimple *defstmt = SSA_NAME_DEF_STMT (vuse);
> +  tree src2 = NULL_TREE;
> +  tree val = integer_zero_node;
> +  if (gimple_store_p (defstmt)
> +      && gimple_assign_single_p (defstmt)
> +      && TREE_CODE (gimple_assign_rhs1 (defstmt)) == CONSTRUCTOR
> +      && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (defstmt)) == 0
> +      && !gimple_clobber_p (defstmt))
> +    src2 = gimple_assign_lhs (defstmt);
> +  else if (gimple_call_builtin_p (defstmt, BUILT_IN_MEMSET)
> +        && TREE_CODE (gimple_call_arg (defstmt, 0)) == ADDR_EXPR
> +        && TREE_CODE (gimple_call_arg (defstmt, 1)) == INTEGER_CST
> +        && TREE_CODE (gimple_call_arg (defstmt, 2)) == INTEGER_CST)
> +    {
> +      HOST_WIDE_INT ssize, max_size, off;
> +      bool reverse;
> +      src2 = TREE_OPERAND (gimple_call_arg (defstmt, 0), 0);
> +      get_ref_base_and_extent (src2, &off, &ssize, &max_size, &reverse);
> +      if (ssize != max_size
> +       || (ssize % BITS_PER_UNIT) != 0
> +       || !wi::eq_p (gimple_call_arg (defstmt, 2), ssize / BITS_PER_UNIT))
> +     src2 = NULL_TREE;

I wonder why you jump through the hoops of get_ref_base_and_extent
given the call args will be invariant addresses and thus
get_addr_base_and_unit_offset would be more appropriate here.

Also not sure why you want to restrict the size with the wi::eq_p
(probably for the a = b case where the size isn't given explicitly
but then you don't check whether off is 0 ...).  I'd say passing
in base, offset and size for src and dest into this function will
simplify things and should allow handling

  memset (p+10, 0, 24);
  memcpy (q, p+10, 24);

if you compare bases with operand_equal_p.

> +      else
> +     {
> +       val = gimple_call_arg (defstmt, 1);
> +       if (!integer_zerop (val) && is_gimple_assign (stmt))
> +         src2 = NULL_TREE;
> +     }
> +    }
> +
> +  if (src2 == NULL_TREE)
> +    return;
> +
> +  if (!operand_equal_p (src, src2, 0))
> +    {
> +      /* Handle also
> +      a = {};
> +      MEM[(char * {ref-all})&b] = MEM[(char * {ref-all})&a];  */
> +      if (is_gimple_assign (stmt)
> +       && TREE_CODE (src) == MEM_REF
> +       && integer_zerop (TREE_OPERAND (src, 1))
> +       && TREE_CODE (TREE_OPERAND (src, 0)) == ADDR_EXPR
> +       && DECL_P (src2)
> +       && operand_equal_p (TREE_OPERAND (TREE_OPERAND (src, 0), 0),
> +                           src2, 0)
> +       && tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (src)),
> +                              DECL_SIZE (src2)))
> +     src = TREE_OPERAND (TREE_OPERAND (src, 0), 0);
> +      else
> +     return;
> +    }
> +  if (refs_may_alias_p (dest, src))
> +    return;

Why is this refs_may_alias_p check needed?

Richard.

> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, "Simplified\n  ");
> +      print_gimple_stmt (dump_file, stmt, 0, dump_flags);
> +      fprintf (dump_file, "after previous\n  ");
> +      print_gimple_stmt (dump_file, defstmt, 0, dump_flags);
> +    }
> +
> +  if (is_gimple_assign (stmt))
> +    {
> +      tree ctor = build_constructor (TREE_TYPE (dest), NULL);
> +      gimple_assign_set_rhs_from_tree (gsip, ctor);
> +      update_stmt (stmt);
> +    }
> +  else
> +    {
> +      gcall *call = as_a <gcall *> (stmt);
> +      tree fndecl = builtin_decl_implicit (BUILT_IN_MEMSET);
> +      gimple_call_set_fndecl (call, fndecl);
> +      gimple_call_set_fntype (call, TREE_TYPE (fndecl));
> +      gimple_call_set_arg (call, 1, val);
> +      update_stmt (stmt);
> +    }
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    {
> +      fprintf (dump_file, "into\n  ");
> +      print_gimple_stmt (dump_file, stmt, 0, dump_flags);
> +    }
> +}
> +
>  /* A simple pass that attempts to fold all builtin functions.  This pass
>     is run after we've propagated as many constants as we can.  */
>  
> @@ -2994,6 +3102,9 @@ pass_fold_builtins::execute (function *f
>                     continue;
>                   }
>               }
> +           else if (gimple_assign_load_p (stmt) && gimple_store_p (stmt))
> +             optimize_memcpy (&i, gimple_assign_lhs (stmt),
> +                              gimple_assign_rhs1 (stmt));
>             gsi_next (&i);
>             continue;
>           }
> @@ -3109,14 +3220,39 @@ pass_fold_builtins::execute (function *f
>                                               false, false);
>                 break;
>  
> +             case BUILT_IN_MEMCPY:
> +               if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)
> +                   && TREE_CODE (gimple_call_arg (stmt, 0)) == ADDR_EXPR
> +                   && TREE_CODE (gimple_call_arg (stmt, 1)) == ADDR_EXPR
> +                   && TREE_CODE (gimple_call_arg (stmt, 2)) == INTEGER_CST)
> +                 {
> +                   tree dest = TREE_OPERAND (gimple_call_arg (stmt, 0), 0);
> +                   tree src = TREE_OPERAND (gimple_call_arg (stmt, 1), 0);
> +                   HOST_WIDE_INT dsize, ssize, max_size, off;
> +                   bool reverse;
> +                   get_ref_base_and_extent (dest, &off, &dsize, &max_size,
> +                                            &reverse);
> +                   if (dsize != max_size)
> +                     break;
> +                   get_ref_base_and_extent (src, &off, &ssize, &max_size,
> +                                            &reverse);
> +                   if (ssize != max_size
> +                       || ssize != dsize
> +                       || (ssize % BITS_PER_UNIT) != 0)
> +                     break;
> +                   if (!wi::eq_p (gimple_call_arg (stmt, 2),
> +                                  ssize / BITS_PER_UNIT))
> +                     break;
> +                   optimize_memcpy (&i, dest, src);
> +                 }
> +               break;
> +
>               case BUILT_IN_VA_START:
>               case BUILT_IN_VA_END:
>               case BUILT_IN_VA_COPY:
>                 /* These shouldn't be folded before pass_stdarg.  */
>                 result = optimize_stdarg_builtin (stmt);
> -               if (result)
> -                 break;
> -               /* FALLTHRU */
> +               break;
>  
>               default:;
>               }
> --- gcc/testsuite/gcc.dg/pr78408.c.jj 2016-11-25 18:02:47.344752199 +0100
> +++ gcc/testsuite/gcc.dg/pr78408.c    2016-11-25 18:02:26.000000000 +0100
> @@ -0,0 +1,78 @@
> +/* PR c/78408 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-fab1-details" } */
> +/* { dg-final { scan-tree-dump-times "after previous" 16 "fab1" } } */
> +
> +struct S { char a[32]; };
> +struct T { char a[65536]; };
> +void bar (int, struct S *, struct S *, struct T *, struct T *);
> +
> +void
> +f1 (void)
> +{
> +  struct S a, b;
> +  struct T c, d;
> +  a = b = (struct S) {};
> +  c = d = (struct T) {};
> +  bar (1, &a, &b, &c, &d);
> +}
> +
> +void
> +f2 (void)
> +{
> +  struct S a, b;
> +  struct T c, d;
> +  b = (struct S) {};
> +  a = b;
> +  d = (struct T) {};
> +  c = d;
> +  bar (2, &a, &b, &c, &d);
> +}
> +
> +void
> +f3 (void)
> +{
> +  struct S a, b;
> +  struct T c, d;
> +  __builtin_memset (&b, 0, sizeof (b));
> +  a = b;
> +  __builtin_memset (&d, 0, sizeof (d));
> +  c = d;
> +  bar (3, &a, &b, &c, &d);
> +}
> +
> +
> +void
> +f4 (void)
> +{
> +  struct S a, b;
> +  struct T c, d;
> +  b = (struct S) {};
> +  __builtin_memcpy (&a, &b, sizeof (b));
> +  d = (struct T) {};
> +  __builtin_memcpy (&c, &d, sizeof (d));
> +  bar (4, &a, &b, &c, &d);
> +}
> +
> +void
> +f5 (void)
> +{
> +  struct S a, b;
> +  struct T c, d;
> +  __builtin_memset (&b, 0, sizeof (b));
> +  __builtin_memcpy (&a, &b, sizeof (b));
> +  __builtin_memset (&d, 0, sizeof (d));
> +  __builtin_memcpy (&c, &d, sizeof (d));
> +  bar (5, &a, &b, &c, &d);
> +}
> +
> +void
> +f6 (void)
> +{
> +  struct S a, b, e, g;
> +  struct T c, d, f, h;
> +  g = e = a = b = (struct S) {};
> +  h = f = c = d = (struct T) {};
> +  bar (6, &a, &b, &c, &d);
> +  bar (6, &e, &g, &f, &h);
> +}
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)

Reply via email to