On Wed, Nov 10, 2021 at 1:43 PM Jan Hubicka via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi,
> this patch implements DSE using modref summaries: if function has no side 
> effects
> besides storing to memory pointed to by its argument and if we can prove 
> those stores
> to be dead, we can optimize out. So we handle for example:
>
> volatile int *ptr;
> struct a {
>         int a,b,c;
> } a;
> __attribute__((noinline))
> static int init (struct a*a)
> {
>         a->a=0;
>         a->b=1;
> }
> __attribute__((noinline))
> static int use (struct a*a)
> {
>         if (a->c != 3)
>                 *ptr=5;
> }
>
> void
> main(void)
> {
>         struct a a;
>         init (&a);
>         a.c=3;
>         use (&a);
> }
>
> And optimize out call to init (&a).
>
> We work quite hard to inline such constructors and this patch is only
> effective if inlining did not happen (for whatever reason).  Still, we
> optimize about 26 calls building tramp3d and about 70 calls during
> bootstrap (mostly ctors of poly_int). During bootstrap most removal
> happens early and we would inline the ctors unless we decide to optimize
> for size. 1 call per cc1* binary is removed late during LTO build.
>
> This is more frequent in codebases with higher abstraction penalty, with
> -Os or with profile feedback in sections optimized for size. I also hope
> we will be able to CSE such calls and that would make DSE more
> important.
>
> Bootstrapped/regtested x86_64-linux, OK?
>
> gcc/ChangeLog:
>
>         * tree-ssa-alias.c (ao_ref_alias_ptr_type): Export.

ao_ref_init_from_ptr_and_range it is

>         * tree-ssa-alias.h (ao_ref_init_from_ptr_and_range): Declare.
>         * tree-ssa-dse.c (dse_optimize_stmt): Rename to ...
>         (dse_optimize_store): ... this;
>         (dse_optimize_call): New function.
>         (pass_dse::execute): Use dse_optimize_call and update
>         call to dse_optimize_store.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/tree-ssa/modref-dse-1.c: New test.
>         * gcc.dg/tree-ssa/modref-dse-2.c: New test.
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-1.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-1.c
> new file mode 100644
> index 00000000000..e78693b349a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-1.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-dse1"  } */
> +volatile int *ptr;
> +struct a {
> +       int a,b,c;
> +} a;
> +__attribute__((noinline))
> +static int init (struct a*a)
> +{
> +       a->a=0;
> +       a->b=1;
> +}
> +__attribute__((noinline))
> +static int use (struct a*a)
> +{
> +       if (a->c != 3)
> +               *ptr=5;
> +}
> +
> +void
> +main(void)
> +{
> +       struct a a;
> +       init (&a);
> +       a.c=3;
> +       use (&a);
> +}
> +/* { dg-final { scan-tree-dump "Deleted dead store: init" "dse1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-2.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-2.c
> new file mode 100644
> index 00000000000..99c8ceb8127
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-2.c
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-dse2 -fno-ipa-sra -fno-ipa-cp"  } */
> +volatile int *ptr;
> +struct a {
> +       int a,b,c;
> +} a;
> +__attribute__((noinline))
> +static int init (struct a*a)
> +{
> +       a->a=0;
> +       a->b=1;
> +       a->c=1;
> +}
> +__attribute__((noinline))
> +static int use (struct a*a)
> +{
> +       if (a->c != 3)
> +               *ptr=5;
> +}
> +
> +void
> +main(void)
> +{
> +       struct a a;
> +       init (&a);
> +       a.c=3;
> +       use (&a);
> +}
> +/* Only DSE2 is tracking live bytes needed to figure out that store to c is
> +   also dead above.  */
> +/* { dg-final { scan-tree-dump "Deleted dead store: init" "dse2" } } */
> diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c
> index eabf6805f2b..affb5d40d4b 100644
> --- a/gcc/tree-ssa-alias.c
> +++ b/gcc/tree-ssa-alias.c
> @@ -782,7 +782,7 @@ ao_ref_alias_ptr_type (ao_ref *ref)
>     The access is assumed to be only to or after of the pointer target 
> adjusted
>     by the offset, not before it (even in the case RANGE_KNOWN is false).  */
>
> -static void
> +void
>  ao_ref_init_from_ptr_and_range (ao_ref *ref, tree ptr,
>                                 bool range_known,
>                                 poly_int64 offset,
> diff --git a/gcc/tree-ssa-alias.h b/gcc/tree-ssa-alias.h
> index 275dea10397..c2e28a74999 100644
> --- a/gcc/tree-ssa-alias.h
> +++ b/gcc/tree-ssa-alias.h
> @@ -111,6 +111,8 @@ ao_ref::max_size_known_p () const
>  /* In tree-ssa-alias.c  */
>  extern void ao_ref_init (ao_ref *, tree);
>  extern void ao_ref_init_from_ptr_and_size (ao_ref *, tree, tree);
> +void ao_ref_init_from_ptr_and_range (ao_ref *, tree, bool,
> +                                    poly_int64, poly_int64, poly_int64);
>  extern tree ao_ref_base (ao_ref *);
>  extern alias_set_type ao_ref_alias_set (ao_ref *);
>  extern alias_set_type ao_ref_base_alias_set (ao_ref *);
> diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
> index 27287fe88ee..1fec9100011 100644
> --- a/gcc/tree-ssa-dse.c
> +++ b/gcc/tree-ssa-dse.c
> @@ -40,6 +40,9 @@ along with GCC; see the file COPYING3.  If not see
>  #include "gimplify.h"
>  #include "tree-eh.h"
>  #include "cfganal.h"
> +#include "cgraph.h"
> +#include "ipa-modref-tree.h"
> +#include "ipa-modref.h"
>
>  /* This file implements dead store elimination.
>
> @@ -1039,7 +1042,8 @@ delete_dead_or_redundant_assignment 
> (gimple_stmt_iterator *gsi, const char *type
>     post dominates the first store, then the first store is dead.  */
>
>  static void
> -dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap 
> live_bytes)
> +dse_optimize_store (function *fun, gimple_stmt_iterator *gsi,
> +                   sbitmap live_bytes)
>  {
>    gimple *stmt = gsi_stmt (*gsi);
>
> @@ -1182,6 +1186,128 @@ dse_optimize_stmt (function *fun, 
> gimple_stmt_iterator *gsi, sbitmap live_bytes)
>      delete_dead_or_redundant_assignment (gsi, "dead", need_eh_cleanup);
>  }
>
> +/* Try to prove, using modref summary, that all memory written to by a call 
> is
> +   dead and remove it.  */
> +
> +static bool
> +dse_optimize_call (gimple_stmt_iterator *gsi, sbitmap live_bytes)
> +{
> +  gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
> +  tree callee = gimple_call_fndecl (stmt);
> +
> +  if (!callee)
> +    return false;
> +
> +  /* Pure/const functions are optimized by normal DCE
> +     or handled as store above.  */
> +  int flags = gimple_call_flags (stmt);
> +  if ((flags & (ECF_PURE|ECF_CONST|ECF_NOVOPS))
> +      && !(flags & (ECF_LOOPING_CONST_OR_PURE)))
> +    return false;
>
> +  cgraph_node *node = cgraph_node::get (callee);
> +  if (!node)
> +    return false;
> +
> +  if (stmt_could_throw_p (cfun, stmt)
> +      && !cfun->can_delete_dead_exceptions)
> +    return false;
> +
> +  /* If return value is used the call is not dead.  */
> +  tree lhs = gimple_call_lhs (stmt);
> +  if (lhs && TREE_CODE (lhs) == SSA_NAME)
> +    {
> +      imm_use_iterator ui;
> +      gimple *use_stmt;
> +      FOR_EACH_IMM_USE_STMT (use_stmt, ui, lhs)
> +       if (!is_gimple_debug (use_stmt))
> +         return false;
> +    }
> +
> +  /* Verify that there are no side-effects except for return value
> +     and memory writes tracked by modref.  */
> +  modref_summary *summary = get_modref_function_summary (node);
> +  if (!summary || summary->writes_errno
> +      || summary->side_effects
> +      || summary->global_memory_written_p ())
> +    return false;
> +
> +  modref_base_node <alias_set_type> *base_node;
> +  modref_ref_node <alias_set_type> *ref_node;
> +  modref_access_node *access_node;
> +  size_t i, j, k;
> +  bool by_clobber_p = false;
> +  int num_tests = 0, max_tests = param_modref_max_tests;
> +
> +  /* Unlike alias oracle we can not skip subtrees based on TBAA check.
> +     Count the size of the whole tree to verify that we will not need too 
> many
> +     tests.  */
> +  FOR_EACH_VEC_SAFE_ELT (summary->stores->bases, i, base_node)
> +    FOR_EACH_VEC_SAFE_ELT (base_node->refs, j, ref_node)
> +      FOR_EACH_VEC_SAFE_ELT (ref_node->accesses, k, access_node)
> +       if (num_tests++ > max_tests)
> +         return false;

at least the innermost loop can be done as

          if (num_tests += ref_node->accesses.length () > max_tests)

no?

> +
> +  /* Walk all memory writes and verify that they are dead.  */
> +  FOR_EACH_VEC_SAFE_ELT (summary->stores->bases, i, base_node)
> +    FOR_EACH_VEC_SAFE_ELT (base_node->refs, j, ref_node)
> +      FOR_EACH_VEC_SAFE_ELT (ref_node->accesses, k, access_node)
> +       {
> +         /* ??? if offset is unkonwn it may be negative.  Not sure
> +            how to construct ref here.  */

I think you can't, you could use -poly_int64_max or so.

> +         if (!access_node->parm_offset_known)
> +           return false;

But you could do this check in the loop computing num_tests ...
(we could also cache the count and whether any of the refs have unknown offset
in the summary?)

> +         tree arg;
> +         if (access_node->parm_index == MODREF_STATIC_CHAIN_PARM)
> +           arg = gimple_call_chain (stmt);
> +         else
> +           arg = gimple_call_arg (stmt, access_node->parm_index);
> +
> +         ao_ref ref;
> +         poly_offset_int off = (poly_offset_int)access_node->offset
> +               + ((poly_offset_int)access_node->parm_offset
> +                  << LOG2_BITS_PER_UNIT);
> +         poly_int64 off2;
> +         if (!off.to_shwi (&off2))
> +           return false;
> +         ao_ref_init_from_ptr_and_range
> +                (&ref, arg, true, off2, access_node->size,
> +                 access_node->max_size);
> +         ref.ref_alias_set = ref_node->ref;
> +         ref.base_alias_set = base_node->base;
> +
> +         bool byte_tracking_enabled
> +             = setup_live_bytes_from_ref (&ref, live_bytes);
> +         enum dse_store_status store_status;
> +
> +         store_status = dse_classify_store (&ref, stmt,
> +                                            byte_tracking_enabled,
> +                                            live_bytes, &by_clobber_p);
> +         if (store_status != DSE_STORE_DEAD)
> +           return false;
> +       }
> +  /* Check also value stored by the call.  */
> +  if (gimple_store_p (stmt))
> +    {
> +      ao_ref ref;
> +
> +      if (!initialize_ao_ref_for_dse (stmt, &ref))
> +       gcc_unreachable ();
> +      bool byte_tracking_enabled
> +         = setup_live_bytes_from_ref (&ref, live_bytes);
> +      enum dse_store_status store_status;
> +
> +      store_status = dse_classify_store (&ref, stmt,
> +                                        byte_tracking_enabled,
> +                                        live_bytes, &by_clobber_p);
> +      if (store_status != DSE_STORE_DEAD)
> +       return false;
> +    }
> +  delete_dead_or_redundant_assignment (gsi, "dead", need_eh_cleanup);
> +  return true;
> +}
> +
>  namespace {
>
>  const pass_data pass_data_dse =
> @@ -1235,7 +1363,14 @@ pass_dse::execute (function *fun)
>           gimple *stmt = gsi_stmt (gsi);
>
>           if (gimple_vdef (stmt))
> -           dse_optimize_stmt (fun, &gsi, live_bytes);
> +           {
> +             gcall *call = dyn_cast <gcall *> (stmt);
> +
> +             if (call && dse_optimize_call (&gsi, live_bytes))
> +               /* We removed a dead call.  */;
> +             else
> +               dse_optimize_store (fun, &gsi, live_bytes);

I think we want to refactor both functions, dse_optimize_stmt has some
early outs that apply generally, and it handles some builtin calls
that we don't want to re-handle with dse_optimize_call.

So I wonder if it is either possible to call the new function from
inside dse_optimize_stmt instead, after we handled the return
value of call for example or different refactoring can make the flow
more obvious.

Thanks,
Richard.

> +           }
>           else if (def_operand_p
>                      def_p = single_ssa_def_operand (stmt, SSA_OP_DEF))
>             {

Reply via email to