On Fri, Nov 12, 2021 at 12:39 PM Jan Hubicka <hubi...@kam.mff.cuni.cz> wrote: > > Hi, > this is updated patch. It moves the summary walk checking if we can > possibly suceed on dse to summary->finalize member function so it is done > once per summary and refactors dse_optimize_call to be called from > dse_optimize_stmt after early checks. > > I did not try to handle the special case of parm_offset_known but we can > do it incrementally. I think initializing range with offset being > polyin64_int_min and max_size unkonwn as suggested is going to work. > I am bit worried this is in bits so we have 2^61 range instead of 2^64 > but I guess once can not offset pointer back and forth in valid program?
Not sure indeed. I'd only special-case when the call argument is &decl, then the start offset can be simply zero. > Bootstrapped/regtested x86_64-linux, ltobootstrap in progress, OK if it > succeeds? OK. Thanks, Richard. > gcc/ChangeLog: > > * ipa-modref.c (modref_summary::modref_summary): Clear new flags. > (modref_summary::dump): Dump try_dse. > (modref_summary::finalize): Add FUN attribute; compute try-dse. > (analyze_function): Update. > (read_section): Update. > (update_signature): Update. > (pass_ipa_modref::execute): Update. > * ipa-modref.h (struct modref_summary): > * tree-ssa-alias.c (ao_ref_init_from_ptr_and_range): Export. > * tree-ssa-alias.h (ao_ref_init_from_ptr_and_range): Declare. > * tree-ssa-dse.c: Include cgraph.h, ipa-modref-tree.h and > ipa-modref.h > (dse_optimize_call): New function. > (dse_optimize_stmt): Use it. > > gcc/testsuite/ChangeLog: > > * gcc.dg/tree-ssa/modref-dse-1.c: New test. > * gcc.dg/tree-ssa/modref-dse-2.c: New test. > > diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c > index c6efacb0e20..ea6a27ae767 100644 > --- a/gcc/ipa-modref.c > +++ b/gcc/ipa-modref.c > @@ -276,7 +276,8 @@ static GTY(()) fast_function_summary <modref_summary_lto > *, va_gc> > > modref_summary::modref_summary () > : loads (NULL), stores (NULL), retslot_flags (0), static_chain_flags (0), > - writes_errno (false), side_effects (false) > + writes_errno (false), side_effects (false), global_memory_read (false), > + global_memory_written (false), try_dse (false) > { > } > > @@ -605,6 +606,8 @@ modref_summary::dump (FILE *out) > fprintf (out, " Global memory read\n"); > if (global_memory_written) > fprintf (out, " Global memory written\n"); > + if (try_dse) > + fprintf (out, " Try dse\n"); > if (arg_flags.length ()) > { > for (unsigned int i = 0; i < arg_flags.length (); i++) > @@ -661,12 +664,56 @@ modref_summary_lto::dump (FILE *out) > } > > /* Called after summary is produced and before it is used by local analysis. > - Can be called multiple times in case summary needs to update signature. > */ > + Can be called multiple times in case summary needs to update signature. > + FUN is decl of function summary is attached to. */ > void > -modref_summary::finalize () > +modref_summary::finalize (tree fun) > { > global_memory_read = !loads || loads->global_access_p (); > global_memory_written = !stores || stores->global_access_p (); > + > + /* We can do DSE if we know function has no side effects and > + we can analyse all stores. Disable dse if there are too many > + stores to try. */ > + if (side_effects || global_memory_written || writes_errno) > + try_dse = false; > + else > + { > + try_dse = true; > + size_t i, j, k; > + int num_tests = 0, max_tests > + = opt_for_fn (fun, param_modref_max_tests); > + modref_base_node <alias_set_type> *base_node; > + modref_ref_node <alias_set_type> *ref_node; > + modref_access_node *access_node; > + FOR_EACH_VEC_SAFE_ELT (stores->bases, i, base_node) > + { > + if (base_node->every_ref) > + { > + try_dse = false; > + break; > + } > + FOR_EACH_VEC_SAFE_ELT (base_node->refs, j, ref_node) > + { > + if (base_node->every_ref) > + { > + try_dse = false; > + break; > + } > + FOR_EACH_VEC_SAFE_ELT (ref_node->accesses, k, access_node) > + if (num_tests++ > max_tests > + || !access_node->parm_offset_known) > + { > + try_dse = false; > + break; > + } > + if (!try_dse) > + break; > + } > + if (!try_dse) > + break; > + } > + } > } > > /* Get function summary for FUNC if it exists, return NULL otherwise. */ > @@ -2803,7 +2850,7 @@ analyze_function (function *f, bool ipa) > summary = NULL; > } > else if (summary) > - summary->finalize (); > + summary->finalize (current_function_decl); > if (summary_lto && !summary_lto->useful_p (ecf_flags)) > { > summaries_lto->remove (fnode); > @@ -3524,7 +3571,7 @@ read_section (struct lto_file_decl_data *file_data, > const char *data, > } > } > if (flag_ltrans) > - modref_sum->finalize (); > + modref_sum->finalize (node->decl); > if (dump_file) > { > fprintf (dump_file, "Read modref for %s\n", > @@ -3682,7 +3729,7 @@ update_signature (struct cgraph_node *node) > r_lto->dump (dump_file); > } > if (r) > - r->finalize (); > + r->finalize (node->decl); > return; > } > > @@ -4907,7 +4954,7 @@ pass_ipa_modref::execute (function *) > for (struct cgraph_node *cur = component_node; cur; > cur = ((struct ipa_dfs_info *) cur->aux)->next_cycle) > if (modref_summary *sum = optimization_summaries->get (cur)) > - sum->finalize (); > + sum->finalize (cur->decl); > if (dump_file) > modref_propagate_dump_scc (component_node); > } > diff --git a/gcc/ipa-modref.h b/gcc/ipa-modref.h > index 9a8d565d770..43353ad2ef4 100644 > --- a/gcc/ipa-modref.h > +++ b/gcc/ipa-modref.h > @@ -38,13 +38,14 @@ struct GTY(()) modref_summary > /* Flags coputed by finalize method. */ > unsigned global_memory_read : 1; > unsigned global_memory_written : 1; > + unsigned try_dse : 1; > > > modref_summary (); > ~modref_summary (); > void dump (FILE *); > bool useful_p (int ecf_flags, bool check_flags = true); > - void finalize (); > + void finalize (tree); > }; > > modref_summary *get_modref_function_summary (cgraph_node *func); > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-1.c > b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-1.c > new file mode 100644 > index 00000000000..1f80cc57c57 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-1.c > @@ -0,0 +1,28 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-dse1-details" } */ > +volatile int *ptr; > +struct a { > + int a,b,c; > +} a; > +__attribute__((noinline)) > +static int init (struct a*a) > +{ > + a->a=0; > + a->b=1; > +} > +__attribute__((noinline)) > +static int use (struct a*a) > +{ > + if (a->c != 3) > + *ptr=5; > +} > + > +void > +main(void) > +{ > + struct a a; > + init (&a); > + a.c=3; > + use (&a); > +} > +/* { dg-final { scan-tree-dump "Deleted dead store: init" "dse1" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-2.c > b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-2.c > new file mode 100644 > index 00000000000..e41d065a5e3 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-2.c > @@ -0,0 +1,31 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-dse2-details" } */ > +volatile int *ptr; > +struct a { > + int a,b,c; > +} a; > +__attribute__((noinline)) > +static int init (struct a*a) > +{ > + a->a=0; > + a->b=1; > + a->c=1; > +} > +__attribute__((noinline)) > +static int use (struct a*a) > +{ > + if (a->c != 3) > + *ptr=5; > +} > + > +void > +main(void) > +{ > + struct a a; > + init (&a); > + a.c=3; > + use (&a); > +} > +/* Only DSE2 is tracking live bytes needed to figure out that store to c is > + also dead above. */ > +/* { dg-final { scan-tree-dump "Deleted dead store: init" "dse2" } } */ > diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c > index 17ff6bb582c..2965902912f 100644 > --- a/gcc/tree-ssa-alias.c > +++ b/gcc/tree-ssa-alias.c > @@ -782,7 +782,7 @@ ao_ref_alias_ptr_type (ao_ref *ref) > The access is assumed to be only to or after of the pointer target > adjusted > by the offset, not before it (even in the case RANGE_KNOWN is false). */ > > -static void > +void > ao_ref_init_from_ptr_and_range (ao_ref *ref, tree ptr, > bool range_known, > poly_int64 offset, > diff --git a/gcc/tree-ssa-alias.h b/gcc/tree-ssa-alias.h > index 275dea10397..64d4865f58d 100644 > --- a/gcc/tree-ssa-alias.h > +++ b/gcc/tree-ssa-alias.h > @@ -111,6 +111,9 @@ ao_ref::max_size_known_p () const > /* In tree-ssa-alias.c */ > extern void ao_ref_init (ao_ref *, tree); > extern void ao_ref_init_from_ptr_and_size (ao_ref *, tree, tree); > +extern void ao_ref_init_from_ptr_and_range (ao_ref *, tree, bool, > + poly_int64, poly_int64, > + poly_int64); > extern tree ao_ref_base (ao_ref *); > extern alias_set_type ao_ref_alias_set (ao_ref *); > extern alias_set_type ao_ref_base_alias_set (ao_ref *); > diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c > index 27287fe88ee..0e8c4ed1435 100644 > --- a/gcc/tree-ssa-dse.c > +++ b/gcc/tree-ssa-dse.c > @@ -40,6 +40,9 @@ along with GCC; see the file COPYING3. If not see > #include "gimplify.h" > #include "tree-eh.h" > #include "cfganal.h" > +#include "cgraph.h" > +#include "ipa-modref-tree.h" > +#include "ipa-modref.h" > > /* This file implements dead store elimination. > > @@ -1027,6 +1030,101 @@ delete_dead_or_redundant_assignment > (gimple_stmt_iterator *gsi, const char *type > release_defs (stmt); > } > > +/* Try to prove, using modref summary, that all memory written to by a call > is > + dead and remove it. Assume that if return value is written to memory > + it is already proved to be dead. */ > + > +static bool > +dse_optimize_call (gimple_stmt_iterator *gsi, sbitmap live_bytes) > +{ > + gcall *stmt = dyn_cast <gcall *> (gsi_stmt (*gsi)); > + > + if (!stmt) > + return false; > + > + tree callee = gimple_call_fndecl (stmt); > + > + if (!callee) > + return false; > + > + /* Pure/const functions are optimized by normal DCE > + or handled as store above. */ > + int flags = gimple_call_flags (stmt); > + if ((flags & (ECF_PURE|ECF_CONST|ECF_NOVOPS)) > + && !(flags & (ECF_LOOPING_CONST_OR_PURE))) > + return false; > + > + cgraph_node *node = cgraph_node::get (callee); > + if (!node) > + return false; > + > + if (stmt_could_throw_p (cfun, stmt) > + && !cfun->can_delete_dead_exceptions) > + return false; > + > + /* If return value is used the call is not dead. */ > + tree lhs = gimple_call_lhs (stmt); > + if (lhs && TREE_CODE (lhs) == SSA_NAME) > + { > + imm_use_iterator ui; > + gimple *use_stmt; > + FOR_EACH_IMM_USE_STMT (use_stmt, ui, lhs) > + if (!is_gimple_debug (use_stmt)) > + return false; > + } > + > + /* Verify that there are no side-effects except for return value > + and memory writes tracked by modref. */ > + modref_summary *summary = get_modref_function_summary (node); > + if (!summary || !summary->try_dse) > + return false; > + > + modref_base_node <alias_set_type> *base_node; > + modref_ref_node <alias_set_type> *ref_node; > + modref_access_node *access_node; > + size_t i, j, k; > + bool by_clobber_p = false; > + > + /* Walk all memory writes and verify that they are dead. */ > + FOR_EACH_VEC_SAFE_ELT (summary->stores->bases, i, base_node) > + FOR_EACH_VEC_SAFE_ELT (base_node->refs, j, ref_node) > + FOR_EACH_VEC_SAFE_ELT (ref_node->accesses, k, access_node) > + { > + gcc_checking_assert (access_node->parm_offset_known); > + > + tree arg; > + if (access_node->parm_index == MODREF_STATIC_CHAIN_PARM) > + arg = gimple_call_chain (stmt); > + else > + arg = gimple_call_arg (stmt, access_node->parm_index); > + > + ao_ref ref; > + poly_offset_int off = (poly_offset_int)access_node->offset > + + ((poly_offset_int)access_node->parm_offset > + << LOG2_BITS_PER_UNIT); > + poly_int64 off2; > + if (!off.to_shwi (&off2)) > + return false; > + ao_ref_init_from_ptr_and_range > + (&ref, arg, true, off2, access_node->size, > + access_node->max_size); > + ref.ref_alias_set = ref_node->ref; > + ref.base_alias_set = base_node->base; > + > + bool byte_tracking_enabled > + = setup_live_bytes_from_ref (&ref, live_bytes); > + enum dse_store_status store_status; > + > + store_status = dse_classify_store (&ref, stmt, > + byte_tracking_enabled, > + live_bytes, &by_clobber_p); > + if (store_status != DSE_STORE_DEAD) > + return false; > + } > + delete_dead_or_redundant_assignment (gsi, "dead", need_eh_cleanup); > + return true; > +} > + > /* Attempt to eliminate dead stores in the statement referenced by BSI. > > A dead store is a store into a memory location which will later be > @@ -1050,8 +1148,13 @@ dse_optimize_stmt (function *fun, gimple_stmt_iterator > *gsi, sbitmap live_bytes) > return; > > ao_ref ref; > + /* If this is not a store we can still remove dead call using > + modref summary. */ > if (!initialize_ao_ref_for_dse (stmt, &ref)) > - return; > + { > + dse_optimize_call (gsi, live_bytes); > + return; > + } > > /* We know we have virtual definitions. We can handle assignments and > some builtin calls. */ > @@ -1162,6 +1265,9 @@ dse_optimize_stmt (function *fun, gimple_stmt_iterator > *gsi, sbitmap live_bytes) > || (stmt_could_throw_p (fun, stmt) > && !fun->can_delete_dead_exceptions))) > { > + /* See if we can remove complete call. */ > + if (dse_optimize_call (gsi, live_bytes)) > + return; > /* Make sure we do not remove a return slot we cannot reconstruct > later. */ > if (gimple_call_return_slot_opt_p (as_a <gcall *>(stmt))