On Mon, 31 Jul 2023, Richard Biener wrote:

> statement_sink_location for loads is currently confused about
> stores that are not on the paths we are sinking across.  The
> following avoids this by explicitly checking whether a block
> with a store is on any of those paths.  To avoid performing too
> many walks over the sub-part of the CFG between the original stmt
> location and the found sinking candidate, we first collect all
> blocks to check and then perform a single walk from the sinking
> candidate location to the original stmt location.  We avoid
> enlarging the region by conservatively handling backedges.
>
> The original heuristics for which store locations to ignore have
> been refactored; some can possibly be removed now.
>
> If anybody knows a cheaper way to check whether a BB is on a path
> from block A to block B, where B is dominated by A, I'd be happy
> to know (or at least a clever caching method - I'm probably going
> to limit the number of blocks to walk to avoid quadraticness).
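For reference, the check being asked about can be sketched with the
existing primitives as a backward reachability walk from B that never
leaves the region dominated by A.  A rough sketch with a hypothetical
helper name, restricted to paths that stay within A's dominated region
and ignoring the backedge subtleties mentioned above:

/* Hypothetical helper: return true if BB lies on some path from A to
   B that stays within the region dominated by A; B is assumed to be
   dominated by A.  A plain backward reachability walk from B, so
   linear in the size of the region per query.  */

static bool
bb_on_path_p (basic_block bb, basic_block a, basic_block b)
{
  if (bb == a || bb == b)
    return true;
  /* Only consider blocks in the region dominated by A; a block
     dominated by A that reaches B is necessarily on an A->B path.  */
  if (!dominated_by_p (CDI_DOMINATORS, bb, a))
    return false;
  auto_bitmap visited;
  auto_vec<basic_block> worklist;
  worklist.safe_push (b);
  bitmap_set_bit (visited, b->index);
  while (!worklist.is_empty ())
    {
      basic_block x = worklist.pop ();
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, x->preds)
	{
	  if (e->src == bb)
	    return true;
	  /* Do not walk across A or out of its dominated region.  */
	  if (e->src == a
	      || !dominated_by_p (CDI_DOMINATORS, e->src, a)
	      || !bitmap_set_bit (visited, e->src->index))
	    continue;
	  worklist.safe_push (e->src);
	}
    }
  return false;
}

Each query walks the region once, so it is the repeated queries that
make things quadratic overall - which is why the patch below collects
all blocks first and performs a single walk instead.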
I've replaced the whole thing with something based on virtual
operand liveness.
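The idea, roughly: the load's VUSE stays live exactly as long as no
intervening virtual definition (that is, no store) has happened, so a
block where the VUSE is still live is a valid sinking location as far
as stores are concerned.  A minimal sketch of computing the blocks
where a virtual operand is live - a hypothetical helper, not the
committed code:

/* Hypothetical sketch: record in LIVE the indices of all blocks where
   the virtual operand VOP is live, via a backward walk from its uses
   to its definition.  PHI uses are handled conservatively by using
   the PHI's own block instead of the incoming edge source.  */

static void
compute_vop_live_blocks (tree vop, bitmap live)
{
  basic_block defbb = gimple_bb (SSA_NAME_DEF_STMT (vop));
  gimple *use_stmt;
  imm_use_iterator iter;
  auto_vec<basic_block> worklist;
  /* Seed the walk with all blocks containing a use of VOP.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, vop)
    {
      basic_block bb = gimple_bb (use_stmt);
      if (bb && bitmap_set_bit (live, bb->index))
	worklist.safe_push (bb);
    }
  /* Propagate liveness backwards until the definition block.  */
  while (!worklist.is_empty ())
    {
      basic_block bb = worklist.pop ();
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->src != defbb
	    && e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
	    && bitmap_set_bit (live, e->src->index))
	  worklist.safe_push (e->src);
    }
}

With such a bitmap, a sinking candidate that would cross a store could
be rejected with a single bit test.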
Richard.

> Bootstrapped and tested on x86_64-unknown-linux-gnu.  This depends
> on the previously sent RFC to limit testsuite fallout.
>
> 	* tree-ssa-sink.cc (pass_sink_code::execute): Mark backedges.
> 	(statement_sink_location): Do not consider stores that are not
> 	on any path from the original to the destination location.
>
> 	* gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c |  16 +++
>  gcc/tree-ssa-sink.cc                        | 125 ++++++++++++++++----
>  2 files changed, 121 insertions(+), 20 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
> new file mode 100644
> index 00000000000..266ceb000a5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-sink1-details" } */
> +
> +void bar ();
> +int foo (int *p, int x)
> +{
> +  int res = *p;
> +  if (x)
> +    {
> +      bar ();
> +      res = 1;
> +    }
> +  return res;
> +}
> +
> +/* { dg-final { scan-tree-dump "Sinking # VUSE" "sink1" } } */
> diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
> index cf0a32a954b..e996f46c864 100644
> --- a/gcc/tree-ssa-sink.cc
> +++ b/gcc/tree-ssa-sink.cc
> @@ -388,13 +388,32 @@ statement_sink_location (gimple *stmt, basic_block frombb,
>  
>    imm_use_iterator imm_iter;
>    use_operand_p use_p;
> +  auto_bitmap bbs_to_check;
>    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_vuse (stmt))
>      {
>        gimple *use_stmt = USE_STMT (use_p);
>        basic_block bb = gimple_bb (use_stmt);
> +
> +      /* If there is no virtual definition here, continue.  */
> +      if (gimple_code (use_stmt) != GIMPLE_PHI
> +          && !gimple_vdef (use_stmt))
> +        continue;
> +
> +      /* When the virtual definition is possibly within the same
> +         irreducible region as the current sinking location all
> +         bets are off.  */
> +      if (((bb->flags & commondom->flags & BB_IRREDUCIBLE_LOOP)
> +           && bb->loop_father == commondom->loop_father)
> +          || ((commondom->flags & BB_IRREDUCIBLE_LOOP)
> +              && flow_loop_nested_p (commondom->loop_father,
> +                                     bb->loop_father))
> +          || ((bb->flags & BB_IRREDUCIBLE_LOOP)
> +              && flow_loop_nested_p (bb->loop_father,
> +                                     commondom->loop_father)))
> +        ;
>        /* For PHI nodes the block we know sth about is the incoming block
>           with the use.  */
> -      if (gimple_code (use_stmt) == GIMPLE_PHI)
> +      else if (gimple_code (use_stmt) == GIMPLE_PHI)
>          {
>            /* If the PHI defines the virtual operand, ignore it.  */
>            if (gimple_phi_result (use_stmt) == gimple_vuse (stmt))
> @@ -402,32 +421,97 @@ statement_sink_location (gimple *stmt, basic_block frombb,
>            /* In case the PHI node post-dominates the current insert
>               location we can disregard it.  But make sure it is not
>               dominating it as well as can happen in a CFG cycle.  */
> -          if (commondom != bb
> -              && !dominated_by_p (CDI_DOMINATORS, commondom, bb)
> -              && dominated_by_p (CDI_POST_DOMINATORS, commondom, bb)
> -              /* If the blocks are possibly within the same irreducible
> -                 cycle the above check breaks down.  */
> -              && !((bb->flags & commondom->flags & BB_IRREDUCIBLE_LOOP)
> -                   && bb->loop_father == commondom->loop_father)
> -              && !((commondom->flags & BB_IRREDUCIBLE_LOOP)
> -                   && flow_loop_nested_p (commondom->loop_father,
> -                                          bb->loop_father))
> -              && !((bb->flags & BB_IRREDUCIBLE_LOOP)
> -                   && flow_loop_nested_p (bb->loop_father,
> -                                          commondom->loop_father)))
> +          if (!dominated_by_p (CDI_DOMINATORS, commondom, bb)
> +              && dominated_by_p (CDI_POST_DOMINATORS, commondom, bb))
>              continue;
> -          bb = EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src;
>          }
> -      else if (!gimple_vdef (use_stmt))
> +
> +      /* For PHIs the relevant block is the edge source.  */
> +      if (gimple_code (use_stmt) == GIMPLE_PHI)
> +        bb = EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src;
> +      if (bb == commondom)
>          continue;
> +      if (bb == frombb)
> +        return false;
> +
>        /* If the use is not dominated by the path entry it is not on
>           the path.  */
>        if (!dominated_by_p (CDI_DOMINATORS, bb, frombb))
>          continue;
> -      /* There is no easy way to disregard defs not on the path from
> -         frombb to commondom so just consider them all.  */
> -      commondom = nearest_common_dominator (CDI_DOMINATORS,
> -                                            bb, commondom);
> +
> +      /* If BB is definitely on the path, shrink it.  Else defer
> +         processing.  */
> +      if (dominated_by_p (CDI_DOMINATORS, commondom, bb))
> +        commondom = bb;
> +      else
> +        bitmap_set_bit (bbs_to_check, bb->index);
> +    }
> +
> +  /* Now check whether any of the BBs recorded in bbs_to_check
> +     is on a path from frombb to commondom.  If so, adjust
> +     commondom accordingly.  */
> +  if (!bitmap_empty_p (bbs_to_check))
> +    {
> +      auto_bb_flag visited (cfun);
> +      auto_vec<basic_block, 3> worklist;
> +      auto_vec<basic_block, 3> toclean;
> +      worklist.quick_push (commondom);
> +      do
> +        {
> +          basic_block pb = worklist.pop ();
> +          edge_iterator ei;
> +          edge e;
> +          FOR_EACH_EDGE (e, ei, pb->preds)
> +            /* When we run into backedges instead of checking paths
> +               covering those (esp. considering irreducible regions)
> +               simply adjust commondom to the non-backedge common
> +               dominators.  */
> +            if (e->flags & EDGE_DFS_BACK)
> +              {
> +                FOR_EACH_EDGE (e, ei, pb->preds)
> +                  if (!(e->flags & EDGE_DFS_BACK))
> +                    commondom
> +                      = nearest_common_dominator (CDI_DOMINATORS,
> +                                                  e->src, commondom);
> +                if (!(commondom->flags & visited))
> +                  {
> +                    commondom->flags |= visited;
> +                    toclean.safe_push (commondom);
> +                    worklist.safe_push (commondom);
> +                  }
> +                break;
> +              }
> +            else if (e->src == frombb)
> +              ;
> +            else if (!(e->src->flags & visited))
> +              {
> +                if (bitmap_clear_bit (bbs_to_check, e->src->index))
> +                  {
> +                    commondom = nearest_common_dominator (CDI_DOMINATORS,
> +                                                          e->src,
> +                                                          commondom);
> +                    if (commondom == frombb)
> +                      break;
> +                    if (!(commondom->flags & visited))
> +                      {
> +                        commondom->flags |= visited;
> +                        toclean.safe_push (commondom);
> +                        worklist.safe_push (commondom);
> +                      }
> +                  }
> +                else
> +                  {
> +                    e->src->flags |= visited;
> +                    toclean.safe_push (e->src);
> +                    worklist.safe_push (e->src);
> +                  }
> +              }
> +        }
> +      while (commondom != frombb
> +             && !worklist.is_empty ()
> +             && !bitmap_empty_p (bbs_to_check));
> +      for (basic_block cb : toclean)
> +        cb->flags &= ~visited;
>        if (commondom == frombb)
>          return false;
>      }
> @@ -848,6 +932,7 @@ pass_sink_code::execute (function *fun)
>    /* Arrange for the critical edge splitting to be undone if requested.  */
>    unsigned todo = unsplit_edges ? TODO_cleanup_cfg : 0;
>    connect_infinite_loops_to_exit ();
> +  mark_dfs_back_edges (fun);
>    memset (&sink_stats, 0, sizeof (sink_stats));
>    calculate_dominance_info (CDI_DOMINATORS);
>    calculate_dominance_info (CDI_POST_DOMINATORS);

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)