On Wed, Oct 7, 2015 at 4:26 AM, Jeff Law <l...@redhat.com> wrote: > > As touched on in the BZ, we record jump threads as a list of edges to > traverse. A jump thread may be recorded through a block which hasn't been > optimized by DOM yet. > > If DOM is able to optimize a control flow statement in such a block, then it > will remove one or more outgoing edges from the block. > > Removal of an edge triggers releasing the edge back to the GC system, so > naturally bad things happen if the threader then looks at the content of > those edges. > > After some instrumentation I found this was happening for both jump threads > with joiner blocks as well as FSM jump threads. The former are actually > more common than the latter. > > Given the sequencing of the recording of the jump thread, DOM optimizing the > COND_EXPR, subsequent releasing the edge structure and finally examination > of the jump thread to modify the CFG I saw only one good solution and one > bad solution. > > The bad solution involved removing jump thread paths at the time in which > DOM optimizes the COND_EXPR. The problem is we have to walk the entire path > of every recorded jump thread each time DOM performs this optimization. > Obviously not good. > > So instead we record the affected edge pointers into a hash table and use > those later to prune the jump thread paths. It's a single walk over each > recorded jump threading path. Since we're not looking at the contents of > the edge, this works reasonably well. > > One more reason to push harder for jump threading to occur independently of > DOM using Sebastian's backward walking FSM bits. > > Anyway, bootstrapped and regression tested on x86_64-linux-gnu. Also > bootstrapped and testcase tested on x86_64-linux-gnu with just release > checking enabled. > > Installed on the trunk.
Hmm, other passes avoid all this by not removing edges or blocks themselves but leaving that to cfgcleanup. They simply replace the condition in GIMPLE_CONDs or the switch value in GIMPLE_SWITCHes and let cleanup_control_expr_graph do the hard part of the job. Now - I don't know how that would interact with jump threads covering those paths, that is, what kind of "mess" jump threading would produce with the conditions/switches mangled that way. The other nice thing is that CFG cleanup properly deals with loop structure changes. Richard. > Jeff > > diff --git a/gcc/ChangeLog b/gcc/ChangeLog > index 732b3d1..db6f1b6 100644 > --- a/gcc/ChangeLog > +++ b/gcc/ChangeLog > @@ -1,3 +1,17 @@ > +2015-10-06 Jeff Law <l...@redhat.com> > + > + PR tree-optimization/67816 > + * tree-ssa-threadupdate.h (remove_jump_threads_including): Renamed > + from remove_jump_threads_starting_at. Accept an edge rather than > + a basic block. > + * tree-ssa-threadupdate.c (removed_edges): New hash table. > + (remove_jump_threads_including): Note edges that get removed from > + the CFG for later pruning of jump threading paths including them. > + (thread_through_all_blocks): Remove paths which include edges that > + have been removed. > + * tree-ssa-dom.c (optimize_stmt): Call remove_jump_threads_including > + on each outgoing edge when optimizing away a control statement. > + > 2015-10-06 Trevor Saunders <tbsaunde+...@tbsaunde.org> > > * reorg.c (emit_delay_sequence): Store list of delay slot insns > diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog > index 4ec4743..1882fbd 100644 > --- a/gcc/testsuite/ChangeLog > +++ b/gcc/testsuite/ChangeLog > @@ -1,3 +1,7 @@ > +2015-10-06 Jeff Law <l...@redhat.com> > + > + * gcc.c-torture/compile/pr67816.c: New test. > + > 2015-10-07 Kugan Vivekanandarajah <kug...@linaro.org> > > * gcc.target/aarch64/get_lane_f16_1.c: New test. 
> diff --git a/gcc/testsuite/gcc.c-torture/compile/pr67816.c > b/gcc/testsuite/gcc.c-torture/compile/pr67816.c > new file mode 100644 > index 0000000..c3db3a3 > --- /dev/null > +++ b/gcc/testsuite/gcc.c-torture/compile/pr67816.c > @@ -0,0 +1,19 @@ > +int a, c, d, e; > +int b[10]; > +void fn1() { > + int i, f = 0; > + for (;;) { > + i = 0; > + for (; i < a; i++) > + if (c) { > + if (b[i]) > + f = 1; > + } else if (b[i]) > + f = 0; > + if (f) > + d++; > + while (e) > + ; > + } > +} > + > diff --git a/gcc/tree-ssa-dom.c b/gcc/tree-ssa-dom.c > index c226da5..941087d 100644 > --- a/gcc/tree-ssa-dom.c > +++ b/gcc/tree-ssa-dom.c > @@ -1840,8 +1840,13 @@ optimize_stmt (basic_block bb, gimple_stmt_iterator > si, > edge taken_edge = find_taken_edge (bb, val); > if (taken_edge) > { > - /* Delete threads that start at BB. */ > - remove_jump_threads_starting_at (bb); > + > + /* We need to remove any queued jump threads that > + reference outgoing edges from this block. */ > + edge_iterator ei; > + edge e; > + FOR_EACH_EDGE (e, ei, bb->succs) > + remove_jump_threads_including (e); > > /* If BB is in a loop, then removing an outgoing edge from BB > may cause BB to move outside the loop, changes in the > diff --git a/gcc/tree-ssa-threadupdate.c b/gcc/tree-ssa-threadupdate.c > index 4a147bb..26b199b 100644 > --- a/gcc/tree-ssa-threadupdate.c > +++ b/gcc/tree-ssa-threadupdate.c > @@ -215,6 +215,18 @@ redirection_data::equal (const redirection_data *p1, > const redirection_data *p2) > return true; > } > > +/* Rather than search all the edges in jump thread paths each time > + DOM is able to simplify a control statement, we build a hash table > + with the deleted edges. We only care about the address of the edge, > + not its contents. 
*/ > +struct removed_edges : nofree_ptr_hash<edge_def> > +{ > + static hashval_t hash (edge e) { return htab_hash_pointer (e); } > + static bool equal (edge e1, edge e2) { return e1 == e2; } > +}; > + > +static hash_table<removed_edges> *removed_edges; > + > /* Data structure of information to pass to hash table traversal routines. > */ > struct ssa_local_info_t > { > @@ -2539,35 +2551,23 @@ valid_jump_thread_path (vec<jump_thread_edge *> > *path) > return true; > } > > -/* Remove any queued jump threads that start at BB. */ > +/* Remove any queued jump threads that include edge E. > + > + We don't actually remove them here, just record the edges into a > + hash table. That way we can do the search once per iteration of > + DOM/VRP rather than for every case where DOM optimizes away a COND_EXPR. > */ > > void > -remove_jump_threads_starting_at (basic_block bb) > +remove_jump_threads_including (edge_def *e) > { > if (!paths.exists ()) > return; > > - for (unsigned i = 0; i < paths.length ();) > - { > - vec<jump_thread_edge *> *path = paths[i]; > + if (!removed_edges) > + removed_edges = new hash_table<struct removed_edges> (17); > > - /* Sadly, FSM jump threads have a slightly different > - representation than the rest of the jump threads. 
*/ > - if ((*path)[0]->type == EDGE_FSM_THREAD > - && (*path)[0]->e->src == bb) > - { > - delete_jump_thread_path (path); > - paths.unordered_remove (i); > - } > - else if ((*path)[0]->type != EDGE_FSM_THREAD > - && (*path)[0]->e->dest == bb) > - { > - delete_jump_thread_path (path); > - paths.unordered_remove (i); > - } > - else > - i++; > - } > + edge *slot = removed_edges->find_slot (e, INSERT); > + *slot = e; > } > > /* Walk through all blocks and thread incoming edges to the appropriate > @@ -2591,11 +2591,37 @@ thread_through_all_blocks (bool > may_peel_loop_headers) > struct loop *loop; > > if (!paths.exists ()) > - return false; > + { > + retval = false; > + goto out; > + } > > threaded_blocks = BITMAP_ALLOC (NULL); > memset (&thread_stats, 0, sizeof (thread_stats)); > > + /* Remove any paths that referenced removed edges. */ > + if (removed_edges) > + for (i = 0; i < paths.length (); ) > + { > + unsigned int j; > + vec<jump_thread_edge *> *path = paths[i]; > + > + for (j = 0; j < path->length (); j++) > + { > + edge e = (*path)[j]->e; > + if (removed_edges->find_slot (e, NO_INSERT)) > + break; > + } > + > + if (j != path->length ()) > + { > + delete_jump_thread_path (path); > + paths.unordered_remove (i); > + continue; > + } > + i++; > + } > + > /* Jump-thread all FSM threads before other jump-threads. 
*/ > for (i = 0; i < paths.length ();) > { > @@ -2778,6 +2804,9 @@ thread_through_all_blocks (bool may_peel_loop_headers) > if (retval) > loops_state_set (LOOPS_NEED_FIXUP); > > + out: > + delete removed_edges; > + removed_edges = NULL; > return retval; > } > > diff --git a/gcc/tree-ssa-threadupdate.h b/gcc/tree-ssa-threadupdate.h > index 30428e8..984b6c4 100644 > --- a/gcc/tree-ssa-threadupdate.h > +++ b/gcc/tree-ssa-threadupdate.h > @@ -43,7 +43,7 @@ public: > }; > > extern void register_jump_thread (vec <class jump_thread_edge *> *); > -extern void remove_jump_threads_starting_at (basic_block); > +extern void remove_jump_threads_including (edge); > extern void delete_jump_thread_path (vec <class jump_thread_edge *> *); > extern void remove_ctrl_stmt_and_useless_edges (basic_block, basic_block); > #endif >