Hi,
On 18 September 2016 at 20:48, Jan Hubicka <hubi...@ucw.cz> wrote: > Hi, > this is the patch compensating testsuite I commited after re-testing > on x86_64-linux. > > Other placements of early_thread_jumps does not work veyr well (at least in > current implementation). Putting it before forwprop disables about 15% of > threadings. Placing it after DCE makes inliner to not see much of benefits > because threading requires a cleanup propagation+DCE after itself. > So unless we extend threader to be smarter or add extra DCE cleanup, i think > this is best placement. > > Honza > > * passes.def (pass_early_thread_jumps): Schedule after forwprop. > * tree-pass.h (make_pass_early_thread_jumps): Declare. > * tree-ssa-threadbackward.c (fsm_find_thread_path, > fsm_find_thread_path, profitable_jump_thread_path, > fsm_find_control_statement_thread_paths, > find_jump_threads_backwards): Add speed_p parameter. > (pass_data_early_thread_jumps): New pass. > (make_pass_early_thread_jumps): New function. > > * g++.dg/predict-loop-exit-1.C: Disable early jump threading. > * g++.dg/predict-loop-exit-2.C: Disable early jump threading. > * g++.dg/predict-loop-exit-3.C: Disable early jump threading. > * gcc.dg/tree-ssa/pr69196-1.c: Disable early jump threading. > * gcc.dg/tree-ssa/vrp01.c: Disable early jump threading. > * gcc.dg/tree-ssa/ssa-dom-thread-2b.c: Disable early jump threading. > * gcc.dg/tree-ssa/pr68198.c: Scan ethread dump. > * gcc.dg/tree-ssa/ssa-thread-13.c: Scan ethread dump. > * gcc.dg/tree-ssa/vrp56.c: Scan ethread dump. > * gcc.dg/tree-ssa/vrp92.c: Scan ethread dump. > * gcc.dg/uninit-15.c: Swap xfailed and non-xfailed alternative. > > Index: passes.def > =================================================================== > --- passes.def (revision 240109) > +++ passes.def (working copy) > @@ -84,6 +84,7 @@ along with GCC; see the file COPYING3. > /* After CCP we rewrite no longer addressed locals into SSA > form if possible. */ > NEXT_PASS (pass_forwprop); > + NEXT_PASS (pass_early_thread_jumps); > NEXT_PASS (pass_sra_early); > /* pass_build_ealias is a dummy pass that ensures that we > execute TODO_rebuild_alias at this point. */ > Index: testsuite/g++.dg/predict-loop-exit-1.C > =================================================================== > --- testsuite/g++.dg/predict-loop-exit-1.C (revision 240109) > +++ testsuite/g++.dg/predict-loop-exit-1.C (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -fdump-tree-profile_estimate" } */ > +/* { dg-options "-O2 -fdump-tree-profile_estimate -fdisable-tree-ethread" } > */ > > int g; > int foo(); > Index: testsuite/g++.dg/predict-loop-exit-2.C > =================================================================== > --- testsuite/g++.dg/predict-loop-exit-2.C (revision 240109) > +++ testsuite/g++.dg/predict-loop-exit-2.C (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -fdump-tree-profile_estimate" } */ > +/* { dg-options "-O2 -fdump-tree-profile_estimate -fdisable-tree-ethread" } > */ > > int g; > int foo(); > Index: testsuite/g++.dg/predict-loop-exit-3.C > =================================================================== > --- testsuite/g++.dg/predict-loop-exit-3.C (revision 240109) > +++ testsuite/g++.dg/predict-loop-exit-3.C (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -fdump-tree-profile_estimate" } */ > +/* { dg-options "-O2 -fdump-tree-profile_estimate -fdisable-tree-ethread" } > */ > > int g; > int foo(); > Index: testsuite/gcc.dg/tree-ssa/pr68198.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/pr68198.c (revision 240109) > +++ testsuite/gcc.dg/tree-ssa/pr68198.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -fdump-tree-thread1-details" } */ > +/* { dg-options "-O2 -fdump-tree-thread1-details -fdisable-tree-ethread" } */ > > extern void abort (void); > > @@ -40,4 +40,4 @@ c_finish_omp_clauses (tree clauses) > /* There are 3 FSM jump threading opportunities, two of which will > get filtered out. */ > /* { dg-final { scan-tree-dump-times "Registering FSM" 1 "thread1"} } */ > -/* { dg-final { scan-tree-dump-times "FSM Thread through multiway branch > without threading a multiway branch" 2 "thread1"} } */ > +/* { dg-final { scan-tree-dump-times "FSM Thread through multiway branch > without threading a multiway branch" 2 "ethread"} } */ This test does not work, at least on arm* and aarch64*. I'm seeing: cc1: note: disable pass tree-ethread for functions in the range of [0, 4294967295] PASS: gcc.dg/tree-ssa/pr68198.c (test for excess errors) PASS: gcc.dg/tree-ssa/pr68198.c scan-tree-dump-times thread1 "Registering FSM" 1 gcc.dg/tree-ssa/pr68198.c: dump file does not exist UNRESOLVED: gcc.dg/tree-ssa/pr68198.c scan-tree-dump-times ethread "FSM Thread through multiway branch without threading a multiway branch" 2 Christophe > Index: testsuite/gcc.dg/tree-ssa/pr69196-1.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/pr69196-1.c (revision 240109) > +++ testsuite/gcc.dg/tree-ssa/pr69196-1.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile { target sparc*-*-* x86_64-*-* } } */ > -/* { dg-options "-O2 -fdump-tree-thread1-details" } */ > +/* { dg-options "-O2 -fdump-tree-thread1-details -fdisable-tree-ethread" } */ > > /* { dg-final { scan-tree-dump "FSM did not thread around loop and would > copy too many statements" "thread1" } } */ > > Index: testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2b.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2b.c (revision 240109) > +++ testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2b.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -fdump-tree-thread1-stats -fdump-tree-dom2-stats" } */ > +/* { dg-options "-O2 -fdump-tree-thread1-stats -fdump-tree-dom2-stats > -fdisable-tree-ethread" } */ > > void foo(); > void bla(); > Index: testsuite/gcc.dg/tree-ssa/ssa-thread-13.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/ssa-thread-13.c (revision 240109) > +++ testsuite/gcc.dg/tree-ssa/ssa-thread-13.c (working copy) > @@ -1,6 +1,6 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -fdump-tree-thread1-details" } */ > -/* { dg-final { scan-tree-dump "FSM" "thread1" } } */ > +/* { dg-options "-O2 -fdump-tree-ethread-details" } */ > +/* { dg-final { scan-tree-dump "FSM" "ethread" } } */ > > typedef struct rtx_def *rtx; > typedef const struct rtx_def *const_rtx; > Index: testsuite/gcc.dg/tree-ssa/vrp01.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/vrp01.c (revision 240109) > +++ testsuite/gcc.dg/tree-ssa/vrp01.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -fdump-tree-vrp1" } */ > +/* { dg-options "-O2 -fdump-tree-vrp1 -fdisable-tree-ethread" } */ > > int > foo (int *p, int i) > Index: testsuite/gcc.dg/tree-ssa/vrp56.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/vrp56.c (revision 240109) > +++ testsuite/gcc.dg/tree-ssa/vrp56.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -fdump-tree-thread1-stats" } */ > +/* { dg-options "-O2 -fdump-tree-ethread-stats" } */ > typedef struct basic_block_def *basic_block; > struct basic_block_def; > struct edge_def; > @@ -38,5 +38,5 @@ cleanup_empty_eh (basic_block bb) > foo (); > } > } > -/* { dg-final { scan-tree-dump "Jumps threaded: 1" "thread1"} } */ > +/* { dg-final { scan-tree-dump "Jumps threaded: 1" "ethread"} } */ > > Index: testsuite/gcc.dg/tree-ssa/vrp92.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/vrp92.c (revision 240109) > +++ testsuite/gcc.dg/tree-ssa/vrp92.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -fdump-tree-vrp1-details" } */ > +/* { dg-options "-O2 -fdump-tree-vrp1-details -fdisable-tree-ethread" } */ > > void bar (void); > int foo (int i, int j) > Index: tree-pass.h > =================================================================== > --- tree-pass.h (revision 240109) > +++ tree-pass.h (working copy) > @@ -399,6 +399,7 @@ extern gimple_opt_pass *make_pass_cd_dce > extern gimple_opt_pass *make_pass_call_cdce (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_merge_phi (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_thread_jumps (gcc::context *ctxt); > +extern gimple_opt_pass *make_pass_early_thread_jumps (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_split_crit_edges (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_laddress (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_pre (gcc::context *ctxt); > Index: tree-ssa-threadbackward.c > =================================================================== > --- tree-ssa-threadbackward.c (revision 240109) > +++ tree-ssa-threadbackward.c (working copy) > @@ -61,12 +61,14 @@ get_gimple_control_stmt (basic_block bb) > /* Return true if the CFG contains at least one path from START_BB to END_BB. > When a path is found, record in PATH the blocks from END_BB to START_BB. > VISITED_BBS is used to make sure we don't fall into an infinite loop. > Bound > - the recursion to basic blocks belonging to LOOP. */ > + the recursion to basic blocks belonging to LOOP. > + SPEED_P indicate that we could increase code size to improve the code > path */ > > static bool > fsm_find_thread_path (basic_block start_bb, basic_block end_bb, > vec<basic_block, va_gc> *&path, > - hash_set<basic_block> *visited_bbs, loop_p loop) > + hash_set<basic_block> *visited_bbs, loop_p loop, > + bool speed_p) > { > if (loop != start_bb->loop_father) > return false; > @@ -82,7 +84,8 @@ fsm_find_thread_path (basic_block start_ > edge e; > edge_iterator ei; > FOR_EACH_EDGE (e, ei, start_bb->succs) > - if (fsm_find_thread_path (e->dest, end_bb, path, visited_bbs, loop)) > + if (fsm_find_thread_path (e->dest, end_bb, path, visited_bbs, loop, > + speed_p)) > { > vec_safe_push (path, start_bb); > return true; > @@ -101,11 +104,13 @@ fsm_find_thread_path (basic_block start_ > value on PATH. ARG is the value of that SSA_NAME. > > BBI will be appended to PATH when we have a profitable jump threading > - path. Callers are responsible for removing BBI from PATH in that case. */ > + path. Callers are responsible for removing BBI from PATH in that case. > + > + SPEED_P indicate that we could increase code size to improve the code > path */ > > static edge > profitable_jump_thread_path (vec<basic_block, va_gc> *&path, > - basic_block bbi, tree name, tree arg) > + basic_block bbi, tree name, tree arg, bool > speed_p) > { > /* Note BBI is not in the path yet, hence the +1 in the test below > to make sure BBI is accounted for in the path length test. */ > @@ -307,7 +312,7 @@ profitable_jump_thread_path (vec<basic_b > return NULL; > } > > - if (optimize_edge_for_speed_p (taken_edge)) > + if (speed_p && optimize_edge_for_speed_p (taken_edge)) > { > if (n_insns >= PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATH_INSNS)) > { > @@ -422,13 +427,15 @@ convert_and_register_jump_thread_path (v > > /* We trace the value of the SSA_NAME NAME back through any phi nodes looking > for places where it gets a constant value and save the path. Stop after > - having recorded MAX_PATHS jump threading paths. */ > + having recorded MAX_PATHS jump threading paths. > + > + SPEED_P indicate that we could increase code size to improve the code > path */ > > static void > fsm_find_control_statement_thread_paths (tree name, > hash_set<basic_block> *visited_bbs, > vec<basic_block, va_gc> *&path, > - bool seen_loop_phi) > + bool seen_loop_phi, bool speed_p) > { > /* If NAME appears in an abnormal PHI, then don't try to trace its > value back through PHI nodes. */ > @@ -496,7 +503,7 @@ fsm_find_control_statement_thread_paths > hash_set<basic_block> *visited_bbs = new hash_set<basic_block>; > > if (fsm_find_thread_path (var_bb, e->src, next_path, visited_bbs, > - e->src->loop_father)) > + e->src->loop_father, speed_p)) > ++e_count; > > delete visited_bbs; > @@ -562,7 +569,7 @@ fsm_find_control_statement_thread_paths > /* Recursively follow SSA_NAMEs looking for a constant > definition. */ > fsm_find_control_statement_thread_paths (arg, visited_bbs, path, > - seen_loop_phi); > + seen_loop_phi, > speed_p); > > path->pop (); > continue; > @@ -573,7 +580,8 @@ fsm_find_control_statement_thread_paths > > /* If this is a profitable jump thread path, then convert it > into the canonical form and register it. */ > - edge taken_edge = profitable_jump_thread_path (path, bbi, name, > arg); > + edge taken_edge = profitable_jump_thread_path (path, bbi, name, arg, > + speed_p); > if (taken_edge) > { > if (bb_loop_depth (taken_edge->src) > @@ -589,7 +597,7 @@ fsm_find_control_statement_thread_paths > > if (TREE_CODE (arg) == SSA_NAME) > fsm_find_control_statement_thread_paths (arg, visited_bbs, > - path, seen_loop_phi); > + path, seen_loop_phi, > speed_p); > > else > { > @@ -599,7 +607,7 @@ fsm_find_control_statement_thread_paths > path->pop (); > > edge taken_edge = profitable_jump_thread_path (path, var_bb, > - name, arg); > + name, arg, speed_p); > if (taken_edge) > { > if (bb_loop_depth (taken_edge->src) > @@ -623,10 +631,11 @@ fsm_find_control_statement_thread_paths > is a constant. Record such paths for jump threading. > > It is assumed that BB ends with a control statement and that by > - finding a path where NAME is a constant, we can thread the path. */ > + finding a path where NAME is a constant, we can thread the path. > + SPEED_P indicate that we could increase code size to improve the code > path */ > > void > -find_jump_threads_backwards (basic_block bb) > +find_jump_threads_backwards (basic_block bb, bool speed_p) > { > gimple *stmt = get_gimple_control_stmt (bb); > if (!stmt) > @@ -656,7 +665,8 @@ find_jump_threads_backwards (basic_block > hash_set<basic_block> *visited_bbs = new hash_set<basic_block>; > > max_threaded_paths = PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATHS); > - fsm_find_control_statement_thread_paths (name, visited_bbs, bb_path, > false); > + fsm_find_control_statement_thread_paths (name, visited_bbs, bb_path, false, > + speed_p); > > delete visited_bbs; > vec_free (bb_path); > @@ -706,7 +716,7 @@ pass_thread_jumps::execute (function *fu > FOR_EACH_BB_FN (bb, fun) > { > if (EDGE_COUNT (bb->succs) > 1) > - find_jump_threads_backwards (bb); > + find_jump_threads_backwards (bb, true); > } > bool changed = thread_through_all_blocks (true); > > @@ -721,3 +731,59 @@ make_pass_thread_jumps (gcc::context *ct > { > return new pass_thread_jumps (ctxt); > } > + > +namespace { > + > +const pass_data pass_data_early_thread_jumps = > +{ > + GIMPLE_PASS, > + "ethread", > + OPTGROUP_NONE, > + TV_TREE_SSA_THREAD_JUMPS, > + ( PROP_cfg | PROP_ssa ), > + 0, > + 0, > + 0, > + ( TODO_cleanup_cfg | TODO_update_ssa ), > +}; > + > +class pass_early_thread_jumps : public gimple_opt_pass > +{ > +public: > + pass_early_thread_jumps (gcc::context *ctxt) > + : gimple_opt_pass (pass_data_early_thread_jumps, ctxt) > + {} > + > + opt_pass * clone (void) { return new pass_early_thread_jumps (m_ctxt); } > + virtual bool gate (function *); > + virtual unsigned int execute (function *); > +}; > + > +bool > +pass_early_thread_jumps::gate (function *fun ATTRIBUTE_UNUSED) > +{ > + return true; > +} > + > + > +unsigned int > +pass_early_thread_jumps::execute (function *fun) > +{ > + /* Try to thread each block with more than one successor. */ > + basic_block bb; > + FOR_EACH_BB_FN (bb, fun) > + { > + if (EDGE_COUNT (bb->succs) > 1) > + find_jump_threads_backwards (bb, false); > + } > + thread_through_all_blocks (true); > + return 0; > +} > + > +} > + > +gimple_opt_pass * > +make_pass_early_thread_jumps (gcc::context *ctxt) > +{ > + return new pass_early_thread_jumps (ctxt); > +} > Index: testsuite/gcc.dg/uninit-15.c > =================================================================== > --- testsuite/gcc.dg/uninit-15.c (revision 240109) > +++ testsuite/gcc.dg/uninit-15.c (working copy) > @@ -1,16 +1,16 @@ > /* PR tree-optimization/17506 > We issue an uninitialized variable warning at a wrong location at > line 11, which is very confusing. Make sure we print out a note to > - make it less confusing. (xfailed alternative) > + make it less confusing. (not xfailed alternative) > But it is of course ok if we warn in bar about uninitialized use > - of j. (not xfailed alternative) */ > + of j. (xfailed alternative) */ > /* { dg-do compile } */ > /* { dg-options "-O1 -Wuninitialized" } */ > > inline int > foo (int i) > { > - if (i) /* { dg-warning "used uninitialized in this function" "" { xfail > *-*-* } } */ > + if (i) /* { dg-warning "used uninitialized in this function" } */ > return 1; > return 0; > } > @@ -20,7 +20,7 @@ void baz (void); > void > bar (void) > { > - int j; /* { dg-message "note: 'j' was declared here" "" { xfail *-*-* } } > */ > - for (; foo (j); ++j) /* { dg-warning "'j' is used uninitialized" } */ > + int j; /* { dg-message "note: 'j' was declared here" } */ > + for (; foo (j); ++j) /* { dg-warning "'j' is used uninitialized" "" { > xfail *-*-* } } */ > baz (); > }