On Tue, 11 Jul 2023, Jan Hubicka wrote: > Hi, > this patch improves profile update in loop-ch to handle situation where > duplicated header > has loop invariant test. In this case we konw that all count of the exit > edge belongs to > the duplicated loop header edge and can update probabilities accordingly. > Since we also do all the work to track this information from analysis to > duplicaiton > I also added code to turn those conditionals to constants so we do not need > later > jump threading pass to clean up. > > This made me to work out that the propagatoin was buggy in few aspects > 1) it handled every PHI as PHI in header and incorrectly assigned some PHIs > to be IV-like when they are not > 2) it did not check for novops calls that are not required to return same > value on every invocation. > 3) I also added check for asm statement since those are not necessarily > reproducible either. > > I would like to do more changes, but tried to prevent this patch from > snowballing. The analysis of what statements will remain after duplication > can > be improved. I think we should use ranger query for other than first basic > block, too and possibly drop the IV heuristics then. Also it seems that a lot > of this logic is pretty much same to analysis in peeling pass, so unifying > this > would be nice.
Indeed. > I also think I should move the profile update out of > gimple_duplicate_sese_region (it is now very specific to ch) and rename it, > since those regions are singe entry multiple exit. Please. Maybe move it to tree-ssa-loop-ch.cc as well. > Bootstrapped/regtsted x86_64-linux, OK? OK, thanks for improving this. Richard. > Honza > > gcc/ChangeLog: > > * tree-cfg.cc (gimple_duplicate_sese_region): Add ORIG_ELIMINATED_EDGES > parameter and rewrite profile updating code to handle edges elimination. > * tree-cfg.h (gimple_duplicate_sese_region): Update prototpe. > * tree-ssa-loop-ch.cc (loop_invariant_op_p): New function. > (loop_iv_derived_p): New function. > (should_duplicate_loop_header_p): Track invariant exit edges; fix > handling > of PHIs and propagation of IV derived variables. > (ch_base::copy_headers): Pass around the invariant edges hash set. > > gcc/testsuite/ChangeLog: > > * gcc.dg/tree-ssa/loop-ch-profile-1.c: Remove xfail. > > diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc > index 4989906706c..3879fb7c4c1 100644 > --- a/gcc/tree-cfg.cc > +++ b/gcc/tree-cfg.cc > @@ -6661,14 +6661,16 @@ add_phi_args_after_copy (basic_block *region_copy, > unsigned n_region, > true otherwise. > > ELIMINATED_EDGE is an edge that is known to be removed in the dupicated > - region. */ > + region. ORIG_ELIMINATED_EDGES, if non-NULL is set of edges known to be > + removed from the original region. */ > > bool > gimple_duplicate_sese_region (edge entry, edge exit, > basic_block *region, unsigned n_region, > basic_block *region_copy, > bool update_dominance, > - edge eliminated_edge) > + edge eliminated_edge, > + hash_set <edge> *orig_eliminated_edges) > { > unsigned i; > bool free_region_copy = false, copying_header = false; > @@ -6747,7 +6749,8 @@ gimple_duplicate_sese_region (edge entry, edge exit, > split_edge_bb_loc (entry), update_dominance); > if (total_count.initialized_p () && entry_count.initialized_p ()) > { > - if (!eliminated_edge) > + if (!eliminated_edge > + && (!orig_eliminated_edges || orig_eliminated_edges->is_empty ())) > { > scale_bbs_frequencies_profile_count (region, n_region, > total_count - entry_count, > @@ -6765,7 +6768,7 @@ gimple_duplicate_sese_region (edge entry, edge exit, > if (cond1) <- this condition will become false > and we update probabilities > goto loop_exit; > - if (cond2) > + if (cond2) <- this condition is loop invariant > goto loop_exit; > goto loop_header <- this will be redirected to loop. > // region_copy_end > @@ -6776,6 +6779,7 @@ gimple_duplicate_sese_region (edge entry, edge exit, > if (cond1) <- we need to update probabbility here > goto loop_exit; > if (cond2) <- and determine scaling factor here. > + moreover cond2 is now always true > goto loop_exit; > else > goto loop; > @@ -6785,53 +6789,84 @@ gimple_duplicate_sese_region (edge entry, edge exit, > but only consumer so far is tree-ssa-loop-ch and it uses only this > to handle the common case of peeling headers which have > conditionals known to be always true upon entry. */ > - gcc_assert (eliminated_edge->src == region[0] > - && EDGE_COUNT (region[0]->succs) == 2 > - && copying_header); > - > - edge e, e_copy, eliminated_edge_copy; > - if (EDGE_SUCC (region[0], 0) == eliminated_edge) > - { > - e = EDGE_SUCC (region[0], 1); > - e_copy = EDGE_SUCC (region_copy[0], 1); > - eliminated_edge_copy = EDGE_SUCC (region_copy[0], 0); > - } > - else > + gcc_checking_assert (copying_header); > + for (unsigned int i = 0; i < n_region; i++) > { > - e = EDGE_SUCC (region[0], 0); > - e_copy = EDGE_SUCC (region_copy[0], 0); > - eliminated_edge_copy = EDGE_SUCC (region_copy[0], 1); > - } > - gcc_checking_assert (e != e_copy > - && eliminated_edge_copy != eliminated_edge > - && eliminated_edge_copy->dest > - == eliminated_edge->dest); > - > + edge exit_e, exit_e_copy, e, e_copy; > + if (EDGE_COUNT (region[i]->succs) == 1) > + { > + region_copy[i]->count = entry_count; > + region[i]->count -= entry_count; > + continue; > + } > > - /* Handle first basic block in duplicated region as in the > - non-eliminating case. */ > - scale_bbs_frequencies_profile_count (region_copy, n_region, > - entry_count, total_count); > - /* Now update redirecting eliminated edge to the other edge. > - Actual CFG update is done by caller. */ > - e_copy->probability = profile_probability::always (); > - eliminated_edge_copy->probability = profile_probability::never (); > - /* Header copying is a special case of jump threading, so use > - common code to update loop body exit condition. */ > - update_bb_profile_for_threading (region[0], e_copy->count (), e); > - /* If we duplicated more conditionals first scale the profile of > - rest of the preheader. Then work out the probability of > - entering the loop and scale rest of the loop. */ > - if (n_region > 1) > - { > - scale_bbs_frequencies_profile_count (region_copy + 1, > - n_region - 1, > - e_copy->count (), > - region_copy[1]->count); > - scale_bbs_frequencies_profile_count (region + 1, n_region - 1, > - e->count (), > - region[1]->count); > + gcc_checking_assert (EDGE_COUNT (region[i]->succs) == 2); > + if (loop_exit_edge_p (region[0]->loop_father, > + EDGE_SUCC (region[i], 0))) > + { > + exit_e = EDGE_SUCC (region[i], 0); > + exit_e_copy = EDGE_SUCC (region_copy[i], 0); > + e = EDGE_SUCC (region[i], 1); > + e_copy = EDGE_SUCC (region_copy[i], 1); > + } > + else > + { > + exit_e = EDGE_SUCC (region[i], 1); > + exit_e_copy = EDGE_SUCC (region_copy[i], 1); > + e = EDGE_SUCC (region[i], 0); > + e_copy = EDGE_SUCC (region_copy[i], 0); > + } > + gcc_assert (i == n_region - 1 > + || (e->dest == region[i + 1] > + && e_copy->dest == region_copy[i + 1])); > + region_copy[i]->count = entry_count; > + profile_count exit_e_count = exit_e->count (); > + if (eliminated_edge == exit_e) > + { > + /* Update profile and the conditional. > + CFG update is done by caller. */ > + e_copy->probability = profile_probability::always (); > + exit_e_copy->probability = profile_probability::never (); > + gcond *cond_stmt > + = as_a <gcond *>(*gsi_last_bb (region_copy[i])); > + if (e_copy->flags & EDGE_TRUE_VALUE) > + gimple_cond_make_true (cond_stmt); > + else > + gimple_cond_make_false (cond_stmt); > + update_stmt (cond_stmt); > + /* Header copying is a special case of jump threading, so use > + common code to update loop body exit condition. */ > + update_bb_profile_for_threading (region[i], entry_count, e); > + eliminated_edge = NULL; > + } > + else > + region[i]->count -= region_copy[i]->count; > + if (orig_eliminated_edges->contains (exit_e)) > + { > + orig_eliminated_edges->remove (exit_e); > + /* All exits will happen in exit_e_copy which is out of the > + loop, so increase probability accordingly. > + If the edge is eliminated_edge we already corrected > + profile above. */ > + if (entry_count.nonzero_p () && eliminated_edge != exit_e) > + set_edge_probability_and_rescale_others > + (exit_e_copy, exit_e_count.probability_in > + (entry_count)); > + /* Eliminate in-loop conditional. */ > + e->probability = profile_probability::always (); > + exit_e->probability = profile_probability::never (); > + gcond *cond_stmt = as_a <gcond *>(*gsi_last_bb (region[i])); > + if (e->flags & EDGE_TRUE_VALUE) > + gimple_cond_make_true (cond_stmt); > + else > + gimple_cond_make_false (cond_stmt); > + update_stmt (cond_stmt); > + } > + entry_count = e_copy->count (); > } > + /* Be sure that we seen all edges we are supposed to update. */ > + gcc_checking_assert (!eliminated_edge > + && orig_eliminated_edges->is_empty ()); > } > } > > diff --git a/gcc/tree-cfg.h b/gcc/tree-cfg.h > index b9ccd58c3db..a7cc37f3b97 100644 > --- a/gcc/tree-cfg.h > +++ b/gcc/tree-cfg.h > @@ -70,7 +70,8 @@ extern void add_phi_args_after_copy_bb (basic_block); > extern void add_phi_args_after_copy (basic_block *, unsigned, edge); > extern basic_block split_edge_bb_loc (edge); > extern bool gimple_duplicate_sese_region (edge, edge, basic_block *, > unsigned, > - basic_block *, bool, edge); > + basic_block *, bool, edge, > + hash_set <edge> *); > extern bool gimple_duplicate_sese_tail (edge, edge, basic_block *, unsigned, > basic_block *); > extern void gather_blocks_in_sese_region (basic_block entry, basic_block > exit, > diff --git a/gcc/tree-ssa-loop-ch.cc b/gcc/tree-ssa-loop-ch.cc > index 72792cec21f..cae6266be46 100644 > --- a/gcc/tree-ssa-loop-ch.cc > +++ b/gcc/tree-ssa-loop-ch.cc > @@ -97,13 +97,42 @@ static_loop_exit (class loop *l, gimple_ranger *ranger) > return r == desired_static_range ? ret_e : NULL; > } > > +/* Return true if OP is invariant. */ > + > +static bool > +loop_invariant_op_p (class loop *loop, > + tree op) > +{ > + if (is_gimple_min_invariant (op)) > + return true; > + if (SSA_NAME_IS_DEFAULT_DEF (op) > + || !flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (op)))) > + return true; > + return gimple_uid (SSA_NAME_DEF_STMT (op)) & 2; > +} > + > +/* Return true if OP looks like it is derived from IV. */ > + > +static bool > +loop_iv_derived_p (class loop *loop, > + tree op) > +{ > + /* Always check for invariant first. */ > + gcc_checking_assert (!is_gimple_min_invariant (op) > + && !SSA_NAME_IS_DEFAULT_DEF (op) > + && flow_bb_inside_loop_p (loop, > + gimple_bb (SSA_NAME_DEF_STMT (op)))); > + return gimple_uid (SSA_NAME_DEF_STMT (op)) & 1; > +} > + > /* Check whether we should duplicate HEADER of LOOP. At most *LIMIT > instructions should be duplicated, limit is decreased by the actual > amount. */ > > static bool > should_duplicate_loop_header_p (basic_block header, class loop *loop, > - int *limit) > + int *limit, > + hash_set <edge> *invariant_exits) > { > gimple_stmt_iterator bsi; > > @@ -152,15 +181,20 @@ should_duplicate_loop_header_p (basic_block header, > class loop *loop, > > for (gphi_iterator psi = gsi_start_phis (header); !gsi_end_p (psi); > gsi_next (&psi)) > - { > - gphi *phi = psi.phi (); > - tree res = gimple_phi_result (phi); > - if (INTEGRAL_TYPE_P (TREE_TYPE (res)) > - || POINTER_TYPE_P (TREE_TYPE (res))) > - gimple_set_uid (phi, 1 /* IV */); > - else > - gimple_set_uid (phi, 0); > - } > + /* If this is actual loop header PHIs indicate that the SSA_NAME > + may be IV. Otherwise just give up. */ > + if (header == loop->header) > + { > + gphi *phi = psi.phi (); > + tree res = gimple_phi_result (phi); > + if (INTEGRAL_TYPE_P (TREE_TYPE (res)) > + || POINTER_TYPE_P (TREE_TYPE (res))) > + gimple_set_uid (phi, 1 /* IV */); > + else > + gimple_set_uid (phi, 0); > + } > + else > + gimple_set_uid (psi.phi (), 0); > > /* Count number of instructions and punt on calls. > Populate stmts INV/IV flag to later apply heuristics to the > @@ -201,21 +235,26 @@ should_duplicate_loop_header_p (basic_block header, > class loop *loop, > /* Classify the stmt based on whether its computation is based > on a IV or whether it is invariant in the loop. */ > gimple_set_uid (last, 0); > - if (!gimple_vuse (last)) > + if (!gimple_vuse (last) > + && gimple_code (last) != GIMPLE_ASM > + && (gimple_code (last) != GIMPLE_CALL > + || gimple_call_flags (last) & ECF_CONST)) > { > bool inv = true; > - bool iv = false; > + bool iv = true; > ssa_op_iter i; > tree op; > FOR_EACH_SSA_TREE_OPERAND (op, last, i, SSA_OP_USE) > - if (!SSA_NAME_IS_DEFAULT_DEF (op) > - && flow_bb_inside_loop_p (loop, > - gimple_bb (SSA_NAME_DEF_STMT (op)))) > + if (!loop_invariant_op_p (loop, op)) > { > - if (!(gimple_uid (SSA_NAME_DEF_STMT (op)) & 2 /* INV */)) > + if (!loop_iv_derived_p (loop, op)) > + { > + inv = false; > + iv = false; > + break; > + } > + else > inv = false; > - if (gimple_uid (SSA_NAME_DEF_STMT (op)) & 1 /* IV */) > - iv = true; > } > gimple_set_uid (last, (iv ? 1 : 0) | (inv ? 2 : 0)); > } > @@ -225,16 +264,32 @@ should_duplicate_loop_header_p (basic_block header, > class loop *loop, > the loop further. Unless this is the original loop header. */ > tree lhs = gimple_cond_lhs (last); > tree rhs = gimple_cond_rhs (last); > + bool lhs_invariant = loop_invariant_op_p (loop, lhs); > + bool rhs_invariant = loop_invariant_op_p (loop, rhs); > + if (lhs_invariant && rhs_invariant) > + { > + if (invariant_exits) > + { > + edge e; > + edge_iterator ei; > + FOR_EACH_EDGE (e, ei, header->succs) > + if (loop_exit_edge_p (loop, e)) > + { > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, > + " Will elliminate invariant exit %i->%i\n", > + e->src->index, e->dest->index); > + invariant_exits->add (e); > + } > + } > + return true; > + } > + /* TODO: This is heuristics that claims that IV based ocnditionals will > + likely be optimized out in duplicated header. We could use ranger > + query instead to tell this more precisely. */ > if (header != loop->header > - && ((TREE_CODE (lhs) == SSA_NAME > - && !SSA_NAME_IS_DEFAULT_DEF (lhs) > - && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (lhs))) > - && gimple_uid (SSA_NAME_DEF_STMT (lhs)) == 0) > - || (TREE_CODE (rhs) == SSA_NAME > - && !SSA_NAME_IS_DEFAULT_DEF (rhs) > - && flow_bb_inside_loop_p (loop, > - gimple_bb (SSA_NAME_DEF_STMT (rhs))) > - && gimple_uid (SSA_NAME_DEF_STMT (rhs)) == 0))) > + && ((!lhs_invariant && !loop_iv_derived_p (loop, lhs)) > + || (!rhs_invariant && !loop_iv_derived_p (loop, rhs)))) > { > if (dump_file && (dump_flags & TDF_DETAILS)) > fprintf (dump_file, > @@ -452,7 +507,7 @@ ch_base::copy_headers (function *fun) > continue; > } > > - if (should_duplicate_loop_header_p (header, loop, &remaining_limit)) > + if (should_duplicate_loop_header_p (header, loop, &remaining_limit, > NULL)) > candidates.safe_push ({loop, static_exit}); > } > /* Do not use ranger after we change the IL and not have updated SSA. */ > @@ -482,10 +537,12 @@ ch_base::copy_headers (function *fun) > profile_count entry_count = profile_count::zero (); > edge e; > edge_iterator ei; > + hash_set <edge> invariant_exits; > FOR_EACH_EDGE (e, ei, loop->header->preds) > if (e->src != loop->latch) > entry_count += e->count (); > - while (should_duplicate_loop_header_p (header, loop, &remaining_limit)) > + while (should_duplicate_loop_header_p (header, loop, &remaining_limit, > + &invariant_exits)) > { > if (dump_file && (dump_flags & TDF_DETAILS)) > fprintf (dump_file, " Will duplicate bb %i\n", header->index); > @@ -537,7 +594,8 @@ ch_base::copy_headers (function *fun) > > propagate_threaded_block_debug_into (exit->dest, entry->dest); > if (!gimple_duplicate_sese_region (entry, exit, bbs, n_bbs, copied_bbs, > - true, candidate.static_exit)) > + true, candidate.static_exit, > + &invariant_exits)) > { > if (dump_file && (dump_flags & TDF_DETAILS)) > fprintf (dump_file, "Duplication failed.\n"); > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-ch-profile-1.c > b/gcc/testsuite/gcc.dg/tree-ssa/loop-ch-profile-1.c > index 16340868abf..bfb0f17284d 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/loop-ch-profile-1.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-ch-profile-1.c > @@ -9,4 +9,4 @@ void test(int v, int q) > /* { dg-final { scan-tree-dump-not "Invalid sum" "ch2"} } */ > /* dom2 optimizes out the redundant test for loop invariant v/q > which leads to inconsistent profile. */ > -/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" { xfail *-*-* > }} } */ > +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */ > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman; HRB 36809 (AG Nuernberg)