On 1 August 2013 18:32, Teresa Johnson <tejohn...@google.com> wrote: > Patch 3 of 3 split out from the patch I sent in May that fixes problems with > -freorder-blocks-and-partition, with changes/fixes discussed in that thread. > > See http://gcc.gnu.org/ml/gcc-patches/2013-05/threads.html#00388 for context. > > This patch sanitizes the partitioning to address issues such as edge > weight insanities that sometimes occur due to upstream optimizations, > and ensures that hot blocks are not dominated by cold blocks. This > needs to be resanitized after certain cfg optimizations that may > cause hot blocks previously reached via both hot and cold paths to > only be reached by cold paths. > > The verification code in sanitize_dominator_hotness was contributed by > Steven Bosscher. > > Bootstrapped and tested on x86-64-unknown-linux-gnu. Also ensured that > a profiledbootstrap passed with -freorder-blocks-and-partition enabled > (and with the dwarf version changed to 2 to work around PR57451). > > Ok for trunk? > > (I also included the patch as an attachment since my mailer invariably > messes up the formatting in the pasted version.) > > Thanks, > Teresa > > 2013-08-01 Teresa Johnson <tejohn...@google.com> > Steven Bosscher <ste...@gcc.gnu.org> > > * cfgrtl.c (fixup_bb_partition): New routine. > (commit_edge_insertions): Invoke fixup_partitions. > (find_partition_fixes): New routine. > (fixup_partitions): Ditto. > (verify_hot_cold_block_grouping): Update comments. > (rtl_verify_edges): Invoke find_partition_fixes. > (rtl_verify_bb_pointers): Update comments. > (rtl_verify_bb_layout): Ditto. > * basic-block.h (fixup_partitions): Declare. > * cfgcleanup.c (try_optimize_cfg): Invoke fixup_partitions. > * bb-reorder.c (sanitize_dominator_hotness): New function. > (find_rarely_executed_basic_blocks_and_crossing_edges): Invoke > sanitize_dominator_hotness. > > Index: cfgrtl.c > =================================================================== > --- cfgrtl.c (revision 201281) > +++ cfgrtl.c (working copy) > @@ -1341,6 +1341,34 @@ fixup_partition_crossing (edge e) > } > } > > +/* Called when block BB has been reassigned to a different partition, > + to ensure that the region crossing attributes are updated. */ > + > +static void > +fixup_bb_partition (basic_block bb) > +{ > + edge e; > + edge_iterator ei; > + > + /* Now need to make bb's pred edges non-region crossing. */ > + FOR_EACH_EDGE (e, ei, bb->preds) > + { > + fixup_partition_crossing (e); > + } > + > + /* Possibly need to make bb's successor edges region crossing, > + or remove stale region crossing. */ > + FOR_EACH_EDGE (e, ei, bb->succs) > + { > + if ((e->flags & EDGE_FALLTHRU) > + && BB_PARTITION (bb) != BB_PARTITION (e->dest) > + && e->dest != EXIT_BLOCK_PTR) > + force_nonfallthru (e); > + else > + fixup_partition_crossing (e); > + } > +} > + > /* Attempt to change code to redirect edge E to TARGET. Don't do that on > expense of adding new instructions or reordering basic blocks. > > @@ -1979,6 +2007,14 @@ commit_edge_insertions (void) > { > basic_block bb; > > + /* Optimization passes that invoke this routine can cause hot blocks > + previously reached by both hot and cold blocks to become dominated only > + by cold blocks. This will cause the verification below to fail, > + and lead to now cold code in the hot section. In some cases this > + may only be visible after newly unreachable blocks are deleted, > + which will be done by fixup_partitions. */ > + fixup_partitions (); > + > #ifdef ENABLE_CHECKING > verify_flow_info (); > #endif > @@ -2173,6 +2209,101 @@ get_last_bb_insn (basic_block bb) > return end; > } > > +/* Sanity check partition hotness to ensure that basic blocks in > + the cold partition don't dominate basic blocks in the hot partition. > + If FLAG_ONLY is true, report violations as errors. Otherwise > + re-mark the dominated blocks as cold, since this is run after > + cfg optimizations that may make hot blocks previously reached > + by both hot and cold blocks now only reachable along cold paths. */ > + > +vec<basic_block> > +find_partition_fixes (bool flag_only) > +{ > + basic_block bb; > + vec<basic_block> bbs_in_cold_partition = vNULL; > + vec<basic_block> bbs_to_fix = vNULL; > + > + if (!crtl->has_bb_partition) > + return vNULL;
I'd push this early return into the callers instead, at most turn it into a gcc_checking_assert to be safe. Both callers, fixup_partitions and rtl_verify_edges, look at ctrl->has_bb_partition already before calling this, so the above should be dead already. Did my mailer somehow swallow the static from find_partition_fixes? > + > + FOR_EACH_BB (bb) > + if ((BB_PARTITION (bb) == BB_COLD_PARTITION)) > + bbs_in_cold_partition.safe_push (bb); > + > + if (bbs_in_cold_partition.is_empty ()) > + return vNULL; > + > + bool dom_calculated_here = !dom_info_available_p (CDI_DOMINATORS); > + > + if (dom_calculated_here) > + calculate_dominance_info (CDI_DOMINATORS); > + > + while (! bbs_in_cold_partition.is_empty ()) > + { > + bb = bbs_in_cold_partition.pop (); > + /* Any blocks dominated by a block in the cold section > + must also be cold. */ > + basic_block son; > + for (son = first_dom_son (CDI_DOMINATORS, bb); > + son; > + son = next_dom_son (CDI_DOMINATORS, son)) > + { > + /* If son is not yet cold, then mark it cold here and > + enqueue it for further processing. */ > + if ((BB_PARTITION (son) != BB_COLD_PARTITION)) > + { > + if (flag_only) > + error ("non-cold basic block %d dominated " > + "by a block in the cold partition", son->index); > + else > + BB_SET_PARTITION (son, BB_COLD_PARTITION); > + bbs_to_fix.safe_push (son); > + bbs_in_cold_partition.safe_push (son); > + } > + } > + } > + > + if (dom_calculated_here) > + free_dominance_info (CDI_DOMINATORS); > + > + return bbs_to_fix; > +} > + > +/* Perform cleanup on the hot/cold bb partitioning after optimization > + passes that modify the cfg. */ > + > +void > +fixup_partitions (void) > +{ > + basic_block bb; > + > + if (!crtl->has_bb_partition) > + return; > + > + /* Delete any blocks that became unreachable and weren't > + already cleaned up, for example during edge forwarding > + and convert_jumps_to_returns. This will expose more > + opportunities for fixing the partition boundaries here. > + Also, the calculation of the dominance graph during verification > + will assert if there are unreachable nodes. */ > + delete_unreachable_blocks (); > + > + /* If there are partitions, do a sanity check on them: A basic block in > + a cold partition cannot dominate a basic block in a hot partition. > + Fixup any that now violate this requirement, as a result of edge > + forwarding and unreachable block deletion. */ > + vec<basic_block> bbs_to_fix = find_partition_fixes (false); > + > + /* Do the partition fixup after all necessary blocks have been converted to > + cold, so that we only update the region crossings the minimum number of > + places, which can require forcing edges to be non fallthru. */ > + while (! bbs_to_fix.is_empty ()) > + { > + bb = bbs_to_fix.pop (); > + fixup_bb_partition (bb); > + } > +} > + > /* Verify, in the basic block chain, that there is at most one switch > between hot/cold partitions. This condition will not be true until > after reorder_basic_blocks is called. */ > @@ -2219,7 +2350,8 @@ verify_hot_cold_block_grouping (void) > /* Perform several checks on the edges out of each block, such as > the consistency of the branch probabilities, the correctness > of hot/cold partition crossing edges, and the number of expected > - successor edges. */ > + successor edges. Also verify that the dominance relationship > + between hot/cold blocks is sane. */ > > static int > rtl_verify_edges (void) > @@ -2382,6 +2514,14 @@ rtl_verify_edges (void) > } > } > > + /* If there are partitions, do a sanity check on them: A basic block in > + a cold partition cannot dominate a basic block in a hot partition. */ > + if (crtl->has_bb_partition && !err) > + { > + vec<basic_block> bbs_to_fix = find_partition_fixes (true); > + err = !bbs_to_fix.is_empty (); > + } > + > /* Clean up. */ > return err; > } > @@ -2515,7 +2655,7 @@ rtl_verify_bb_pointers (void) > and NOTE_INSN_BASIC_BLOCK > - verify that no fall_thru edge crosses hot/cold partition boundaries > - verify that there are no pending RTL branch predictions > - - verify that there is a single hot/cold partition boundary after bbro > + - verify that hot blocks are not dominated by cold blocks > > In future it can be extended check a lot of other stuff as well > (reachability of basic blocks, life information, etc. etc.). */ > @@ -2761,7 +2901,8 @@ rtl_verify_bb_layout (void) > - check that all insns are in the basic blocks > (except the switch handling code, barriers and notes) > - check that all returns are followed by barriers > - - check that all fallthru edge points to the adjacent blocks. */ > + - check that all fallthru edge points to the adjacent blocks > + - verify that there is a single hot/cold partition boundary after bbro */ > > static int > rtl_verify_flow_info (void) > Index: basic-block.h > =================================================================== > --- basic-block.h (revision 201281) > +++ basic-block.h (working copy) > @@ -797,6 +797,7 @@ extern bool contains_no_active_insn_p (const_basic > extern bool forwarder_block_p (const_basic_block); > extern bool can_fallthru (basic_block, basic_block); > extern void emit_barrier_after_bb (basic_block bb); > +extern void fixup_partitions (void); > > /* In cfgbuild.c. */ > extern void find_many_sub_basic_blocks (sbitmap); > Index: cfgcleanup.c > =================================================================== > --- cfgcleanup.c (revision 201281) > +++ cfgcleanup.c (working copy) > @@ -2807,10 +2807,21 @@ try_optimize_cfg (int mode) > df_analyze (); > } > > + if (changed) > + { > + /* Edge forwarding in particular can cause hot blocks > previously > + reached by both hot and cold blocks to become dominated only > + by cold blocks. This will cause the verification > below to fail, > + and lead to now cold code in the hot section. This is not > easy > + to detect and fix during edge forwarding, and in some cases > + is only visible after newly unreachable blocks are deleted, > + which will be done in fixup_partitions. */ > + fixup_partitions (); > + > #ifdef ENABLE_CHECKING > - if (changed) > - verify_flow_info (); > + verify_flow_info (); > #endif > + } > > changed_overall |= changed; > first_pass = false; > Index: bb-reorder.c > =================================================================== > --- bb-reorder.c (revision 201281) > +++ bb-reorder.c (working copy) > @@ -1444,6 +1444,55 @@ fix_up_crossing_landing_pad (eh_landing_pad old_lp > ei_next (&ei); > } > > + > +/* Ensure that no cold bbs dominate hot bbs along the dominance or > + post-dominance DIR, for example as a result of edge weight insanities. > + Returns the updated value of COLD_BB_COUNT and adds newly-hot bbs > + to BBS_IN_HOT_PARTITION. */ > + > +static unsigned int > +sanitize_dominator_hotness (enum cdi_direction dir, unsigned int > cold_bb_count, > + vec<basic_block> *bbs_in_hot_partition) > +{ > + if (!cold_bb_count) > + return 0; Same pattern as above. Callers do not invoke us if !cold_bb_count so the above check is dead code. Again, remove or checking assert? > + > + bool dom_calculated_here = !dom_info_available_p (dir); > + > + if (dom_calculated_here) > + calculate_dominance_info (dir); > + > + /* Keep examining hot bbs until we have either checked them all, or > + re-marked all cold bbs as hot. */ > + vec<basic_block> hot_bbs_to_check = bbs_in_hot_partition->copy (); > + while (! hot_bbs_to_check.is_empty () > + && cold_bb_count) The comment says "or", which sounds plausible, but the code says "and"? > + { > + basic_block bb = hot_bbs_to_check.pop (); > + basic_block dom_bb = get_immediate_dominator (dir, bb); > + > + /* If bb's immediate dominator is also hot then it is ok. */ > + if (BB_PARTITION (dom_bb) != BB_COLD_PARTITION) Why not follow the comment here and == BB_HOT_PARTITION instead, for clarity? > + continue; > + > + /* We have a hot bb with an immediate dominator that is cold. > + The dominator needs to be re-marked hot. */ > + BB_SET_PARTITION (dom_bb, BB_HOT_PARTITION); > + cold_bb_count--; > + > + /* Now we need to examine newly-hot dom_bb to see if it is also > + dominated by a cold bb. */ > + bbs_in_hot_partition->safe_push (dom_bb); > + hot_bbs_to_check.safe_push (dom_bb); > + } > + > + if (dom_calculated_here) > + free_dominance_info (dir); > + > + return cold_bb_count; > +} > + > + > /* Find the basic blocks that are rarely executed and need to be moved to > a separate section of the .o file (to cut down on paging and improve > cache locality). Return a vector of all edges that cross. */ > @@ -1455,16 +1504,42 @@ find_rarely_executed_basic_blocks_and_crossing_edg > basic_block bb; > edge e; > edge_iterator ei; > + unsigned int cold_bb_count = 0; > + vec<basic_block> bbs_in_hot_partition = vNULL; > > /* Mark which partition (hot/cold) each basic block belongs in. */ > FOR_EACH_BB (bb) > { > if (probably_never_executed_bb_p (cfun, bb)) > - BB_SET_PARTITION (bb, BB_COLD_PARTITION); > + { > + BB_SET_PARTITION (bb, BB_COLD_PARTITION); > + cold_bb_count++; > + } > else > - BB_SET_PARTITION (bb, BB_HOT_PARTITION); > + { > + BB_SET_PARTITION (bb, BB_HOT_PARTITION); > + bbs_in_hot_partition.safe_push (bb); > + } > } > > + /* Ensure that no cold bbs dominate hot bbs. This could happen as a result > of > + several different possibilities. One is that there are edge > weight insanities > + due to optimization phases that do not properly update basic block > profile > + counts. The second is that the entry of the function may not be > hot, because > + it is entered fewer times than the number of profile training > runs, but there > + is a loop inside the function that causes blocks within the function to > be > + above the threshold for hotness. Then do the same along the > post-dominator > + tree (which could have additional changes required after fixing up > + dominators). */ > + if (cold_bb_count) > + cold_bb_count = sanitize_dominator_hotness (CDI_DOMINATORS, > + cold_bb_count, > + &bbs_in_hot_partition); > + if (cold_bb_count) > + cold_bb_count = sanitize_dominator_hotness (CDI_POST_DOMINATORS, > + cold_bb_count, > + &bbs_in_hot_partition); I take it this last store to cold_bb_count is eliminated anyway. Thanks, > + > /* The format of .gcc_except_table does not allow landing pads to > be in a different partition as the throw. Fix this by either > moving or duplicating the landing pads. */ > > > -- > Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413