commit 3f4e4338dbd268b37996b27e7bcc86f388f420d7 Author: bin.cheng Date: Mon Mar 22 18:45:26 2021 +0800 Use programing order preserved RPO in loop distribution. Tree loop distribution uses RPO to build reduced dependence graph, it's important that RPO preserves the original programing order and usually it does. However, when distributing loop nest, the exit bb could be placed before some loop basic blocks while after loop header. This patch fixes the issue by preferring loop exit edge in DFS when computing RPO. diff --git a/gcc/cfganal.c b/gcc/cfganal.c index 2627c2ff457..4b9bb71aafc 100644 --- a/gcc/cfganal.c +++ b/gcc/cfganal.c @@ -1038,6 +1038,137 @@ pre_and_rev_post_order_compute_fn (struct function *fn, return pre_order_num; } +/* Same as above. Compute the depth first search order of FN and store the RPO + in the array REV_POST_ORDER, return the reverse completion number for each + node. Returns the number of nodes visited. A depth first search tries to + get as far away from the starting point as quickly as possible. + + This function prefers loop exit edge in depth first search, i.e, all basic + blocks in the loop are placed before exit basic blocks in RPO if possible. + This in additionally preserves the original programming order, For example: + PREHEADER + | +----------+ + | | | + V V | + HEADER -> LATCH + | + V + EXIT + We get RPO like , rather than . + + Note this function clobbers aux field in basic block. + + rev_post_order is really a reverse postorder numbering of the graph. */ + +int +loop_first_rev_post_order_compute_fn (struct function *fn, int *rev_post_order, + bool include_entry_exit) +{ + int pre_order_num = 0; + int n_bbs = n_basic_blocks_for_fn (fn); + int rev_post_order_num = n_bbs - 1; + + gcc_assert (rev_post_order != NULL); + /* Allocate stack for back-tracking up CFG. */ + typedef std::pair stack_elem; + auto_vec stack (n_bbs + 1); + + basic_block bb; + FOR_ALL_BB_FN (bb, fn) + { + struct loop *loop = bb->loop_father; + gcc_assert (!bb->aux); + bb->aux = XCNEWVEC (edge, EDGE_COUNT (bb->succs)); + int i = 0, j = EDGE_COUNT (bb->succs); + /* Prefer loop exit edge in DFS by placing it at the front. */ + for (auto ei = ei_start (bb->succs); !ei_end_p (ei); ei_next (&ei)) + if (flow_bb_inside_loop_p (loop, ei_edge (ei)->dest)) + ((edge *)bb->aux)[--j] = ei_edge (ei); + else + ((edge *)bb->aux)[i++] = ei_edge (ei); + + gcc_assert (i == j); + } + + if (include_entry_exit) + { + pre_order_num++; + rev_post_order[rev_post_order_num--] = EXIT_BLOCK; + } + else + rev_post_order_num -= NUM_FIXED_BLOCKS; + + /* BB flag to track nodes that have been visited. */ + auto_bb_flag visited (fn); + + /* Push the first edge on to the stack. */ + stack.quick_push ({ENTRY_BLOCK_PTR_FOR_FN (fn), 0}); + + while (!stack.is_empty ()) + { + basic_block src; + basic_block dest; + + /* Look at the edge on the top of the stack. */ + stack_elem *elem = &stack.last (); + src = elem->first; + gcc_assert (elem->second < EDGE_COUNT (src->succs)); + + edge e = ((edge *)src->aux)[elem->second]; + + gcc_assert (src == e->src); + dest = e->dest; + + /* Check if the edge destination has been visited yet. */ + if (dest != EXIT_BLOCK_PTR_FOR_FN (fn) + && ! (dest->flags & visited)) + { + /* Mark that we have visited the destination. */ + dest->flags |= visited; + pre_order_num++; + + if (EDGE_COUNT (dest->succs) > 0) + /* Since the DEST node has been visited for the first + time, check its successors. */ + stack.quick_push ({dest, 0}); + else + /* There are no successors for the DEST node so assign + its reverse completion number. */ + rev_post_order[rev_post_order_num--] = dest->index; + } + else + { + if (elem->second < EDGE_COUNT (src->succs) - 1) + ++elem->second; + else + { + if (src != ENTRY_BLOCK_PTR_FOR_FN (fn)) + /* There are no more successors for the SRC node + so assign its reverse completion number. */ + rev_post_order[rev_post_order_num--] = src->index; + + stack.pop (); + } + } + } + + if (include_entry_exit) + { + pre_order_num++; + rev_post_order[rev_post_order_num--] = ENTRY_BLOCK; + } + + FOR_ALL_BB_FN (bb, fn) + { + bb->flags &= ~visited; + free (bb->aux); + bb->aux = NULL; + } + + return pre_order_num; +} + /* Like pre_and_rev_post_order_compute_fn but operating on the current function and asserting that all nodes were visited. */ diff --git a/gcc/cfganal.h b/gcc/cfganal.h index 31ce56ca40c..074830b7882 100644 --- a/gcc/cfganal.h +++ b/gcc/cfganal.h @@ -66,6 +66,8 @@ extern basic_block dfs_find_deadend (basic_block); extern void inverted_post_order_compute (vec *postorder, sbitmap *start_points = 0); extern int pre_and_rev_post_order_compute_fn (struct function *, int *, int *, bool); +extern int loop_first_rev_post_order_compute_fn (struct function *, int *, + bool); extern int pre_and_rev_post_order_compute (int *, int *, bool); extern int rev_post_order_and_mark_dfs_back_seme (struct function *, edge, bitmap, bool, int *, diff --git a/gcc/testsuite/gcc.c-torture/execute/pr98736.c b/gcc/testsuite/gcc.c-torture/execute/pr98736.c new file mode 100644 index 00000000000..c066abcd86a --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/pr98736.c @@ -0,0 +1,14 @@ +/* PR tree-optimization/98736 */ + +int a[6]; +char b, c; +int main() { + int d[4] = {0, 0, 0, 0}; + for (c = 0; c <= 5; c++) { + for (b = 2; b != 0; b++) + a[c] = 8; + a[c] = d[3]; + } + if (a[0] != 0) + __builtin_abort(); +} diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index 7ee19fc8677..a5acfaff1f4 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -3156,7 +3156,7 @@ void loop_distribution::bb_top_order_init (void) bb_top_order_index = XNEWVEC (int, last_basic_block_for_fn (cfun)); bb_top_order_index_size = last_basic_block_for_fn (cfun); - rpo_num = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, true); + rpo_num = loop_first_rev_post_order_compute_fn (cfun, rpo, true); for (int i = 0; i < rpo_num; i++) bb_top_order_index[rpo[i]] = i;