On Tue, Jul 18, 2023 at 1:17 PM Ajit Agarwal <aagar...@linux.ibm.com> wrote: > > > > On 18/07/23 4:38 pm, Prathamesh Kulkarni wrote: > > On Tue, 18 Jul 2023 at 13:26, Ajit Agarwal via Gcc-patches > > <gcc-patches@gcc.gnu.org> wrote: > >> > >> > >> Ping! > >> > >> please review. > >> > >> Thanks & Regards > >> Ajit > >> > >> > >> This patch improves code sinking pass to sink statements before call to > >> reduce > >> register pressure. > >> Review comments are incorporated. > >> > >> For example : > >> > >> void bar(); > >> int j; > >> void foo(int a, int b, int c, int d, int e, int f) > >> { > >> int l; > >> l = a + b + c + d +e + f; > >> if (a != 5) > >> { > >> bar(); > >> j = l; > >> } > >> } > >> > >> Code Sinking does the following: > >> > >> void bar(); > >> int j; > >> void foo(int a, int b, int c, int d, int e, int f) > >> { > >> int l; > >> > >> if (a != 5) > >> { > >> l = a + b + c + d +e + f; > >> bar(); > >> j = l; > >> } > >> } > >> > >> Bootstrapped regtested on powerpc64-linux-gnu. > >> > >> Thanks & Regards > >> Ajit > >> > >> > >> tree-ssa-sink: Improve code sinking pass > >> > >> Currently, code sinking will sink code after function calls. This > >> increases > >> register pressure for callee-saved registers. The following patch improves > >> code sinking by placing the sunk code before calls in the use block or in > >> the immediate dominator of the use blocks. > >> > >> 2023-06-01 Ajit Kumar Agarwal <aagar...@linux.ibm.com> > >> > >> gcc/ChangeLog: > >> > >> PR tree-optimization/81953 > >> * tree-ssa-sink.cc (statement_sink_location): Move statements > >> before > >> calls. > >> (def_use_same_block): New function. > >> (select_best_block): Add heuristics to select the best blocks in > >> the > >> immediate post dominator. > >> > >> gcc/testsuite/ChangeLog: > >> > >> PR tree-optimization/81953 > >> * gcc.dg/tree-ssa/ssa-sink-20.c: New testcase. > >> * gcc.dg/tree-ssa/ssa-sink-21.c: New testcase. > >> --- > >> gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c | 15 ++++ > >> gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 19 +++++ > >> gcc/tree-ssa-sink.cc | 79 ++++++++++++++------- > >> 3 files changed, 87 insertions(+), 26 deletions(-) > >> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c > >> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c > >> > >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c > >> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c > >> new file mode 100644 > >> index 00000000000..d3b79ca5803 > >> --- /dev/null > >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c > >> @@ -0,0 +1,15 @@ > >> +/* { dg-do compile } */ > >> +/* { dg-options "-O2 -fdump-tree-sink-stats" } */ > >> +void bar(); > >> +int j; > >> +void foo(int a, int b, int c, int d, int e, int f) > >> +{ > >> + int l; > >> + l = a + b + c + d +e + f; > >> + if (a != 5) > >> + { > >> + bar(); > >> + j = l; > >> + } > >> +} > >> +/* { dg-final { scan-tree-dump > >> {l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */ > >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c > >> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c > >> new file mode 100644 > >> index 00000000000..84e7938c54f > >> --- /dev/null > >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c > >> @@ -0,0 +1,19 @@ > >> +/* { dg-do compile } */ > >> +/* { dg-options "-O2 -fdump-tree-sink-stats" } */ > >> +void bar(); > >> +int j, x; > >> +void foo(int a, int b, int c, int d, int e, int f) > >> +{ > >> + int l; > >> + l = a + b + c + d +e + f; > >> + if (a != 5) > >> + { > >> + bar(); > >> + if (b != 3) > >> + x = 3; > >> + else > >> + x = 5; > >> + j = l; > >> + } > >> +} > >> +/* { dg-final { scan-tree-dump > >> {l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */ > >> diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc > >> index b1ba7a2ad6c..113c89d0967 100644 > >> --- a/gcc/tree-ssa-sink.cc > >> +++ b/gcc/tree-ssa-sink.cc > >> @@ -171,9 +171,28 @@ nearest_common_dominator_of_uses (def_operand_p > >> def_p, bool *debug_stmts) > >> return commondom; > >> } > >> > >> +/* Return TRUE if immediate defs of STMT and STMT are in same > >> + * block, FALSE otherwise. */ > >> + > >> +static bool > >> +def_use_same_block (gimple *stmt) > >> +{ > >> + def_operand_p def; > >> + ssa_op_iter iter; > >> + > >> + FOR_EACH_SSA_DEF_OPERAND (def, stmt, iter, SSA_OP_DEF) > >> + { > >> + gimple *def_stmt = SSA_NAME_DEF_STMT (DEF_FROM_PTR (def)); > >> + if ((gimple_bb (def_stmt) == gimple_bb (stmt))) > >> + return true; > > Hi Ajit, > > Just wondering, won't this always return true since you're iterating over > > defs, > > and def_stmt == stmt ? Sorry, if I misunderstood. > > > Hello Prathamesh: > > In this passing we are passing use and in this function we are iterating over > defs > for the use and we are checking block of use (which is stmt) and def are in > the same > block or not. I dont think its always true.
The function still always returns true and I've pointe this out repeatedly before. Richard. > Thanks & Regards > Ajit > > > > Thanks, > > Prathamesh > >> + } > >> + return false; > >> +} > >> + > >> /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator > >> tree, return the best basic block between them (inclusive) to place > >> - statements. > >> + statements. The best basic block should be an immediate dominator of > >> + best basic block if the use stmt is after the call. > >> > >> We want the most control dependent block in the shallowest loop nest. > >> > >> @@ -190,11 +209,22 @@ nearest_common_dominator_of_uses (def_operand_p > >> def_p, bool *debug_stmts) > >> static basic_block > >> select_best_block (basic_block early_bb, > >> basic_block late_bb, > >> - gimple *stmt) > >> + gimple *stmt, > >> + gimple *use) > >> { > >> basic_block best_bb = late_bb; > >> basic_block temp_bb = late_bb; > >> int threshold; > >> + /* Get the sinking threshold. If the statement to be moved has memory > >> + operands, then increase the threshold by 7% as those are even more > >> + profitable to avoid, clamping at 100%. */ > >> + threshold = param_sink_frequency_threshold; > >> + if (gimple_vuse (stmt) || gimple_vdef (stmt)) > >> + { > >> + threshold += 7; > >> + if (threshold > 100) > >> + threshold = 100; > >> + } > >> > >> while (temp_bb != early_bb) > >> { > >> @@ -203,34 +233,33 @@ select_best_block (basic_block early_bb, > >> if (bb_loop_depth (temp_bb) < bb_loop_depth (best_bb)) > >> best_bb = temp_bb; > >> > >> + /* Placing a statement before a setjmp-like function would be > >> invalid > >> + (it cannot be reevaluated when execution follows an abnormal > >> edge). > >> + If we selected a block with abnormal predecessors, just punt. */ > >> + if (bb_has_abnormal_pred (temp_bb)) > >> + return early_bb; > >> + > >> + /* if we have temp_bb post dominated by use block and def > >> + and use are not in same block then immediate dominator > >> + would be our best block. */ > >> + if (use > >> + && bb_loop_depth(temp_bb) == bb_loop_depth (early_bb) > >> + && !(temp_bb->count * 100 >= early_bb->count * threshold) > >> + && dominated_by_p (CDI_POST_DOMINATORS, temp_bb, gimple_bb (use)) > >> + && !def_use_same_block (use)) > >> + best_bb = temp_bb; > >> + > >> /* Walk up the dominator tree, hopefully we'll find a shallower > >> loop nest. */ > >> temp_bb = get_immediate_dominator (CDI_DOMINATORS, temp_bb); > >> } > >> > >> - /* Placing a statement before a setjmp-like function would be invalid > >> - (it cannot be reevaluated when execution follows an abnormal edge). > >> - If we selected a block with abnormal predecessors, just punt. */ > >> - if (bb_has_abnormal_pred (best_bb)) > >> - return early_bb; > >> - > >> /* If we found a shallower loop nest, then we always consider that > >> a win. This will always give us the most control dependent block > >> within that loop nest. */ > >> if (bb_loop_depth (best_bb) < bb_loop_depth (early_bb)) > >> return best_bb; > >> > >> - /* Get the sinking threshold. If the statement to be moved has memory > >> - operands, then increase the threshold by 7% as those are even more > >> - profitable to avoid, clamping at 100%. */ > >> - threshold = param_sink_frequency_threshold; > >> - if (gimple_vuse (stmt) || gimple_vdef (stmt)) > >> - { > >> - threshold += 7; > >> - if (threshold > 100) > >> - threshold = 100; > >> - } > >> - > >> /* If BEST_BB is at the same nesting level, then require it to have > >> significantly lower execution frequency to avoid gratuitous > >> movement. */ > >> if (bb_loop_depth (best_bb) == bb_loop_depth (early_bb) > >> @@ -439,7 +468,7 @@ statement_sink_location (gimple *stmt, basic_block > >> frombb, > >> if (!dominated_by_p (CDI_DOMINATORS, commondom, frombb)) > >> return false; > >> > >> - commondom = select_best_block (frombb, commondom, stmt); > >> + commondom = select_best_block (frombb, commondom, stmt, NULL); > >> > >> if (commondom == frombb) > >> return false; > >> @@ -456,19 +485,17 @@ statement_sink_location (gimple *stmt, basic_block > >> frombb, > >> continue; > >> break; > >> } > >> + > >> use = USE_STMT (one_use); > >> > >> if (gimple_code (use) != GIMPLE_PHI) > >> { > >> - sinkbb = select_best_block (frombb, gimple_bb (use), stmt); > >> + sinkbb = select_best_block (frombb, gimple_bb (use), stmt, use); > >> > >> if (sinkbb == frombb) > >> return false; > >> > >> - if (sinkbb == gimple_bb (use)) > >> - *togsi = gsi_for_stmt (use); > >> - else > >> - *togsi = gsi_after_labels (sinkbb); > >> + *togsi = gsi_after_labels (sinkbb); > >> > >> return true; > >> } > >> @@ -480,7 +507,7 @@ statement_sink_location (gimple *stmt, basic_block > >> frombb, > >> if (!sinkbb) > >> return false; > >> > >> - sinkbb = select_best_block (frombb, sinkbb, stmt); > >> + sinkbb = select_best_block (frombb, sinkbb, stmt, NULL); > >> if (!sinkbb || sinkbb == frombb) > >> return false; > >> > >> -- > >> 2.39.3 > >>