On Sat, Jun 24, 2023 at 6:12 AM Ajit Agarwal <aagar...@linux.ibm.com> wrote: > > Hello All: > > This patch improves code sinking pass to sink statements before call to reduce > register pressure. > Review comments are incorporated. > > For example : > > void bar(); > int j; > void foo(int a, int b, int c, int d, int e, int f) > { > int l; > l = a + b + c + d +e + f; > if (a != 5) > { > bar(); > j = l; > } > } > > Code Sinking does the following: > > void bar(); > int j; > void foo(int a, int b, int c, int d, int e, int f) > { > int l; > > if (a != 5) > { > l = a + b + c + d +e + f; > bar(); > j = l; > } > } > > Bootstrapped regtested on powerpc64-linux-gnu. > > Thanks & Regards > Ajit > > > tree-ssa-sink: Improve code sinking pass > > Currently, code sinking will sink code after function calls. This increases > register pressure for callee-saved registers. The following patch improves > code sinking by placing the sunk code before calls in the use block or in > the immediate dominator of the use blocks. > > 2023-06-24 Ajit Kumar Agarwal <aagar...@linux.ibm.com> > > gcc/ChangeLog: > > PR tree-optimization/81953 > * tree-ssa-sink.cc (statement_sink_location): Move statements before > calls. > (def_use_same_block): New function. > (select_best_block): Add heuristics to select the best blocks in the > immediate post dominator. > > gcc/testsuite/ChangeLog: > > PR tree-optimization/81953 > * gcc.dg/tree-ssa/ssa-sink-20.c: New testcase. > * gcc.dg/tree-ssa/ssa-sink-21.c: New testcase. > --- > gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c | 15 +++++ > gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 19 ++++++ > gcc/tree-ssa-sink.cc | 68 ++++++++++++++++++--- > 3 files changed, 92 insertions(+), 10 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c > b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c > new file mode 100644 > index 00000000000..d3b79ca5803 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-sink-stats" } */ > +void bar(); > +int j; > +void foo(int a, int b, int c, int d, int e, int f) > +{ > + int l; > + l = a + b + c + d +e + f; > + if (a != 5) > + { > + bar(); > + j = l; > + } > +} > +/* { dg-final { scan-tree-dump > {l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c > b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c > new file mode 100644 > index 00000000000..84e7938c54f > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c > @@ -0,0 +1,19 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-sink-stats" } */ > +void bar(); > +int j, x; > +void foo(int a, int b, int c, int d, int e, int f) > +{ > + int l; > + l = a + b + c + d +e + f; > + if (a != 5) > + { > + bar(); > + if (b != 3) > + x = 3; > + else > + x = 5; > + j = l; > + } > +} > +/* { dg-final { scan-tree-dump > {l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */ > diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc > index b1ba7a2ad6c..791d44249f9 100644 > --- a/gcc/tree-ssa-sink.cc > +++ b/gcc/tree-ssa-sink.cc > @@ -171,9 +171,28 @@ nearest_common_dominator_of_uses (def_operand_p def_p, > bool *debug_stmts) > return commondom; > } > > +/* Return TRUE if immediate uses of the defs in > + STMT occur in the same block as STMT, FALSE otherwise. */ > + > +static bool > +def_use_same_block (gimple *stmt) > +{ > + def_operand_p def; > + ssa_op_iter iter; > + > + FOR_EACH_SSA_DEF_OPERAND (def, stmt, iter, SSA_OP_DEF) > + { > + gimple *def_stmt = SSA_NAME_DEF_STMT (DEF_FROM_PTR (def)); > + if ((gimple_bb (def_stmt) == gimple_bb (stmt))) > + return true;
This doesn't do what the comment says? It returns true if 'stmt' has any SSA DEF, because in fact def_stmt == stmt in all cases. I should probably stop looking here, but I'll point you to PR110218 where I note the selection of the block should happen in the walk where we walk immediate dominators, and both the abnormal_pred and ->count checks should happen there, causing us to consider the next dominator as candidate. > + } > + return false; > +} > + > /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator > tree, return the best basic block between them (inclusive) to place > - statements. > + statements. The best basic block should be an immediate dominator of > + best basic block if the use stmt is after the call. > > We want the most control dependent block in the shallowest loop nest. > > @@ -190,7 +209,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, > bool *debug_stmts) > static basic_block > select_best_block (basic_block early_bb, > basic_block late_bb, > - gimple *stmt) > + gimple *stmt, > + gimple *use) > { > basic_block best_bb = late_bb; > basic_block temp_bb = late_bb; > @@ -237,7 +257,37 @@ select_best_block (basic_block early_bb, > /* If result of comparsion is unknown, prefer EARLY_BB. > Thus use !(...>=..) rather than (...<...) */ > && !(best_bb->count * 100 >= early_bb->count * threshold)) > - return best_bb; > + { > + basic_block new_best_bb = get_immediate_dominator (CDI_DOMINATORS, > best_bb); > + /* Return best_bb if def and use are in same block otherwise > new_best_bb. > + > + Things to consider: > + > + new_best_bb is not equal to best_bb and early_bb. > + > + stmt is not call. > + > + new_best_bb doesnt have any phis. > + > + use basic block is not equal to early_bb. > + > + use basic block post dominates to new_best_bb. > + > + new_best_bb dominates early_bb. */ so that's what you try to check below. But the comment doesn't say _why_. So - why? > + if (new_best_bb always true && use > + && new_best_bb != best_bb always true > + && new_best_bb != early_bb > + && !is_gimple_call (stmt) > + && gsi_end_p (gsi_start_phis (new_best_bb)) > + && gimple_bb (use) != early_bb > + && !is_gimple_call (use) > + && dominated_by_p (CDI_POST_DOMINATORS, new_best_bb, gimple_bb > (use)) > + && dominated_by_p (CDI_DOMINATORS, new_best_bb, early_bb) > + && !def_use_same_block (use)) > + return new_best_bb; > + > + return best_bb; > + } > > /* No better block found, so return EARLY_BB, which happens to be the > statement's original block. */ > @@ -439,7 +489,7 @@ statement_sink_location (gimple *stmt, basic_block frombb, > if (!dominated_by_p (CDI_DOMINATORS, commondom, frombb)) > return false; > > - commondom = select_best_block (frombb, commondom, stmt); > + commondom = select_best_block (frombb, commondom, stmt, NULL); > > if (commondom == frombb) > return false; > @@ -456,19 +506,17 @@ statement_sink_location (gimple *stmt, basic_block > frombb, > continue; > break; > } > + > use = USE_STMT (one_use); > > if (gimple_code (use) != GIMPLE_PHI) > { > - sinkbb = select_best_block (frombb, gimple_bb (use), stmt); > + sinkbb = select_best_block (frombb, gimple_bb (use), stmt, use); you are only handling the case where all uses are in the same place, not sure why. > > if (sinkbb == frombb) > return false; > > - if (sinkbb == gimple_bb (use)) > - *togsi = gsi_for_stmt (use); > - else > - *togsi = gsi_after_labels (sinkbb); > + *togsi = gsi_after_labels (sinkbb); > > return true; > } > @@ -480,7 +528,7 @@ statement_sink_location (gimple *stmt, basic_block frombb, > if (!sinkbb) > return false; > > - sinkbb = select_best_block (frombb, sinkbb, stmt); > + sinkbb = select_best_block (frombb, sinkbb, stmt, NULL); > if (!sinkbb || sinkbb == frombb) > return false; > > -- > 2.39.3 >