Hi Richard, on 2021/6/3 上午1:19, Richard Sandiford wrote: > "Kewen.Lin via Gcc-patches" <gcc-patches@gcc.gnu.org> writes: >> Hi, >> >> As PR100794 shows, in the current implementation PRE bypasses >> some optimization to avoid introducing loop carried dependence >> which stops loop vectorizer to vectorize the loop. At -O2, >> there is no downstream pass to re-catch this kind of opportunity >> if loop vectorizer fails to vectorize that loop. >> >> This patch follows Richi's suggestion in the PR, if predcom flag >> isn't set and loop vectorization will enable predcom without any >> unrolling implicitly. The Power9 SPEC2017 evaluation showed it >> can speed up 521.wrf_r 3.30% and 554.roms_r 1.08% at very-cheap >> cost model, no remarkable impact at cheap cost model, the build >> time and size impact is fine (see the PR for the details). >> >> By the way, I tested another proposal to guard PRE not skip the >> optimization for cheap and very-cheap vect cost models, the >> evaluation results showed it's fine with very cheap cost model, >> but it can degrade some bmks like 521.wrf_r -9.17% and >> 549.fotonik3d_r -2.07% etc. >> >> Bootstrapped/regtested on powerpc64le-linux-gnu P9, >> x86_64-redhat-linux and aarch64-linux-gnu. >> >> Is it ok for trunk? >> >> BR, >> Kewen >> ----- >> gcc/ChangeLog: >> >> PR tree-optimization/100794 >> * tree-predcom.c (tree_predictive_commoning_loop): Add parameter >> allow_unroll_p and only allow unrolling when it's true. >> (tree_predictive_commoning): Add parameter allow_unroll_p and >> adjust for it. >> (run_tree_predictive_commoning): Likewise. >> (class pass_predcom): Add private member allow_unroll_p. >> (pass_predcom::pass_predcom): Init allow_unroll_p. >> (pass_predcom::gate): Check flag_tree_loop_vectorize and >> global_options_set.x_flag_predictive_commoning. >> (pass_predcom::execute): Adjust for allow_unroll_p. >> >> gcc/testsuite/ChangeLog: >> >> PR tree-optimization/100794 >> * gcc.dg/tree-ssa/pr100794.c: New test. >> >> gcc/testsuite/gcc.dg/tree-ssa/pr100794.c | 20 +++++++++ >> gcc/tree-predcom.c | 57 +++++++++++++++++------- >> 2 files changed, 60 insertions(+), 17 deletions(-) >> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr100794.c >> >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c >> b/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c >> new file mode 100644 >> index 00000000000..6f707ae7fba >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c >> @@ -0,0 +1,20 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-O2 -ftree-loop-vectorize -fdump-tree-pcom-details >> -fdisable-tree-vect" } */ >> + >> +extern double arr[100]; >> +extern double foo (double, double); >> +extern double sum; >> + >> +void >> +test (int i_0, int i_n) >> +{ >> + int i; >> + for (i = i_0; i < i_n - 1; i++) >> + { >> + double a = arr[i]; >> + double b = arr[i + 1]; >> + sum += a * b; >> + } >> +} >> + >> +/* { dg-final { scan-tree-dump "Executing predictive commoning without >> unrolling" "pcom" } } */ >> diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c >> index 02f911a08bb..65a93c8e505 100644 >> --- a/gcc/tree-predcom.c >> +++ b/gcc/tree-predcom.c >> @@ -3178,13 +3178,13 @@ insert_init_seqs (class loop *loop, vec<chain_p> >> chains) >> applied to this loop. */ >> >> static unsigned >> -tree_predictive_commoning_loop (class loop *loop) >> +tree_predictive_commoning_loop (class loop *loop, bool allow_unroll_p) >> { >> vec<data_reference_p> datarefs; >> vec<ddr_p> dependences; >> struct component *components; >> vec<chain_p> chains = vNULL; >> - unsigned unroll_factor; >> + unsigned unroll_factor = 0; >> class tree_niter_desc desc; >> bool unroll = false, loop_closed_ssa = false; >> >> @@ -3272,11 +3272,13 @@ tree_predictive_commoning_loop (class loop *loop) >> dump_chains (dump_file, chains); >> } >> >> - /* Determine the unroll factor, and if the loop should be unrolled, ensure >> - that its number of iterations is divisible by the factor. */ >> - unroll_factor = determine_unroll_factor (chains); >> - unroll = (unroll_factor > 1 >> - && can_unroll_loop_p (loop, unroll_factor, &desc)); >> + if (allow_unroll_p) >> + /* Determine the unroll factor, and if the loop should be unrolled, >> ensure >> + that its number of iterations is divisible by the factor. */ >> + unroll_factor = determine_unroll_factor (chains); >> + >> + if (unroll_factor > 1) >> + unroll = can_unroll_loop_p (loop, unroll_factor, &desc); >> >> /* Execute the predictive commoning transformations, and possibly unroll >> the >> loop. */ >> @@ -3319,7 +3321,7 @@ tree_predictive_commoning_loop (class loop *loop) >> /* Runs predictive commoning. */ >> >> unsigned >> -tree_predictive_commoning (void) >> +tree_predictive_commoning (bool allow_unroll_p) >> { >> class loop *loop; >> unsigned ret = 0, changed = 0; >> @@ -3328,7 +3330,7 @@ tree_predictive_commoning (void) >> FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) >> if (optimize_loop_for_speed_p (loop)) >> { >> - changed |= tree_predictive_commoning_loop (loop); >> + changed |= tree_predictive_commoning_loop (loop, allow_unroll_p); >> } >> free_original_copy_tables (); >> >> @@ -3355,12 +3357,12 @@ tree_predictive_commoning (void) >> /* Predictive commoning Pass. */ >> >> static unsigned >> -run_tree_predictive_commoning (struct function *fun) >> +run_tree_predictive_commoning (struct function *fun, bool allow_unroll_p) >> { >> if (number_of_loops (fun) <= 1) >> return 0; >> >> - return tree_predictive_commoning (); >> + return tree_predictive_commoning (allow_unroll_p); >> } >> >> namespace { >> @@ -3382,15 +3384,36 @@ class pass_predcom : public gimple_opt_pass >> { >> public: >> pass_predcom (gcc::context *ctxt) >> - : gimple_opt_pass (pass_data_predcom, ctxt) >> + : gimple_opt_pass (pass_data_predcom, ctxt), >> + allow_unroll_p (true) >> {} >> >> /* opt_pass methods: */ >> - virtual bool gate (function *) { return flag_predictive_commoning != 0; } >> - virtual unsigned int execute (function *fun) >> - { >> - return run_tree_predictive_commoning (fun); >> - } >> + virtual bool >> + gate (function *) >> + { >> + if (flag_predictive_commoning != 0) >> + return true; >> + /* Loop vectorization enables predictive commoning implicitly >> + only if predictive commoning isn't set explicitly, and it >> + doesn't allow unrolling. */ >> + if (flag_tree_loop_vectorize >> + && !global_options_set.x_flag_predictive_commoning) >> + { >> + allow_unroll_p = false; >> + return true; >> + } >> + return false; >> + } >> + >> + virtual unsigned int >> + execute (function *fun) >> + { >> + return run_tree_predictive_commoning (fun, allow_unroll_p); >> + } >> + >> +private: >> + bool allow_unroll_p; >> >> }; // class pass_predcom > > Calculating allow_unroll_p this way doesn't look robust against > changes in options caused by pragmas, etc. Would it work if we > dropped the member variable and just passed flag_predictive_commoning != 0 > to run_tree_predictive commoning? >
Thanks for the comments! Yeah, it would work well. The updated version of patch has been attached. BR, Kewen ----- gcc/ChangeLog: PR tree-optimization/100794 * tree-predcom.c (tree_predictive_commoning_loop): Add parameter allow_unroll_p and only allow unrolling when it's true. (tree_predictive_commoning): Add parameter allow_unroll_p and adjust for it. (run_tree_predictive_commoning): Likewise. (pass_predcom::gate): Check flag_tree_loop_vectorize and global_options_set.x_flag_predictive_commoning. (pass_predcom::execute): Adjust for allow_unroll_p. gcc/testsuite/ChangeLog: PR tree-optimization/100794 * gcc.dg/tree-ssa/pr100794.c: New test.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c b/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c new file mode 100644 index 00000000000..6f707ae7fba --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-loop-vectorize -fdump-tree-pcom-details -fdisable-tree-vect" } */ + +extern double arr[100]; +extern double foo (double, double); +extern double sum; + +void +test (int i_0, int i_n) +{ + int i; + for (i = i_0; i < i_n - 1; i++) + { + double a = arr[i]; + double b = arr[i + 1]; + sum += a * b; + } +} + +/* { dg-final { scan-tree-dump "Executing predictive commoning without unrolling" "pcom" } } */ diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c index 02f911a08bb..ac1674d5486 100644 --- a/gcc/tree-predcom.c +++ b/gcc/tree-predcom.c @@ -3178,13 +3178,13 @@ insert_init_seqs (class loop *loop, vec<chain_p> chains) applied to this loop. */ static unsigned -tree_predictive_commoning_loop (class loop *loop) +tree_predictive_commoning_loop (class loop *loop, bool allow_unroll_p) { vec<data_reference_p> datarefs; vec<ddr_p> dependences; struct component *components; vec<chain_p> chains = vNULL; - unsigned unroll_factor; + unsigned unroll_factor = 0; class tree_niter_desc desc; bool unroll = false, loop_closed_ssa = false; @@ -3272,11 +3272,13 @@ tree_predictive_commoning_loop (class loop *loop) dump_chains (dump_file, chains); } - /* Determine the unroll factor, and if the loop should be unrolled, ensure - that its number of iterations is divisible by the factor. */ - unroll_factor = determine_unroll_factor (chains); - unroll = (unroll_factor > 1 - && can_unroll_loop_p (loop, unroll_factor, &desc)); + if (allow_unroll_p) + /* Determine the unroll factor, and if the loop should be unrolled, ensure + that its number of iterations is divisible by the factor. */ + unroll_factor = determine_unroll_factor (chains); + + if (unroll_factor > 1) + unroll = can_unroll_loop_p (loop, unroll_factor, &desc); /* Execute the predictive commoning transformations, and possibly unroll the loop. */ @@ -3319,7 +3321,7 @@ tree_predictive_commoning_loop (class loop *loop) /* Runs predictive commoning. */ unsigned -tree_predictive_commoning (void) +tree_predictive_commoning (bool allow_unroll_p) { class loop *loop; unsigned ret = 0, changed = 0; @@ -3328,7 +3330,7 @@ tree_predictive_commoning (void) FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) if (optimize_loop_for_speed_p (loop)) { - changed |= tree_predictive_commoning_loop (loop); + changed |= tree_predictive_commoning_loop (loop, allow_unroll_p); } free_original_copy_tables (); @@ -3355,12 +3357,12 @@ tree_predictive_commoning (void) /* Predictive commoning Pass. */ static unsigned -run_tree_predictive_commoning (struct function *fun) +run_tree_predictive_commoning (struct function *fun, bool allow_unroll_p) { if (number_of_loops (fun) <= 1) return 0; - return tree_predictive_commoning (); + return tree_predictive_commoning (allow_unroll_p); } namespace { @@ -3386,11 +3388,27 @@ public: {} /* opt_pass methods: */ - virtual bool gate (function *) { return flag_predictive_commoning != 0; } - virtual unsigned int execute (function *fun) - { - return run_tree_predictive_commoning (fun); - } + virtual bool + gate (function *) + { + if (flag_predictive_commoning != 0) + return true; + /* Loop vectorization enables predictive commoning implicitly + only if predictive commoning isn't set explicitly, and it + doesn't allow unrolling. */ + if (flag_tree_loop_vectorize + && !global_options_set.x_flag_predictive_commoning) + return true; + + return false; + } + + virtual unsigned int + execute (function *fun) + { + bool allow_unroll_p = flag_predictive_commoning != 0; + return run_tree_predictive_commoning (fun, allow_unroll_p); + } }; // class pass_predcom