> -----Original Message----- > From: Richard Sandiford <richard.sandif...@arm.com> > Sent: Friday, December 29, 2023 6:24 PM > To: Di Zhao OS <diz...@os.amperecomputing.com> > Cc: gcc-patches@gcc.gnu.org > Subject: Re: [PATCH] aarch64: add 'AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA' > > Di Zhao OS <diz...@os.amperecomputing.com> writes: > > This patch adds a new tuning option > > 'AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA', > > to consider fully pipelined FMAs in reassociation. Also, set this option > > by default for Ampere CPUs. > > > > Tested on aarch64-unknown-linux-gnu. Is this OK for trunk? > > > > Thanks, > > Di Zhao > > > > gcc/ChangeLog: > > > > * config/aarch64/aarch64-tuning-flags.def (AARCH64_EXTRA_TUNING_OPTION): > > New tuning option AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA. > > * config/aarch64/aarch64.cc (aarch64_override_options_internal): Set > > param_fully_pipelined_fma according to tuning option. > > * config/aarch64/tuning_models/ampere1.h: Add > > AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA to tune_flags. > > * config/aarch64/tuning_models/ampere1a.h: Likewise. > > * config/aarch64/tuning_models/ampere1b.h: Likewise. 
> > > > --- > > gcc/config/aarch64/aarch64-tuning-flags.def | 2 ++ > > gcc/config/aarch64/aarch64.cc | 6 ++++++ > > gcc/config/aarch64/tuning_models/ampere1.h | 3 ++- > > gcc/config/aarch64/tuning_models/ampere1a.h | 3 ++- > > gcc/config/aarch64/tuning_models/ampere1b.h | 3 ++- > > 5 files changed, 14 insertions(+), 3 deletions(-) > > > > diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def > b/gcc/config/aarch64/aarch64-tuning-flags.def > > index f28a73839a6..256f17bad60 100644 > > --- a/gcc/config/aarch64/aarch64-tuning-flags.def > > +++ b/gcc/config/aarch64/aarch64-tuning-flags.def > > @@ -49,4 +49,6 @@ AARCH64_EXTRA_TUNING_OPTION ("matched_vector_throughput", > MATCHED_VECTOR_THROUGH > > > > AARCH64_EXTRA_TUNING_OPTION ("avoid_cross_loop_fma", AVOID_CROSS_LOOP_FMA) > > > > +AARCH64_EXTRA_TUNING_OPTION ("fully_pipelined_FMA", FULLY_PIPELINED_FMA) > > Could you change this to all-lowercase, i.e. fully_pipelined_fma, > for consistency with avoid_cross_loop_fma above? > > > + > > #undef AARCH64_EXTRA_TUNING_OPTION > > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc > > index f9850320f61..1b3b288cdf9 100644 > > --- a/gcc/config/aarch64/aarch64.cc > > +++ b/gcc/config/aarch64/aarch64.cc > > @@ -18289,6 +18289,12 @@ aarch64_override_options_internal (struct > gcc_options *opts) > > SET_OPTION_IF_UNSET (opts, &global_options_set, > param_avoid_fma_max_bits, > > 512); > > > > + /* Consider fully pipelined FMA in reassociation. 
*/ > > + if (aarch64_tune_params.extra_tuning_flags > > + & AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA) > > + SET_OPTION_IF_UNSET (opts, &global_options_set, > param_fully_pipelined_fma, > > + 1); > > + > > aarch64_override_options_after_change_1 (opts); > > } > > > > diff --git a/gcc/config/aarch64/tuning_models/ampere1.h > b/gcc/config/aarch64/tuning_models/ampere1.h > > index a144e8f94b3..d63788528a7 100644 > > --- a/gcc/config/aarch64/tuning_models/ampere1.h > > +++ b/gcc/config/aarch64/tuning_models/ampere1.h > > @@ -104,7 +104,8 @@ static const struct tune_params ampere1_tunings = > > 2, /* min_div_recip_mul_df. */ > > 0, /* max_case_values. */ > > tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ > > - (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA), /* tune_flags. */ > > + (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA | > > + AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA), /* tune_flags. */ > > Formatting nit, but GCC style is to put the "|" at the start of the > following line: > > (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA > | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA), /* tune_flags. */ > > Same for the others. > > OK with those changes, thanks.
Fixed the problems and committed to master. Thanks, Di > > Richard > > > &ampere1_prefetch_tune, > > AARCH64_LDP_STP_POLICY_ALIGNED, /* ldp_policy_model. */ > > AARCH64_LDP_STP_POLICY_ALIGNED /* stp_policy_model. */ > > diff --git a/gcc/config/aarch64/tuning_models/ampere1a.h > b/gcc/config/aarch64/tuning_models/ampere1a.h > > index f688ed08a79..63506e1d1c6 100644 > > --- a/gcc/config/aarch64/tuning_models/ampere1a.h > > +++ b/gcc/config/aarch64/tuning_models/ampere1a.h > > @@ -56,7 +56,8 @@ static const struct tune_params ampere1a_tunings = > > 2, /* min_div_recip_mul_df. */ > > 0, /* max_case_values. */ > > tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ > > - (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA), /* tune_flags. */ > > + (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA | > > + AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA), /* tune_flags. */ > > &ampere1_prefetch_tune, > > AARCH64_LDP_STP_POLICY_ALIGNED, /* ldp_policy_model. */ > > AARCH64_LDP_STP_POLICY_ALIGNED /* stp_policy_model. */ > > diff --git a/gcc/config/aarch64/tuning_models/ampere1b.h > b/gcc/config/aarch64/tuning_models/ampere1b.h > > index a98b6a980f7..7894e730174 100644 > > --- a/gcc/config/aarch64/tuning_models/ampere1b.h > > +++ b/gcc/config/aarch64/tuning_models/ampere1b.h > > @@ -106,7 +106,8 @@ static const struct tune_params ampere1b_tunings = > > 0, /* max_case_values. */ > > tune_params::AUTOPREFETCHER_STRONG, /* autoprefetcher_model. */ > > (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND | > > - AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA), /* tune_flags. */ > > + AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA | > > + AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA), /* tune_flags. */ > > &ampere1b_prefetch_tune, > > AARCH64_LDP_STP_POLICY_ALIGNED, /* ldp_policy_model. */ > > AARCH64_LDP_STP_POLICY_ALIGNED /* stp_policy_model. */