Wilco Dijkstra <wilco.dijks...@arm.com> writes:
> ping
>  
>
> Add FULLY_PIPELINED_FMA to tune baseline - this is a generic feature that is
> already enabled for some cores, but benchmarking it shows it is faster on all
> modern cores (SPECFP improves ~0.17% on Neoverse V1 and 0.04% on Neoverse N1).
>
> Passes regress & bootstrap, OK for commit?
>
> gcc/ChangeLog:
>
>         * config/aarch64/aarch64-tuning-flags.def (AARCH64_EXTRA_TUNE_BASE):
>         Add AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA.
>         * config/aarch64/tuning_models/ampere1b.h: Remove redundant
>         AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA.
>         * config/aarch64/tuning_models/neoversev2.h: Likewise.

OK, thanks.  (The patch has been up for review for a while, with no
objections, so there doesn't seem much point in an extra grace period.)

Richard

>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def
> index ffbff20e29c78c00fc211adbba962c20827370aa..1d8abee1e263706e3930e4d39c59faefef8cfe41 100644
> --- a/gcc/config/aarch64/aarch64-tuning-flags.def
> +++ b/gcc/config/aarch64/aarch64-tuning-flags.def
> @@ -51,6 +51,7 @@ AARCH64_EXTRA_TUNING_OPTION ("fully_pipelined_fma", FULLY_PIPELINED_FMA)
>  AARCH64_EXTRA_TUNING_OPTION ("avoid_pred_rmw", AVOID_PRED_RMW)
>  
>  /* Baseline tuning settings suitable for all modern cores.  */
> -#define AARCH64_EXTRA_TUNE_BASE (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND)
> +#define AARCH64_EXTRA_TUNE_BASE (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND \
> +                                | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA)
>  
>  #undef AARCH64_EXTRA_TUNING_OPTION
> diff --git a/gcc/config/aarch64/tuning_models/ampere1b.h b/gcc/config/aarch64/tuning_models/ampere1b.h
> index 936fe7ad390edbf70f670d50843bc5caa4fa55e5..340f7b0b47943a43ac57342a464c9267d9912f28 100644
> --- a/gcc/config/aarch64/tuning_models/ampere1b.h
> +++ b/gcc/config/aarch64/tuning_models/ampere1b.h
> @@ -103,8 +103,7 @@ static const struct tune_params ampere1b_tunings =
>    0,   /* max_case_values.  */
>    tune_params::AUTOPREFETCHER_STRONG,  /* autoprefetcher_model.  */
>    (AARCH64_EXTRA_TUNE_BASE
> -   | AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA
> -   | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA), /* tune_flags.  */
> +   | AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA), /* tune_flags.  */
>    &ampere1b_prefetch_tune,
>    AARCH64_LDP_STP_POLICY_ALIGNED,   /* ldp_policy_model.  */
>    AARCH64_LDP_STP_POLICY_ALIGNED    /* stp_policy_model.  */
> diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h b/gcc/config/aarch64/tuning_models/neoversev2.h
> index 40af5f47f4f62757e8e374abbb29cec5d1a8f7f3..43baeafd646bafadb739376160eaaf268d0542a8 100644
> --- a/gcc/config/aarch64/tuning_models/neoversev2.h
> +++ b/gcc/config/aarch64/tuning_models/neoversev2.h
> @@ -234,8 +234,7 @@ static const struct tune_params neoversev2_tunings =
>     | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
>     | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
>     | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
> -   | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW
> -   | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA),  /* tune_flags.  */
> +   | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW),       /* tune_flags.  */
>    &neoversev2_prefetch_tune,
>    AARCH64_LDP_STP_POLICY_ALWAYS,   /* ldp_policy_model.  */
>    AARCH64_LDP_STP_POLICY_ALWAYS           /* stp_policy_model.  */

Reply via email to