Wilco Dijkstra <wilco.dijks...@arm.com> writes:
> Hi Kyrill,
>
>>> Add AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS and
>>> AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT to the baseline tuning since
>>> all modern cores use them.  Fix the neoverse512tvb tuning to be like
>>> Neoverse V1/V2.
>>
>> For neoverse512tvb this means adding AARCH64_EXTRA_TUNE_AVOID_PRED_RMW,
>> right?
>> That’s fine by me.
>
> Yes that was the intention.
>
>> AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS doesn’t exist anymore (i.e. it’s 
>> implicitly on) so the patch needs to be updated.
>
> I've rebased it to latest trunk - see v2 below.
>
> Cheers,
> Wilco
>
>
> v2: Rebase to trunk, update neoverse512tvb.
>
> Add AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT to the baseline tuning since
> all modern cores use it.  Fix the neoverse512tvb extra tune to be like
> Neoverse V1/V2 by adding AARCH64_EXTRA_TUNE_AVOID_PRED_RMW.

Sorry to be awkward, but I don't think we should put
AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT in base.
CHEAP_SHIFT_EXTEND is a good base flag because it means we can make full
use of a certain group of instructions.  FULLY_PIPELINED_FMA similarly
means that FMA chains behave as one would expect.

But MATCHED_VECTOR_THROUGHPUT feels to me more like a property of
a particular uarch.  I don't see a reason in principle why future
cores must provide the same Advanced SIMD bandwidth as SVE bandwidth.

The AVOID_PRED_RMW change is a good catch though, thanks.  +1 to Kyrill's OK
for that part.

Thanks,
Richard

>
> gcc:
>       * config/aarch64/aarch64-tuning-flags.def (AARCH64_EXTRA_TUNE_BASE):
>       Update.
>       * config/aarch64/tuning_models/cortexx925.h: Update.
>       * config/aarch64/tuning_models/fujitsu_monaka.h: Likewise.
>       * config/aarch64/tuning_models/generic_armv8_a.h: Likewise.
>       * config/aarch64/tuning_models/generic_armv9_a.h: Likewise.
>       * config/aarch64/tuning_models/neoverse512tvb.h: Likewise.
>       * config/aarch64/tuning_models/neoversen2.h: Likewise.
>       * config/aarch64/tuning_models/neoversen3.h: Likewise.
>       * config/aarch64/tuning_models/neoversev1.h: Likewise.
>       * config/aarch64/tuning_models/neoversev2.h: Likewise.
>       * config/aarch64/tuning_models/neoversev3.h: Likewise.
>       * config/aarch64/tuning_models/neoversev3ae.h: Likewise.
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def 
> b/gcc/config/aarch64/aarch64-tuning-flags.def
> index 
> 60967aac9037abe204ae1d0aabad31c1a3b4311b..1feff3beb348f45c254c5a7c346a1a9674dee362
>  100644
> --- a/gcc/config/aarch64/aarch64-tuning-flags.def
> +++ b/gcc/config/aarch64/aarch64-tuning-flags.def
> @@ -50,6 +50,7 @@ AARCH64_EXTRA_TUNING_OPTION ("avoid_pred_rmw", 
> AVOID_PRED_RMW)
>  
>  /* Baseline tuning settings suitable for all modern cores.  */
>  #define AARCH64_EXTRA_TUNE_BASE (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND       
> \
> -                              | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA)
> +                              | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA \
> +                              | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT)
>  
>  #undef AARCH64_EXTRA_TUNING_OPTION
> diff --git a/gcc/config/aarch64/tuning_models/cortexx925.h 
> b/gcc/config/aarch64/tuning_models/cortexx925.h
> index 
> 7d0162eae54c1823eff7b954d5e1d7564eb31dab..59e8c5f002fbb2d8e372b71575c796ba005e5413
>  100644
> --- a/gcc/config/aarch64/tuning_models/cortexx925.h
> +++ b/gcc/config/aarch64/tuning_models/cortexx925.h
> @@ -221,7 +221,6 @@ static const struct tune_params cortexx925_tunings =
>    tune_params::AUTOPREFETCHER_WEAK,  /* autoprefetcher_model.  */
>    (AARCH64_EXTRA_TUNE_BASE
>     | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
> -   | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
>     | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW),     /* tune_flags.  */
>    &generic_armv9a_prefetch_tune,
>    AARCH64_LDP_STP_POLICY_ALWAYS,   /* ldp_policy_model.  */
> diff --git a/gcc/config/aarch64/tuning_models/fujitsu_monaka.h 
> b/gcc/config/aarch64/tuning_models/fujitsu_monaka.h
> index 
> 5dc40243fe3846feffb8c54dd98d1797b45b672c..6790cb42be8e99ce37a2f20e440d66b5cbbb316b
>  100644
> --- a/gcc/config/aarch64/tuning_models/fujitsu_monaka.h
> +++ b/gcc/config/aarch64/tuning_models/fujitsu_monaka.h
> @@ -54,8 +54,7 @@ static const struct tune_params fujitsu_monaka_tunings =
>    2, /* min_div_recip_mul_df.  */
>    0, /* max_case_values.  */
>    tune_params::AUTOPREFETCHER_WEAK,  /* autoprefetcher_model.  */
> -  (AARCH64_EXTRA_TUNE_BASE
> -   | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT),  /* tune_flags.  */
> +  (AARCH64_EXTRA_TUNE_BASE), /* tune_flags.  */
>    &generic_prefetch_tune,
>    AARCH64_LDP_STP_POLICY_ALWAYS,   /* ldp_policy_model.  */
>    AARCH64_LDP_STP_POLICY_ALWAYS         /* stp_policy_model.  */
> diff --git a/gcc/config/aarch64/tuning_models/generic_armv8_a.h 
> b/gcc/config/aarch64/tuning_models/generic_armv8_a.h
> index 
> 35de3f032963980f48ad05b3bea69c26fc8ac654..d3f5b5d26443ef428c3a5eec189782fbe0a56150
>  100644
> --- a/gcc/config/aarch64/tuning_models/generic_armv8_a.h
> +++ b/gcc/config/aarch64/tuning_models/generic_armv8_a.h
> @@ -182,8 +182,7 @@ static const struct tune_params generic_armv8_a_tunings =
>    0, /* max_case_values.  */
>    tune_params::AUTOPREFETCHER_WEAK,  /* autoprefetcher_model.  */
>    (AARCH64_EXTRA_TUNE_BASE
> -   | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
> -   | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT),  /* tune_flags.  */
> +   | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS),       /* tune_flags.  */
>    &generic_prefetch_tune,
>    AARCH64_LDP_STP_POLICY_ALWAYS,   /* ldp_policy_model.  */
>    AARCH64_LDP_STP_POLICY_ALWAYS    /* stp_policy_model.  */
> diff --git a/gcc/config/aarch64/tuning_models/generic_armv9_a.h 
> b/gcc/config/aarch64/tuning_models/generic_armv9_a.h
> index 
> f76a2506f3841a05a89285e50ad96c8665732955..57cc4c717b63378e77b1693e2935234b68bb9b62
>  100644
> --- a/gcc/config/aarch64/tuning_models/generic_armv9_a.h
> +++ b/gcc/config/aarch64/tuning_models/generic_armv9_a.h
> @@ -250,8 +250,7 @@ static const struct tune_params generic_armv9_a_tunings =
>    2, /* min_div_recip_mul_df.  */
>    0, /* max_case_values.  */
>    tune_params::AUTOPREFETCHER_WEAK,  /* autoprefetcher_model.  */
> -  (AARCH64_EXTRA_TUNE_BASE
> -   | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT),  /* tune_flags.  */
> +  (AARCH64_EXTRA_TUNE_BASE), /* tune_flags.  */
>    &generic_armv9a_prefetch_tune,
>    AARCH64_LDP_STP_POLICY_ALWAYS,   /* ldp_policy_model.  */
>    AARCH64_LDP_STP_POLICY_ALWAYS         /* stp_policy_model.  */
> diff --git a/gcc/config/aarch64/tuning_models/neoverse512tvb.h 
> b/gcc/config/aarch64/tuning_models/neoverse512tvb.h
> index 
> 50eb058e23d1a824d925f6258654f9c3c7abbdff..74f23755fa40ef89cbf8e29a935c15032c3e1644
>  100644
> --- a/gcc/config/aarch64/tuning_models/neoverse512tvb.h
> +++ b/gcc/config/aarch64/tuning_models/neoverse512tvb.h
> @@ -155,8 +155,9 @@ static const struct tune_params neoverse512tvb_tunings =
>    2, /* min_div_recip_mul_df.  */
>    0, /* max_case_values.  */
>    tune_params::AUTOPREFETCHER_WEAK,  /* autoprefetcher_model.  */
> -  (AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
> -   | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT),  /* tune_flags.  */
> +  (AARCH64_EXTRA_TUNE_BASE
> +   | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
> +   | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW),     /* tune_flags.  */
>    &generic_armv9a_prefetch_tune,
>    AARCH64_LDP_STP_POLICY_ALWAYS,   /* ldp_policy_model.  */
>    AARCH64_LDP_STP_POLICY_ALWAYS         /* stp_policy_model.  */
> diff --git a/gcc/config/aarch64/tuning_models/neoversen2.h 
> b/gcc/config/aarch64/tuning_models/neoversen2.h
> index 
> 9fbc059ea12ce6c0f881c5ebe05c60165183d51b..689ab24be68b1463611e3c6b93eeb75e2e7c6700
>  100644
> --- a/gcc/config/aarch64/tuning_models/neoversen2.h
> +++ b/gcc/config/aarch64/tuning_models/neoversen2.h
> @@ -219,7 +219,6 @@ static const struct tune_params neoversen2_tunings =
>    tune_params::AUTOPREFETCHER_WEAK,  /* autoprefetcher_model.  */
>    (AARCH64_EXTRA_TUNE_BASE
>     | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
> -   | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
>     | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW),     /* tune_flags.  */
>    &generic_armv9a_prefetch_tune,
>    AARCH64_LDP_STP_POLICY_ALWAYS,   /* ldp_policy_model.  */
> diff --git a/gcc/config/aarch64/tuning_models/neoversen3.h 
> b/gcc/config/aarch64/tuning_models/neoversen3.h
> index 
> 78177e78e0700282aceb30920cba581f2b0912d0..481f3ed2b02aaf06071dc0072a9b101d22dedc98
>  100644
> --- a/gcc/config/aarch64/tuning_models/neoversen3.h
> +++ b/gcc/config/aarch64/tuning_models/neoversen3.h
> @@ -218,8 +218,7 @@ static const struct tune_params neoversen3_tunings =
>    0, /* max_case_values.  */
>    tune_params::AUTOPREFETCHER_WEAK,  /* autoprefetcher_model.  */
>    (AARCH64_EXTRA_TUNE_BASE
> -   | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
> -   | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT),  /* tune_flags.  */
> +   | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS),       /* tune_flags.  */
>    &generic_armv9a_prefetch_tune,
>    AARCH64_LDP_STP_POLICY_ALWAYS,   /* ldp_policy_model.  */
>    AARCH64_LDP_STP_POLICY_ALWAYS         /* stp_policy_model.  */
> diff --git a/gcc/config/aarch64/tuning_models/neoversev1.h 
> b/gcc/config/aarch64/tuning_models/neoversev1.h
> index 
> f1ec7dcdda7bdd09db69b3717f7dc0569fec9b16..5544e277abfa569c27eed5d37bd452bc2aa78a31
>  100644
> --- a/gcc/config/aarch64/tuning_models/neoversev1.h
> +++ b/gcc/config/aarch64/tuning_models/neoversev1.h
> @@ -228,7 +228,6 @@ static const struct tune_params neoversev1_tunings =
>    tune_params::AUTOPREFETCHER_WEAK,  /* autoprefetcher_model.  */
>    (AARCH64_EXTRA_TUNE_BASE
>     | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
> -   | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
>     | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW),     /* tune_flags.  */
>    &generic_armv9a_prefetch_tune,
>    AARCH64_LDP_STP_POLICY_ALWAYS,   /* ldp_policy_model.  */
> diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h 
> b/gcc/config/aarch64/tuning_models/neoversev2.h
> index 
> b000fb46570953f368c553d991a66661f448c728..e53b43fafaab98ce00ee70784614852212692632
>  100644
> --- a/gcc/config/aarch64/tuning_models/neoversev2.h
> +++ b/gcc/config/aarch64/tuning_models/neoversev2.h
> @@ -219,7 +219,6 @@ static const struct tune_params neoversev2_tunings =
>    tune_params::AUTOPREFETCHER_WEAK,  /* autoprefetcher_model.  */
>    (AARCH64_EXTRA_TUNE_BASE
>     | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
> -   | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
>     | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW),     /* tune_flags.  */
>    &generic_armv9a_prefetch_tune,
>    AARCH64_LDP_STP_POLICY_ALWAYS,   /* ldp_policy_model.  */
> diff --git a/gcc/config/aarch64/tuning_models/neoversev3.h 
> b/gcc/config/aarch64/tuning_models/neoversev3.h
> index 
> ad3cd222512df8c55f298c3849bb782cc092f677..8e107ff40777cec8e2f48723dc15a7a2c83bfeab
>  100644
> --- a/gcc/config/aarch64/tuning_models/neoversev3.h
> +++ b/gcc/config/aarch64/tuning_models/neoversev3.h
> @@ -219,7 +219,6 @@ static const struct tune_params neoversev3_tunings =
>    tune_params::AUTOPREFETCHER_WEAK,  /* autoprefetcher_model.  */
>    (AARCH64_EXTRA_TUNE_BASE
>     | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
> -   | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
>     | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW),     /* tune_flags.  */
>    &generic_armv9a_prefetch_tune,
>    AARCH64_LDP_STP_POLICY_ALWAYS,   /* ldp_policy_model.  */
> diff --git a/gcc/config/aarch64/tuning_models/neoversev3ae.h 
> b/gcc/config/aarch64/tuning_models/neoversev3ae.h
> index 
> a0adef00824d906747502f0f7740e29f8bf1c04a..c22592d91a7a7c20b078e2fabd14cac0b2fc2671
>  100644
> --- a/gcc/config/aarch64/tuning_models/neoversev3ae.h
> +++ b/gcc/config/aarch64/tuning_models/neoversev3ae.h
> @@ -219,7 +219,6 @@ static const struct tune_params neoversev3ae_tunings =
>    tune_params::AUTOPREFETCHER_WEAK,  /* autoprefetcher_model.  */
>    (AARCH64_EXTRA_TUNE_BASE
>     | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
> -   | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
>     | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW),     /* tune_flags.  */
>    &generic_armv9a_prefetch_tune,
>    AARCH64_LDP_STP_POLICY_ALWAYS,   /* ldp_policy_model.  */

Reply via email to