On Thu, Nov 7, 2024 at 10:29 AM MayShao-oc <mayshao...@zhaoxin.com> wrote:
>
> Hi all:
>    For Zhaoxin, I find no improvement when pass_align_tight_loops is enabled,
> and performance drops in some cases.
>    This patch adds a new tunable to bypass pass_align_tight_loops on Zhaoxin.
>
>    Bootstrapped X86_64.
>    Ok for trunk?
> BR
> Mayshao
> gcc/ChangeLog:
>
>         * config/i386/i386-features.cc (TARGET_ALIGN_TIGHT_LOOPS):
>         default true in all processors except for zhaoxin.
>         * config/i386/i386.h (TARGET_ALIGN_TIGHT_LOOPS): New Macro.
>         * config/i386/x86-tune.def (X86_TUNE_ALIGN_TIGHT_LOOPS):
>         New tune
> ---
>  gcc/config/i386/i386-features.cc | 4 +++-
>  gcc/config/i386/i386.h           | 3 +++
>  gcc/config/i386/x86-tune.def     | 4 ++++
>  3 files changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
> index e2e85212a4f..d9fd92964fe 100644
> --- a/gcc/config/i386/i386-features.cc
> +++ b/gcc/config/i386/i386-features.cc
> @@ -3620,7 +3620,9 @@ public:
>    /* opt_pass methods: */
>    bool gate (function *) final override
>      {
> -      return optimize && optimize_function_for_speed_p (cfun);
> +      return TARGET_ALIGN_TIGHT_LOOPS
> +            && optimize
> +            && optimize_function_for_speed_p (cfun);
>      }
>
>    unsigned int execute (function *) final override
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 2dcd8803a08..7f9010246c2 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -466,6 +466,9 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
>  #define TARGET_USE_RCR ix86_tune_features[X86_TUNE_USE_RCR]
>  #define TARGET_SSE_MOVCC_USE_BLENDV \
>         ix86_tune_features[X86_TUNE_SSE_MOVCC_USE_BLENDV]
> +#define TARGET_ALIGN_TIGHT_LOOPS \
> +        ix86_tune_features[X86_TUNE_ALIGN_TIGHT_LOOPS]
> +
>
>  /* Feature tests against the various architecture variations.  */
>  enum ix86_arch_indices {
> diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
> index 6ebb2fd3414..bd4fa8b3eee 100644
> --- a/gcc/config/i386/x86-tune.def
> +++ b/gcc/config/i386/x86-tune.def
> @@ -542,6 +542,10 @@ DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD,
>  DEF_TUNE (X86_TUNE_SSE_MOVCC_USE_BLENDV,
>           "sse_movcc_use_blendv", ~m_CORE_ATOM)
>
> +/* X86_TUNE_ALIGN_TIGHT_LOOPS: if false, tight loops are not aligned. */
> +DEF_TUNE (X86_TUNE_ALIGN_TIGHT_LOOPS, "align_tight_loops",
> +        ~(m_ZHAOXIN))
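Please also exclude m_CASCADELAKE and m_SKYLAKE_AVX512, i.e. make this line
        ~(m_ZHAOXIN | m_CASCADELAKE | m_SKYLAKE_AVX512))
And could you put it under the following section, renaming the section
header as shown: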

 /*****************************************************************************/
-/* Branch predictor tuning                                                  */
+/* Branch predictor and The Front-end tuning                                 */
 /*****************************************************************************/
> +
>  /*****************************************************************************/
>  /* AVX instruction selection tuning (some of SSE flags affects AVX, too)     */
>  /*****************************************************************************/
> --
> 2.27.0
>


--
BR,
Hongtao

Reply via email to