> > On Thu, Nov 7, 2024 at 10:29 AM MayShao-oc <mayshao...@zhaoxin.com> wrote:
> >
> > Hi all:
> >    On Zhaoxin processors, I see no improvement when pass_align_tight_loops
> > is enabled, and performance drops in some cases.
> >    This patch adds a new tunable to bypass pass_align_tight_loops on Zhaoxin.
> >
> >    Bootstrapped X86_64.
> >    Ok for trunk?
> > BR
> > Mayshao
> > gcc/ChangeLog:
> >
> >         * config/i386/i386-features.cc (TARGET_ALIGN_TIGHT_LOOPS):
> >         Default to true for all processors except Zhaoxin.
> >         * config/i386/i386.h (TARGET_ALIGN_TIGHT_LOOPS): New macro.
> >         * config/i386/x86-tune.def (X86_TUNE_ALIGN_TIGHT_LOOPS):
> >         New tune.
> > ---
> >  gcc/config/i386/i386-features.cc | 4 +++-
> >  gcc/config/i386/i386.h           | 3 +++
> >  gcc/config/i386/x86-tune.def     | 4 ++++
> >  3 files changed, 10 insertions(+), 1 deletion(-)
> >
> > diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
> > index e2e85212a4f..d9fd92964fe 100644
> > --- a/gcc/config/i386/i386-features.cc
> > +++ b/gcc/config/i386/i386-features.cc
> > @@ -3620,7 +3620,9 @@ public:
> >    /* opt_pass methods: */
> >    bool gate (function *) final override
> >      {
> > -      return optimize && optimize_function_for_speed_p (cfun);
> > +      return TARGET_ALIGN_TIGHT_LOOPS
> > +            && optimize
> > +            && optimize_function_for_speed_p (cfun);
> >      }
> >
> >    unsigned int execute (function *) final override
> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> > index 2dcd8803a08..7f9010246c2 100644
> > --- a/gcc/config/i386/i386.h
> > +++ b/gcc/config/i386/i386.h
> > @@ -466,6 +466,9 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
> >  #define TARGET_USE_RCR ix86_tune_features[X86_TUNE_USE_RCR]
> >  #define TARGET_SSE_MOVCC_USE_BLENDV \
> >         ix86_tune_features[X86_TUNE_SSE_MOVCC_USE_BLENDV]
> > +#define TARGET_ALIGN_TIGHT_LOOPS \
> > +        ix86_tune_features[X86_TUNE_ALIGN_TIGHT_LOOPS]
> > +
> >
> >  /* Feature tests against the various architecture variations.  */
> >  enum ix86_arch_indices {
> > diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
> > index 6ebb2fd3414..bd4fa8b3eee 100644
> > --- a/gcc/config/i386/x86-tune.def
> > +++ b/gcc/config/i386/x86-tune.def
> > @@ -542,6 +542,10 @@ DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD,
> >  DEF_TUNE (X86_TUNE_SSE_MOVCC_USE_BLENDV,
> >           "sse_movcc_use_blendv", ~m_CORE_ATOM)
> >
> > +/* X86_TUNE_ALIGN_TIGHT_LOOPS: if false, tight loops are not aligned. */
> > +DEF_TUNE (X86_TUNE_ALIGN_TIGHT_LOOPS, "align_tight_loops",
> > +        ~(m_ZHAOXIN))
> Please also add m_CASCADELAKE and m_SKYLAKE_AVX512 to the mask:
>         ~(m_ZHAOXIN | m_CASCADELAKE | m_SKYLAKE_AVX512))
> And could you put it under the following section (renamed as shown)?
> 
>  /*****************************************************************************/
> -/* Branch predictor tuning                                                   */
> +/* Branch predictor and The Front-end tuning                                 */
>  /*****************************************************************************/
> > +
> >  /*****************************************************************************/
> >  /* AVX instruction selection tuning (some of SSE flags affects AVX, too)     */
> >  /*****************************************************************************/
> > --
> > 2.27.0
> >
> 
> 
> --
> BR,
> Hongtao

Ok

BR
Mayshao

Attachment: 0001-x86_64-Add-microarchtecture-tunable-for-pass_align_v1.patch
Description: 0001-x86_64-Add-microarchtecture-tunable-for-pass_align_v1.patch

Reply via email to