> > On Thu, Nov 7, 2024 at 10:29 AM MayShao-oc <mayshao...@zhaoxin.com> wrote: > > > > Hi all: > > For zhaoxin, I find no improvement when enabling pass_align_tight_loops, > > and see a performance drop in some cases. > > This patch adds a new tunable to bypass pass_align_tight_loops in zhaoxin. > > > > Bootstrapped X86_64. > > Ok for trunk? > > BR > > Mayshao > > gcc/ChangeLog: > > > > * config/i386/i386-features.cc (TARGET_ALIGN_TIGHT_LOOPS): > > default true in all processors except for zhaoxin. > > * config/i386/i386.h (TARGET_ALIGN_TIGHT_LOOPS): New Macro. > > * config/i386/x86-tune.def (X86_TUNE_ALIGN_TIGHT_LOOPS): > > New tune > > --- > > gcc/config/i386/i386-features.cc | 4 +++- > > gcc/config/i386/i386.h | 3 +++ > > gcc/config/i386/x86-tune.def | 4 ++++ > > 3 files changed, 10 insertions(+), 1 deletion(-) > > > > diff --git a/gcc/config/i386/i386-features.cc > > b/gcc/config/i386/i386-features.cc > > index e2e85212a4f..d9fd92964fe 100644 > > --- a/gcc/config/i386/i386-features.cc > > +++ b/gcc/config/i386/i386-features.cc > > @@ -3620,7 +3620,9 @@ public: > > /* opt_pass methods: */ > > bool gate (function *) final override > > { > > - return optimize && optimize_function_for_speed_p (cfun); > > + return TARGET_ALIGN_TIGHT_LOOPS > > + && optimize > > + && optimize_function_for_speed_p (cfun); > > } > > > > unsigned int execute (function *) final override > > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > > index 2dcd8803a08..7f9010246c2 100644 > > --- a/gcc/config/i386/i386.h > > +++ b/gcc/config/i386/i386.h > > @@ -466,6 +466,9 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; > > #define TARGET_USE_RCR ix86_tune_features[X86_TUNE_USE_RCR] > > #define TARGET_SSE_MOVCC_USE_BLENDV \ > > ix86_tune_features[X86_TUNE_SSE_MOVCC_USE_BLENDV] > > +#define TARGET_ALIGN_TIGHT_LOOPS \ > > + ix86_tune_features[X86_TUNE_ALIGN_TIGHT_LOOPS] > > + > > > > /* Feature tests against the various architecture variations. 
*/ > > enum ix86_arch_indices { > > diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def > > index 6ebb2fd3414..bd4fa8b3eee 100644 > > --- a/gcc/config/i386/x86-tune.def > > +++ b/gcc/config/i386/x86-tune.def > > @@ -542,6 +542,10 @@ DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD, > > DEF_TUNE (X86_TUNE_SSE_MOVCC_USE_BLENDV, > > "sse_movcc_use_blendv", ~m_CORE_ATOM) > > > > +/* X86_TUNE_ALIGN_TIGHT_LOOPS: if false, tight loops are not aligned. */ > > +DEF_TUNE (X86_TUNE_ALIGN_TIGHT_LOOPS, "align_tight_loops", > > + ~(m_ZHAOXIN)) > Please also add ~(m_ZHAOXIN | m_CASCADELAKE | m_SKYLAKE_AVX512)) > And could you put it under the section of > > > /*****************************************************************************/ > -/* Branch predictor tuning > */ > +/* Branch predictor and The Front-end tuning > */ > > /*****************************************************************************/ > > + > > > > /*****************************************************************************/ > > /* AVX instruction selection tuning (some of SSE flags affects AVX, too) > > */ > > > > /*****************************************************************************/ > > -- > > 2.27.0 > > > > > -- > BR, > Hongtao
Ok. BR, Mayshao
0001-x86_64-Add-microarchtecture-tunable-for-pass_align_v1.patch
Description: 0001-x86_64-Add-microarchtecture-tunable-for-pass_align_v1.patch