Hi all,

For Zhaoxin, I find no improvement when enabling pass_align_tight_loops, and I see a performance drop in some cases. This patch adds a new tunable so that pass_align_tight_loops is bypassed on Zhaoxin.
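For context, pass_align_tight_loops force-aligns small innermost loops, roughly those whose body is short enough to sit in a single cache line / fetch window. A sketch of the kind of loop it targets, purely for illustration (hypothetical example, not part of the patch or the testsuite):

    /* A tight loop: a handful of instructions in the body.  On tunings
       with align_tight_loops set, the pass may emit extra alignment so
       the loop body is not split across a fetch boundary; with the flag
       clear (Zhaoxin), no such padding is added.  */
    int
    sum_ints (const int *a, int n)
    {
      int s = 0;
      for (int i = 0; i < n; i++)
        s += a[i];
      return s;
    }

Since align_tight_loops is an ordinary DEF_TUNE entry, it can also be forced on or off on any target for testing via -mtune-ctrl=align_tight_loops or -mtune-ctrl=^align_tight_loops.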
Bootstrapped on x86_64. OK for trunk?

BR
Mayshao

gcc/ChangeLog:

	* config/i386/i386-features.cc (pass_align_tight_loops::gate):
	Do not align tight loops unless TARGET_ALIGN_TIGHT_LOOPS.
	* config/i386/i386.h (TARGET_ALIGN_TIGHT_LOOPS): New macro.
	* config/i386/x86-tune.def (X86_TUNE_ALIGN_TIGHT_LOOPS): New tune,
	enabled by default for all processors except Zhaoxin.
---
 gcc/config/i386/i386-features.cc | 4 +++-
 gcc/config/i386/i386.h           | 3 +++
 gcc/config/i386/x86-tune.def     | 4 ++++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index e2e85212a4f..d9fd92964fe 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -3620,7 +3620,9 @@ public:
   /* opt_pass methods: */
   bool gate (function *) final override
     {
-      return optimize && optimize_function_for_speed_p (cfun);
+      return TARGET_ALIGN_TIGHT_LOOPS
+             && optimize
+             && optimize_function_for_speed_p (cfun);
     }
 
   unsigned int execute (function *) final override
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 2dcd8803a08..7f9010246c2 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -466,6 +466,9 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 #define TARGET_USE_RCR ix86_tune_features[X86_TUNE_USE_RCR]
 #define TARGET_SSE_MOVCC_USE_BLENDV \
 	ix86_tune_features[X86_TUNE_SSE_MOVCC_USE_BLENDV]
+#define TARGET_ALIGN_TIGHT_LOOPS \
+	ix86_tune_features[X86_TUNE_ALIGN_TIGHT_LOOPS]
+
 /* Feature tests against the various architecture variations.  */
 enum ix86_arch_indices
 {
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 6ebb2fd3414..bd4fa8b3eee 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -542,6 +542,10 @@ DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD,
 DEF_TUNE (X86_TUNE_SSE_MOVCC_USE_BLENDV,
 	  "sse_movcc_use_blendv", ~m_CORE_ATOM)
 
+/* X86_TUNE_ALIGN_TIGHT_LOOPS: if false, tight loops are not aligned.  */
+DEF_TUNE (X86_TUNE_ALIGN_TIGHT_LOOPS, "align_tight_loops",
+	  ~(m_ZHAOXIN))
+
 /*****************************************************************************/
 /* AVX instruction selection tuning (some of SSE flags affects AVX, too)    */
 /*****************************************************************************/
-- 
2.27.0