Hi all:
   On Zhaoxin processors, I see no improvement from enabling
pass_align_tight_loops, and performance drops in some cases.
   This patch adds a new tunable to bypass pass_align_tight_loops on Zhaoxin.

   Bootstrapped on x86_64.
   Ok for trunk?
BR
Mayshao
gcc/ChangeLog:

        * config/i386/i386-features.cc (gate): Also check
        TARGET_ALIGN_TIGHT_LOOPS, which defaults to true on all
        processors except Zhaoxin.
        * config/i386/i386.h (TARGET_ALIGN_TIGHT_LOOPS): New macro.
        * config/i386/x86-tune.def (X86_TUNE_ALIGN_TIGHT_LOOPS):
        New tune.
---
 gcc/config/i386/i386-features.cc | 4 +++-
 gcc/config/i386/i386.h           | 3 +++
 gcc/config/i386/x86-tune.def     | 4 ++++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index e2e85212a4f..d9fd92964fe 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -3620,7 +3620,9 @@ public:
   /* opt_pass methods: */
   bool gate (function *) final override
     {
-      return optimize && optimize_function_for_speed_p (cfun);
+      return TARGET_ALIGN_TIGHT_LOOPS
+            && optimize
+            && optimize_function_for_speed_p (cfun);
     }
 
   unsigned int execute (function *) final override
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 2dcd8803a08..7f9010246c2 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -466,6 +466,9 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 #define TARGET_USE_RCR ix86_tune_features[X86_TUNE_USE_RCR]
 #define TARGET_SSE_MOVCC_USE_BLENDV \
        ix86_tune_features[X86_TUNE_SSE_MOVCC_USE_BLENDV]
+#define TARGET_ALIGN_TIGHT_LOOPS \
+        ix86_tune_features[X86_TUNE_ALIGN_TIGHT_LOOPS]
+
 
 /* Feature tests against the various architecture variations.  */
 enum ix86_arch_indices {
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 6ebb2fd3414..bd4fa8b3eee 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -542,6 +542,10 @@ DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD,
 DEF_TUNE (X86_TUNE_SSE_MOVCC_USE_BLENDV,
          "sse_movcc_use_blendv", ~m_CORE_ATOM)
 
+/* X86_TUNE_ALIGN_TIGHT_LOOPS: if false, tight loops are not aligned. */
+DEF_TUNE (X86_TUNE_ALIGN_TIGHT_LOOPS, "align_tight_loops",
+        ~(m_ZHAOXIN))
+
 /*****************************************************************************/
 /* AVX instruction selection tuning (some of SSE flags affects AVX, too)     */
 /*****************************************************************************/
-- 
2.27.0

Reply via email to