This change drops the forced fallback alignment to 8 bytes when the requested
alignment is higher than 8: before the patch, -falign-functions=9 was generating

        .p2align 4,,8
        .p2align 3

which means: "align to 16 if the skip is 8 bytes or less; else align to 8".
After this change, ".p2align 3" is not emitted.

For many generations now, x86 CPUs have had at least 32-byte, and usually
64-byte, cachelines. Aligning to a cacheline (e.g. -falign-functions=32) to
avoid needing two fetches to decode the next insn makes sense; aligning to
8 bytes within a cacheline does not. It simply wastes bytes.

I ultimately want to be able to do something like -falign-functions=64,8:
I want to align functions to 64 bytes, but only if that generates padding
of less than 8 bytes - otherwise I want *no alignment at all*.
The forced ".p2align 3" interferes with that intention.

Simple testing on a SandyBridge CPU did not reveal any performance difference
between a tight loop which starts at byte 7 inside a 64-byte cacheline
and the same loop starting at byte 8.

2016-08-12  Denys Vlasenko  <dvlas...@redhat.com>

    * config/i386/freebsd.h (ASM_OUTPUT_MAX_SKIP_ALIGN): Remove "If N
    is large, do at least 8 byte alignment" code.
    * config/i386/gnu-user.h (ASM_OUTPUT_MAX_SKIP_ALIGN): Likewise.
    * config/i386/iamcu.h (ASM_OUTPUT_MAX_SKIP_ALIGN): Likewise.
    * config/i386/openbsdelf.h (ASM_OUTPUT_MAX_SKIP_ALIGN): Likewise.
    * config/i386/x86-64.h (ASM_OUTPUT_MAX_SKIP_ALIGN): Likewise.

Index: gcc/config/i386/freebsd.h
===================================================================
--- gcc/config/i386/freebsd.h   (revision 239390)
+++ gcc/config/i386/freebsd.h   (working copy)
@@ -92,25 +92,17 @@ along with GCC; see the file COPYING3.  If not see
 
 /* A C statement to output to the stdio stream FILE an assembler
    command to advance the location counter to a multiple of 1<<LOG
-   bytes if it is within MAX_SKIP bytes.
+   bytes if it is within MAX_SKIP bytes.  */
 
-   This is used to align code labels according to Intel recommendations.  */
-
 #ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
 #undef  ASM_OUTPUT_MAX_SKIP_ALIGN
 #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP)                   \
   do {                                                                 \
     if ((LOG) != 0) {                                                  \
-      if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
-      else {                                                           \
+      if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG)))                 \
+        fprintf ((FILE), "\t.p2align %d\n", (LOG));                    \
+      else                                                             \
        fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP));     \
-       /* Make sure that we have at least 8 byte alignment if > 8 byte \
-          alignment is preferred.  */                                  \
-       if ((LOG) > 3                                                   \
-           && (1 << (LOG)) > ((MAX_SKIP) + 1)                          \
-           && (MAX_SKIP) >= 7)                                         \
-         fputs ("\t.p2align 3\n", (FILE));                             \
-      }                                                                \
     }                                                                  \
   } while (0)
 #endif
Index: gcc/config/i386/gnu-user.h
===================================================================
--- gcc/config/i386/gnu-user.h  (revision 239390)
+++ gcc/config/i386/gnu-user.h  (working copy)
@@ -94,24 +94,16 @@ along with GCC; see the file COPYING3.  If not see
 
 /* A C statement to output to the stdio stream FILE an assembler
    command to advance the location counter to a multiple of 1<<LOG
-   bytes if it is within MAX_SKIP bytes.
+   bytes if it is within MAX_SKIP bytes.  */
 
-   This is used to align code labels according to Intel recommendations.  */
-
 #ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
 #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP)                   \
   do {                                                                 \
     if ((LOG) != 0) {                                                  \
-      if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
-      else {                                                           \
+      if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG)))                 \
+        fprintf ((FILE), "\t.p2align %d\n", (LOG));                    \
+      else                                                             \
        fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP));     \
-       /* Make sure that we have at least 8 byte alignment if > 8 byte \
-          alignment is preferred.  */                                  \
-       if ((LOG) > 3                                                   \
-           && (1 << (LOG)) > ((MAX_SKIP) + 1)                          \
-           && (MAX_SKIP) >= 7)                                         \
-         fputs ("\t.p2align 3\n", (FILE));                             \
-      }                                                                \
     }                                                                  \
   } while (0)
 #endif
Index: gcc/config/i386/iamcu.h
===================================================================
--- gcc/config/i386/iamcu.h     (revision 239390)
+++ gcc/config/i386/iamcu.h     (working copy)
@@ -62,23 +62,15 @@ see the files COPYING3 and COPYING.RUNTIME respect
 
 /* A C statement to output to the stdio stream FILE an assembler
    command to advance the location counter to a multiple of 1<<LOG
-   bytes if it is within MAX_SKIP bytes.
+   bytes if it is within MAX_SKIP bytes.  */
 
-   This is used to align code labels according to Intel recommendations.  */
-
 #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP)                   \
   do {                                                                 \
     if ((LOG) != 0) {                                                  \
-      if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
-      else {                                                           \
+      if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG)))                 \
+        fprintf ((FILE), "\t.p2align %d\n", (LOG));                    \
+      else                                                             \
        fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP));     \
-       /* Make sure that we have at least 8 byte alignment if > 8 byte \
-          alignment is preferred.  */                                  \
-       if ((LOG) > 3                                                   \
-           && (1 << (LOG)) > ((MAX_SKIP) + 1)                          \
-           && (MAX_SKIP) >= 7)                                         \
-         fputs ("\t.p2align 3\n", (FILE));                             \
-      }                                                                \
     }                                                                  \
   } while (0)
 
Index: gcc/config/i386/openbsdelf.h
===================================================================
--- gcc/config/i386/openbsdelf.h        (revision 239390)
+++ gcc/config/i386/openbsdelf.h        (working copy)
@@ -63,24 +63,16 @@ along with GCC; see the file COPYING3.  If not see
 
 /* A C statement to output to the stdio stream FILE an assembler
    command to advance the location counter to a multiple of 1<<LOG
-   bytes if it is within MAX_SKIP bytes.
+   bytes if it is within MAX_SKIP bytes.  */
 
-   This is used to align code labels according to Intel recommendations.  */
-
 #ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
 #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP)                   \
   do {                                                                 \
     if ((LOG) != 0) {                                                  \
-      if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
-      else {                                                           \
+      if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG)))                 \
+        fprintf ((FILE), "\t.p2align %d\n", (LOG));                    \
+      else                                                             \
        fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP));     \
-       /* Make sure that we have at least 8 byte alignment if > 8 byte \
-          alignment is preferred.  */                                  \
-       if ((LOG) > 3                                                   \
-           && (1 << (LOG)) > ((MAX_SKIP) + 1)                          \
-           && (MAX_SKIP) >= 7)                                         \
-         fputs ("\t.p2align 3\n", (FILE));                             \
-      }                                                                \
     }                                                                  \
   } while (0)
 #endif
Index: gcc/config/i386/x86-64.h
===================================================================
--- gcc/config/i386/x86-64.h    (revision 239390)
+++ gcc/config/i386/x86-64.h    (working copy)
@@ -65,16 +65,10 @@ see the files COPYING3 and COPYING.RUNTIME respect
 #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP)                   \
   do {                                                                 \
     if ((LOG) != 0) {                                                  \
-      if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
-      else {                                                           \
+      if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG)))                 \
+        fprintf ((FILE), "\t.p2align %d\n", (LOG));                    \
+      else                                                             \
        fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP));     \
-       /* Make sure that we have at least 8 byte alignment if > 8 byte \
-          alignment is preferred.  */                                  \
-       if ((LOG) > 3                                                   \
-           && (1 << (LOG)) > ((MAX_SKIP) + 1)                          \
-           && (MAX_SKIP) >= 7)                                         \
-         fputs ("\t.p2align 3\n", (FILE));                             \
-      }                                                                \
     }                                                                  \
   } while (0)
 #undef  ASM_OUTPUT_MAX_SKIP_PAD

Reply via email to