This change drops the forced alignment to 8 bytes when the requested alignment is higher than 8: before the patch, -falign-functions=9 was generating
".p2align 4,,8" followed by ".p2align 3", which means: "align to 16 if the skip is 8 bytes or less; else align to 8". After this change, ".p2align 3" is not emitted. For many generations now, x86 CPUs have had cachelines of at least 32, and usually 64, bytes. Aligning to a cacheline (e.g. -falign-functions=32) to avoid needing two fetches to decode the next instruction makes sense; aligning to 8 bytes within a cacheline does not. It simply wastes bytes. I ultimately want to be able to do something like -falign-functions=64,8: I want to align functions to 64 bytes, but only if that generates padding of less than 8 bytes - otherwise I want *no alignment at all*. The forced ".p2align 3" interferes with that intention. Simple testing on a SandyBridge CPU did not reveal any performance difference between a tight loop which starts at byte 7 inside a 64-byte cacheline and the same loop starting at byte 8. 2016-08-12 Denys Vlasenko <dvlas...@redhat.com> * config/i386/freebsd.h (ASM_OUTPUT_MAX_SKIP_ALIGN): Remove "If N is large, do at least 8 byte alignment" code. * config/i386/gnu-user.h (ASM_OUTPUT_MAX_SKIP_ALIGN): Likewise. * config/i386/iamcu.h (ASM_OUTPUT_MAX_SKIP_ALIGN): Likewise. * config/i386/openbsdelf.h (ASM_OUTPUT_MAX_SKIP_ALIGN): Likewise. * config/i386/x86-64.h (ASM_OUTPUT_MAX_SKIP_ALIGN): Likewise. Index: gcc/config/i386/freebsd.h =================================================================== --- gcc/config/i386/freebsd.h (revision 239390) +++ gcc/config/i386/freebsd.h (working copy) @@ -92,25 +92,17 @@ along with GCC; see the file COPYING3. If not see /* A C statement to output to the stdio stream FILE an assembler command to advance the location counter to a multiple of 1<<LOG - bytes if it is within MAX_SKIP bytes. + bytes if it is within MAX_SKIP bytes. */ - This is used to align code labels according to Intel recommendations. 
*/ - #ifdef HAVE_GAS_MAX_SKIP_P2ALIGN #undef ASM_OUTPUT_MAX_SKIP_ALIGN #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \ do { \ if ((LOG) != 0) { \ - if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \ - else { \ + if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG))) \ + fprintf ((FILE), "\t.p2align %d\n", (LOG)); \ + else \ fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \ - /* Make sure that we have at least 8 byte alignment if > 8 byte \ - alignment is preferred. */ \ - if ((LOG) > 3 \ - && (1 << (LOG)) > ((MAX_SKIP) + 1) \ - && (MAX_SKIP) >= 7) \ - fputs ("\t.p2align 3\n", (FILE)); \ - } \ } \ } while (0) #endif Index: gcc/config/i386/gnu-user.h =================================================================== --- gcc/config/i386/gnu-user.h (revision 239390) +++ gcc/config/i386/gnu-user.h (working copy) @@ -94,24 +94,16 @@ along with GCC; see the file COPYING3. If not see /* A C statement to output to the stdio stream FILE an assembler command to advance the location counter to a multiple of 1<<LOG - bytes if it is within MAX_SKIP bytes. + bytes if it is within MAX_SKIP bytes. */ - This is used to align code labels according to Intel recommendations. */ - #ifdef HAVE_GAS_MAX_SKIP_P2ALIGN #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \ do { \ if ((LOG) != 0) { \ - if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \ - else { \ + if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG))) \ + fprintf ((FILE), "\t.p2align %d\n", (LOG)); \ + else \ fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \ - /* Make sure that we have at least 8 byte alignment if > 8 byte \ - alignment is preferred. 
*/ \ - if ((LOG) > 3 \ - && (1 << (LOG)) > ((MAX_SKIP) + 1) \ - && (MAX_SKIP) >= 7) \ - fputs ("\t.p2align 3\n", (FILE)); \ - } \ } \ } while (0) #endif Index: gcc/config/i386/iamcu.h =================================================================== --- gcc/config/i386/iamcu.h (revision 239390) +++ gcc/config/i386/iamcu.h (working copy) @@ -62,23 +62,15 @@ see the files COPYING3 and COPYING.RUNTIME respect /* A C statement to output to the stdio stream FILE an assembler command to advance the location counter to a multiple of 1<<LOG - bytes if it is within MAX_SKIP bytes. + bytes if it is within MAX_SKIP bytes. */ - This is used to align code labels according to Intel recommendations. */ - #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \ do { \ if ((LOG) != 0) { \ - if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \ - else { \ + if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG))) \ + fprintf ((FILE), "\t.p2align %d\n", (LOG)); \ + else \ fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \ - /* Make sure that we have at least 8 byte alignment if > 8 byte \ - alignment is preferred. */ \ - if ((LOG) > 3 \ - && (1 << (LOG)) > ((MAX_SKIP) + 1) \ - && (MAX_SKIP) >= 7) \ - fputs ("\t.p2align 3\n", (FILE)); \ - } \ } \ } while (0) Index: gcc/config/i386/openbsdelf.h =================================================================== --- gcc/config/i386/openbsdelf.h (revision 239390) +++ gcc/config/i386/openbsdelf.h (working copy) @@ -63,24 +63,16 @@ along with GCC; see the file COPYING3. If not see /* A C statement to output to the stdio stream FILE an assembler command to advance the location counter to a multiple of 1<<LOG - bytes if it is within MAX_SKIP bytes. + bytes if it is within MAX_SKIP bytes. */ - This is used to align code labels according to Intel recommendations. 
*/ - #ifdef HAVE_GAS_MAX_SKIP_P2ALIGN #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \ do { \ if ((LOG) != 0) { \ - if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \ - else { \ + if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG))) \ + fprintf ((FILE), "\t.p2align %d\n", (LOG)); \ + else \ fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \ - /* Make sure that we have at least 8 byte alignment if > 8 byte \ - alignment is preferred. */ \ - if ((LOG) > 3 \ - && (1 << (LOG)) > ((MAX_SKIP) + 1) \ - && (MAX_SKIP) >= 7) \ - fputs ("\t.p2align 3\n", (FILE)); \ - } \ } \ } while (0) #endif Index: gcc/config/i386/x86-64.h =================================================================== --- gcc/config/i386/x86-64.h (revision 239390) +++ gcc/config/i386/x86-64.h (working copy) @@ -65,16 +65,10 @@ see the files COPYING3 and COPYING.RUNTIME respect #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \ do { \ if ((LOG) != 0) { \ - if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \ - else { \ + if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG))) \ + fprintf ((FILE), "\t.p2align %d\n", (LOG)); \ + else \ fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \ - /* Make sure that we have at least 8 byte alignment if > 8 byte \ - alignment is preferred. */ \ - if ((LOG) > 3 \ - && (1 << (LOG)) > ((MAX_SKIP) + 1) \ - && (MAX_SKIP) >= 7) \ - fputs ("\t.p2align 3\n", (FILE)); \ - } \ } \ } while (0) #undef ASM_OUTPUT_MAX_SKIP_PAD