-falign-functions=N is too simplistic. Ingo Molnar ran some tests, and it looks like on the latest x86 CPUs 64-byte alignment runs fastest (he tried many other possibilities).
However, developers are less than thrilled by the idea of slam-dunk 64-byte alignment of everything. Too much waste: On 05/20/2015 02:47 AM, Linus Torvalds wrote: > At the same time, I have to admit that I abhor a 64-byte function > alignment, when we have a fair number of functions that are (much) > smaller than that. > > Is there some way to get gcc to take the size of the function into > account? Because aligning a 16-byte or 32-byte function on a 64-byte > alignment is just criminally nasty and wasteful. This change makes it possible to align functions to 64-byte boundaries *IF* this does not introduce a huge amount of padding. The patch drops the forced alignment to 8 when the requested alignment is higher than 8: before the patch, -falign-functions=9 was generating .p2align 4,,8 .p2align 3 which means: "align to 16 if the skip is 8 bytes or less; else align to 8". After this change, ".p2align 3" is not emitted. It is dropped because I ultimately want to do something like -falign-functions=64,8 - IOW, I want to align functions to 64 bytes, but only if that generates padding of less than 8 bytes - otherwise I want *no alignment at all*. The forced ".p2align 3" interferes with that intention. Testing: tested that with -falign-functions=N (tried 8, 15, 16, 17...) the alignment directives are the same before and after the patch. Tested that -falign-functions=N,N (two equal parameters) works exactly like -falign-functions=N. Index: gcc/common.opt =================================================================== --- gcc/common.opt (revision 239390) +++ gcc/common.opt (working copy) @@ -900,7 +900,7 @@ Common Report Var(align_functions,0) Optimization Align the start of functions. 
falign-functions= -Common RejectNegative Joined UInteger Var(align_functions) +Common RejectNegative Joined Var(flag_align_functions) falign-jumps Common Report Var(align_jumps,0) Optimization UInteger @@ -907,7 +907,7 @@ Common Report Var(align_jumps,0) Optimization UInt Align labels which are only reached by jumping. falign-jumps= -Common RejectNegative Joined UInteger Var(align_jumps) +Common RejectNegative Joined Var(flag_align_jumps) falign-labels Common Report Var(align_labels,0) Optimization UInteger @@ -914,7 +914,7 @@ Common Report Var(align_labels,0) Optimization UIn Align all labels. falign-labels= -Common RejectNegative Joined UInteger Var(align_labels) +Common RejectNegative Joined Var(flag_align_labels) falign-loops Common Report Var(align_loops,0) Optimization UInteger @@ -921,7 +921,7 @@ Common Report Var(align_loops,0) Optimization UInt Align the start of loops. falign-loops= -Common RejectNegative Joined UInteger Var(align_loops) +Common RejectNegative Joined Var(flag_align_loops) fargument-alias Common Ignore Index: gcc/config/i386/freebsd.h =================================================================== --- gcc/config/i386/freebsd.h (revision 239390) +++ gcc/config/i386/freebsd.h (working copy) @@ -92,25 +92,17 @@ along with GCC; see the file COPYING3. If not see /* A C statement to output to the stdio stream FILE an assembler command to advance the location counter to a multiple of 1<<LOG - bytes if it is within MAX_SKIP bytes. + bytes if it is within MAX_SKIP bytes. */ - This is used to align code labels according to Intel recommendations. 
*/ - #ifdef HAVE_GAS_MAX_SKIP_P2ALIGN #undef ASM_OUTPUT_MAX_SKIP_ALIGN #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \ do { \ if ((LOG) != 0) { \ - if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \ - else { \ + if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG))) \ + fprintf ((FILE), "\t.p2align %d\n", (LOG)); \ + else \ fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \ - /* Make sure that we have at least 8 byte alignment if > 8 byte \ - alignment is preferred. */ \ - if ((LOG) > 3 \ - && (1 << (LOG)) > ((MAX_SKIP) + 1) \ - && (MAX_SKIP) >= 7) \ - fputs ("\t.p2align 3\n", (FILE)); \ - } \ } \ } while (0) #endif Index: gcc/config/i386/gnu-user.h =================================================================== --- gcc/config/i386/gnu-user.h (revision 239390) +++ gcc/config/i386/gnu-user.h (working copy) @@ -94,24 +94,16 @@ along with GCC; see the file COPYING3. If not see /* A C statement to output to the stdio stream FILE an assembler command to advance the location counter to a multiple of 1<<LOG - bytes if it is within MAX_SKIP bytes. + bytes if it is within MAX_SKIP bytes. */ - This is used to align code labels according to Intel recommendations. */ - #ifdef HAVE_GAS_MAX_SKIP_P2ALIGN #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \ do { \ if ((LOG) != 0) { \ - if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \ - else { \ + if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG))) \ + fprintf ((FILE), "\t.p2align %d\n", (LOG)); \ + else \ fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \ - /* Make sure that we have at least 8 byte alignment if > 8 byte \ - alignment is preferred. 
*/ \ - if ((LOG) > 3 \ - && (1 << (LOG)) > ((MAX_SKIP) + 1) \ - && (MAX_SKIP) >= 7) \ - fputs ("\t.p2align 3\n", (FILE)); \ - } \ } \ } while (0) #endif Index: gcc/config/i386/iamcu.h =================================================================== --- gcc/config/i386/iamcu.h (revision 239390) +++ gcc/config/i386/iamcu.h (working copy) @@ -62,23 +62,15 @@ see the files COPYING3 and COPYING.RUNTIME respect /* A C statement to output to the stdio stream FILE an assembler command to advance the location counter to a multiple of 1<<LOG - bytes if it is within MAX_SKIP bytes. + bytes if it is within MAX_SKIP bytes. */ - This is used to align code labels according to Intel recommendations. */ - #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \ do { \ if ((LOG) != 0) { \ - if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \ - else { \ + if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG))) \ + fprintf ((FILE), "\t.p2align %d\n", (LOG)); \ + else \ fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \ - /* Make sure that we have at least 8 byte alignment if > 8 byte \ - alignment is preferred. */ \ - if ((LOG) > 3 \ - && (1 << (LOG)) > ((MAX_SKIP) + 1) \ - && (MAX_SKIP) >= 7) \ - fputs ("\t.p2align 3\n", (FILE)); \ - } \ } \ } while (0) Index: gcc/config/i386/openbsdelf.h =================================================================== --- gcc/config/i386/openbsdelf.h (revision 239390) +++ gcc/config/i386/openbsdelf.h (working copy) @@ -63,24 +63,16 @@ along with GCC; see the file COPYING3. If not see /* A C statement to output to the stdio stream FILE an assembler command to advance the location counter to a multiple of 1<<LOG - bytes if it is within MAX_SKIP bytes. + bytes if it is within MAX_SKIP bytes. */ - This is used to align code labels according to Intel recommendations. 
*/ - #ifdef HAVE_GAS_MAX_SKIP_P2ALIGN #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \ do { \ if ((LOG) != 0) { \ - if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \ - else { \ + if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG))) \ + fprintf ((FILE), "\t.p2align %d\n", (LOG)); \ + else \ fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \ - /* Make sure that we have at least 8 byte alignment if > 8 byte \ - alignment is preferred. */ \ - if ((LOG) > 3 \ - && (1 << (LOG)) > ((MAX_SKIP) + 1) \ - && (MAX_SKIP) >= 7) \ - fputs ("\t.p2align 3\n", (FILE)); \ - } \ } \ } while (0) #endif Index: gcc/config/i386/x86-64.h =================================================================== --- gcc/config/i386/x86-64.h (revision 239390) +++ gcc/config/i386/x86-64.h (working copy) @@ -65,16 +65,10 @@ see the files COPYING3 and COPYING.RUNTIME respect #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \ do { \ if ((LOG) != 0) { \ - if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \ - else { \ + if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG))) \ + fprintf ((FILE), "\t.p2align %d\n", (LOG)); \ + else \ fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \ - /* Make sure that we have at least 8 byte alignment if > 8 byte \ - alignment is preferred. */ \ - if ((LOG) > 3 \ - && (1 << (LOG)) > ((MAX_SKIP) + 1) \ - && (MAX_SKIP) >= 7) \ - fputs ("\t.p2align 3\n", (FILE)); \ - } \ } \ } while (0) #undef ASM_OUTPUT_MAX_SKIP_PAD Index: gcc/flags.h =================================================================== --- gcc/flags.h (revision 239390) +++ gcc/flags.h (working copy) @@ -55,6 +55,7 @@ struct target_flag_state { int x_align_labels_log; int x_align_labels_max_skip; int x_align_functions_log; + int x_align_functions_max_skip; /* The excess precision currently in effect. 
*/ enum excess_precision x_flag_excess_precision; @@ -81,6 +82,8 @@ extern struct target_flag_state *this_target_flag_ (this_target_flag_state->x_align_labels_max_skip) #define align_functions_log \ (this_target_flag_state->x_align_functions_log) +#define align_functions_max_skip \ + (this_target_flag_state->x_align_functions_max_skip) #define flag_excess_precision \ (this_target_flag_state->x_flag_excess_precision) Index: gcc/toplev.c =================================================================== --- gcc/toplev.c (revision 239390) +++ gcc/toplev.c (working copy) @@ -1177,29 +1177,58 @@ target_supports_section_anchors_p (void) return true; } +static int +parse_N_M (int *align, int *maxskip, const char *flag, const char *name) +{ + int _align = *align; + int _maxskip = *maxskip; + + if (flag) + { + unsigned int n, m; + if (strchr (flag, ',')) + { + if (sscanf (flag, "%u,%u", &n, &m) != 2) goto bad; + _maxskip = m; + } + else + { + if (sscanf (flag, "%u", &n) != 1) goto bad; + _maxskip = n; + } + _align = n; + if (_maxskip > 0) + _maxskip--; /* -falign-xyz=N,M means M-1 max bytes of padding, not M */ + } + +normalize: + if (_align <= 0) + _align = 1; + if ((unsigned)_maxskip > (unsigned)_align) + _maxskip = _align - 1; + + *align = _align; + *maxskip = _maxskip; + return floor_log2 (_align * 2 - 1); + +bad: + error_at (UNKNOWN_LOCATION, "-falign-%s parameter '%s' is bad", name, flag); + goto normalize; +} + /* Default the align_* variables to 1 if they're still unset, and set up the align_*_log variables. 
*/ static void init_alignments (void) { - if (align_loops <= 0) - align_loops = 1; - if (align_loops_max_skip > align_loops) - align_loops_max_skip = align_loops - 1; - align_loops_log = floor_log2 (align_loops * 2 - 1); - if (align_jumps <= 0) - align_jumps = 1; - if (align_jumps_max_skip > align_jumps) - align_jumps_max_skip = align_jumps - 1; - align_jumps_log = floor_log2 (align_jumps * 2 - 1); - if (align_labels <= 0) - align_labels = 1; - align_labels_log = floor_log2 (align_labels * 2 - 1); - if (align_labels_max_skip > align_labels) - align_labels_max_skip = align_labels - 1; - if (align_functions <= 0) - align_functions = 1; - align_functions_log = floor_log2 (align_functions * 2 - 1); + align_loops_log = parse_N_M (&align_loops, &align_loops_max_skip, + flag_align_loops, "loops"); + align_jumps_log = parse_N_M (&align_jumps, &align_jumps_max_skip, + flag_align_jumps, "jumps"); + align_labels_log = parse_N_M (&align_labels, &align_labels_max_skip, + flag_align_labels, "labels"); + align_functions_log = parse_N_M (&align_functions, &align_functions_max_skip, + flag_align_functions, "functions"); } /* Process the options that have been parsed. */ Index: gcc/varasm.c =================================================================== --- gcc/varasm.c (revision 239390) +++ gcc/varasm.c (working copy) @@ -1790,7 +1790,7 @@ assemble_start_function (tree decl, const char *fn { #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file, - align_functions_log, align_functions - 1); + align_functions_log, align_functions_max_skip); #else ASM_OUTPUT_ALIGN (asm_out_file, align_functions_log); #endif