falign-functions=N is too simplistic.

Ingo Molnar ran some tests, and it looks like on the latest x86 CPUs 64-byte
alignment runs fastest (he tried many other possibilities).

However, developers are less than thrilled by the idea of unconditionally
aligning everything to 64 bytes. Too much waste:
        On 05/20/2015 02:47 AM, Linus Torvalds wrote:
        > At the same time, I have to admit that I abhor a 64-byte function
        > alignment, when we have a fair number of functions that are (much)
        > smaller than that.
        >
        > Is there some way to get gcc to take the size of the function into
        > account? Because aligning a 16-byte or 32-byte function on a 64-byte
        > alignment is just criminally nasty and wasteful.

This change makes it possible to align functions to 64-byte boundaries *IF*
this does not introduce a huge amount of padding.

The patch drops the forced alignment to 8 when the requested alignment is
higher than 8: before the patch, -falign-functions=9 was generating

        .p2align 4,,8
        .p2align 3

which means: "align to 16 if the skip is 8 bytes or less; else align to 8".
After this change, ".p2align 3" is not emitted.

It is dropped because I ultimately want to do something
like -falign-functions=64,8 - IOW, I want to align functions to 64 bytes,
but only if that generates padding of less than 8 bytes - otherwise I want
*no alignment at all*. The forced ".p2align 3" interferes with that intention.

Testing:
Tested that with -falign-functions=N (tried 8, 15, 16, 17...) the alignment
directives are the same before and after the patch.
Tested that -falign-functions=N,N (two equal parameters) works exactly
like -falign-functions=N.

Index: gcc/common.opt
===================================================================
--- gcc/common.opt      (revision 239390)
+++ gcc/common.opt      (working copy)
@@ -900,7 +900,7 @@ Common Report Var(align_functions,0) Optimization
 Align the start of functions.
 
 falign-functions=
-Common RejectNegative Joined UInteger Var(align_functions)
+Common RejectNegative Joined Var(flag_align_functions)
 
 falign-jumps
 Common Report Var(align_jumps,0) Optimization UInteger
@@ -907,7 +907,7 @@ Common Report Var(align_jumps,0) Optimization UInt
 Align labels which are only reached by jumping.
 
 falign-jumps=
-Common RejectNegative Joined UInteger Var(align_jumps)
+Common RejectNegative Joined Var(flag_align_jumps)
 
 falign-labels
 Common Report Var(align_labels,0) Optimization UInteger
@@ -914,7 +914,7 @@ Common Report Var(align_labels,0) Optimization UIn
 Align all labels.
 
 falign-labels=
-Common RejectNegative Joined UInteger Var(align_labels)
+Common RejectNegative Joined Var(flag_align_labels)
 
 falign-loops
 Common Report Var(align_loops,0) Optimization UInteger
@@ -921,7 +921,7 @@ Common Report Var(align_loops,0) Optimization UInt
 Align the start of loops.
 
 falign-loops=
-Common RejectNegative Joined UInteger Var(align_loops)
+Common RejectNegative Joined Var(flag_align_loops)
 
 fargument-alias
 Common Ignore
Index: gcc/config/i386/freebsd.h
===================================================================
--- gcc/config/i386/freebsd.h   (revision 239390)
+++ gcc/config/i386/freebsd.h   (working copy)
@@ -92,25 +92,17 @@ along with GCC; see the file COPYING3.  If not see
 
 /* A C statement to output to the stdio stream FILE an assembler
    command to advance the location counter to a multiple of 1<<LOG
-   bytes if it is within MAX_SKIP bytes.
+   bytes if it is within MAX_SKIP bytes.  */
 
-   This is used to align code labels according to Intel recommendations.  */
-
 #ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
 #undef  ASM_OUTPUT_MAX_SKIP_ALIGN
 #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP)                   \
   do {                                                                 \
     if ((LOG) != 0) {                                                  \
-      if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
-      else {                                                           \
+      if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG)))                 \
+        fprintf ((FILE), "\t.p2align %d\n", (LOG));                    \
+      else                                                             \
        fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP));     \
-       /* Make sure that we have at least 8 byte alignment if > 8 byte \
-          alignment is preferred.  */                                  \
-       if ((LOG) > 3                                                   \
-           && (1 << (LOG)) > ((MAX_SKIP) + 1)                          \
-           && (MAX_SKIP) >= 7)                                         \
-         fputs ("\t.p2align 3\n", (FILE));                             \
-      }                                                                \
     }                                                                  \
   } while (0)
 #endif
Index: gcc/config/i386/gnu-user.h
===================================================================
--- gcc/config/i386/gnu-user.h  (revision 239390)
+++ gcc/config/i386/gnu-user.h  (working copy)
@@ -94,24 +94,16 @@ along with GCC; see the file COPYING3.  If not see
 
 /* A C statement to output to the stdio stream FILE an assembler
    command to advance the location counter to a multiple of 1<<LOG
-   bytes if it is within MAX_SKIP bytes.
+   bytes if it is within MAX_SKIP bytes.  */
 
-   This is used to align code labels according to Intel recommendations.  */
-
 #ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
 #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP)                   \
   do {                                                                 \
     if ((LOG) != 0) {                                                  \
-      if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
-      else {                                                           \
+      if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG)))                 \
+        fprintf ((FILE), "\t.p2align %d\n", (LOG));                    \
+      else                                                             \
        fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP));     \
-       /* Make sure that we have at least 8 byte alignment if > 8 byte \
-          alignment is preferred.  */                                  \
-       if ((LOG) > 3                                                   \
-           && (1 << (LOG)) > ((MAX_SKIP) + 1)                          \
-           && (MAX_SKIP) >= 7)                                         \
-         fputs ("\t.p2align 3\n", (FILE));                             \
-      }                                                                \
     }                                                                  \
   } while (0)
 #endif
Index: gcc/config/i386/iamcu.h
===================================================================
--- gcc/config/i386/iamcu.h     (revision 239390)
+++ gcc/config/i386/iamcu.h     (working copy)
@@ -62,23 +62,15 @@ see the files COPYING3 and COPYING.RUNTIME respect
 
 /* A C statement to output to the stdio stream FILE an assembler
    command to advance the location counter to a multiple of 1<<LOG
-   bytes if it is within MAX_SKIP bytes.
+   bytes if it is within MAX_SKIP bytes.  */
 
-   This is used to align code labels according to Intel recommendations.  */
-
 #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP)                   \
   do {                                                                 \
     if ((LOG) != 0) {                                                  \
-      if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
-      else {                                                           \
+      if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG)))                 \
+        fprintf ((FILE), "\t.p2align %d\n", (LOG));                    \
+      else                                                             \
        fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP));     \
-       /* Make sure that we have at least 8 byte alignment if > 8 byte \
-          alignment is preferred.  */                                  \
-       if ((LOG) > 3                                                   \
-           && (1 << (LOG)) > ((MAX_SKIP) + 1)                          \
-           && (MAX_SKIP) >= 7)                                         \
-         fputs ("\t.p2align 3\n", (FILE));                             \
-      }                                                                \
     }                                                                  \
   } while (0)
 
Index: gcc/config/i386/openbsdelf.h
===================================================================
--- gcc/config/i386/openbsdelf.h        (revision 239390)
+++ gcc/config/i386/openbsdelf.h        (working copy)
@@ -63,24 +63,16 @@ along with GCC; see the file COPYING3.  If not see
 
 /* A C statement to output to the stdio stream FILE an assembler
    command to advance the location counter to a multiple of 1<<LOG
-   bytes if it is within MAX_SKIP bytes.
+   bytes if it is within MAX_SKIP bytes.  */
 
-   This is used to align code labels according to Intel recommendations.  */
-
 #ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
 #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP)                   \
   do {                                                                 \
     if ((LOG) != 0) {                                                  \
-      if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
-      else {                                                           \
+      if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG)))                 \
+        fprintf ((FILE), "\t.p2align %d\n", (LOG));                    \
+      else                                                             \
        fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP));     \
-       /* Make sure that we have at least 8 byte alignment if > 8 byte \
-          alignment is preferred.  */                                  \
-       if ((LOG) > 3                                                   \
-           && (1 << (LOG)) > ((MAX_SKIP) + 1)                          \
-           && (MAX_SKIP) >= 7)                                         \
-         fputs ("\t.p2align 3\n", (FILE));                             \
-      }                                                                \
     }                                                                  \
   } while (0)
 #endif
Index: gcc/config/i386/x86-64.h
===================================================================
--- gcc/config/i386/x86-64.h    (revision 239390)
+++ gcc/config/i386/x86-64.h    (working copy)
@@ -65,16 +65,10 @@ see the files COPYING3 and COPYING.RUNTIME respect
 #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP)                   \
   do {                                                                 \
     if ((LOG) != 0) {                                                  \
-      if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
-      else {                                                           \
+      if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG)))                 \
+        fprintf ((FILE), "\t.p2align %d\n", (LOG));                    \
+      else                                                             \
        fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP));     \
-       /* Make sure that we have at least 8 byte alignment if > 8 byte \
-          alignment is preferred.  */                                  \
-       if ((LOG) > 3                                                   \
-           && (1 << (LOG)) > ((MAX_SKIP) + 1)                          \
-           && (MAX_SKIP) >= 7)                                         \
-         fputs ("\t.p2align 3\n", (FILE));                             \
-      }                                                                \
     }                                                                  \
   } while (0)
 #undef  ASM_OUTPUT_MAX_SKIP_PAD
Index: gcc/flags.h
===================================================================
--- gcc/flags.h (revision 239390)
+++ gcc/flags.h (working copy)
@@ -55,6 +55,7 @@ struct target_flag_state {
   int x_align_labels_log;
   int x_align_labels_max_skip;
   int x_align_functions_log;
+  int x_align_functions_max_skip;
 
   /* The excess precision currently in effect.  */
   enum excess_precision x_flag_excess_precision;
@@ -81,6 +82,8 @@ extern struct target_flag_state *this_target_flag_
   (this_target_flag_state->x_align_labels_max_skip)
 #define align_functions_log \
   (this_target_flag_state->x_align_functions_log)
+#define align_functions_max_skip \
+  (this_target_flag_state->x_align_functions_max_skip)
 #define flag_excess_precision \
   (this_target_flag_state->x_flag_excess_precision)
 
Index: gcc/toplev.c
===================================================================
--- gcc/toplev.c        (revision 239390)
+++ gcc/toplev.c        (working copy)
@@ -1177,29 +1177,58 @@ target_supports_section_anchors_p (void)
   return true;
 }
 
+static int
+parse_N_M (int *align, int *maxskip, const char *flag, const char *name)
+{
+  int _align = *align;
+  int _maxskip = *maxskip;
+
+  if (flag)
+    {
+      unsigned int n, m;
+      if (strchr (flag, ','))
+       {
+         if (sscanf (flag, "%u,%u", &n, &m) != 2) goto bad;
+         _maxskip = m;
+       }
+      else
+       {
+          if (sscanf (flag, "%u", &n) != 1) goto bad;
+         _maxskip = n;
+       }
+      _align = n;
+      if (_maxskip > 0)
+       _maxskip--; /* -falign-xyz=N,M means M-1 max bytes of padding, not M */
+    }
+
+normalize:
+  if (_align <= 0)
+    _align = 1;
+  if ((unsigned)_maxskip > (unsigned)_align)
+    _maxskip = _align - 1;
+
+  *align = _align;
+  *maxskip = _maxskip;
+  return floor_log2 (_align * 2 - 1);
+
+bad:
+  error_at (UNKNOWN_LOCATION, "-falign-%s parameter '%s' is bad", name, flag);
+  goto normalize;
+}
+
 /* Default the align_* variables to 1 if they're still unset, and
    set up the align_*_log variables.  */
 static void
 init_alignments (void)
 {
-  if (align_loops <= 0)
-    align_loops = 1;
-  if (align_loops_max_skip > align_loops)
-    align_loops_max_skip = align_loops - 1;
-  align_loops_log = floor_log2 (align_loops * 2 - 1);
-  if (align_jumps <= 0)
-    align_jumps = 1;
-  if (align_jumps_max_skip > align_jumps)
-    align_jumps_max_skip = align_jumps - 1;
-  align_jumps_log = floor_log2 (align_jumps * 2 - 1);
-  if (align_labels <= 0)
-    align_labels = 1;
-  align_labels_log = floor_log2 (align_labels * 2 - 1);
-  if (align_labels_max_skip > align_labels)
-    align_labels_max_skip = align_labels - 1;
-  if (align_functions <= 0)
-    align_functions = 1;
-  align_functions_log = floor_log2 (align_functions * 2 - 1);
+  align_loops_log = parse_N_M (&align_loops, &align_loops_max_skip,
+                              flag_align_loops, "loops");
+  align_jumps_log = parse_N_M (&align_jumps, &align_jumps_max_skip,
+                              flag_align_jumps, "jumps");
+  align_labels_log = parse_N_M (&align_labels, &align_labels_max_skip,
+                               flag_align_labels, "labels");
+  align_functions_log = parse_N_M (&align_functions, &align_functions_max_skip,
+                                  flag_align_functions, "functions");
 }
 
 /* Process the options that have been parsed.  */
Index: gcc/varasm.c
===================================================================
--- gcc/varasm.c        (revision 239390)
+++ gcc/varasm.c        (working copy)
@@ -1790,7 +1790,7 @@ assemble_start_function (tree decl, const char *fn
     {
 #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
       ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file,
-                                align_functions_log, align_functions - 1);
+                                align_functions_log, align_functions_max_skip);
 #else
       ASM_OUTPUT_ALIGN (asm_out_file, align_functions_log);
 #endif

Reply via email to