On 09/30/2016 01:20 PM, Bernd Schmidt wrote:
On 09/29/2016 07:32 PM, Denys Vlasenko wrote:
On 09/29/2016 04:45 PM, Bernd Schmidt wrote:
On 09/28/2016 02:57 PM, Denys Vlasenko wrote:
-  /* Comes from final.c -- no real reason to change it.  */
-#define MAX_CODE_ALIGN 16
-
     case OPT_malign_loops_:
      warning_at (loc, 0, "-malign-loops is obsolete, use -falign-loops");
-      if (value > MAX_CODE_ALIGN)
-    error_at (loc, "-malign-loops=%d is not between 0 and %d",
-          value, MAX_CODE_ALIGN);
-      else
-    opts->x_align_loops = 1 << value;
       return true;

That does seem to be a functional change. I'll defer to Uros.

It would be awkward to translate -malign-loops=%d et al
to comma-separated string format.
Since this warning has been there for some 15 years already,
anyone who actually cares should have converted to the new options
long ago.

Hmm, if it's been 15 years, maybe it's time to remove these. Could you submit a 
patch separately?

Sure.

-  if (opts->x_align_functions <= 0)
+  if (opts->x_flag_align_functions && !opts->x_str_align_functions)

Are these conditions really equivalent? It looks like zero was
the default even when no -falign-functions was specified.
 Or is that overridden by init_alignments?

 {
-  if (opts->x_align_loops == 0)
+  /* -falign-foo without argument: supply one */
+  if (opts->x_flag_align_loops && !opts->x_str_align_loops)

Same here.

The execution flow for option parsing is somewhat convoluted, no doubt.

I found experimentally that these are the locations where
default alignment parameters are set when -falign-functions
is given with no arguments (or when it is implied by -O2).

I applied your latest two patches to experiment with them, and I see different
 behaviour before and after on x86_64-linux. There seems to be a difference
 in function alignment and label alignment at -O2.

Let me try harder, I was only checking -falign-functions...

My test program:

int g();
int f(int i)
{
        i *= 3;
        while (--i > 100) {
 L1:
                if (g())
                        goto L1;
                if (g())
                        goto L2;
        }
        return i;
 L2:
        return 123;
}

Before-and-after "gcc -O2 -S" assembly (after the patch is on the right):

        .text                                   .text
        .p2align 4,,15                          .p2align 4,,15
        .globl  f                               .p2align 3
        .type   f, @function                    .globl  f
f:                                              .type   f, @function
.LFB0:                                  f:
        .cfi_startproc                  .LFB0:
        pushq   %rbx                            .cfi_startproc
        .cfi_def_cfa_offset 16                  pushq   %rbx
        .cfi_offset 3, -16                      .cfi_def_cfa_offset 16
        leal    (%rdi,%rdi,2), %ebx             .cfi_offset 3, -16
        .p2align 4,,10                          leal    (%rdi,%rdi,2), %ebx
        .p2align 3                              .p2align 4,,10
.L2:                                    .L2:
        subl    $1, %ebx                        subl    $1, %ebx
        cmpl    $100, %ebx                      cmpl    $100, %ebx
        jle     .L1                             jle     .L1
        .p2align 4,,10                          .p2align 4,,10
        .p2align 3                      .L3:
.L3:                                            xorl    %eax, %eax
        xorl    %eax, %eax                      call    g
        call    g                               testl   %eax, %eax
        testl   %eax, %eax                      jne     .L3
        jne     .L3                             call    g
        call    g                               testl   %eax, %eax
        testl   %eax, %eax                      je      .L2
        je      .L2                             movl    $123, %ebx
        movl    $123, %ebx              .L4:
.L4:                                    .L1:
.L1:                                            movl    %ebx, %eax
        movl    %ebx, %eax                      popq    %rbx
        popq    %rbx                            .cfi_def_cfa_offset 8
        .cfi_def_cfa_offset 8                   ret
        ret                                     .cfi_endproc
        .cfi_endproc                    .LFE0:
.LFE0:


Yes, I see differences. ".p2align 3" appeared in function alignment.
The reason is that the old code had an optimization - it noticed that
".p2align 4,,15" _always_ aligns (because 2^4=15+1), thus ".p2align 3"
is superfluous. My patch doesn't do that. I have already fixed this
in the next version of the patch I'm going to send.

The other difference is that ".p2align 4,,10" is no longer followed
by ".p2align 3". Well... this one is harder to make happen.
It comes from here in gcc/final.c:

    case CODE_LABEL:
      /* The target port might emit labels in the output function for
         some insn, e.g. sh.c output_branchy_insn.  */
      if (CODE_LABEL_NUMBER (insn) <= max_labelno)
        {
          int align = LABEL_TO_ALIGNMENT (insn);
#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
          int max_skip = LABEL_TO_MAX_SKIP (insn);
#endif

          if (align && NEXT_INSN (insn))
            {
#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
              ASM_OUTPUT_MAX_SKIP_ALIGN (file, align, max_skip);  <===HERE
#else


The difficulty is that the (align, max_skip) tuple here is arrived at
by a rather complex process. We can't tell whether it is a label, jump
or loop alignment; it can even be none of them (rs6000 has an override).

I am inclined to do this:

              ASM_OUTPUT_MAX_SKIP_ALIGN (file, align, max_skip);
              /* Above, we don't know whether a label, jump or loop
                 alignment was used. Conservatively apply
                 label subalignment if any, not jump or loop
                 subalignment (they are almost always larger).  */
              ASM_OUTPUT_MAX_SKIP_ALIGN (file, align_labels[1].log,
                                         align_labels[1].maxskip);

With default -O2 parameters, this still wouldn't match the previous
behavior: the default label alignment is 0, so implied subalignment
is 0 too. I'll try to tweak processor_target_table[] and let you know
how it goes.

The more thorough way of handling this would be to stop carrying separate
(log, maxskip) pairs around, and instead pass a pointer to
struct align_flags[2], which contains two (log, maxskip) pairs.

Reply via email to