[Bug target/81646] i386 SSE2 compilation mode which preserves psABI stack alignment without requiring it

hjl.tools at gmail dot com Tue, 01 Aug 2017 11:11:39 -0700

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81646


--- Comment #6 from H.J. Lu <hjl.tools at gmail dot com> ---
(In reply to Florian Weimer from comment #5)
> (In reply to H.J. Lu from comment #4)
> > You can use -mstackrealign.
> 
> I don't want to realign the stack unconditionally for performance reasons. 
> I want to preserve alignment for callback functions, and give GCC the option
> to use SSE2 where beneficial.  If that's not possible, so be it, considering
> that it's only i386.

Have you tried -mstackrealign on your code?  I got:

[hjl@gnu-6 gcc]$ cat x.c
#include <x86intrin.h>

extern void foo1 (__m128, __m128, __m128);
extern void foo2 (__m128, __m128, __m128, __m128);

extern __m128 x;

void
bar1 (void)
{
  foo1 (x, x, x);
}

void
bar2 (void)
{
  foo2 (x, x, x, x);
}
[hjl@gnu-6 gcc]$ gcc -S -O2 -m32 x.c -mstackrealign  -msse2
[hjl@gnu-6 gcc]$ cat x.s
        .file   "x.c"
        .text
        .p2align 4,,15
        .globl  bar1
        .type   bar1, @function
bar1:
.LFB4910:
        .cfi_startproc
        movaps  x, %xmm0
        movaps  %xmm0, %xmm2
        movaps  %xmm0, %xmm1
        jmp     foo1
        .cfi_endproc
.LFE4910:
        .size   bar1, .-bar1
        .p2align 4,,15
        .globl  bar2
        .type   bar2, @function
bar2:
.LFB4911:
        .cfi_startproc
        leal    4(%esp), %ecx
        .cfi_def_cfa 1, 0
        andl    $-16, %esp
        pushl   -4(%ecx)
        pushl   %ebp
        .cfi_escape 0x10,0x5,0x2,0x75,0
        movl    %esp, %ebp
        pushl   %ecx
        .cfi_escape 0xf,0x3,0x75,0x7c,0x6
        subl    $20, %esp
        movaps  x, %xmm0
        movaps  %xmm0, %xmm2
        movaps  %xmm0, %xmm1
        movaps  %xmm0, (%esp)
        call    foo2
        addl    $16, %esp
        movl    -4(%ebp), %ecx
        .cfi_def_cfa 1, 0
        leave
        .cfi_restore 5
        leal    -4(%ecx), %esp
        .cfi_def_cfa 4, 4
        ret
        .cfi_endproc
.LFE4911:
        .size   bar2, .-bar2
        .ident  "GCC: (GNU) 7.1.1 20170709 (Red Hat 7.1.1-4)"
        .section        .note.GNU-stack,"",@progbits
[hjl@gnu-6 gcc]$ 

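GCC realigns the stack only in bar2, not in bar1, since there is no need for it
there: bar1 passes all three __m128 arguments in %xmm0-%xmm2 and tail-calls
foo1, while bar2 must store the fourth argument to the stack with movaps.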

[Bug target/81646] i386 SSE2 compilation mode which preserves psABI stack alignment without requiring it

Reply via email to