Severe problems with vectorizing stuff in 4.0.3 HEAD

Kean Johnston Fri, 14 Oct 2005 12:11:57 -0700

All,

I am getting a lot of test suite failures with almost all of
the vect/* tests. I am using pr18400.c from the test suite
as an example here, because it's about the smallest one I
can find. Here is what is generated at -O2:


        .file   "pr18400.c"
        .version        "01.01"
        .text
        .align 16
        .globl  sig_ill_handler
        .type   sig_ill_handler, @function
sig_ill_handler:
        pushl   %ebp
        movl    %esp, %ebp
        subl    $20, %esp
        pushl   $0
        call    exit
        .size   sig_ill_handler, .-sig_ill_handler
        .align 16
        .globl  check_vect
        .type   check_vect, @function
check_vect:
        pushl   %ebp
        movl    %esp, %ebp
        subl    $16, %esp
        pushl   $sig_ill_handler
        pushl   $4
        call    signal
/APP
        .byte 0xf2,0x0f,0x10,0xc0
/NO_APP
        popl    %eax
        popl    %edx
        pushl   $0
        pushl   $4
        call    signal
        addl    $16, %esp
        leave
        ret
        .size   check_vect, .-check_vect
        .section        .rodata
        .align 32
        .type   C.0.1905, @object
        .size   C.0.1905, 32
C.0.1905:
        .long   0
        .long   3
        .long   6
        .long   9
        .long   12
        .long   15
        .long   18
        .long   21
        .text
        .align 16
        .globl  main1
        .type   main1, @function
main1:
        pushl   %ebp
        movl    $8, %ecx
        movl    %esp, %ebp
        pushl   %edi
        cld
        pushl   %esi
        leal    -40(%ebp), %edi
        subl    $64, %esp
        movl    $C.0.1905, %esi
        rep
        movsl
        xorl    %edx, %edx
        leal    -40(%ebp), %esi
        leal    -72(%ebp), %ecx
        .align 16
.L6:
        leal    0(,%edx,4), %eax
        addl    $4, %edx
        cmpl    $8, %edx
*** At this point, the registers have the following values:
*** %eax = 0,  %ecx = 0x8047d84,  %edx = 4,  %ebx = 0x8047dec
*** %esi = 0x8047da4, %edi = 0x8047dc4, %ebp = 0x8047dcc
*** This is guaranteed to cause a SIGSEGV, and it does, because
*** %esi is not aligned on a 16-byte boundary. But ... see below ...
        movdqa  (%esi,%eax), %xmm0
        movdqa  %xmm0, (%ecx,%eax)
        jne     .L6
        movb    $1, %dl
        .align 16
.L8:
        movl    -4(%ecx,%edx,4), %eax
        cmpl    -4(%esi,%edx,4), %eax
        jne     .L18
        incl    %edx
        cmpl    $9, %edx
        jne     .L8
        addl    $64, %esp
        xorl    %eax, %eax
        popl    %esi
        popl    %edi
        popl    %ebp
        ret
.L18:
        call    abort
        .size   main1, .-main1
        .align 16
        .globl  main
        .type   main, @function
main:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %ecx
        pushl   %ecx
        andl    $-16, %esp
        subl    $16, %esp
        call    check_vect
        leave
*** Looks like it was trying to align the stack on a 16-byte
*** boundary here. But on entry into main1(), it's doing 3
*** pushes at the beginning. Thus the offsets into the stack
*** (like the leal -40(%ebp), %edi close to the top of main1)
*** appear to be incorrectly calculated.
        jmp     main1
        .size   main, .-main
        .ident  "GCC: (GNU) 4.0.3 20051013 (prerelease)"


That's the first problem. I then compiled with -O6, and got this:
        .file   "pr18400.c"
        .version        "01.01"
        .text
        .align 16
        .globl  sig_ill_handler
        .type   sig_ill_handler, @function
sig_ill_handler:
        pushl   %ebp
        movl    %esp, %ebp
        subl    $20, %esp
        pushl   $0
        call    exit
        .size   sig_ill_handler, .-sig_ill_handler
        .align 16
        .globl  check_vect
        .type   check_vect, @function
check_vect:
        pushl   %ebp
        movl    %esp, %ebp
        subl    $16, %esp
        pushl   $sig_ill_handler
        pushl   $4
        call    signal
/APP
        .byte 0xf2,0x0f,0x10,0xc0
/NO_APP
        popl    %eax
        popl    %edx
        pushl   $0
        pushl   $4
        call    signal
        addl    $16, %esp
        leave
        ret
        .size   check_vect, .-check_vect
        .section        .rodata
        .align 32
        .type   C.0.1905, @object
        .size   C.0.1905, 32
C.0.1905:
        .long   0
        .long   3
        .long   6
        .long   9
        .long   12
        .long   15
        .long   18
        .long   21
        .text
        .align 16
        .globl  main1
        .type   main1, @function
main1:
        pushl   %ebp
        movl    $8, %ecx
        movl    %esp, %ebp
        pushl   %edi
        cld
        pushl   %esi
        leal    -40(%ebp), %edi
        subl    $64, %esp
        movl    $C.0.1905, %esi
        rep
        movsl
        xorl    %edx, %edx
        leal    -40(%ebp), %esi
        leal    -72(%ebp), %ecx
        .align 16
.L6:
        leal    0(,%edx,4), %eax
        addl    $4, %edx
        cmpl    $8, %edx
        movdqa  (%esi,%eax), %xmm0
        movdqa  %xmm0, (%ecx,%eax)
        jne     .L6
        movb    $1, %dl
        .align 16
.L8:
        movl    -4(%ecx,%edx,4), %eax
        cmpl    -4(%esi,%edx,4), %eax
        jne     .L18
        incl    %edx
        cmpl    $9, %edx
        jne     .L8
        addl    $64, %esp
        xorl    %eax, %eax
        popl    %esi
        popl    %edi
        popl    %ebp
        ret
.L18:
        call    abort
        .size   main1, .-main1
        .align 16
        .globl  main
        .type   main, @function
main:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        subl    $64, %esp
        andl    $-16, %esp
        subl    $24, %esp
        pushl   $sig_ill_handler
        pushl   $4
        call    signal
/APP
        .byte 0xf2,0x0f,0x10,0xc0
/NO_APP
        popl    %ecx
        popl    %esi
        pushl   $0
        pushl   $4
        call    signal
        leal    -40(%ebp), %edi
        cld
        movl    $C.0.1905, %esi
        movl    $8, %ecx
        rep
        movsl
        xorl    %edx, %edx
        leal    -40(%ebp), %esi
        leal    -72(%ebp), %ecx
        addl    $16, %esp
        .align 16
.L20:
        leal    0(,%edx,4), %eax
        addl    $4, %edx
        cmpl    $8, %edx
*** At this point the registers have:
*** %eax 0, %esi = 0x8047da4, %ecx = 0x8047d84.
*** Again, this will cause SIGSEGV because neither %esi
*** nor %ecx is 16-byte aligned. They are both off by 4.
        movdqa  (%esi,%eax), %xmm0
        movdqa  %xmm0, (%ecx,%eax)
        jne     .L20
        movb    $1, %dl
        .align 16
.L22:
        movl    -4(%ecx,%edx,4), %eax
        cmpl    -4(%esi,%edx,4), %eax
        jne     .L31
        incl    %edx
        cmpl    $9, %edx
        jne     .L22
        leal    -8(%ebp), %esp
        xorl    %eax, %eax
        popl    %esi
        popl    %edi
        popl    %ebp
        ret
.L31:
        call    abort
        .size   main, .-main
        .ident  "GCC: (GNU) 4.0.3 20051013 (prerelease)"

Notice that GCC has decided to inline main1 into main, but the
code for main1 is left in place. Surely that's wrong?

So it seems like the offset calculations are always off by
4. I am guessing that's because of a push that isn't being taken
into account, perhaps the push of %ebp at the top of the function?

Any help at all *greatly* appreciated.

Kean

Severe problems with vectorizing stuff in 4.0.3 HEAD

Reply via email to