All, I am getting a lot of test suite failures with almost all of the vect/* tests. I am using pr18400.c from the test suite as an example here, because it's about the smallest one I can find. Here is what is generated at -O2:
.file "pr18400.c" .version "01.01" .text .align 16 .globl sig_ill_handler .type sig_ill_handler, @function sig_ill_handler: pushl %ebp movl %esp, %ebp subl $20, %esp pushl $0 call exit .size sig_ill_handler, .-sig_ill_handler .align 16 .globl check_vect .type check_vect, @function check_vect: pushl %ebp movl %esp, %ebp subl $16, %esp pushl $sig_ill_handler pushl $4 call signal /APP .byte 0xf2,0x0f,0x10,0xc0 /NO_APP popl %eax popl %edx pushl $0 pushl $4 call signal addl $16, %esp leave ret .size check_vect, .-check_vect .section .rodata .align 32 .type C.0.1905, @object .size C.0.1905, 32 C.0.1905: .long 0 .long 3 .long 6 .long 9 .long 12 .long 15 .long 18 .long 21 .text .align 16 .globl main1 .type main1, @function main1: pushl %ebp movl $8, %ecx movl %esp, %ebp pushl %edi cld pushl %esi leal -40(%ebp), %edi subl $64, %esp movl $C.0.1905, %esi rep movsl xorl %edx, %edx leal -40(%ebp), %esi leal -72(%ebp), %ecx .align 16 .L6: leal 0(,%edx,4), %eax addl $4, %edx cmpl $8, %edx *** At this point, the registers have the following values: *** %eax = 0, %ecx = 0x8047d84, %edx = 4, %ebx = 0x8047dec *** %esi = 0x8047da4, %edi = 0x8047dc4, %ebp = 0x8047dcc *** This is guaranteed to cause a SIGSEGV, and it does, because *** %esi is not aligned on a 16-byte boundary. But ... see below ... movdqa (%esi,%eax), %xmm0 movdqa %xmm0, (%ecx,%eax) jne .L6 movb $1, %dl .align 16 .L8: movl -4(%ecx,%edx,4), %eax cmpl -4(%esi,%edx,4), %eax jne .L18 incl %edx cmpl $9, %edx jne .L8 addl $64, %esp xorl %eax, %eax popl %esi popl %edi popl %ebp ret .L18: call abort .size main1, .-main1 .align 16 .globl main .type main, @function main: pushl %ebp movl %esp, %ebp pushl %ecx pushl %ecx andl $-16, %esp subl $16, %esp call check_vect leave *** Looks like it was trying to align the stack on a 16-byte *** boundary here. But on entry into main1(), it's doing 3 *** pushes at the beginning. 
Thus the offsets into the stack *** (like the leal -40(%ebp), %edi close to the top of main1) *** appear to be incorrectly calculated. jmp main1 .size main, .-main .ident "GCC: (GNU) 4.0.3 20051013 (prerelease)" That's the first problem. I then compiled with -O6, and got this: .file "pr18400.c" .version "01.01" .text .align 16 .globl sig_ill_handler .type sig_ill_handler, @function sig_ill_handler: pushl %ebp movl %esp, %ebp subl $20, %esp pushl $0 call exit .size sig_ill_handler, .-sig_ill_handler .align 16 .globl check_vect .type check_vect, @function check_vect: pushl %ebp movl %esp, %ebp subl $16, %esp pushl $sig_ill_handler pushl $4 call signal /APP .byte 0xf2,0x0f,0x10,0xc0 /NO_APP popl %eax popl %edx pushl $0 pushl $4 call signal addl $16, %esp leave ret .size check_vect, .-check_vect .section .rodata .align 32 .type C.0.1905, @object .size C.0.1905, 32 C.0.1905: .long 0 .long 3 .long 6 .long 9 .long 12 .long 15 .long 18 .long 21 .text .align 16 .globl main1 .type main1, @function main1: pushl %ebp movl $8, %ecx movl %esp, %ebp pushl %edi cld pushl %esi leal -40(%ebp), %edi subl $64, %esp movl $C.0.1905, %esi rep movsl xorl %edx, %edx leal -40(%ebp), %esi leal -72(%ebp), %ecx .align 16 .L6: leal 0(,%edx,4), %eax addl $4, %edx cmpl $8, %edx movdqa (%esi,%eax), %xmm0 movdqa %xmm0, (%ecx,%eax) jne .L6 movb $1, %dl .align 16 .L8: movl -4(%ecx,%edx,4), %eax cmpl -4(%esi,%edx,4), %eax jne .L18 incl %edx cmpl $9, %edx jne .L8 addl $64, %esp xorl %eax, %eax popl %esi popl %edi popl %ebp ret .L18: call abort .size main1, .-main1 .align 16 .globl main .type main, @function main: pushl %ebp movl %esp, %ebp pushl %edi pushl %esi subl $64, %esp andl $-16, %esp subl $24, %esp pushl $sig_ill_handler pushl $4 call signal /APP .byte 0xf2,0x0f,0x10,0xc0 /NO_APP popl %ecx popl %esi pushl $0 pushl $4 call signal leal -40(%ebp), %edi cld movl $C.0.1905, %esi movl $8, %ecx rep movsl xorl %edx, %edx leal -40(%ebp), %esi leal -72(%ebp), %ecx addl $16, %esp .align 16 .L20: leal 
0(,%edx,4), %eax addl $4, %edx cmpl $8, %edx *** At this point the registers have: *** %eax = 0, %esi = 0x8047da4, %ecx = 0x8047d84. *** Again, this will cause SIGSEGV because neither %esi *** nor %ecx is 16-byte aligned. They are both off by 4. movdqa (%esi,%eax), %xmm0 movdqa %xmm0, (%ecx,%eax) jne .L20 movb $1, %dl .align 16 .L22: movl -4(%ecx,%edx,4), %eax cmpl -4(%esi,%edx,4), %eax jne .L31 incl %edx cmpl $9, %edx jne .L22 leal -8(%ebp), %esp xorl %eax, %eax popl %esi popl %edi popl %ebp ret .L31: call abort .size main, .-main .ident "GCC: (GNU) 4.0.3 20051013 (prerelease)" Notice that GCC has decided to inline main1 into main, but the code for main1 is left in place. Surely that's wrong? So it seems like the offset calculations are always off by 4. I am guessing that's because of a push that isn't being taken into account, perhaps the push of %ebp at the top of the function? Any help at all *greatly* appreciated. Kean