https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86848

            Bug ID: 86848
           Summary: ARM: (-O3 -march=armv7-a -mfpu=neon-vfpv4) vst1 wrong
                    alignment for `vst1.64 {d16-d17}, [r4 :64]` before
                    function call
           Product: gcc
           Version: 6.3.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: cjd at cjdns dot fr
  Target Milestone: ---

Created attachment 44504
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=44504&action=edit
.i file which can be compiled to an o file to observe the vst1.64

Comments:
Using -O3 and -mfpu=neon-vfpv4, I'm getting a wrong alignment when pushing
local variables to the stack before a function call.
Unfortunately, I was not able to reproduce this with a simple code snippet, but
I have an .i file which produces the assembly, and you can see the effect by
doing:
git clone git://github.com/cjdelisle/cjdns && cd cjdns
git checkout crashey
CC=arm-linux-gnueabihf-gcc-8 AR=arm-linux-gnueabihf-gcc-ar-8 \
RANLIB=arm-linux-gnueabihf-gcc-ranlib-8 \
CFLAGS="-marm -march=armv7-a -mfpu=neon-vfpv4" CROSS=1 NO_LTO=1 VERBOSE=1 ./do
## then copy ./build_linux/test_testcjdroute_c to the ARM device and run:
gdb ./test_testcjdroute_c -ex 'r RouteGen_test'


If you only want to see the assembler dump, you can download the attached .i
file, compile it with the compile command given below, then open the resulting
.o file in gdb and disassemble sockaddrToPrefix6.



Compiler version (default from Ubuntu):
root@dickbutt:/opt/cjdns# arm-linux-gnueabihf-gcc-8 -v
Using built-in specs.
COLLECT_GCC=arm-linux-gnueabihf-gcc-8
COLLECT_LTO_WRAPPER=/usr/lib/gcc-cross/arm-linux-gnueabihf/8/lto-wrapper
Target: arm-linux-gnueabihf
Configured with: ../src/configure -v --with-pkgversion='Ubuntu
8-20180414-1ubuntu2' --with-bugurl=file:///usr/share/doc/gcc-8/README.Bugs
--enable-languages=c,ada,c++,go,d,fortran,objc,obj-c++ --prefix=/usr
--with-gcc-major-version-only --with-as=/usr/bin/arm-linux-gnueabihf-as
--with-ld=/usr/bin/arm-linux-gnueabihf-ld --program-suffix=-8 --enable-shared
--enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext
--enable-threads=posix --libdir=/usr/lib --enable-nls --with-sysroot=/
--enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes
--with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-libitm
--disable-libquadmath --disable-libquadmath-support --enable-plugin
--enable-default-pie --with-system-zlib --with-target-system-zlib
--enable-multiarch --enable-multilib --disable-sjlj-exceptions
--with-arch=armv7-a --with-fpu=vfpv3-d16 --with-float=hard --with-mode=thumb
--disable-werror --enable-multilib --enable-checking=release
--build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=arm-linux-gnueabihf
--program-prefix=arm-linux-gnueabihf-
--includedir=/usr/arm-linux-gnueabihf/include
Thread model: posix
gcc version 8.0.1 20180414 (experimental) [trunk revision 259383] (Ubuntu
8-20180414-1ubuntu2)



Relevant compile command:
arm-linux-gnueabihf-gcc-8 -c -x cpp-output -o build_linux/tunnel_RouteGen_c.o
-std=c99 -Wall -Wextra -Werror -Wno-pointer-sign -Wmissing-prototypes -pedantic
-D linux=1 -D CJD_PACKAGE_VERSION="cjdns-v20.2-39-g5d561d65-dirty"
-Wno-unused-parameter -D Log_DEBUG -g -D NumberCompress_TYPE=v3x5x8 -D
Identity_CHECK=1 -D Allocator_USE_CANARIES=1 -D PARANOIA=1
-DHAS_ETH_INTERFACE=1 -fPIE -marm -march=armv7-a -mfpu=neon-vfpv4 -marm
-march=armv7-a -mfpu=neon-vfpv4 -marm -march=armv7-a -mfpu=neon-vfpv4
-D_FORTIFY_SOURCE=2 -fno-stack-protector -fstack-protector-all
-Wstack-protector -O3 build_linux/tunnel_RouteGen_c.o.i



Debugger output:
root@dirtysanchez:/home/user# gdb -q ./test_testcjdroute_c -ex 'r RouteGen_test'
Reading symbols from ./test_testcjdroute_c...done.
Starting program: /home/user/test_testcjdroute_c RouteGen_test
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/arm-linux-gnueabihf/libthread_db.so.1".
1533318603 DEBUG RouteGen_test.c:108 Forward

Program received signal SIGBUS, Bus error.
sockaddrToPrefix6 (sa=sa@entry=0xbeffeb98, allocator=<optimized out>) at
tunnel/RouteGen.c:164
164         Assert_true(pfx > -1);
(gdb) disassemble
Dump of assembler code for function sockaddrToPrefix6:
   0x0042a49c <+0>:     ldr     r3, [pc, #308]  ; 0x42a5d8
<sockaddrToPrefix6+316>
   0x0042a4a0 <+4>:     ldr     r2, [pc, #308]  ; 0x42a5dc
<sockaddrToPrefix6+320>
   0x0042a4a4 <+8>:     add     r3, pc, r3
   0x0042a4a8 <+12>:    push    {r4, r5, r6, r7, r8, lr}
   0x0042a4ac <+16>:    sub     sp, sp, #40     ; 0x28
   0x0042a4b0 <+20>:    ldr     r6, [r3, r2]
   0x0042a4b4 <+24>:    mov     r4, r1
   0x0042a4b8 <+28>:    add     r1, sp, #12
   0x0042a4bc <+32>:    mov     r5, r0
   0x0042a4c0 <+36>:    ldr     r3, [r6]
   0x0042a4c4 <+40>:    str     r3, [sp, #36]   ; 0x24
   0x0042a4c8 <+44>:    bl      0x4102a0 <Sockaddr_getAddress>
   0x0042a4cc <+48>:    cmp     r0, #16
   0x0042a4d0 <+52>:    bne     0x42a580 <sockaddrToPrefix6+228>
   0x0042a4d4 <+56>:    ldr     r3, [sp, #12]
   0x0042a4d8 <+60>:    add     r12, sp, #16
   0x0042a4dc <+64>:    cmp     r3, r12
   0x0042a4e0 <+68>:    bhi     0x42a4f0 <sockaddrToPrefix6+84>
   0x0042a4e4 <+72>:    add     r2, r3, #16
   0x0042a4e8 <+76>:    cmp     r12, r2
   0x0042a4ec <+80>:    bcc     0x42a5c0 <sockaddrToPrefix6+292>
   0x0042a4f0 <+84>:    ldr     r7, [pc, #232]  ; 0x42a5e0
<sockaddrToPrefix6+324>
   0x0042a4f4 <+88>:    ldr     r0, [r3]
   0x0042a4f8 <+92>:    ldr     r1, [r3, #4]
   0x0042a4fc <+96>:    add     r7, pc, r7
   0x0042a500 <+100>:   ldr     r2, [r3, #8]
   0x0042a504 <+104>:   ldr     r3, [r3, #12]
   0x0042a508 <+108>:   stmia   r12!, {r0, r1, r2, r3}
   0x0042a50c <+112>:   mov     r0, r4
   0x0042a510 <+116>:   mov     r1, r7
   0x0042a514 <+120>:   mov     r2, #160        ; 0xa0
   0x0042a518 <+124>:   bl      0x4062cc <Allocator__child>
   0x0042a51c <+128>:   mov     r3, r7
   0x0042a520 <+132>:   mov     r1, #161        ; 0xa1
   0x0042a524 <+136>:   mov     r2, #1
   0x0042a528 <+140>:   str     r1, [sp]
   0x0042a52c <+144>:   mov     r1, #24
   0x0042a530 <+148>:   mov     r8, r0
   0x0042a534 <+152>:   bl      0x405b74 <Allocator__calloc>
   0x0042a538 <+156>:   vldr    d16, [sp, #16]
   0x0042a53c <+160>:   vldr    d17, [sp, #24]
   0x0042a540 <+164>:   vrev64.8        q8, q8
   0x0042a544 <+168>:   mov     r4, r0
   0x0042a548 <+172>:   mov     r0, r5
   0x0042a54c <+176>:   vst1.64 {d16-d17}, [r4 :64]
---Type <return> to continue, or q <return> to quit---
=> 0x0042a550 <+180>:   bl      0x40fb18 <Sockaddr_getPrefix>
   0x0042a554 <+184>:   subs    r3, r0, #0
   0x0042a558 <+188>:   blt     0x42a5a4 <sockaddrToPrefix6+264>
   0x0042a55c <+192>:   ldr     r1, [sp, #36]   ; 0x24
   0x0042a560 <+196>:   mov     r0, r4
   0x0042a564 <+200>:   ldr     r2, [r6]
   0x0042a568 <+204>:   str     r3, [r4, #16]
   0x0042a56c <+208>:   cmp     r1, r2
   0x0042a570 <+212>:   str     r8, [r4, #20]
   0x0042a574 <+216>:   bne     0x42a5a0 <sockaddrToPrefix6+260>
   0x0042a578 <+220>:   add     sp, sp, #40     ; 0x28
   0x0042a57c <+224>:   pop     {r4, r5, r6, r7, r8, pc}
   0x0042a580 <+228>:   ldr     r3, [pc, #92]   ; 0x42a5e4
<sockaddrToPrefix6+328>
   0x0042a584 <+232>:   mov     r2, #158        ; 0x9e
   0x0042a588 <+236>:   ldr     r1, [pc, #88]   ; 0x42a5e8
<sockaddrToPrefix6+332>
   0x0042a58c <+240>:   ldr     r0, [pc, #88]   ; 0x42a5ec
<sockaddrToPrefix6+336>
   0x0042a590 <+244>:   add     r3, pc, r3
   0x0042a594 <+248>:   add     r1, pc, r1
   0x0042a598 <+252>:   add     r0, pc, r0
   0x0042a59c <+256>:   bl      0x403e1c <Assert_failure>
   0x0042a5a0 <+260>:   bl      0x40332c <__stack_chk_fail@plt>
   0x0042a5a4 <+264>:   ldr     r3, [pc, #68]   ; 0x42a5f0
<sockaddrToPrefix6+340>
   0x0042a5a8 <+268>:   mov     r1, r7
   0x0042a5ac <+272>:   ldr     r0, [pc, #64]   ; 0x42a5f4
<sockaddrToPrefix6+344>
   0x0042a5b0 <+276>:   mov     r2, #165        ; 0xa5
   0x0042a5b4 <+280>:   add     r3, pc, r3
   0x0042a5b8 <+284>:   add     r0, pc, r0
   0x0042a5bc <+288>:   bl      0x403e1c <Assert_failure>
   0x0042a5c0 <+292>:   ldr     r2, [pc, #48]   ; 0x42a5f8
<sockaddrToPrefix6+348>
   0x0042a5c4 <+296>:   mov     r1, #159        ; 0x9f
   0x0042a5c8 <+300>:   ldr     r0, [pc, #44]   ; 0x42a5fc
<sockaddrToPrefix6+352>
   0x0042a5cc <+304>:   add     r2, pc, r2
   0x0042a5d0 <+308>:   add     r0, pc, r0
   0x0042a5d4 <+312>:   bl      0x403e1c <Assert_failure>
   0x0042a5d8 <+316>:   andeq   r7, r9, r12, lsr #16
   0x0042a5dc <+320>:   ldrdeq  r0, [r0], -r12
   0x0042a5e0 <+324>:   andeq   r10, r7, r0, lsr #30
   0x0042a5e4 <+328>:   andeq   r10, r7, r4, lsr #29
   0x0042a5e8 <+332>:   andeq   r10, r7, r8, lsl #29
   0x0042a5ec <+336>:   andeq   r7, r7, r4, ror #4
   0x0042a5f0 <+340>:   andeq   r10, r7, r4, ror lr
   0x0042a5f4 <+344>:   andeq   r7, r7, r4, asr #4
   0x0042a5f8 <+348>:   muleq   r7, r12, r8
   0x0042a5fc <+352>:   andeq   r10, r7, r12, asr #28
End of assembler dump.
(gdb) print/x $r4
$1 = 0x4cbc34
(gdb)

Reply via email to