https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86848
Bug ID: 86848
Summary: ARM: (-O3 -march=armv7-a -mfpu=neon-vfpv4) vst1 wrong alignment for `vst1.64 {d16-d17}, [r4 :64]` before function call
Product: gcc
Version: 6.3.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: cjd at cjdns dot fr
Target Milestone: ---

Created attachment 44504 --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=44504&action=edit
.i file which can be compiled to a .o file to observe the vst1.64

Comments:
Using -O3 and -mfpu=neon-vfpv4, I'm getting a wrong alignment when pushing local variables to the stack before a function call. (In the gdb session below, r4 is 0x4cbc34 when SIGBUS is raised: that address is 4-byte aligned but not 8-byte aligned, whereas the `:64` qualifier on the vst1.64 requires a 64-bit-aligned address.)

Unfortunately, I was not able to reproduce this with a simple code snippet, but I have a .i file which produces the assembly, and you can see the effect by doing:

git clone git://github.com/cjdelisle/cjdns && cd cjdns
git checkout crashey
CC=arm-linux-gnueabihf-gcc-8 AR=arm-linux-gnueabihf-gcc-ar-8 RANLIB=arm-linux-gnueabihf-gcc-ranlib-8 CFLAGS="-marm -march=armv7-a -mfpu=neon-vfpv4" CROSS=1 NO_LTO=1 VERBOSE=1 ./do
## copying ./build_linux/test_testcjdroute_c to the ARM device and running
gdb ./test_testcjdroute_c -ex 'r RouteGen_test'

If you only want to see the assembler dump, you can download the attached .i file, compile it with the compile command given below, then open the resulting .o file in gdb and disassemble sockaddrToPrefix6.

Compiler version (default from Ubuntu):
root@dickbutt:/opt/cjdns# arm-linux-gnueabihf-gcc-8 -v
Using built-in specs.
COLLECT_GCC=arm-linux-gnueabihf-gcc-8 COLLECT_LTO_WRAPPER=/usr/lib/gcc-cross/arm-linux-gnueabihf/8/lto-wrapper Target: arm-linux-gnueabihf Configured with: ../src/configure -v --with-pkgversion='Ubuntu 8-20180414-1ubuntu2' --with-bugurl=file:///usr/share/doc/gcc-8/README.Bugs --enable-languages=c,ada,c++,go,d,fortran,objc,obj-c++ --prefix=/usr --with-gcc-major-version-only --with-as=/usr/bin/arm-linux-gnueabihf-as --with-ld=/usr/bin/arm-linux-gnueabihf-ld --program-suffix=-8 --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-libitm --disable-libquadmath --disable-libquadmath-support --enable-plugin --enable-default-pie --with-system-zlib --with-target-system-zlib --enable-multiarch --enable-multilib --disable-sjlj-exceptions --with-arch=armv7-a --with-fpu=vfpv3-d16 --with-float=hard --with-mode=thumb --disable-werror --enable-multilib --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=arm-linux-gnueabihf --program-prefix=arm-linux-gnueabihf- --includedir=/usr/arm-linux-gnueabihf/include Thread model: posix gcc version 8.0.1 20180414 (experimental) [trunk revision 259383] (Ubuntu 8-20180414-1ubuntu2) Relevant compile command: arm-linux-gnueabihf-gcc-8 -c -x cpp-output -o build_linux/tunnel_RouteGen_c.o -std=c99 -Wall -Wextra -Werror -Wno-pointer-sign -Wmissing-prototypes -pedantic -D linux=1 -D CJD_PACKAGE_VERSION="cjdns-v20.2-39-g5d561d65-dirty" -Wno-unused-parameter -D Log_DEBUG -g -D NumberCompress_TYPE=v3x5x8 -D Identity_CHECK=1 -D Allocator_USE_CANARIES=1 -D PARANOIA=1 -DHAS_ETH_INTERFACE=1 -fPIE -marm -march=armv7-a -mfpu=neon-vfpv4 -marm -march=armv7-a -mfpu=neon-vfpv4 -marm -march=armv7-a -mfpu=neon-vfpv4 -D_FORTIFY_SOURCE=2 -fno-stack-protector -fstack-protector-all 
-Wstack-protector -O3 build_linux/tunnel_RouteGen_c.o.i Debugger output: root@dirtysanchez:/home/user# gdb -q ./test_testcjdroute_c -ex 'r RouteGen_test' Reading symbols from ./test_testcjdroute_c...done. Starting program: /home/user/test_testcjdroute_c RouteGen_test [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib/arm-linux-gnueabihf/libthread_db.so.1". 1533318603 DEBUG RouteGen_test.c:108 Forward Program received signal SIGBUS, Bus error. sockaddrToPrefix6 (sa=sa@entry=0xbeffeb98, allocator=<optimized out>) at tunnel/RouteGen.c:164 164 Assert_true(pfx > -1); (gdb) disassemble Dump of assembler code for function sockaddrToPrefix6: 0x0042a49c <+0>: ldr r3, [pc, #308] ; 0x42a5d8 <sockaddrToPrefix6+316> 0x0042a4a0 <+4>: ldr r2, [pc, #308] ; 0x42a5dc <sockaddrToPrefix6+320> 0x0042a4a4 <+8>: add r3, pc, r3 0x0042a4a8 <+12>: push {r4, r5, r6, r7, r8, lr} 0x0042a4ac <+16>: sub sp, sp, #40 ; 0x28 0x0042a4b0 <+20>: ldr r6, [r3, r2] 0x0042a4b4 <+24>: mov r4, r1 0x0042a4b8 <+28>: add r1, sp, #12 0x0042a4bc <+32>: mov r5, r0 0x0042a4c0 <+36>: ldr r3, [r6] 0x0042a4c4 <+40>: str r3, [sp, #36] ; 0x24 0x0042a4c8 <+44>: bl 0x4102a0 <Sockaddr_getAddress> 0x0042a4cc <+48>: cmp r0, #16 0x0042a4d0 <+52>: bne 0x42a580 <sockaddrToPrefix6+228> 0x0042a4d4 <+56>: ldr r3, [sp, #12] 0x0042a4d8 <+60>: add r12, sp, #16 0x0042a4dc <+64>: cmp r3, r12 0x0042a4e0 <+68>: bhi 0x42a4f0 <sockaddrToPrefix6+84> 0x0042a4e4 <+72>: add r2, r3, #16 0x0042a4e8 <+76>: cmp r12, r2 0x0042a4ec <+80>: bcc 0x42a5c0 <sockaddrToPrefix6+292> 0x0042a4f0 <+84>: ldr r7, [pc, #232] ; 0x42a5e0 <sockaddrToPrefix6+324> 0x0042a4f4 <+88>: ldr r0, [r3] 0x0042a4f8 <+92>: ldr r1, [r3, #4] 0x0042a4fc <+96>: add r7, pc, r7 0x0042a500 <+100>: ldr r2, [r3, #8] 0x0042a504 <+104>: ldr r3, [r3, #12] 0x0042a508 <+108>: stmia r12!, {r0, r1, r2, r3} 0x0042a50c <+112>: mov r0, r4 0x0042a510 <+116>: mov r1, r7 0x0042a514 <+120>: mov r2, #160 ; 0xa0 0x0042a518 <+124>: bl 0x4062cc <Allocator__child> 
0x0042a51c <+128>: mov r3, r7 0x0042a520 <+132>: mov r1, #161 ; 0xa1 0x0042a524 <+136>: mov r2, #1 0x0042a528 <+140>: str r1, [sp] 0x0042a52c <+144>: mov r1, #24 0x0042a530 <+148>: mov r8, r0 0x0042a534 <+152>: bl 0x405b74 <Allocator__calloc> 0x0042a538 <+156>: vldr d16, [sp, #16] 0x0042a53c <+160>: vldr d17, [sp, #24] 0x0042a540 <+164>: vrev64.8 q8, q8 0x0042a544 <+168>: mov r4, r0 0x0042a548 <+172>: mov r0, r5 0x0042a54c <+176>: vst1.64 {d16-d17}, [r4 :64] ---Type <return> to continue, or q <return> to quit--- => 0x0042a550 <+180>: bl 0x40fb18 <Sockaddr_getPrefix> 0x0042a554 <+184>: subs r3, r0, #0 0x0042a558 <+188>: blt 0x42a5a4 <sockaddrToPrefix6+264> 0x0042a55c <+192>: ldr r1, [sp, #36] ; 0x24 0x0042a560 <+196>: mov r0, r4 0x0042a564 <+200>: ldr r2, [r6] 0x0042a568 <+204>: str r3, [r4, #16] 0x0042a56c <+208>: cmp r1, r2 0x0042a570 <+212>: str r8, [r4, #20] 0x0042a574 <+216>: bne 0x42a5a0 <sockaddrToPrefix6+260> 0x0042a578 <+220>: add sp, sp, #40 ; 0x28 0x0042a57c <+224>: pop {r4, r5, r6, r7, r8, pc} 0x0042a580 <+228>: ldr r3, [pc, #92] ; 0x42a5e4 <sockaddrToPrefix6+328> 0x0042a584 <+232>: mov r2, #158 ; 0x9e 0x0042a588 <+236>: ldr r1, [pc, #88] ; 0x42a5e8 <sockaddrToPrefix6+332> 0x0042a58c <+240>: ldr r0, [pc, #88] ; 0x42a5ec <sockaddrToPrefix6+336> 0x0042a590 <+244>: add r3, pc, r3 0x0042a594 <+248>: add r1, pc, r1 0x0042a598 <+252>: add r0, pc, r0 0x0042a59c <+256>: bl 0x403e1c <Assert_failure> 0x0042a5a0 <+260>: bl 0x40332c <__stack_chk_fail@plt> 0x0042a5a4 <+264>: ldr r3, [pc, #68] ; 0x42a5f0 <sockaddrToPrefix6+340> 0x0042a5a8 <+268>: mov r1, r7 0x0042a5ac <+272>: ldr r0, [pc, #64] ; 0x42a5f4 <sockaddrToPrefix6+344> 0x0042a5b0 <+276>: mov r2, #165 ; 0xa5 0x0042a5b4 <+280>: add r3, pc, r3 0x0042a5b8 <+284>: add r0, pc, r0 0x0042a5bc <+288>: bl 0x403e1c <Assert_failure> 0x0042a5c0 <+292>: ldr r2, [pc, #48] ; 0x42a5f8 <sockaddrToPrefix6+348> 0x0042a5c4 <+296>: mov r1, #159 ; 0x9f 0x0042a5c8 <+300>: ldr r0, [pc, #44] ; 0x42a5fc <sockaddrToPrefix6+352> 
0x0042a5cc <+304>: add r2, pc, r2 0x0042a5d0 <+308>: add r0, pc, r0 0x0042a5d4 <+312>: bl 0x403e1c <Assert_failure> 0x0042a5d8 <+316>: andeq r7, r9, r12, lsr #16 0x0042a5dc <+320>: ldrdeq r0, [r0], -r12 0x0042a5e0 <+324>: andeq r10, r7, r0, lsr #30 0x0042a5e4 <+328>: andeq r10, r7, r4, lsr #29 0x0042a5e8 <+332>: andeq r10, r7, r8, lsl #29 0x0042a5ec <+336>: andeq r7, r7, r4, ror #4 0x0042a5f0 <+340>: andeq r10, r7, r4, ror lr 0x0042a5f4 <+344>: andeq r7, r7, r4, asr #4 0x0042a5f8 <+348>: muleq r7, r12, r8 0x0042a5fc <+352>: andeq r10, r7, r12, asr #28 End of assembler dump. (gdb) print/x $r4 $1 = 0x4cbc34 (gdb)