https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64681
Bug ID: 64681 Summary: gcc assign wrong register for arm inline assembly Product: gcc Version: 4.9.0 Status: UNCONFIRMED Severity: blocker Priority: P3 Component: inline-asm Assignee: unassigned at gcc dot gnu.org Reporter: zhongwei.yao at arm dot com Created attachment 34492 --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=34492&action=edit All related files. I compile the following code on Linux with arm-linux-androideabi-gcc (gcc version 4.9 20140827) using the following command: arm-linux-androideabi-g++ -mfloat-abi=softfp -mfpu=neon -mthumb -Wall -Wextra -march=armv7-a --sysroot=$ndk/platforms/android-21/arch-arm -O2 -Wall main.cpp ==c code start== #include <arm_neon.h> int main(void) { return 0; } char buffer[32]; void bar(int n) { int j = 0; int64x1_t s = vdup_n_s64(0); int64x1_t onev = vdup_n_s64(1); int64_t sum = 0; for (j = 0; j <= n; j++) { asm ("vsub.s64 %0, %0, d1" : "+w" (s): : "memory"); } sum = (int64_t)j; sum >>= vget_lane_s64(s, 0); if(sum >0) s = vsub_s64(s, onev); vst1_s64((int64_t*)buffer, s); } ==code end== It returns: /tmp/ccE0j9sL.s: Assembler messages: /tmp/ccE0j9sL.s:55: Error: invalid instruction shape -- `vsub.s64 s14,s14,d1' I think this is a bug in gcc: it assigns the wrong register for variable s. It should be d14 here, but gcc assigns s14. The generated assembly file is: ==asm code start== .syntax unified .arch armv7-a .eabi_attribute 27, 3 .fpu neon .eabi_attribute 20, 1 .eabi_attribute 21, 1 .eabi_attribute 23, 3 .eabi_attribute 24, 1 .eabi_attribute 25, 1 .eabi_attribute 26, 2 .eabi_attribute 30, 2 .eabi_attribute 34, 1 .eabi_attribute 18, 4 .thumb .file "main.cpp" .section .text.startup,"ax",%progbits .align 2 .global main .thumb .thumb_func .type main, %function main: .fnstart .LFB1870: @ args = 0, pretend = 0, frame = 0 @ frame_needed = 0, uses_anonymous_args = 0 @ link register save eliminated. 
movs r0, #0 bx lr .cantunwind .fnend .size main, .-main .text .align 2 .global _Z3bari .thumb .thumb_func .type _Z3bari, %function _Z3bari: .fnstart .LFB1871: @ args = 0, pretend = 0, frame = 0 @ frame_needed = 0, uses_anonymous_args = 0 cmp r0, #0 vmov.i32 d7, #0 @ di push {lr} blt .L5 adds r0, r0, #1 movs r2, #0 .L4: adds r2, r2, #1 cmp r2, r0 #APP @ 17 "src/main.cpp" 1 vsub.s64 s14, s14, d1 @ 0 "" 2 .thumb bne .L4 fmrs r1, s14 @ int asrs r3, r2, #31 rsb r0, r1, #32 subs lr, r1, #32 lsl r0, r3, r0 lsr r2, r2, r1 it pl asrpl lr, r3, lr orr r2, r2, r0 it pl orrpl r2, r2, lr asrs r3, r3, r1 cmp r2, #1 sbcs r3, r3, #0 blt .L5 vmov.i32 d16, #0xffffffff @ di vadd.i64 d7, d7, d16 .L5: ldr r3, .L10 .LPIC1: add r3, pc ldr r3, [r3] vst1.64 {d7}, [r3:64] ldr pc, [sp], #4 .L11: .align 2 .L10: .word buffer(GOT_PREL)+(.-(.LPIC1+4)) .cantunwind .fnend .size _Z3bari, .-_Z3bari .global buffer .bss .align 3 .type buffer, %object .size buffer, 32 buffer: .space 32 .ident "GCC: (GNU) 4.9 20140827 (prerelease)" .section .note.GNU-stack,"",%progbits ==asm code end==