https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69248
Bug ID: 69248 Summary: VFP register constraint 'w' is ignored when optimizations are enabled Product: gcc Version: 5.3.1 Status: UNCONFIRMED Severity: normal Priority: P3 Component: inline-asm Assignee: unassigned at gcc dot gnu.org Reporter: vorfeed.canal at gmail dot com Target Milestone: --- It looks like optimizations confuse something in GCC and cause it to ignore the 'w' constraint. The unoptimized version produces the instruction "vcvt.f64.s32 d16, s15", which is accepted by the assembler; the optimized version produces "vcvt.f64.s32 s16, s15", which the assembler rejects. $ cat test2.cc struct FILE; int fprintf(FILE *stream, const char *format, ...); extern FILE *gcc_tests_file; int foo() { do { double result; __asm__ __volatile__( "vcvt.f64.s32" " %[result], %[arg1]\n" : [result]"=w"(result) : [arg1]"t"(0x01020304) ); fprintf(gcc_tests_file, "vcvt.f64.s32: "); fprintf(gcc_tests_file, "%.8g", result); } while (false); } $ arm-linux-gnueabihf-g++-5 -march=armv7-a -mfpu=vfpv3-d16 -mfloat-abi=softfp -mfpu=neon -mtls-dialect=gnu -O0 test2.cc -o- .arch armv7-a .fpu neon .eabi_attribute 20, 1 .eabi_attribute 21, 1 .eabi_attribute 23, 3 .eabi_attribute 24, 1 .eabi_attribute 25, 1 .eabi_attribute 26, 2 .eabi_attribute 30, 6 .eabi_attribute 34, 1 .eabi_attribute 18, 4 .file "test2.cc" .section .rodata .align 2 .LC0: .ascii "vcvt.f64.s32\000" .align 2 .LC1: .ascii "%.8g\000" .text .align 2 .global _Z3foov .syntax unified .arm .type _Z3foov, %function _Z3foov: .fnstart .LFB0: @ args = 0, pretend = 0, frame = 8 @ frame_needed = 1, uses_anonymous_args = 0 push {fp, lr} .save {fp, lr} .setfp fp, sp, #4 add fp, sp, #4 .pad #8 sub sp, sp, #8 mov r3, #772 movt r3, 258 vmov s15, r3 @ int .syntax divided @ 8 "test2.cc" 1 vcvt.f64.s32 d16, s15 @ 0 "" 2 .syntax unified vstr.64 d16, [fp, #-12] movw r3, #:lower16:gcc_tests_file movt r3, #:upper16:gcc_tests_file ldr r3, [r3] movw r1, #:lower16:.LC0 movt r1, #:upper16:.LC0 mov r0, r3 bl _Z7fprintfP4FILEPKcz movw r3, #:lower16:gcc_tests_file movt r3, 
#:upper16:gcc_tests_file ldr r0, [r3] ldrd r2, [fp, #-12] movw r1, #:lower16:.LC1 movt r1, #:upper16:.LC1 bl _Z7fprintfP4FILEPKcz nop mov r0, r3 sub sp, fp, #4 @ sp needed pop {fp, pc} .fnend .size _Z3foov, .-_Z3foov .ident "GCC: (Ubuntu/Linaro 5.3.1-5ubuntu1) 5.3.1 20160101" .section .note.GNU-stack,"",%progbits $ arm-linux-gnueabihf-g++-5 -march=armv7-a -mfpu=vfpv3-d16 -mfloat-abi=softfp -mfpu=neon -mtls-dialect=gnu -O3 test2.cc -o- .arch armv7-a .fpu neon .eabi_attribute 20, 1 .eabi_attribute 21, 1 .eabi_attribute 23, 3 .eabi_attribute 24, 1 .eabi_attribute 25, 1 .eabi_attribute 26, 2 .eabi_attribute 30, 2 .eabi_attribute 34, 1 .eabi_attribute 18, 4 .file "test2.cc" .text .align 2 .global _Z3foov .syntax unified .arm .type _Z3foov, %function _Z3foov: .fnstart .LFB0: @ args = 0, pretend = 0, frame = 0 @ frame_needed = 0, uses_anonymous_args = 0 mov r3, #772 movt r3, 258 push {r4, lr} .save {r4, lr} vmov s15, r3 @ int vpush.64 {d8} .vsave {d8} .syntax divided @ 8 "test2.cc" 1 vcvt.f64.s32 s16, s15 @ 0 "" 2 .syntax unified movw r4, #:lower16:gcc_tests_file movt r4, #:upper16:gcc_tests_file movw r1, #:lower16:.LC0 movt r1, #:upper16:.LC0 ldr r0, [r4] bl _Z7fprintfP4FILEPKcz vmov r2, r3, d8 vldm sp!, {d8} movw r1, #:lower16:.LC1 ldr r0, [r4] movt r1, #:upper16:.LC1 pop {r4, lr} b _Z7fprintfP4FILEPKcz .fnend .size _Z3foov, .-_Z3foov .section .rodata.str1.4,"aMS",%progbits,1 .align 2 .LC0: .ascii "vcvt.f64.s32\000" .space 3 .LC1: .ascii "%.8g\000" .ident "GCC: (Ubuntu/Linaro 5.3.1-5ubuntu1) 5.3.1 20160101" .section .note.GNU-stack,"",%progbits