https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69248

            Bug ID: 69248
           Summary: VFP register constraint 'w' is ignored when
                    optimizations are enabled
           Product: gcc
           Version: 5.3.1
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: inline-asm
          Assignee: unassigned at gcc dot gnu.org
          Reporter: vorfeed.canal at gmail dot com
  Target Milestone: ---

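It looks like optimization confuses something in GCC and causes it to ignore
the 'w' constraint. The unoptimized build emits the instruction
"vcvt.f64.s32 d16, s15", which the assembler accepts; the optimized build emits
"vcvt.f64.s32 s16, s15", which the assembler rejects. Note that the optimized
code does allocate a double-precision register (d8 is saved and later read
back), but the asm operand is printed under its single-precision alias s16.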

$ cat test2.cc 

struct FILE;
int fprintf(FILE *stream, const char *format, ...);
extern FILE *gcc_tests_file;

int foo() {
  do {
    double result;
    __asm__ __volatile__( "vcvt.f64.s32" " %[result], %[arg1]\n" :
[result]"=w"(result) : [arg1]"t"(0x01020304) );
    fprintf(gcc_tests_file, "vcvt.f64.s32: ");
    fprintf(gcc_tests_file, "%.8g", result);
  } while (false);
}
$ arm-linux-gnueabihf-g++-5 -march=armv7-a -mfpu=vfpv3-d16 -mfloat-abi=softfp
-mfpu=neon -mtls-dialect=gnu -O0 test2.cc -o-
        .arch armv7-a
        .fpu neon
        .eabi_attribute 20, 1
        .eabi_attribute 21, 1
        .eabi_attribute 23, 3
        .eabi_attribute 24, 1
        .eabi_attribute 25, 1
        .eabi_attribute 26, 2
        .eabi_attribute 30, 6
        .eabi_attribute 34, 1
        .eabi_attribute 18, 4
        .file   "test2.cc"
        .section        .rodata
        .align  2
.LC0:
        .ascii  "vcvt.f64.s32\000"
        .align  2
.LC1:
        .ascii  "%.8g\000"
        .text
        .align  2
        .global _Z3foov
        .syntax unified
        .arm
        .type   _Z3foov, %function
_Z3foov:
        .fnstart
.LFB0:
        @ args = 0, pretend = 0, frame = 8
        @ frame_needed = 1, uses_anonymous_args = 0
        push    {fp, lr}
        .save {fp, lr}
        .setfp fp, sp, #4
        add     fp, sp, #4
        .pad #8
        sub     sp, sp, #8
        mov     r3, #772
        movt    r3, 258
        vmov    s15, r3 @ int
        .syntax divided
@ 8 "test2.cc" 1
        vcvt.f64.s32 d16, s15

@ 0 "" 2
        .syntax unified
        vstr.64 d16, [fp, #-12]
        movw    r3, #:lower16:gcc_tests_file
        movt    r3, #:upper16:gcc_tests_file
        ldr     r3, [r3]
        movw    r1, #:lower16:.LC0
        movt    r1, #:upper16:.LC0
        mov     r0, r3
        bl      _Z7fprintfP4FILEPKcz
        movw    r3, #:lower16:gcc_tests_file
        movt    r3, #:upper16:gcc_tests_file
        ldr     r0, [r3]
        ldrd    r2, [fp, #-12]
        movw    r1, #:lower16:.LC1
        movt    r1, #:upper16:.LC1
        bl      _Z7fprintfP4FILEPKcz
        nop
        mov     r0, r3
        sub     sp, fp, #4
        @ sp needed
        pop     {fp, pc}
        .fnend
        .size   _Z3foov, .-_Z3foov
        .ident  "GCC: (Ubuntu/Linaro 5.3.1-5ubuntu1) 5.3.1 20160101"
        .section        .note.GNU-stack,"",%progbits
$ arm-linux-gnueabihf-g++-5 -march=armv7-a -mfpu=vfpv3-d16 -mfloat-abi=softfp
-mfpu=neon -mtls-dialect=gnu -O3 test2.cc -o-
        .arch armv7-a
        .fpu neon
        .eabi_attribute 20, 1
        .eabi_attribute 21, 1
        .eabi_attribute 23, 3
        .eabi_attribute 24, 1
        .eabi_attribute 25, 1
        .eabi_attribute 26, 2
        .eabi_attribute 30, 2
        .eabi_attribute 34, 1
        .eabi_attribute 18, 4
        .file   "test2.cc"
        .text
        .align  2
        .global _Z3foov
        .syntax unified
        .arm
        .type   _Z3foov, %function
_Z3foov:
        .fnstart
.LFB0:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        mov     r3, #772
        movt    r3, 258
        push    {r4, lr}
        .save {r4, lr}
        vmov    s15, r3 @ int
        vpush.64        {d8}
        .vsave {d8}
        .syntax divided
@ 8 "test2.cc" 1
        vcvt.f64.s32 s16, s15

@ 0 "" 2
        .syntax unified
        movw    r4, #:lower16:gcc_tests_file
        movt    r4, #:upper16:gcc_tests_file
        movw    r1, #:lower16:.LC0
        movt    r1, #:upper16:.LC0
        ldr     r0, [r4]
        bl      _Z7fprintfP4FILEPKcz
        vmov    r2, r3, d8
        vldm    sp!, {d8}
        movw    r1, #:lower16:.LC1
        ldr     r0, [r4]
        movt    r1, #:upper16:.LC1
        pop     {r4, lr}
        b       _Z7fprintfP4FILEPKcz
        .fnend
        .size   _Z3foov, .-_Z3foov
        .section        .rodata.str1.4,"aMS",%progbits,1
        .align  2
.LC0:
        .ascii  "vcvt.f64.s32\000"
        .space  3
.LC1:
        .ascii  "%.8g\000"
        .ident  "GCC: (Ubuntu/Linaro 5.3.1-5ubuntu1) 5.3.1 20160101"
        .section        .note.GNU-stack,"",%progbits

Reply via email to