https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115451

            Bug ID: 115451
           Summary: ARM neon: float32 comparison intrinsics get scalar
                    implementation since GCC 11
           Product: gcc
           Version: 11.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: povilas at radix dot lt
  Target Milestone: ---

The following code:

#include <arm_neon.h>

uint32x4_t test(float32x4_t a, float32x4_t b)
{
    return vcgtq_f32(a, b);
}

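results in the following assembly when compiled with GCC 11 through GCC 14.1
(g++ -O3 -mfpu=neon test.cc). Instead of a single vector compare (vcgt.f32),
each of the four lanes is compared with a scalar vcmpe.f32 and the result
vector is assembled on the stack. The problem does not happen on ARM64, nor
when -funsafe-math-optimizations is enabled.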

_Z4test19__simd128_float32_tS_:
        .fnstart
.LFB10719:
        vmov.32 r3, d0[1]
        vcmpe.f32       s0, s4
        sub     sp, sp, #16
        vmrs    APSR_nzcv, FPSCR
        vmov    s10, r3
        vmov.32 r3, d2[1]
        vmov    s11, r3
        vmov.32 r3, d1[0]
        vcmpe.f32       s10, s11
        vmov    s12, r3
        vmov.32 r3, d3[0]
        vmov    s13, r3
        vmov.32 r3, d1[1]
        vmov    s14, r3
        vmov.32 r3, d3[1]
        vmov    s15, r3
        ite     gt
        movgt   r3, #-1
        movle   r3, #0
        vmrs    APSR_nzcv, FPSCR
        vcmpe.f32       s12, s13
        str     r3, [sp]
        ite     gt
        movgt   r3, #-1
        movle   r3, #0
        vmrs    APSR_nzcv, FPSCR
        vcmpe.f32       s14, s15
        str     r3, [sp, #4]
        ite     gt
        movgt   r3, #-1
        movle   r3, #0
        vmrs    APSR_nzcv, FPSCR
        str     r3, [sp, #8]
        ite     gt
        movgt   r3, #-1
        movle   r3, #0
        str     r3, [sp, #12]
        vld1.64 {d0-d1}, [sp:64]
        add     sp, sp, #16
        @ sp needed
        bx      lr

Reply via email to