https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120027

            Bug ID: 120027
           Summary: Missing simplifications of SVE uxtb intrinsics
           Product: gcc
           Version: 15.0
            Status: UNCONFIRMED
          Keywords: aarch64-sve, missed-optimization
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: ktkachov at gcc dot gnu.org
  Target Milestone: ---
            Target: aarch64

Some cases:
#include <arm_sve.h>

#define UXT(SZ, TY) \
  svuint##SZ##_t uxt##TY##_z_##SZ(svuint##SZ##_t x) { return svext##TY##_z(svptrue_b##SZ(), x); } \
  svuint##SZ##_t uxt##TY##_m_##SZ(svuint##SZ##_t x, svuint##SZ##_t y) { return svext##TY##_m(y, svptrue_b##SZ(), x); } \
  svuint##SZ##_t uxt##TY##_x_##SZ(svbool_t p, svuint##SZ##_t x) { return svext##TY##_x(p, x); } \
  svuint##SZ##_t uxt##TY##_z_##SZ##_no_ptrue(svbool_t p, svuint##SZ##_t x) { return svext##TY##_z(p, x); } \
  svuint##SZ##_t uxt##TY##_m_##SZ##_no_ptrue(svbool_t p, svuint##SZ##_t x, svuint##SZ##_t y) { return svext##TY##_m(y, p, x); }

UXT(64, b)
UXT(32, b)
UXT(16, b)

UXT(64, h)
UXT(32, h)

UXT(64, w)

On AArch64 SVE, GCC generates more code than Clang for some of these functions (for example, the _m variants).
GCC:
uxtb_z_64:
        and     z0.d, z0.d, #0xff
        ret
uxtb_m_64:
        ptrue   p3.b, all
        mov     z31.d, z0.d
        movprfx z0, z1
        uxtb    z0.d, p3/m, z31.d
        ret
uxtb_x_64:
        and     z0.d, z0.d, #0xff
        ret
uxtb_z_64_no_ptrue:
        mov     z31.d, z0.d
        movprfx z0.d, p0/z, z31.d
        uxtb    z0.d, p0/m, z31.d
        ret
uxtb_m_64_no_ptrue:
        mov     z31.d, z0.d
        movprfx z0, z1
        uxtb    z0.d, p0/m, z31.d
        ret
uxtb_z_32:
        and     z0.s, z0.s, #0xff
        ret
uxtb_m_32:
        ptrue   p3.b, all
        mov     z31.d, z0.d
        movprfx z0, z1
        uxtb    z0.s, p3/m, z31.s
        ret
uxtb_x_32:
        and     z0.s, z0.s, #0xff
        ret
uxtb_z_32_no_ptrue:
        mov     z31.d, z0.d
        movprfx z0.s, p0/z, z31.s
        uxtb    z0.s, p0/m, z31.s
        ret
uxtb_m_32_no_ptrue:
        mov     z31.d, z0.d
        movprfx z0, z1
        uxtb    z0.s, p0/m, z31.s
        ret
uxtb_z_16:
        and     z0.h, z0.h, #0xff
        ret
uxtb_m_16:
        ptrue   p3.b, all
        mov     z31.d, z0.d
        movprfx z0, z1
        uxtb    z0.h, p3/m, z31.h
        ret
uxtb_x_16:
        and     z0.h, z0.h, #0xff
        ret
uxtb_z_16_no_ptrue:
        mov     z31.d, z0.d
        movprfx z0.h, p0/z, z31.h
        uxtb    z0.h, p0/m, z31.h
        ret
uxtb_m_16_no_ptrue:
        mov     z31.d, z0.d
        movprfx z0, z1
        uxtb    z0.h, p0/m, z31.h
        ret
uxth_z_64:
        and     z0.d, z0.d, #0xffff
        ret
uxth_m_64:
        ptrue   p3.b, all
        mov     z31.d, z0.d
        movprfx z0, z1
        uxth    z0.d, p3/m, z31.d
        ret
uxth_x_64:
        and     z0.d, z0.d, #0xffff
        ret
uxth_z_64_no_ptrue:
        mov     z31.d, z0.d
        movprfx z0.d, p0/z, z31.d
        uxth    z0.d, p0/m, z31.d
        ret
uxth_m_64_no_ptrue:
        mov     z31.d, z0.d
        movprfx z0, z1
        uxth    z0.d, p0/m, z31.d
        ret
uxth_z_32:
        and     z0.s, z0.s, #0xffff
        ret
uxth_m_32:
        ptrue   p3.b, all
        mov     z31.d, z0.d
        movprfx z0, z1
        uxth    z0.s, p3/m, z31.s
        ret
uxth_x_32:
        and     z0.s, z0.s, #0xffff
        ret
uxth_z_32_no_ptrue:
        mov     z31.d, z0.d
        movprfx z0.s, p0/z, z31.s
        uxth    z0.s, p0/m, z31.s
        ret
uxth_m_32_no_ptrue:
        mov     z31.d, z0.d
        movprfx z0, z1
        uxth    z0.s, p0/m, z31.s
        ret
uxtw_z_64:
        and     z0.d, z0.d, #0xffffffff
        ret
uxtw_m_64:
        ptrue   p3.b, all
        mov     z31.d, z0.d
        movprfx z0, z1
        uxtw    z0.d, p3/m, z31.d
        ret
uxtw_x_64:
        and     z0.d, z0.d, #0xffffffff
        ret
uxtw_z_64_no_ptrue:
        mov     z31.d, z0.d
        movprfx z0.d, p0/z, z31.d
        uxtw    z0.d, p0/m, z31.d
        ret
uxtw_m_64_no_ptrue:
        mov     z31.d, z0.d
        movprfx z0, z1
        uxtw    z0.d, p0/m, z31.d
        ret

vs Clang:
uxtb_z_64:
        ptrue   p0.d
        movi    v1.2d, #0000000000000000
        uxtb    z0.d, p0/m, z0.d
        ret

uxtb_m_64:
        ptrue   p0.d
        uxtb    z0.d, p0/m, z0.d
        ret

uxtb_x_64:
        uxtb    z0.d, p0/m, z0.d
        ret

uxtb_z_64_no_ptrue:
        movi    v1.2d, #0000000000000000
        uxtb    z1.d, p0/m, z0.d
        mov     z0.d, z1.d
        ret

uxtb_m_64_no_ptrue:
        uxtb    z1.d, p0/m, z0.d
        mov     z0.d, z1.d
        ret

uxtb_z_32:
        ptrue   p0.s
        movi    v1.2d, #0000000000000000
        uxtb    z0.s, p0/m, z0.s
        ret

uxtb_m_32:
        ptrue   p0.s
        uxtb    z0.s, p0/m, z0.s
        ret

uxtb_x_32:
        uxtb    z0.s, p0/m, z0.s
        ret

uxtb_z_32_no_ptrue:
        movi    v1.2d, #0000000000000000
        uxtb    z1.s, p0/m, z0.s
        mov     z0.d, z1.d
        ret

uxtb_m_32_no_ptrue:
        uxtb    z1.s, p0/m, z0.s
        mov     z0.d, z1.d
        ret

uxtb_z_16:
        ptrue   p0.h
        movi    v1.2d, #0000000000000000
        uxtb    z0.h, p0/m, z0.h
        ret

uxtb_m_16:
        ptrue   p0.h
        uxtb    z0.h, p0/m, z0.h
        ret

uxtb_x_16:
        uxtb    z0.h, p0/m, z0.h
        ret

uxtb_z_16_no_ptrue:
        movi    v1.2d, #0000000000000000
        uxtb    z1.h, p0/m, z0.h
        mov     z0.d, z1.d
        ret

uxtb_m_16_no_ptrue:
        uxtb    z1.h, p0/m, z0.h
        mov     z0.d, z1.d
        ret

uxth_z_64:
        ptrue   p0.d
        movi    v1.2d, #0000000000000000
        uxth    z0.d, p0/m, z0.d
        ret

uxth_m_64:
        ptrue   p0.d
        uxth    z0.d, p0/m, z0.d
        ret

uxth_x_64:
        uxth    z0.d, p0/m, z0.d
        ret

uxth_z_64_no_ptrue:
        movi    v1.2d, #0000000000000000
        uxth    z1.d, p0/m, z0.d
        mov     z0.d, z1.d
        ret

uxth_m_64_no_ptrue:
        uxth    z1.d, p0/m, z0.d
        mov     z0.d, z1.d
        ret

uxth_z_32:
        ptrue   p0.s
        movi    v1.2d, #0000000000000000
        uxth    z0.s, p0/m, z0.s
        ret

uxth_m_32:
        ptrue   p0.s
        uxth    z0.s, p0/m, z0.s
        ret

uxth_x_32:
        uxth    z0.s, p0/m, z0.s
        ret

uxth_z_32_no_ptrue:
        movi    v1.2d, #0000000000000000
        uxth    z1.s, p0/m, z0.s
        mov     z0.d, z1.d
        ret

uxth_m_32_no_ptrue:
        uxth    z1.s, p0/m, z0.s
        mov     z0.d, z1.d
        ret

uxtw_z_64:
        ptrue   p0.d
        movi    v1.2d, #0000000000000000
        uxtw    z0.d, p0/m, z0.d
        ret

uxtw_m_64:
        ptrue   p0.d
        uxtw    z0.d, p0/m, z0.d
        ret

uxtw_x_64:
        uxtw    z0.d, p0/m, z0.d
        ret

uxtw_z_64_no_ptrue:
        movi    v1.2d, #0000000000000000
        uxtw    z1.d, p0/m, z0.d
        mov     z0.d, z1.d
        ret

uxtw_m_64_no_ptrue:
        uxtw    z1.d, p0/m, z0.d
        mov     z0.d, z1.d
        ret

Reply via email to