https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120027
Bug ID: 120027
Summary: Missing simplifications of SVE uxtb intrinsics
Product: gcc
Version: 15.0
Status: UNCONFIRMED
Keywords: aarch64-sve, missed-optimization
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: ktkachov at gcc dot gnu.org
Target Milestone: ---
Target: aarch64

Some cases:

#include <arm_sve.h>

#define UXT(SZ, TY) \
svuint##SZ##_t uxt##TY##_z_##SZ(svuint##SZ##_t x) { return svext##TY##_z(svptrue_b##SZ(), x); } \
svuint##SZ##_t uxt##TY##_m_##SZ(svuint##SZ##_t x, svuint##SZ##_t y) { return svext##TY##_m(y, svptrue_b##SZ(), x); } \
svuint##SZ##_t uxt##TY##_x_##SZ(svbool_t p, svuint##SZ##_t x) { return svext##TY##_x(p, x); } \
svuint##SZ##_t uxt##TY##_z_##SZ##_no_ptrue(svbool_t p, svuint##SZ##_t x) { return svext##TY##_z(p, x); } \
svuint##SZ##_t uxt##TY##_m_##SZ##_no_ptrue(svbool_t p, svuint##SZ##_t x, svuint##SZ##_t y) { return svext##TY##_m(y, p, x); }

UXT(64, b)
UXT(32, b)
UXT(16, b)
UXT(64, h)
UXT(32, h)
UXT(64, w)

On AArch64 SVE, GCC generates more code for these cases than Clang does.
GCC:

uxtb_z_64:
        and z0.d, z0.d, #0xff
        ret
uxtb_m_64:
        ptrue p3.b, all
        mov z31.d, z0.d
        movprfx z0, z1
        uxtb z0.d, p3/m, z31.d
        ret
uxtb_x_64:
        and z0.d, z0.d, #0xff
        ret
uxtb_z_64_no_ptrue:
        mov z31.d, z0.d
        movprfx z0.d, p0/z, z31.d
        uxtb z0.d, p0/m, z31.d
        ret
uxtb_m_64_no_ptrue:
        mov z31.d, z0.d
        movprfx z0, z1
        uxtb z0.d, p0/m, z31.d
        ret
uxtb_z_32:
        and z0.s, z0.s, #0xff
        ret
uxtb_m_32:
        ptrue p3.b, all
        mov z31.d, z0.d
        movprfx z0, z1
        uxtb z0.s, p3/m, z31.s
        ret
uxtb_x_32:
        and z0.s, z0.s, #0xff
        ret
uxtb_z_32_no_ptrue:
        mov z31.d, z0.d
        movprfx z0.s, p0/z, z31.s
        uxtb z0.s, p0/m, z31.s
        ret
uxtb_m_32_no_ptrue:
        mov z31.d, z0.d
        movprfx z0, z1
        uxtb z0.s, p0/m, z31.s
        ret
uxtb_z_16:
        and z0.h, z0.h, #0xff
        ret
uxtb_m_16:
        ptrue p3.b, all
        mov z31.d, z0.d
        movprfx z0, z1
        uxtb z0.h, p3/m, z31.h
        ret
uxtb_x_16:
        and z0.h, z0.h, #0xff
        ret
uxtb_z_16_no_ptrue:
        mov z31.d, z0.d
        movprfx z0.h, p0/z, z31.h
        uxtb z0.h, p0/m, z31.h
        ret
uxtb_m_16_no_ptrue:
        mov z31.d, z0.d
        movprfx z0, z1
        uxtb z0.h, p0/m, z31.h
        ret
uxth_z_64:
        and z0.d, z0.d, #0xffff
        ret
uxth_m_64:
        ptrue p3.b, all
        mov z31.d, z0.d
        movprfx z0, z1
        uxth z0.d, p3/m, z31.d
        ret
uxth_x_64:
        and z0.d, z0.d, #0xffff
        ret
uxth_z_64_no_ptrue:
        mov z31.d, z0.d
        movprfx z0.d, p0/z, z31.d
        uxth z0.d, p0/m, z31.d
        ret
uxth_m_64_no_ptrue:
        mov z31.d, z0.d
        movprfx z0, z1
        uxth z0.d, p0/m, z31.d
        ret
uxth_z_32:
        and z0.s, z0.s, #0xffff
        ret
uxth_m_32:
        ptrue p3.b, all
        mov z31.d, z0.d
        movprfx z0, z1
        uxth z0.s, p3/m, z31.s
        ret
uxth_x_32:
        and z0.s, z0.s, #0xffff
        ret
uxth_z_32_no_ptrue:
        mov z31.d, z0.d
        movprfx z0.s, p0/z, z31.s
        uxth z0.s, p0/m, z31.s
        ret
uxth_m_32_no_ptrue:
        mov z31.d, z0.d
        movprfx z0, z1
        uxth z0.s, p0/m, z31.s
        ret
uxtw_z_64:
        and z0.d, z0.d, #0xffffffff
        ret
uxtw_m_64:
        ptrue p3.b, all
        mov z31.d, z0.d
        movprfx z0, z1
        uxtw z0.d, p3/m, z31.d
        ret
uxtw_x_64:
        and z0.d, z0.d, #0xffffffff
        ret
uxtw_z_64_no_ptrue:
        mov z31.d, z0.d
        movprfx z0.d, p0/z, z31.d
        uxtw z0.d, p0/m, z31.d
        ret
uxtw_m_64_no_ptrue:
        mov z31.d, z0.d
        movprfx z0, z1
        uxtw z0.d, p0/m, z31.d
        ret

vs Clang:
uxtb_z_64:
        ptrue p0.d
        movi v1.2d, #0000000000000000
        uxtb z0.d, p0/m, z0.d
        ret
uxtb_m_64:
        ptrue p0.d
        uxtb z0.d, p0/m, z0.d
        ret
uxtb_x_64:
        uxtb z0.d, p0/m, z0.d
        ret
uxtb_z_64_no_ptrue:
        movi v1.2d, #0000000000000000
        uxtb z1.d, p0/m, z0.d
        mov z0.d, z1.d
        ret
uxtb_m_64_no_ptrue:
        uxtb z1.d, p0/m, z0.d
        mov z0.d, z1.d
        ret
uxtb_z_32:
        ptrue p0.s
        movi v1.2d, #0000000000000000
        uxtb z0.s, p0/m, z0.s
        ret
uxtb_m_32:
        ptrue p0.s
        uxtb z0.s, p0/m, z0.s
        ret
uxtb_x_32:
        uxtb z0.s, p0/m, z0.s
        ret
uxtb_z_32_no_ptrue:
        movi v1.2d, #0000000000000000
        uxtb z1.s, p0/m, z0.s
        mov z0.d, z1.d
        ret
uxtb_m_32_no_ptrue:
        uxtb z1.s, p0/m, z0.s
        mov z0.d, z1.d
        ret
uxtb_z_16:
        ptrue p0.h
        movi v1.2d, #0000000000000000
        uxtb z0.h, p0/m, z0.h
        ret
uxtb_m_16:
        ptrue p0.h
        uxtb z0.h, p0/m, z0.h
        ret
uxtb_x_16:
        uxtb z0.h, p0/m, z0.h
        ret
uxtb_z_16_no_ptrue:
        movi v1.2d, #0000000000000000
        uxtb z1.h, p0/m, z0.h
        mov z0.d, z1.d
        ret
uxtb_m_16_no_ptrue:
        uxtb z1.h, p0/m, z0.h
        mov z0.d, z1.d
        ret
uxth_z_64:
        ptrue p0.d
        movi v1.2d, #0000000000000000
        uxth z0.d, p0/m, z0.d
        ret
uxth_m_64:
        ptrue p0.d
        uxth z0.d, p0/m, z0.d
        ret
uxth_x_64:
        uxth z0.d, p0/m, z0.d
        ret
uxth_z_64_no_ptrue:
        movi v1.2d, #0000000000000000
        uxth z1.d, p0/m, z0.d
        mov z0.d, z1.d
        ret
uxth_m_64_no_ptrue:
        uxth z1.d, p0/m, z0.d
        mov z0.d, z1.d
        ret
uxth_z_32:
        ptrue p0.s
        movi v1.2d, #0000000000000000
        uxth z0.s, p0/m, z0.s
        ret
uxth_m_32:
        ptrue p0.s
        uxth z0.s, p0/m, z0.s
        ret
uxth_x_32:
        uxth z0.s, p0/m, z0.s
        ret
uxth_z_32_no_ptrue:
        movi v1.2d, #0000000000000000
        uxth z1.s, p0/m, z0.s
        mov z0.d, z1.d
        ret
uxth_m_32_no_ptrue:
        uxth z1.s, p0/m, z0.s
        mov z0.d, z1.d
        ret
uxtw_z_64:
        ptrue p0.d
        movi v1.2d, #0000000000000000
        uxtw z0.d, p0/m, z0.d
        ret
uxtw_m_64:
        ptrue p0.d
        uxtw z0.d, p0/m, z0.d
        ret
uxtw_x_64:
        uxtw z0.d, p0/m, z0.d
        ret
uxtw_z_64_no_ptrue:
        movi v1.2d, #0000000000000000
        uxtw z1.d, p0/m, z0.d
        mov z0.d, z1.d
        ret
uxtw_m_64_no_ptrue:
        uxtw z1.d, p0/m, z0.d
        mov z0.d, z1.d
        ret