Jonathan Wright via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> Hi,
>
> As subject, this patch rewrites the vpadal_[su]32 Neon intrinsics to use
> RTL builtins rather than inline assembly code, allowing for better
> scheduling and optimization.
>
> Regression tested and bootstrapped on aarch64-none-linux-gnu - no
> issues.
>
> Ok for master?

OK, thanks. Seems odd that we were in a half-half state for this one.

Richard

> Thanks,
> Jonathan
>
> ---
>
> gcc/ChangeLog:
>
> 2021-02-09  Jonathan Wright  <jonathan.wri...@arm.com>
>
> 	* config/aarch64/aarch64-simd-builtins.def: Use VDQV_L
> 	iterator to generate [su]adalp RTL builtins.
> 	* config/aarch64/aarch64-simd.md: Use VDQV_L iterator in
> 	[su]adalp RTL pattern.
> 	* config/aarch64/arm_neon.h (vpadal_s32): Use RTL builtin
> 	instead of inline asm.
> 	(vpadal_u32): Likewise.
>
> diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def
> b/gcc/config/aarch64/aarch64-simd-builtins.def
> index
> 3430f627d09a99470dd9480c517be8a41c96ddf1..874fd89db5dbe6e0bac287bedaccc04b254e9883
> 100644
> --- a/gcc/config/aarch64/aarch64-simd-builtins.def
> +++ b/gcc/config/aarch64/aarch64-simd-builtins.def
> @@ -172,8 +172,8 @@
>    BUILTIN_VDQ_BHSI (TERNOP, saba, 0, NONE)
>    BUILTIN_VDQ_BHSI (TERNOPU, uaba, 0, NONE)
>
> -  BUILTIN_VDQV_S (BINOP, sadalp, 0, NONE)
> -  BUILTIN_VDQV_S (BINOPU, uadalp, 0, NONE)
> +  BUILTIN_VDQV_L (BINOP, sadalp, 0, NONE)
> +  BUILTIN_VDQV_L (BINOPU, uadalp, 0, NONE)
>
>    /* Implemented by aarch64_<sur>abal<mode>.  */
>    BUILTIN_VD_BHSI (TERNOP, sabal, 0, NONE)
> diff --git a/gcc/config/aarch64/aarch64-simd.md
> b/gcc/config/aarch64/aarch64-simd.md
> index
> dd97253f4f393ca37e608f391949a568729d452c..939c281d533261bb84dc451586da707953018fb8
> 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -893,8 +893,8 @@
>
>  (define_insn "aarch64_<sur>adalp<mode>"
>    [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
> -	(unspec:<VDBLW> [(match_operand:VDQV_S 2 "register_operand" "w")
> -			 (match_operand:<VDBLW> 1 "register_operand" "0")]
> +	(unspec:<VDBLW> [(match_operand:VDQV_L 2 "register_operand" "w")
> +			 (match_operand:<VDBLW> 1 "register_operand" "0")]
> 	ADALP))]
>    "TARGET_SIMD"
>    "<sur>adalp\t%0.<Vwhalf>, %2.<Vtype>"
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index
> 703070cd257e46bd041a64d49f1d64da321285ff..f137b213699b8701383d044165e56b6ae717371a
> 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -8441,12 +8441,7 @@ __extension__ extern __inline int64x1_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vpadal_s32 (int64x1_t __a, int32x2_t __b)
>  {
> -  int64x1_t __result;
> -  __asm__ ("sadalp %0.1d,%2.2s"
> -           : "=w"(__result)
> -           : "0"(__a), "w"(__b)
> -           : /* No clobbers */);
> -  return __result;
> +  return (int64x1_t) __builtin_aarch64_sadalpv2si (__a[0], __b);
>  }
>
>  __extension__ extern __inline uint16x4_t
> @@ -8467,12 +8462,7 @@ __extension__ extern __inline uint64x1_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vpadal_u32 (uint64x1_t __a, uint32x2_t __b)
>  {
> -  uint64x1_t __result;
> -  __asm__ ("uadalp %0.1d,%2.2s"
> -           : "=w"(__result)
> -           : "0"(__a), "w"(__b)
> -           : /* No clobbers */);
> -  return __result;
> +  return (uint64x1_t) __builtin_aarch64_uadalpv2si_uuu (__a[0], __b);
>  }
>
>  __extension__ extern __inline int16x8_t
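
For readers less familiar with these intrinsics, here is a minimal
standalone sketch of what vpadal_s32 computes: the two 32-bit lanes of
the second argument are added pairwise, widened to 64 bits, and
accumulated into the 64-bit lane of the first argument.  The harness
and values below are illustrative only (not part of the patch) and
assume an AArch64 target where arm_neon.h is available:

  #include <arm_neon.h>
  #include <stdio.h>

  int
  main (void)
  {
    /* Accumulator lane: 100.  */
    int64x1_t acc = vcreate_s64 (100);
    /* Lane 0 = 4, lane 1 = 3 (low 32 bits are lane 0).  */
    int32x2_t v = vcreate_s32 (0x0000000300000004ULL);

    /* sadalp: add the adjacent 32-bit lanes (4 + 3 = 7), widen to
       64 bits and accumulate into the existing lane: 100 + 7 = 107.  */
    int64x1_t r = vpadal_s32 (acc, v);

    printf ("%lld\n", (long long) vget_lane_s64 (r, 0));   /* prints 107 */
    return 0;
  }

With the patch applied, a call like this expands through the
__builtin_aarch64_sadalpv2si RTL builtin to a single sadalp instruction
rather than an opaque asm block, so the compiler can schedule and
optimize it together with the surrounding code.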