Jonathan Wright via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> Hi,
>
> As subject, this patch rewrites the vpadal_[su]32 Neon intrinsics to use
> RTL builtins rather than inline assembly code, allowing for better
> scheduling and optimization.
>
> Regression tested and bootstrapped on aarch64-none-linux-gnu - no
> issues.
>
> Ok for master?

OK, thanks.  Seems odd that we were in a half-half state for this one.

Richard

> Thanks,
> Jonathan
>
> ---
>
> gcc/ChangeLog:
>
> 2021-02-09  Jonathan Wright  <jonathan.wri...@arm.com>
>
>       * config/aarch64/aarch64-simd-builtins.def: Use VDQV_L
>       iterator to generate [su]adalp RTL builtins.
>       * config/aarch64/aarch64-simd.md: Use VDQV_L iterator in
>       [su]adalp RTL pattern.
>       * config/aarch64/arm_neon.h (vpadal_s32): Use RTL builtin
>       instead of inline asm.
>       (vpadal_u32): Likewise.
>
> diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
> index 3430f627d09a99470dd9480c517be8a41c96ddf1..874fd89db5dbe6e0bac287bedaccc04b254e9883 100644
> --- a/gcc/config/aarch64/aarch64-simd-builtins.def
> +++ b/gcc/config/aarch64/aarch64-simd-builtins.def
> @@ -172,8 +172,8 @@
>    BUILTIN_VDQ_BHSI (TERNOP, saba, 0, NONE)
>    BUILTIN_VDQ_BHSI (TERNOPU, uaba, 0, NONE)
>  
> -  BUILTIN_VDQV_S (BINOP, sadalp, 0, NONE)
> -  BUILTIN_VDQV_S (BINOPU, uadalp, 0, NONE)
> +  BUILTIN_VDQV_L (BINOP, sadalp, 0, NONE)
> +  BUILTIN_VDQV_L (BINOPU, uadalp, 0, NONE)
>  
>    /* Implemented by aarch64_<sur>abal<mode>.  */
>    BUILTIN_VD_BHSI (TERNOP, sabal, 0, NONE)
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index dd97253f4f393ca37e608f391949a568729d452c..939c281d533261bb84dc451586da707953018fb8 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -893,8 +893,8 @@
>  
>  (define_insn "aarch64_<sur>adalp<mode>"
>    [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
> -     (unspec:<VDBLW> [(match_operand:VDQV_S 2 "register_operand" "w")
> -                       (match_operand:<VDBLW> 1 "register_operand" "0")]
> +     (unspec:<VDBLW> [(match_operand:VDQV_L 2 "register_operand" "w")
> +                      (match_operand:<VDBLW> 1 "register_operand" "0")]
>       ADALP))]
>    "TARGET_SIMD"
>    "<sur>adalp\t%0.<Vwhalf>, %2.<Vtype>"
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index 703070cd257e46bd041a64d49f1d64da321285ff..f137b213699b8701383d044165e56b6ae717371a 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -8441,12 +8441,7 @@ __extension__ extern __inline int64x1_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vpadal_s32 (int64x1_t __a, int32x2_t __b)
>  {
> -  int64x1_t __result;
> -  __asm__ ("sadalp %0.1d,%2.2s"
> -           : "=w"(__result)
> -           : "0"(__a), "w"(__b)
> -           : /* No clobbers */);
> -  return __result;
> +  return (int64x1_t) __builtin_aarch64_sadalpv2si (__a[0], __b);
>  }
>  
>  __extension__ extern __inline uint16x4_t
> @@ -8467,12 +8462,7 @@ __extension__ extern __inline uint64x1_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vpadal_u32 (uint64x1_t __a, uint32x2_t __b)
>  {
> -  uint64x1_t __result;
> -  __asm__ ("uadalp %0.1d,%2.2s"
> -           : "=w"(__result)
> -           : "0"(__a), "w"(__b)
> -           : /* No clobbers */);
> -  return __result;
> +  return (uint64x1_t) __builtin_aarch64_uadalpv2si_uuu (__a[0], __b);
>  }
>  
>  __extension__ extern __inline int16x8_t

Reply via email to