Re: [PATCH 15/20] aarch64: Use RTL builtins for vcvtx intrinsics

Richard Sandiford via Gcc-patches Wed, 28 Apr 2021 09:08:32 -0700

Jonathan Wright via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> Hi,
>
> As subject, this patch rewrites the vcvtx Neon intrinsics to use RTL builtins
> rather than inline assembly code, allowing for better scheduling and
> optimization.
>
> Regression tested and bootstrapped on aarch64-none-linux-gnu and
> aarch64_be-none-elf - no issues.
>
> Ok for master?


OK, thanks.

Richard

> Thanks,
> Jonathan
>
> ---
>
> gcc/ChangeLog:
>
> 2021-02-18  Jonathan Wright  <jonathan.wri...@arm.com>
>
>       * config/aarch64/aarch64-simd-builtins.def: Add
>       float_trunc_rodd builtin generator macros.
>       * config/aarch64/aarch64-simd.md (aarch64_float_trunc_rodd_df):
>       Define.
>       (aarch64_float_trunc_rodd_lo_v2sf): Define.
>       (aarch64_float_trunc_rodd_hi_v4sf_le): Define.
>       (aarch64_float_trunc_rodd_hi_v4sf_be): Define.
>       (aarch64_float_trunc_rodd_hi_v4sf): Define.
>       * config/aarch64/arm_neon.h (vcvtx_f32_f64): Use RTL builtin
>       instead of inline asm.
>       (vcvtx_high_f32_f64): Likewise.
>       (vcvtxd_f32_f64): Likewise.
>       * config/aarch64/iterators.md: Add FCVTXN unspec.
>
> diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
> index 52ae398858db1ec506a97376e7ccc1153aa210c5..e4a9e8740c5f87f1d7feb8f6a9725d7def1a0323 100644
> --- a/gcc/config/aarch64/aarch64-simd-builtins.def
> +++ b/gcc/config/aarch64/aarch64-simd-builtins.def
> @@ -634,6 +634,10 @@
>    VAR1 (UNOP, float_extend_lo_,  0, FP, v4sf)
>    BUILTIN_VDF (UNOP, float_truncate_lo_, 0, FP)
>  
> +  VAR1 (UNOP, float_trunc_rodd_, 0, FP, df)
> +  VAR1 (UNOP, float_trunc_rodd_lo_, 0, FP, v2sf)
> +  VAR1 (BINOP, float_trunc_rodd_hi_, 0, FP, v4sf)
> +
>    /* Implemented by aarch64_ld1<VALL_F16:mode>.  */
>    BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD)
>    VAR1(STORE1P, ld1, 0, ALL, v2di)
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 207d644487e77cd66d933dc7860a59e57fee523d..fa4972e769ee79dfd369223e867ed18beb6dfb2c 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -3185,6 +3185,60 @@
>  
>  ;; Float narrowing operations.
>  
> +(define_insn "aarch64_float_trunc_rodd_df"
> +  [(set (match_operand:SF 0 "register_operand" "=w")
> +     (unspec:SF [(match_operand:DF 1 "register_operand" "w")]
> +             UNSPEC_FCVTXN))]
> +  "TARGET_SIMD"
> +  "fcvtxn\\t%s0, %d1"
> +  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
> +)
> +
> +(define_insn "aarch64_float_trunc_rodd_lo_v2sf"
> +  [(set (match_operand:V2SF 0 "register_operand" "=w")
> +     (unspec:V2SF [(match_operand:V2DF 1 "register_operand" "w")]
> +             UNSPEC_FCVTXN))]
> +  "TARGET_SIMD"
> +  "fcvtxn\\t%0.2s, %1.2d"
> +  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
> +)
> +
> +(define_insn "aarch64_float_trunc_rodd_hi_v4sf_le"
> +  [(set (match_operand:V4SF 0 "register_operand" "=w")
> +     (vec_concat:V4SF
> +       (match_operand:V2SF 1 "register_operand" "0")
> +       (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
> +             UNSPEC_FCVTXN)))]
> +  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
> +  "fcvtxn2\\t%0.4s, %2.2d"
> +  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
> +)
> +
> +(define_insn "aarch64_float_trunc_rodd_hi_v4sf_be"
> +  [(set (match_operand:V4SF 0 "register_operand" "=w")
> +     (vec_concat:V4SF
> +       (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
> +             UNSPEC_FCVTXN)
> +       (match_operand:V2SF 1 "register_operand" "0")))]
> +  "TARGET_SIMD && BYTES_BIG_ENDIAN"
> +  "fcvtxn2\\t%0.4s, %2.2d"
> +  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
> +)
> +
> +(define_expand "aarch64_float_trunc_rodd_hi_v4sf"
> +  [(match_operand:V4SF 0 "register_operand")
> +   (match_operand:V2SF 1 "register_operand")
> +   (match_operand:V2DF 2 "register_operand")]
> +  "TARGET_SIMD"
> +{
> +  rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
> +                          ? gen_aarch64_float_trunc_rodd_hi_v4sf_be
> +                          : gen_aarch64_float_trunc_rodd_hi_v4sf_le;
> +  emit_insn (gen (operands[0], operands[1], operands[2]));
> +  DONE;
> +}
> +)
> +
>  (define_insn "aarch64_float_truncate_lo_<mode>"
>    [(set (match_operand:VDF 0 "register_operand" "=w")
>        (float_truncate:VDF
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index 5ff11e7ea4a9a722c66a37ee65319125313df436..17e059efb80fa86a8a32127ace4fc7f43e2040a8 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -7014,36 +7014,21 @@ __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vcvtx_f32_f64 (float64x2_t __a)
>  {
> -  float32x2_t __result;
> -  __asm__ ("fcvtxn %0.2s,%1.2d"
> -           : "=w"(__result)
> -           : "w"(__a)
> -           : /* No clobbers */);
> -  return __result;
> +  return __builtin_aarch64_float_trunc_rodd_lo_v2sf (__a);
>  }
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vcvtx_high_f32_f64 (float32x2_t __a, float64x2_t __b)
>  {
> -  float32x4_t __result;
> -  __asm__ ("fcvtxn2 %0.4s,%1.2d"
> -           : "=w"(__result)
> -           : "w" (__b), "0"(__a)
> -           : /* No clobbers */);
> -  return __result;
> +  return __builtin_aarch64_float_trunc_rodd_hi_v4sf (__a, __b);
>  }
>  
>  __extension__ extern __inline float32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vcvtxd_f32_f64 (float64_t __a)
>  {
> -  float32_t __result;
> -  __asm__ ("fcvtxn %s0,%d1"
> -           : "=w"(__result)
> -           : "w"(__a)
> -           : /* No clobbers */);
> -  return __result;
> +  return __builtin_aarch64_float_trunc_rodd_df (__a);
>  }
>  
>  __extension__ extern __inline float32x2_t
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 65e728acb3bc0cbcc8be29d330bc1ee66ef9a504..8b4c1bd1f72e43205ea33ec094b029026505b270 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -864,6 +864,7 @@
>      UNSPEC_BFCVTN      ; Used in aarch64-simd.md.
>      UNSPEC_BFCVTN2     ; Used in aarch64-simd.md.
>      UNSPEC_BFCVT       ; Used in aarch64-simd.md.
> +    UNSPEC_FCVTXN    ; Used in aarch64-simd.md.
>  ])
>  
>  ;; ------------------------------------------------------------------

Re: [PATCH 15/20] aarch64: Use RTL builtins for vcvtx intrinsics

Reply via email to