[AArch64, 2/6] Reimplement vector fixed-point intrinsics

Jiong Wang Tue, 24 May 2016 01:26:28 -0700

Based on top of [1/6], this patch reimplement vector intrinsics for
conversion between floating-point and fixed-point.


gcc/
2016-05-23  Jiong Wang <jiong.w...@arm.com>

* config/aarch64/aarch64-builtins.def (scvtf): New builtins forvector types.

        (ucvtf): Likewise.
        (fcvtzs): Likewise.
        (fcvtzu): Likewise.
        * config/aarch64/aarch64-simd.md
        (<FCVT_F2FIXED:fcvt_fixed_insn><GPF:mode>3): Extend to more modes.
        Rename to <FCVT_F2FIXED:fcvt_fixed_insn><VALLF:mode>3.

(<FCVT_FIXED2F:fcvt_fixed_insn><GPI:mode>3): Likewise andrename to

<FCVT_FIXED2F:fcvt_fixed_insn><VALLI:mode>3.

* config/aarch64/arm_neon.h (vcvt_n_f32_s32): Remove inlineassembly.

        Use builtin.
        (vcvt_n_f32_u32): Likewise.
        (vcvt_n_s32_f32): Likewise.
        (vcvt_n_u32_f32): Likewise.
        (vcvtq_n_f32_s32): Likewise.
        (vcvtq_n_f32_u32): Likewise.
        (vcvtq_n_f64_s64): Likewise.
        (vcvtq_n_f64_u64): Likewise.
        (vcvtq_n_s32_f32): Likewise.
        (vcvtq_n_s64_f64): Likewise.
        (vcvtq_n_u32_f32): Likewise.
        (vcvtq_n_u64_f64): Likewise.
        * config/aarch64/iterators.md (VALLI): New mode iterator.
        (fcvt_target): Support V4DI, V4SI and V2SI.
        (FCVT_TARGET): Likewise.

>From 63e8362e7d0afc2f4dd4288d38d3f64b62bfd657 Mon Sep 17 00:00:00 2001
From: "Jiong.Wang" <jiong.w...@arm.com>
Date: Mon, 23 May 2016 12:12:04 +0100
Subject: [PATCH 2/6] 2

---
 gcc/config/aarch64/aarch64-builtins.def |   4 +
 gcc/config/aarch64/aarch64-simd.md      |  22 ++--
 gcc/config/aarch64/arm_neon.h           | 216 +++++++++++---------------------
 gcc/config/aarch64/iterators.md         |   5 +
 4 files changed, 92 insertions(+), 155 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.def b/gcc/config/aarch64/aarch64-builtins.def
index 4528db3..5e6280c 100644
--- a/gcc/config/aarch64/aarch64-builtins.def
+++ b/gcc/config/aarch64/aarch64-builtins.def
@@ -455,3 +455,7 @@
   BUILTIN_GPI (BINOP, fcvtzsdf, 3)
   BUILTIN_GPI (BINOP_USS, fcvtzusf, 3)
   BUILTIN_GPI (BINOP_USS, fcvtzudf, 3)
+  BUILTIN_VALLI (BINOP, scvtf, 3)
+  BUILTIN_VALLI (BINOP_SUS, ucvtf, 3)
+  BUILTIN_VALLF (BINOP, fcvtzs, 3)
+  BUILTIN_VALLF (BINOP_USS, fcvtzu, 3)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 670c690..66ca2de 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1778,26 +1778,26 @@
   [(set_attr "type" "neon_fp_cvt_widen_s")]
 )
 
-;; Convert between fixed-point and floating-point (scalar variant from SIMD)
+;; Convert between fixed-point and floating-point (SIMD)
 
-(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><GPF:mode>3"
-  [(set (match_operand:<GPF:FCVT_TARGET> 0 "register_operand" "=w")
-	(unspec:<GPF:FCVT_TARGET> [(match_operand:GPF 1 "register_operand" "w")
-				   (match_operand:SI 2 "immediate_operand" "i")]
+(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VALLF:mode>3"
+  [(set (match_operand:<VALLF:FCVT_TARGET> 0 "register_operand" "=w")
+	(unspec:<VALLF:FCVT_TARGET> [(match_operand:VALLF 1 "register_operand" "w")
+				     (match_operand:SI 2 "immediate_operand" "i")]
 	 FCVT_F2FIXED))]
   "TARGET_SIMD"
   "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
-  [(set_attr "type" "neon_fp_to_int_<GPF:Vetype><q>")]
+  [(set_attr "type" "neon_fp_to_int_<VALLF:Vetype><q>")]
 )
 
-(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><GPI:mode>3"
-  [(set (match_operand:<GPI:FCVT_TARGET> 0 "register_operand" "=w")
-	(unspec:<GPI:FCVT_TARGET> [(match_operand:GPI 1 "register_operand" "w")
-				   (match_operand:SI 2 "immediate_operand" "i")]
+(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VALLI:mode>3"
+  [(set (match_operand:<VALLI:FCVT_TARGET> 0 "register_operand" "=w")
+	(unspec:<VALLI:FCVT_TARGET> [(match_operand:VALLI 1 "register_operand" "w")
+				     (match_operand:SI 2 "immediate_operand" "i")]
 	 FCVT_FIXED2F))]
   "TARGET_SIMD"
   "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
-  [(set_attr "type" "neon_int_to_fp_<GPI:Vetype><q>")]
+  [(set_attr "type" "neon_int_to_fp_<VALLI:Vetype><q>")]
 )
 
 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 012a11a..bd712fc 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -6025,150 +6025,6 @@ vaddlvq_u32 (uint32x4_t a)
        result;                                                          \
      })
 
-#define vcvt_n_f32_s32(a, b)                                            \
-  __extension__                                                         \
-    ({                                                                  \
-       int32x2_t a_ = (a);                                              \
-       float32x2_t result;                                              \
-       __asm__ ("scvtf %0.2s, %1.2s, #%2"                               \
-                : "=w"(result)                                          \
-                : "w"(a_), "i"(b)                                       \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vcvt_n_f32_u32(a, b)                                            \
-  __extension__                                                         \
-    ({                                                                  \
-       uint32x2_t a_ = (a);                                             \
-       float32x2_t result;                                              \
-       __asm__ ("ucvtf %0.2s, %1.2s, #%2"                               \
-                : "=w"(result)                                          \
-                : "w"(a_), "i"(b)                                       \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vcvt_n_s32_f32(a, b)                                            \
-  __extension__                                                         \
-    ({                                                                  \
-       float32x2_t a_ = (a);                                            \
-       int32x2_t result;                                                \
-       __asm__ ("fcvtzs %0.2s, %1.2s, #%2"                              \
-                : "=w"(result)                                          \
-                : "w"(a_), "i"(b)                                       \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vcvt_n_u32_f32(a, b)                                            \
-  __extension__                                                         \
-    ({                                                                  \
-       float32x2_t a_ = (a);                                            \
-       uint32x2_t result;                                               \
-       __asm__ ("fcvtzu %0.2s, %1.2s, #%2"                              \
-                : "=w"(result)                                          \
-                : "w"(a_), "i"(b)                                       \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vcvtq_n_f32_s32(a, b)                                           \
-  __extension__                                                         \
-    ({                                                                  \
-       int32x4_t a_ = (a);                                              \
-       float32x4_t result;                                              \
-       __asm__ ("scvtf %0.4s, %1.4s, #%2"                               \
-                : "=w"(result)                                          \
-                : "w"(a_), "i"(b)                                       \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vcvtq_n_f32_u32(a, b)                                           \
-  __extension__                                                         \
-    ({                                                                  \
-       uint32x4_t a_ = (a);                                             \
-       float32x4_t result;                                              \
-       __asm__ ("ucvtf %0.4s, %1.4s, #%2"                               \
-                : "=w"(result)                                          \
-                : "w"(a_), "i"(b)                                       \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vcvtq_n_f64_s64(a, b)                                           \
-  __extension__                                                         \
-    ({                                                                  \
-       int64x2_t a_ = (a);                                              \
-       float64x2_t result;                                              \
-       __asm__ ("scvtf %0.2d, %1.2d, #%2"                               \
-                : "=w"(result)                                          \
-                : "w"(a_), "i"(b)                                       \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vcvtq_n_f64_u64(a, b)                                           \
-  __extension__                                                         \
-    ({                                                                  \
-       uint64x2_t a_ = (a);                                             \
-       float64x2_t result;                                              \
-       __asm__ ("ucvtf %0.2d, %1.2d, #%2"                               \
-                : "=w"(result)                                          \
-                : "w"(a_), "i"(b)                                       \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vcvtq_n_s32_f32(a, b)                                           \
-  __extension__                                                         \
-    ({                                                                  \
-       float32x4_t a_ = (a);                                            \
-       int32x4_t result;                                                \
-       __asm__ ("fcvtzs %0.4s, %1.4s, #%2"                              \
-                : "=w"(result)                                          \
-                : "w"(a_), "i"(b)                                       \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vcvtq_n_s64_f64(a, b)                                           \
-  __extension__                                                         \
-    ({                                                                  \
-       float64x2_t a_ = (a);                                            \
-       int64x2_t result;                                                \
-       __asm__ ("fcvtzs %0.2d, %1.2d, #%2"                              \
-                : "=w"(result)                                          \
-                : "w"(a_), "i"(b)                                       \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vcvtq_n_u32_f32(a, b)                                           \
-  __extension__                                                         \
-    ({                                                                  \
-       float32x4_t a_ = (a);                                            \
-       uint32x4_t result;                                               \
-       __asm__ ("fcvtzu %0.4s, %1.4s, #%2"                              \
-                : "=w"(result)                                          \
-                : "w"(a_), "i"(b)                                       \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vcvtq_n_u64_f64(a, b)                                           \
-  __extension__                                                         \
-    ({                                                                  \
-       float64x2_t a_ = (a);                                            \
-       uint64x2_t result;                                               \
-       __asm__ ("fcvtzu %0.2d, %1.2d, #%2"                              \
-                : "=w"(result)                                          \
-                : "w"(a_), "i"(b)                                       \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vcvtx_f32_f64 (float64x2_t a)
 {
@@ -12760,6 +12616,42 @@ vcvts_n_f32_u32 (uint32_t __a, const int __b)
   return __builtin_aarch64_ucvtfsisf_sus (__a, __b);
 }
 
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vcvt_n_f32_s32 (int32x2_t __a, const int __b)
+{
+  return __builtin_aarch64_scvtfv2si (__a, __b);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vcvt_n_f32_u32 (uint32x2_t __a, const int __b)
+{
+  return __builtin_aarch64_ucvtfv2si_sus (__a, __b);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vcvtq_n_f32_s32 (int32x4_t __a, const int __b)
+{
+  return __builtin_aarch64_scvtfv4si (__a, __b);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vcvtq_n_f32_u32 (uint32x4_t __a, const int __b)
+{
+  return __builtin_aarch64_ucvtfv4si_sus (__a, __b);
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vcvtq_n_f64_s64 (int64x2_t __a, const int __b)
+{
+  return __builtin_aarch64_scvtfv2di (__a, __b);
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vcvtq_n_f64_u64 (uint64x2_t __a, const int __b)
+{
+  return __builtin_aarch64_ucvtfv2di_sus (__a, __b);
+}
+
 /* vcvt (float -> <u>fixed-point).  */
 
 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
@@ -12786,6 +12678,42 @@ vcvts_n_u32_f32 (float32_t __a, const int __b)
   return __builtin_aarch64_fcvtzusfsi_uss (__a, __b);
 }
 
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vcvt_n_s32_f32 (float32x2_t __a, const int __b)
+{
+  return __builtin_aarch64_fcvtzsv2sf (__a, __b);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcvt_n_u32_f32 (float32x2_t __a, const int __b)
+{
+  return __builtin_aarch64_fcvtzuv2sf_uss (__a, __b);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vcvtq_n_s32_f32 (float32x4_t __a, const int __b)
+{
+  return __builtin_aarch64_fcvtzsv4sf (__a, __b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcvtq_n_u32_f32 (float32x4_t __a, const int __b)
+{
+  return __builtin_aarch64_fcvtzuv4sf_uss (__a, __b);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vcvtq_n_s64_f64 (float64x2_t __a, const int __b)
+{
+  return __builtin_aarch64_fcvtzsv2df (__a, __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vcvtq_n_u64_f64 (float64x2_t __a, const int __b)
+{
+  return __builtin_aarch64_fcvtzuv2df_uss (__a, __b);
+}
+
 /* vcvt  (<u>int -> float)  */
 
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 4ebd6f7..2264459 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -166,6 +166,9 @@
 ;; Vector and scalar integer modes for H and S
 (define_mode_iterator VSDQ_HSI [V4HI V8HI V2SI V4SI HI SI])
 
+;; Vector and scalar integer modes for S and D
+(define_mode_iterator VALLI [V2SI V4SI V2DI SI DI])
+
 ;; Vector and scalar 64-bit container: 16, 32-bit integer modes
 (define_mode_iterator VSD_HSI [V4HI V2SI HI SI])
 
@@ -653,8 +656,10 @@
   [(QI "b") (HI "h") (SI "") (DI "")])
 
 (define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si")
+			       (V2DI "v2df") (V4SI "v4sf") (V2SI "v2sf")
 			       (SF "si") (DF "di") (SI "sf") (DI "df")])
 (define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")
+			       (V2DI "V2DF") (V4SI "V4SF") (V2SI "V2SF")
 			       (SF "SI") (DF "DI") (SI "SF") (DI "DF")])
 
 
-- 
1.9.1

[AArch64, 2/6] Reimplement vector fixed-point intrinsics

Reply via email to