Hi,
Attached is a patch that implements the framework necessary for implementing
NEON Intrinsics' builtins in Tree/Gimple rather than RTL. For this it uses the
target hook TARGET_FOLD_BUILTIN and folds all the builtins for NEON Intrinsics
into equivalent trees. As an example, the patch also implements the
vaddv<q>_f<32, 64> intrinsics on top of this framework.
Regression tested on aarch64-none-elf. OK for trunk?
Thanks,
Tejas Belagod
ARM.
2013-03-14 Tejas Belagod <tejas.bela...@arm.com>
gcc/
* config/aarch64/aarch64-builtins.c (aarch64_fold_builtin): New.
* config/aarch64/aarch64-protos.h (aarch64_fold_builtin): Declare.
* config/aarch64/aarch64-simd-builtins.def: New entry for reduc_splus.
* config/aarch64/aarch64.c (TARGET_FOLD_BUILTIN): Define.
* config/aarch64/arm_neon.h (vaddv_f32, vaddvq_f32, vaddvq_f64): New.
testsuite/
* gcc.target/aarch64/vaddv-intrinsic-compile.c: New.
* gcc.target/aarch64/vaddv-intrinsic.c: New.
diff --git a/gcc/config/aarch64/aarch64-builtins.c
b/gcc/config/aarch64/aarch64-builtins.c
index 35475ba..a1bd032 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1254,6 +1254,31 @@ aarch64_builtin_vectorized_function (tree fndecl, tree
type_out, tree type_in)
return NULL_TREE;
}
+
+#undef VAR1
+#define VAR1(T, N, MAP, A) \
+ case AARCH64_SIMD_BUILTIN_##N##A:
+
+/* Implement TARGET_FOLD_BUILTIN.  Try to fold a call to the AArch64
+   SIMD builtin FNDECL, whose N_ARGS arguments are in ARGS, into an
+   equivalent tree.  Return the folded tree, or NULL_TREE if no folding
+   was possible (the builtin is then expanded through RTL as before).  */
+
+tree
+aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
+ bool ignore ATTRIBUTE_UNUSED)
+{
+ int fcode = DECL_FUNCTION_CODE (fndecl);
+ /* Result type of the builtin, used as the type of the folded tree.  */
+ tree type = TREE_TYPE (TREE_TYPE (fndecl));
+
+ switch (fcode)
+ {
+ /* VAR1 above expands each BUILTIN_VDQF variant to a 'case' label, so
+    this matches reduc_splus_ for every VDQF mode.  Fold the vector
+    sum-reduction builtin to the generic REDUC_PLUS_EXPR tree code.  */
+ BUILTIN_VDQF (UNOP, reduc_splus_, 10)
+ return fold_build1 (REDUC_PLUS_EXPR, type, args[0]);
+ break;
+
+ default:
+ break;
+ }
+
+ return NULL_TREE;
+}
+
#undef AARCH64_CHECK_BUILTIN_MODE
#undef AARCH64_FIND_FRINT_VARIANT
#undef BUILTIN_DX
diff --git a/gcc/config/aarch64/aarch64-protos.h
b/gcc/config/aarch64/aarch64-protos.h
index 5d0072f..1bb33e8 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -177,6 +177,7 @@ rtx aarch64_simd_gen_const_vector_dup (enum machine_mode,
int);
bool aarch64_simd_mem_operand_p (rtx);
rtx aarch64_simd_vect_par_cnst_half (enum machine_mode, bool);
rtx aarch64_tls_get_addr (void);
+tree aarch64_fold_builtin (tree, int, tree *, bool);
unsigned aarch64_dbx_register_number (unsigned);
unsigned aarch64_trampoline_size (void);
void aarch64_asm_output_labelref (FILE *, const char *);
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def
b/gcc/config/aarch64/aarch64-simd-builtins.def
index e18e3f3..1dd4ad6 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -238,6 +238,9 @@
BUILTIN_VDQF (BINOP, fmax, 0)
BUILTIN_VDQF (BINOP, fmin, 0)
+ /* Implemented by reduc_splus_<mode>. */
+ BUILTIN_VDQF (UNOP, reduc_splus_, 10)
+
/* Implemented by <maxmin><mode>3. */
BUILTIN_VDQ_BHSI (BINOP, smax, 3)
BUILTIN_VDQ_BHSI (BINOP, smin, 3)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 45c4106..156c20e 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7829,6 +7829,9 @@ aarch64_vectorize_vec_perm_const_ok (enum machine_mode
vmode,
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
+#undef TARGET_FOLD_BUILTIN
+#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
+
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 5e25c77..6198f99 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -19731,6 +19731,29 @@ vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
return __a + __b;
}
+/* vaddv */
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vaddv_f32 (float32x2_t __a)
+{
+  /* Sum the lanes of __a; the reduction builtin leaves its result in
+     lane 0, which we extract.  The local must use the implementation-
+     reserved '__' prefix: arm_neon.h is a system header, and a plain
+     name like 't' could be clobbered by a user's macro definition.  */
+  float32x2_t __t = __builtin_aarch64_reduc_splus_v2sf (__a);
+  return vget_lane_f32 (__t, 0);
+}
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vaddvq_f32 (float32x4_t __a)
+{
+  /* Sum the lanes of __a; the reduction builtin leaves its result in
+     lane 0, which we extract.  Use a '__'-prefixed local: arm_neon.h is
+     a system header, so an unreserved name like 't' could collide with
+     a user-defined macro.  */
+  float32x4_t __t = __builtin_aarch64_reduc_splus_v4sf (__a);
+  return vgetq_lane_f32 (__t, 0);
+}
+
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
+vaddvq_f64 (float64x2_t __a)
+{
+  /* Sum the lanes of __a; the reduction builtin leaves its result in
+     lane 0, which we extract.  Use a '__'-prefixed local: arm_neon.h is
+     a system header, so an unreserved name like 't' could collide with
+     a user-defined macro.  */
+  float64x2_t __t = __builtin_aarch64_reduc_splus_v2df (__a);
+  return vgetq_lane_f64 (__t, 0);
+}
+
/* vceq */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
diff --git a/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic-compile.c
b/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic-compile.c
new file mode 100644
index 0000000..c736c0d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic-compile.c
@@ -0,0 +1,36 @@
+
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+#include "arm_neon.h"
+
+float32_t
+test_vaddv_v2sf (const float32_t *pool)
+{
+  /* Load two floats from POOL and sum them with vaddv_f32.  */
+  float32x2_t vec = vld1_f32 (pool);
+
+  return vaddv_f32 (vec);
+}
+
+float32_t
+test_vaddv_v4sf (const float32_t *pool)
+{
+  /* Load four floats from POOL and sum them with vaddvq_f32.  */
+  float32x4_t vec = vld1q_f32 (pool);
+
+  return vaddvq_f32 (vec);
+}
+
+float64_t
+test_vaddv_v2df (const float64_t *pool)
+{
+  /* Load two doubles from POOL and sum them with vaddvq_f64.  */
+  float64x2_t vec = vld1q_f64 (pool);
+
+  return vaddvq_f64 (vec);
+}
+
+/* { dg-final { scan-assembler "faddp\\ts\[0-9\]+"} } */
+/* { dg-final { scan-assembler-times "faddp\\tv\[0-9\]+\.4s" 2} } */
+/* { dg-final { scan-assembler "faddp\\td\[0-9\]+"} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.c
b/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.c
new file mode 100644
index 0000000..d324333
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.c
@@ -0,0 +1,53 @@
+
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+float32_t
+test_vaddv_v2sf (const float32_t *pool)
+{
+  /* Load two floats from POOL and sum them with vaddv_f32.  */
+  float32x2_t vec = vld1_f32 (pool);
+
+  return vaddv_f32 (vec);
+}
+
+float32_t
+test_vaddv_v4sf (const float32_t *pool)
+{
+  /* Load four floats from POOL and sum them with vaddvq_f32.  */
+  float32x4_t vec = vld1q_f32 (pool);
+
+  return vaddvq_f32 (vec);
+}
+
+float64_t
+test_vaddv_v2df (const float64_t *pool)
+{
+  /* Load two doubles from POOL and sum them with vaddvq_f64.  */
+  float64x2_t vec = vld1q_f64 (pool);
+
+  return vaddvq_f64 (vec);
+}
+
+int
+main (void)
+{
+  /* All inputs and expected sums are small integers, exactly
+     representable in binary floating point, so the equality
+     comparisons below are exact.  */
+  const float32_t in_v2sf[] = { 4.0f, 9.0f };
+  const float32_t in_v4sf[] = { 4.0f, 9.0f, 16.0f, 25.0f };
+  const float64_t in_v2df[] = { 4.0, 9.0 };
+
+  if (test_vaddv_v2sf (in_v2sf) != 13.0f
+      || test_vaddv_v4sf (in_v4sf) != 54.0f
+      || test_vaddv_v2df (in_v2df) != 13.0)
+    abort ();
+
+  return 0;
+}