Hi,
Attached is a patch that implements the framework necessary for implementing
NEON Intrinsics' builtins in Tree/Gimple rather than RTL. For this it uses the
target hook TARGET_FOLD_BUILTIN and folds all the builtins for NEON Intrinsics
into equivalent trees. As an example, the patch also implements the
vaddv<q>_f<32, 64> intrinsics on top of this framework.
Regression tested on aarch64-none-elf. OK for trunk?
Thanks,
Tejas Belagod
ARM.
2013-03-14 Tejas Belagod <tejas.bela...@arm.com>
gcc/
* config/aarch64/aarch64-builtins.c (aarch64_fold_builtin): New.
* config/aarch64/aarch64-protos.h (aarch64_fold_builtin): Declare.
* config/aarch64/aarch64-simd-builtins.def: New entry for reduc_splus.
* config/aarch64/aarch64.c (TARGET_FOLD_BUILTIN): Define.
* config/aarch64/arm_neon.h (vaddv_f32, vaddvq_f32, vaddvq_f64): New.
testsuite/
* gcc.target/aarch64/vaddv-intrinsic-compile.c: New.
* gcc.target/aarch64/vaddv-intrinsic.c: New.
diff --git a/gcc/config/aarch64/aarch64-builtins.c
b/gcc/config/aarch64/aarch64-builtins.c
index 35475ba..a1bd032 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1254,6 +1254,31 @@ aarch64_builtin_vectorized_function (tree fndecl, tree
type_out, tree type_in)
return NULL_TREE;
}
+
+#undef VAR1
+#define VAR1(T, N, MAP, A) \
+ case AARCH64_SIMD_BUILTIN_##N##A:
+
+/* Implement TARGET_FOLD_BUILTIN.  Try to fold a call to the AArch64
+   SIMD builtin FNDECL, whose N_ARGS arguments are in ARGS, into an
+   equivalent tree.  Return the folded tree, or NULL_TREE if no folding
+   was possible (the builtin is then expanded through RTL as before).  */
+
+tree
+aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
+ bool ignore ATTRIBUTE_UNUSED)
+{
+ int fcode = DECL_FUNCTION_CODE (fndecl);
+ /* Result type of the builtin, used as the type of the folded tree.  */
+ tree type = TREE_TYPE (TREE_TYPE (fndecl));
+
+ switch (fcode)
+ {
+ /* VAR1 above expands each BUILTIN_VDQF variant to a 'case' label, so
+    this matches reduc_splus_ for every VDQF mode.  Fold the vector
+    sum-reduction builtin to the generic REDUC_PLUS_EXPR tree code.  */
+ BUILTIN_VDQF (UNOP, reduc_splus_, 10)
+ return fold_build1 (REDUC_PLUS_EXPR, type, args[0]);
+ break;
+
+ default:
+ break;
+ }
+
+ return NULL_TREE;
+}
+
#undef AARCH64_CHECK_BUILTIN_MODE
#undef AARCH64_FIND_FRINT_VARIANT
#undef BUILTIN_DX
diff --git a/gcc/config/aarch64/aarch64-protos.h
b/gcc/config/aarch64/aarch64-protos.h
index 5d0072f..1bb33e8 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -177,6 +177,7 @@ rtx aarch64_simd_gen_const_vector_dup (enum machine_mode,
int);
bool aarch64_simd_mem_operand_p (rtx);
rtx aarch64_simd_vect_par_cnst_half (enum machine_mode, bool);
rtx aarch64_tls_get_addr (void);
+tree aarch64_fold_builtin (tree, int, tree *, bool);
unsigned aarch64_dbx_register_number (unsigned);
unsigned aarch64_trampoline_size (void);
void aarch64_asm_output_labelref (FILE *, const char *);
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def
b/gcc/config/aarch64/aarch64-simd-builtins.def
index e18e3f3..1dd4ad6 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -238,6 +238,9 @@
BUILTIN_VDQF (BINOP, fmax, 0)
BUILTIN_VDQF (BINOP, fmin, 0)
+ /* Implemented by reduc_splus_<mode>. */
+ BUILTIN_VDQF (UNOP, reduc_splus_, 10)
+
/* Implemented by <maxmin><mode>3. */
BUILTIN_VDQ_BHSI (BINOP, smax, 3)
BUILTIN_VDQ_BHSI (BINOP, smin, 3)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 45c4106..156c20e 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7829,6 +7829,9 @@ aarch64_vectorize_vec_perm_const_ok (enum machine_mode
vmode,
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
+#undef TARGET_FOLD_BUILTIN
+#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
+
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 5e25c77..6198f99 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -19731,6 +19731,29 @@ vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
return __a + __b;
}
+/* vaddv */
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vaddv_f32 (float32x2_t __a)
+{
+  /* Sum the lanes of __a; the reduction builtin leaves its result in
+     lane 0, which we extract.  The local must use the implementation-
+     reserved '__' prefix: arm_neon.h is a system header, and a plain
+     name like 't' could be clobbered by a user's macro definition.  */
+  float32x2_t __t = __builtin_aarch64_reduc_splus_v2sf (__a);
+  return vget_lane_f32 (__t, 0);
+}
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vaddvq_f32 (float32x4_t __a)
+{
+  /* Sum the lanes of __a; the reduction builtin leaves its result in
+     lane 0, which we extract.  Use a '__'-prefixed local: arm_neon.h is
+     a system header, so an unreserved name like 't' could collide with
+     a user-defined macro.  */
+  float32x4_t __t = __builtin_aarch64_reduc_splus_v4sf (__a);
+  return vgetq_lane_f32 (__t, 0);
+}
+
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
+vaddvq_f64 (float64x2_t __a)
+{
+  /* Sum the lanes of __a; the reduction builtin leaves its result in
+     lane 0, which we extract.  Use a '__'-prefixed local: arm_neon.h is
+     a system header, so an unreserved name like 't' could collide with
+     a user-defined macro.  */
+  float64x2_t __t = __builtin_aarch64_reduc_splus_v2df (__a);
+  return vgetq_lane_f64 (__t, 0);
+}
+
/* vceq */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
diff --git a/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic-compile.c
b/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic-compile.c
new file mode 100644
index 0000000..c736c0d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic-compile.c
@@ -0,0 +1,36 @@
+
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+#include "arm_neon.h"
+
+float32_t
+test_vaddv_v2sf (const float32_t *pool)
+{
+  /* Load two floats from POOL and sum them with vaddv_f32.  */
+  float32x2_t vec = vld1_f32 (pool);
+
+  return vaddv_f32 (vec);
+}
+
+float32_t
+test_vaddv_v4sf (const float32_t *pool)
+{
+  /* Load four floats from POOL and sum them with vaddvq_f32.  */
+  float32x4_t vec = vld1q_f32 (pool);
+
+  return vaddvq_f32 (vec);
+}
+
+float64_t
+test_vaddv_v2df (const float64_t *pool)
+{
+  /* Load two doubles from POOL and sum them with vaddvq_f64.  */
+  float64x2_t vec = vld1q_f64 (pool);
+
+  return vaddvq_f64 (vec);
+}
+
+/* { dg-final { scan-assembler "faddp\\ts\[0-9\]+"} } */
+/* { dg-final { scan-assembler-times "faddp\\tv\[0-9\]+\.4s" 2} } */
+/* { dg-final { scan-assembler "faddp\\td\[0-9\]+"} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.c
b/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.c
new file mode 100644
index 0000000..d324333
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.c
@@ -0,0 +1,53 @@
+
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+float32_t
+test_vaddv_v2sf (const float32_t *pool)
+{
+  /* Load two floats from POOL and sum them with vaddv_f32.  */
+  float32x2_t vec = vld1_f32 (pool);
+
+  return vaddv_f32 (vec);
+}
+
+float32_t
+test_vaddv_v4sf (const float32_t *pool)
+{
+  /* Load four floats from POOL and sum them with vaddvq_f32.  */
+  float32x4_t vec = vld1q_f32 (pool);
+
+  return vaddvq_f32 (vec);
+}
+
+float64_t
+test_vaddv_v2df (const float64_t *pool)
+{
+  /* Load two doubles from POOL and sum them with vaddvq_f64.  */
+  float64x2_t vec = vld1q_f64 (pool);
+
+  return vaddvq_f64 (vec);
+}
+
+int
+main (void)
+{
+  /* All inputs and expected sums are small integers, exactly
+     representable in binary floating point, so the equality
+     comparisons below are exact.  */
+  const float32_t in_v2sf[] = { 4.0f, 9.0f };
+  const float32_t in_v4sf[] = { 4.0f, 9.0f, 16.0f, 25.0f };
+  const float64_t in_v2df[] = { 4.0, 9.0 };
+
+  if (test_vaddv_v2sf (in_v2sf) != 13.0f
+      || test_vaddv_v4sf (in_v4sf) != 54.0f
+      || test_vaddv_v2df (in_v2df) != 13.0)
+    abort ();
+
+  return 0;
+}