--- Begin Message ---
* config/aarch64/aarch64-builtins.c: Builtins for rsqrt and rsqrtf.
* config/aarch64/aarch64-protos.h (aarch64_emit_swrsqrt): Declare.
* config/aarch64/aarch64-simd.md: Matching expressions for frsqrte
and frsqrts.
* config/aarch64/aarch64.c: New functions. Emit rsqrt estimation code
in fast math mode.
* config/aarch64/aarch64.md: Add enum entries UNSPEC_RSQRT, UNSPEC_RSQRTE and UNSPEC_RSQRTS.
* testsuite/gcc.target/aarch64/rsqrt.c: Tests for single and double.
---
gcc/ChangeLog | 9 +++
gcc/config/aarch64/aarch64-builtins.c | 60 ++++++++++++++++
gcc/config/aarch64/aarch64-protos.h | 2 +
gcc/config/aarch64/aarch64-simd.md | 27 ++++++++
gcc/config/aarch64/aarch64.c | 63 +++++++++++++++++
gcc/config/aarch64/aarch64.md | 3 +
gcc/testsuite/gcc.target/aarch64/rsqrt.c | 113
+++++++++++++++++++++++++++++++
7 files changed, 277 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/aarch64/rsqrt.c
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c9b156f..690ebba 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2015-06-15 Benedikt Huber <benedikt.hu...@theobroma-systems.com>
+
+ * config/aarch64/aarch64-builtins.c: Builtins for rsqrt and rsqrtf.
+ * config/aarch64/aarch64-protos.h (aarch64_emit_swrsqrt): Declare.
+ * config/aarch64/aarch64-simd.md: Matching expressions for frsqrte
and frsqrts.
+ * config/aarch64/aarch64.c: New functions. Emit rsqrt estimation
code in fast math mode.
+ * config/aarch64/aarch64.md: Add enum entries UNSPEC_RSQRT, UNSPEC_RSQRTE and UNSPEC_RSQRTS.
+ * testsuite/gcc.target/aarch64/rsqrt.c: Tests for single and double.
+
2015-06-14 Richard Sandiford <richard.sandif...@arm.com>
* rtl.h (classify_insn): Declare.
diff --git a/gcc/config/aarch64/aarch64-builtins.c
b/gcc/config/aarch64/aarch64-builtins.c
index f7a39ec..484bb84 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -342,6 +342,8 @@ enum aarch64_builtins
AARCH64_BUILTIN_GET_FPSR,
AARCH64_BUILTIN_SET_FPSR,
+ AARCH64_BUILTIN_RSQRT,
+ AARCH64_BUILTIN_RSQRTF,
AARCH64_SIMD_BUILTIN_BASE,
AARCH64_SIMD_BUILTIN_LANE_CHECK,
#include "aarch64-simd-builtins.def"
@@ -831,6 +833,32 @@ aarch64_init_crc32_builtins ()
}
+/* Register the machine-specific builtins __builtin_aarch64_rsqrt
+   (double -> double) and __builtin_aarch64_rsqrtf (float -> float) and
+   record their decls in aarch64_builtin_decls.  */
void
+aarch64_add_builtin_rsqrt (void)
+{
+  /* Double variant.  */
+  tree df_fntype
+    = build_function_type_list (double_type_node, double_type_node,
+                                NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT]
+    = add_builtin_function ("__builtin_aarch64_rsqrt", df_fntype,
+                            AARCH64_BUILTIN_RSQRT, BUILT_IN_MD,
+                            NULL, NULL_TREE);
+
+  /* Float variant.  */
+  tree sf_fntype
+    = build_function_type_list (float_type_node, float_type_node,
+                                NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_RSQRTF]
+    = add_builtin_function ("__builtin_aarch64_rsqrtf", sf_fntype,
+                            AARCH64_BUILTIN_RSQRTF, BUILT_IN_MD,
+                            NULL, NULL_TREE);
+}
+
+void
aarch64_init_builtins (void)
{
tree ftype_set_fpr
@@ -855,6 +883,7 @@ aarch64_init_builtins (void)
aarch64_init_simd_builtins ();
if (TARGET_CRC32)
aarch64_init_crc32_builtins ();
+ aarch64_add_builtin_rsqrt ();
}
tree
@@ -1099,6 +1128,23 @@ aarch64_crc32_expand_builtin (int fcode, tree exp,
rtx target)
return target;
}
+/* Expand a call EXP to __builtin_aarch64_rsqrt or __builtin_aarch64_rsqrtf
+   (selected by FCODE) through the rsqrtdf / rsqrtsf expander.
+
+   TARGET is only a suggestion from the caller: it may be null or have a
+   mode other than the result mode, in which case the result is placed in
+   a fresh pseudo instead.  Returns the rtx that holds the result.  */
+static rtx
+aarch64_expand_builtin_rsqrt (int fcode, tree exp, rtx target)
+{
+  enum insn_code icode = (fcode == AARCH64_BUILTIN_RSQRTF
+                          ? CODE_FOR_rsqrtsf
+                          : CODE_FOR_rsqrtdf);
+  /* Result mode as declared by operand 0 of the chosen expander.  */
+  machine_mode mode = insn_data[icode].operand[0].mode;
+
+  rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
+
+  /* Never hand a null or wrong-mode destination to the generator.  */
+  if (!target || GET_MODE (target) != mode)
+    target = gen_reg_rtx (mode);
+
+  emit_insn (GEN_FCN (icode) (target, op0));
+
+  return target;
+}
+
/* Expand an expression EXP that calls a built-in function,
with result going to TARGET if that's convenient. */
rtx
@@ -1146,6 +1192,11 @@ aarch64_expand_builtin (tree exp,
else if (fcode >= AARCH64_CRC32_BUILTIN_BASE && fcode <=
AARCH64_CRC32_BUILTIN_MAX)
return aarch64_crc32_expand_builtin (fcode, exp, target);
+ if (fcode == AARCH64_BUILTIN_RSQRT ||
+ fcode == AARCH64_BUILTIN_RSQRTF)
+ return aarch64_expand_builtin_rsqrt (fcode, exp, target);
+
+ return NULL_RTX;
gcc_unreachable ();
}
@@ -1303,6 +1354,15 @@ aarch64_builtin_vectorized_function (tree fndecl,
tree type_out, tree type_in)
return NULL_TREE;
}
+/* Return the decl recorded for the rsqrt builtin: the float variant when
+   IS_FLOAT is true, otherwise the double variant.  */
+tree
+aarch64_builtin_rsqrt (bool is_float)
+{
+  return aarch64_builtin_decls[is_float
+                               ? AARCH64_BUILTIN_RSQRTF
+                               : AARCH64_BUILTIN_RSQRT];
+}
+
#undef VAR1
#define VAR1(T, N, MAP, A) \
case AARCH64_SIMD_BUILTIN_##T##_##N##A:
diff --git a/gcc/config/aarch64/aarch64-protos.h
b/gcc/config/aarch64/aarch64-protos.h
index 965a11b..4f1c8ce 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -270,6 +270,8 @@ void aarch64_print_operand (FILE *, rtx, char);
void aarch64_print_operand_address (FILE *, rtx);
void aarch64_emit_call_insn (rtx);
+void aarch64_emit_swrsqrt (rtx, rtx);
+
/* Initialize builtins for SIMD intrinsics. */
void init_aarch64_simd_builtins (void);
diff --git a/gcc/config/aarch64/aarch64-simd.md
b/gcc/config/aarch64/aarch64-simd.md
index b90f938..266800a 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -353,6 +353,33 @@
[(set_attr "type" "neon_fp_mul_d_scalar_q")]
)
+;; Match an UNSPEC_RSQRTE of a single VALLF register operand and output
+;; one frsqrte instruction.  Only enabled when TARGET_SIMD holds.
+(define_insn "*rsqrte_simd"
+ [(set (match_operand:VALLF 0 "register_operand" "=w")
+ (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
+ UNSPEC_RSQRTE))]
+ "TARGET_SIMD"
+ "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
+ [(set_attr "type" "neon_fp_rsqrte_<Vetype><q>")])
+
+;; Match an UNSPEC_RSQRTS of two VALLF register operands and output one
+;; frsqrts instruction.  Only enabled when TARGET_SIMD holds.
+(define_insn "*rsqrts_simd"
+ [(set (match_operand:VALLF 0 "register_operand" "=w")
+ (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
+ (match_operand:VALLF 2 "register_operand" "w")]
+ UNSPEC_RSQRTS))]
+ "TARGET_SIMD"
+ "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
+ [(set_attr "type" "neon_fp_rsqrts_<Vetype><q>")])
+
+;; Named expander for the GPF modes, enabled under TARGET_FLOAT.  The
+;; preparation code emits the whole sequence itself by calling
+;; aarch64_emit_swrsqrt and then ends with DONE, so the UNSPEC_RSQRT
+;; template below is not emitted as an insn.
+(define_expand "rsqrt<mode>"
+ [(set (match_operand:GPF 0 "register_operand" "=w")
+ (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
+ UNSPEC_RSQRT))]
+ "TARGET_FLOAT"
+{
+ aarch64_emit_swrsqrt (operands[0], operands[1]);
+ DONE;
+})
+
(define_insn "*aarch64_mul3_elt_to_64v2df"
[(set (match_operand:DF 0 "register_operand" "=w")
(mult:DF
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index c3c2795..281f0b2 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -6816,6 +6816,66 @@ aarch64_memory_move_cost (machine_mode mode
ATTRIBUTE_UNUSED,
return aarch64_tune_params->memmov_cost;
}
+extern tree aarch64_builtin_rsqrt (bool is_float);
+
+/* Implement TARGET_BUILTIN_RECIPROCAL.  Return the decl of the builtin to
+   use for the reciprocal of the builtin whose function code is FN, or
+   NULL_TREE when there is none.
+
+   MD_FN is true when FN is the code of a machine-dependent builtin.  Such
+   codes live in a different numbering space from the generic BUILT_IN_*
+   codes, so they must not be compared against BUILT_IN_SQRT{,F}; no
+   machine-dependent builtin has a reciprocal here.
+
+   A replacement is only offered when the fast-math flags are set.  */
+static tree
+aarch64_builtin_reciprocal (unsigned int fn,
+                            bool md_fn,
+                            bool sqrt ATTRIBUTE_UNUSED)
+{
+  if (md_fn)
+    return NULL_TREE;
+
+  if (!fast_math_flags_set_p (&global_options))
+    return NULL_TREE;
+
+  switch (fn)
+    {
+    case BUILT_IN_SQRTF:
+      return aarch64_builtin_rsqrt (true);
+
+    case BUILT_IN_SQRT:
+      return aarch64_builtin_rsqrt (false);
+
+    default:
+      return NULL_TREE;
+    }
+}
+
+/* Emit the software reciprocal-square-root estimation sequence for SRC
+   and move the result into DST.
+
+   The sequence is one UNSPEC_RSQRTE of SRC followed by a number of
+   refinement steps.  Each step squares the current estimate, feeds SRC
+   and that square to UNSPEC_RSQRTS, and multiplies the estimate by the
+   outcome.  Three steps are emitted for the double-precision modes
+   listed below and two for every other mode.  */
+void
+aarch64_emit_swrsqrt (rtx dst, rtx src)
+{
+  enum machine_mode mode = GET_MODE (src);
+
+  /* Work on a copy of SRC held in a fresh pseudo.  */
+  rtx src_reg = gen_reg_rtx (mode);
+  emit_set_insn (src_reg, src);
+
+  /* Initial estimate.  */
+  rtx estimate = gen_reg_rtx (mode);
+  rtx rsqrte = gen_rtx_UNSPEC (mode, gen_rtvec (1, src_reg), UNSPEC_RSQRTE);
+  emit_insn (gen_rtx_SET (estimate, rsqrte));
+
+  int steps_left;
+  switch (mode)
+    {
+    case DFmode:
+    case V1DFmode:
+    case V2DFmode:
+    case V4DFmode:
+    case V6DFmode:
+    case V8DFmode:
+      steps_left = 3;
+      break;
+
+    default:
+      steps_left = 2;
+      break;
+    }
+
+  while (steps_left-- > 0)
+    {
+      rtx refined = gen_reg_rtx (mode);
+      rtx square = gen_reg_rtx (mode);
+      rtx step = gen_reg_rtx (mode);
+
+      rtx square_val = gen_rtx_MULT (mode, estimate, estimate);
+      emit_insn (gen_rtx_SET (square, square_val));
+
+      rtx step_val = gen_rtx_UNSPEC (mode, gen_rtvec (2, src_reg, square),
+                                     UNSPEC_RSQRTS);
+      emit_insn (gen_rtx_SET (step, step_val));
+
+      rtx refined_val = gen_rtx_MULT (mode, estimate, step);
+      emit_insn (gen_rtx_SET (refined, refined_val));
+
+      estimate = refined;
+    }
+
+  emit_move_insn (dst, estimate);
+}
+
/* Return the number of instructions that can be issued per cycle. */
static int
aarch64_sched_issue_rate (void)
@@ -11747,6 +11807,9 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool
load,
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
+#undef TARGET_BUILTIN_RECIPROCAL
+#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
+
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 11123d6..7272d05 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -120,6 +120,9 @@
UNSPEC_VSTRUCTDUMMY
UNSPEC_SP_SET
UNSPEC_SP_TEST
+ UNSPEC_RSQRT
+ UNSPEC_RSQRTE
+ UNSPEC_RSQRTS
])
(define_c_enum "unspecv" [
diff --git a/gcc/testsuite/gcc.target/aarch64/rsqrt.c
b/gcc/testsuite/gcc.target/aarch64/rsqrt.c
new file mode 100644
index 0000000..6607483
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/rsqrt.c
@@ -0,0 +1,113 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inline --save-temps -ffast-math" } */
+
+#include <math.h>
+#include <stdio.h>
+
+#include <values.h>
+
+#define PI 3.141592653589793
+#define SQRT2 1.4142135623730951
+
+#define PI_4 0.7853981633974483
+#define SQRT1_2 0.7071067811865475
+
+/* 2^25+1, float has 24 significand bits
+ * according to Single-precision floating-point format. */
+#define TESTA8_FLT 33554433
+/* 2^54+1, double has 53 significand bits
+ * according to Double-precision floating-point format. */
+#define TESTA8_DBL 18014398509481985
+
+#define SD(a, b) t_double ((#a), (a), (b));
+#define SF(a, b) t_float ((#a), (a), (b));
+
+#define EPSILON_double __DBL_EPSILON__
+#define EPSILON_float __FLT_EPSILON__
+#define ABS_double fabs
+#define ABS_float fabsf
+#define SQRT_double sqrt
+#define SQRT_float sqrtf
+
+extern void abort (void);
+
+/* For a given TYPE, define three functions:
+   rsqrt_TYPE returns 1.0/SQRT_TYPE (a);
+   equals_TYPE is true when a == b, when both are NaN, or when they
+   differ by less than EPSILON_TYPE;
+   t_TYPE aborts when rsqrt_TYPE (a) does not match RESULT according to
+   equals_TYPE.  Its S argument is not used.  */
+#define TESTTYPE(TYPE)
\
+TYPE rsqrt_##TYPE (TYPE a)
\
+{
\
+ return 1.0/SQRT_##TYPE(a);
\
+}
\
+
\
+int equals_##TYPE (TYPE a, TYPE b)
\
+{
\
+ return (a == b ||
\
+ (isnan (a) && isnan (b)) ||
\
+ (ABS_##TYPE (a - b) < EPSILON_##TYPE));
\
+}
\
+
\
+void t_##TYPE (const char *s, TYPE a, TYPE result)
\
+{
\
+ TYPE r = rsqrt_##TYPE (a);
\
+ if (!equals_##TYPE (r, result))
\
+ {
\
+ abort ();
\
+ }
\
+}
\
+
+// printf ("Problem in %20s: %30.18A should be %30.18A\n", s, r, result);
\
+
+TESTTYPE(double)
+TESTTYPE(float)
+
+/* { dg-final { scan-assembler-times "frsqrte\\td\[0-9\]+, d\[0-9\]+" 1 } }
*/
+/* { dg-final { scan-assembler-times "frsqrts\\td\[0-9\]+, d\[0-9\]+,
d\[0-9\]+" 3 } } */
+
+/* { dg-final { scan-assembler-times "frsqrte\\ts\[0-9\]+, s\[0-9\]+" 1 } }
*/
+/* { dg-final { scan-assembler-times "frsqrts\\ts\[0-9\]+, s\[0-9\]+,
s\[0-9\]+" 2 } } */
+
+/* Run the double (SD) and float (SF) checks.  Each SD/SF line passes an
+   input and its expected result to t_double / t_float, which aborts on a
+   mismatch.  Returns 0 when every check passes.  */
+int main ()
+{
+ SD( 1.0/256, 0X1.00000000000000P+4 );
+ SD( 1.0, 0X1.00000000000000P+0 );
+ SD( -1.0, NAN);
+ SD( 11.0, 0X1.34BF63D1568260P-2 );
+ SD( 0.0, INFINITY);
+ SD( INFINITY, 0X0.00000000000000P+0 );
+ SD( NAN, NAN);
+ SD( -NAN, -NAN);
+ SD( DBL_MAX, 0X1.00000000000010P-512);
+ SD( DBL_MIN, 0X1.00000000000000P+511);
+ SD( PI, 0X1.20DD750429B6D0P-1 );
+ SD( PI_4, 0X1.20DD750429B6D0P+0 );
+ SD( SQRT2, 0X1.AE89F995AD3AE0P-1 );
+ SD( SQRT1_2, 0X1.306FE0A31B7150P+0 );
+ SD( -PI, NAN);
+ SD( -SQRT2, NAN);
+ SD( TESTA8_DBL, 0X1.00000000000000P-27 );
+
+ SF( 1.0/256, 0X1.00000000000000P+4 );
+ SF( 1.0, 0X1.00000000000000P+0 );
+ SF( -1.0, NAN);
+ SF( 11.0, 0X1.34BF6400000000P-2 );
+ SF( 0.0, INFINITY);
+ SF( INFINITY, 0X0.00000000000000P+0 );
+ SF( NAN, NAN);
+ SF( -NAN, -NAN);
+ SF( FLT_MAX, 0X1.00000200000000P-64 );
+ SF( FLT_MIN, 0X1.00000000000000P+63 );
+ SF( PI, 0X1.20DD7400000000P-1 );
+ SF( PI_4, 0X1.20DD7400000000P+0 );
+ SF( SQRT2, 0X1.AE89FA00000000P-1 );
+ SF( SQRT1_2, 0X1.306FE000000000P+0 );
+ SF( -PI, NAN);
+ SF( -SQRT2, NAN);
+ SF( TESTA8_FLT, 0X1.6A09E600000000P-13 );
+
+// With -ffast-math these return positive INF.
+// SD( -0.0, -INFINITY);
+// SF( -0.0, -INFINITY);
+// The reason here is that -ffast-math flushes to zero.
+// SD(DBL_MIN/256, 0X1.00000000000000P+515);
+// SF(FLT_MIN/256, 0X1.00000000000000P+67 );
+
+ return 0;
+}
--
1.9.1
--- End Message ---