Hi All, It seems the entry in config/aarch64/aarch64-builtins.c isn't needed; as such, I've simplified the patch and the changelog.
Ok for trunk? Tamar gcc/ 2017-01-19 Tamar Christina <tamar.christ...@arm.com> * config/aarch64/aarch64.c (aarch64_simd_gen_const_vector_dup): Changed int to HOST_WIDE_INT. * config/aarch64/aarch64-protos.h (aarch64_simd_gen_const_vector_dup): Likewise. * config/aarch64/aarch64-simd-builtins.def: Added copysign BINOP. * config/aarch64/aarch64-simd.md: Added copysign<mode>3. gcc/testsuite/ 2017-01-19 Tamar Christina <tamar.christ...@arm.com> * gcc.target/arm/vect-copysignf.c: Move to... * gcc.dg/vect/vect-copysignf.c: ... Here. ________________________________________ From: gcc-patches-ow...@gcc.gnu.org <gcc-patches-ow...@gcc.gnu.org> on behalf of Tamar Christina <tamar.christ...@arm.com> Sent: Thursday, January 19, 2017 9:38:09 AM To: GCC Patches; James Greenhalgh; Marcus Shawcroft; Richard Earnshaw Cc: nd Subject: Re: [PATCH][GCC][Aarch64] Add vectorize pattern for copysign. Hi All, This is a slight modification of the earlier patch (using a different constant in the mask creation). < + HOST_WIDE_INT_M1 << bits)); --- > + HOST_WIDE_INT_M1U << bits)); Kind Regards, Tamar ________________________________________ From: gcc-patches-ow...@gcc.gnu.org <gcc-patches-ow...@gcc.gnu.org> on behalf of Tamar Christina <tamar.christ...@arm.com> Sent: Tuesday, January 17, 2017 2:50:19 PM To: GCC Patches; James Greenhalgh; Marcus Shawcroft; Richard Earnshaw Cc: nd Subject: [PATCH][GCC][Aarch64] Add vectorize pattern for copysign. Hi All, This patch vectorizes the copysign builtin for AArch64 similar to how it is done for Arm. AArch64 now generates: ... .L4: ldr q1, [x6, x3] add w4, w4, 1 ldr q0, [x5, x3] cmp w4, w7 bif v1.16b, v2.16b, v3.16b fmul v0.2d, v0.2d, v1.2d str q0, [x5, x3] for the input: x * copysign(1.0, y) On 481.wrf in Spec2006 on AArch64 this gives us a speedup of 9.1%. Regtested on aarch64-none-linux-gnu with no regressions. Ok for trunk? 
gcc/ 2017-01-17 Tamar Christina <tamar.christ...@arm.com> * config/aarch64/aarch64-builtins.c (aarch64_builtin_vectorized_function): Added CASE_CFN_COPYSIGN. * config/aarch64/aarch64.c (aarch64_simd_gen_const_vector_dup): Changed int to HOST_WIDE_INT. * config/aarch64/aarch64-protos.h (aarch64_simd_gen_const_vector_dup): Likewise. * config/aarch64/aarch64-simd-builtins.def: Added copysign BINOP. * config/aarch64/aarch64-simd.md: Added copysign<mode>3. gcc/testsuite/ 2017-01-17 Tamar Christina <tamar.christ...@arm.com> * gcc.target/arm/vect-copysignf.c: Move to... * gcc.dg/vect/vect-copysignf.c: ... Here.
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 29a3bd71151aa4fb7c6728f0fb52e2f3f233f41d..e75ba29f93e9e749791803ca3fa8d716ca261064 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -362,7 +362,7 @@ rtx aarch64_final_eh_return_addr (void); rtx aarch64_mask_from_zextract_ops (rtx, rtx); const char *aarch64_output_move_struct (rtx *operands); rtx aarch64_return_addr (int, rtx); -rtx aarch64_simd_gen_const_vector_dup (machine_mode, int); +rtx aarch64_simd_gen_const_vector_dup (machine_mode, HOST_WIDE_INT); bool aarch64_simd_mem_operand_p (rtx); rtx aarch64_simd_vect_par_cnst_half (machine_mode, bool); rtx aarch64_tls_get_addr (void); diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index d713d5d8b88837ec6f2dc51188fb252f8d5bc8bd..a67b7589e8badfbd0f13168557ef87e052eedcb1 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -151,6 +151,9 @@ BUILTIN_VQN (TERNOP, raddhn2, 0) BUILTIN_VQN (TERNOP, rsubhn2, 0) + /* Implemented by copysign<mode>3. */ + BUILTIN_VHSDF (BINOP, copysign, 3) + BUILTIN_VSQN_HSDI (UNOP, sqmovun, 0) /* Implemented by aarch64_<sur>qmovn<mode>. 
*/ BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index a12e2268ef9b023112f8d05db0a86957fee83273..b61f79a09462b8cecca7dd2cc4ac0eb4be2dbc79 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -338,6 +338,24 @@ } ) +(define_expand "copysign<mode>3" + [(match_operand:VHSDF 0 "register_operand") + (match_operand:VHSDF 1 "register_operand") + (match_operand:VHSDF 2 "register_operand")] + "TARGET_FLOAT && TARGET_SIMD" +{ + rtx v_bitmask = gen_reg_rtx (<V_cmp_result>mode); + int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1; + + emit_move_insn (v_bitmask, + aarch64_simd_gen_const_vector_dup (<V_cmp_result>mode, + HOST_WIDE_INT_M1U << bits)); + emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask, + operands[2], operands[1])); + DONE; +} +) + (define_insn "*aarch64_mul3_elt<mode>" [(set (match_operand:VMUL 0 "register_operand" "=w") (mult:VMUL diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 0cf7d12186af3e05ba8742af5a03425f61f51754..1a69605db5d2a4a0efb8c9f97a019de9dded40eb 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -11244,14 +11244,16 @@ aarch64_mov_operand_p (rtx x, machine_mode mode) /* Return a const_int vector of VAL. 
*/ rtx -aarch64_simd_gen_const_vector_dup (machine_mode mode, int val) +aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val) { int nunits = GET_MODE_NUNITS (mode); rtvec v = rtvec_alloc (nunits); int i; + rtx cache = GEN_INT (val); + for (i=0; i < nunits; i++) - RTVEC_ELT (v, i) = GEN_INT (val); + RTVEC_ELT (v, i) = cache; return gen_rtx_CONST_VECTOR (mode, v); } diff --git a/gcc/testsuite/gcc.target/arm/vect-copysignf.c b/gcc/testsuite/gcc.dg/vect/vect-copysignf.c similarity index 91% rename from gcc/testsuite/gcc.target/arm/vect-copysignf.c rename to gcc/testsuite/gcc.dg/vect/vect-copysignf.c index 425f1b78af7b07be6929f9e5bc1118ca901bc9ce..dc961d0223399c6e7ee8209d22ca77f6d22dbd70 100644 --- a/gcc/testsuite/gcc.target/arm/vect-copysignf.c +++ b/gcc/testsuite/gcc.dg/vect/vect-copysignf.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-require-effective-target arm_neon_hw { target { arm*-*-* } } } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */ /* { dg-add-options "arm_neon" } */