On Thu, Jan 19, 2017 at 06:05:52PM +0000, Tamar Christina wrote:
> Hi James,
> 
> I have corrected the testsuite changes and attached is the new file and 
> changelog.
> 
> Ok for trunk?
> 
> Tamar
> 
> Hi All,
> 
> This patch vectorizes the copysign builtin for AArch64,
> similarly to how it is done for arm.
> 
> AArch64 now generates:
> 
> ...
> .L4:
>         ldr     q1, [x6, x3]
>         add     w4, w4, 1
>         ldr     q0, [x5, x3]
>         cmp     w4, w7
>         bif     v1.16b, v2.16b, v3.16b
>         fmul    v0.2d, v0.2d, v1.2d
>         str     q0, [x5, x3]
> 
> for the input:
> 
>      x * copysign(1.0, y)
> 
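> A minimal loop of this shape is enough to exercise the new pattern
> (an illustrative sketch only; the function and array names are
> hypothetical, not taken from 481.wrf):
> 
>      /* Each iteration computes x[i] * copysign (1.0, y[i]); the
>         vectorizer now expands the copysign to the BIF above.  */
>      void
>      scale_by_sign (double *restrict x, const double *restrict y, int n)
>      {
>        for (int i = 0; i < n; i++)
>          x[i] = x[i] * __builtin_copysign (1.0, y[i]);
>      }
> 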
> On 481.wrf in SPEC2006 this gives a 9.1% speedup on AArch64.
> Regtested on aarch64-none-linux-gnu and arm-none-eabi with no regressions.
> 
> Ok for trunk?

OK. I think this is now suitably minimal (and safe) for the last
day of Stage 3.

Thanks,
James

> gcc/
> 2017-01-19  Tamar Christina  <tamar.christ...@arm.com>
> 
>         * config/aarch64/aarch64.c (aarch64_simd_gen_const_vector_dup):
>         Change int to HOST_WIDE_INT.
>         * config/aarch64/aarch64-protos.h
>         (aarch64_simd_gen_const_vector_dup): Likewise.
>         * config/aarch64/aarch64-simd.md: Add copysign<mode>3.
> 
> gcc/testsuite/
> 2017-01-19  Tamar Christina  <tamar.christ...@arm.com>
> 
>         * lib/target-supports.exp
>         (check_effective_target_vect_call_copysignf): Enable for AArch64.

> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 29a3bd71151aa4fb7c6728f0fb52e2f3f233f41d..e75ba29f93e9e749791803ca3fa8d716ca261064 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -362,7 +362,7 @@ rtx aarch64_final_eh_return_addr (void);
>  rtx aarch64_mask_from_zextract_ops (rtx, rtx);
>  const char *aarch64_output_move_struct (rtx *operands);
>  rtx aarch64_return_addr (int, rtx);
> -rtx aarch64_simd_gen_const_vector_dup (machine_mode, int);
> +rtx aarch64_simd_gen_const_vector_dup (machine_mode, HOST_WIDE_INT);
>  bool aarch64_simd_mem_operand_p (rtx);
>  rtx aarch64_simd_vect_par_cnst_half (machine_mode, bool);
>  rtx aarch64_tls_get_addr (void);
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index a12e2268ef9b023112f8d05db0a86957fee83273..b61f79a09462b8cecca7dd2cc4ac0eb4be2dbc79 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -338,6 +338,24 @@
>    }
>  )
>  
> +(define_expand "copysign<mode>3"
> +  [(match_operand:VHSDF 0 "register_operand")
> +   (match_operand:VHSDF 1 "register_operand")
> +   (match_operand:VHSDF 2 "register_operand")]
> +  "TARGET_FLOAT && TARGET_SIMD"
> +{
> +  rtx v_bitmask = gen_reg_rtx (<V_cmp_result>mode);
> +  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
> +
> +  emit_move_insn (v_bitmask,
> +               aarch64_simd_gen_const_vector_dup (<V_cmp_result>mode,
> +                                                  HOST_WIDE_INT_M1U << bits));
> +  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
> +                                      operands[2], operands[1]));
> +  DONE;
> +}
> +)
> +
>  (define_insn "*aarch64_mul3_elt<mode>"
>   [(set (match_operand:VMUL 0 "register_operand" "=w")
>      (mult:VMUL
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 0cf7d12186af3e05ba8742af5a03425f61f51754..1a69605db5d2a4a0efb8c9f97a019de9dded40eb 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -11244,14 +11244,16 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
>  
>  /* Return a const_int vector of VAL.  */
>  rtx
> -aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
> +aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
>  {
>    int nunits = GET_MODE_NUNITS (mode);
>    rtvec v = rtvec_alloc (nunits);
>    int i;
>  
> +  rtx cache = GEN_INT (val);
> +
>    for (i=0; i < nunits; i++)
> -    RTVEC_ELT (v, i) = GEN_INT (val);
> +    RTVEC_ELT (v, i) = cache;
>  
>    return gen_rtx_CONST_VECTOR (mode, v);
>  }
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index b88d13c13f277e8cdb88b5dc8545ffa01408a0fa..12dbf475e31933cff781c2f9e9c1cfbe2ce108bb 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -6158,7 +6158,8 @@ proc check_effective_target_vect_call_copysignf { } {
>      } else {
>       set et_vect_call_copysignf_saved($et_index) 0
>       if { [istarget i?86-*-*] || [istarget x86_64-*-*]
> -          || [istarget powerpc*-*-*] } {
> +          || [istarget powerpc*-*-*]
> +          || [istarget aarch64*-*-*] } {
>          set et_vect_call_copysignf_saved($et_index) 1
>       }
>      }
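
For reference, the new copysign<mode>3 expander computes a per-lane bit
select: v_bitmask has only the sign bit of each lane set, and the
BSL/BIF picks that bit from operand 2 (y) and every remaining bit from
operand 1 (x).  A scalar C model of one 64-bit lane (a sketch for
illustration only, not part of the patch; the function name is made up):

    #include <stdint.h>
    #include <string.h>

    static double
    copysign_via_bsl (double x, double y)
    {
      /* HOST_WIDE_INT_M1U << 63 in the expander: only the sign bit set.  */
      uint64_t mask = (uint64_t) -1 << 63;
      uint64_t xb, yb, rb;
      memcpy (&xb, &x, sizeof xb);
      memcpy (&yb, &y, sizeof yb);
      /* BSL semantics: take the mask bits from y, the rest from x.  */
      rb = (yb & mask) | (xb & ~mask);
      double r;
      memcpy (&r, &rb, sizeof r);
      return r;
    }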
