Tamar Christina <tamar.christ...@arm.com> writes:
> Hi All,
>
> This adds an implementation for masked copysign along with an optimized
> pattern for masked copysign (x, -1).
>
> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>       PR tree-optimization/109154
>       * config/aarch64/aarch64-sve.md (cond_copysign<mode>): New.
>
> gcc/testsuite/ChangeLog:
>
>       PR tree-optimization/109154
>       * gcc.target/aarch64/sve/fneg-abs_5.c: New test.
>
> --- inline copy of patch -- 
> diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
> index 071400c820a5b106ddf9dc9faebb117975d74ea0..00ca30c24624dc661254568f45b61a14aa11c305 100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -6429,6 +6429,57 @@ (define_expand "copysign<mode>3"
>    }
>  )
>  
> +(define_expand "cond_copysign<mode>"
> +  [(match_operand:SVE_FULL_F 0 "register_operand")
> +   (match_operand:<VPRED> 1 "register_operand")
> +   (match_operand:SVE_FULL_F 2 "register_operand")
> +   (match_operand:SVE_FULL_F 3 "nonmemory_operand")
> +   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
> +  "TARGET_SVE"
> +  {
> +    rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
> +    rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
> +    rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
> +    int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
> +
> +    rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
> +    rtx arg3 = lowpart_subreg (<V_INT_EQUIV>mode, operands[3], <MODE>mode);
> +    rtx arg4 = lowpart_subreg (<V_INT_EQUIV>mode, operands[4], <MODE>mode);
> +
> +    rtx v_sign_bitmask
> +      = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
> +                                        HOST_WIDE_INT_M1U << bits);
> +
> +    /* copysign (x, -1) should instead be expanded as orr with the sign
> +       bit.  */
> +    if (!REG_P (operands[3]))
> +      {
> +     auto r0
> +       = CONST_DOUBLE_REAL_VALUE (unwrap_const_vec_duplicate (operands[3]));
> +     if (-1 == real_to_integer (r0))

OK with the same change and under the same conditions as the FP/SIMD patch.

Thanks,
Richard

> +       {
> +         arg3 = force_reg (<V_INT_EQUIV>mode, v_sign_bitmask);
> +         emit_insn (gen_cond_ior<v_int_equiv> (int_res, operands[1], arg2,
> +                                               arg3, arg4));
> +         emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
> +         DONE;
> +       }
> +      }
> +
> +    operands[2] = force_reg (<MODE>mode, operands[3]);
> +    emit_insn (gen_and<v_int_equiv>3 (sign, arg3, v_sign_bitmask));
> +    emit_insn (gen_and<v_int_equiv>3
> +            (mant, arg2,
> +             aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
> +                                                ~(HOST_WIDE_INT_M1U
> +                                                  << bits))));
> +    emit_insn (gen_cond_ior<v_int_equiv> (int_res, operands[1], sign, mant,
> +                                       arg4));
> +    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
> +    DONE;
> +  }
> +)
> +
>  (define_expand "xorsign<mode>3"
>    [(match_operand:SVE_FULL_F 0 "register_operand")
>     (match_operand:SVE_FULL_F 1 "register_operand")
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_5.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_5.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..f4ecbeecbe1290134e688f46a4389d17155e4a0a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_5.c
> @@ -0,0 +1,36 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#include <arm_neon.h>
> +#include <math.h>
> +
> +/*
> +** f1:
> +**   ...
> +**   orr     z[0-9]+.s, p[0-9]+/m, z[0-9]+.s, z[0-9]+.s
> +**   ...
> +*/
> +void f1 (float32_t *a, int n)
> +{
> +  for (int i = 0; i < (n & -8); i++)
> +   if (a[i] > n)
> +     a[i] = -fabsf (a[i]);
> +   else
> +     a[i] = n;
> +}
> +
> +/*
> +** f2:
> +**   ...
> +**   orr     z[0-9]+.d, p[0-9]+/m, z[0-9]+.d, z[0-9]+.d
> +**   ...
> +*/
> +void f2 (float64_t *a, int n)
> +{
> +  for (int i = 0; i < (n & -8); i++)
> +   if (a[i] > n)
> +     a[i] = -fabs (a[i]);
> +   else
> +     a[i] = n;
> +}

Reply via email to