Tamar Christina <tamar.christ...@arm.com> writes:
> Hi All,
>
> This adds an implementation for masked copysign along with an optimized
> pattern for masked copysign (x, -1).
>
> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	PR tree-optimization/109154
> 	* config/aarch64/aarch64-sve.md (cond_copysign<mode>): New.
>
> gcc/testsuite/ChangeLog:
>
> 	PR tree-optimization/109154
> 	* gcc.target/aarch64/sve/fneg-abs_5.c: New test.
>
> --- inline copy of patch --
> diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
> index 071400c820a5b106ddf9dc9faebb117975d74ea0..00ca30c24624dc661254568f45b61a14aa11c305 100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -6429,6 +6429,57 @@ (define_expand "copysign<mode>3"
>    }
>  )
>
> +(define_expand "cond_copysign<mode>"
> +  [(match_operand:SVE_FULL_F 0 "register_operand")
> +   (match_operand:<VPRED> 1 "register_operand")
> +   (match_operand:SVE_FULL_F 2 "register_operand")
> +   (match_operand:SVE_FULL_F 3 "nonmemory_operand")
> +   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
> +  "TARGET_SVE"
> +  {
> +    rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
> +    rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
> +    rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
> +    int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
> +
> +    rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
> +    rtx arg3 = lowpart_subreg (<V_INT_EQUIV>mode, operands[3], <MODE>mode);
> +    rtx arg4 = lowpart_subreg (<V_INT_EQUIV>mode, operands[4], <MODE>mode);
> +
> +    rtx v_sign_bitmask
> +      = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
> +					   HOST_WIDE_INT_M1U << bits);
> +
> +    /* copysign (x, -1) should instead be expanded as orr with the sign
> +       bit.  */
> +    if (!REG_P (operands[3]))
> +      {
> +	auto r0
> +	  = CONST_DOUBLE_REAL_VALUE (unwrap_const_vec_duplicate (operands[3]));
> +	if (-1 == real_to_integer (r0))
> +	  {
> +	    arg3 = force_reg (<V_INT_EQUIV>mode, v_sign_bitmask);
> +	    emit_insn (gen_cond_ior<v_int_equiv> (int_res, operands[1], arg2,
> +						  arg3, arg4));
> +	    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
> +	    DONE;
> +	  }
> +      }
> +
> +    operands[2] = force_reg (<MODE>mode, operands[3]);
> +    emit_insn (gen_and<v_int_equiv>3 (sign, arg3, v_sign_bitmask));
> +    emit_insn (gen_and<v_int_equiv>3
> +	       (mant, arg2,
> +		aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
> +						   ~(HOST_WIDE_INT_M1U
> +						     << bits))));
> +    emit_insn (gen_cond_ior<v_int_equiv> (int_res, operands[1], sign, mant,
> +					  arg4));
> +    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
> +    DONE;
> +  }
> +)
> +
>  (define_expand "xorsign<mode>3"
>    [(match_operand:SVE_FULL_F 0 "register_operand")
>     (match_operand:SVE_FULL_F 1 "register_operand")
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_5.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_5.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..f4ecbeecbe1290134e688f46a4389d17155e4a0a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_5.c
> @@ -0,0 +1,36 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#include <arm_neon.h>
> +#include <math.h>
> +
> +/*
> +** f1:
> +** ...
> +**	orr	z[0-9]+.s, p[0-9]+/m, z[0-9]+.s, z[0-9]+.s
> +** ...
> +*/
> +void f1 (float32_t *a, int n)
> +{
> +  for (int i = 0; i < (n & -8); i++)
> +    if (a[i] > n)
> +      a[i] = -fabsf (a[i]);
> +    else
> +      a[i] = n;
> +}
> +
> +/*
> +** f2:
> +** ...
> +**	orr	z[0-9]+.d, p[0-9]+/m, z[0-9]+.d, z[0-9]+.d
> +** ...
> +*/
> +void f2 (float64_t *a, int n)
> +{
> +  for (int i = 0; i < (n & -8); i++)
> +    if (a[i] > n)
> +      a[i] = -fabs (a[i]);
> +    else
> +      a[i] = n;
> +}

OK with the same change and under the same conditions as the FP/SIMD patch.

Thanks,
Richard
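
[Editor's note, not part of the review thread: a minimal scalar C sketch of
the bit-level identity the expander relies on, assuming IEEE-754 binary32
layout.  The helper names below are invented for illustration and do not
appear in the patch.  The general path ANDs the sign bit out of one input
and everything but the sign bit out of the other before a predicated ORR;
when the sign source is the constant -1, the two ANDs collapse into the
single predicated ORR that fneg-abs_5.c matches.]

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static inline uint32_t bits_of (float x)
{
  uint32_t u;
  memcpy (&u, &x, sizeof u);	/* type-pun without aliasing UB */
  return u;
}

static inline float float_of (uint32_t u)
{
  float x;
  memcpy (&x, &u, sizeof x);
  return x;
}

/* General case: sign from Y, magnitude from X.  Per lane this is the
   AND/AND plus predicated ORR sequence the expander emits.  */
static inline float copysign_bits (float x, float y)
{
  const uint32_t sign = UINT32_C (1) << 31;
  return float_of ((bits_of (y) & sign) | (bits_of (x) & ~sign));
}

/* Special case copysign (x, -1): the sign is known to be set, so the
   whole computation reduces to an ORR with the sign-bit mask.  */
static inline float copysign_neg1 (float x)
{
  return float_of (bits_of (x) | (UINT32_C (1) << 31));
}

/* Masked (cond_) semantics: inactive lanes take the fallback value,
   which corresponds to operand 4 of the pattern.  */
static inline float cond_copysign_lane (int active, float x, float y,
					float fallback)
{
  return active ? copysign_bits (x, y) : fallback;
}

int main (void)
{
  printf ("%f\n", copysign_bits (3.0f, -0.0f));		/* -3.000000 */
  printf ("%f\n", copysign_neg1 (2.5f));		/* -2.500000 */
  printf ("%f\n", cond_copysign_lane (0, 2.5f, -1.0f, 9.0f));	/* 9.000000 */
  return 0;
}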