Hi Saurabh,

On 9/18/24 21:53, Saurabh Jha wrote:
Hi Wilco,

Thanks for the patch. This mostly looks good. Just added a couple clarifications.

On 9/18/2024 8:17 PM, Wilco Dijkstra wrote:
v2: Add more testcase fixes.

The current copysign pattern has a mismatch in the predicates and constraints - operand[2] is a register_operand but also has an alternative X which allows any operand.  Since it is a floating point operation, having an integer alternative makes no sense.  Change the expander to always use the vector variant of copysign which results in better code.  Add a SVE bitmask move immediate alternative to the aarch64_simd_mov patterns so we emit a single move when SVE is available.

Passes bootstrap and regress, OK for commit?

gcc:
         * config/aarch64/aarch64.md (copysign<GPF:mode>3): Defer to AdvSIMD copysign.

Should the things after "(copysign..") be on a newline? I have mostly seen gcc ChangeLogs have file name and individual elements separated by newlines.

AFAIK, ChangeLog entries follow the 80-column limit and are indented by a tab. In Emacs with change-log-mode, Alt-Q formats the paragraph adequately.

Thanks,

Christophe



         (copysign<GPF:mode>3_insn): Remove pattern.
         * config/aarch64/aarch64-simd.md (aarch64_simd_mov<VDMOV:mode>): Add SVE movimm
         alternative.

Similar comment about file name and the instruction pattern being on separate lines.

         (aarch64_simd_mov<VQMOV:mode>): Likewise.  Remove redundant V2DI check.
         (copysign<mode>3): Make global.
         (ior<mode>3<vczle><vczbe>): Move Neon immediate alternative before the SVE one.

testsuite:
         * gcc.target/aarch64/copysign_3.c: New test.
         * gcc.target/aarch64/copysign_4.c: New test.
         * gcc.target/aarch64/fneg-abs_2.c: Allow .2s and .4s.
         * gcc.target/aarch64/sve/fneg-abs_1.c: Fixup test.
         * gcc.target/aarch64/sve/fneg-abs_2.c: Likewise.

---

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e70d59380ed295577721f15277c28829d42a0189..3077e920ce623c92d21193124747ff7ad010d006 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -161,6 +161,7 @@ (define_insn_and_split "*aarch64_simd_mov<VDMOV:mode>"
       [?w, r ; f_mcr              , *        , *] fmov\t%d0, %1
       [?r, r ; mov_reg            , *        , *] mov\t%0, %1
       [w , Dn; neon_move<q>       , simd     , *] << aarch64_output_simd_mov_immediate (operands[1], 64);
+     [w , vsl; *                 , sve      , *] mov\t%Z0.<Vetype>, %1
       [w , Dz; f_mcr              , *        , *] fmov\t%d0, xzr
       [w , Dx; neon_move          , simd     , 8] #
    }
@@ -190,6 +191,7 @@ (define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>"
       [?w , r ; multiple           , *   , 8] #
       [?r , r ; multiple           , *   , 8] #
       [w  , Dn; neon_move<q>       , simd, 4] << aarch64_output_simd_mov_immediate (operands[1], 128);
+     [w  , vsl; *                 , sve,  4] mov\t%Z0.<Vetype>, %1
       [w  , Dz; fmov               , *   , 4] fmov\t%d0, xzr
       [w  , Dx; neon_move          , simd, 8] #
    }
@@ -208,7 +210,6 @@ (define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>"
      else
        {
      if (FP_REGNUM_P (REGNO (operands[0]))
-        && <MODE>mode == V2DImode
          && aarch64_maybe_generate_simd_constant (operands[0], operands[1],
                               <MODE>mode))
        ;
@@ -648,7 +649,7 @@ (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle><
    [(set_attr "type" "neon_dot<VS:q>")]
  )
-(define_expand "copysign<mode>3"
+(define_expand "@copysign<mode>3"
    [(match_operand:VHSDF 0 "register_operand")
     (match_operand:VHSDF 1 "register_operand")
     (match_operand:VHSDF 2 "nonmemory_operand")]
@@ -1138,10 +1139,8 @@ (define_insn "ior<mode>3<vczle><vczbe>"
    "TARGET_SIMD"
    {@ [ cons: =0 , 1 , 2; attrs: arch ]
     [ w        , w , w  ; simd      ] orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
-     [ w        , 0 , vsl; sve       ] orr\t%Z0.<Vetype>, %Z0.<Vetype>, #%2
-     [ w        , 0 , Do ; simd      ] \
-       << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, \
-                         AARCH64_CHECK_ORR);
+     [ w        , 0 , Do ; simd      ] << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, AARCH64_CHECK_ORR);
+     [ w        , 0 , vsl; sve       ] orr\t%Z0.<Vetype>, %Z0.<Vetype>, %2
    }
    [(set_attr "type" "neon_logic<q>")]
  )
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c54b29cd64b9e0dc6c6d12735049386ccedc5408..e9b148e59abf81cee53cb0dd846af9a62bbad294 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -7218,20 +7218,11 @@ (define_expand "lrint<GPF:mode><GPI:mode>2"
  }
  )
-;; For copysign (x, y), we want to generate:
+;; For copysignf (x, y), we want to generate:
  ;;
-;;   LDR d2, #(1 << 63)
-;;   BSL v2.8b, [y], [x]
+;;    movi    v31.4s, 0x80, lsl 24
+;;    bit     v0.16b, v1.16b, v31.16b
  ;;
-;; or another, equivalent, sequence using one of BSL/BIT/BIF.  Because
-;; we expect these operations to nearly always operate on
-;; floating-point values, we do not want the operation to be
-;; simplified into a bit-field insert operation that operates on the
-;; integer side, since typically that would involve three inter-bank
-;; register copies.  As we do not expect copysign to be followed by
-;; other logical operations on the result, it seems preferable to keep
-;; this as an unspec operation, rather than exposing the underlying
-;; logic to the compiler.
  (define_expand "copysign<GPF:mode>3"
    [(match_operand:GPF 0 "register_operand")
@@ -7239,57 +7230,22 @@ (define_expand "copysign<GPF:mode>3"
     (match_operand:GPF 2 "nonmemory_operand")]
    "TARGET_SIMD"
  {
-  rtx signbit_const = GEN_INT (HOST_WIDE_INT_M1U
-                   << (GET_MODE_BITSIZE (<MODE>mode) - 1));
-  /* copysign (x, -1) should instead be expanded as orr with the sign
-     bit.  */
-  rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
-  if (GET_CODE (op2_elt) == CONST_DOUBLE
-      && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
-    {
-      rtx v_bitmask
-    = force_reg (V2<V_INT_EQUIV>mode,
-             gen_const_vec_duplicate (V2<V_INT_EQUIV>mode,
-                          signbit_const));
-
-      emit_insn (gen_iorv2<v_int_equiv>3 (
-    lowpart_subreg (V2<V_INT_EQUIV>mode, operands[0], <MODE>mode),
-    lowpart_subreg (V2<V_INT_EQUIV>mode, operands[1], <MODE>mode),
-    v_bitmask));
-      DONE;
-    }
-
-  machine_mode int_mode = <V_INT_EQUIV>mode;
-  rtx bitmask = gen_reg_rtx (int_mode);
-  emit_move_insn (bitmask, signbit_const);
-  operands[2] = force_reg (<MODE>mode, operands[2]);
-  emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2],
-                       bitmask));
+  rtx tmp = gen_reg_rtx (<VCONQ>mode);
+  rtx op1 = lowpart_subreg (<VCONQ>mode, operands[1], <MODE>mode);
+  rtx op2 = REG_P (operands[2])
+          ? lowpart_subreg (<VCONQ>mode, operands[2], <MODE>mode)
+          : gen_const_vec_duplicate (<VCONQ>mode, operands[2]);
+  emit_insn (gen_copysign3 (<VCONQ>mode, tmp, op1, op2));
+  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, tmp, <VCONQ>mode));
    DONE;
  }
  )
-(define_insn "copysign<GPF:mode>3_insn"
-  [(set (match_operand:GPF 0 "register_operand")
-    (unspec:GPF [(match_operand:GPF 1 "register_operand")
-             (match_operand:GPF 2 "register_operand")
-             (match_operand:<V_INT_EQUIV> 3 "register_operand")]
-     UNSPEC_COPYSIGN))]
-  "TARGET_SIMD"
-  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: type  ]
-     [ w        , w , w , 0 ; neon_bsl<q>  ] bsl\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
-     [ w        , 0 , w , w ; neon_bsl<q>  ] bit\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
-     [ w        , w , 0 , w ; neon_bsl<q>  ] bif\t%0.<Vbtype>, %1.<Vbtype>, %3.<Vbtype>
-     [ r        , r , 0 , X ; bfm          ] bfxil\t%<w1>0, %<w1>1, #0, <sizem1>
-  }
-)
-
-
-;; For xorsign (x, y), we want to generate:
+;; For xorsignf (x, y), we want to generate:
  ;;
-;; LDR   d2, #1<<63
-;; AND   v3.8B, v1.8B, v2.8B
-;; EOR   v0.8B, v0.8B, v3.8B
+;;    movi    v31.4s, 0x80, lsl 24
+;;    and     v31.16b, v31.16b, v1.16b
+;;    eor     v0.16b, v31.16b, v0.16b
  ;;
  (define_expand "@xorsign<mode>3"
diff --git a/gcc/testsuite/gcc.target/aarch64/copysign_3.c b/gcc/testsuite/gcc.target/aarch64/copysign_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..be48682420f1ff84e80af9efd9d11f64bd6e8052
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/copysign_3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+float f1 (float x, float y)
+{
+  return __builtin_copysignf (1.0, x) * __builtin_copysignf (1.0, y);
+}
+
+double f2 (double x, double y)
+{
+  return __builtin_copysign (1.0, x) * __builtin_copysign (1.0, y);
+}
+
+/* { dg-final { scan-assembler-times "movi\t" 2 } } */
+/* { dg-final { scan-assembler-not "copysign\tw" } } */
+/* { dg-final { scan-assembler-not "dup\tw" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/copysign_4.c b/gcc/testsuite/gcc.target/aarch64/copysign_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..f3cec2fc9c21a4eaa3b6556479aeb15c04358a1c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/copysign_4.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8-a+sve" } */
+
+float f1 (float x, float y)
+{
+  return __builtin_copysignf (1.0, x) * __builtin_copysignf (1.0, y);
+}
+
+double f2 (double x, double y)
+{
+  return __builtin_copysign (1.0, x) * __builtin_copysign (1.0, y);
+}
+
+/* { dg-final { scan-assembler-times "movi\t" 1 } } */
+/* { dg-final { scan-assembler-times "mov\tz" 1 } } */
+/* { dg-final { scan-assembler-not "copysign\tw" } } */
+/* { dg-final { scan-assembler-not "dup\tw" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
index 18d10ee834d5d9b4361d890447060e78f09d3a73..1544bc5f1a736e95dd8bd2c608405aebb54ded1f 100644
--- a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
@@ -9,7 +9,7 @@
  /*
  ** f1:
-**    orr    v[0-9]+.2s, #?128, lsl #?24
+**    orr    v[0-9]+.[24]s, #?128, lsl #?24
  **    ret
  */
  float32_t f1 (float32_t a)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
index a8b27199ff83d0eebadfc7dcf03f94e1229d76b8..1ebdc6aaeb102da25ad561b24641e72a652175fa 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
@@ -6,7 +6,7 @@
  /*
  ** t1:
-**    orr    z[0-9]+.s, z[0-9]+.s, #-2147483648
+**    orr    v0.2s, #?128, lsl #?24
  **    ret
  */
  float32x2_t t1 (float32x2_t a)
@@ -16,7 +16,7 @@ float32x2_t t1 (float32x2_t a)
  /*
  ** t2:
-**    orr    z[0-9]+.s, z[0-9]+.s, #-2147483648
+**    orr    v0.4s, #?128, lsl #?24
  **    ret
  */
  float32x4_t t2 (float32x4_t a)
@@ -26,7 +26,7 @@ float32x4_t t2 (float32x4_t a)
  /*
  ** t3:
-**    orr    z[0-9]+.d, z[0-9]+.d, #-9223372036854775808
+**    orr    z[0-9]+.d, z[0-9]+.d, -9223372036854775808
  **    ret
  */
  float64x2_t t3 (float64x2_t a)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c index 19a7695e605bc8aced486a9c450d1cdc6be4691a..122152c0ebe4ea6840e418e75a2cadbfc9b0aba4 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
@@ -7,7 +7,7 @@
  /*
  ** f1:
-**    orr    z0.s, z0.s, #-2147483648
+**    orr    v0.4s, #?128, lsl #?24
  **    ret
  */
  float32_t f1 (float32_t a)
@@ -17,7 +17,7 @@ float32_t f1 (float32_t a)
  /*
  ** f2:
-**    orr    z0.d, z0.d, #-9223372036854775808
+**    orr    z0.d, z0.d, -9223372036854775808
  **    ret
  */
  float64_t f2 (float64_t a)


Reply via email to