Hi Saurabh,

On 9/18/24 21:53, Saurabh Jha wrote:
Hi Wilco,

Thanks for the patch. This mostly looks good. Just added a couple clarifications.

On 9/18/2024 8:17 PM, Wilco Dijkstra wrote:
v2: Add more testcase fixes.

The current copysign pattern has a mismatch in the predicates and constraints - operand[2] is a register_operand but also has an alternative X which allows any operand.  Since it is a floating point operation, having an integer alternative makes no sense.  Change the expander to always use the vector variant of copysign which results in better code.  Add a SVE bitmask move immediate alternative to the aarch64_simd_mov patterns so we emit a single move when SVE is available.

Passes bootstrap and regress, OK for commit?

gcc:
         * config/aarch64/aarch64.md (copysign<GPF:mode>3): Defer to AdvSIMD copysign.

Should the things after "(copysign..") be on a newline? I have mostly seen gcc ChangeLogs have file name and individual elements separated by newlines.

AFAIK, ChangeLog entries follow the 80-column limit and are indented by a tab. In Emacs with change-log-mode, Alt-Q formats the paragraph adequately.

Thanks,

Christophe



         (copysign<GPF:mode>3_insn): Remove pattern.
         * config/aarch64/aarch64-simd.md (aarch64_simd_mov<VDMOV:mode>): Add SVE movimm
         alternative.

Similar comment about file name and the instruction pattern being on separate lines.

         (aarch64_simd_mov<VQMOV:mode>): Likewise.  Remove redundant V2DI check.
         (copysign<mode>3): Make global.
         (ior<mode>3<vczle><vczbe>): Move Neon immediate alternative before the SVE one.

testsuite:
         * gcc.target/aarch64/copysign_3.c: New test.
         * gcc.target/aarch64/copysign_4.c: New test.
         * gcc.target/aarch64/fneg-abs_2.c: Allow .2s and .4s.
         * gcc.target/aarch64/sve/fneg-abs_1.c: Fixup test.
         * gcc.target/aarch64/sve/fneg-abs_2.c: Likewise.

---

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e70d59380ed295577721f15277c28829d42a0189..3077e920ce623c92d21193124747ff7ad010d006 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -161,6 +161,7 @@ (define_insn_and_split "*aarch64_simd_mov<VDMOV:mode>"
       [?w, r ; f_mcr              , *        , *] fmov\t%d0, %1
       [?r, r ; mov_reg            , *        , *] mov\t%0, %1
       [w , Dn; neon_move<q>       , simd     , *] << aarch64_output_simd_mov_immediate (operands[1], 64);
+     [w , vsl; *                 , sve      , *] mov\t%Z0.<Vetype>, %1
       [w , Dz; f_mcr              , *        , *] fmov\t%d0, xzr
       [w , Dx; neon_move          , simd     , 8] #
    }
@@ -190,6 +191,7 @@ (define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>"
       [?w , r ; multiple           , *   , 8] #
       [?r , r ; multiple           , *   , 8] #
       [w  , Dn; neon_move<q>       , simd, 4] << aarch64_output_simd_mov_immediate (operands[1], 128);
+     [w  , vsl; *                 , sve,  4] mov\t%Z0.<Vetype>, %1
       [w  , Dz; fmov               , *   , 4] fmov\t%d0, xzr
       [w  , Dx; neon_move          , simd, 8] #
    }
@@ -208,7 +210,6 @@ (define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>"
      else
        {
      if (FP_REGNUM_P (REGNO (operands[0]))
-        && <MODE>mode == V2DImode
          && aarch64_maybe_generate_simd_constant (operands[0], operands[1],
                               <MODE>mode))
        ;
@@ -648,7 +649,7 @@ (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle><
    [(set_attr "type" "neon_dot<VS:q>")]
  )
-(define_expand "copysign<mode>3"
+(define_expand "@copysign<mode>3"
    [(match_operand:VHSDF 0 "register_operand")
     (match_operand:VHSDF 1 "register_operand")
     (match_operand:VHSDF 2 "nonmemory_operand")]
@@ -1138,10 +1139,8 @@ (define_insn "ior<mode>3<vczle><vczbe>"
    "TARGET_SIMD"
    {@ [ cons: =0 , 1 , 2; attrs: arch ]
     [ w        , w , w  ; simd      ] orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
-     [ w        , 0 , vsl; sve       ] orr\t%Z0.<Vetype>, %Z0.<Vetype>, #%2
-     [ w        , 0 , Do ; simd      ] \
-       << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, \
-                         AARCH64_CHECK_ORR);
+     [ w        , 0 , Do ; simd      ] << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, AARCH64_CHECK_ORR);
+     [ w        , 0 , vsl; sve       ] orr\t%Z0.<Vetype>, %Z0.<Vetype>, %2
    }
    [(set_attr "type" "neon_logic<q>")]
  )
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c54b29cd64b9e0dc6c6d12735049386ccedc5408..e9b148e59abf81cee53cb0dd846af9a62bbad294 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -7218,20 +7218,11 @@ (define_expand "lrint<GPF:mode><GPI:mode>2"
  }
  )
-;; For copysign (x, y), we want to generate:
+;; For copysignf (x, y), we want to generate:
  ;;
-;;   LDR d2, #(1 << 63)
-;;   BSL v2.8b, [y], [x]
+;;    movi    v31.4s, 0x80, lsl 24
+;;    bit     v0.16b, v1.16b, v31.16b
  ;;
-;; or another, equivalent, sequence using one of BSL/BIT/BIF.  Because
-;; we expect these operations to nearly always operate on
-;; floating-point values, we do not want the operation to be
-;; simplified into a bit-field insert operation that operates on the
-;; integer side, since typically that would involve three inter-bank
-;; register copies.  As we do not expect copysign to be followed by
-;; other logical operations on the result, it seems preferable to keep
-;; this as an unspec operation, rather than exposing the underlying
-;; logic to the compiler.
  (define_expand "copysign<GPF:mode>3"
    [(match_operand:GPF 0 "register_operand")
@@ -7239,57 +7230,22 @@ (define_expand "copysign<GPF:mode>3"
     (match_operand:GPF 2 "nonmemory_operand")]
    "TARGET_SIMD"
  {
-  rtx signbit_const = GEN_INT (HOST_WIDE_INT_M1U
-                   << (GET_MODE_BITSIZE (<MODE>mode) - 1));
-  /* copysign (x, -1) should instead be expanded as orr with the sign
-     bit.  */
-  rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
-  if (GET_CODE (op2_elt) == CONST_DOUBLE
-      && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
-    {
-      rtx v_bitmask
-    = force_reg (V2<V_INT_EQUIV>mode,
-             gen_const_vec_duplicate (V2<V_INT_EQUIV>mode,
-                          signbit_const));
-
-      emit_insn (gen_iorv2<v_int_equiv>3 (
-    lowpart_subreg (V2<V_INT_EQUIV>mode, operands[0], <MODE>mode),
-    lowpart_subreg (V2<V_INT_EQUIV>mode, operands[1], <MODE>mode),
-    v_bitmask));
-      DONE;
-    }
-
-  machine_mode int_mode = <V_INT_EQUIV>mode;
-  rtx bitmask = gen_reg_rtx (int_mode);
-  emit_move_insn (bitmask, signbit_const);
-  operands[2] = force_reg (<MODE>mode, operands[2]);
-  emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2],
-                       bitmask));
+  rtx tmp = gen_reg_rtx (<VCONQ>mode);
+  rtx op1 = lowpart_subreg (<VCONQ>mode, operands[1], <MODE>mode);
+  rtx op2 = REG_P (operands[2])
+          ? lowpart_subreg (<VCONQ>mode, operands[2], <MODE>mode)
+          : gen_const_vec_duplicate (<VCONQ>mode, operands[2]);
+  emit_insn (gen_copysign3 (<VCONQ>mode, tmp, op1, op2));
+  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, tmp, <VCONQ>mode));
    DONE;
  }
  )
-(define_insn "copysign<GPF:mode>3_insn"
-  [(set (match_operand:GPF 0 "register_operand")
-    (unspec:GPF [(match_operand:GPF 1 "register_operand")
-             (match_operand:GPF 2 "register_operand")
-             (match_operand:<V_INT_EQUIV> 3 "register_operand")]
-     UNSPEC_COPYSIGN))]
-  "TARGET_SIMD"
-  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: type  ]
-     [ w        , w , w , 0 ; neon_bsl<q>  ] bsl\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
-     [ w        , 0 , w , w ; neon_bsl<q>  ] bit\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
-     [ w        , w , 0 , w ; neon_bsl<q>  ] bif\t%0.<Vbtype>, %1.<Vbtype>, %3.<Vbtype>
-     [ r        , r , 0 , X ; bfm          ] bfxil\t%<w1>0, %<w1>1, #0, <sizem1>
-  }
-)
-
-
-;; For xorsign (x, y), we want to generate:
+;; For xorsignf (x, y), we want to generate:
  ;;
-;; LDR   d2, #1<<63
-;; AND   v3.8B, v1.8B, v2.8B
-;; EOR   v0.8B, v0.8B, v3.8B
+;;    movi    v31.4s, 0x80, lsl 24
+;;    and     v31.16b, v31.16b, v1.16b
+;;    eor     v0.16b, v31.16b, v0.16b
  ;;
  (define_expand "@xorsign<mode>3"
diff --git a/gcc/testsuite/gcc.target/aarch64/copysign_3.c b/gcc/testsuite/gcc.target/aarch64/copysign_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..be48682420f1ff84e80af9efd9d11f64bd6e8052
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/copysign_3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+float f1 (float x, float y)
+{
+  return __builtin_copysignf (1.0, x) * __builtin_copysignf (1.0, y);
+}
+
+double f2 (double x, double y)
+{
+  return __builtin_copysign (1.0, x) * __builtin_copysign (1.0, y);
+}
+
+/* { dg-final { scan-assembler-times "movi\t" 2 } } */
+/* { dg-final { scan-assembler-not "copysign\tw" } } */
+/* { dg-final { scan-assembler-not "dup\tw" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/copysign_4.c b/gcc/testsuite/gcc.target/aarch64/copysign_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..f3cec2fc9c21a4eaa3b6556479aeb15c04358a1c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/copysign_4.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8-a+sve" } */
+
+float f1 (float x, float y)
+{
+  return __builtin_copysignf (1.0, x) * __builtin_copysignf (1.0, y);
+}
+
+double f2 (double x, double y)
+{
+  return __builtin_copysign (1.0, x) * __builtin_copysign (1.0, y);
+}
+
+/* { dg-final { scan-assembler-times "movi\t" 1 } } */
+/* { dg-final { scan-assembler-times "mov\tz" 1 } } */
+/* { dg-final { scan-assembler-not "copysign\tw" } } */
+/* { dg-final { scan-assembler-not "dup\tw" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
index 18d10ee834d5d9b4361d890447060e78f09d3a73..1544bc5f1a736e95dd8bd2c608405aebb54ded1f 100644
--- a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
@@ -9,7 +9,7 @@
  /*
  ** f1:
-**    orr    v[0-9]+.2s, #?128, lsl #?24
+**    orr    v[0-9]+.[24]s, #?128, lsl #?24
  **    ret
  */
  float32_t f1 (float32_t a)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
index a8b27199ff83d0eebadfc7dcf03f94e1229d76b8..1ebdc6aaeb102da25ad561b24641e72a652175fa 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
@@ -6,7 +6,7 @@
  /*
  ** t1:
-**    orr    z[0-9]+.s, z[0-9]+.s, #-2147483648
+**    orr    v0.2s, #?128, lsl #?24
  **    ret
  */
  float32x2_t t1 (float32x2_t a)
@@ -16,7 +16,7 @@ float32x2_t t1 (float32x2_t a)
  /*
  ** t2:
-**    orr    z[0-9]+.s, z[0-9]+.s, #-2147483648
+**    orr    v0.4s, #?128, lsl #?24
  **    ret
  */
  float32x4_t t2 (float32x4_t a)
@@ -26,7 +26,7 @@ float32x4_t t2 (float32x4_t a)
  /*
  ** t3:
-**    orr    z[0-9]+.d, z[0-9]+.d, #-9223372036854775808
+**    orr    z[0-9]+.d, z[0-9]+.d, -9223372036854775808
  **    ret
  */
  float64x2_t t3 (float64x2_t a)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c index 19a7695e605bc8aced486a9c450d1cdc6be4691a..122152c0ebe4ea6840e418e75a2cadbfc9b0aba4 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
@@ -7,7 +7,7 @@
  /*
  ** f1:
-**    orr    z0.s, z0.s, #-2147483648
+**    orr    v0.4s, #?128, lsl #?24
  **    ret
  */
  float32_t f1 (float32_t a)
@@ -17,7 +17,7 @@ float32_t f1 (float32_t a)
  /*
  ** f2:
-**    orr    z0.d, z0.d, #-9223372036854775808
+**    orr    z0.d, z0.d, -9223372036854775808
  **    ret
  */
  float64_t f2 (float64_t a)


Reply via email to