The current copysign pattern has a mismatch between its predicates and constraints: operand[2] uses the register_operand predicate, yet one alternative uses the X constraint, which accepts any operand. Since copysign is a floating-point operation, an integer alternative makes no sense. Change the expander to always defer to the AdvSIMD vector variant of copysign, which results in better code. Also add an SVE bitmask move-immediate alternative to the aarch64_simd_mov patterns so that a single move is emitted when SVE is available.
Passes bootstrap and regress, OK for commit? gcc: * config/aarch64/aarch64.md (copysign<GPF:mode>3): Defer to AdvSIMD copysign. (copysign<GPF:mode>3_insn): Remove pattern. * config/aarch64/aarch64-simd.md (aarch64_simd_mov<VDMOV:mode>): Add SVE movimm alternative. (aarch64_simd_mov<VQMOV:mode>): Likewise. Remove redundant V2DI check. (copysign<mode>3): Make global. (ior<mode>3<vczle><vczbe>): Move Neon immediate alternative before the SVE one. testsuite: * gcc.target/aarch64/copysign_3.c: New test. * gcc.target/aarch64/copysign_4.c: New test. * gcc.target/aarch64/fneg-abs_2.c: Allow .2s and .4s. --- diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index e70d59380ed295577721f15277c28829d42a0189..3077e920ce623c92d21193124747ff7ad010d006 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -161,6 +161,7 @@ (define_insn_and_split "*aarch64_simd_mov<VDMOV:mode>" [?w, r ; f_mcr , * , *] fmov\t%d0, %1 [?r, r ; mov_reg , * , *] mov\t%0, %1 [w , Dn; neon_move<q> , simd , *] << aarch64_output_simd_mov_immediate (operands[1], 64); + [w , vsl; * , sve , *] mov\t%Z0.<Vetype>, %1 [w , Dz; f_mcr , * , *] fmov\t%d0, xzr [w , Dx; neon_move , simd , 8] # } @@ -190,6 +191,7 @@ (define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>" [?w , r ; multiple , * , 8] # [?r , r ; multiple , * , 8] # [w , Dn; neon_move<q> , simd, 4] << aarch64_output_simd_mov_immediate (operands[1], 128); + [w , vsl; * , sve, 4] mov\t%Z0.<Vetype>, %1 [w , Dz; fmov , * , 4] fmov\t%d0, xzr [w , Dx; neon_move , simd, 8] # } @@ -208,7 +210,6 @@ (define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>" else { if (FP_REGNUM_P (REGNO (operands[0])) - && <MODE>mode == V2DImode && aarch64_maybe_generate_simd_constant (operands[0], operands[1], <MODE>mode)) ; @@ -648,7 +649,7 @@ (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle>< [(set_attr "type" "neon_dot<VS:q>")] ) -(define_expand "copysign<mode>3" +(define_expand 
"@copysign<mode>3" [(match_operand:VHSDF 0 "register_operand") (match_operand:VHSDF 1 "register_operand") (match_operand:VHSDF 2 "nonmemory_operand")] @@ -1138,10 +1139,8 @@ (define_insn "ior<mode>3<vczle><vczbe>" "TARGET_SIMD" {@ [ cons: =0 , 1 , 2; attrs: arch ] [ w , w , w ; simd ] orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype> - [ w , 0 , vsl; sve ] orr\t%Z0.<Vetype>, %Z0.<Vetype>, #%2 - [ w , 0 , Do ; simd ] \ - << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, \ - AARCH64_CHECK_ORR); + [ w , 0 , Do ; simd ] << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, AARCH64_CHECK_ORR); + [ w , 0 , vsl; sve ] orr\t%Z0.<Vetype>, %Z0.<Vetype>, %2 } [(set_attr "type" "neon_logic<q>")] ) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index c54b29cd64b9e0dc6c6d12735049386ccedc5408..e9b148e59abf81cee53cb0dd846af9a62bbad294 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -7218,20 +7218,11 @@ (define_expand "lrint<GPF:mode><GPI:mode>2" } ) -;; For copysign (x, y), we want to generate: +;; For copysignf (x, y), we want to generate: ;; -;; LDR d2, #(1 << 63) -;; BSL v2.8b, [y], [x] +;; movi v31.4s, 0x80, lsl 24 +;; bit v0.16b, v1.16b, v31.16b ;; -;; or another, equivalent, sequence using one of BSL/BIT/BIF. Because -;; we expect these operations to nearly always operate on -;; floating-point values, we do not want the operation to be -;; simplified into a bit-field insert operation that operates on the -;; integer side, since typically that would involve three inter-bank -;; register copies. As we do not expect copysign to be followed by -;; other logical operations on the result, it seems preferable to keep -;; this as an unspec operation, rather than exposing the underlying -;; logic to the compiler. 
(define_expand "copysign<GPF:mode>3" [(match_operand:GPF 0 "register_operand") @@ -7239,57 +7230,22 @@ (define_expand "copysign<GPF:mode>3" (match_operand:GPF 2 "nonmemory_operand")] "TARGET_SIMD" { - rtx signbit_const = GEN_INT (HOST_WIDE_INT_M1U - << (GET_MODE_BITSIZE (<MODE>mode) - 1)); - /* copysign (x, -1) should instead be expanded as orr with the sign - bit. */ - rtx op2_elt = unwrap_const_vec_duplicate (operands[2]); - if (GET_CODE (op2_elt) == CONST_DOUBLE - && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt))) - { - rtx v_bitmask - = force_reg (V2<V_INT_EQUIV>mode, - gen_const_vec_duplicate (V2<V_INT_EQUIV>mode, - signbit_const)); - - emit_insn (gen_iorv2<v_int_equiv>3 ( - lowpart_subreg (V2<V_INT_EQUIV>mode, operands[0], <MODE>mode), - lowpart_subreg (V2<V_INT_EQUIV>mode, operands[1], <MODE>mode), - v_bitmask)); - DONE; - } - - machine_mode int_mode = <V_INT_EQUIV>mode; - rtx bitmask = gen_reg_rtx (int_mode); - emit_move_insn (bitmask, signbit_const); - operands[2] = force_reg (<MODE>mode, operands[2]); - emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2], - bitmask)); + rtx tmp = gen_reg_rtx (<VCONQ>mode); + rtx op1 = lowpart_subreg (<VCONQ>mode, operands[1], <MODE>mode); + rtx op2 = REG_P (operands[2]) + ? 
lowpart_subreg (<VCONQ>mode, operands[2], <MODE>mode) + : gen_const_vec_duplicate (<VCONQ>mode, operands[2]); + emit_insn (gen_copysign3 (<VCONQ>mode, tmp, op1, op2)); + emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, tmp, <VCONQ>mode)); DONE; } ) -(define_insn "copysign<GPF:mode>3_insn" - [(set (match_operand:GPF 0 "register_operand") - (unspec:GPF [(match_operand:GPF 1 "register_operand") - (match_operand:GPF 2 "register_operand") - (match_operand:<V_INT_EQUIV> 3 "register_operand")] - UNSPEC_COPYSIGN))] - "TARGET_SIMD" - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: type ] - [ w , w , w , 0 ; neon_bsl<q> ] bsl\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype> - [ w , 0 , w , w ; neon_bsl<q> ] bit\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype> - [ w , w , 0 , w ; neon_bsl<q> ] bif\t%0.<Vbtype>, %1.<Vbtype>, %3.<Vbtype> - [ r , r , 0 , X ; bfm ] bfxil\t%<w1>0, %<w1>1, #0, <sizem1> - } -) - - -;; For xorsign (x, y), we want to generate: +;; For xorsignf (x, y), we want to generate: ;; -;; LDR d2, #1<<63 -;; AND v3.8B, v1.8B, v2.8B -;; EOR v0.8B, v0.8B, v3.8B +;; movi v31.4s, 0x80, lsl 24 +;; and v31.16b, v31.16b, v1.16b +;; eor v0.16b, v31.16b, v0.16b ;; (define_expand "@xorsign<mode>3" diff --git a/gcc/testsuite/gcc.target/aarch64/copysign_3.c b/gcc/testsuite/gcc.target/aarch64/copysign_3.c new file mode 100644 index 0000000000000000000000000000000000000000..be48682420f1ff84e80af9efd9d11f64bd6e8052 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/copysign_3.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +float f1 (float x, float y) +{ + return __builtin_copysignf (1.0, x) * __builtin_copysignf (1.0, y); +} + +double f2 (double x, double y) +{ + return __builtin_copysign (1.0, x) * __builtin_copysign (1.0, y); +} + +/* { dg-final { scan-assembler-times "movi\t" 2 } } */ +/* { dg-final { scan-assembler-not "copysign\tw" } } */ +/* { dg-final { scan-assembler-not "dup\tw" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/copysign_4.c 
b/gcc/testsuite/gcc.target/aarch64/copysign_4.c new file mode 100644 index 0000000000000000000000000000000000000000..f3cec2fc9c21a4eaa3b6556479aeb15c04358a1c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/copysign_4.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=armv8-a+sve" } */ + +float f1 (float x, float y) +{ + return __builtin_copysignf (1.0, x) * __builtin_copysignf (1.0, y); +} + +double f2 (double x, double y) +{ + return __builtin_copysign (1.0, x) * __builtin_copysign (1.0, y); +} + +/* { dg-final { scan-assembler-times "movi\t" 1 } } */ +/* { dg-final { scan-assembler-times "mov\tz" 1 } } */ +/* { dg-final { scan-assembler-not "copysign\tw" } } */ +/* { dg-final { scan-assembler-not "dup\tw" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c index 18d10ee834d5d9b4361d890447060e78f09d3a73..1544bc5f1a736e95dd8bd2c608405aebb54ded1f 100644 --- a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c @@ -9,7 +9,7 @@ /* ** f1: -** orr v[0-9]+.2s, #?128, lsl #?24 +** orr v[0-9]+.[24]s, #?128, lsl #?24 ** ret */ float32_t f1 (float32_t a)