[PATCH]AArch64 xorsign: Fix scalar xorsign lowering

Tamar Christina via Gcc-patches Fri, 01 Sep 2023 05:55:35 -0700

Hi All,

In GCC-9 our scalar xorsign pattern broke and we didn't notice it because the
testcase was not strong enough.  With this commit


8d2d39587d941a40f25ea0144cceb677df115040 is the first bad commit
commit 8d2d39587d941a40f25ea0144cceb677df115040
Author: Segher Boessenkool <seg...@kernel.crashing.org>
Date:   Mon Oct 22 22:23:39 2018 +0200

    combine: Do not combine moves from hard registers

combine started introducing useless moves on hard registers,  when one of the
arguments to our scalar xorsign is a hardreg we get an additional move inserted.

This leads to combine forming an AND with the immediate inside and using the
superflous move to do the r->w move, instead of what we wanted before which was
for the `and` to be a vector and and have reload pick the right alternative.

To fix this the patch just forces the use of the vector version directly and
so combine has no chance to mess it up.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        * config/aarch64/aarch64-simd.md (xorsign<mode>3): Renamed to..
        (@xorsign<mode>3): ...This.
        * config/aarch64/aarch64.md (xorsign<mode>3): Renamed to...
        (@xorsign<mode>3): ..This and emit vectors directly
        * config/aarch64/iterators.md (VCONQ): Add SF and DF.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/xorsign.c:

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 
f67eb70577d0c2d9911d8c867d38a4d0b390337c..e955691f1be8830efacc237465119764ce2a4942
 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -500,7 +500,7 @@ (define_expand "ctz<mode>2"
   }
 )
 
-(define_expand "xorsign<mode>3"
+(define_expand "@xorsign<mode>3"
   [(match_operand:VHSDF 0 "register_operand")
    (match_operand:VHSDF 1 "register_operand")
    (match_operand:VHSDF 2 "register_operand")]
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 
01cf989641fce8e6c3828f6cfef62e101c4142df..9db82347bf891f9bc40aedecdc8462c94bf1a769
 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -6953,31 +6953,20 @@ (define_insn "copysign<GPF:mode>3_insn"
 ;; EOR   v0.8B, v0.8B, v3.8B
 ;;
 
-(define_expand "xorsign<mode>3"
+(define_expand "@xorsign<mode>3"
   [(match_operand:GPF 0 "register_operand")
    (match_operand:GPF 1 "register_operand")
    (match_operand:GPF 2 "register_operand")]
   "TARGET_SIMD"
 {
-
-  machine_mode imode = <V_INT_EQUIV>mode;
-  rtx mask = gen_reg_rtx (imode);
-  rtx op1x = gen_reg_rtx (imode);
-  rtx op2x = gen_reg_rtx (imode);
-
-  int bits = GET_MODE_BITSIZE (<MODE>mode) - 1;
-  emit_move_insn (mask, GEN_INT (trunc_int_for_mode (HOST_WIDE_INT_M1U << bits,
-                                                    imode)));
-
-  emit_insn (gen_and<v_int_equiv>3 (op2x, mask,
-                                   lowpart_subreg (imode, operands[2],
-                                                   <MODE>mode)));
-  emit_insn (gen_xor<v_int_equiv>3 (op1x,
-                                   lowpart_subreg (imode, operands[1],
-                                                   <MODE>mode),
-                                   op2x));
+  rtx tmp = gen_reg_rtx (<VCONQ>mode);
+  rtx op1 = gen_reg_rtx (<VCONQ>mode);
+  rtx op2 = gen_reg_rtx (<VCONQ>mode);
+  emit_move_insn (op1, lowpart_subreg (<VCONQ>mode, operands[1], <MODE>mode));
+  emit_move_insn (op2, lowpart_subreg (<VCONQ>mode, operands[2], <MODE>mode));
+  emit_insn (gen_xorsign3(<VCONQ>mode, tmp, op1, op2));
   emit_move_insn (operands[0],
-                 lowpart_subreg (<MODE>mode, op1x, imode));
+                 lowpart_subreg (<MODE>mode, tmp, <VCONQ>mode));
   DONE;
 }
 )
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 
9398d713044433cd89b2a83db5ae7969feb1dcf7..2451d8c2cd8e2da6ac8339eed9bc975cf203fa4c
 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1428,7 +1428,8 @@ (define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI")
                         (V4HF "V8HF") (V8HF "V8HF")
                         (V2SF "V4SF") (V4SF "V4SF")
                         (V2DF "V2DF") (SI   "V4SI")
-                        (HI   "V8HI") (QI   "V16QI")])
+                        (HI   "V8HI") (QI   "V16QI")
+                        (SF   "V4SF") (DF   "V2DF")])
 
 ;; Half modes of all vector modes.
 (define_mode_attr VHALF [(V8QI "V4QI")  (V16QI "V8QI")
diff --git a/gcc/testsuite/gcc.target/aarch64/xorsign.c 
b/gcc/testsuite/gcc.target/aarch64/xorsign.c
index 
22c5829449d932bed08de7e453c435ade3b787b2..dfb7ba7f140524507cb79cb06e12c72ad46eb753
 100644
--- a/gcc/testsuite/gcc.target/aarch64/xorsign.c
+++ b/gcc/testsuite/gcc.target/aarch64/xorsign.c
@@ -79,8 +79,9 @@ check_l_neg_rev (long double x, long double y)
   return __builtin_copysignl (-1.0, y) * x;
 }
 
-/* { dg-final { scan-assembler "\[ \t\]?eor\[ \t\]?" } } */
-/* { dg-final { scan-assembler "\[ \t\]?and\[ \t\]?" } } */
+/* { dg-final { scan-assembler-times {eor\tv[0-9]+\.16b, v[0-9]+\.16b, 
v[0-9]+\.16b} 8 } } */
+/* { dg-final { scan-assembler-times {and\tv[0-9]+\.16b, v[0-9]+\.16b, 
v[0-9]+\.16b} 8 } } */
 /* { dg-final { scan-assembler-not "copysign" } } */
+/* { dg-final { scan-assembler-not "fmov" } } */
 /* { dg-final { scan-assembler-not "\[ \t\]?orr\[ \t\]?" } } */
 /* { dg-final { scan-assembler-not "\[ \t\]?fmul\[ \t\]?" } } */




--

diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 
f67eb70577d0c2d9911d8c867d38a4d0b390337c..e955691f1be8830efacc237465119764ce2a4942
 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -500,7 +500,7 @@ (define_expand "ctz<mode>2"
   }
 )
 
-(define_expand "xorsign<mode>3"
+(define_expand "@xorsign<mode>3"
   [(match_operand:VHSDF 0 "register_operand")
    (match_operand:VHSDF 1 "register_operand")
    (match_operand:VHSDF 2 "register_operand")]
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 
01cf989641fce8e6c3828f6cfef62e101c4142df..9db82347bf891f9bc40aedecdc8462c94bf1a769
 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -6953,31 +6953,20 @@ (define_insn "copysign<GPF:mode>3_insn"
 ;; EOR   v0.8B, v0.8B, v3.8B
 ;;
 
-(define_expand "xorsign<mode>3"
+(define_expand "@xorsign<mode>3"
   [(match_operand:GPF 0 "register_operand")
    (match_operand:GPF 1 "register_operand")
    (match_operand:GPF 2 "register_operand")]
   "TARGET_SIMD"
 {
-
-  machine_mode imode = <V_INT_EQUIV>mode;
-  rtx mask = gen_reg_rtx (imode);
-  rtx op1x = gen_reg_rtx (imode);
-  rtx op2x = gen_reg_rtx (imode);
-
-  int bits = GET_MODE_BITSIZE (<MODE>mode) - 1;
-  emit_move_insn (mask, GEN_INT (trunc_int_for_mode (HOST_WIDE_INT_M1U << bits,
-                                                    imode)));
-
-  emit_insn (gen_and<v_int_equiv>3 (op2x, mask,
-                                   lowpart_subreg (imode, operands[2],
-                                                   <MODE>mode)));
-  emit_insn (gen_xor<v_int_equiv>3 (op1x,
-                                   lowpart_subreg (imode, operands[1],
-                                                   <MODE>mode),
-                                   op2x));
+  rtx tmp = gen_reg_rtx (<VCONQ>mode);
+  rtx op1 = gen_reg_rtx (<VCONQ>mode);
+  rtx op2 = gen_reg_rtx (<VCONQ>mode);
+  emit_move_insn (op1, lowpart_subreg (<VCONQ>mode, operands[1], <MODE>mode));
+  emit_move_insn (op2, lowpart_subreg (<VCONQ>mode, operands[2], <MODE>mode));
+  emit_insn (gen_xorsign3(<VCONQ>mode, tmp, op1, op2));
   emit_move_insn (operands[0],
-                 lowpart_subreg (<MODE>mode, op1x, imode));
+                 lowpart_subreg (<MODE>mode, tmp, <VCONQ>mode));
   DONE;
 }
 )
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 
9398d713044433cd89b2a83db5ae7969feb1dcf7..2451d8c2cd8e2da6ac8339eed9bc975cf203fa4c
 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1428,7 +1428,8 @@ (define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI")
                         (V4HF "V8HF") (V8HF "V8HF")
                         (V2SF "V4SF") (V4SF "V4SF")
                         (V2DF "V2DF") (SI   "V4SI")
-                        (HI   "V8HI") (QI   "V16QI")])
+                        (HI   "V8HI") (QI   "V16QI")
+                        (SF   "V4SF") (DF   "V2DF")])
 
 ;; Half modes of all vector modes.
 (define_mode_attr VHALF [(V8QI "V4QI")  (V16QI "V8QI")
diff --git a/gcc/testsuite/gcc.target/aarch64/xorsign.c 
b/gcc/testsuite/gcc.target/aarch64/xorsign.c
index 
22c5829449d932bed08de7e453c435ade3b787b2..dfb7ba7f140524507cb79cb06e12c72ad46eb753
 100644
--- a/gcc/testsuite/gcc.target/aarch64/xorsign.c
+++ b/gcc/testsuite/gcc.target/aarch64/xorsign.c
@@ -79,8 +79,9 @@ check_l_neg_rev (long double x, long double y)
   return __builtin_copysignl (-1.0, y) * x;
 }
 
-/* { dg-final { scan-assembler "\[ \t\]?eor\[ \t\]?" } } */
-/* { dg-final { scan-assembler "\[ \t\]?and\[ \t\]?" } } */
+/* { dg-final { scan-assembler-times {eor\tv[0-9]+\.16b, v[0-9]+\.16b, 
v[0-9]+\.16b} 8 } } */
+/* { dg-final { scan-assembler-times {and\tv[0-9]+\.16b, v[0-9]+\.16b, 
v[0-9]+\.16b} 8 } } */
 /* { dg-final { scan-assembler-not "copysign" } } */
+/* { dg-final { scan-assembler-not "fmov" } } */
 /* { dg-final { scan-assembler-not "\[ \t\]?orr\[ \t\]?" } } */
 /* { dg-final { scan-assembler-not "\[ \t\]?fmul\[ \t\]?" } } */

[PATCH]AArch64 xorsign: Fix scalar xorsign lowering

Reply via email to