Hi All, In GCC-9 our scalar xorsign pattern broke and we didn't notice it because the testcase was not strong enough. With this commit
8d2d39587d941a40f25ea0144cceb677df115040 is the first bad commit commit 8d2d39587d941a40f25ea0144cceb677df115040 Author: Segher Boessenkool <seg...@kernel.crashing.org> Date: Mon Oct 22 22:23:39 2018 +0200 combine: Do not combine moves from hard registers combine started introducing useless moves on hard registers, when one of the arguments to our scalar xorsign is a hardreg we get an additional move inserted. This leads to combine forming an AND with the immediate inside and using the superflous move to do the r->w move, instead of what we wanted before which was for the `and` to be a vector and and have reload pick the right alternative. To fix this the patch just forces the use of the vector version directly and so combine has no chance to mess it up. Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: * config/aarch64/aarch64-simd.md (xorsign<mode>3): Renamed to.. (@xorsign<mode>3): ...This. * config/aarch64/aarch64.md (xorsign<mode>3): Renamed to... (@xorsign<mode>3): ..This and emit vectors directly * config/aarch64/iterators.md (VCONQ): Add SF and DF. gcc/testsuite/ChangeLog: * gcc.target/aarch64/xorsign.c: --- inline copy of patch -- diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index f67eb70577d0c2d9911d8c867d38a4d0b390337c..e955691f1be8830efacc237465119764ce2a4942 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -500,7 +500,7 @@ (define_expand "ctz<mode>2" } ) -(define_expand "xorsign<mode>3" +(define_expand "@xorsign<mode>3" [(match_operand:VHSDF 0 "register_operand") (match_operand:VHSDF 1 "register_operand") (match_operand:VHSDF 2 "register_operand")] diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 01cf989641fce8e6c3828f6cfef62e101c4142df..9db82347bf891f9bc40aedecdc8462c94bf1a769 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -6953,31 +6953,20 @@ (define_insn "copysign<GPF:mode>3_insn" ;; EOR v0.8B, v0.8B, v3.8B ;; -(define_expand "xorsign<mode>3" +(define_expand "@xorsign<mode>3" [(match_operand:GPF 0 "register_operand") (match_operand:GPF 1 "register_operand") (match_operand:GPF 2 "register_operand")] "TARGET_SIMD" { - - machine_mode imode = <V_INT_EQUIV>mode; - rtx mask = gen_reg_rtx (imode); - rtx op1x = gen_reg_rtx (imode); - rtx op2x = gen_reg_rtx (imode); - - int bits = GET_MODE_BITSIZE (<MODE>mode) - 1; - emit_move_insn (mask, GEN_INT (trunc_int_for_mode (HOST_WIDE_INT_M1U << bits, - imode))); - - emit_insn (gen_and<v_int_equiv>3 (op2x, mask, - lowpart_subreg (imode, operands[2], - <MODE>mode))); - emit_insn (gen_xor<v_int_equiv>3 (op1x, - lowpart_subreg (imode, operands[1], - <MODE>mode), - op2x)); + rtx tmp = gen_reg_rtx (<VCONQ>mode); + rtx op1 = gen_reg_rtx (<VCONQ>mode); + rtx op2 = gen_reg_rtx (<VCONQ>mode); + emit_move_insn (op1, lowpart_subreg (<VCONQ>mode, operands[1], <MODE>mode)); + emit_move_insn (op2, lowpart_subreg (<VCONQ>mode, operands[2], <MODE>mode)); + emit_insn (gen_xorsign3(<VCONQ>mode, tmp, op1, op2)); emit_move_insn (operands[0], - lowpart_subreg (<MODE>mode, op1x, imode)); + lowpart_subreg (<MODE>mode, tmp, <VCONQ>mode)); DONE; } ) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 9398d713044433cd89b2a83db5ae7969feb1dcf7..2451d8c2cd8e2da6ac8339eed9bc975cf203fa4c 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1428,7 +1428,8 @@ (define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI") (V4HF "V8HF") (V8HF "V8HF") (V2SF "V4SF") (V4SF "V4SF") (V2DF "V2DF") (SI "V4SI") - (HI "V8HI") (QI "V16QI")]) + (HI "V8HI") (QI "V16QI") + (SF "V4SF") (DF "V2DF")]) ;; Half modes of all vector modes. (define_mode_attr VHALF [(V8QI "V4QI") (V16QI "V8QI") diff --git a/gcc/testsuite/gcc.target/aarch64/xorsign.c b/gcc/testsuite/gcc.target/aarch64/xorsign.c index 22c5829449d932bed08de7e453c435ade3b787b2..dfb7ba7f140524507cb79cb06e12c72ad46eb753 100644 --- a/gcc/testsuite/gcc.target/aarch64/xorsign.c +++ b/gcc/testsuite/gcc.target/aarch64/xorsign.c @@ -79,8 +79,9 @@ check_l_neg_rev (long double x, long double y) return __builtin_copysignl (-1.0, y) * x; } -/* { dg-final { scan-assembler "\[ \t\]?eor\[ \t\]?" } } */ -/* { dg-final { scan-assembler "\[ \t\]?and\[ \t\]?" } } */ +/* { dg-final { scan-assembler-times {eor\tv[0-9]+\.16b, v[0-9]+\.16b, v[0-9]+\.16b} 8 } } */ +/* { dg-final { scan-assembler-times {and\tv[0-9]+\.16b, v[0-9]+\.16b, v[0-9]+\.16b} 8 } } */ /* { dg-final { scan-assembler-not "copysign" } } */ +/* { dg-final { scan-assembler-not "fmov" } } */ /* { dg-final { scan-assembler-not "\[ \t\]?orr\[ \t\]?" } } */ /* { dg-final { scan-assembler-not "\[ \t\]?fmul\[ \t\]?" } } */ --
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index f67eb70577d0c2d9911d8c867d38a4d0b390337c..e955691f1be8830efacc237465119764ce2a4942 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -500,7 +500,7 @@ (define_expand "ctz<mode>2" } ) -(define_expand "xorsign<mode>3" +(define_expand "@xorsign<mode>3" [(match_operand:VHSDF 0 "register_operand") (match_operand:VHSDF 1 "register_operand") (match_operand:VHSDF 2 "register_operand")] diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 01cf989641fce8e6c3828f6cfef62e101c4142df..9db82347bf891f9bc40aedecdc8462c94bf1a769 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -6953,31 +6953,20 @@ (define_insn "copysign<GPF:mode>3_insn" ;; EOR v0.8B, v0.8B, v3.8B ;; -(define_expand "xorsign<mode>3" +(define_expand "@xorsign<mode>3" [(match_operand:GPF 0 "register_operand") (match_operand:GPF 1 "register_operand") (match_operand:GPF 2 "register_operand")] "TARGET_SIMD" { - - machine_mode imode = <V_INT_EQUIV>mode; - rtx mask = gen_reg_rtx (imode); - rtx op1x = gen_reg_rtx (imode); - rtx op2x = gen_reg_rtx (imode); - - int bits = GET_MODE_BITSIZE (<MODE>mode) - 1; - emit_move_insn (mask, GEN_INT (trunc_int_for_mode (HOST_WIDE_INT_M1U << bits, - imode))); - - emit_insn (gen_and<v_int_equiv>3 (op2x, mask, - lowpart_subreg (imode, operands[2], - <MODE>mode))); - emit_insn (gen_xor<v_int_equiv>3 (op1x, - lowpart_subreg (imode, operands[1], - <MODE>mode), - op2x)); + rtx tmp = gen_reg_rtx (<VCONQ>mode); + rtx op1 = gen_reg_rtx (<VCONQ>mode); + rtx op2 = gen_reg_rtx (<VCONQ>mode); + emit_move_insn (op1, lowpart_subreg (<VCONQ>mode, operands[1], <MODE>mode)); + emit_move_insn (op2, lowpart_subreg (<VCONQ>mode, operands[2], <MODE>mode)); + emit_insn (gen_xorsign3(<VCONQ>mode, tmp, op1, op2)); emit_move_insn (operands[0], - lowpart_subreg (<MODE>mode, op1x, imode)); + lowpart_subreg (<MODE>mode, tmp, <VCONQ>mode)); DONE; } ) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 9398d713044433cd89b2a83db5ae7969feb1dcf7..2451d8c2cd8e2da6ac8339eed9bc975cf203fa4c 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1428,7 +1428,8 @@ (define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI") (V4HF "V8HF") (V8HF "V8HF") (V2SF "V4SF") (V4SF "V4SF") (V2DF "V2DF") (SI "V4SI") - (HI "V8HI") (QI "V16QI")]) + (HI "V8HI") (QI "V16QI") + (SF "V4SF") (DF "V2DF")]) ;; Half modes of all vector modes. (define_mode_attr VHALF [(V8QI "V4QI") (V16QI "V8QI") diff --git a/gcc/testsuite/gcc.target/aarch64/xorsign.c b/gcc/testsuite/gcc.target/aarch64/xorsign.c index 22c5829449d932bed08de7e453c435ade3b787b2..dfb7ba7f140524507cb79cb06e12c72ad46eb753 100644 --- a/gcc/testsuite/gcc.target/aarch64/xorsign.c +++ b/gcc/testsuite/gcc.target/aarch64/xorsign.c @@ -79,8 +79,9 @@ check_l_neg_rev (long double x, long double y) return __builtin_copysignl (-1.0, y) * x; } -/* { dg-final { scan-assembler "\[ \t\]?eor\[ \t\]?" } } */ -/* { dg-final { scan-assembler "\[ \t\]?and\[ \t\]?" } } */ +/* { dg-final { scan-assembler-times {eor\tv[0-9]+\.16b, v[0-9]+\.16b, v[0-9]+\.16b} 8 } } */ +/* { dg-final { scan-assembler-times {and\tv[0-9]+\.16b, v[0-9]+\.16b, v[0-9]+\.16b} 8 } } */ /* { dg-final { scan-assembler-not "copysign" } } */ +/* { dg-final { scan-assembler-not "fmov" } } */ /* { dg-final { scan-assembler-not "\[ \t\]?orr\[ \t\]?" } } */ /* { dg-final { scan-assembler-not "\[ \t\]?fmul\[ \t\]?" } } */