Hi All, This patch is to fix the ICE caused in expand pattern of copysignf builtin. This is a back port to r267019 of trunk.
Bootstrapped on aarch64-none-linux-gnu and regression tested on aarch64-none-elf. Ok for gcc 8 branch? If ok, could someone commit the patch on my behalf, I don't have commit rights. *** gcc/ChangeLog *** 2019-04-29 Srinath Parvathaneni <srinath.parvathan...@arm.com> Backport from mainline 2018-12-11 Richard Earnshaw <richard.earns...@arm.com> PR target/37369 * config/aarch64/iterators.md (sizem1): Add sizes for SFmode and DFmode. (Vbtype): Add SFmode mapping. * config/aarch64/aarch64.md (copysigndf3, copysignsf3): Delete. (copysign<GPF:mode>3): New expand pattern. (copysign<GPF:mode>3_insn): New insn pattern. *** gcc/testsuite/ChangeLog *** 2019-04-29 Srinath Parvathaneni <srinath.parvathan...@arm.com> PR target/90075 * gcc.target/aarch64/pr90075.c: New test.
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 32a0e1f3685746b9a7d70239745586d0f0fa7ee1..11c0ef00dcfd5fc41a0eb93c8d5b5bda8a6e6885 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -189,6 +189,7 @@ UNSPEC_CLASTB UNSPEC_FADDA UNSPEC_REV_SUBREG + UNSPEC_COPYSIGN ]) (define_c_enum "unspecv" [ @@ -5427,49 +5428,48 @@ ;; LDR d2, #(1 << 63) ;; BSL v2.8b, [y], [x] ;; -;; or another, equivalent, sequence using one of BSL/BIT/BIF. -;; aarch64_simd_bsldf will select the best suited of these instructions -;; to generate based on register allocation, and knows how to partially -;; constant fold based on the values of X and Y, so expand through that. - -(define_expand "copysigndf3" - [(match_operand:DF 0 "register_operand") - (match_operand:DF 1 "register_operand") - (match_operand:DF 2 "register_operand")] +;; or another, equivalent, sequence using one of BSL/BIT/BIF. Because +;; we expect these operations to nearly always operate on +;; floating-point values, we do not want the operation to be +;; simplified into a bit-field insert operation that operates on the +;; integer side, since typically that would involve three inter-bank +;; register copies. As we do not expect copysign to be followed by +;; other logical operations on the result, it seems preferable to keep +;; this as an unspec operation, rather than exposing the underlying +;; logic to the compiler. + +(define_expand "copysign<GPF:mode>3" + [(match_operand:GPF 0 "register_operand") + (match_operand:GPF 1 "register_operand") + (match_operand:GPF 2 "register_operand")] "TARGET_FLOAT && TARGET_SIMD" { - rtx mask = gen_reg_rtx (DImode); - emit_move_insn (mask, GEN_INT (HOST_WIDE_INT_1U << 63)); - emit_insn (gen_aarch64_simd_bsldf (operands[0], mask, - operands[2], operands[1])); + rtx bitmask = gen_reg_rtx (<V_INT_EQUIV>mode); + emit_move_insn (bitmask, GEN_INT (HOST_WIDE_INT_M1U + << (GET_MODE_BITSIZE (<MODE>mode) - 1))); + emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2], + bitmask)); DONE; } ) -;; As above, but we must first get to a 64-bit value if we wish to use -;; aarch64_simd_bslv2sf. - -(define_expand "copysignsf3" - [(match_operand:SF 0 "register_operand") - (match_operand:SF 1 "register_operand") - (match_operand:SF 2 "register_operand")] +(define_insn "copysign<GPF:mode>3_insn" + [(set (match_operand:GPF 0 "register_operand" "=w,w,w,r") + (unspec:GPF [(match_operand:GPF 1 "register_operand" "w,0,w,r") + (match_operand:GPF 2 "register_operand" "w,w,0,0") + (match_operand:<V_INT_EQUIV> 3 "register_operand" + "0,w,w,X")] + UNSPEC_COPYSIGN))] "TARGET_FLOAT && TARGET_SIMD" -{ - rtx v_bitmask = gen_reg_rtx (V2SImode); - - /* Juggle modes to get us in to a vector mode for BSL. */ - rtx op1 = lowpart_subreg (DImode, operands[1], SFmode); - rtx op2 = lowpart_subreg (V2SFmode, operands[2], SFmode); - rtx tmp = gen_reg_rtx (V2SFmode); - emit_move_insn (v_bitmask, - aarch64_simd_gen_const_vector_dup (V2SImode, - HOST_WIDE_INT_M1U << 31)); - emit_insn (gen_aarch64_simd_bslv2sf (tmp, v_bitmask, op2, op1)); - emit_move_insn (operands[0], lowpart_subreg (SFmode, tmp, V2SFmode)); - DONE; -} + "@ + bsl\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype> + bit\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype> + bif\\t%0.<Vbtype>, %1.<Vbtype>, %3.<Vbtype> + bfxil\\t%<w1>0, %<w1>1, #0, <sizem1>" + [(set_attr "type" "neon_bsl<q>,neon_bsl<q>,neon_bsl<q>,bfm")] ) + ;; For xorsign (x, y), we want to generate: ;; ;; LDR d2, #1<<63 diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 25991d97836498f48ca30b8757ed7963217da417..21d66d36f82cff639fbaf2de70e97e9767b1c748 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -578,7 +578,8 @@ (define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")]) ;; Give the ordinal of the MSB in the mode -(define_mode_attr sizem1 [(QI "#7") (HI "#15") (SI "#31") (DI "#63")]) +(define_mode_attr sizem1 [(QI "#7") (HI "#15") (SI "#31") (DI "#63") + (HF "#15") (SF "#31") (DF "#63")]) ;; Attribute to describe constants acceptable in logical operations (define_mode_attr lconst [(SI "K") (DI "L")]) @@ -664,7 +665,7 @@ (V8HF "16b") (V2SF "8b") (V4SF "16b") (V2DF "16b") (DI "8b") (DF "8b") - (SI "8b")]) + (SI "8b") (SF "8b")]) ;; Define element mode for each vector mode. (define_mode_attr VEL [(V8QI "QI") (V16QI "QI") (VNx16QI "QI") diff --git a/gcc/testsuite/gcc.target/aarch64/pr90075.c b/gcc/testsuite/gcc.target/aarch64/pr90075.c new file mode 100644 index 0000000000000000000000000000000000000000..cae7e618fc0cc34646e3e68a419b8de0631e6bda --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr90075.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O1" } */ + +typedef struct { + float one, two; +} twofloats; + +float +bug (twofloats tf) +{ + float f1, f2; + union { + twofloats tfloats; + float arr[2]; + } utfloats; + + utfloats.tfloats = tf; + f1 = utfloats.arr[1]; + f2 = __builtin_copysignf (0, f1); + return f2; +}