Bootstrapped and regtested on s390x-redhat-linux. OK for master?
The vector copysign pattern incorrectly assumes that vector
if_then_else operates on bits, not on elements. This can theoretically
mislead the optimizers. Fix by changing it to use bitwise operations,
like commit 2930bb321794 ("PR94613: Fix vec_sel builtin for IBM Z") did
for vec_sel builtin.
gcc/ChangeLog:
2020-10-07 Ilya Leoshkevich <[email protected]>
* config/s390/s390-protos.h (s390_build_signbit_mask): New
function.
* config/s390/s390.c (s390_tointvec): New function.
(s390_contiguous_bitmask_vector_p): Bitcast the argument to
an integral mode.
(s390_expand_vec_init): Do not call
s390_contiguous_bitmask_vector_p with a scalar argument.
(s390_build_signbit_mask): New function.
* config/s390/vector.md (copysign<mode>3): Use bitwise
operations.
---
gcc/config/s390/s390-protos.h | 1 +
gcc/config/s390/s390.c | 92 ++++++++++++++++++++++++++++++++---
gcc/config/s390/vector.md | 31 ++++--------
3 files changed, 95 insertions(+), 29 deletions(-)
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index 6f1bc07db17..029f7289fac 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -121,6 +121,7 @@ extern void s390_expand_vec_compare_cc (rtx, enum rtx_code,
rtx, rtx, bool);
extern enum rtx_code s390_reverse_condition (machine_mode, enum rtx_code);
extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx);
extern void s390_expand_vec_init (rtx, rtx);
+extern rtx s390_build_signbit_mask (machine_mode);
extern rtx s390_return_addr_rtx (int, rtx);
extern rtx s390_back_chain_rtx (void);
extern rtx_insn *s390_emit_call (rtx, rtx, rtx, rtx);
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 93894307d62..554c1adf40a 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -2450,6 +2450,54 @@ s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in,
bool wrap_p,
return b;
}
+/* Return the associated integral mode of VEC_MODE. Must be in sync with
+ tointvec mode_attr. */
+static machine_mode
+s390_tointvec (machine_mode vec_mode)
+{
+ switch (vec_mode)
+ {
+ case V1QImode:
+ return V1QImode;
+ case V2QImode:
+ return V2QImode;
+ case V4QImode:
+ return V4QImode;
+ case V8QImode:
+ return V8QImode;
+ case V16QImode:
+ return V16QImode;
+ case V1HImode:
+ return V1HImode;
+ case V2HImode:
+ return V2HImode;
+ case V4HImode:
+ return V4HImode;
+ case V8HImode:
+ return V8HImode;
+ case V1SImode:
+ case V1SFmode:
+ return V1SImode;
+ case V2SImode:
+ case V2SFmode:
+ return V2SImode;
+ case V4SImode:
+ case V4SFmode:
+ return V4SImode;
+ case V1DImode:
+ case V1DFmode:
+ return V1DImode;
+ case V2DImode:
+ case V2DFmode:
+ return V2DImode;
+ case V1TImode:
+ case V1TFmode:
+ return V1TImode;
+ default:
+ gcc_unreachable ();
+ }
+}
+
/* Return true if OP contains the same contiguous bitfield in *all*
its elements. START and END can be used to obtain the start and
end position of the bitfield.
@@ -2467,6 +2515,9 @@ s390_contiguous_bitmask_vector_p (rtx op, int *start, int
*end)
rtx elt;
bool b;
+ /* Handle floats by bitcasting them to ints. */
+ op = gen_lowpart (s390_tointvec (GET_MODE (op)), op);
+
gcc_assert (!!start == !!end);
if (!const_vec_duplicate_p (op, &elt)
|| !CONST_INT_P (elt))
@@ -6863,15 +6914,16 @@ s390_expand_vec_init (rtx target, rtx vals)
}
/* Use vector gen mask or vector gen byte mask if possible. */
- if (all_same && all_const_int
- && (XVECEXP (vals, 0, 0) == const0_rtx
- || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
- NULL, NULL)
- || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
+ if (all_same && all_const_int)
{
- emit_insn (gen_rtx_SET (target,
- gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
- return;
+ rtx vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
+ if (XVECEXP (vals, 0, 0) == const0_rtx
+ || s390_contiguous_bitmask_vector_p (vec, NULL, NULL)
+ || s390_bytemask_vector_p (vec, NULL))
+ {
+ emit_insn (gen_rtx_SET (target, vec));
+ return;
+ }
}
/* Use vector replicate instructions. vlrep/vrepi/vrep */
@@ -6949,6 +7001,30 @@ s390_expand_vec_init (rtx target, rtx vals)
}
}
+/* Emit a vector constant that contains 1s in each element's sign bit position
+ and 0s in other positions. MODE is the desired constant's mode. */
+extern rtx
+s390_build_signbit_mask (machine_mode mode)
+{
+ /* Generate the integral element mask value. */
+ machine_mode inner_mode = GET_MODE_INNER (mode);
+ int inner_bitsize = GET_MODE_BITSIZE (inner_mode);
+ wide_int mask_val = wi::set_bit_in_zero (inner_bitsize - 1, inner_bitsize);
+
+ /* Emit the element mask rtx. Use gen_lowpart in order to cast the integral
+ value to the desired mode. */
+ machine_mode inner_int_mode = GET_MODE_INNER (s390_tointvec (mode));
+ rtx mask = immed_wide_int_const (mask_val, inner_int_mode);
+ mask = gen_lowpart (inner_mode, mask);
+
+ /* Emit the vector mask rtx by mode the element mask rtx. */
+ int nunits = GET_MODE_NUNITS (mode);
+ rtvec v = rtvec_alloc (nunits);
+ for (int i = 0; i < nunits; i++)
+ RTVEC_ELT (v, i) = mask;
+ return gen_rtx_CONST_VECTOR (mode, v);
+}
+
/* Structure to hold the initial parameters for a compare_and_swap operation
in HImode and QImode. */
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 2573b7d980a..9d76118f525 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -126,7 +126,8 @@ (define_mode_attr w [(V1QI "") (V2QI "") (V4QI "") (V8QI
"") (V16QI "")
(V1DI "") (V2DI "")])
; Resulting mode of a vector comparison. For floating point modes an
-; integer vector mode with the same element size is picked.
+; integer vector mode with the same element size is picked. Must be in sync
+; with s390_tointvec function.
(define_mode_attr tointvec [(V1QI "V1QI") (V2QI "V2QI") (V4QI "V4QI") (V8QI
"V8QI") (V16QI "V16QI")
(V1HI "V1HI") (V2HI "V2HI") (V4HI "V4HI") (V8HI
"V8HI")
(V1SI "V1SI") (V2SI "V2SI") (V4SI "V4SI")
@@ -1425,28 +1426,16 @@ (define_insn_and_split "*sminv2df3_vx"
; Vector copysign, implement using vector select
(define_expand "copysign<mode>3"
- [(set (match_operand:VFT 0 "register_operand" "")
- (if_then_else:VFT
- (eq (match_dup 3)
- (match_dup 4))
- (match_operand:VFT 1 "register_operand" "")
- (match_operand:VFT 2 "register_operand" "")))]
+ [(set (match_operand:VFT 0 "register_operand" "")
+ (ior:VFT
+ (and:VFT (match_operand:VFT 2 "register_operand" "")
+ (match_dup 3))
+ (and:VFT (not:VFT (match_dup 3))
+ (match_operand:VFT 1 "register_operand" ""))))]
"TARGET_VX"
{
- int sz = GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
- int prec = GET_MODE_PRECISION (GET_MODE_INNER (<tointvec>mode));
- wide_int mask_val = wi::shwi (1l << (sz - 1), prec);
-
- rtx mask = gen_reg_rtx (<tointvec>mode);
-
- int nunits = GET_MODE_NUNITS (<tointvec>mode);
- rtvec v = rtvec_alloc (nunits);
- for (int i = 0; i < nunits; i++)
- RTVEC_ELT (v, i) = GEN_INT (mask_val.to_shwi ());
-
- mask = gen_rtx_CONST_VECTOR (<tointvec>mode, v);
- operands[3] = force_reg (<tointvec>mode, mask);
- operands[4] = CONST0_RTX (<tointvec>mode);
+ rtx mask = s390_build_signbit_mask (<MODE>mode);
+ operands[3] = force_reg (<MODE>mode, mask);
})
;;
--
2.25.4