GCN uses V64BImode to represent vector masks in the middle-end, and DImode bit-masks to represent them in the back-end. These must be converted at expand time, and the most convenient way to do that is simply to use a SUBREG.
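To be concrete about the layout I'm relying on: element 0 of the Boolean vector corresponds to the least significant bit of the mask, i.e. the lowest-numbered lane lives in the first byte. A rough plain-C sketch of that packing, with arrays and integers standing in for the rtx constants (the names here are purely illustrative, not GCC internals):

  #include <stdint.h>

  /* Pack a 64-element Boolean vector into a 64-bit mask, one bit per
     element, element 0 in the least significant bit.  Illustration only.  */
  static uint64_t
  pack_bool_vector (const unsigned char elems[64])
  {
    uint64_t mask = 0;
    for (int i = 0; i < 64; i++)
      mask |= (uint64_t) (elems[i] & 1) << i;
    return mask;
  }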
This works fine except that simplify_subreg needs to be able to convert immediates, mostly for REG_EQUAL and REG_EQUIV, and currently does not know how to convert vectors to integers where there is more than one element per byte.

This patch implements such conversions for the cases that we need.

I don't know why this is not a problem for other targets that use BImode vectors, such as ARM SVE, so it's possible I missed some magic somewhere?

2018-09-05  Andrew Stubbs  <a...@codesourcery.com>

	gcc/
	* simplify-rtx.c (convert_packed_vector): New function.
	(simplify_immed_subreg): Recognise Boolean vectors and call
	convert_packed_vector.
---
 gcc/simplify-rtx.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)
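Before the patch itself, the opposite direction that it also has to handle (splitting an integer immediate back into one-bit vector elements) looks like this in the same illustrative plain C, again not GCC internals:

  #include <stdint.h>

  /* Unpack a 64-bit mask into 64 one-bit elements, element i taken
     from bit i.  Illustration only.  */
  static void
  unpack_bool_vector (uint64_t mask, unsigned char elems[64])
  {
    for (int i = 0; i < 64; i++)
      elems[i] = (mask >> i) & 1;
  }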
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index b4c6883..89487f2 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -5976,6 +5976,73 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode,
   return 0;
 }
 
+/* Convert a CONST_INT to a CONST_VECTOR, or vice versa.
+
+   This should only occur for VECTOR_BOOL_MODE types, so the semantics
+   specified by that are assumed.  In particular, the lowest value is
+   in the first byte.  */
+
+static rtx
+convert_packed_vector (fixed_size_mode to_mode, rtx op,
+                       machine_mode from_mode, unsigned int byte,
+                       unsigned int first_elem, unsigned int inner_bytes)
+{
+  /* Sizes greater than HOST_WIDE_INT would need a better implementation.  */
+  gcc_assert (GET_MODE_SIZE (to_mode) <= sizeof (HOST_WIDE_INT));
+
+  if (GET_CODE (op) == CONST_VECTOR)
+    {
+      gcc_assert (!VECTOR_MODE_P (to_mode));
+
+      int num_elem = GET_MODE_NUNITS (from_mode).to_constant();
+      int elem_bitsize = (GET_MODE_SIZE (from_mode).to_constant()
+                          * BITS_PER_UNIT) / num_elem;
+      int elem_mask = (1 << elem_bitsize) - 1;
+      HOST_WIDE_INT subreg_mask =
+        (sizeof (HOST_WIDE_INT) == GET_MODE_SIZE (to_mode)
+         ? -1
+         : (((HOST_WIDE_INT)1 << (GET_MODE_SIZE (to_mode) * BITS_PER_UNIT))
+            - 1));
+
+      HOST_WIDE_INT val = 0;
+      for (int i = 0; i < num_elem; i++)
+        val |= ((INTVAL (CONST_VECTOR_ELT (op, i)) & elem_mask)
+                << (i * elem_bitsize));
+
+      val >>= byte * BITS_PER_UNIT;
+      val &= subreg_mask;
+
+      return gen_rtx_CONST_INT (VOIDmode, val);
+    }
+  else if (GET_CODE (op) == CONST_INT)
+    {
+      /* Subregs of a vector not implemented yet.  */
+      gcc_assert (maybe_eq (GET_MODE_SIZE (to_mode),
+                            GET_MODE_SIZE (from_mode)));
+
+      gcc_assert (VECTOR_MODE_P (to_mode));
+
+      int num_elem = GET_MODE_NUNITS (to_mode);
+      int elem_bitsize = (GET_MODE_SIZE (to_mode) * BITS_PER_UNIT) / num_elem;
+      int elem_mask = (1 << elem_bitsize) - 1;
+
+      rtvec val = rtvec_alloc (num_elem);
+      rtx *elem = &RTVEC_ELT (val, 0);
+
+      for (int i = 0; i < num_elem; i++)
+        elem[i] = gen_rtx_CONST_INT (VOIDmode,
+                                     (INTVAL (op) >> (i * elem_bitsize))
+                                     & elem_mask);
+
+      return gen_rtx_CONST_VECTOR (to_mode, val);
+    }
+  else
+    {
+      gcc_unreachable ();
+      return op;
+    }
+}
+
 /* Evaluate a SUBREG of a CONST_INT or CONST_WIDE_INT or CONST_DOUBLE
    or CONST_FIXED or CONST_VECTOR, returning another CONST_INT or
    CONST_WIDE_INT or CONST_DOUBLE or CONST_FIXED or CONST_VECTOR.
@@ -6017,6 +6084,15 @@ simplify_immed_subreg (fixed_size_mode outermode, rtx op,
   if (COMPLEX_MODE_P (outermode))
     return NULL_RTX;
 
+  /* Vectors with multiple elements per byte are a special case.  */
+  if ((VECTOR_MODE_P (innermode)
+       && ((GET_MODE_NUNITS (innermode).to_constant()
+            / GET_MODE_SIZE(innermode).to_constant()) > 1))
+      || (VECTOR_MODE_P (outermode)
+          && (GET_MODE_NUNITS (outermode) / GET_MODE_SIZE(outermode) > 1)))
+    return convert_packed_vector (outermode, op, innermode, byte, first_elem,
+                                  inner_bytes);
+
   /* We support any size mode.  */
   max_bitsize = MAX (GET_MODE_BITSIZE (outermode),
                      inner_bytes * BITS_PER_UNIT);