This patch makes tree-vect-generic.c cope with variable-length vectors.  Decomposition is only supported for constant-length vectors, since we should never generate variable-length operations that the target doesn't support.
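For reference, the reason the new subparts_gt helper uses must_gt rather than a plain ">" is that two element counts can be unordered when one of them is variable.  The following is a minimal standalone model of those semantics (poly_nunits and model_must_gt are invented names for illustration only; they are not GCC's poly_int API, which this series adds separately):

    /* Standalone model (not poly_int.h) of an element count of the
       form COEFF0 + COEFF1 * x, where x is a runtime parameter >= 0.  */
    #include <cassert>

    struct poly_nunits
    {
      unsigned long coeff0;  /* constant part */
      unsigned long coeff1;  /* multiple of the runtime parameter x */
    };

    /* must_gt-style comparison: true only if A > B for every x >= 0.
       A0 + A1*x > B0 + B1*x holds for all x >= 0 iff A0 > B0 and
       A1 >= B1.  */
    static bool
    model_must_gt (poly_nunits a, poly_nunits b)
    {
      return a.coeff0 > b.coeff0 && a.coeff1 >= b.coeff1;
    }

    int
    main ()
    {
      poly_nunits v2 = { 2, 0 };    /* fixed count of 2 */
      poly_nunits v4 = { 4, 0 };    /* fixed count of 4 */
      poly_nunits vnx4 = { 4, 4 };  /* variable count 4 + 4x, e.g. SVE */

      assert (model_must_gt (v4, v2));     /* 4 > 2 for all x */
      assert (model_must_gt (vnx4, v2));   /* 4 + 4x > 2 for all x */
      assert (!model_must_gt (vnx4, v4));  /* equal when x == 0 */
      assert (!model_must_gt (v4, vnx4));  /* smaller when x > 0 */
      return 0;
    }

Note that both orderings can fail at once, which is why get_compute_type below asks subparts_gt (compute_type, vector_compute_type) instead of comparing the two counts with "<".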
2017-10-23  Richard Sandiford  <richard.sandif...@linaro.org>
	    Alan Hayward  <alan.hayw...@arm.com>
	    David Sherwood  <david.sherw...@arm.com>

gcc/
	* tree-vect-generic.c (nunits_for_known_piecewise_op): New function.
	(expand_vector_piecewise): Use it instead of TYPE_VECTOR_SUBPARTS.
	(expand_vector_addition, add_rshift, expand_vector_divmod): Likewise.
	(expand_vector_condition, vector_element): Likewise.
	(subparts_gt): New function.
	(get_compute_type): Use subparts_gt.
	(count_type_subparts): Delete.
	(expand_vector_operations_1): Use subparts_gt instead of
	count_type_subparts.

Index: gcc/tree-vect-generic.c
===================================================================
--- gcc/tree-vect-generic.c	2017-10-23 17:11:39.944370794 +0100
+++ gcc/tree-vect-generic.c	2017-10-23 17:22:45.856865193 +0100
@@ -41,6 +41,26 @@ Free Software Foundation; either version
 
 static void expand_vector_operations_1 (gimple_stmt_iterator *);
 
+/* Return the number of elements in a vector type TYPE that we have
+   already decided needs to be expanded piecewise.  We don't support
+   this kind of expansion for variable-length vectors, since we should
+   always check for target support before introducing uses of those.  */
+static unsigned int
+nunits_for_known_piecewise_op (const_tree type)
+{
+  return TYPE_VECTOR_SUBPARTS (type);
+}
+
+/* Return true if TYPE1 has more elements than TYPE2, where either
+   type may be a vector or a scalar.  */
+
+static inline bool
+subparts_gt (tree type1, tree type2)
+{
+  poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1;
+  poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1;
+  return must_gt (n1, n2);
+}
 
 /* Build a constant of type TYPE, made of VALUE's bits replicated
    every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
@@ -254,7 +274,7 @@ expand_vector_piecewise (gimple_stmt_ite
   vec<constructor_elt, va_gc> *v;
   tree part_width = TYPE_SIZE (inner_type);
   tree index = bitsize_int (0);
-  int nunits = TYPE_VECTOR_SUBPARTS (type);
+  int nunits = nunits_for_known_piecewise_op (type);
   int delta = tree_to_uhwi (part_width)
	      / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
   int i;
@@ -338,7 +358,7 @@ expand_vector_addition (gimple_stmt_iter
 
   if (INTEGRAL_TYPE_P (TREE_TYPE (type))
       && parts_per_word >= 4
-      && TYPE_VECTOR_SUBPARTS (type) >= 4)
+      && nunits_for_known_piecewise_op (type) >= 4)
     return expand_vector_parallel (gsi, f_parallel,
				    type, a, b, code);
   else
@@ -373,7 +393,7 @@ expand_vector_comparison (gimple_stmt_it
 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
 {
   optab op;
-  unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
+  unsigned int i, nunits = nunits_for_known_piecewise_op (type);
   bool scalar_shift = true;
 
   for (i = 1; i < nunits; i++)
@@ -418,7 +438,7 @@ expand_vector_divmod (gimple_stmt_iterat
   bool has_vector_shift = true;
   int mode = -1, this_mode;
   int pre_shift = -1, post_shift;
-  unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
+  unsigned int nunits = nunits_for_known_piecewise_op (type);
   int *shifts = XALLOCAVEC (int, nunits * 4);
   int *pre_shifts = shifts + nunits;
   int *post_shifts = pre_shifts + nunits;
@@ -867,7 +887,6 @@ expand_vector_condition (gimple_stmt_ite
   tree index = bitsize_int (0);
   tree comp_width = width;
   tree comp_index = index;
-  int nunits = TYPE_VECTOR_SUBPARTS (type);
   int i;
   location_t loc = gimple_location (gsi_stmt (*gsi));
 
@@ -920,6 +939,7 @@ expand_vector_condition (gimple_stmt_ite
   warning_at (loc, OPT_Wvector_operation_performance,
	       "vector condition will be expanded piecewise");
 
+  int nunits = nunits_for_known_piecewise_op (type);
   vec_alloc (v, nunits);
   for (i = 0; i < nunits; i++)
     {
@@ -1189,7 +1209,7 @@ vector_element (gimple_stmt_iterator *gs
 
   vect_type = TREE_TYPE (vect);
   vect_elt_type = TREE_TYPE (vect_type);
-  elements = TYPE_VECTOR_SUBPARTS (vect_type);
+  elements = nunits_for_known_piecewise_op (vect_type);
 
   if (TREE_CODE (idx) == INTEGER_CST)
     {
@@ -1446,8 +1466,7 @@ get_compute_type (enum tree_code code, o
       tree vector_compute_type
	= type_for_widest_vector_mode (TREE_TYPE (type), op);
       if (vector_compute_type != NULL_TREE
-	  && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
-	      < TYPE_VECTOR_SUBPARTS (compute_type))
+	  && subparts_gt (compute_type, vector_compute_type)
	  && TYPE_VECTOR_SUBPARTS (vector_compute_type) > 1
	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
	      != CODE_FOR_nothing))
@@ -1476,15 +1495,6 @@ get_compute_type (enum tree_code code, o
   return compute_type;
 }
 
-/* Helper function of expand_vector_operations_1.  Return number of
-   vector elements for vector types or 1 for other types.  */
-
-static inline int
-count_type_subparts (tree type)
-{
-  return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1;
-}
-
 static tree
 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	  tree bitpos, tree bitsize, enum tree_code code,
@@ -1704,8 +1714,7 @@ expand_vector_operations_1 (gimple_stmt_
	  /* The rtl expander will expand vector/scalar as vector/vector
	     if necessary.  Pick one with wider vector type.  */
	  tree compute_vtype = get_compute_type (code, opv, type);
-	  if (count_type_subparts (compute_vtype)
-	      > count_type_subparts (compute_type))
+	  if (subparts_gt (compute_vtype, compute_type))
	    {
	      compute_type = compute_vtype;
	      op = opv;
	    }
@@ -1735,14 +1744,12 @@ expand_vector_operations_1 (gimple_stmt_
	      tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
	      /* The rtl expander will expand vector/scalar as vector/vector
		 if necessary.  Pick one with wider vector type.  */
-	      if (count_type_subparts (compute_lvtype)
-		  > count_type_subparts (compute_ltype))
+	      if (subparts_gt (compute_lvtype, compute_ltype))
		{
		  compute_ltype = compute_lvtype;
		  opl = oplv;
		}
-	      if (count_type_subparts (compute_rvtype)
-		  > count_type_subparts (compute_rtype))
+	      if (subparts_gt (compute_rvtype, compute_rtype))
		{
		  compute_rtype = compute_rvtype;
		  opr = oprv;
@@ -1750,11 +1757,9 @@ expand_vector_operations_1 (gimple_stmt_
	      /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR
		 and BIT_IOR_EXPR.  */
	      compute_type = compute_ltype;
-	      if (count_type_subparts (compute_type)
-		  > count_type_subparts (compute_rtype))
+	      if (subparts_gt (compute_type, compute_rtype))
		compute_type = compute_rtype;
-	      if (count_type_subparts (compute_type)
-		  > count_type_subparts (compute_otype))
+	      if (subparts_gt (compute_type, compute_otype))
		compute_type = compute_otype;
	      /* Verify all 3 operations can be performed in that type.  */
	      if (compute_type != TREE_TYPE (type))
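As the comment on nunits_for_known_piecewise_op says, the wrapper exists to mark the places that assume a constant element count.  Once TYPE_VECTOR_SUBPARTS is converted to return a poly_uint64 later in the series, the wrapper could enforce that precondition directly.  A sketch only, assuming poly_int's to_constant () accessor (which asserts that the value is a compile-time constant):

    /* Sketch of a follow-on form of the new wrapper, assuming
       TYPE_VECTOR_SUBPARTS has been converted to return a poly_uint64.
       to_constant () asserts that the count is compile-time constant,
       so a variable-length vector reaching a piecewise expansion path
       would fail noisily instead of being miscompiled.  */
    static unsigned int
    nunits_for_known_piecewise_op (const_tree type)
    {
      return TYPE_VECTOR_SUBPARTS (type).to_constant ();
    }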