This patch makes tree-vect-slp.c track the maximum number of vector units as a poly_uint64 rather than an unsigned int.
2017-10-23 Richard Sandiford <richard.sandif...@linaro.org> Alan Hayward <alan.hayw...@arm.com> David Sherwood <david.sherw...@arm.com> gcc/ * tree-vect-slp.c (vect_record_max_nunits, vect_build_slp_tree_1) (vect_build_slp_tree_2, vect_build_slp_tree): Change max_nunits from an unsigned int * to a poly_uint64_pod *. (calculate_unrolling_factor): New function. (vect_analyze_slp_instance): Use it. Track polynomial max_nunits. Index: gcc/tree-vect-slp.c =================================================================== --- gcc/tree-vect-slp.c 2017-10-23 17:22:26.573499378 +0100 +++ gcc/tree-vect-slp.c 2017-10-23 17:22:27.793744215 +0100 @@ -489,7 +489,7 @@ vect_get_and_check_slp_defs (vec_info *v static bool vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size, - tree vectype, unsigned int *max_nunits) + tree vectype, poly_uint64 *max_nunits) { if (!vectype) { @@ -506,8 +506,11 @@ vect_record_max_nunits (vec_info *vinfo, /* If populating the vector type requires unrolling then fail before adjusting *max_nunits for basic-block vectorization. */ + poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); + unsigned HOST_WIDE_INT const_nunits; if (is_a <bb_vec_info> (vinfo) - && TYPE_VECTOR_SUBPARTS (vectype) > group_size) + && (!nunits.is_constant (&const_nunits) + || const_nunits > group_size)) { dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "Build SLP failed: unrolling required " @@ -517,9 +520,7 @@ vect_record_max_nunits (vec_info *vinfo, } /* In case of multiple types we need to detect the smallest type. 
*/ - if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype)) - *max_nunits = TYPE_VECTOR_SUBPARTS (vectype); - + vect_update_max_nunits (max_nunits, vectype); return true; } @@ -540,7 +541,7 @@ vect_record_max_nunits (vec_info *vinfo, static bool vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, vec<gimple *> stmts, unsigned int group_size, - unsigned nops, unsigned int *max_nunits, + unsigned nops, poly_uint64 *max_nunits, bool *matches, bool *two_operators) { unsigned int i; @@ -966,16 +967,15 @@ bst_traits::equal (value_type existing, static slp_tree vect_build_slp_tree_2 (vec_info *vinfo, vec<gimple *> stmts, unsigned int group_size, - unsigned int *max_nunits, + poly_uint64 *max_nunits, vec<slp_tree> *loads, bool *matches, unsigned *npermutes, unsigned *tree_size, unsigned max_tree_size); static slp_tree vect_build_slp_tree (vec_info *vinfo, - vec<gimple *> stmts, unsigned int group_size, - unsigned int *max_nunits, - vec<slp_tree> *loads, + vec<gimple *> stmts, unsigned int group_size, + poly_uint64 *max_nunits, vec<slp_tree> *loads, bool *matches, unsigned *npermutes, unsigned *tree_size, unsigned max_tree_size) { @@ -1007,12 +1007,13 @@ vect_build_slp_tree (vec_info *vinfo, static slp_tree vect_build_slp_tree_2 (vec_info *vinfo, vec<gimple *> stmts, unsigned int group_size, - unsigned int *max_nunits, + poly_uint64 *max_nunits, vec<slp_tree> *loads, bool *matches, unsigned *npermutes, unsigned *tree_size, unsigned max_tree_size) { - unsigned nops, i, this_tree_size = 0, this_max_nunits = *max_nunits; + unsigned nops, i, this_tree_size = 0; + poly_uint64 this_max_nunits = *max_nunits; gimple *stmt; slp_tree node; @@ -1951,6 +1952,15 @@ vect_split_slp_store_group (gimple *firs return group2; } +/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE + statements and a vector of NUNITS elements. 
*/ + +static poly_uint64 +calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size) +{ + return exact_div (common_multiple (nunits, group_size), group_size); +} + /* Analyze an SLP instance starting from a group of grouped stores. Call vect_build_slp_tree to build a tree of packed stmts if possible. Return FALSE if it's impossible to SLP any stmt in the loop. */ @@ -1962,11 +1972,9 @@ vect_analyze_slp_instance (vec_info *vin slp_instance new_instance; slp_tree node; unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt)); - unsigned int nunits; tree vectype, scalar_type = NULL_TREE; gimple *next; unsigned int i; - unsigned int max_nunits = 0; vec<slp_tree> loads; struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); vec<gimple *> scalar_stmts; @@ -2005,7 +2013,7 @@ vect_analyze_slp_instance (vec_info *vin return false; } - nunits = TYPE_VECTOR_SUBPARTS (vectype); + poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); /* Create a node (a root of the SLP tree) for the packed grouped stores. */ scalar_stmts.create (group_size); @@ -2043,32 +2051,35 @@ vect_analyze_slp_instance (vec_info *vin bool *matches = XALLOCAVEC (bool, group_size); unsigned npermutes = 0; bst_fail = new hash_set <vec <gimple *>, bst_traits> (); + poly_uint64 max_nunits = nunits; node = vect_build_slp_tree (vinfo, scalar_stmts, group_size, - &max_nunits, &loads, matches, &npermutes, + &max_nunits, &loads, matches, &npermutes, NULL, max_tree_size); delete bst_fail; if (node != NULL) { /* Calculate the unrolling factor based on the smallest type. 
*/ poly_uint64 unrolling_factor - = least_common_multiple (max_nunits, group_size) / group_size; + = calculate_unrolling_factor (max_nunits, group_size); if (may_ne (unrolling_factor, 1U) && is_a <bb_vec_info> (vinfo)) { - - if (max_nunits > group_size) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "Build SLP failed: store group " - "size not a multiple of the vector size " - "in basic block SLP\n"); - vect_free_slp_tree (node); - loads.release (); - return false; - } + unsigned HOST_WIDE_INT const_max_nunits; + if (!max_nunits.is_constant (&const_max_nunits) + || const_max_nunits > group_size) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "Build SLP failed: store group " + "size not a multiple of the vector size " + "in basic block SLP\n"); + vect_free_slp_tree (node); + loads.release (); + return false; + } /* Fatal mismatch. */ - matches[group_size/max_nunits * max_nunits] = false; + matches[group_size / const_max_nunits * const_max_nunits] = false; vect_free_slp_tree (node); loads.release (); } @@ -2187,20 +2198,22 @@ vect_analyze_slp_instance (vec_info *vin /* For basic block SLP, try to break the group up into multiples of the vector size. */ + unsigned HOST_WIDE_INT const_nunits; if (is_a <bb_vec_info> (vinfo) && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) - && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))) + && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)) + && nunits.is_constant (&const_nunits)) { /* We consider breaking the group only on VF boundaries from the existing start. */ for (i = 0; i < group_size; i++) if (!matches[i]) break; - if (i >= nunits && i < group_size) + if (i >= const_nunits && i < group_size) { /* Split into two groups at the first vector boundary before i. 
*/ - gcc_assert ((nunits & (nunits - 1)) == 0); - unsigned group1_size = i & ~(nunits - 1); + gcc_assert ((const_nunits & (const_nunits - 1)) == 0); + unsigned group1_size = i & ~(const_nunits - 1); gimple *rest = vect_split_slp_store_group (stmt, group1_size); bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size); @@ -2208,9 +2221,9 @@ vect_analyze_slp_instance (vec_info *vin skip the rest of that vector. */ if (group1_size < i) { - i = group1_size + nunits; + i = group1_size + const_nunits; if (i < group_size) - rest = vect_split_slp_store_group (rest, nunits); + rest = vect_split_slp_store_group (rest, const_nunits); } if (i < group_size) res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);