Previously SLP_TREE_NUMBER_OF_VEC_STMTS was calculated while scheduling an SLP tree after analysis, but sometimes it can be useful to know the value during analysis too. This patch moves the calculation to vect_slp_analyze_node_operations instead.
This became more natural after: 2017-06-30 Richard Biener <rguent...@suse.de> * tree-vect-slp.c (vect_slp_analyze_node_operations): Only analyze the first scalar stmt. Move vector type computation for the BB case here from ... * tree-vect-stmts.c (vect_analyze_stmt): ... here. Guard live operation processing in the SLP case properly. since the STMT_VINFO_VECTYPE is now always initialised in time. Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu. OK to install? Richard 2017-09-15 Richard Sandiford <richard.sandif...@linaro.org> Alan Hayward <alan.hayw...@arm.com> David Sherwood <david.sherw...@arm.com> gcc/ * tree-vectorizer.h (vect_slp_analyze_operations): Replace parameters with a vec_info *. * tree-vect-loop.c (vect_analyze_loop_operations): Update call accordingly. * tree-vect-slp.c (vect_slp_analyze_node_operations): Add vec_info * parameter. Set SLP_TREE_NUMBER_OF_VEC_STMTS here rather than in vect_schedule_slp_instance. (vect_slp_analyze_operations): Replace parameters with a vec_info *. Update call to vect_slp_analyze_node_operations. Simplify return value. (vect_slp_analyze_bb_1): Update call accordingly. (vect_schedule_slp_instance): Remove vectorization_factor parameter. Don't calculate SLP_TREE_NUMBER_OF_VEC_STMTS here. (vect_schedule_slp): Update call accordingly. 
Index: gcc/tree-vectorizer.h =================================================================== --- gcc/tree-vectorizer.h 2017-09-14 17:35:26.635276568 +0100 +++ gcc/tree-vectorizer.h 2017-09-15 11:35:46.833592065 +0100 @@ -1246,8 +1246,7 @@ extern void vect_free_slp_instance (slp_ extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> , gimple_stmt_iterator *, int, slp_instance, bool, unsigned *); -extern bool vect_slp_analyze_operations (vec<slp_instance> slp_instances, - void *); +extern bool vect_slp_analyze_operations (vec_info *); extern bool vect_schedule_slp (vec_info *); extern bool vect_analyze_slp (vec_info *, unsigned); extern bool vect_make_slp_decision (loop_vec_info); Index: gcc/tree-vect-loop.c =================================================================== --- gcc/tree-vect-loop.c 2017-09-14 17:35:26.635276568 +0100 +++ gcc/tree-vect-loop.c 2017-09-15 11:35:46.832592132 +0100 @@ -2031,8 +2031,7 @@ vect_analyze_loop_2 (loop_vec_info loop_ remove unsupported SLP instances which makes the above SLP kind detection invalid. */ unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length (); - vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), - LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)); + vect_slp_analyze_operations (loop_vinfo); if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size) goto again; } Index: gcc/tree-vect-slp.c =================================================================== --- gcc/tree-vect-slp.c 2017-09-14 17:04:19.083694343 +0100 +++ gcc/tree-vect-slp.c 2017-09-15 11:35:46.833592065 +0100 @@ -2501,11 +2501,14 @@ _bb_vec_info::~_bb_vec_info () } -/* Analyze statements contained in SLP tree node after recursively analyzing - the subtree. Return TRUE if the operations are supported. */ +/* Analyze statements contained in SLP tree NODE after recursively analyzing + the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE. + + Return true if the operations are supported. 
*/ static bool -vect_slp_analyze_node_operations (slp_tree node, slp_instance node_instance) +vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, + slp_instance node_instance) { bool dummy; int i, j; @@ -2516,7 +2519,7 @@ vect_slp_analyze_node_operations (slp_tr return true; FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) - if (!vect_slp_analyze_node_operations (child, node_instance)) + if (!vect_slp_analyze_node_operations (vinfo, child, node_instance)) return false; stmt = SLP_TREE_SCALAR_STMTS (node)[0]; @@ -2568,6 +2571,29 @@ vect_slp_analyze_node_operations (slp_tr STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype; } + /* Calculate the number of vector statements to be created for the + scalar stmts in this node. For SLP reductions it is equal to the + number of vector statements in the children (which has already been + calculated by the recursive call). Otherwise it is the number of + scalar elements in one scalar iteration (GROUP_SIZE) multiplied by + VF divided by the number of elements in a vector. */ + if (GROUP_FIRST_ELEMENT (stmt_info) + && !STMT_VINFO_GROUPED_ACCESS (stmt_info)) + SLP_TREE_NUMBER_OF_VEC_STMTS (node) + = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]); + else + { + int vf; + if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) + vf = loop_vinfo->vectorization_factor; + else + vf = 1; + unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance); + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + SLP_TREE_NUMBER_OF_VEC_STMTS (node) + = vf * group_size / TYPE_VECTOR_SUBPARTS (vectype); + } + /* Push SLP node def-type to stmt operands. */ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) @@ -2586,11 +2612,11 @@ vect_slp_analyze_node_operations (slp_tr } -/* Analyze statements in SLP instances of the basic block. Return TRUE if the +/* Analyze statements in SLP instances of VINFO. Return true if the operations are supported. 
*/ bool -vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data) +vect_slp_analyze_operations (vec_info *vinfo) { slp_instance instance; int i; @@ -2599,9 +2625,10 @@ vect_slp_analyze_operations (vec<slp_ins dump_printf_loc (MSG_NOTE, vect_location, "=== vect_slp_analyze_operations ===\n"); - for (i = 0; slp_instances.iterate (i, &instance); ) + for (i = 0; vinfo->slp_instances.iterate (i, &instance); ) { - if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance), + if (!vect_slp_analyze_node_operations (vinfo, + SLP_INSTANCE_TREE (instance), instance)) { dump_printf_loc (MSG_NOTE, vect_location, @@ -2610,20 +2637,17 @@ vect_slp_analyze_operations (vec<slp_ins SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0], 0); vect_free_slp_instance (instance); - slp_instances.ordered_remove (i); + vinfo->slp_instances.ordered_remove (i); } else { /* Compute the costs of the SLP instance. */ - vect_analyze_slp_cost (instance, data); + vect_analyze_slp_cost (instance, vinfo->target_cost_data); i++; } } - if (!slp_instances.length ()) - return false; - - return true; + return !vinfo->slp_instances.is_empty (); } @@ -2897,8 +2921,7 @@ vect_slp_analyze_bb_1 (gimple_stmt_itera return NULL; } - if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo), - BB_VINFO_TARGET_COST_DATA (bb_vinfo))) + if (!vect_slp_analyze_operations (bb_vinfo)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -3652,14 +3675,13 @@ vect_transform_slp_perm_load (slp_tree n /* Vectorize SLP instance tree in postorder. 
*/ static bool -vect_schedule_slp_instance (slp_tree node, slp_instance instance, - unsigned int vectorization_factor) +vect_schedule_slp_instance (slp_tree node, slp_instance instance) { gimple *stmt; bool grouped_store, is_store; gimple_stmt_iterator si; stmt_vec_info stmt_info; - unsigned int vec_stmts_size, nunits, group_size; + unsigned int group_size; tree vectype; int i, j; slp_tree child; @@ -3668,7 +3690,7 @@ vect_schedule_slp_instance (slp_tree nod return false; FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) - vect_schedule_slp_instance (child, instance, vectorization_factor); + vect_schedule_slp_instance (child, instance); /* Push SLP node def-type to stmts. */ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) @@ -3681,27 +3703,10 @@ vect_schedule_slp_instance (slp_tree nod /* VECTYPE is the type of the destination. */ vectype = STMT_VINFO_VECTYPE (stmt_info); - nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype); group_size = SLP_INSTANCE_GROUP_SIZE (instance); - /* For each SLP instance calculate number of vector stmts to be created - for the scalar stmts in each node of the SLP tree. Number of vector - elements in one vector iteration is the number of scalar elements in - one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector - size. - Unless this is a SLP reduction in which case the number of vector - stmts is equal to the number of vector stmts of the children. 
*/ - if (GROUP_FIRST_ELEMENT (stmt_info) - && !STMT_VINFO_GROUPED_ACCESS (stmt_info)) - vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]); - else - vec_stmts_size = (vectorization_factor * group_size) / nunits; - if (!SLP_TREE_VEC_STMTS (node).exists ()) - { - SLP_TREE_VEC_STMTS (node).create (vec_stmts_size); - SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size; - } + SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node)); if (dump_enabled_p ()) { @@ -3850,20 +3855,15 @@ vect_schedule_slp (vec_info *vinfo) { vec<slp_instance> slp_instances; slp_instance instance; - unsigned int i, vf; + unsigned int i; bool is_store = false; slp_instances = vinfo->slp_instances; - if (is_a <loop_vec_info> (vinfo)) - vf = as_a <loop_vec_info> (vinfo)->vectorization_factor; - else - vf = 1; - FOR_EACH_VEC_ELT (slp_instances, i, instance) { /* Schedule the tree of INSTANCE. */ is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance), - instance, vf); + instance); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "vectorizing stmts using SLP.\n");