This patch adds a vec_basic_block that records the scalar phis and scalar statements that we need to vectorise. This is a slight simplification in its own right, since it avoids unnecesary statement lookups and shaves >50 LOC. But the main reason for doing it is to allow the rest of the series to treat pattern statements less specially.
Putting phis (which are logically parallel) and normal statements (which are logically serial) into a single list might seem dangerous, but I think in practice it should be fine. Very little vectoriser code needs to handle the parallel nature of phis specially, and code that does can still do so. Having a single list simplifies code that wants to look at every scalar phi or stmt in isolation. The new test is for cases in which we try to hoist the same expression twice, which I broke with an earlier version of the patch. 2018-08-28 Richard Sandiford <richard.sandif...@arm.com> gcc/ * tree-vectorizer.h (vec_basic_block): New structure. (vec_info::blocks, _stmt_vec_info::block, _stmt_vec_info::prev) (_stmt_vec_info::next): New member variables. (FOR_EACH_VEC_BB_STMT, FOR_EACH_VEC_BB_STMT_REVERSE): New macros. (vec_basic_block::vec_basic_block): New function. * tree-vectorizer.c (vec_basic_block::add_to_end): Likewise. (vec_basic_block::add_before): Likewise. (vec_basic_block::remove): Likewise. (vec_info::~vec_info): Free the vec_basic_blocks. (vec_info::remove_stmt): Remove the statement from the containing vec_basic_block. * tree-vect-patterns.c (vect_determine_precisions) (vect_pattern_recog): Iterate over vec_basic_blocks. * tree-vect-loop.c (vect_determine_vectorization_factor) (vect_compute_single_scalar_iteration_cost, vect_update_vf_for_slp) (vect_analyze_loop_operations, vect_transform_loop): Likewise. (_loop_vec_info::_loop_vec_info): Construct vec_basic_blocks. * tree-vect-slp.c (_bb_vec_info::_bb_vec_info): Likewise. (vect_detect_hybrid_slp): Iterate over vec_basic_blocks. * tree-vect-stmts.c (vect_mark_stmts_to_be_vectorized): Likewise. (vect_finish_replace_stmt, vectorizable_condition): Remove the original statement from the containing block. (hoist_defs_of_uses): Likewise the statement that we're hoisting. gcc/testsuite/ * gcc.dg/vect/vect-hoist-1.c: New test. Index: gcc/tree-vectorizer.h =================================================================== --- gcc/tree-vectorizer.h 2018-08-28 12:05:08.743005901 +0100 +++ gcc/tree-vectorizer.h 2018-08-28 12:05:11.466982927 +0100 @@ -171,7 +171,30 @@ #define SLP_TREE_LOAD_PERMUTATION(S) #define SLP_TREE_TWO_OPERATORS(S) (S)->two_operators #define SLP_TREE_DEF_TYPE(S) (S)->def_type +/* Information about the phis and statements in a block that we're trying + to vectorize, in their original order. */ +class vec_basic_block +{ +public: + vec_basic_block (basic_block); + + void add_to_end (stmt_vec_info); + void add_before (stmt_vec_info, stmt_vec_info); + void remove (stmt_vec_info); + + basic_block bb () const { return m_bb; } + stmt_vec_info first () const { return m_first; } + stmt_vec_info last () const { return m_last; } + +private: + /* The block itself. */ + basic_block m_bb; + /* The first and last statements in the block, forming a doubly-linked list. + The list includes both phis and true statements. */ + stmt_vec_info m_first; + stmt_vec_info m_last; +}; /* Describes two objects whose addresses must be unequal for the vectorized loop to be valid. */ @@ -249,6 +272,9 @@ struct vec_info { /* Cost data used by the target cost model. */ void *target_cost_data; + /* The basic blocks in the vectorization region. */ + auto_vec<vec_basic_block *, 5> blocks; + private: stmt_vec_info new_stmt_vec_info (gimple *stmt); void set_vinfo_for_stmt (gimple *, stmt_vec_info); @@ -776,6 +802,11 @@ struct dr_vec_info { typedef struct data_reference *dr_p; struct _stmt_vec_info { + /* The block to which the statement belongs, or null if none. */ + vec_basic_block *block; + + /* Link chains for the previous and next statements in BLOCK. */ + stmt_vec_info prev, next; enum stmt_vec_info_type type; @@ -1072,6 +1103,27 @@ #define VECT_SCALAR_BOOLEAN_TYPE_P(TYPE) && TYPE_PRECISION (TYPE) == 1 \ && TYPE_UNSIGNED (TYPE))) +/* Make STMT_INFO iterate over each statement in vec_basic_block VEC_BB + in forward order. */ + +#define FOR_EACH_VEC_BB_STMT(VEC_BB, STMT_INFO) \ + for (stmt_vec_info STMT_INFO = (VEC_BB)->first (); STMT_INFO; \ + STMT_INFO = STMT_INFO->next) + +/* Make STMT_INFO iterate over each statement in vec_basic_block VEC_BB + in backward order. */ + +#define FOR_EACH_VEC_BB_STMT_REVERSE(VEC_BB, STMT_INFO) \ + for (stmt_vec_info STMT_INFO = (VEC_BB)->last (); STMT_INFO; \ + STMT_INFO = STMT_INFO->prev) + +/* Construct a vec_basic_block for BB. */ + +inline vec_basic_block::vec_basic_block (basic_block bb) + : m_bb (bb), m_first (NULL), m_last (NULL) +{ +} + static inline bool nested_in_vect_loop_p (struct loop *loop, stmt_vec_info stmt_info) { Index: gcc/tree-vectorizer.c =================================================================== --- gcc/tree-vectorizer.c 2018-08-28 12:05:08.743005901 +0100 +++ gcc/tree-vectorizer.c 2018-08-28 12:05:11.466982927 +0100 @@ -444,6 +444,61 @@ note_simd_array_uses (hash_table<simd_ar delete simd_array_to_simduid_htab; } +/* Add STMT_INFO to the end of the block. */ + +void +vec_basic_block::add_to_end (stmt_vec_info stmt_info) +{ + gcc_checking_assert (!stmt_info->block + && !stmt_info->prev + && !stmt_info->next); + if (m_last) + m_last->next = stmt_info; + else + m_first = stmt_info; + stmt_info->block = this; + stmt_info->prev = m_last; + m_last = stmt_info; +} + +/* Add STMT_INFO to the block, inserting it before NEXT_STMT_INFO. */ + +void +vec_basic_block::add_before (stmt_vec_info stmt_info, + stmt_vec_info next_stmt_info) +{ + gcc_checking_assert (!stmt_info->block + && !stmt_info->prev + && !stmt_info->next + && next_stmt_info->block == this); + if (next_stmt_info->prev) + next_stmt_info->prev->next = stmt_info; + else + m_first = stmt_info; + stmt_info->block = this; + stmt_info->prev = next_stmt_info->prev; + stmt_info->next = next_stmt_info; + next_stmt_info->prev = stmt_info; +} + +/* Remove STMT_INFO from the block. */ + +void +vec_basic_block::remove (stmt_vec_info stmt_info) +{ + gcc_checking_assert (stmt_info->block == this); + if (stmt_info->prev) + stmt_info->prev->next = stmt_info->next; + else + m_first = stmt_info->next; + if (stmt_info->next) + stmt_info->next->prev = stmt_info->prev; + else + m_last = stmt_info->prev; + stmt_info->block = NULL; + stmt_info->prev = stmt_info->next = NULL; +} + /* Initialize the vec_info with kind KIND_IN and target cost data TARGET_COST_DATA_IN. */ @@ -459,8 +514,12 @@ vec_info::vec_info (vec_info::vec_kind k vec_info::~vec_info () { slp_instance instance; + vec_basic_block *vec_bb; unsigned int i; + FOR_EACH_VEC_ELT (blocks, i, vec_bb) + delete vec_bb; + FOR_EACH_VEC_ELT (slp_instances, i, instance) vect_free_slp_instance (instance, true); @@ -597,6 +656,7 @@ vec_info::remove_stmt (stmt_vec_info stm unlink_stmt_vdef (stmt_info->stmt); gsi_remove (&si, true); release_defs (stmt_info->stmt); + stmt_info->block->remove (stmt_info); free_stmt_vec_info (stmt_info); } Index: gcc/tree-vect-patterns.c =================================================================== --- gcc/tree-vect-patterns.c 2018-08-01 15:40:18.277228757 +0100 +++ gcc/tree-vect-patterns.c 2018-08-28 12:05:11.462982962 +0100 @@ -4639,39 +4639,11 @@ vect_determine_precisions (vec_info *vin { DUMP_VECT_SCOPE ("vect_determine_precisions"); - if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) - { - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); - unsigned int nbbs = loop->num_nodes; - - for (unsigned int i = 0; i < nbbs; i++) - { - basic_block bb = bbs[nbbs - i - 1]; - for (gimple_stmt_iterator si = gsi_last_bb (bb); - !gsi_end_p (si); gsi_prev (&si)) - vect_determine_stmt_precisions - (vinfo->lookup_stmt (gsi_stmt (si))); - } - } - else - { - bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo); - gimple_stmt_iterator si = bb_vinfo->region_end; - gimple *stmt; - do - { - if (!gsi_stmt (si)) - si = gsi_last_bb (bb_vinfo->bb); - else - gsi_prev (&si); - stmt = gsi_stmt (si); - stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt); - if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info)) - vect_determine_stmt_precisions (stmt_info); - } - while (stmt != gsi_stmt (bb_vinfo->region_begin)); - } + unsigned int i; + vec_basic_block *vec_bb; + FOR_EACH_VEC_ELT_REVERSE (vinfo->blocks, i, vec_bb) + FOR_EACH_VEC_BB_STMT_REVERSE (vec_bb, stmt_info) + vect_determine_stmt_precisions (stmt_info); } typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *); @@ -4931,51 +4903,16 @@ vect_pattern_recog_1 (vect_recog_func *r void vect_pattern_recog (vec_info *vinfo) { - struct loop *loop; - basic_block *bbs; - unsigned int nbbs; - gimple_stmt_iterator si; - unsigned int i, j; - vect_determine_precisions (vinfo); DUMP_VECT_SCOPE ("vect_pattern_recog"); - if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) - { - loop = LOOP_VINFO_LOOP (loop_vinfo); - bbs = LOOP_VINFO_BBS (loop_vinfo); - nbbs = loop->num_nodes; - - /* Scan through the loop stmts, applying the pattern recognition - functions starting at each stmt visited: */ - for (i = 0; i < nbbs; i++) - { - basic_block bb = bbs[i]; - for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) - { - stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si)); - /* Scan over all generic vect_recog_xxx_pattern functions. */ - for (j = 0; j < NUM_PATTERNS; j++) - vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], - stmt_info); - } - } - } - else - { - bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo); - for (si = bb_vinfo->region_begin; - gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si)) - { - gimple *stmt = gsi_stmt (si); - stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt); - if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info)) - continue; - - /* Scan over all generic vect_recog_xxx_pattern functions. */ - for (j = 0; j < NUM_PATTERNS; j++) - vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], stmt_info); - } - } + unsigned int i; + vec_basic_block *vec_bb; + FOR_EACH_VEC_ELT (vinfo->blocks, i, vec_bb) + FOR_EACH_VEC_BB_STMT (vec_bb, stmt_info) + if (STMT_VINFO_VECTORIZABLE (stmt_info)) + /* Scan over all generic vect_recog_xxx_pattern functions. */ + for (unsigned int j = 0; j < NUM_PATTERNS; j++) + vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], stmt_info); } Index: gcc/tree-vect-loop.c =================================================================== --- gcc/tree-vect-loop.c 2018-08-28 12:05:08.743005901 +0100 +++ gcc/tree-vect-loop.c 2018-08-28 12:05:11.458982995 +0100 @@ -286,36 +286,26 @@ vect_determine_vf_for_stmt (stmt_vec_inf static bool vect_determine_vectorization_factor (loop_vec_info loop_vinfo) { - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); - unsigned nbbs = loop->num_nodes; poly_uint64 vectorization_factor = 1; tree scalar_type = NULL_TREE; - gphi *phi; tree vectype; stmt_vec_info stmt_info; unsigned i; auto_vec<stmt_vec_info> mask_producers; + vec_basic_block *vec_bb; DUMP_VECT_SCOPE ("vect_determine_vectorization_factor"); - for (i = 0; i < nbbs; i++) - { - basic_block bb = bbs[i]; - - for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); - gsi_next (&si)) + FOR_EACH_VEC_ELT (loop_vinfo->blocks, i, vec_bb) + FOR_EACH_VEC_BB_STMT (vec_bb, stmt_info) + if (gphi *phi = dyn_cast <gphi *> (stmt_info->stmt)) { - phi = si.phi (); - stmt_info = loop_vinfo->lookup_stmt (phi); if (dump_enabled_p ()) { dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: "); dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0); } - gcc_assert (stmt_info); - if (STMT_VINFO_RELEVANT_P (stmt_info) || STMT_VINFO_LIVE_P (stmt_info)) { @@ -363,16 +353,9 @@ vect_determine_vectorization_factor (loo vect_update_max_nunits (&vectorization_factor, vectype); } } - - for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); - gsi_next (&si)) - { - stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); - if (!vect_determine_vf_for_stmt (stmt_info, &vectorization_factor, - &mask_producers)) - return false; - } - } + else if (!vect_determine_vf_for_stmt (stmt_info, &vectorization_factor, + &mask_producers)) + return false; /* TODO: Analyze cost. Decide if worth while to vectorize. */ if (dump_enabled_p ()) @@ -881,21 +864,23 @@ _loop_vec_info::_loop_vec_info (struct l for (unsigned int i = 0; i < nbbs; i++) { basic_block bb = bbs[i]; + vec_basic_block *vec_bb = new vec_basic_block (bb); gimple_stmt_iterator si; for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si)) { gimple *phi = gsi_stmt (si); gimple_set_uid (phi, 0); - add_stmt (phi); + vec_bb->add_to_end (add_stmt (phi)); } for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) { gimple *stmt = gsi_stmt (si); gimple_set_uid (stmt, 0); - add_stmt (stmt); + vec_bb->add_to_end (add_stmt (stmt)); } + blocks.safe_push (vec_bb); } } @@ -1101,9 +1086,8 @@ vect_verify_full_masking (loop_vec_info vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); - int nbbs = loop->num_nodes, factor; - int innerloop_iters, i; + int factor, innerloop_iters; + unsigned int i; /* Gather costs for statements in the scalar loop. */ @@ -1112,23 +1096,19 @@ vect_compute_single_scalar_iteration_cos if (loop->inner) innerloop_iters = 50; /* FIXME */ - for (i = 0; i < nbbs; i++) + vec_basic_block *vec_bb; + FOR_EACH_VEC_ELT (loop_vinfo->blocks, i, vec_bb) { - gimple_stmt_iterator si; - basic_block bb = bbs[i]; - - if (bb->loop_father == loop->inner) + if (vec_bb->bb ()->loop_father == loop->inner) factor = innerloop_iters; else factor = 1; - for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) - { - gimple *stmt = gsi_stmt (si); - stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt); - - if (!is_gimple_assign (stmt) && !is_gimple_call (stmt)) - continue; + FOR_EACH_VEC_BB_STMT (vec_bb, stmt_info) + { + if (!is_gimple_assign (stmt_info->stmt) + && !is_gimple_call (stmt_info->stmt)) + continue; /* Skip stmts that are not vectorized inside the loop. */ if (stmt_info @@ -1432,11 +1412,8 @@ vect_analyze_loop_form (struct loop *loo static void vect_update_vf_for_slp (loop_vec_info loop_vinfo) { - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); - int nbbs = loop->num_nodes; poly_uint64 vectorization_factor; - int i; + unsigned int i; DUMP_VECT_SCOPE ("vect_update_vf_for_slp"); @@ -1449,21 +1426,17 @@ vect_update_vf_for_slp (loop_vec_info lo perform pure SLP on loop - cross iteration parallelism is not exploited. */ bool only_slp_in_loop = true; - for (i = 0; i < nbbs; i++) - { - basic_block bb = bbs[i]; - for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); - gsi_next (&si)) - { - stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); - stmt_info = vect_stmt_to_vectorize (stmt_info); - if ((STMT_VINFO_RELEVANT_P (stmt_info) - || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) - && !PURE_SLP_STMT (stmt_info)) - /* STMT needs both SLP and loop-based vectorization. */ - only_slp_in_loop = false; - } - } + vec_basic_block *vec_bb; + FOR_EACH_VEC_ELT (loop_vinfo->blocks, i, vec_bb) + FOR_EACH_VEC_BB_STMT (vec_bb, stmt_info) + { + stmt_vec_info final_info = vect_stmt_to_vectorize (stmt_info); + if ((STMT_VINFO_RELEVANT_P (final_info) + || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (final_info))) + && !PURE_SLP_STMT (final_info)) + /* STMT needs both SLP and loop-based vectorization. */ + only_slp_in_loop = false; + } if (only_slp_in_loop) { @@ -1526,11 +1499,7 @@ vect_active_double_reduction_p (stmt_vec static bool vect_analyze_loop_operations (loop_vec_info loop_vinfo) { - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); - int nbbs = loop->num_nodes; - int i; - stmt_vec_info stmt_info; + unsigned int i; bool need_to_vectorize = false; bool ok; @@ -1539,17 +1508,12 @@ vect_analyze_loop_operations (loop_vec_i stmt_vector_for_cost cost_vec; cost_vec.create (2); - for (i = 0; i < nbbs; i++) - { - basic_block bb = bbs[i]; - - for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); - gsi_next (&si)) + vec_basic_block *vec_bb; + FOR_EACH_VEC_ELT (loop_vinfo->blocks, i, vec_bb) + FOR_EACH_VEC_BB_STMT (vec_bb, stmt_info) + if (gphi *phi = dyn_cast <gphi *> (stmt_info->stmt)) { - gphi *phi = si.phi (); ok = true; - - stmt_info = loop_vinfo->lookup_stmt (phi); if (dump_enabled_p ()) { dump_printf_loc (MSG_NOTE, vect_location, "examining phi: "); @@ -1560,7 +1524,7 @@ vect_analyze_loop_operations (loop_vec_i /* Inner-loop loop-closed exit phi in outer-loop vectorization (i.e., a phi in the tail of the outer-loop). */ - if (! is_loop_header_bb_p (bb)) + if (! is_loop_header_bb_p (vec_bb->bb ())) { /* FORNOW: we currently don't support the case that these phis are not used in the outerloop (unless it is double reduction, @@ -1599,8 +1563,6 @@ vect_analyze_loop_operations (loop_vec_i continue; } - gcc_assert (stmt_info); - if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope || STMT_VINFO_LIVE_P (stmt_info)) && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def) @@ -1645,18 +1607,10 @@ vect_analyze_loop_operations (loop_vec_i return false; } } - - for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); - gsi_next (&si)) - { - gimple *stmt = gsi_stmt (si); - if (!gimple_clobber_p (stmt) - && !vect_analyze_stmt (loop_vinfo->lookup_stmt (stmt), - &need_to_vectorize, - NULL, NULL, &cost_vec)) - return false; - } - } /* bbs */ + else if (!gimple_clobber_p (stmt_info->stmt) + && !vect_analyze_stmt (stmt_info, &need_to_vectorize, + NULL, NULL, &cost_vec)) + return false; add_stmt_costs (loop_vinfo->target_cost_data, &cost_vec); cost_vec.release (); @@ -2242,32 +2196,22 @@ vect_analyze_loop_2 (loop_vec_info loop_ vect_free_slp_instance (instance, false); LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release (); /* Reset SLP type to loop_vect on all stmts. */ - for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i) - { - basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i]; - for (gimple_stmt_iterator si = gsi_start_phis (bb); - !gsi_end_p (si); gsi_next (&si)) - { - stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); - STMT_SLP_TYPE (stmt_info) = loop_vect; - } - for (gimple_stmt_iterator si = gsi_start_bb (bb); - !gsi_end_p (si); gsi_next (&si)) - { - stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); - STMT_SLP_TYPE (stmt_info) = loop_vect; - if (STMT_VINFO_IN_PATTERN_P (stmt_info)) - { - gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); - stmt_info = STMT_VINFO_RELATED_STMT (stmt_info); - STMT_SLP_TYPE (stmt_info) = loop_vect; - for (gimple_stmt_iterator pi = gsi_start (pattern_def_seq); - !gsi_end_p (pi); gsi_next (&pi)) - STMT_SLP_TYPE (loop_vinfo->lookup_stmt (gsi_stmt (pi))) - = loop_vect; - } - } - } + vec_basic_block *vec_bb; + FOR_EACH_VEC_ELT (loop_vinfo->blocks, i, vec_bb) + FOR_EACH_VEC_BB_STMT (vec_bb, stmt_info) + { + STMT_SLP_TYPE (stmt_info) = loop_vect; + if (STMT_VINFO_IN_PATTERN_P (stmt_info)) + { + gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); + STMT_SLP_TYPE (STMT_VINFO_RELATED_STMT (stmt_info)) = loop_vect; + for (gimple_stmt_iterator pi = gsi_start (pattern_def_seq); + !gsi_end_p (pi); gsi_next (&pi)) + STMT_SLP_TYPE (loop_vinfo->lookup_stmt (gsi_stmt (pi))) + = loop_vect; + } + } + /* Free optimized alias test DDRS. */ LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).truncate (0); LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release (); @@ -8275,15 +8219,12 @@ vect_transform_loop (loop_vec_info loop_ { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); struct loop *epilogue = NULL; - basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); - int nbbs = loop->num_nodes; - int i; + unsigned int i; tree niters_vector = NULL_TREE; tree step_vector = NULL_TREE; tree niters_vector_mult_vf = NULL_TREE; poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); unsigned int lowest_vf = constant_lower_bound (vf); - gimple *stmt; bool check_profitability = false; unsigned int th; @@ -8401,90 +8342,73 @@ vect_transform_loop (loop_vec_info loop_ support more involved loop forms, the order by which the BBs are traversed need to be reconsidered. */ - for (i = 0; i < nbbs; i++) + vec_basic_block *vec_bb; + FOR_EACH_VEC_ELT (loop_vinfo->blocks, i, vec_bb) { - basic_block bb = bbs[i]; - stmt_vec_info stmt_info; - - for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); - gsi_next (&si)) - { - gphi *phi = si.phi (); - if (dump_enabled_p ()) + stmt_vec_info next_stmt_info; + for (stmt_vec_info stmt_info = vec_bb->first (); stmt_info; + stmt_info = next_stmt_info) + { + next_stmt_info = stmt_info->next; + if (gphi *phi = dyn_cast <gphi *> (stmt_info->stmt)) { - dump_printf_loc (MSG_NOTE, vect_location, - "------>vectorizing phi: "); - dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0); - } - stmt_info = loop_vinfo->lookup_stmt (phi); - if (!stmt_info) - continue; + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, + "------>vectorizing phi: "); + dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0); + } - if (MAY_HAVE_DEBUG_BIND_STMTS && !STMT_VINFO_LIVE_P (stmt_info)) - vect_loop_kill_debug_uses (loop, stmt_info); + if (MAY_HAVE_DEBUG_BIND_STMTS && !STMT_VINFO_LIVE_P (stmt_info)) + vect_loop_kill_debug_uses (loop, stmt_info); - if (!STMT_VINFO_RELEVANT_P (stmt_info) - && !STMT_VINFO_LIVE_P (stmt_info)) - continue; - - if (STMT_VINFO_VECTYPE (stmt_info) - && (maybe_ne - (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)), vf)) - && dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n"); - - if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def - || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def - || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) - && ! PURE_SLP_STMT (stmt_info)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n"); - vect_transform_stmt (stmt_info, NULL, NULL, NULL); + if (!STMT_VINFO_RELEVANT_P (stmt_info) + && !STMT_VINFO_LIVE_P (stmt_info)) + continue; + + if (STMT_VINFO_VECTYPE (stmt_info) + && (maybe_ne + (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)), + vf)) + && dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n"); + + if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def + || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def + || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) + && ! PURE_SLP_STMT (stmt_info)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "transform phi.\n"); + vect_transform_stmt (stmt_info, NULL, NULL, NULL); + } } - } - - for (gimple_stmt_iterator si = gsi_start_bb (bb); - !gsi_end_p (si);) - { - stmt = gsi_stmt (si); /* During vectorization remove existing clobber stmts. */ - if (gimple_clobber_p (stmt)) - { - unlink_stmt_vdef (stmt); - gsi_remove (&si, true); - release_defs (stmt); - } + else if (gimple_clobber_p (stmt_info->stmt)) + loop_vinfo->remove_stmt (stmt_info); else { - stmt_info = loop_vinfo->lookup_stmt (stmt); - - /* vector stmts created in the outer-loop during vectorization of - stmts in an inner-loop may not have a stmt_info, and do not - need to be vectorized. */ stmt_vec_info seen_store = NULL; - if (stmt_info) + gimple_stmt_iterator si = gsi_for_stmt (stmt_info->stmt); + if (STMT_VINFO_IN_PATTERN_P (stmt_info)) { - if (STMT_VINFO_IN_PATTERN_P (stmt_info)) + gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); + for (gimple_stmt_iterator subsi = gsi_start (def_seq); + !gsi_end_p (subsi); gsi_next (&subsi)) { - gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); - for (gimple_stmt_iterator subsi = gsi_start (def_seq); - !gsi_end_p (subsi); gsi_next (&subsi)) - { - stmt_vec_info pat_stmt_info - = loop_vinfo->lookup_stmt (gsi_stmt (subsi)); - vect_transform_loop_stmt (loop_vinfo, pat_stmt_info, - &si, &seen_store); - } stmt_vec_info pat_stmt_info - = STMT_VINFO_RELATED_STMT (stmt_info); - vect_transform_loop_stmt (loop_vinfo, pat_stmt_info, &si, - &seen_store); + = loop_vinfo->lookup_stmt (gsi_stmt (subsi)); + vect_transform_loop_stmt (loop_vinfo, pat_stmt_info, + &si, &seen_store); } - vect_transform_loop_stmt (loop_vinfo, stmt_info, &si, + stmt_vec_info pat_stmt_info + = STMT_VINFO_RELATED_STMT (stmt_info); + vect_transform_loop_stmt (loop_vinfo, pat_stmt_info, &si, &seen_store); } - gsi_next (&si); + vect_transform_loop_stmt (loop_vinfo, stmt_info, &si, + &seen_store); if (seen_store) { if (STMT_VINFO_GROUPED_ACCESS (seen_store)) @@ -8502,7 +8426,7 @@ vect_transform_loop (loop_vec_info loop_ /* Stub out scalar statements that must not survive vectorization. Doing this here helps with grouped statements, or statements that are involved in patterns. */ - for (gimple_stmt_iterator gsi = gsi_start_bb (bb); + for (gimple_stmt_iterator gsi = gsi_start_bb (vec_bb->bb ()); !gsi_end_p (gsi); gsi_next (&gsi)) { gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi)); Index: gcc/tree-vect-slp.c =================================================================== --- gcc/tree-vect-slp.c 2018-08-28 12:05:06.207027289 +0100 +++ gcc/tree-vect-slp.c 2018-08-28 12:05:11.462982962 +0100 @@ -2359,29 +2359,22 @@ vect_detect_hybrid_slp (loop_vec_info lo /* First walk all pattern stmt in the loop and mark defs of uses as hybrid because immediate uses in them are not recorded. */ - for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i) - { - basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i]; - for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); - gsi_next (&gsi)) + vec_basic_block *vec_bb; + FOR_EACH_VEC_ELT (loop_vinfo->blocks, i, vec_bb) + FOR_EACH_VEC_BB_STMT (vec_bb, stmt_info) + if (STMT_VINFO_IN_PATTERN_P (stmt_info)) { - gimple *stmt = gsi_stmt (gsi); - stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt); - if (STMT_VINFO_IN_PATTERN_P (stmt_info)) - { - walk_stmt_info wi; - memset (&wi, 0, sizeof (wi)); - wi.info = loop_vinfo; - gimple_stmt_iterator gsi2 - = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt); - walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2, - vect_detect_hybrid_slp_1, &wi); - walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info), - vect_detect_hybrid_slp_2, - vect_detect_hybrid_slp_1, &wi); - } + walk_stmt_info wi; + memset (&wi, 0, sizeof (wi)); + wi.info = loop_vinfo; + gimple_stmt_iterator gsi2 + = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt); + walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2, + vect_detect_hybrid_slp_1, &wi); + walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info), + vect_detect_hybrid_slp_2, + vect_detect_hybrid_slp_1, &wi); } - } /* Then walk the SLP instance trees marking stmts with uses in non-SLP stmts as hybrid, also propagating hybrid down the @@ -2408,13 +2401,15 @@ _bb_vec_info::_bb_vec_info (gimple_stmt_ { gimple_stmt_iterator gsi; + vec_basic_block *vec_bb = new vec_basic_block (bb); for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end); gsi_next (&gsi)) { gimple *stmt = gsi_stmt (gsi); gimple_set_uid (stmt, 0); - add_stmt (stmt); + vec_bb->add_to_end (add_stmt (stmt)); } + blocks.quick_push (vec_bb); bb->aux = this; } Index: gcc/tree-vect-stmts.c =================================================================== --- gcc/tree-vect-stmts.c 2018-08-28 12:05:08.743005901 +0100 +++ gcc/tree-vect-stmts.c 2018-08-28 12:05:11.466982927 +0100 @@ -612,12 +612,7 @@ process_use (stmt_vec_info stmt_vinfo, t bool vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo) { - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); - unsigned int nbbs = loop->num_nodes; - gimple_stmt_iterator si; unsigned int i; - basic_block bb; bool live_p; enum vect_relevant relevant; @@ -626,34 +621,11 @@ vect_mark_stmts_to_be_vectorized (loop_v auto_vec<stmt_vec_info, 64> worklist; /* 1. Init worklist. */ - for (i = 0; i < nbbs; i++) - { - bb = bbs[i]; - for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si)) - { - stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? "); - dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi_info->stmt, 0); - } - - if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p)) - vect_mark_relevant (&worklist, phi_info, relevant, live_p); - } - for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) - { - stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? "); - dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0); - } - - if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p)) - vect_mark_relevant (&worklist, stmt_info, relevant, live_p); - } - } + vec_basic_block *vec_bb; + FOR_EACH_VEC_ELT (loop_vinfo->blocks, i, vec_bb) + FOR_EACH_VEC_BB_STMT (vec_bb, stmt_info) + if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p)) + vect_mark_relevant (&worklist, stmt_info, relevant, live_p); /* 2. Process_worklist */ while (worklist.length () > 0) @@ -1740,6 +1712,7 @@ vect_finish_replace_stmt (stmt_vec_info gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt); gsi_replace (&gsi, vec_stmt, false); + stmt_info->block->remove (stmt_info); return vect_finish_stmt_generation_1 (stmt_info, vec_stmt); } @@ -7309,6 +7282,7 @@ permute_vec_elements (tree x, tree y, tr static bool hoist_defs_of_uses (stmt_vec_info stmt_info, struct loop *loop) { + vec_info *vinfo = stmt_info->vinfo; ssa_op_iter i; tree op; bool any = false; @@ -7349,6 +7323,8 @@ hoist_defs_of_uses (stmt_vec_info stmt_i { gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt); gsi_remove (&gsi, false); + if (stmt_vec_info def_stmt_info = vinfo->lookup_stmt (def_stmt)) + def_stmt_info->block->remove (def_stmt_info); gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt); } } @@ -9068,6 +9044,7 @@ vectorizable_condition (stmt_vec_info st gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt_info->stmt); gsi_remove (&old_gsi, true); + stmt_info->block->remove (stmt_info); new_stmt_info = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); } Index: gcc/testsuite/gcc.dg/vect/vect-hoist-1.c =================================================================== --- /dev/null 2018-07-26 10:26:13.137955424 +0100 +++ gcc/testsuite/gcc.dg/vect/vect-hoist-1.c 2018-08-28 12:05:11.458982995 +0100 @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O1" } */ + +int c[20]; + +void +f (int *a, int *b, int j) +{ + for (int i = 0; i < 100; ++i) + { + a[i] = a[i] + c[j + 4]; + b[i] = b[i] + c[j + 4]; + } +}