On Sat, May 25, 2024 at 4:54 PM Feng Xue OS <f...@os.amperecomputing.com> wrote: > > Both derived classes ( loop_vec_info/bb_vec_info) have their own "bbs" > field, which have exactly same purpose of recording all basic blocks > inside the corresponding vect region, while the fields are composed by > different data type, one is normal array, the other is auto_vec. This > difference causes some duplicated code even handling the same stuff, > almost in tree-vect-patterns. One refinement is lifting this field into the > base class "vec_info", and reset its value to the continuous memory area > pointed by two old "bbs" in each constructor of derived classes.
Nice. But. bbs_as_vector - why is that necessary? Why is vinfo->bbs not a vec<basic_block>? Having bbs and nbbs feels like a step back. Note the code duplications can probably be removed by "indirecting" through an array_slice. I'm a bit torn to approve this as-is given the above. Can you explain what made you not choose vec<> for bbs? I bet you tried. Richard. > Regression test on x86-64 and aarch64. > > Feng > -- > gcc/ > * tree-vect-loop.cc (_loop_vec_info::_loop_vec_info): Move > initialization of bbs to explicit construction code. Adjust the > definition of nbbs. > * tree-vect-pattern.cc (vect_determine_precisions): Make > loop_vec_info and bb_vec_info share same code. > (vect_pattern_recog): Remove duplicated vect_pattern_recog_1 loop. > * tree-vect-slp.cc (vect_get_and_check_slp_defs): Access to bbs[0] > via base vec_info class. > (_bb_vec_info::_bb_vec_info): Initialize bbs and nbbs using data > fields of input auto_vec<> bbs. > (_bb_vec_info::_bb_vec_info): Add assertions on bbs and nbbs to ensure > they are not changed externally. > (vect_slp_region): Use access to nbbs to replace original > bbs.length(). > (vect_schedule_slp_node): Access to bbs[0] via base vec_info class. > * tree-vectorizer.cc (vec_info::vec_info): Add initialization of > bbs and nbbs. > (vec_info::insert_seq_on_entry): Access to bbs[0] via base vec_info > class. > * tree-vectorizer.h (vec_info): Add new fields bbs and nbbs. > (_loop_vec_info): Remove field bbs. > (_bb_vec_info): Rename old bbs field to bbs_as_vector, and make it > be private. > --- > gcc/tree-vect-loop.cc | 6 +- > gcc/tree-vect-patterns.cc | 142 +++++++++++--------------------------- > gcc/tree-vect-slp.cc | 24 ++++--- > gcc/tree-vectorizer.cc | 7 +- > gcc/tree-vectorizer.h | 19 ++--- > 5 files changed, 72 insertions(+), 126 deletions(-) > > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc > index 83c0544b6aa..aef17420a5f 100644 > --- a/gcc/tree-vect-loop.cc > +++ b/gcc/tree-vect-loop.cc > @@ -1028,7 +1028,6 @@ bb_in_loop_p (const_basic_block bb, const void *data) > _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) > : vec_info (vec_info::loop, shared), > loop (loop_in), > - bbs (XCNEWVEC (basic_block, loop->num_nodes)), > num_itersm1 (NULL_TREE), > num_iters (NULL_TREE), > num_iters_unchanged (NULL_TREE), > @@ -1079,8 +1078,9 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, > vec_info_shared *shared) > case of the loop forms we allow, a dfs order of the BBs would the same > as reversed postorder traversal, so we are safe. */ > > - unsigned int nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p, > - bbs, loop->num_nodes, loop); > + bbs = XCNEWVEC (basic_block, loop->num_nodes); > + nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p, bbs, > + loop->num_nodes, loop); > gcc_assert (nbbs == loop->num_nodes); > > for (unsigned int i = 0; i < nbbs; i++) > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc > index a313dc64643..848a3195a93 100644 > --- a/gcc/tree-vect-patterns.cc > +++ b/gcc/tree-vect-patterns.cc > @@ -6925,81 +6925,41 @@ vect_determine_stmt_precisions (vec_info *vinfo, > stmt_vec_info stmt_info) > void > vect_determine_precisions (vec_info *vinfo) > { > + basic_block *bbs = vinfo->bbs; > + unsigned int nbbs = vinfo->nbbs; > + > DUMP_VECT_SCOPE ("vect_determine_precisions"); > > - if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) > + for (unsigned int i = 0; i < nbbs; i++) > { > - class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); > - basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); > - unsigned int nbbs = loop->num_nodes; > - > - for (unsigned int i = 0; i < nbbs; i++) > + basic_block bb = bbs[i]; > + for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > { > - basic_block bb = bbs[i]; > - for (auto gsi = gsi_start_phis (bb); > - !gsi_end_p (gsi); gsi_next (&gsi)) > - { > - stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ()); > - if (stmt_info) > - vect_determine_mask_precision (vinfo, stmt_info); > - } > - for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) > - if (!is_gimple_debug (gsi_stmt (si))) > - vect_determine_mask_precision > - (vinfo, vinfo->lookup_stmt (gsi_stmt (si))); > + stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ()); > + if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info)) > + vect_determine_mask_precision (vinfo, stmt_info); > } > - for (unsigned int i = 0; i < nbbs; i++) > + for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > { > - basic_block bb = bbs[nbbs - i - 1]; > - for (gimple_stmt_iterator si = gsi_last_bb (bb); > - !gsi_end_p (si); gsi_prev (&si)) > - if (!is_gimple_debug (gsi_stmt (si))) > - vect_determine_stmt_precisions > - (vinfo, vinfo->lookup_stmt (gsi_stmt (si))); > - for (auto gsi = gsi_start_phis (bb); > - !gsi_end_p (gsi); gsi_next (&gsi)) > - { > - stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ()); > - if (stmt_info) > - vect_determine_stmt_precisions (vinfo, stmt_info); > - } > + stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi)); > + if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info)) > + vect_determine_mask_precision (vinfo, stmt_info); > } > } > - else > + for (unsigned int i = 0; i < nbbs; i++) > { > - bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo); > - for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i) > + basic_block bb = bbs[nbbs - i - 1]; > + for (auto gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi)) > { > - basic_block bb = bb_vinfo->bbs[i]; > - for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next > (&gsi)) > - { > - stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ()); > - if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info)) > - vect_determine_mask_precision (vinfo, stmt_info); > - } > - for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next > (&gsi)) > - { > - stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi)); > - if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info)) > - vect_determine_mask_precision (vinfo, stmt_info); > - } > + stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi)); > + if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info)) > + vect_determine_stmt_precisions (vinfo, stmt_info); > } > - for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i) > + for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > { > - for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]); > - !gsi_end_p (gsi); gsi_prev (&gsi)) > - { > - stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi)); > - if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info)) > - vect_determine_stmt_precisions (vinfo, stmt_info); > - } > - for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]); > - !gsi_end_p (gsi); gsi_next (&gsi)) > - { > - stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ()); > - if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info)) > - vect_determine_stmt_precisions (vinfo, stmt_info); > - } > + stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ()); > + if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info)) > + vect_determine_stmt_precisions (vinfo, stmt_info); > } > } > } > @@ -7328,56 +7288,32 @@ vect_pattern_recog_1 (vec_info *vinfo, > void > vect_pattern_recog (vec_info *vinfo) > { > - class loop *loop; > - basic_block *bbs; > - unsigned int nbbs; > - gimple_stmt_iterator si; > - unsigned int i, j; > + basic_block *bbs = vinfo->bbs; > + unsigned int nbbs = vinfo->nbbs; > > vect_determine_precisions (vinfo); > > DUMP_VECT_SCOPE ("vect_pattern_recog"); > > - if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) > + /* Scan through the stmts in the region, applying the pattern recognition > + functions starting at each stmt visited. */ > + for (unsigned i = 0; i < nbbs; i++) > { > - loop = LOOP_VINFO_LOOP (loop_vinfo); > - bbs = LOOP_VINFO_BBS (loop_vinfo); > - nbbs = loop->num_nodes; > + basic_block bb = bbs[i]; > > - /* Scan through the loop stmts, applying the pattern recognition > - functions starting at each stmt visited: */ > - for (i = 0; i < nbbs; i++) > + for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) > { > - basic_block bb = bbs[i]; > - for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) > - { > - if (is_gimple_debug (gsi_stmt (si))) > - continue; > - stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si)); > - /* Scan over all generic vect_recog_xxx_pattern functions. */ > - for (j = 0; j < NUM_PATTERNS; j++) > - vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j], > - stmt_info); > - } > + stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si)); > + > + if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)) > + continue; > + > + /* Scan over all generic vect_recog_xxx_pattern functions. */ > + for (unsigned j = 0; j < NUM_PATTERNS; j++) > + vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j], > + stmt_info); > } > } > - else > - { > - bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo); > - for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i) > - for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]); > - !gsi_end_p (gsi); gsi_next (&gsi)) > - { > - stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi)); > - if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)) > - continue; > - > - /* Scan over all generic vect_recog_xxx_pattern functions. */ > - for (j = 0; j < NUM_PATTERNS; j++) > - vect_pattern_recog_1 (vinfo, > - &vect_vect_recog_func_ptrs[j], stmt_info); > - } > - } > > /* After this no more add_stmt calls are allowed. */ > vinfo->stmt_vec_info_ro = true; > diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc > index c7ed520b629..2e0244cf582 100644 > --- a/gcc/tree-vect-slp.cc > +++ b/gcc/tree-vect-slp.cc > @@ -748,8 +748,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned > char swap, > && is_a <bb_vec_info> (vinfo) > && TREE_CODE (oprnd) == SSA_NAME > && !SSA_NAME_IS_DEFAULT_DEF (oprnd) > - && !dominated_by_p (CDI_DOMINATORS, > - as_a <bb_vec_info> (vinfo)->bbs[0], > + && !dominated_by_p (CDI_DOMINATORS, vinfo->bbs[0], > gimple_bb (SSA_NAME_DEF_STMT (oprnd)))) > { > if (dump_enabled_p ()) > @@ -6288,10 +6287,13 @@ vect_detect_hybrid_slp (loop_vec_info loop_vinfo) > > _bb_vec_info::_bb_vec_info (vec<basic_block> _bbs, vec_info_shared *shared) > : vec_info (vec_info::bb, shared), > - bbs (_bbs), > + bbs_as_vector (_bbs), > roots (vNULL) > { > - for (unsigned i = 0; i < bbs.length (); ++i) > + bbs = bbs_as_vector.address (); > + nbbs = bbs_as_vector.length (); > + > + for (unsigned i = 0; i < nbbs; ++i) > { > if (i != 0) > for (gphi_iterator si = gsi_start_phis (bbs[i]); !gsi_end_p (si); > @@ -6319,8 +6321,12 @@ _bb_vec_info::_bb_vec_info (vec<basic_block> _bbs, > vec_info_shared *shared) > > _bb_vec_info::~_bb_vec_info () > { > + /* Check that bbs stored in the vector is not changed externally. */ > + gcc_assert (bbs == bbs_as_vector.address ()); > + gcc_assert (nbbs = bbs_as_vector.length ()); > + > /* Reset region marker. */ > - for (unsigned i = 0; i < bbs.length (); ++i) > + for (unsigned i = 0; i < nbbs; ++i) > { > if (i != 0) > for (gphi_iterator si = gsi_start_phis (bbs[i]); !gsi_end_p (si); > @@ -7625,7 +7631,7 @@ vect_slp_is_lane_insert (gimple *use_stmt, tree vec, > unsigned *this_lane) > static void > vect_slp_check_for_roots (bb_vec_info bb_vinfo) > { > - for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i) > + for (unsigned i = 0; i < bb_vinfo->nbbs; ++i) > for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]); > !gsi_end_p (gsi); gsi_next (&gsi)) > { > @@ -8109,7 +8115,7 @@ vect_slp_region (vec<basic_block> bbs, > vec<data_reference_p> datarefs, > we vectorized all if-converted code. */ > if ((!profitable_subgraphs.is_empty () || force_clear) && orig_loop) > { > - gcc_assert (bb_vinfo->bbs.length () == 1); > + gcc_assert (bb_vinfo->nbbs == 1); > for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[0]); > !gsi_end_p (gsi); gsi_next (&gsi)) > { > @@ -9613,14 +9619,14 @@ vect_schedule_slp_node (vec_info *vinfo, > if (!last_stmt) > { > gcc_assert (seen_vector_def); > - si = gsi_after_labels (as_a <bb_vec_info> (vinfo)->bbs[0]); > + si = gsi_after_labels (vinfo->bbs[0]); > } > else if (is_ctrl_altering_stmt (last_stmt)) > { > /* We split regions to vectorize at control altering stmts > with a definition so this must be an external which > we can insert at the start of the region. */ > - si = gsi_after_labels (as_a <bb_vec_info> (vinfo)->bbs[0]); > + si = gsi_after_labels (vinfo->bbs[0]); > } > else if (is_a <bb_vec_info> (vinfo) > && gimple_bb (last_stmt) != gimple_bb (stmt_info->stmt) > diff --git a/gcc/tree-vectorizer.cc b/gcc/tree-vectorizer.cc > index 9001b738bf3..1fb4fb36ed4 100644 > --- a/gcc/tree-vectorizer.cc > +++ b/gcc/tree-vectorizer.cc > @@ -463,7 +463,9 @@ shrink_simd_arrays > vec_info::vec_info (vec_info::vec_kind kind_in, vec_info_shared *shared_) > : kind (kind_in), > shared (shared_), > - stmt_vec_info_ro (false) > + stmt_vec_info_ro (false), > + bbs (NULL), > + nbbs (0) > { > stmt_vec_infos.create (50); > } > @@ -660,9 +662,8 @@ vec_info::insert_seq_on_entry (stmt_vec_info context, > gimple_seq seq) > } > else > { > - bb_vec_info bb_vinfo = as_a <bb_vec_info> (this); > gimple_stmt_iterator gsi_region_begin > - = gsi_after_labels (bb_vinfo->bbs[0]); > + = gsi_after_labels (bbs[0]); > gsi_insert_seq_before (&gsi_region_begin, seq, GSI_SAME_STMT); > } > } > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > index 93bc30ef660..ffc2b223dd2 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -499,6 +499,12 @@ public: > made any decisions about which vector modes to use. */ > machine_mode vector_mode; > > + /* The basic blocks in the vectorization region. */ > + basic_block *bbs; > + > + /* The count of the basic blocks in the vectorization region. */ > + unsigned int nbbs; > + > private: > stmt_vec_info new_stmt_vec_info (gimple *stmt); > void set_vinfo_for_stmt (gimple *, stmt_vec_info, bool = true); > @@ -679,9 +685,6 @@ public: > /* The loop to which this info struct refers to. */ > class loop *loop; > > - /* The loop basic blocks. */ > - basic_block *bbs; > - > /* Number of latch executions. */ > tree num_itersm1; > /* Number of iterations. */ > @@ -1090,15 +1093,15 @@ struct slp_root > > typedef class _bb_vec_info : public vec_info > { > -public: > - _bb_vec_info (vec<basic_block> bbs, vec_info_shared *); > - ~_bb_vec_info (); > - > /* The region we are operating on. bbs[0] is the entry, excluding > its PHI nodes. In the future we might want to track an explicit > entry edge to cover bbs[0] PHI nodes and have a region entry > insert location. */ > - vec<basic_block> bbs; > + vec<basic_block> bbs_as_vector; > + > +public: > + _bb_vec_info (vec<basic_block> bbs, vec_info_shared *); > + ~_bb_vec_info (); > > vec<slp_root> roots; > } *bb_vec_info; > -- > 2.17.1