On Fri, Jul 31, 2020 at 12:30 PM Martin Liška <mli...@suse.cz> wrote: > > Hey. > > Motivation of the patch is to allow vectorization of an entire BB. > So far we bail out a sub-BB region when we reach a stmt for which > vect_find_stmt_data_reference returns false. That's replaced with > recoding of groups of the data references. > > We can newly vectorize code like: > > void foo(); > void bar (int i, double *a, double *b) > { > double tem1 = a[2*i]; > double tem2 = 2*a[2*i+1]; > foo (); > b[2*i] = 2*tem1; > b[2*i+1] = tem2; > } > > into: > <bb 2> [local count: 1073741824]: > _1 = i_12(D) * 2; > _2 = (long unsigned int) _1; > _3 = _2 * 8; > _4 = a_13(D) + _3; > vect_tem1_15.5_24 = MEM <vector(2) double> [(double *)_4]; > vect__10.6_25 = vect_tem1_15.5_24 * { 2.0e+0, 2.0e+0 }; > foo (); > _9 = b_18(D) + _3; > MEM <vector(2) double> [(double *)_9] = vect__10.6_25; > return; > > Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
OK with some minor changes, see below > Thoughs? > Martin > > gcc/ChangeLog: > > * tree-vect-data-refs.c (dr_group_sort_cmp): Work on > data_ref_pair. > (vect_analyze_data_ref_accesses): Work on groups. > (vect_find_stmt_data_reference): Add group_id argument and fill > up dataref_groups vector. > * tree-vect-loop.c (vect_get_datarefs_in_loop): Pass new > arguments. > (vect_analyze_loop_2): Likewise. > * tree-vect-slp.c (vect_slp_analyze_bb_1): Pass argument. > (vect_slp_bb_region): Likewise. > (vect_slp_region): Likewise. > (vect_slp_bb):Work on the entire BB. > * tree-vectorizer.h (vect_analyze_data_ref_accesses): Add new > argument. > (vect_find_stmt_data_reference): Likewise. > > gcc/testsuite/ChangeLog: > > * gcc.dg/vect/bb-slp-38.c: Adjust pattern as now we only process > a single vectorization and now 2 partial. > * gcc.dg/vect/bb-slp-45.c: New test. > --- > gcc/testsuite/gcc.dg/vect/bb-slp-38.c | 2 +- > gcc/testsuite/gcc.dg/vect/bb-slp-45.c | 36 ++++++++++++ > gcc/tree-vect-data-refs.c | 63 +++++++++++++------- > gcc/tree-vect-loop.c | 5 +- > gcc/tree-vect-slp.c | 82 ++++++++++----------------- > gcc/tree-vectorizer.h | 5 +- > 6 files changed, 118 insertions(+), 75 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-45.c > > diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-38.c > b/gcc/testsuite/gcc.dg/vect/bb-slp-38.c > index 59aec54fffd..9c57ea3c2c1 100644 > --- a/gcc/testsuite/gcc.dg/vect/bb-slp-38.c > +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-38.c > @@ -41,4 +41,4 @@ int main() > } > > /* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target > vect_perm } } } */ > -/* { dg-final { scan-tree-dump-times "basic block part vectorized" 2 "slp2" > { target vect_perm } } } */ > +/* { dg-final { scan-tree-dump "basic block part vectorized" "slp2" { target > vect_perm } } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-45.c > b/gcc/testsuite/gcc.dg/vect/bb-slp-45.c > new file mode 100644 > index 00000000000..4107a34cb93 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-45.c > @@ -0,0 +1,36 @@ > +/* { dg-require-effective-target vect_double } */ > + > +#include "tree-vect.h" > + > +extern void abort (void); > + > +double a[8], b[8]; > +int x; > + > +void __attribute__((noinline,noclone)) > +bar (void) > +{ > + x = 1; > +} > + > +void __attribute__((noinline,noclone)) > +foo(int i) > +{ > + double tem1 = a[2*i]; > + double tem2 = 2*a[2*i+1]; > + bar (); > + b[2*i] = 2*tem1; > + b[2*i+1] = tem2; > +} > + > +int main() > +{ > + int i; > + check_vect (); > + for (i = 0; i < 8; ++i) > + b[i] = i; > + foo (2); > + return 0; > +} > + > +/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" } } */ > diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c > index a78ae61d1b0..36e10ff3619 100644 > --- a/gcc/tree-vect-data-refs.c > +++ b/gcc/tree-vect-data-refs.c > @@ -2757,14 +2757,18 @@ vect_analyze_data_ref_access (vec_info *vinfo, > dr_vec_info *dr_info) > return vect_analyze_group_access (vinfo, dr_info); > } > > +typedef std::pair<data_reference_p, int> data_ref_pair; > + > /* Compare two data-references DRA and DRB to group them into chunks > suitable for grouping. */ > > static int > dr_group_sort_cmp (const void *dra_, const void *drb_) > { > - data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_); > - data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_); > + data_ref_pair dra_pair = *(data_ref_pair *)const_cast<void *>(dra_); > + data_ref_pair drb_pair = *(data_ref_pair *)const_cast<void *>(drb_); > + data_reference_p dra = dra_pair.first; > + data_reference_p drb = drb_pair.first; > int cmp; > > /* Stabilize sort. */ > @@ -2772,10 +2776,13 @@ dr_group_sort_cmp (const void *dra_, const void *drb_) > return 0; > > /* DRs in different loops never belong to the same group. */ Comment needs to be adjusted to mention basic-block instead of loop > - loop_p loopa = gimple_bb (DR_STMT (dra))->loop_father; > - loop_p loopb = gimple_bb (DR_STMT (drb))->loop_father; > - if (loopa != loopb) > - return loopa->num < loopb->num ? -1 : 1; > + int bb_index1 = gimple_bb (DR_STMT (dra))->index; > + int bb_index2 = gimple_bb (DR_STMT (drb))->index; > + if (bb_index1 != bb_index2) > + return bb_index1 < bb_index2 ? -1 : 1; > + > + if (dra_pair.second != drb_pair.second) > + return dra_pair.second < drb_pair.second ? -1 : 1; Please add a comment that this compares the DR group (not obvious because it says .second) > > /* Ordering of DRs according to base. */ > cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra), > @@ -2881,11 +2888,11 @@ can_group_stmts_p (stmt_vec_info stmt1_info, > stmt_vec_info stmt2_info, > FORNOW: handle only arrays and pointer accesses. */ > > opt_result > -vect_analyze_data_ref_accesses (vec_info *vinfo) > +vect_analyze_data_ref_accesses (vec_info *vinfo, > + vec<int> *dataref_groups) > { > unsigned int i; > vec<data_reference_p> datarefs = vinfo->shared->datarefs; > - struct data_reference *dr; > > DUMP_VECT_SCOPE ("vect_analyze_data_ref_accesses"); > > @@ -2895,14 +2902,21 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) > /* Sort the array of datarefs to make building the interleaving chains > linear. Don't modify the original vector's order, it is needed for > determining what dependencies are reversed. */ > - vec<data_reference_p> datarefs_copy = datarefs.copy (); > + vec<data_ref_pair> datarefs_copy; > + datarefs_copy.create (datarefs.length ()); > + for (unsigned i = 0; i < datarefs.length (); i++) > + { > + int group_id = dataref_groups ? (*dataref_groups)[i] : 0; > + datarefs_copy.safe_push (data_ref_pair (datarefs[i], group_id)); quick_push > + } > datarefs_copy.qsort (dr_group_sort_cmp); > hash_set<stmt_vec_info> to_fixup; > > /* Build the interleaving chains. */ > for (i = 0; i < datarefs_copy.length () - 1;) > { > - data_reference_p dra = datarefs_copy[i]; > + data_reference_p dra = datarefs_copy[i].first; > + int dra_group_id = datarefs_copy[i].second; > dr_vec_info *dr_info_a = vinfo->lookup_dr (dra); > stmt_vec_info stmtinfo_a = dr_info_a->stmt; > stmt_vec_info lastinfo = NULL; > @@ -2914,7 +2928,8 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) > } > for (i = i + 1; i < datarefs_copy.length (); ++i) > { > - data_reference_p drb = datarefs_copy[i]; > + data_reference_p drb = datarefs_copy[i].first; > + int drb_group_id = datarefs_copy[i].second; > dr_vec_info *dr_info_b = vinfo->lookup_dr (drb); > stmt_vec_info stmtinfo_b = dr_info_b->stmt; > if (!STMT_VINFO_VECTORIZABLE (stmtinfo_b) > @@ -2927,10 +2942,14 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) > matters we can push those to a worklist and re-iterate > over them. The we can just skip ahead to the next DR here. */ > > - /* DRs in a different loop should not be put into the same > + /* DRs in a different BBs should not be put into the same > interleaving group. */ > - if (gimple_bb (DR_STMT (dra))->loop_father > - != gimple_bb (DR_STMT (drb))->loop_father) > + int bb_index1 = gimple_bb (DR_STMT (dra))->index; > + int bb_index2 = gimple_bb (DR_STMT (drb))->index; > + if (bb_index1 != bb_index2) > + break; > + > + if (dra_group_id != drb_group_id) > break; > > /* Check that the data-refs have same first location (except init) > @@ -2977,7 +2996,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) > HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra)); > HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb)); > HOST_WIDE_INT init_prev > - = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1])); > + = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1].first)); > gcc_assert (init_a <= init_b > && init_a <= init_prev > && init_prev <= init_b); > @@ -2985,7 +3004,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) > /* Do not place the same access in the interleaving chain twice. */ > if (init_b == init_prev) > { > - gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1])) > + gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1].first)) > < gimple_uid (DR_STMT (drb))); > /* Simply link in duplicates and fix up the chain below. */ > } > @@ -3098,9 +3117,10 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) > to_fixup.add (newgroup); > } > > - FOR_EACH_VEC_ELT (datarefs_copy, i, dr) > + data_ref_pair *dr_pair; > + FOR_EACH_VEC_ELT (datarefs_copy, i, dr_pair) > { > - dr_vec_info *dr_info = vinfo->lookup_dr (dr); > + dr_vec_info *dr_info = vinfo->lookup_dr (dr_pair->first); > if (STMT_VINFO_VECTORIZABLE (dr_info->stmt) > && !vect_analyze_data_ref_access (vinfo, dr_info)) > { > @@ -3991,7 +4011,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, > loop_vec_info loop_vinfo, > > opt_result > vect_find_stmt_data_reference (loop_p loop, gimple *stmt, > - vec<data_reference_p> *datarefs) > + vec<data_reference_p> *datarefs, > + vec<int> *dataref_groups, int group_id) You are always passing NULL here so simply avoid this and the following changes. OK with those changes. Thanks, Richard. > { > /* We can ignore clobbers for dataref analysis - they are removed during > loop vectorization and BB vectorization checks dependences with a > @@ -4118,6 +4139,8 @@ vect_find_stmt_data_reference (loop_p loop, gimple > *stmt, > newdr->aux = (void *) (-1 - tree_to_uhwi (arg2)); > free_data_ref (dr); > datarefs->safe_push (newdr); > + if (dataref_groups) > + dataref_groups->safe_push (group_id); > return opt_result::success (); > } > } > @@ -4127,6 +4150,8 @@ vect_find_stmt_data_reference (loop_p loop, gimple > *stmt, > } > > datarefs->safe_push (dr); > + if (dataref_groups) > + dataref_groups->safe_push (group_id); > return opt_result::success (); > } > > diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c > index 185019c3305..2326d670bee 100644 > --- a/gcc/tree-vect-loop.c > +++ b/gcc/tree-vect-loop.c > @@ -1865,7 +1865,8 @@ vect_get_datarefs_in_loop (loop_p loop, basic_block > *bbs, > if (is_gimple_debug (stmt)) > continue; > ++(*n_stmts); > - opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs); > + opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs, > + NULL, 0); > if (!res) > { > if (is_gimple_call (stmt) && loop->safelen) > @@ -2087,7 +2088,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool > &fatal, unsigned *n_stmts) > /* Analyze the access patterns of the data-refs in the loop (consecutive, > complex, etc.). FORNOW: Only handle consecutive access pattern. */ > > - ok = vect_analyze_data_ref_accesses (loop_vinfo); > + ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL); > if (!ok) > { > if (dump_enabled_p ()) > diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c > index 72192b5a813..167db076454 100644 > --- a/gcc/tree-vect-slp.c > +++ b/gcc/tree-vect-slp.c > @@ -3217,7 +3217,8 @@ vect_slp_check_for_constructors (bb_vec_info bb_vinfo) > region. */ > > static bool > -vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal) > +vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal, > + vec<int> *dataref_groups) > { > DUMP_VECT_SCOPE ("vect_slp_analyze_bb"); > > @@ -3239,7 +3240,7 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int > n_stmts, bool &fatal) > return false; > } > > - if (!vect_analyze_data_ref_accesses (bb_vinfo)) > + if (!vect_analyze_data_ref_accesses (bb_vinfo, dataref_groups)) > { > if (dump_enabled_p ()) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > @@ -3348,10 +3349,11 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int > n_stmts, bool &fatal) > given by DATAREFS. */ > > static bool > -vect_slp_bb_region (gimple_stmt_iterator region_begin, > - gimple_stmt_iterator region_end, > - vec<data_reference_p> datarefs, > - unsigned int n_stmts) > +vect_slp_region (gimple_stmt_iterator region_begin, > + gimple_stmt_iterator region_end, > + vec<data_reference_p> datarefs, > + vec<int> *dataref_groups, > + unsigned int n_stmts) > { > bb_vec_info bb_vinfo; > auto_vector_modes vector_modes; > @@ -3378,7 +3380,7 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin, > bb_vinfo->shared->check_datarefs (); > bb_vinfo->vector_mode = next_vector_mode; > > - if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal) > + if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal, dataref_groups) > && dbg_cnt (vect_slp)) > { > if (dump_enabled_p ()) > @@ -3473,45 +3475,30 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin, > bool > vect_slp_bb (basic_block bb) > { > - gimple_stmt_iterator gsi; > - bool any_vectorized = false; > - > - gsi = gsi_after_labels (bb); > - while (!gsi_end_p (gsi)) > + vec<data_reference_p> datarefs = vNULL; > + vec<int> dataref_groups = vNULL; > + int insns = 0; > + int current_group = 0; > + gimple_stmt_iterator region_begin = gsi_start_nondebug_after_labels_bb > (bb); > + gimple_stmt_iterator region_end = gsi_last_bb (bb); > + if (!gsi_end_p (region_end)) > + gsi_next (®ion_end); > + > + for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi); > + gsi_next (&gsi)) > { > - gimple_stmt_iterator region_begin = gsi; > - vec<data_reference_p> datarefs = vNULL; > - int insns = 0; > - > - for (; !gsi_end_p (gsi); gsi_next (&gsi)) > - { > - gimple *stmt = gsi_stmt (gsi); > - if (is_gimple_debug (stmt)) > - { > - /* Skip leading debug stmts. */ > - if (gsi_stmt (region_begin) == stmt) > - gsi_next (®ion_begin); > - continue; > - } > - insns++; > - > - if (gimple_location (stmt) != UNKNOWN_LOCATION) > - vect_location = stmt; > + gimple *stmt = gsi_stmt (gsi); > + if (is_gimple_debug (stmt)) > + continue; > > - if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs)) > - break; > - } > - if (gsi_end_p (region_begin)) > - break; > + insns++; > > - /* Skip leading unhandled stmts. */ > - if (gsi_stmt (region_begin) == gsi_stmt (gsi)) > - { > - gsi_next (&gsi); > - continue; > - } > + if (gimple_location (stmt) != UNKNOWN_LOCATION) > + vect_location = stmt; > > - gimple_stmt_iterator region_end = gsi; > + if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs, > + &dataref_groups, current_group)) > + ++current_group; > > if (insns > param_slp_max_insns_in_bb) > { > @@ -3520,17 +3507,10 @@ vect_slp_bb (basic_block bb) > "not vectorized: too many instructions in " > "basic block.\n"); > } > - else if (vect_slp_bb_region (region_begin, region_end, datarefs, > insns)) > - any_vectorized = true; > - > - if (gsi_end_p (region_end)) > - break; > - > - /* Skip the unhandled stmt. */ > - gsi_next (&gsi); > } > > - return any_vectorized; > + return vect_slp_region (region_begin, region_end, datarefs, > + &dataref_groups, insns); > } > > > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > index 5466c78c20b..21881a9390c 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -1917,14 +1917,15 @@ extern bool vect_slp_analyze_instance_dependence > (vec_info *, slp_instance); > extern opt_result vect_enhance_data_refs_alignment (loop_vec_info); > extern opt_result vect_analyze_data_refs_alignment (loop_vec_info); > extern bool vect_slp_analyze_instance_alignment (vec_info *, slp_instance); > -extern opt_result vect_analyze_data_ref_accesses (vec_info *); > +extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *); > extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info); > extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree, > tree, int, internal_fn *, tree *); > extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info, > gather_scatter_info *); > extern opt_result vect_find_stmt_data_reference (loop_p, gimple *, > - vec<data_reference_p> *); > + vec<data_reference_p> *, > + vec<int> *, int); > extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *, bool > *); > extern void vect_record_base_alignments (vec_info *); > extern tree vect_create_data_ref_ptr (vec_info *, > -- > 2.27.0 >