Hi Richi, on 2023/8/22 20:17, Richard Biener wrote: > On Tue, Aug 22, 2023 at 10:44 AM Kewen.Lin <li...@linux.ibm.com> wrote: >> >> Hi, >> >> Now we use DR_GROUP_STORE_COUNT to record how many stores >> in a group have been transformed and only do the actual >> transform when encountering the last one. I'm making >> patches to move costing next to the transform code, it's >> awkward to use this DR_GROUP_STORE_COUNT for both costing >> and transforming. This patch is to introduce last_element >> to record the last element to be transformed in the group >> rather than to sum up the store number we have seen, then >> we can only check the given stmt is the last or not. It >> can make it work simply for both costing and transforming. >> >> Bootstrapped and regtested on x86_64-redhat-linux, >> aarch64-linux-gnu and powerpc64{,le}-linux-gnu. >> >> Is it ok for trunk? > > This is all (existing) gross, so ... can't we do sth like the following > instead? Going to test this further besides the quick single > testcase I verified.
I just realized that dealing with this in vect_transform_stmt is super neat as you questioned and posted, thanks a lot for pushing commit r14-3383-g2c27600fa79431 for this! BR, Kewen > > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc > index 33f62b77710..67de19d9ce5 100644 > --- a/gcc/tree-vect-stmts.cc > +++ b/gcc/tree-vect-stmts.cc > @@ -8437,16 +8437,6 @@ vectorizable_store (vec_info *vinfo, > /* FORNOW */ > gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info)); > > - /* We vectorize all the stmts of the interleaving group when we > - reach the last stmt in the group. */ > - if (DR_GROUP_STORE_COUNT (first_stmt_info) > - < DR_GROUP_SIZE (first_stmt_info) > - && !slp) > - { > - *vec_stmt = NULL; > - return true; > - } > - > if (slp) > { > grouped_store = false; > @@ -12487,21 +12477,21 @@ vect_transform_stmt (vec_info *vinfo, > break; > > case store_vec_info_type: > - done = vectorizable_store (vinfo, stmt_info, > - gsi, &vec_stmt, slp_node, NULL); > - gcc_assert (done); > - if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node) > + if (STMT_VINFO_GROUPED_ACCESS (stmt_info) > + && !slp_node > + && DR_GROUP_NEXT_ELEMENT (stmt_info)) > + /* In case of interleaving, the whole chain is vectorized when the > + last store in the chain is reached. Store stmts before the last > + one are skipped, and there vec_stmt_info shouldn't be freed > + meanwhile. */ > + ; > + else > { > - /* In case of interleaving, the whole chain is vectorized when the > - last store in the chain is reached. Store stmts before the last > - one are skipped, and there vec_stmt_info shouldn't be freed > - meanwhile. */ > - stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info); > - if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info)) > - is_store = true; > + done = vectorizable_store (vinfo, stmt_info, > + gsi, &vec_stmt, slp_node, NULL); > + gcc_assert (done); > + is_store = true; > } > - else > - is_store = true; > break; > > case condition_vec_info_type: > > >> BR, >> Kewen >> ----- >> >> gcc/ChangeLog: >> >> * tree-vect-data-refs.cc (vect_set_group_last_element): New function. >> (vect_analyze_group_access): Call new function >> vect_set_group_last_element. >> * tree-vect-stmts.cc (vectorizable_store): Replace >> DR_GROUP_STORE_COUNT >> uses with DR_GROUP_LAST_ELEMENT. >> (vect_transform_stmt): Likewise. >> * tree-vect-slp.cc (vect_split_slp_store_group): Likewise. >> (vect_build_slp_instance): Likewise. >> * tree-vectorizer.h (DR_GROUP_LAST_ELEMENT): New macro. >> (DR_GROUP_STORE_COUNT): Remove. >> (class _stmt_vec_info::store_count): Remove. >> (class _stmt_vec_info::last_element): New class member. >> (vect_set_group_last_element): New function declaration. >> --- >> gcc/tree-vect-data-refs.cc | 30 ++++++++++++++++++++++++++++++ >> gcc/tree-vect-slp.cc | 13 +++++++++---- >> gcc/tree-vect-stmts.cc | 9 +++------ >> gcc/tree-vectorizer.h | 12 +++++++----- >> 4 files changed, 49 insertions(+), 15 deletions(-) >> >> diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc >> index 3e9a284666c..c4a495431d5 100644 >> --- a/gcc/tree-vect-data-refs.cc >> +++ b/gcc/tree-vect-data-refs.cc >> @@ -2832,6 +2832,33 @@ vect_analyze_group_access_1 (vec_info *vinfo, >> dr_vec_info *dr_info) >> return true; >> } >> >> +/* Given vectorization information VINFO, set the last element in the >> + group led by FIRST_STMT_INFO. For now, it's only used for loop >> + vectorization and stores, since for loop-vect the grouped stores >> + are only transformed till encounting its last one. */ >> + >> +void >> +vect_set_group_last_element (vec_info *vinfo, stmt_vec_info first_stmt_info) >> +{ >> + if (first_stmt_info >> + && is_a<loop_vec_info> (vinfo) >> + && DR_IS_WRITE (STMT_VINFO_DATA_REF (first_stmt_info))) >> + { >> + stmt_vec_info stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info); >> + stmt_vec_info last_stmt_info = first_stmt_info; >> + while (stmt_info) >> + { >> + gimple *stmt = stmt_info->stmt; >> + gimple *last_stmt = last_stmt_info->stmt; >> + gcc_assert (gimple_bb (stmt) == gimple_bb (last_stmt)); >> + if (gimple_uid (stmt) > gimple_uid (last_stmt)) >> + last_stmt_info = stmt_info; >> + stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info); >> + } >> + DR_GROUP_LAST_ELEMENT (first_stmt_info) = last_stmt_info; >> + } >> +} >> + >> /* Analyze groups of accesses: check that DR_INFO belongs to a group of >> accesses of legal size, step, etc. Detect gaps, single element >> interleaving, and other special cases. Set grouped access info. >> @@ -2853,6 +2880,9 @@ vect_analyze_group_access (vec_info *vinfo, >> dr_vec_info *dr_info) >> } >> return false; >> } >> + >> + stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (dr_info->stmt); >> + vect_set_group_last_element (vinfo, first_stmt_info); >> return true; >> } >> >> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc >> index 89c3216afac..e9b64efe125 100644 >> --- a/gcc/tree-vect-slp.cc >> +++ b/gcc/tree-vect-slp.cc >> @@ -2827,7 +2827,8 @@ vect_find_first_scalar_stmt_in_slp (slp_tree node) >> Return the first stmt in the second group. */ >> >> static stmt_vec_info >> -vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size) >> +vect_split_slp_store_group (vec_info *vinfo, stmt_vec_info first_vinfo, >> + unsigned group1_size) >> { >> gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo); >> gcc_assert (group1_size > 0); >> @@ -2860,6 +2861,9 @@ vect_split_slp_store_group (stmt_vec_info first_vinfo, >> unsigned group1_size) >> /* DR_GROUP_GAP of the first group now has to skip over the second group >> too. */ >> DR_GROUP_GAP (first_vinfo) += group2_size; >> >> + vect_set_group_last_element (vinfo, first_vinfo); >> + vect_set_group_last_element (vinfo, group2); >> + >> if (dump_enabled_p ()) >> dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and >> %d\n", >> group1_size, group2_size); >> @@ -3321,7 +3325,7 @@ vect_build_slp_instance (vec_info *vinfo, >> if (dump_enabled_p ()) >> dump_printf_loc (MSG_NOTE, vect_location, >> "Splitting SLP group at stmt %u\n", i); >> - stmt_vec_info rest = vect_split_slp_store_group (stmt_info, >> + stmt_vec_info rest = vect_split_slp_store_group (vinfo, >> stmt_info, >> group1_size); >> bool res = vect_analyze_slp_instance (vinfo, bst_map, >> stmt_info, >> kind, max_tree_size, >> @@ -3334,7 +3338,8 @@ vect_build_slp_instance (vec_info *vinfo, >> || i - group1_size > 1)) >> { >> stmt_vec_info rest2 = rest; >> - rest = vect_split_slp_store_group (rest, i - group1_size); >> + rest = vect_split_slp_store_group (vinfo, rest, >> + i - group1_size); >> if (i - group1_size > 1) >> res |= vect_analyze_slp_instance (vinfo, bst_map, rest2, >> kind, max_tree_size, >> @@ -3361,7 +3366,7 @@ vect_build_slp_instance (vec_info *vinfo, >> dump_printf_loc (MSG_NOTE, vect_location, >> "Splitting SLP group at stmt %u\n", i); >> >> - stmt_vec_info rest = vect_split_slp_store_group (stmt_info, >> + stmt_vec_info rest = vect_split_slp_store_group (vinfo, stmt_info, >> group1_size); >> /* Loop vectorization cannot handle gaps in stores, make sure >> the split group appears as strided. */ >> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc >> index 33f62b77710..1580a396301 100644 >> --- a/gcc/tree-vect-stmts.cc >> +++ b/gcc/tree-vect-stmts.cc >> @@ -8429,9 +8429,6 @@ vectorizable_store (vec_info *vinfo, >> else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3) >> return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, >> ncopies); >> >> - if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) >> - DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++; >> - >> if (grouped_store) >> { >> /* FORNOW */ >> @@ -8439,8 +8436,8 @@ vectorizable_store (vec_info *vinfo, >> >> /* We vectorize all the stmts of the interleaving group when we >> reach the last stmt in the group. */ >> - if (DR_GROUP_STORE_COUNT (first_stmt_info) >> - < DR_GROUP_SIZE (first_stmt_info) >> + if (STMT_VINFO_GROUPED_ACCESS (stmt_info) >> + && stmt_info != DR_GROUP_LAST_ELEMENT (first_stmt_info) >> && !slp) >> { >> *vec_stmt = NULL; >> @@ -12497,7 +12494,7 @@ vect_transform_stmt (vec_info *vinfo, >> one are skipped, and there vec_stmt_info shouldn't be freed >> meanwhile. */ >> stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info); >> - if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE >> (group_info)) >> + if (stmt_info == DR_GROUP_LAST_ELEMENT (group_info)) >> is_store = true; >> } >> else >> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h >> index 53a3d78d545..6817e113b56 100644 >> --- a/gcc/tree-vectorizer.h >> +++ b/gcc/tree-vectorizer.h >> @@ -1285,11 +1285,12 @@ public: >> stmt_vec_info first_element; >> /* Pointer to the next element in the group. */ >> stmt_vec_info next_element; >> + /* Pointer to the last element in the group, for now it's only used >> + for loop-vect stores since they only get transformed till the >> + last one is being transformed. */ >> + stmt_vec_info last_element; >> /* The size of the group. */ >> unsigned int size; >> - /* For stores, number of stores from this group seen. We vectorize the >> last >> - one. */ >> - unsigned int store_count; >> /* For loads only, the gap from the previous load. For consecutive loads, >> GAP >> is 1. */ >> unsigned int gap; >> @@ -1500,10 +1501,10 @@ struct gather_scatter_info { >> (gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element) >> #define DR_GROUP_NEXT_ELEMENT(S) \ >> (gcc_checking_assert ((S)->dr_aux.dr), (S)->next_element) >> +#define DR_GROUP_LAST_ELEMENT(S) \ >> + (gcc_checking_assert ((S)->dr_aux.dr), (S)->last_element) >> #define DR_GROUP_SIZE(S) \ >> (gcc_checking_assert ((S)->dr_aux.dr), (S)->size) >> -#define DR_GROUP_STORE_COUNT(S) \ >> - (gcc_checking_assert ((S)->dr_aux.dr), (S)->store_count) >> #define DR_GROUP_GAP(S) \ >> (gcc_checking_assert ((S)->dr_aux.dr), (S)->gap) >> >> @@ -2317,6 +2318,7 @@ extern tree vect_get_new_ssa_name (tree, enum >> vect_var_kind, >> extern tree vect_create_addr_base_for_vector_ref (vec_info *, >> stmt_vec_info, gimple_seq >> *, >> tree); >> +extern void vect_set_group_last_element (vec_info *, stmt_vec_info); >> >> /* In tree-vect-loop.cc. */ >> extern tree neutral_op_for_reduction (tree, code_helper, tree); >> -- >> 2.31.1