Richard Biener <rguent...@suse.de> writes: > This makes the only consumer of STMT_VINFO_SAME_ALIGN_REFS, the > loop peeling for alignment code, use locally computed data and > then removes STMT_VINFO_SAME_ALIGN_REFS and its computation. > The main benefit is reducing the dependence computation load > by not asking for read-read dependences and getting rid of > DR related info from stmt_vec_info.
Nice. Some very minor comments below… > @@ -1130,6 +1129,45 @@ vect_compute_data_ref_alignment (vec_info *vinfo, > dr_vec_info *dr_info) > return; > } > > +/* Return whether DR_INFO, which is related to DR_PEEL_INFO in > + that it only differs in DR_INIT, is aligned if DR_PEEL_INFO > + is made aligned via peeling. */ > + > +static bool > +vect_dr_aligned_if_related_peeled_dr_is (dr_vec_info *dr_info, > + dr_vec_info *dr_peel_info) > +{ > + if (known_ge (DR_TARGET_ALIGNMENT (dr_peel_info), > + DR_TARGET_ALIGNMENT (dr_info))) Wondered here if multiple_p might be clearer/more precise than known_ge, but of course non-power-of-2 alignments aren't much use… > + { > + poly_offset_int diff > + = (wi::to_poly_offset (DR_INIT (dr_peel_info->dr)) > + - wi::to_poly_offset (DR_INIT (dr_info->dr))); > + if (known_eq (diff, 0) > + || multiple_p (diff, DR_TARGET_ALIGNMENT (dr_info))) > + return true; > + } > + return false; > +} > + > +/* Return whether DR_INFO is aligned if DR_PEEL_INFO is made > + aligned via peeling. */ > + > +static bool > +vect_dr_aligned_if_peeled_dr_is (dr_vec_info *dr_info, > + dr_vec_info *dr_peel_info) > +{ > + if (!operand_equal_p (DR_BASE_ADDRESS (dr_info->dr), > + DR_BASE_ADDRESS (dr_peel_info->dr), 0) > + || !operand_equal_p (DR_OFFSET (dr_info->dr), > + DR_OFFSET (dr_peel_info->dr), 0) > + || !operand_equal_p (DR_STEP (dr_info->dr), > + DR_STEP (dr_peel_info->dr), 0)) > + return false; > + > + return vect_dr_aligned_if_related_peeled_dr_is (dr_info, dr_peel_info); > +} > + > /* Function vect_update_misalignment_for_peel. > Sets DR_INFO's misalignment > - to 0 if it has the same alignment as DR_PEEL_INFO, > @@ -1146,18 +1184,10 @@ static void > vect_update_misalignment_for_peel (dr_vec_info *dr_info, > dr_vec_info *dr_peel_info, int npeel) > { > - unsigned int i; > - vec<dr_p> same_aligned_drs; > - struct data_reference *current_dr; > - stmt_vec_info peel_stmt_info = dr_peel_info->stmt; > - > /* It can be assumed that if dr_info has the same alignment as dr_peel, > it is aligned in the vector loop. */ > - same_aligned_drs = STMT_VINFO_SAME_ALIGN_REFS (peel_stmt_info); > - FOR_EACH_VEC_ELT (same_aligned_drs, i, current_dr) > + if (vect_dr_aligned_if_peeled_dr_is (dr_info, dr_peel_info)) > { > - if (current_dr != dr_info->dr) > - continue; > gcc_assert (!known_alignment_for_access_p (dr_info) > || !known_alignment_for_access_p (dr_peel_info) > || (DR_MISALIGNMENT (dr_info) > @@ -1572,6 +1602,43 @@ vect_peeling_supportable (loop_vec_info loop_vinfo, > dr_vec_info *dr0_info, > return true; > } > > +/* Compare two data-references DRA and DRB to group them into chunks > + with related alignment. */ > + > +static int > +dr_align_group_sort_cmp (const void *dra_, const void *drb_) > +{ > + data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_); > + data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_); > + int cmp; > + > + /* Stabilize sort. */ > + if (dra == drb) > + return 0; > + > + /* Ordering of DRs according to base. */ > + cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra), > + DR_BASE_ADDRESS (drb)); > + if (cmp != 0) > + return cmp; > + > + /* And according to DR_OFFSET. */ > + cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)); > + if (cmp != 0) > + return cmp; > + > + /* And after step. */ > + cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)); > + if (cmp != 0) > + return cmp; > + > + /* Then sort after DR_INIT. In case of identical DRs sort after stmt UID. > */ > + cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)); > + if (cmp == 0) > + return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1; > + return cmp; > +} > + > /* Function vect_enhance_data_refs_alignment > > This pass will use loop versioning and loop peeling in order to enhance > @@ -1666,7 +1733,6 @@ vect_peeling_supportable (loop_vec_info loop_vinfo, > dr_vec_info *dr0_info, > opt_result > vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) > { > - vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); > class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); > enum dr_alignment_support supportable_dr_alignment; > dr_vec_info *first_store = NULL; > @@ -1680,7 +1746,7 @@ vect_enhance_data_refs_alignment (loop_vec_info > loop_vinfo) > bool one_misalignment_unknown = false; > bool one_dr_unsupportable = false; > dr_vec_info *unsupportable_dr_info = NULL; > - unsigned int mis, same_align_drs_max = 0; > + unsigned int mis, dr0_same_align_drs = 0, first_store_same_align_drs = 0; > hash_table<peel_info_hasher> peeling_htab (1); > > DUMP_VECT_SCOPE ("vect_enhance_data_refs_alignment"); > @@ -1689,6 +1755,54 @@ vect_enhance_data_refs_alignment (loop_vec_info > loop_vinfo) > LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0); > LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = 0; > > + if (LOOP_VINFO_DATAREFS (loop_vinfo).is_empty ()) > + return opt_result::success (); > + > + /* Sort the vector of datarefs so DRs that have the same or dependent > + alignment are next to each other. */ > + auto_vec<data_reference_p> datarefs > + = LOOP_VINFO_DATAREFS (loop_vinfo).copy (); > + datarefs.qsort (dr_align_group_sort_cmp); > + > + /* Compute what was formerly STMT_VINFO_SAME_ALIGN_REFS.length (). */ Is this comment worth it? Hopefully in a couple of years noone will know/need to know what on earth STMT_VINFO_SAME_ALIGN_REFS is/was. > + auto_vec<unsigned> n_same_align_refs (datarefs.length ()); > + n_same_align_refs.quick_grow_cleared (datarefs.length ()); > + unsigned i0; > + for (i0 = 0; i0 < datarefs.length (); ++i0) > + if (DR_BASE_ADDRESS (datarefs[i0])) > + break; > + for (i = i0 + 1; i <= datarefs.length (); ++i) > + { > + if (i == datarefs.length () > + || !DR_BASE_ADDRESS (datarefs[i]) Can this happen? I thought the qsort would put all null bases first. Thanks, Richard