Hello all: this patch improves determine_suggested_unroll_factor in finish_cost by using the reduction factor together with the number of loads, stores, and non-load/store statements.
It returns the unroll factor computed from the reduction factor and the number of loads, stores, and non-load/store statements (general_ops). Bootstrapped and regtested on powerpc64-linux-gnu. Gains are expected on the SPEC 2017 benchmarks. Thanks & Regards, Ajit rs6000: Improve suggested unroll factor in finish_cost Improve determine_suggested_unroll_factor in finish_cost with reduction factor of loads/stores/non_load_stores. Return unroll factor calculated as per reduction factor with number of loads/stores/non_load_stores (general_ops). 2024-07-22 Ajit Kumar Agarwal <aagar...@linux.ibm.com> gcc/ChangeLog: * config/rs6000/rs6000.cc: Improve determine_suggested_unroll_factor with reduction factor of load/stores/general_ops. --- gcc/config/rs6000/rs6000.cc | 60 ++++++++++--------------------------- 1 file changed, 16 insertions(+), 44 deletions(-) diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 5ed64b1e686..0d69ec4cfbe 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -5458,7 +5458,6 @@ rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo) - estimated iteration count when iteration count is unknown; */ - unsigned int rs6000_cost_data::determine_suggested_unroll_factor (loop_vec_info loop_vinfo) { @@ -5483,53 +5482,26 @@ rs6000_cost_data::determine_suggested_unroll_factor (loop_vec_info loop_vinfo) unsigned int issue_width = rs6000_vect_unroll_issue; unsigned int uf = CEIL (reduc_factor * issue_width, nstmts_nonldst); uf = MIN ((unsigned int) rs6000_vect_unroll_limit, uf); - /* Make sure it is power of 2. */ - uf = 1 << ceil_log2 (uf); + unsigned int temp; - /* If the iteration count is known, the costing would be exact enough, - don't worry it could be worse. */ - if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) - return uf; - - /* Inspired by SPEC2017 parest_r, we want to aggressively unroll the - loop if either condition is satisfied: - - reduction factor exceeds the threshold; - - emulated gather load adopted. 
*/ - if (reduc_factor > (unsigned int) rs6000_vect_unroll_reduc_threshold - || m_gather_load) - return uf; - - /* Check if we can conclude it's good to unroll from the estimated - iteration count. */ - HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop); - unsigned int vf = vect_vf_for_cost (loop_vinfo); - unsigned int unrolled_vf = vf * uf; - if (est_niter == -1 || est_niter < unrolled_vf) - /* When the estimated iteration of this loop is unknown, it's possible - that we are able to vectorize this loop with the original VF but fail - to vectorize it with the unrolled VF any more if the actual iteration - count is in between. */ - return 1; - else + if (m_nstores > 0) { - unsigned int epil_niter_unr = est_niter % unrolled_vf; - unsigned int epil_niter = est_niter % vf; - /* Even if we have partial vector support, it can be still inefficent - to calculate the length when the iteration count is unknown, so - only expect it's good to unroll when the epilogue iteration count - is not bigger than VF (only one time length calculation). */ - if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) - && epil_niter_unr <= vf) - return uf; - /* Without partial vector support, conservatively unroll this when - the epilogue iteration count is less than the original one - (epilogue execution time wouldn't be longer than before). */ - else if (!LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) - && epil_niter_unr <= epil_niter) - return uf; + temp = CEIL (reduc_factor * rs6000_vect_unroll_issue, + m_nstores); + uf = MIN (uf, temp); } - return 1; + if (m_nloads > 0) + { + temp = CEIL (reduc_factor * rs6000_vect_unroll_issue, + m_nloads + m_nstores); + uf = MIN (uf, temp); + } + + /* Make sure it is power of 2. */ + uf = 1 << ceil_log2 (uf); + + return uf; } void -- 2.43.5