For very small loops (< 6 insns), it would be fine to unroll 4 times to use cache line better. Like below loops: `while (i) a[--i] = NULL; while (p < e) *d++ = *p++;`
And for very complex loops which may cause negative impacts: branch-miss or cache-miss. Like below loop: there are calls, early exits and branches in loop. ``` for (int i = 0; i < n; i++) { int e = a[I]; .... if (function_call(e)) break; .... } ``` This patch enhances RTL unroll for small loops and prevent to unroll complex loops. gcc/ChangeLog 2020-07-03 Jiufu Guo <guoji...@linux.ibm.com> * config/rs6000/rs6000.c (rs6000_loop_unroll_adjust): Refine hook. (rs6000_complex_loop_p): New function. (num_loop_calls): New function. --- gcc/config/rs6000/rs6000.c | 46 +++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 58f5d780603..a4874fa0efc 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -5130,22 +5130,56 @@ rs6000_destroy_cost_data (void *data) free (data); } +/* Count the number of call insns in LOOP. */ +static unsigned int +num_loop_calls (struct loop *loop) +{ + basic_block *bbs; + rtx_insn *insn; + unsigned int i; + unsigned int call_ins_num = 0; + + bbs = get_loop_body (loop); + for (i = 0; i < loop->num_nodes; i++) + FOR_BB_INSNS (bbs[i], insn) + if (CALL_P (insn)) + call_ins_num++; + + free (bbs); + + return call_ins_num; +} + +/* Return true if LOOP is too complex to be unrolled. */ +static bool +rs6000_complex_loop_p (struct loop *loop) +{ + unsigned call_num; + + return loop->ninsns > 10 + && (call_num = num_loop_calls (loop)) > 0 + && (call_num + num_loop_branches (loop)) * 5 > loop->ninsns + && !single_exit (loop); +} + /* Implement targetm.loop_unroll_adjust. */ static unsigned rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop) { - if (unroll_only_small_loops) + if (unroll_only_small_loops) { - /* TODO: This is hardcoded to 10 right now. It can be refined, for - example we may want to unroll very small loops more times (4 perhaps). - We also should use a PARAM for this. */ + if (loop->ninsns <= 6) + return MIN (4, nunroll); if (loop->ninsns <= 10) return MIN (2, nunroll); - else - return 0; + + return 0; } + if (rs6000_complex_loop_p (loop)) + return 0; + return nunroll; } -- 2.25.1