Jiufu Guo <guoji...@linux.ibm.com> writes:

I would like to re-ping this patch.
Since it corrects COUNT and PROB for hot blocks, it helps in some
cases.

https://gcc.gnu.org/legacy-ml/gcc-patches/2020-02/msg00927.html

Thanks,
Jiufu Guo

> Jiufu Guo <guoji...@linux.ibm.com> writes:
>
> Gentle ping.
> https://gcc.gnu.org/legacy-ml/gcc-patches/2020-02/msg00927.html
>
> BR,
> Jiufu Guo
>
>> Jiufu Guo via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
>>
>> Hi,
>>
>> I would like to re-ping this; I hope to get approval for this patch.
>> https://gcc.gnu.org/legacy-ml/gcc-patches/2020-02/msg00927.html
>>
>> BR,
>> Jiufu Guo
>>
>>> Jiufu Guo <guoji...@linux.ibm.com> writes:
>>>
>>> Hi,
>>>
>>> I'd like to ping this patch for trunk on stage 1.
>>>
>>> This patch could fix the issue of incorrect COUNT/FREQUENCIES of
>>> unrolled loop blocks, and could also help improve the cold/hot issue of
>>> the unrolled loops.
>>>
>>> patch is also at
>>> https://gcc.gnu.org/legacy-ml/gcc-patches/2020-02/msg00927.html
>>>
>>> Thanks,
>>> Jiufu
>>>
>>>> Jiufu Guo <guoji...@linux.ibm.com> writes:
>>>>
>>>> Hi!
>>>>
>>>> I'd like to ping the following patch. As we are near the end of GCC 10
>>>> stage 4, it seems I should ask for approval for the GCC 11 trunk.
>>>>
>>>> Thanks,
>>>> Jiufu Guo
>>>>
>>>>> Hi Honza and all,
>>>>>
>>>>> I updated the patch a little as below. Bootstrap and regtest are ok
>>>>> on powerpc64le.
>>>>>
>>>>> Is it OK for trunk?
>>>>>
>>>>> Thanks for comments.
>>>>> Jiufu
>>>>>
>>>>> diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
>>>>> index 727e951..ded0046 100644
>>>>> --- a/gcc/cfgloopmanip.c
>>>>> +++ b/gcc/cfgloopmanip.c
>>>>> @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
>>>>>  #include "gimplify-me.h"
>>>>>  #include "tree-ssa-loop-manip.h"
>>>>>  #include "dumpfile.h"
>>>>> +#include "cfgrtl.h"
>>>>>  
>>>>>  static void copy_loops_to (class loop **, int,
>>>>>                      class loop *);
>>>>> @@ -1258,14 +1259,30 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
>>>>>     /* If original loop is executed COUNT_IN times, the unrolled
>>>>>        loop will account SCALE_MAIN_DEN times.  */
>>>>>     scale_main = count_in.probability_in (scale_main_den);
>>>>> +
>>>>> +   /* If we are guessing at the number of iterations and count_in
>>>>> +      becomes unrealistically small, reset probability.  */
>>>>> +   if (!(count_in.reliable_p () || loop->any_estimate))
>>>>> +     {
>>>>> +       profile_count new_count_in = count_in.apply_probability (scale_main);
>>>>> +       profile_count preheader_count = loop_preheader_edge (loop)->count ();
>>>>> +       if (new_count_in.apply_scale (1, 10) < preheader_count)
>>>>> +         scale_main = profile_probability::likely ();
>>>>> +     }
>>>>> +
>>>>>     scale_act = scale_main * prob_pass_main;
>>>>>   }
>>>>>        else
>>>>>   {
>>>>> +   profile_count new_loop_count;
>>>>>     profile_count preheader_count = e->count ();
>>>>> -   for (i = 0; i < ndupl; i++)
>>>>> -     scale_main = scale_main * scale_step[i];
>>>>>     scale_act = preheader_count.probability_in (count_in);
>>>>> +   /* Compute final preheader count after peeling NDUPL copies.  */
>>>>> +   for (i = 0; i < ndupl; i++)
>>>>> +     preheader_count = preheader_count.apply_probability (scale_step[i]);
>>>>> +   /* Subtract out exit(s) from peeled copies.  */
>>>>> +   new_loop_count = count_in - (e->count () - preheader_count);
>>>>> +   scale_main = new_loop_count.probability_in (count_in);
>>>>>   }
>>>>>      }
>>>>>  
>>>>> @@ -1381,6 +1398,38 @@ duplicate_loop_to_header_edge (class loop *loop, edge e,
>>>>>     scale_bbs_frequencies (new_bbs, n, scale_act);
>>>>>     scale_act = scale_act * scale_step[j];
>>>>>   }
>>>>> +
>>>>> +      /* Need to update PROB of exit edge and corresponding COUNT.  */
>>>>> +      if (orig && is_latch && (!bitmap_bit_p (wont_exit, j + 1))
>>>>> +   && bbs_to_scale)
>>>>> + {
>>>>> +   edge new_exit = new_spec_edges[SE_ORIG];
>>>>> +   profile_count new_count_in = new_exit->src->count;
>>>>> +   profile_count preheader_count = loop_preheader_edge (loop)->count ();
>>>>> +   edge e;
>>>>> +   edge_iterator ei;
>>>>> +
>>>>> +   FOR_EACH_EDGE (e, ei, new_exit->src->succs)
>>>>> +     if (e != new_exit)
>>>>> +       break;
>>>>> +
>>>>> +   gcc_assert (e && e != new_exit);
>>>>> +
>>>>> +   new_exit->probability = preheader_count.probability_in (new_count_in);
>>>>> +   e->probability = new_exit->probability.invert ();
>>>>> +
>>>>> +   profile_count new_latch_count
>>>>> +     = new_exit->src->count.apply_probability (e->probability);
>>>>> +   profile_count old_latch_count = e->dest->count;
>>>>> +
>>>>> +   EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
>>>>> +     scale_bbs_frequencies_profile_count (new_bbs + i, 1,
>>>>> +                                          new_latch_count,
>>>>> +                                          old_latch_count);
>>>>> +
>>>>> +   if (current_ir_type () != IR_GIMPLE)
>>>>> +     update_br_prob_note (e->src);
>>>>> + }
>>>>>      }
>>>>>    free (new_bbs);
>>>>>    free (orig_loops);
>>>>> diff --git a/gcc/testsuite/gcc.dg/pr68212.c b/gcc/testsuite/gcc.dg/pr68212.c
>>>>> new file mode 100644
>>>>> index 0000000..f3b7c22
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.dg/pr68212.c
>>>>> @@ -0,0 +1,13 @@
>>>>> +/* { dg-do compile } */
>>>>> +/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param max-unroll-times=4 -fdump-rtl-alignments" } */
>>>>> +
>>>>> +void foo(long int *a, long int *b, long int n)
>>>>> +{
>>>>> +  long int i;
>>>>> +
>>>>> +  for (i = 0; i < n; i++)
>>>>> +    a[i] = *b;
>>>>> +}
>>>>> +
>>>>> +/* { dg-final { scan-rtl-dump-times "internal loop alignment added" 1 "alignments"} } */
>>>>> +

Reply via email to