Jeff Law <l...@redhat.com> writes:

> On 5/30/19 9:03 AM, Jiufu Guo wrote:
>> Jeff Law <l...@redhat.com> writes:
>> 
>>> On 5/29/19 6:36 AM, Richard Biener wrote:
>>>> On Tue, 28 May 2019, Jiufu Guo wrote:
>>>>
>>>>> Hi,
>>>>>
>>>>> This patch implements a new opportunity of jump threading for PR77820.
>>>>> In this optimization, conditional jumps are merged with unconditional
>>>>> jump. And then moving CMP result to GPR is eliminated.
>>>>>
>>>>> This version is based on the proposal of Richard, Jeff and Andrew, and
>>>>> refined to incorporate comments.  Thanks for the reviews!
>>>>>
>>>>> Bootstrapped and tested on powerpc64le and powerpc64be with no
>>>>> regressions (one case is improved) and new testcases are added. Is this
>>>>> ok for trunk?
>>>>>
>>>>> Example of this opportunity looks like below:
>>>>>
>>>>>   <P0>
>>>>>   p0 = a CMP b
>>>>>   goto <X>;
>>>>>
>>>>>   <P1>
>>>>>   p1 = c CMP d
>>>>>   goto <X>;
>>>>>
>>>>>   <X>
>>>>>   # phi = PHI <p0 (P0), p1 (P1)>
>>>>>   if (phi != 0) goto <Y>; else goto <Z>;
>>>>>
>>>>> Could be transformed to:
>>>>>
>>>>>   <P0>
>>>>>   p0 = a CMP b
>>>>>   if (p0 != 0) goto <Y>; else goto <Z>;
>>>>>
>>>>>   <P1>
>>>>>   p1 = c CMP d
>>>>>   if (p1 != 0) goto <Y>; else goto <Z>;
>>>>>
>>>>>
>>>>> This optimization eliminates:
>>>>> 1. saving CMP result: p0 = a CMP b.
>>>>> 2. additional CMP on branch: if (phi != 0).
>>>>> 3. converting CMP result if there is phi = (INT_CONV) p0 if there is.
>>>>>
>>>>> Thanks!
>>>>> Jiufu Guo
>>>>>
>>>>>
>>>>> [gcc]
>>>>> 2019-05-28  Jiufu Guo  <guoji...@linux.ibm.com>
>>>>>       Lijia He  <heli...@linux.ibm.com>
>>>>>
>>>>>   PR tree-optimization/77820
>>>>>   * tree-ssa-threadedge.c
>>>>>   (edge_forwards_cmp_to_conditional_jump_through_empty_bb_p): New
>>>>>   function.
>>>>>   (thread_across_edge): Add call to
>>>>>   edge_forwards_cmp_to_conditional_jump_through_empty_bb_p.
>>>>>
>>>>> [gcc/testsuite]
>>>>> 2019-05-28  Jiufu Guo  <guoji...@linux.ibm.com>
>>>>>       Lijia He  <heli...@linux.ibm.com>
>>>>>
>>>>>   PR tree-optimization/77820
>>>>>   * gcc.dg/tree-ssa/phi_on_compare-1.c: New testcase.
>>>>>   * gcc.dg/tree-ssa/phi_on_compare-2.c: New testcase.
>>>>>   * gcc.dg/tree-ssa/phi_on_compare-3.c: New testcase.
>>>>>   * gcc.dg/tree-ssa/phi_on_compare-4.c: New testcase.
>>>>>   * gcc.dg/tree-ssa/split-path-6.c: Update testcase.
>>>>>
>>>>> ---
>>>>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c | 30 ++++++++++
>>>>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c | 23 +++++++
>>>>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c | 25 ++++++++
>>>>>  gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c | 40 +++++++++++++
>>>>>  gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c     |  2 +-
>>>>>  gcc/tree-ssa-threadedge.c                        | 76 
>>>>> +++++++++++++++++++++++-
>>>>>  6 files changed, 192 insertions(+), 4 deletions(-)
>>>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>>>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>>>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
>>>>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
>>>>>
>>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c 
>>>>> b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>>>>> new file mode 100644
>>>>> index 0000000..5227c87
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c
>>>>> @@ -0,0 +1,30 @@
>>>>> +/* { dg-do compile } */
>>>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>>>>> +
>>>>> +void g (int);
>>>>> +void g1 (int);
>>>>> +
>>>>> +void
>>>>> +f (long a, long b, long c, long d, long x)
>>>>> +{
>>>>> +  _Bool t;
>>>>> +  if (x)
>>>>> +    {
>>>>> +      g (a + 1);
>>>>> +      t = a < b;
>>>>> +      c = d + x;
>>>>> +    }
>>>>> +  else
>>>>> +    {
>>>>> +      g (b + 1);
>>>>> +      a = c + d;
>>>>> +      t = c > d;
>>>>> +    }
>>>>> +
>>>>> +  if (t)
>>>>> +    g1 (c);
>>>>> +
>>>>> +  g (a);
>>>>> +}
>>>>> +
>>>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } 
>>>>> */
>>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c 
>>>>> b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>>>>> new file mode 100644
>>>>> index 0000000..eaf89bb
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c
>>>>> @@ -0,0 +1,23 @@
>>>>> +/* { dg-do compile } */
>>>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>>>>> +
>>>>> +void g (void);
>>>>> +void g1 (void);
>>>>> +
>>>>> +void
>>>>> +f (long a, long b, long c, long d, int x)
>>>>> +{
>>>>> +  _Bool t;
>>>>> +  if (x)
>>>>> +    t = c < d;
>>>>> +  else
>>>>> +    t = a < b;
>>>>> +
>>>>> +  if (t)
>>>>> +    {
>>>>> +      g1 ();
>>>>> +      g ();
>>>>> +    }
>>>>> +}
>>>>> +
>>>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } 
>>>>> */
>>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c 
>>>>> b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
>>>>> new file mode 100644
>>>>> index 0000000..d5a1e0b
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c
>>>>> @@ -0,0 +1,25 @@
>>>>> +/* { dg-do compile } */
>>>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>>>>> +
>>>>> +void g (void);
>>>>> +void g1 (void);
>>>>> +
>>>>> +void
>>>>> +f (long a, long b, long c, long d, int x)
>>>>> +{
>>>>> +  int t;
>>>>> +  if (x)
>>>>> +    t = a < b;
>>>>> +  else if (d == x)
>>>>> +    t = c < b;
>>>>> +  else
>>>>> +    t = d > c;
>>>>> +
>>>>> +  if (t)
>>>>> +    {
>>>>> +      g1 ();
>>>>> +      g ();
>>>>> +    }
>>>>> +}
>>>>> +
>>>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } 
>>>>> */
>>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c 
>>>>> b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
>>>>> new file mode 100644
>>>>> index 0000000..53acabc
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c
>>>>> @@ -0,0 +1,40 @@
>>>>> +/* { dg-do compile } */
>>>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */
>>>>> +
>>>>> +void g (int);
>>>>> +void g1 (int);
>>>>> +
>>>>> +void
>>>>> +f (long a, long b, long c, long d, int x)
>>>>> +{
>>>>> +  int t;
>>>>> +  _Bool l1 = 0, l2 = 0;
>>>>> +  if (x)
>>>>> +    {
>>>>> +      g (a);
>>>>> +      c = a + b;
>>>>> +      t = a < b;
>>>>> +      l1 = 1;
>>>>> +    }
>>>>> +  else
>>>>> +    {
>>>>> +      g1 (b);
>>>>> +      t = c > d;
>>>>> +      d = c + b;
>>>>> +      l2 = 1;
>>>>> +    }
>>>>> +
>>>>> +  if (t)
>>>>> +    {
>>>>> +      if (l1 | l2)
>>>>> + g1 (c);
>>>>> +    }
>>>>> +  else
>>>>> +    {
>>>>> +      g (d);
>>>>> +      g1 (a + b);
>>>>> +    }
>>>>> +  g (c + d);
>>>>> +}
>>>>> +
>>>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } 
>>>>> */
>>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c 
>>>>> b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
>>>>> index e9b4f26..1d7b587 100644
>>>>> --- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
>>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
>>>>> @@ -69,4 +69,4 @@ lookharder (string)
>>>>>      }
>>>>>  }
>>>>>  
>>>>> -/* { dg-final { scan-tree-dump-times "Duplicating join block" 3 
>>>>> "split-paths" } } */
>>>>> +/* { dg-final { scan-tree-dump-times "Duplicating join block" 2 
>>>>> "split-paths" } } */
>>>>> diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
>>>>> index c3ea2d6..36c413a 100644
>>>>> --- a/gcc/tree-ssa-threadedge.c
>>>>> +++ b/gcc/tree-ssa-threadedge.c
>>>>> @@ -1157,6 +1157,74 @@ thread_through_normal_block (edge e,
>>>>>    return 0;
>>>>>  }
>>>>>  
>>>>> +/* There are basic blocks look like:
>>>>> +   <P0>
>>>>> +   p0 = a CMP b ; or p0 = (INT) (a CMP b)
>>>>> +   goto <X>;
>>>>> +
>>>>> +   <P1>
>>>>> +   p1 = c CMP d
>>>>> +   goto <X>;
>>>>> +
>>>>> +   <X>
>>>>> +   # phi = PHI <p0 (P0), p1 (P1)>
>>>>> +   if (phi != 0) goto <Y>; else goto <Z>;
>>>>> +
>>>>> +   Then, edge (P0,X) or (P1,X) could be marked as EDGE_START_JUMP_THREAD
>>>>> +   And edge (X,Y), (X,Z) is EDGE_COPY_SRC_JOINER_BLOCK
>>>>> +
>>>>> +   Return true if E is (P0,X) or (P1,X)  */
>>>>> +
>>>>> +bool
>>>>> +edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e)
>>>>> +{
>>>>> +  basic_block bb = e->dest;
>>>>> +
>>>>> +  /* See if there is only one stmt which is gcond.  */
>>>>> +  gimple *gs = last_and_only_stmt (bb);
>>>>> +  if (gs == NULL || gimple_code (gs) != GIMPLE_COND)
>>>>> +    return false;
>>>>      gcond *gs;
>>>>      if (!(gs = safe_dyn_cast <gcond *> (last_and_only_stmt (bb))))
>>>>        return false;
>>>>
>>>> makes the following gimple_cond_ accesses more efficient when
>>>> checking is enabled.
>>>>
>>>>> +
>>>>> +  /* See if gcond's cond is "(phi !=/== 0/1)" in the basic block.  */
>>>>> +  tree cond = gimple_cond_lhs (gs);
>>>>> +  enum tree_code code = gimple_cond_code (gs);
>>>>> +  tree rhs = gimple_cond_rhs (gs);
>>>>> +  if (TREE_CODE (cond) != SSA_NAME
>>>>> +      || (code != NE_EXPR && code != EQ_EXPR)
>>>>> +      || (!integer_onep (rhs) && !integer_zerop (rhs)))
>>>>> +    return false;
>>>>> +  gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond));
>>>>> +  if (phi == NULL || gimple_bb (phi) != bb)
>>>>> +    return false;
>>>>> +
>>>>> +  /* Check if phi's incoming value is CMP.  */
>>>>> +  gimple *def;
>>>>> +  tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
>>>>> +  if (TREE_CODE (value) == SSA_NAME 
>>>>> +      && has_single_use (value)
>>>>> +      && is_gimple_assign (SSA_NAME_DEF_STMT (value)))
>>>>> +    def = SSA_NAME_DEF_STMT (value);
>>>> Same is true here and below if you rewrite to
>>>>
>>>>      gassign *def;
>>>>      tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
>>>>      if (TREE_CODE (value) != SSA_NAME
>>>>          || !has_single_use (value)
>>>>          || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value))))
>>>>        return false;
>>>>
>>>> Otherwise it looks good.  I'd like to have Jeffs opinion and
>>>> final ACK here because we touch jump-threading and he's most
>>>> familiar with that detail and the place you hook into.
>>> I've got the full thread to look over.  At a high level I wouldn't have
>>> guessed it'd be this easy to get the threader handle this, but
>>> occasionally we are surprised in a good way.  Anyway, I'll be looking
>>> through the full discussion.
>>>
>>> Jeff
>> 
>> Hi Jeff, Richard and all,
>> 
>> Thanks a lot for your great comments in all threads. Based on those
>> comments, I refined the code as below:
>> 
>> bool
>> edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e)
>> {
>>   /* See if there is only one stmt which is gcond.  */
>>   gcond *gs;
>>   if (!(gs = safe_dyn_cast<gcond *> (last_and_only_stmt (e->dest))))
>>     return false;
>>   
>>   /* See if gcond's cond is "(phi !=/== 0/1)" in the basic block.  */
>>   tree cond = gimple_cond_lhs (gs);
>>   enum tree_code code = gimple_cond_code (gs);
>>   tree rhs = gimple_cond_rhs (gs);
>>   if (TREE_CODE (cond) != SSA_NAME
>>       || (code != NE_EXPR && code != EQ_EXPR)
>>       || (!integer_onep (rhs) && !integer_zerop (rhs)))
>>     return false;
>>   gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond));
>>   if (phi == NULL || gimple_bb (phi) != e->dest)
>>     return false;
>> 
>>   /* Check if phi's incoming value is CMP.  */
>>   gassign *def;
>>   tree value = PHI_ARG_DEF_FROM_EDGE (phi, e);
>>   if (TREE_CODE (value) != SSA_NAME
>>       || !has_single_use (value)
>>       || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value))))
>>     return false;
>> 
>>   /* Or if it is (INT) (a CMP b).  */
>>   if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
>>     {
>>       value = gimple_assign_rhs1 (def);
>>       if (TREE_CODE (value) != SSA_NAME
>>        || !has_single_use (value)
>>        || !(def = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (value))))
>>      return false;
>>     }
>> 
>>   if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison)
>>     return false;
>> 
>>   if (!single_succ_p (e->src))
>>     return false;
>> 
>>   return true;
>> }
>> 
>> 
>> In this code, I put "if (!single_succ_p (e->src))" there, which may be
>> helpful for reducing the copy block.
> Sounds good.  My testing did show a regression on the sh port.
>
> For pr51244-20 on the SH port your changes make the ultimate resulting
> code better, but compromise the test.  We can restore the shape of the
> CFG and get the testing coverage for sh_treg_combine by disabling VRP
> and DOM.
>
> Can you please include the attached patch in your next update?

Thanks, I sent out a new version patch to include this update.

Jiufu Guo
>
> Jeff
> diff --git a/gcc/testsuite/gcc.target/sh/pr51244-20.c 
> b/gcc/testsuite/gcc.target/sh/pr51244-20.c
> index c342163160b..be265cd16af 100644
> --- a/gcc/testsuite/gcc.target/sh/pr51244-20.c
> +++ b/gcc/testsuite/gcc.target/sh/pr51244-20.c
> @@ -1,7 +1,7 @@
>  /* Check that the SH specific sh_treg_combine RTL optimization pass works as
>     expected.  */
>  /* { dg-do compile }  */
> -/* { dg-options "-O2" } */
> +/* { dg-options "-O2 -fno-tree-dominator-opts -fno-tree-vrp" } */
>  
>  /* { dg-final { scan-assembler-not "not\t" } } */
>  /* { dg-final { scan-assembler-times "cmp/eq" 2 } } */

Reply via email to