Jeff Law <l...@redhat.com> writes: > On 5/30/19 9:03 AM, Jiufu Guo wrote: >> Jeff Law <l...@redhat.com> writes: >> >>> On 5/29/19 6:36 AM, Richard Biener wrote: >>>> On Tue, 28 May 2019, Jiufu Guo wrote: >>>> >>>>> Hi, >>>>> >>>>> This patch implements a new opportunity of jump threading for PR77820. >>>>> In this optimization, conditional jumps are merged with unconditional >>>>> jump. And then moving CMP result to GPR is eliminated. >>>>> >>>>> This version is based on the proposal of Richard, Jeff and Andrew, and >>>>> refined to incorporate comments. Thanks for the reviews! >>>>> >>>>> Bootstrapped and tested on powerpc64le and powerpc64be with no >>>>> regressions (one case is improved) and new testcases are added. Is this >>>>> ok for trunk? >>>>> >>>>> Example of this opportunity looks like below: >>>>> >>>>> <P0> >>>>> p0 = a CMP b >>>>> goto <X>; >>>>> >>>>> <P1> >>>>> p1 = c CMP d >>>>> goto <X>; >>>>> >>>>> <X> >>>>> # phi = PHI <p0 (P0), p1 (P1)> >>>>> if (phi != 0) goto <Y>; else goto <Z>; >>>>> >>>>> Could be transformed to: >>>>> >>>>> <P0> >>>>> p0 = a CMP b >>>>> if (p0 != 0) goto <Y>; else goto <Z>; >>>>> >>>>> <P1> >>>>> p1 = c CMP d >>>>> if (p1 != 0) goto <Y>; else goto <Z>; >>>>> >>>>> >>>>> This optimization eliminates: >>>>> 1. saving CMP result: p0 = a CMP b. >>>>> 2. additional CMP on branch: if (phi != 0). >>>>> 3. converting CMP result if there is phi = (INT_CONV) p0 if there is. >>>>> >>>>> Thanks! >>>>> Jiufu Guo >>>>> >>>>> >>>>> [gcc] >>>>> 2019-05-28 Jiufu Guo <guoji...@linux.ibm.com> >>>>> Lijia He <heli...@linux.ibm.com> >>>>> >>>>> PR tree-optimization/77820 >>>>> * tree-ssa-threadedge.c >>>>> (edge_forwards_cmp_to_conditional_jump_through_empty_bb_p): New >>>>> function. >>>>> (thread_across_edge): Add call to >>>>> edge_forwards_cmp_to_conditional_jump_through_empty_bb_p. >>>>> >>>>> [gcc/testsuite] >>>>> 2019-05-28 Jiufu Guo <guoji...@linux.ibm.com> >>>>> Lijia He <heli...@linux.ibm.com> >>>>> >>>>> PR tree-optimization/77820 >>>>> * gcc.dg/tree-ssa/phi_on_compare-1.c: New testcase. >>>>> * gcc.dg/tree-ssa/phi_on_compare-2.c: New testcase. >>>>> * gcc.dg/tree-ssa/phi_on_compare-3.c: New testcase. >>>>> * gcc.dg/tree-ssa/phi_on_compare-4.c: New testcase. >>>>> * gcc.dg/tree-ssa/split-path-6.c: Update testcase. >>>>> >>>>> --- >>>>> gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c | 30 ++++++++++ >>>>> gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c | 23 +++++++ >>>>> gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c | 25 ++++++++ >>>>> gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c | 40 +++++++++++++ >>>>> gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c | 2 +- >>>>> gcc/tree-ssa-threadedge.c | 76 >>>>> +++++++++++++++++++++++- >>>>> 6 files changed, 192 insertions(+), 4 deletions(-) >>>>> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c >>>>> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c >>>>> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c >>>>> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c >>>>> >>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c >>>>> b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c >>>>> new file mode 100644 >>>>> index 0000000..5227c87 >>>>> --- /dev/null >>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c >>>>> @@ -0,0 +1,30 @@ >>>>> +/* { dg-do compile } */ >>>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */ >>>>> + >>>>> +void g (int); >>>>> +void g1 (int); >>>>> + >>>>> +void >>>>> +f (long a, long b, long c, long d, long x) >>>>> +{ >>>>> + _Bool t; >>>>> + if (x) >>>>> + { >>>>> + g (a + 1); >>>>> + t = a < b; >>>>> + c = d + x; >>>>> + } >>>>> + else >>>>> + { >>>>> + g (b + 1); >>>>> + a = c + d; >>>>> + t = c > d; >>>>> + } >>>>> + >>>>> + if (t) >>>>> + g1 (c); >>>>> + >>>>> + g (a); >>>>> +} >>>>> + >>>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } >>>>> */ >>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c >>>>> b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c >>>>> new file mode 100644 >>>>> index 0000000..eaf89bb >>>>> --- /dev/null >>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c >>>>> @@ -0,0 +1,23 @@ >>>>> +/* { dg-do compile } */ >>>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */ >>>>> + >>>>> +void g (void); >>>>> +void g1 (void); >>>>> + >>>>> +void >>>>> +f (long a, long b, long c, long d, int x) >>>>> +{ >>>>> + _Bool t; >>>>> + if (x) >>>>> + t = c < d; >>>>> + else >>>>> + t = a < b; >>>>> + >>>>> + if (t) >>>>> + { >>>>> + g1 (); >>>>> + g (); >>>>> + } >>>>> +} >>>>> + >>>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } >>>>> */ >>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c >>>>> b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c >>>>> new file mode 100644 >>>>> index 0000000..d5a1e0b >>>>> --- /dev/null >>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c >>>>> @@ -0,0 +1,25 @@ >>>>> +/* { dg-do compile } */ >>>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */ >>>>> + >>>>> +void g (void); >>>>> +void g1 (void); >>>>> + >>>>> +void >>>>> +f (long a, long b, long c, long d, int x) >>>>> +{ >>>>> + int t; >>>>> + if (x) >>>>> + t = a < b; >>>>> + else if (d == x) >>>>> + t = c < b; >>>>> + else >>>>> + t = d > c; >>>>> + >>>>> + if (t) >>>>> + { >>>>> + g1 (); >>>>> + g (); >>>>> + } >>>>> +} >>>>> + >>>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } >>>>> */ >>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c >>>>> b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c >>>>> new file mode 100644 >>>>> index 0000000..53acabc >>>>> --- /dev/null >>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c >>>>> @@ -0,0 +1,40 @@ >>>>> +/* { dg-do compile } */ >>>>> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */ >>>>> + >>>>> +void g (int); >>>>> +void g1 (int); >>>>> + >>>>> +void >>>>> +f (long a, long b, long c, long d, int x) >>>>> +{ >>>>> + int t; >>>>> + _Bool l1 = 0, l2 = 0; >>>>> + if (x) >>>>> + { >>>>> + g (a); >>>>> + c = a + b; >>>>> + t = a < b; >>>>> + l1 = 1; >>>>> + } >>>>> + else >>>>> + { >>>>> + g1 (b); >>>>> + t = c > d; >>>>> + d = c + b; >>>>> + l2 = 1; >>>>> + } >>>>> + >>>>> + if (t) >>>>> + { >>>>> + if (l1 | l2) >>>>> + g1 (c); >>>>> + } >>>>> + else >>>>> + { >>>>> + g (d); >>>>> + g1 (a + b); >>>>> + } >>>>> + g (c + d); >>>>> +} >>>>> + >>>>> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } >>>>> */ >>>>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c >>>>> b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c >>>>> index e9b4f26..1d7b587 100644 >>>>> --- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c >>>>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c >>>>> @@ -69,4 +69,4 @@ lookharder (string) >>>>> } >>>>> } >>>>> >>>>> -/* { dg-final { scan-tree-dump-times "Duplicating join block" 3 >>>>> "split-paths" } } */ >>>>> +/* { dg-final { scan-tree-dump-times "Duplicating join block" 2 >>>>> "split-paths" } } */ >>>>> diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c >>>>> index c3ea2d6..36c413a 100644 >>>>> --- a/gcc/tree-ssa-threadedge.c >>>>> +++ b/gcc/tree-ssa-threadedge.c >>>>> @@ -1157,6 +1157,74 @@ thread_through_normal_block (edge e, >>>>> return 0; >>>>> } >>>>> >>>>> +/* There are basic blocks look like: >>>>> + <P0> >>>>> + p0 = a CMP b ; or p0 = (INT) (a CMP b) >>>>> + goto <X>; >>>>> + >>>>> + <P1> >>>>> + p1 = c CMP d >>>>> + goto <X>; >>>>> + >>>>> + <X> >>>>> + # phi = PHI <p0 (P0), p1 (P1)> >>>>> + if (phi != 0) goto <Y>; else goto <Z>; >>>>> + >>>>> + Then, edge (P0,X) or (P1,X) could be marked as EDGE_START_JUMP_THREAD >>>>> + And edge (X,Y), (X,Z) is EDGE_COPY_SRC_JOINER_BLOCK >>>>> + >>>>> + Return true if E is (P0,X) or (P1,X) */ >>>>> + >>>>> +bool >>>>> +edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e) >>>>> +{ >>>>> + basic_block bb = e->dest; >>>>> + >>>>> + /* See if there is only one stmt which is gcond. */ >>>>> + gimple *gs = last_and_only_stmt (bb); >>>>> + if (gs == NULL || gimple_code (gs) != GIMPLE_COND) >>>>> + return false; >>>> gcond *gs; >>>> if (!(gs = safe_dyn_cast <gcond *> (last_and_only_stmt (bb)))) >>>> return false; >>>> >>>> makes the following gimple_cond_ accesses more efficient when >>>> checking is enabled. >>>> >>>>> + >>>>> + /* See if gcond's cond is "(phi !=/== 0/1)" in the basic block. */ >>>>> + tree cond = gimple_cond_lhs (gs); >>>>> + enum tree_code code = gimple_cond_code (gs); >>>>> + tree rhs = gimple_cond_rhs (gs); >>>>> + if (TREE_CODE (cond) != SSA_NAME >>>>> + || (code != NE_EXPR && code != EQ_EXPR) >>>>> + || (!integer_onep (rhs) && !integer_zerop (rhs))) >>>>> + return false; >>>>> + gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond)); >>>>> + if (phi == NULL || gimple_bb (phi) != bb) >>>>> + return false; >>>>> + >>>>> + /* Check if phi's incoming value is CMP. */ >>>>> + gimple *def; >>>>> + tree value = PHI_ARG_DEF_FROM_EDGE (phi, e); >>>>> + if (TREE_CODE (value) == SSA_NAME >>>>> + && has_single_use (value) >>>>> + && is_gimple_assign (SSA_NAME_DEF_STMT (value))) >>>>> + def = SSA_NAME_DEF_STMT (value); >>>> Same is true here and below if you rewrite to >>>> >>>> gassign *def; >>>> tree value = PHI_ARG_DEF_FROM_EDGE (phi, e); >>>> if (TREE_CODE (value) != SSA_NAME >>>> || !has_single_use (value) >>>> || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value)))) >>>> return false; >>>> >>>> Otherwise it looks good. I'd like to have Jeffs opinion and >>>> final ACK here because we touch jump-threading and he's most >>>> familiar with that detail and the place you hook into. >>> I've got the full thread to look over. At a high level I wouldn't have >>> guessed it'd be this easy to get the threader handle this, but >>> occasionally we are surprised in a good way. Anyway, I'll be looking >>> through the full discussion. >>> >>> Jeff >> >> Hi Jeff, Richard and all, >> >> Thanks a lot for your great comments in all threads. Based on those >> comments, I refined the code as below: >> >> bool >> edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e) >> { >> /* See if there is only one stmt which is gcond. */ >> gcond *gs; >> if (!(gs = safe_dyn_cast<gcond *> (last_and_only_stmt (e->dest)))) >> return false; >> >> /* See if gcond's cond is "(phi !=/== 0/1)" in the basic block. */ >> tree cond = gimple_cond_lhs (gs); >> enum tree_code code = gimple_cond_code (gs); >> tree rhs = gimple_cond_rhs (gs); >> if (TREE_CODE (cond) != SSA_NAME >> || (code != NE_EXPR && code != EQ_EXPR) >> || (!integer_onep (rhs) && !integer_zerop (rhs))) >> return false; >> gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond)); >> if (phi == NULL || gimple_bb (phi) != e->dest) >> return false; >> >> /* Check if phi's incoming value is CMP. */ >> gassign *def; >> tree value = PHI_ARG_DEF_FROM_EDGE (phi, e); >> if (TREE_CODE (value) != SSA_NAME >> || !has_single_use (value) >> || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value)))) >> return false; >> >> /* Or if it is (INT) (a CMP b). */ >> if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))) >> { >> value = gimple_assign_rhs1 (def); >> if (TREE_CODE (value) != SSA_NAME >> || !has_single_use (value) >> || !(def = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (value)))) >> return false; >> } >> >> if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison) >> return false; >> >> if (!single_succ_p (e->src)) >> return false; >> >> return true; >> } >> >> >> In this code, I put "if (!single_succ_p (e->src))" there, which may be >> helpful for reducing the copy block. > Sounds good. My testing did show a regression on the sh port. > > For pr51244-20 on the SH port your changes make the ultimate resulting > code better, but compromise the test. We can restore the shape of the > CFG and get the testing coverage for sh_treg_combine by disabling VRP > and DOM. > > Can you please include the attached patch in your next update?
Thanks, I sent out a new version patch to include this update. Jiufu Guo > > Jeff > diff --git a/gcc/testsuite/gcc.target/sh/pr51244-20.c > b/gcc/testsuite/gcc.target/sh/pr51244-20.c > index c342163160b..be265cd16af 100644 > --- a/gcc/testsuite/gcc.target/sh/pr51244-20.c > +++ b/gcc/testsuite/gcc.target/sh/pr51244-20.c > @@ -1,7 +1,7 @@ > /* Check that the SH specific sh_treg_combine RTL optimization pass works as > expected. */ > /* { dg-do compile } */ > -/* { dg-options "-O2" } */ > +/* { dg-options "-O2 -fno-tree-dominator-opts -fno-tree-vrp" } */ > > /* { dg-final { scan-assembler-not "not\t" } } */ > /* { dg-final { scan-assembler-times "cmp/eq" 2 } } */