On 5/29/19 6:36 AM, Richard Biener wrote: > On Tue, 28 May 2019, Jiufu Guo wrote: > >> Hi, >> >> This patch implements a new opportunity of jump threading for PR77820. >> In this optimization, conditional jumps are merged with unconditional >> jump. And then moving CMP result to GPR is eliminated. >> >> This version is based on the proposal of Richard, Jeff and Andrew, and >> refined to incorporate comments. Thanks for the reviews! >> >> Bootstrapped and tested on powerpc64le and powerpc64be with no >> regressions (one case is improved) and new testcases are added. Is this >> ok for trunk? >> >> Example of this opportunity looks like below: >> >> <P0> >> p0 = a CMP b >> goto <X>; >> >> <P1> >> p1 = c CMP d >> goto <X>; >> >> <X> >> # phi = PHI <p0 (P0), p1 (P1)> >> if (phi != 0) goto <Y>; else goto <Z>; >> >> Could be transformed to: >> >> <P0> >> p0 = a CMP b >> if (p0 != 0) goto <Y>; else goto <Z>; >> >> <P1> >> p1 = c CMP d >> if (p1 != 0) goto <Y>; else goto <Z>; >> >> >> This optimization eliminates: >> 1. saving CMP result: p0 = a CMP b. >> 2. additional CMP on branch: if (phi != 0). >> 3. converting CMP result if there is phi = (INT_CONV) p0 if there is. >> >> Thanks! >> Jiufu Guo >> >> >> [gcc] >> 2019-05-28 Jiufu Guo <guoji...@linux.ibm.com> >> Lijia He <heli...@linux.ibm.com> >> >> PR tree-optimization/77820 >> * tree-ssa-threadedge.c >> (edge_forwards_cmp_to_conditional_jump_through_empty_bb_p): New >> function. >> (thread_across_edge): Add call to >> edge_forwards_cmp_to_conditional_jump_through_empty_bb_p. >> >> [gcc/testsuite] >> 2019-05-28 Jiufu Guo <guoji...@linux.ibm.com> >> Lijia He <heli...@linux.ibm.com> >> >> PR tree-optimization/77820 >> * gcc.dg/tree-ssa/phi_on_compare-1.c: New testcase. >> * gcc.dg/tree-ssa/phi_on_compare-2.c: New testcase. >> * gcc.dg/tree-ssa/phi_on_compare-3.c: New testcase. >> * gcc.dg/tree-ssa/phi_on_compare-4.c: New testcase. >> * gcc.dg/tree-ssa/split-path-6.c: Update testcase. >> >> --- >> gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c | 30 ++++++++++ >> gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c | 23 +++++++ >> gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c | 25 ++++++++ >> gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c | 40 +++++++++++++ >> gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c | 2 +- >> gcc/tree-ssa-threadedge.c | 76 >> +++++++++++++++++++++++- >> 6 files changed, 192 insertions(+), 4 deletions(-) >> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c >> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c >> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c >> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c >> >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c >> b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c >> new file mode 100644 >> index 0000000..5227c87 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c >> @@ -0,0 +1,30 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */ >> + >> +void g (int); >> +void g1 (int); >> + >> +void >> +f (long a, long b, long c, long d, long x) >> +{ >> + _Bool t; >> + if (x) >> + { >> + g (a + 1); >> + t = a < b; >> + c = d + x; >> + } >> + else >> + { >> + g (b + 1); >> + a = c + d; >> + t = c > d; >> + } >> + >> + if (t) >> + g1 (c); >> + >> + g (a); >> +} >> + >> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */ >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c >> b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c >> new file mode 100644 >> index 0000000..eaf89bb >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c >> @@ -0,0 +1,23 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */ >> + >> +void g (void); >> +void g1 (void); >> + >> +void >> +f (long a, long b, long c, long d, int x) >> +{ >> + _Bool t; >> + if (x) >> + t = c < d; >> + else >> + t = a < b; >> + >> + if (t) >> + { >> + g1 (); >> + g (); >> + } >> +} >> + >> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */ >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c >> b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c >> new file mode 100644 >> index 0000000..d5a1e0b >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c >> @@ -0,0 +1,25 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */ >> + >> +void g (void); >> +void g1 (void); >> + >> +void >> +f (long a, long b, long c, long d, int x) >> +{ >> + int t; >> + if (x) >> + t = a < b; >> + else if (d == x) >> + t = c < b; >> + else >> + t = d > c; >> + >> + if (t) >> + { >> + g1 (); >> + g (); >> + } >> +} >> + >> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */ >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c >> b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c >> new file mode 100644 >> index 0000000..53acabc >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c >> @@ -0,0 +1,40 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */ >> + >> +void g (int); >> +void g1 (int); >> + >> +void >> +f (long a, long b, long c, long d, int x) >> +{ >> + int t; >> + _Bool l1 = 0, l2 = 0; >> + if (x) >> + { >> + g (a); >> + c = a + b; >> + t = a < b; >> + l1 = 1; >> + } >> + else >> + { >> + g1 (b); >> + t = c > d; >> + d = c + b; >> + l2 = 1; >> + } >> + >> + if (t) >> + { >> + if (l1 | l2) >> + g1 (c); >> + } >> + else >> + { >> + g (d); >> + g1 (a + b); >> + } >> + g (c + d); >> +} >> + >> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */ >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c >> b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c >> index e9b4f26..1d7b587 100644 >> --- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c >> @@ -69,4 +69,4 @@ lookharder (string) >> } >> } >> >> -/* { dg-final { scan-tree-dump-times "Duplicating join block" 3 >> "split-paths" } } */ >> +/* { dg-final { scan-tree-dump-times "Duplicating join block" 2 >> "split-paths" } } */ >> diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c >> index c3ea2d6..36c413a 100644 >> --- a/gcc/tree-ssa-threadedge.c >> +++ b/gcc/tree-ssa-threadedge.c >> @@ -1157,6 +1157,74 @@ thread_through_normal_block (edge e, >> return 0; >> } >> >> +/* There are basic blocks look like: >> + <P0> >> + p0 = a CMP b ; or p0 = (INT) (a CMP b) >> + goto <X>; >> + >> + <P1> >> + p1 = c CMP d >> + goto <X>; >> + >> + <X> >> + # phi = PHI <p0 (P0), p1 (P1)> >> + if (phi != 0) goto <Y>; else goto <Z>; >> + >> + Then, edge (P0,X) or (P1,X) could be marked as EDGE_START_JUMP_THREAD >> + And edge (X,Y), (X,Z) is EDGE_COPY_SRC_JOINER_BLOCK >> + >> + Return true if E is (P0,X) or (P1,X) */ >> + >> +bool >> +edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e) >> +{ >> + basic_block bb = e->dest; >> + >> + /* See if there is only one stmt which is gcond. */ >> + gimple *gs = last_and_only_stmt (bb); >> + if (gs == NULL || gimple_code (gs) != GIMPLE_COND) >> + return false; > gcond *gs; > if (!(gs = safe_dyn_cast <gcond *> (last_and_only_stmt (bb)))) > return false; > > makes the following gimple_cond_ accesses more efficient when > checking is enabled. > >> + >> + /* See if gcond's cond is "(phi !=/== 0/1)" in the basic block. */ >> + tree cond = gimple_cond_lhs (gs); >> + enum tree_code code = gimple_cond_code (gs); >> + tree rhs = gimple_cond_rhs (gs); >> + if (TREE_CODE (cond) != SSA_NAME >> + || (code != NE_EXPR && code != EQ_EXPR) >> + || (!integer_onep (rhs) && !integer_zerop (rhs))) >> + return false; >> + gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond)); >> + if (phi == NULL || gimple_bb (phi) != bb) >> + return false; >> + >> + /* Check if phi's incoming value is CMP. */ >> + gimple *def; >> + tree value = PHI_ARG_DEF_FROM_EDGE (phi, e); >> + if (TREE_CODE (value) == SSA_NAME >> + && has_single_use (value) >> + && is_gimple_assign (SSA_NAME_DEF_STMT (value))) >> + def = SSA_NAME_DEF_STMT (value); > Same is true here and below if you rewrite to > > gassign *def; > tree value = PHI_ARG_DEF_FROM_EDGE (phi, e); > if (TREE_CODE (value) != SSA_NAME > || !has_single_use (value) > || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value)))) > return false; > > Otherwise it looks good. I'd like to have Jeffs opinion and > final ACK here because we touch jump-threading and he's most > familiar with that detail and the place you hook into. I've got the full thread to look over. At a high level I wouldn't have guessed it'd be this easy to get the threader handle this, but occasionally we are surprised in a good way. Anyway, I'll be looking through the full discussion.
Jeff