Hi, Currently tree if-conversion only supports PHIs with no more than two arguments unless the loop is marked with "simd pragma". This patch makes such PHIs supported unconditionally if they have no more than MAX_PHI_ARG_NUM arguments, thus cases like PR56541 can be fixed. Note because a chain of "?:" operators are needed to compute mult-arg PHI, this patch records the case and versions loop so that vectorizer can fall back to the original loop if if-conversion+vectorization isn't beneficial. Ideally, cost computation in vectorizer should be improved to measure benefit against the original loop, rather than if-converted loop. So far MAX_PHI_ARG_NUM is set to (4) because cases with more arguments are rare and not likely beneficial.
Apart from above change, the patch also makes changes like: only split critical edge when we have to; cleanups code logic in if_convertible_loop_p about aggressive_if_conv. Bootstrap and test on x86_64 and AArch64, is it OK? Thanks, bin 2016-04-26 Bin Cheng <bin.ch...@arm.com> PR tree-optimization/56541 * tree-if-conv.c (MAX_PHI_ARG_NUM): New macro. (any_complicated_phi): New static variable. (aggressive_if_conv): Delete. (if_convertible_phi_p): Support PHIs with more than two arguments. (if_convertible_bb_p): Remvoe check on aggressive_if_conv and critical pred edges. (ifcvt_split_critical_edges): Support PHIs with more than two arguments by checking new parameter. Only split critical edges if needed. (tree_if_conversion): Handle simd pragma marked loop using new local variable aggressive_if_conv. Check any_complicated_phi. gcc/testsuite/ChangeLog 2016-04-26 Bin Cheng <bin.ch...@arm.com> PR tree-optimization/56541 * gcc.dg/tree-ssa/ifc-pr56541.c: New test.
diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c index 32ced16..31fe390 100644 --- a/gcc/tree-if-conv.c +++ b/gcc/tree-if-conv.c @@ -113,11 +113,21 @@ along with GCC; see the file COPYING3. If not see #include "varasm.h" #include "builtins.h" #include "params.h" - + +/* Only handle PHIs with no more arguments unless we are asked to by + simd pragma. */ +#define MAX_PHI_ARG_NUM (4) + /* Indicate if new load/store that needs to be predicated is introduced during if conversion. */ static bool any_pred_load_store; +/* Indicate if there are any complicated PHIs that need to be handled in + if-conversion. Complicated PHI has more than two arguments and can't + be degenerated to two arguments PHI. See more information in comment + before phi_convertible_by_degenerating_args. */ +static bool any_complicated_phi; + /* Hash for struct innermost_loop_behavior. It depends on the user to free the memory. */ @@ -172,9 +182,6 @@ innermost_loop_behavior_hash::equal (const value_type &e1, /* List of basic blocks in if-conversion-suitable order. */ static basic_block *ifc_bbs; -/* Apply more aggressive (extended) if-conversion if true. */ -static bool aggressive_if_conv; - /* Hash table to store <DR's innermost loop behavior, DR> pairs. */ static hash_map<innermost_loop_behavior_hash, data_reference_p> *innermost_DR_map; @@ -639,13 +646,9 @@ phi_convertible_by_degenerating_args (gphi *phi) } /* Return true when PHI is if-convertible. PHI is part of loop LOOP - and it belongs to basic block BB. - - PHI is not if-convertible if: - - it has more than 2 arguments. - - When the aggressive_if_conv is set, PHI can have more than - two arguments. */ + and it belongs to basic block BB. Note at this point, it is sure + that PHI is if-convertible. This function updates global variable + ANY_COMPLICATED_PHI if PHI is complicated. */ static bool if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi) @@ -656,17 +659,10 @@ if_convertible_phi_p (struct loop *loop, basic_block bb, gphi *phi) print_gimple_stmt (dump_file, phi, 0, TDF_SLIM); } - if (bb != loop->header) - { - if (gimple_phi_num_args (phi) > 2 - && !aggressive_if_conv - && !phi_convertible_by_degenerating_args (phi)) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Phi can't be predicated by single cond.\n"); - return false; - } - } + if (bb != loop->header + && gimple_phi_num_args (phi) > 2 + && !phi_convertible_by_degenerating_args (phi)) + any_complicated_phi = true; return true; } @@ -1012,8 +1008,6 @@ has_pred_critical_p (basic_block bb) - it is after the exit block but before the latch, - its edges are not normal. - Last restriction is valid if aggressive_if_conv is false. - EXIT_BB is the basic block containing the exit of the LOOP. BB is inside LOOP. */ @@ -1062,19 +1056,6 @@ if_convertible_bb_p (struct loop *loop, basic_block bb, basic_block exit_bb) return false; } - /* At least one incoming edge has to be non-critical as otherwise edge - predicates are not equal to basic-block predicates of the edge - source. This check is skipped if aggressive_if_conv is true. */ - if (!aggressive_if_conv - && EDGE_COUNT (bb->preds) > 1 - && bb != loop->header - && all_preds_critical_p (bb)) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "only critical predecessors\n"); - return false; - } - return true; } @@ -2380,11 +2361,16 @@ version_loop_for_if_conversion (struct loop *loop) return true; } -/* Performs splitting of critical edges if aggressive_if_conv is true. - Returns false if loop won't be if converted and true otherwise. */ +/* Performs splitting of critical edges. Skip splitting and return false + if LOOP will not be converted because: + + - LOOP is not well formed. + - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments. + + Last restriction is valid only if AGGRESSIVE_IF_CONV is false. */ static bool -ifcvt_split_critical_edges (struct loop *loop) +ifcvt_split_critical_edges (struct loop *loop, bool aggressive_if_conv) { basic_block *body; basic_block bb; @@ -2393,30 +2379,51 @@ ifcvt_split_critical_edges (struct loop *loop) gimple *stmt; edge e; edge_iterator ei; + vec<edge> critical_edges = vNULL; - if (num <= 2) - return false; - if (loop->inner) - return false; - if (!single_exit (loop)) + /* Loop is not well formed. */ + if (num <= 2 || loop->inner || !single_exit (loop)) return false; body = get_loop_body (loop); for (i = 0; i < num; i++) { bb = body[i]; - if (bb == loop->latch - || bb_with_exit_edge_p (loop, bb)) + if (!aggressive_if_conv + && phi_nodes (bb) + && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "BB %d has complicated PHI with more than %d args.\n", + bb->index, MAX_PHI_ARG_NUM); + + free (body); + critical_edges.release (); + return false; + } + if (bb == loop->latch || bb_with_exit_edge_p (loop, bb)) continue; + stmt = last_stmt (bb); /* Skip basic blocks not ending with conditional branch. */ - if (!(stmt && gimple_code (stmt) == GIMPLE_COND)) + if (!stmt || gimple_code (stmt) != GIMPLE_COND) continue; + FOR_EACH_EDGE (e, ei, bb->succs) if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop) - split_edge (e); + critical_edges.safe_push (e); } free (body); + + while (critical_edges.length () > 0) + { + e = critical_edges.pop (); + /* Don't split if bb can be predicated along non-critical edge. */ + if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest)) + split_edge (e); + } + return true; } @@ -2713,12 +2720,16 @@ static unsigned int tree_if_conversion (struct loop *loop) { unsigned int todo = 0; + bool aggressive_if_conv; + ifc_bbs = NULL; any_pred_load_store = false; + any_complicated_phi = false; - /* Set up aggressive if-conversion for loops marked with simd pragma. */ + /* Apply more aggressive if-conversion when loop or its outer loop were + marked with simd pragma. When that's the case, we try to if-convert + loop containing PHIs with more than MAX_PHI_ARG_NUM arguments. */ aggressive_if_conv = loop->force_vectorize; - /* Check either outer loop was marked with simd pragma. */ if (!aggressive_if_conv) { struct loop *outer_loop = loop_outer (loop); @@ -2726,20 +2737,20 @@ tree_if_conversion (struct loop *loop) aggressive_if_conv = true; } - if (aggressive_if_conv) - if (!ifcvt_split_critical_edges (loop)) - goto cleanup; + if (!ifcvt_split_critical_edges (loop, aggressive_if_conv)) + goto cleanup; if (!if_convertible_loop_p (loop) || !dbg_cnt (if_conversion_tree)) goto cleanup; - if (any_pred_load_store + if ((any_pred_load_store || any_complicated_phi) && ((!flag_tree_loop_vectorize && !loop->force_vectorize) || loop->dont_vectorize)) goto cleanup; - if (any_pred_load_store && !version_loop_for_if_conversion (loop)) + if ((any_pred_load_store || any_complicated_phi) + && !version_loop_for_if_conversion (loop)) goto cleanup; /* Now all statements are if-convertible. Combine all the basic @@ -2749,11 +2760,8 @@ tree_if_conversion (struct loop *loop) /* Delete dead predicate computations and repair tree correspondent to bool pattern to delete multiple uses of predicates. */ - if (aggressive_if_conv) - { - ifcvt_local_dce (loop->header); - ifcvt_repair_bool_pattern (loop->header); - } + ifcvt_local_dce (loop->header); + ifcvt_repair_bool_pattern (loop->header); todo |= TODO_cleanup_cfg; mark_virtual_operands_for_renaming (cfun); diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-pr56541.c b/gcc/testsuite/gcc.dg/tree-ssa/ifc-pr56541.c new file mode 100644 index 0000000..bd0aa47 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-pr56541.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-ifcvt-stats" { target *-*-* } } */ + +float a,b,c,d; + +float z[1024]; int ok[1024]; +const float rBig = 150.; + +void foo() +{ + int i; + + for (i=0; i!=1024; ++i) + { + float rR = a*z[i]; + float rL = b*z[i]; + float rMin = (rR<rL) ? rR : rL; + float rMax = (rR<rL) ? rL : rR; + rMin = (rMax>0) ? rMin : rBig; + rMin = (rMin>0) ? rMin : rMax; + ok[i] = rMin-c<rMax+d; + } +} + +/* { dg-final { scan-tree-dump-times "Applying if-conversion" 1 "ifcvt" } } */