On Fri, 7 Jul 2023, Tamar Christina wrote: > Hi All, > > Following on from Jakub's patch in g:de0ee9d14165eebb3d31c84e98260c05c3b33acb > these two patches finishes the work fixing the regression and improves > codegen. > > As explained in that commit, ifconvert sorts PHI args in increasing number of > occurrences in order to reduce the number of comparisons done while > traversing the tree. > > The remaining task that this patch fixes is dealing with the long chain of > comparisons that can be created from phi nodes, particularly when they share > any common successor (classical example is a diamond node). > > on a PHI-node the true and else branches carry a condition, true will > carry `a` and false `~a`. The issue is that at the moment GCC tests both `a` > and `~a` when the phi node has more than 2 arguments. Clearly this isn't > needed. The deeper the nesting of phi nodes the larger the repetition. > > As an example, for > > foo (int *f, int d, int e) > { > for (int i = 0; i < 1024; i++) > { > int a = f[i]; > int t; > if (a < 0) > t = 1; > else if (a < e) > t = 1 - a * d; > else > t = 0; > f[i] = t; > } > } > > after Jakub's patch we generate: > > _7 = a_10 < 0; > _21 = a_10 >= 0; > _22 = a_10 < e_11(D); > _23 = _21 & _22; > _ifc__42 = _23 ? t_13 : 0; > t_6 = _7 ? 1 : _ifc__42 > > but while better than before it is still inefficient, since in the false > branch, where we know ~_7 is true, we still test _21. > > This leads to superfluous tests for every diamond node. After this patch we > generate > > _7 = a_10 < 0; > _22 = a_10 < e_11(D); > _ifc__42 = _22 ? t_13 : 0; > t_6 = _7 ? 1 : _ifc__42; > > Which correctly elides the test of _21. This is done by borrowing the > vectorizer's helper functions to limit predicate mask usages. Ifcvt will > chain > conditionals on the false edge (unless specifically inverted) so this patch on > creating cond a ? b : c, will register ~a when traversing c. If c is a > conditional then c will be simplified to the smaller possible predicate given > the assumptions we already know to be true. > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Not sure how to write a non-fragile testcase for this as the > conditionals chosen depends on threading etc. Any Suggestions? > > Ok for master?
OK. For a testcase I wonder if you can produce a GIMPLE FE one starting with pass_fix_loops? (I think it's still not possible to start when in LC SSA) Thanks, Richard. > Thanks, > Tamar > > gcc/ChangeLog: > > PR tree-optimization/109154 > * tree-if-conv.cc (gen_simplified_condition, > gen_phi_nest_statement): New. > (gen_phi_arg_condition, predicate_scalar_phi): Use it. > > --- inline copy of patch -- > diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc > index > e342532a343a3c066142adeec5fdfaf736a653e5..16b36dd8b0226f796c1a3fc6d45a9059385e812b > 100644 > --- a/gcc/tree-if-conv.cc > +++ b/gcc/tree-if-conv.cc > @@ -1870,12 +1870,44 @@ convert_scalar_cond_reduction (gimple *reduc, > gimple_stmt_iterator *gsi, > return rhs; > } > > +/* Generate a simplified conditional. */ > + > +static tree > +gen_simplified_condition (tree cond, scalar_cond_masked_set_type &cond_set) > +{ > + /* Check if the value is already live in a previous branch. This resolves > + nested conditionals from diamond PHI reductions. */ > + if (TREE_CODE (cond) == SSA_NAME) > + { > + gimple *stmt = SSA_NAME_DEF_STMT (cond); > + gassign *assign = NULL; > + if ((assign = as_a <gassign *> (stmt)) > + && gimple_assign_rhs_code (assign) == BIT_AND_EXPR) > + { > + tree arg1 = gimple_assign_rhs1 (assign); > + tree arg2 = gimple_assign_rhs2 (assign); > + if (cond_set.contains ({ arg1, 1 })) > + arg1 = boolean_true_node; > + else > + arg1 = gen_simplified_condition (arg1, cond_set); > + > + if (cond_set.contains ({ arg2, 1 })) > + arg2 = boolean_true_node; > + else > + arg2 = gen_simplified_condition (arg2, cond_set); > + > + cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, arg1, arg2); > + } > + } > + return cond; > +} > + > /* Produce condition for all occurrences of ARG in PHI node. Set *INVERT > as to whether the condition is inverted. */ > > static tree > -gen_phi_arg_condition (gphi *phi, vec<int> *occur, > - gimple_stmt_iterator *gsi, bool *invert) > +gen_phi_arg_condition (gphi *phi, vec<int> *occur, gimple_stmt_iterator *gsi, > + scalar_cond_masked_set_type &cond_set, bool *invert) > { > int len; > int i; > @@ -1902,6 +1934,8 @@ gen_phi_arg_condition (gphi *phi, vec<int> *occur, > c = TREE_OPERAND (c, 0); > *invert = true; > } > + > + c = gen_simplified_condition (c, cond_set); > c = force_gimple_operand_gsi (gsi, unshare_expr (c), > true, NULL_TREE, true, GSI_SAME_STMT); > if (cond != NULL_TREE) > @@ -1913,11 +1947,79 @@ gen_phi_arg_condition (gphi *phi, vec<int> *occur, > } > else > cond = c; > + > + /* Register the new possibly simplified conditional. When more than 2 > + entries in a phi node we chain entries in the false branch, so the > + inverted condition is active. */ > + scalar_cond_masked_key pred_cond ({ cond, 1 }); > + if (!invert) > + pred_cond.inverted_p = !pred_cond.inverted_p; > + cond_set.add (pred_cond); > } > gcc_assert (cond != NULL_TREE); > return cond; > } > > +/* Create the smallest nested conditional possible. On pre-order we record > + which conditionals are live, and on post-order rewrite the chain by > removing > + already active conditions. > + > + As an example we simplify: > + > + _7 = a_10 < 0; > + _21 = a_10 >= 0; > + _22 = a_10 < e_11(D); > + _23 = _21 & _22; > + _ifc__42 = _23 ? t_13 : 0; > + t_6 = _7 ? 1 : _ifc__42 > + > + into > + > + _7 = a_10 < 0; > + _22 = a_10 < e_11(D); > + _ifc__42 = _22 ? t_13 : 0; > + t_6 = _7 ? 1 : _ifc__42; > + > + which produces better code. */ > + > +static tree > +gen_phi_nest_statement (gphi *phi, gimple_stmt_iterator *gsi, > + scalar_cond_masked_set_type &cond_set, tree type, > + hash_map<tree_operand_hash, auto_vec<int>> &phi_arg_map, > + gimple **res_stmt, tree lhs0, vec<tree> &args, > + unsigned idx) > +{ > + if (idx == args.length ()) > + return args[idx - 1]; > + > + vec<int> *indexes = phi_arg_map.get (args[idx - 1]); > + bool invert; > + tree cond = gen_phi_arg_condition (phi, indexes, gsi, cond_set, &invert); > + tree arg1 = gen_phi_nest_statement (phi, gsi, cond_set, type, phi_arg_map, > + res_stmt, lhs0, args, idx + 1); > + > + unsigned prev = idx; > + unsigned curr = prev - 1; > + tree arg0 = args[curr]; > + tree rhs, lhs; > + if (idx > 1) > + lhs = make_temp_ssa_name (type, NULL, "_ifc_"); > + else > + lhs = lhs0; > + > + if (invert) > + rhs = fold_build_cond_expr (type, unshare_expr (cond), > + arg1, arg0); > + else > + rhs = fold_build_cond_expr (type, unshare_expr (cond), > + arg0, arg1); > + gassign *new_stmt = gimple_build_assign (lhs, rhs); > + gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); > + update_stmt (new_stmt); > + *res_stmt = new_stmt; > + return lhs; > +} > + > /* Replace a scalar PHI node with a COND_EXPR using COND as condition. > This routine can handle PHI nodes with more than two arguments. > > @@ -1968,6 +2070,8 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator > *gsi) > } > > bb = gimple_bb (phi); > + /* Keep track of conditionals already seen. */ > + scalar_cond_masked_set_type cond_set; > if (EDGE_COUNT (bb->preds) == 2) > { > /* Predicate ordinary PHI node with 2 arguments. */ > @@ -1976,6 +2080,7 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator > *gsi) > first_edge = EDGE_PRED (bb, 0); > second_edge = EDGE_PRED (bb, 1); > cond = bb_predicate (first_edge->src); > + cond_set.add ({ cond, 1 }); > if (TREE_CODE (cond) == TRUTH_NOT_EXPR) > std::swap (first_edge, second_edge); > if (EDGE_COUNT (first_edge->src->succs) > 1) > @@ -1988,7 +2093,9 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator > *gsi) > } > else > cond = bb_predicate (first_edge->src); > + > /* Gimplify the condition to a valid cond-expr conditonal operand. */ > + cond = gen_simplified_condition (cond, cond_set); > cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true, > NULL_TREE, true, GSI_SAME_STMT); > true_bb = first_edge->src; > @@ -2110,31 +2217,9 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator > *gsi) > else > { > /* Common case. */ > - vec<int> *indexes; > tree type = TREE_TYPE (gimple_phi_result (phi)); > - tree lhs; > - arg1 = args[args_len - 1]; > - for (i = args_len - 1; i > 0; i--) > - { > - arg0 = args[i - 1]; > - indexes = phi_arg_map.get (args[i - 1]); > - if (i != 1) > - lhs = make_temp_ssa_name (type, NULL, "_ifc_"); > - else > - lhs = res; > - bool invert; > - cond = gen_phi_arg_condition (phi, indexes, gsi, &invert); > - if (invert) > - rhs = fold_build_cond_expr (type, unshare_expr (cond), > - arg1, arg0); > - else > - rhs = fold_build_cond_expr (type, unshare_expr (cond), > - arg0, arg1); > - new_stmt = gimple_build_assign (lhs, rhs); > - gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); > - update_stmt (new_stmt); > - arg1 = lhs; > - } > + gen_phi_nest_statement (phi, gsi, cond_set, type, phi_arg_map, > + &new_stmt, res, args, 1); > } > > if (dump_file && (dump_flags & TDF_DETAILS)) > > > > > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman; HRB 36809 (AG Nuernberg)