If we consider code like:
if (bar1 == x)
return foo();
if (bar2 != y)
return foo();
return 0;
We would like the ifcombine pass to convert this to:
if (bar1 == x || bar2 != y)
return foo();
return 0;
The ifcombine pass can handle this transformation but it is ran very early and
it misses the opportunity because there are two seperate blocks for foo().
This patch updates the tail-merging pass so that, after the block merge, it
attempts to combine conditions in the predecessors of the merged block in its
immediate dominator. The optimization is restricted to targets with support
for conditional comparisons.
An XFAIL in gcc.dg/guality/pr54693-2.c now triggers only when `-flto` w/o
`-fno-use-linker-plugin` is used. The generated code and debug info seems
correct, so we have adjusted the clause.
PR tree-optimization/102793
gcc/ChangeLog:
* tree-ssa-tail-merge.cc (INCLUDE_MEMORY): Added definition.
(struct cond_combine_info): New struct.
(phi_args_def_p): New function.
(maybe_combine_conditions): New function.
(apply_clusters): Added call to `maybe_combine_conditions`.
(tail_merge_optimize): Added call to `mark_ssa_maybe_undefs`.
gcc/testsuite/ChangeLog:
* gcc.dg/guality/pr54693-2.c: Update 'x-fail' clause.
* gcc.dg/tree-ssa/pr102793-1.c: New test.
* gcc.dg/tree-ssa/pr102793-2.c: New test.
Signed-off-by: Konstantinos Eleftheriou <[email protected]>
---
gcc/testsuite/gcc.dg/guality/pr54693-2.c | 2 +-
gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c | 50 ++++
gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c | 51 ++++
gcc/tree-ssa-tail-merge.cc | 258 +++++++++++++++++++++
4 files changed, 360 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c
diff --git a/gcc/testsuite/gcc.dg/guality/pr54693-2.c
b/gcc/testsuite/gcc.dg/guality/pr54693-2.c
index 229ef0efbea0..9d0080782410 100644
--- a/gcc/testsuite/gcc.dg/guality/pr54693-2.c
+++ b/gcc/testsuite/gcc.dg/guality/pr54693-2.c
@@ -18,7 +18,7 @@ foo (int x, int y, int z)
while (x > 3 && y > 3 && z > 3)
{ /* { dg-final { gdb-test .+2 "i" "v + 1" } } */
/* { dg-final { gdb-test .+1 "x" "10 - i" { xfail {
aarch64*-*-* && { any-opts "-fno-fat-lto-objects" } } } } } */
- bar (i); /* { dg-final { gdb-test . "y" "20 - 2 * i" { xfail {
aarch64*-*-* && { any-opts "-fno-fat-lto-objects" "-Os" } } } } } */
+ bar (i); /* { dg-final { gdb-test . "y" "20 - 2 * i" { xfail {
aarch64*-*-* && { { any-opts "-flto" } && { no-opts "-fno-use-linker-plugin" }
} } } } } */
/* { dg-final { gdb-test .-1 "z" "30 - 3 * i" { xfail {
aarch64*-*-* && { any-opts "-fno-fat-lto-objects" "-Os" } } } } } */
i++, x--, y -= 2, z -= 3;
}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c
new file mode 100644
index 000000000000..fc02c92059ba
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-skip-if "requires ccmp support" { ! { aarch64*-*-* || apxf } } } */
+/* { dg-options "-O3 -fdump-tree-pre" } */
+
+typedef unsigned long uint64_t;
+
+int foo(void);
+
+int ccmp(uint64_t* s1, uint64_t* s2)
+{
+ uint64_t d1, d2, bar;
+ d1 = *s1++;
+ d2 = *s2++;
+ bar = (d1 ^ d2) & 0xabcd;
+ if (bar == 0 || d1 != d2)
+ return foo();
+ return 0;
+}
+
+int noccmp0(uint64_t* s1, uint64_t* s2)
+{
+ uint64_t d1, d2, bar;
+
+ d1 = *s1++;
+ d2 = *s2++;
+ bar = (d1 ^ d2) & 0xabcd;
+ if (bar == 0)
+ return foo();
+ if (d1 != d2)
+ return foo();
+ return 0;
+}
+
+int noccmp1(uint64_t* s1, uint64_t* s2)
+{
+ uint64_t d1, d2, d3, d4, bar;
+ d1 = *s1++;
+ d2 = *s2++;
+ d3 = *s1++;
+ d4 = *s2++;
+ bar = (d1 ^ d2) & 0xabcd;
+ if (bar == 0)
+ return foo();
+ if (d3 != d4)
+ return foo();
+ return 0;
+}
+
+/* Check for condition assignments for noccmp0 and noccmp1. */
+/* { dg-final { scan-tree-dump-times {_\d+ = bar_\d+ == 0;\n _\d+ = d\d+_\d+
!= d\d+_\d+;} 2 "pre" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c
new file mode 100644
index 000000000000..6e8674781563
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-skip-if "requires ccmp support" { ! { aarch64*-*-* || apxf } } } */
+/* { dg-options "-O3 -fdump-tree-pre" } */
+
+typedef unsigned long uint64_t;
+
+int foo(void);
+
+uint64_t noccmp0(uint64_t* s1, uint64_t* s2)
+{
+ uint64_t d1, d2, d3, d4, bar;
+ d1 = *s1++;
+ d2 = *s2++;
+ d3 = *s1++;
+ d4 = *s2++;
+ bar = (d1 ^ d2) & 0xabcd;
+ if (bar == 0)
+ return foo();
+ if (d3 != d4)
+ d3++;
+ else
+ return foo();
+ return d3;
+}
+
+uint64_t noccmp1(uint64_t* s1, uint64_t* s2)
+{
+ uint64_t d1, d2, d3, d4, bar;
+ d1 = *s1++;
+ d2 = *s2++;
+ d3 = *s1++;
+ d4 = *s2++;
+ bar = (d1 ^ d2) & 0xabcd;
+ if (bar == 0)
+ d3++;
+ else
+ return foo();
+ if (d3 > d4)
+ d3++;
+ else if (d1 != d2)
+ return foo ();
+ d3 = d3 + d4 + 1;
+ return d3;
+}
+
+/* Check for condition assignments in the case that the transformation
+ is applied.
+ The transformation should not be applied on noccmp1, where the foo call is
+ on the false branch of the first condition. */
+/* { dg-final { scan-tree-dump-times {_\d+ = bar_\d+ == 0;\n _\d+ = d\d+_\d+
== d\d+_\d+;} 1 "pre" } } */
+/* { dg-final { scan-tree-dump-times {if \(bar_\d+ == 0\)} 1 "pre" } } */
\ No newline at end of file
diff --git a/gcc/tree-ssa-tail-merge.cc b/gcc/tree-ssa-tail-merge.cc
index 857e91c206b3..f1f81646a3a4 100644
--- a/gcc/tree-ssa-tail-merge.cc
+++ b/gcc/tree-ssa-tail-merge.cc
@@ -189,6 +189,7 @@ along with GCC; see the file COPYING3. If not see
#include "system.h"
#include "coretypes.h"
#include "backend.h"
+#include "target.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
@@ -202,10 +203,17 @@ along with GCC; see the file COPYING3. If not see
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa-sccvn.h"
+#include "tree-ssa-ifcombine.h"
#include "cfgloop.h"
#include "tree-eh.h"
#include "tree-cfgcleanup.h"
+#define INCLUDE_MEMORY
+#include <memory>
+#include "gimple-pretty-print.h"
+#include "tree-ssa.h"
+#include "algorithm"
+
const int ignore_edge_flags = EDGE_DFS_BACK | EDGE_EXECUTABLE;
/* Describes a group of bbs with the same successors. The successor bbs are
@@ -277,6 +285,18 @@ struct aux_bb_info
basic_block dep_bb;
};
+/* Info for maybe_combine_conditions. */
+
+struct cond_combine_info
+{
+ /* Conditional expression that leads to the merged block. */
+ gcond *bb_cond_stmt;
+ /* Block that contains the conditional expression. */
+ basic_block cond_bb;
+ /* True if the merged block resides in the condition's else branch. */
+ bool in_else_branch;
+};
+
/* Macros to access the fields of struct aux_bb_info. */
#define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size)
@@ -1707,6 +1727,238 @@ replace_block_by (basic_block bb1, basic_block bb2)
static bitmap update_bbs;
+static bool phi_args_def_p (basic_block bb, sbitmap visited)
+{
+ if (bitmap_bit_p (visited, bb->index))
+ return true;
+
+ bitmap_set_bit (visited, bb->index);
+
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ basic_block succ = e->dest;
+ gphi_iterator gpi;
+
+ for (gpi = gsi_start_phis (succ); !gsi_end_p (gpi); gsi_next (&gpi))
+ {
+ gphi *phi = gpi.phi ();
+ use_operand_p use;
+ ssa_op_iter iter;
+
+ FOR_EACH_PHI_ARG (use, phi, iter, SSA_OP_USE)
+ {
+ tree var = USE_FROM_PTR (use);
+
+ if (TREE_CODE (var) != SSA_NAME)
+ continue;
+
+ if (ssa_name_maybe_undef_p (var))
+ return false;
+ }
+ }
+
+ if (!phi_args_def_p (succ, visited))
+ return false;
+ }
+
+ return true;
+}
+
+/* Try to combine conditions with edges that lead to BB in its immediate
+ dominator. Nothing is done for targets that don't support conditional
+ comparisons. */
+
+static void
+maybe_combine_conditions (basic_block bb)
+{
+ if (!targetm.have_ccmp ())
+ return;
+
+ basic_block imm_dominator = get_immediate_dominator (CDI_DOMINATORS, bb);
+
+ edge e;
+ edge_iterator ei;
+ bool bb_in_else_branch = false;
+ auto_vec<struct cond_combine_info> comb_conds (2);
+
+ /* Check that the immediate dominator is found and has two successors and
+ the merged block has two predecessors. */
+ if (!imm_dominator || imm_dominator->succs->length () != 2
+ || bb->preds->length () != 2)
+ return;
+
+ /* Find conditions in if-statements that lead to bb. */
+ int index = 0;
+ int imm_dom_index = -1;
+ unsigned int non_imm_dom_index;
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ {
+ basic_block then_tmp = NULL;
+ basic_block else_tmp = NULL;
+ bb_in_else_branch = recognize_if_then_else (e->src, &then_tmp, &bb);
+ if (recognize_if_then_else (e->src, &bb, &else_tmp)
+ || bb_in_else_branch)
+ {
+ gcond *cond = safe_dyn_cast <gcond *> (*gsi_last_bb (e->src));
+ if (TREE_CODE_CLASS (gimple_cond_code (cond)) != tcc_comparison
+ || index == 2)
+ return;
+
+ struct cond_combine_info cci;
+ cci.bb_cond_stmt = cond;
+ cci.cond_bb = e->src;
+ cci.in_else_branch = bb_in_else_branch;
+ comb_conds.quick_push (cci);
+
+ if (cci.cond_bb == imm_dominator)
+ imm_dom_index = index;
+ else
+ non_imm_dom_index = index;
+ index++;
+ }
+ }
+
+ /* Abort if the immediate dominator is not found in the blocks containing
+ the conditions or if only one block with a condition is found. */
+ if (imm_dom_index == -1 || comb_conds.length () == 1)
+ return;
+
+ /* Check if the non-immediate dominator block contains other non-debug
+ statements than the condition and abort in that case. */
+ unsigned int stmt_count = 0;
+ gimple_stmt_iterator gsi
+ = gsi_start_bb (comb_conds[non_imm_dom_index].cond_bb);
+ for (; !gsi_end_p (gsi) && stmt_count < 2; gsi_next (&gsi))
+ {
+ if (!is_gimple_debug (gsi_stmt (gsi)))
+ stmt_count++;
+ }
+
+ bool other_bb_has_only_cond = stmt_count < 2;
+ if (!other_bb_has_only_cond)
+ return;
+
+ basic_block imm_dom_block = comb_conds[imm_dom_index].cond_bb;
+ basic_block non_imm_dom_block = comb_conds[non_imm_dom_index].cond_bb;
+
+ /* Ensure that the other block is a successor of the immediate dominator,
+ that phi arguments from both condition blocks to bb are
+ the same and that phi arguments found starting from the other block
+ are defined on all paths. */
+ auto_sbitmap visited (last_basic_block_for_fn (cfun));
+ bitmap_clear (visited);
+ if ((!find_edge (imm_dom_block, non_imm_dom_block))
+ || !same_phi_args_p (imm_dom_block, non_imm_dom_block, bb)
+ || !phi_args_def_p (non_imm_dom_block, visited))
+ return;
+
+ /* Convert condition statements to tree nodes. */
+ gcond *imm_dom_cond = comb_conds[imm_dom_index].bb_cond_stmt;
+ gcond *non_imm_dom_cond = comb_conds[non_imm_dom_index].bb_cond_stmt;
+ tree bb_cond1_lhs = gimple_cond_lhs (imm_dom_cond);
+ tree bb_cond1_rhs = gimple_cond_rhs (imm_dom_cond);
+ tree bb_cond2_lhs = gimple_cond_lhs (non_imm_dom_cond);
+ tree bb_cond2_rhs = gimple_cond_rhs (non_imm_dom_cond);
+
+ tree cond1_expr = build2 (gimple_cond_code (imm_dom_cond),
+ TREE_TYPE (bb_cond1_lhs),
+ bb_cond1_lhs, bb_cond1_rhs);
+
+ tree cond2_expr = build2 (gimple_cond_code (non_imm_dom_cond),
+ TREE_TYPE (bb_cond2_lhs),
+ bb_cond2_lhs, bb_cond2_rhs);
+
+ /* Fix the conditional expressions, so that the combined expression is
+ correct when the remaining block is reached through the else branch. */
+ tree_code bool_op = BIT_IOR_EXPR;
+ bool non_imm_dom_in_else_branch
+ = comb_conds[non_imm_dom_index].in_else_branch;
+ if (comb_conds[imm_dom_index].in_else_branch)
+ bool_op = BIT_AND_EXPR;
+
+ if ((!non_imm_dom_in_else_branch && bool_op == BIT_AND_EXPR)
+ || (non_imm_dom_in_else_branch && bool_op == BIT_IOR_EXPR))
+ {
+ cond2_expr = invert_truthvalue (cond2_expr);
+ if (FLOAT_TYPE_P (TREE_TYPE (cond2_expr)))
+ return;
+ }
+
+ gsi = gsi_last_bb (imm_dominator);
+ gimple *last_stmt = *gsi;
+
+ /* Create combined condition. */
+ gimple_seq seq = NULL;
+ tree first_cond_lhs = make_ssa_name (boolean_type_node);
+ gimple_seq_add_stmt (&seq, gimple_build_assign (first_cond_lhs, cond1_expr));
+ tree sec_cond_lhs = make_ssa_name (boolean_type_node);
+ gassign *sec_cond_assign = gimple_build_assign (sec_cond_lhs, cond2_expr);
+ gimple_seq_add_stmt (&seq, sec_cond_assign);
+ tree comb_cond_lhs = make_ssa_name (boolean_type_node);
+ gimple_seq_add_stmt (&seq, gimple_build_assign (comb_cond_lhs,
+ bool_op,
+ first_cond_lhs,
+ sec_cond_lhs));
+
+ /* Update immediate dominator's condition with the combined one. */
+ gimple_set_location (sec_cond_assign, gimple_location (non_imm_dom_cond));
+ gsi_insert_seq_before (&gsi, seq, GSI_NEW_STMT);
+ gcond *last_stmt_cond = safe_dyn_cast <gcond *>(last_stmt);
+
+ gimple_cond_set_condition (last_stmt_cond, EQ_EXPR, comb_cond_lhs,
+ build_int_cst (TREE_TYPE (comb_cond_lhs), 1));
+ update_stmt (last_stmt_cond);
+
+ /* Get the edges from the immediate dominator. */
+ edge e_succ_non_imm = nullptr;
+ edge e_succ_other = nullptr;
+ FOR_EACH_EDGE (e, ei, imm_dominator->succs)
+ {
+ if (e->dest == non_imm_dom_block)
+ e_succ_non_imm = e;
+ else
+ e_succ_other = e;
+ }
+
+ /* We're going to remove the block that contained the condition that is
+ now merged in the immediate dominator, so redirect the edge that was
+ pointing to this block. */
+ edge e_red = nullptr;
+ FOR_EACH_EDGE (e, ei, e_succ_non_imm->dest->succs)
+ {
+ if (e->dest == bb)
+ continue;
+ e_red = redirect_edge_and_branch (e_succ_non_imm, e->dest);
+ }
+
+ /* Update edge probabilities. */
+ profile_probability new_prob;
+ profile_count src_count = e_succ_other->src->count;
+ profile_count dest_count = e_succ_other->dest->count;
+ if (src_count.nonzero_p ()
+ && dest_count.nonzero_p ())
+ new_prob = dest_count.probability_in (src_count);
+ else
+ new_prob = profile_probability::always ().apply_scale (1, 2);
+
+ e_succ_other->probability = new_prob;
+ e_red->probability = new_prob.invert ();
+
+ /* Remove the block. */
+ mark_basic_block_deleted (non_imm_dom_block);
+ same_succ_flush_bb (non_imm_dom_block);
+ delete_basic_block (non_imm_dom_block);
+
+ if (dump_file)
+ fprintf (dump_file, "Conditional statements %s and %s combined in BB %d\n",
+ print_generic_expr_to_str (cond1_expr),
+ print_generic_expr_to_str (cond2_expr),
+ imm_dominator->index);
+
+}
+
/* For each cluster in all_clusters, merge all cluster->bbs. Returns
number of bbs removed. */
@@ -1735,6 +1987,9 @@ apply_clusters (void)
bitmap_clear_bit (update_bbs, bb1->index);
replace_block_by (bb1, bb2);
+
+ maybe_combine_conditions (bb2);
+
nr_bbs_removed++;
}
}
@@ -1826,6 +2081,9 @@ tail_merge_optimize (bool need_crit_edge_split)
}
init_worklist ();
+ /* This is needed by maybe_combine_conditions. */
+ mark_ssa_maybe_undefs ();
+
while (!worklist.is_empty ())
{
if (!loop_entered)
--
2.51.1