On Fri, Dec 5, 2025 at 8:57 AM Konstantinos Eleftheriou
<[email protected]> wrote:
>
> If we consider code like:
>
>     if (bar1 == x)
>       return foo();
>     if (bar2 != y)
>       return foo();
>     return 0;
>
> We would like the ifcombine pass to convert this to:
>
>     if (bar1 == x || bar2 != y)
>       return foo();
>     return 0;
>
> The ifcombine pass can handle this transformation but it is ran very early and
> it misses the opportunity because there are two seperate blocks for foo().
>
> This patch updates the tail-merging pass so that, after the block merge, it
> attempts to combine conditions in the predecessors of the merged block in its
> immediate dominator. The optimization is restricted to targets with support
> for conditional comparisons.

Did you look at the difference doing ifcombine on the whole IR vs just
doing it in tail-merge?
What about doing ifcombine very late say right before the second sink?
I didn't see any stats to prove which way was better. I know Richard
B. suggested doing it as part of tail-merge:
"I'm not sure it's worth doing if-combining on the whole IL again because of it.
It might be possible to locally try if-combining from the immediate dominator
of a merged tail from inside tail-merging itself?"
But I am curious if doing ifcombine on the whole IR would be better.
Also would be interesting on the compile time.


>
> An XFAIL in gcc.dg/guality/pr54693-2.c now triggers only when `-flto` w/o
> `-fno-use-linker-plugin` is used. The generated code and debug info seems
> correct, so we have adjusted the clause.
>
>         PR tree-optimization/102793
>
> gcc/ChangeLog:
>
>         * tree-ssa-tail-merge.cc (INCLUDE_MEMORY): Added definition.
>         (struct cond_combine_info): New struct.
>         (phi_args_def_p): New function.
>         (maybe_combine_conditions): New function.
>         (apply_clusters): Added call to `maybe_combine_conditions`.
>         (tail_merge_optimize): Added call to `mark_ssa_maybe_undefs`.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/guality/pr54693-2.c: Update 'x-fail' clause.
>         * gcc.dg/tree-ssa/pr102793-1.c: New test.
>         * gcc.dg/tree-ssa/pr102793-2.c: New test.
>
> Signed-off-by: Konstantinos Eleftheriou <[email protected]>
> ---
>
>  gcc/testsuite/gcc.dg/guality/pr54693-2.c   |   2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c |  50 ++++
>  gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c |  51 ++++
>  gcc/tree-ssa-tail-merge.cc                 | 258 +++++++++++++++++++++
>  4 files changed, 360 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c
>
> diff --git a/gcc/testsuite/gcc.dg/guality/pr54693-2.c 
> b/gcc/testsuite/gcc.dg/guality/pr54693-2.c
> index 229ef0efbea0..9d0080782410 100644
> --- a/gcc/testsuite/gcc.dg/guality/pr54693-2.c
> +++ b/gcc/testsuite/gcc.dg/guality/pr54693-2.c
> @@ -18,7 +18,7 @@ foo (int x, int y, int z)
>    while (x > 3 && y > 3 && z > 3)
>      {          /* { dg-final { gdb-test .+2 "i" "v + 1" } } */
>                 /* { dg-final { gdb-test .+1 "x" "10 - i" { xfail { 
> aarch64*-*-* && { any-opts "-fno-fat-lto-objects" } } } } } */
> -      bar (i); /* { dg-final { gdb-test . "y" "20 - 2 * i" { xfail { 
> aarch64*-*-* && { any-opts "-fno-fat-lto-objects" "-Os" } } } } } */
> +      bar (i); /* { dg-final { gdb-test . "y" "20 - 2 * i" { xfail { 
> aarch64*-*-* && { { any-opts "-flto" } && { no-opts "-fno-use-linker-plugin" 
> } } } } } } */
>                 /* { dg-final { gdb-test .-1 "z" "30 - 3 * i" { xfail { 
> aarch64*-*-* && { any-opts "-fno-fat-lto-objects" "-Os" } } } } } */
>        i++, x--, y -= 2, z -= 3;
>      }
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c
> new file mode 100644
> index 000000000000..fc02c92059ba
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c
> @@ -0,0 +1,50 @@
> +/* { dg-do compile } */
> +/* { dg-skip-if "requires ccmp support" { ! { aarch64*-*-* || apxf } } } */
> +/* { dg-options "-O3 -fdump-tree-pre" } */
> +
> +typedef unsigned long uint64_t;
> +
> +int foo(void);
> +
> +int ccmp(uint64_t* s1, uint64_t* s2)
> +{
> +    uint64_t d1, d2, bar;
> +    d1 = *s1++;
> +    d2 = *s2++;
> +    bar = (d1 ^ d2) & 0xabcd;
> +    if (bar == 0 || d1 != d2)
> +      return foo();
> +    return 0;
> +}
> +
> +int noccmp0(uint64_t* s1, uint64_t* s2)
> +{
> +    uint64_t d1, d2, bar;
> +
> +    d1 = *s1++;
> +    d2 = *s2++;
> +    bar = (d1 ^ d2) & 0xabcd;
> +    if (bar == 0)
> +      return foo();
> +    if (d1 != d2)
> +      return foo();
> +    return 0;
> +}
> +
> +int noccmp1(uint64_t* s1, uint64_t* s2)
> +{
> +    uint64_t d1, d2, d3, d4, bar;
> +    d1 = *s1++;
> +    d2 = *s2++;
> +    d3 = *s1++;
> +    d4 = *s2++;
> +    bar = (d1 ^ d2) & 0xabcd;
> +    if (bar == 0)
> +      return foo();
> +    if (d3 != d4)
> +      return foo();
> +    return 0;
> +}
> +
> +/* Check for condition assignments for noccmp0 and noccmp1.  */
> +/* { dg-final { scan-tree-dump-times {_\d+ = bar_\d+ == 0;\n  _\d+ = 
> d\d+_\d+ != d\d+_\d+;} 2 "pre" } } */
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c
> new file mode 100644
> index 000000000000..6e8674781563
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c
> @@ -0,0 +1,51 @@
> +/* { dg-do compile } */
> +/* { dg-skip-if "requires ccmp support" { ! { aarch64*-*-* || apxf } } } */
> +/* { dg-options "-O3 -fdump-tree-pre" } */
> +
> +typedef unsigned long uint64_t;
> +
> +int foo(void);
> +
> +uint64_t noccmp0(uint64_t* s1, uint64_t* s2)
> +{
> +    uint64_t d1, d2, d3, d4, bar;
> +    d1 = *s1++;
> +    d2 = *s2++;
> +    d3 = *s1++;
> +    d4 = *s2++;
> +    bar = (d1 ^ d2) & 0xabcd;
> +    if (bar == 0)
> +      return foo();
> +    if (d3 != d4)
> +      d3++;
> +    else
> +      return foo();
> +    return d3;
> +}
> +
> +uint64_t noccmp1(uint64_t* s1, uint64_t* s2)
> +{
> +    uint64_t d1, d2, d3, d4, bar;
> +    d1 = *s1++;
> +    d2 = *s2++;
> +    d3 = *s1++;
> +    d4 = *s2++;
> +    bar = (d1 ^ d2) & 0xabcd;
> +    if (bar == 0)
> +      d3++;
> +    else
> +      return foo();
> +    if (d3 > d4)
> +      d3++;
> +    else if (d1 != d2)
> +      return foo ();
> +    d3 = d3 + d4 + 1;
> +    return d3;
> +}
> +
> +/* Check for condition assignments in the case that the transformation
> +   is applied.
> +   The transformation should not be applied on noccmp1, where the foo call is
> +   on the false branch of the first condition.  */
> +/* { dg-final { scan-tree-dump-times {_\d+ = bar_\d+ == 0;\n  _\d+ = 
> d\d+_\d+ == d\d+_\d+;} 1 "pre" } } */
> +/* { dg-final { scan-tree-dump-times {if \(bar_\d+ == 0\)} 1 "pre" } } */
> \ No newline at end of file
> diff --git a/gcc/tree-ssa-tail-merge.cc b/gcc/tree-ssa-tail-merge.cc
> index 857e91c206b3..f1f81646a3a4 100644
> --- a/gcc/tree-ssa-tail-merge.cc
> +++ b/gcc/tree-ssa-tail-merge.cc
> @@ -189,6 +189,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "system.h"
>  #include "coretypes.h"
>  #include "backend.h"
> +#include "target.h"
>  #include "tree.h"
>  #include "gimple.h"
>  #include "cfghooks.h"
> @@ -202,10 +203,17 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-cfg.h"
>  #include "tree-into-ssa.h"
>  #include "tree-ssa-sccvn.h"
> +#include "tree-ssa-ifcombine.h"
>  #include "cfgloop.h"
>  #include "tree-eh.h"
>  #include "tree-cfgcleanup.h"
>
> +#define INCLUDE_MEMORY
> +#include <memory>

This is wrong. INCLUDE_MEMORY should be defined before the include of system.h.
But memory is unconditionally included via system.h since
r15-5603-gb3f1b9e2aa079f so you don't need
that either.

> +#include "gimple-pretty-print.h"
> +#include "tree-ssa.h"
> +#include "algorithm"
> +
>  const int ignore_edge_flags = EDGE_DFS_BACK | EDGE_EXECUTABLE;
>
>  /* Describes a group of bbs with the same successors.  The successor bbs are
> @@ -277,6 +285,18 @@ struct aux_bb_info
>    basic_block dep_bb;
>  };
>
> +/* Info for maybe_combine_conditions.  */
> +
> +struct cond_combine_info
> +{
> +  /* Conditional expression that leads to the merged block.  */
> +  gcond *bb_cond_stmt;
> +  /* Block that contains the conditional expression.  */
> +  basic_block cond_bb;
> +  /* True if the merged block resides in the condition's else branch.  */
> +  bool in_else_branch;
> +};
> +
>  /* Macros to access the fields of struct aux_bb_info.  */
>
>  #define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size)
> @@ -1707,6 +1727,238 @@ replace_block_by (basic_block bb1, basic_block bb2)
>
>  static bitmap update_bbs;
>
> +static bool phi_args_def_p (basic_block bb, sbitmap visited)

Formatting, phi_args_def_p starts the line.
Also no comment before on what this function does or returns.

> +{
> +  if (bitmap_bit_p (visited, bb->index))
> +    return true;
> +
> +  bitmap_set_bit (visited, bb->index);

Would be better if you just do:
if (!bitmap_set_bit (visited, bb->index))
  return true;

> +
> +  edge e;
> +  edge_iterator ei;
> +  FOR_EACH_EDGE (e, ei, bb->succs)
> +    {
> +      basic_block succ = e->dest;
> +      gphi_iterator gpi;
> +
> +      for (gpi = gsi_start_phis (succ); !gsi_end_p (gpi); gsi_next (&gpi))
> +       {
> +         gphi *phi = gpi.phi ();
> +         use_operand_p use;
> +         ssa_op_iter iter;
> +
> +         FOR_EACH_PHI_ARG (use, phi, iter, SSA_OP_USE)
> +           {
> +             tree var = USE_FROM_PTR (use);
> +
> +             if (TREE_CODE (var) != SSA_NAME)
> +               continue;
> +
> +             if (ssa_name_maybe_undef_p (var))
> +               return false;
> +           }

You don't need to check each phi arguments. You can just check the phi
result since mark_ssa_maybe_undefs will mark if the phi def if any of
the arguments was undef already.

> +       }
> +
> +       if (!phi_args_def_p (succ, visited))
> +         return false;
> +    }
> +
> +    return true;
> +}
> +
> +/* Try to combine conditions with edges that lead to BB in its immediate
> +   dominator.  Nothing is done for targets that don't support conditional
> +   comparisons.  */
> +
> +static void
> +maybe_combine_conditions (basic_block bb)
> +{
> +  if (!targetm.have_ccmp ())
> +    return;
> +
> +  basic_block imm_dominator = get_immediate_dominator (CDI_DOMINATORS, bb);
> +
> +  edge e;
> +  edge_iterator ei;
> +  bool bb_in_else_branch = false;
> +  auto_vec<struct cond_combine_info> comb_conds (2);

auto_vec<struct cond_combine_info, 2> comb_conds;
Would be better  and move it below the if. I am not sure why you need
a vec here either.
So just:
cond_combine_info comb_conds[2];

Also no reason for the struct tag.

> +
> +  /* Check that the immediate dominator is found and has two successors and
> +     the merged block has two predecessors.  */
> +  if (!imm_dominator || imm_dominator->succs->length () != 2
> +      || bb->preds->length () != 2)
> +    return;
> +
> +  /* Find conditions in if-statements that lead to bb.  */
> +  int index = 0;
> +  int imm_dom_index = -1;
> +  unsigned int non_imm_dom_index;
> +  FOR_EACH_EDGE (e, ei, bb->preds)
> +    {
> +      basic_block then_tmp = NULL;
> +      basic_block else_tmp = NULL;
> +      bb_in_else_branch = recognize_if_then_else (e->src, &then_tmp, &bb);
> +      if (recognize_if_then_else (e->src, &bb, &else_tmp)
> +         || bb_in_else_branch)

I don't understand this part. because won't bb be over-written?

> +       {
> +         gcond *cond = safe_dyn_cast <gcond *> (*gsi_last_bb (e->src));

Just use as_a<gcond*> here since you deference it unconditionally.

> +         if (TREE_CODE_CLASS (gimple_cond_code (cond)) != tcc_comparison

the code class on a GIMPLE_COND will always be tcc_comparison (as
verified by verify_gimple_cond).

> +             || index == 2)

You have a check above on `bb->preds->length () != 2` so index better
be 0/1 based on that. So maybe these could be a gcc_assert instead.

> +           return;
> +
> +         struct cond_combine_info cci;
> +         cci.bb_cond_stmt = cond;
> +         cci.cond_bb = e->src;
> +         cci.in_else_branch = bb_in_else_branch;
> +         comb_conds.quick_push (cci);
> +
> +         if (cci.cond_bb == imm_dominator)
> +           imm_dom_index = index;
> +         else
> +           non_imm_dom_index = index;
> +         index++;
> +       }
> +    }
> +
> +  /* Abort if the immediate dominator is not found in the blocks containing
> +     the conditions or if only one block with a condition is found.  */
> +  if (imm_dom_index == -1 || comb_conds.length () == 1)
> +    return;
> +
> +  /* Check if the non-immediate dominator block contains other non-debug
> +     statements than the condition and abort in that case.  */
> +  unsigned int stmt_count = 0;
> +  gimple_stmt_iterator gsi
> +    = gsi_start_bb (comb_conds[non_imm_dom_index].cond_bb);
> +  for (; !gsi_end_p (gsi) && stmt_count < 2; gsi_next (&gsi))
> +    {
> +      if (!is_gimple_debug (gsi_stmt (gsi)))
> +       stmt_count++;
> +    }
> +
> +  bool other_bb_has_only_cond = stmt_count < 2;
> +  if (!other_bb_has_only_cond)
> +    return;

Why not go in reverse to check if there are anything besides the
conditional? Would be faster in the case there is.

> +
> +  basic_block imm_dom_block = comb_conds[imm_dom_index].cond_bb;
> +  basic_block non_imm_dom_block = comb_conds[non_imm_dom_index].cond_bb;
> +
> +  /* Ensure that the other block is a successor of the immediate dominator,
> +     that phi arguments from both condition blocks to bb are
> +     the same and that phi arguments found starting from the other block
> +     are defined on all paths.  */
> +  auto_sbitmap visited (last_basic_block_for_fn (cfun));
> +  bitmap_clear (visited);
> +  if ((!find_edge (imm_dom_block, non_imm_dom_block))
> +      || !same_phi_args_p (imm_dom_block, non_imm_dom_block, bb)
> +      || !phi_args_def_p (non_imm_dom_block, visited))
> +    return;
> +
> +  /* Convert condition statements to tree nodes.  */
> +  gcond *imm_dom_cond = comb_conds[imm_dom_index].bb_cond_stmt;
> +  gcond *non_imm_dom_cond = comb_conds[non_imm_dom_index].bb_cond_stmt;
> +  tree bb_cond1_lhs = gimple_cond_lhs (imm_dom_cond);
> +  tree bb_cond1_rhs = gimple_cond_rhs (imm_dom_cond);
> +  tree bb_cond2_lhs = gimple_cond_lhs (non_imm_dom_cond);
> +  tree bb_cond2_rhs = gimple_cond_rhs (non_imm_dom_cond);
> +
> +  tree cond1_expr = build2 (gimple_cond_code (imm_dom_cond),
> +                           TREE_TYPE (bb_cond1_lhs),

That is definitely the wrong type. it should be boolean_type.

> +                           bb_cond1_lhs, bb_cond1_rhs);
> +
> +  tree cond2_expr = build2 (gimple_cond_code (non_imm_dom_cond),
> +                           TREE_TYPE (bb_cond2_lhs),
> +                           bb_cond2_lhs, bb_cond2_rhs);
> +
> +  /* Fix the conditional expressions, so that the combined expression is
> +     correct when the remaining block is reached through the else branch.  */
> +  tree_code bool_op = BIT_IOR_EXPR;
> +  bool non_imm_dom_in_else_branch
> +    = comb_conds[non_imm_dom_index].in_else_branch;
> +  if (comb_conds[imm_dom_index].in_else_branch)
> +    bool_op = BIT_AND_EXPR;
> +
> +  if ((!non_imm_dom_in_else_branch && bool_op == BIT_AND_EXPR)
> +      || (non_imm_dom_in_else_branch && bool_op == BIT_IOR_EXPR))
> +    {
> +      cond2_expr = invert_truthvalue (cond2_expr);
> +      if (FLOAT_TYPE_P (TREE_TYPE (cond2_expr)))
> +       return;

   needs_inverted = true;

> +    }
> +
> +  gsi = gsi_last_bb (imm_dominator);
> +  gimple *last_stmt = *gsi;
> +
> +  /* Create combined condition.  */
> +  gimple_seq seq = NULL;
> +  tree first_cond_lhs = make_ssa_name (boolean_type_node);
> +  gimple_seq_add_stmt (&seq, gimple_build_assign (first_cond_lhs, 
> cond1_expr));

first_cond_lhs  = gimple_build (&seq, gimple_cond_code (imm_dom_cond),
boolean_type_node, bb_cond1_lhs, bb_cond1_rhs);

> +  tree sec_cond_lhs = make_ssa_name (boolean_type_node);
> +  gassign *sec_cond_assign = gimple_build_assign (sec_cond_lhs, cond2_expr);
> +  gimple_seq_add_stmt (&seq, sec_cond_assign);


sec_cond_lhs = gimple_build (&seq, gimple_cond_code
(non_imm_dom_cond), boolean_type_node, bb_cond2_lhs, bb_cond2_rhs);
if (needs_inverted)
  sec_cond_lhs = gimple_build (&seq, BIT_NOT_EXPR, boolean_type_node,
sec_cond_lhs);

> +  tree comb_cond_lhs = make_ssa_name (boolean_type_node);
> +  gimple_seq_add_stmt (&seq, gimple_build_assign (comb_cond_lhs,
> +                                                 bool_op,
> +                                                 first_cond_lhs,
> +                                                 sec_cond_lhs));

comb_cond_lhs = gimple_build (&seq, bool_op, boolean_type_node,
first_cond_lhs, sec_cond_lhs);

> +
> +  /* Update immediate dominator's condition with the combined one.  */
> +  gimple_set_location (sec_cond_assign, gimple_location (non_imm_dom_cond));
> +  gsi_insert_seq_before (&gsi, seq, GSI_NEW_STMT);
> +  gcond *last_stmt_cond = safe_dyn_cast <gcond *>(last_stmt);

as_a instead of safe_dyn_cast since you unconditionally modify it as a
GIMPLE_COND anyways.

> +
> +  gimple_cond_set_condition (last_stmt_cond, EQ_EXPR, comb_cond_lhs,
> +                            build_int_cst (TREE_TYPE (comb_cond_lhs), 1));
> +  update_stmt (last_stmt_cond);
> +
> +  /* Get the edges from the immediate dominator.  */
> +  edge e_succ_non_imm = nullptr;
> +  edge e_succ_other = nullptr;
> +  FOR_EACH_EDGE (e, ei, imm_dominator->succs)
> +    {
> +      if (e->dest == non_imm_dom_block)
> +       e_succ_non_imm = e;
> +      else
> +       e_succ_other = e;
> +    }
> +
> +  /* We're going to remove the block that contained the condition that is
> +     now merged in the immediate dominator, so redirect the edge that was
> +     pointing to this block.  */
> +  edge e_red = nullptr;
> +  FOR_EACH_EDGE (e, ei, e_succ_non_imm->dest->succs)
> +    {
> +      if (e->dest == bb)
> +       continue;
> +      e_red = redirect_edge_and_branch (e_succ_non_imm, e->dest);
> +    }
> +
> +  /* Update edge probabilities.  */
> +  profile_probability new_prob;
> +  profile_count src_count = e_succ_other->src->count;
> +  profile_count dest_count = e_succ_other->dest->count;
> +  if (src_count.nonzero_p ()
> +      && dest_count.nonzero_p ())
> +    new_prob = dest_count.probability_in (src_count);
> +  else
> +    new_prob = profile_probability::always ().apply_scale (1, 2);
> +
> +  e_succ_other->probability = new_prob;
> +  e_red->probability = new_prob.invert ();
> +
> +  /* Remove the block.  */
> +  mark_basic_block_deleted (non_imm_dom_block);
> +  same_succ_flush_bb (non_imm_dom_block);
> +  delete_basic_block (non_imm_dom_block);
> +
> +  if (dump_file)
> +    fprintf (dump_file, "Conditional statements %s and %s combined in BB 
> %d\n",
> +            print_generic_expr_to_str (cond1_expr),
> +            print_generic_expr_to_str (cond2_expr),
> +            imm_dominator->index);

This should only be done if details are requested. Also should be done
above. This also leaks memory since print_generic_expr_to_str does a
strdup.

Thanks,
Andrew

> +
> +}
> +
>  /* For each cluster in all_clusters, merge all cluster->bbs.  Returns
>     number of bbs removed.  */
>
> @@ -1735,6 +1987,9 @@ apply_clusters (void)
>           bitmap_clear_bit (update_bbs, bb1->index);
>
>           replace_block_by (bb1, bb2);
> +
> +         maybe_combine_conditions (bb2);
> +
>           nr_bbs_removed++;
>         }
>      }
> @@ -1826,6 +2081,9 @@ tail_merge_optimize (bool need_crit_edge_split)
>      }
>    init_worklist ();
>
> +  /* This is needed by maybe_combine_conditions.  */
> +  mark_ssa_maybe_undefs ();
> +
>    while (!worklist.is_empty ())
>      {
>        if (!loop_entered)
> --
> 2.51.1
>

Reply via email to