On Tue, Oct 29, 2024 at 4:29 AM Andrew Pinski <quic_apin...@quicinc.com> wrote:
>
> r0-126134-g5d2a9da9a7f7c1 added support for circuiting and combing the ifs
> into using either AND or OR. But it only allowed the inner condition
> basic block having the conditional only. This changes to allow up to 2 
> defining
> statements as long as they are just nop conversions for either the lhs or rhs
> of the conditional.
>
> This should allow to use ccmp on aarch64 and x86_64 (APX) slightly more than 
> before.
>
> Boootstrapped and tested on x86_64-linux-gnu.
>
>         PR tree-optimization/85605
>
> gcc/ChangeLog:
>
>         * tree-ssa-ifcombine.cc (can_combine_bbs_with_short_circuit): New 
> function.
>         (ifcombine_ifandif): Use can_combine_bbs_with_short_circuit instead 
> of checking
>         if iterator is one before the last statement.
>
> gcc/testsuite/ChangeLog:
>
>         * g++.dg/tree-ssa/ifcombine-ccmp-1.C: New test.
>         * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c: New test.
>         * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c: New test.
>
> Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com>
> ---
>  .../g++.dg/tree-ssa/ifcombine-ccmp-1.C        | 27 +++++++++++++
>  .../gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c    | 18 +++++++++
>  .../gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c    | 19 +++++++++
>  gcc/tree-ssa-ifcombine.cc                     | 39 ++++++++++++++++++-
>  4 files changed, 101 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
>
> diff --git a/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C 
> b/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
> new file mode 100644
> index 00000000000..282cec8c628
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -g -fdump-tree-optimized --param 
> logical-op-non-short-circuit=1" } */
> +
> +/* PR tree-optimization/85605 */
> +#include <stdint.h>
> +
> +template<class T,class T2>
> +inline bool cmp(T a, T2 b) {
> +  return a<0 ? true : T2(a) < b;
> +}
> +
> +template<class T,class T2>
> +inline bool cmp2(T a, T2 b) {
> +  return (a<0) | (T2(a) < b);
> +}
> +
> +bool f(int a, int b) {
> +    return cmp(int64_t(a), unsigned(b));
> +}
> +
> +bool f2(int a, int b) {
> +    return cmp2(int64_t(a), unsigned(b));
> +}
> +
> +
> +/* Both of these functions should be optimized to the same, and have an | in 
> them. */
> +/* { dg-final { scan-tree-dump-times " \\\| " 2 "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
> new file mode 100644
> index 00000000000..1bdbb9358b4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -g -fdump-tree-optimized --param 
> logical-op-non-short-circuit=1" } */
> +
> +/* PR tree-optimization/85605 */
> +/* Like ssa-ifcombine-ccmp-1.c but with conversion from unsigned to signed 
> in the
> +   inner bb which should be able to move too. */
> +
> +int t (int a, unsigned b)
> +{
> +  if (a > 0)
> +  {
> +    signed t = b;
> +    if (t > 0)
> +      return 0;
> +  }
> +  return 1;
> +}
> +/* { dg-final { scan-tree-dump "\&" "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
> new file mode 100644
> index 00000000000..8d74b4932c5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -g -fdump-tree-optimized --param 
> logical-op-non-short-circuit=1" } */
> +
> +/* PR tree-optimization/85605 */
> +/* Like ssa-ifcombine-ccmp-2.c but with conversion from unsigned to signed 
> in the
> +   inner bb which should be able to move too. */
> +
> +int t (int a, unsigned b)
> +{
> +  if (a > 0)
> +    goto L1;
> +  signed t = b;
> +  if (t > 0)
> +    goto L1;
> +  return 0;
> +L1:
> +  return 1;
> +}
> +/* { dg-final { scan-tree-dump "\|" "optimized" } } */
> diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
> index 39702929fc0..3acecda31cc 100644
> --- a/gcc/tree-ssa-ifcombine.cc
> +++ b/gcc/tree-ssa-ifcombine.cc
> @@ -400,6 +400,38 @@ update_profile_after_ifcombine (basic_block 
> inner_cond_bb,
>    outer2->probability = profile_probability::never ();
>  }
>
> +/* Returns true if inner_cond_bb contains just the condition or 1/2 
> statements
> +   that define lhs or rhs with a nop conversion. */
> +
> +static bool
> +can_combine_bbs_with_short_circuit (basic_block inner_cond_bb, tree lhs, 
> tree rhs)
> +{
> +  gimple_stmt_iterator gsi;
> +  gsi = gsi_start_nondebug_after_labels_bb (inner_cond_bb);
> +  /* If only the condition, this should be allowed. */
> +  if (gsi_one_before_end_p (gsi))
> +    return true;
> +  /* Can have up to 2 statements defining each of lhs/rhs. */
> +  for (int i = 0; i < 2; i++)
> +    {
> +      gimple *stmt = gsi_stmt (gsi);
> +      if (!gimple_assign_cast_p (stmt))
> +       return false;
> +      if (!tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (stmt)),
> +                                 TREE_TYPE (gimple_assign_rhs1 (stmt))))

I would say non-nop conversions should be fine as well - not sure why
gimple_assign_cast_p covers FIX_TRUNC_EXPR but for example not
FLOAT_EXPR - so possibly is_gimple_assign && CONVERT_EXPR_CODE_P (..)
would be a better check?

> +       return false;
> +      /* The defining statement needs to match either the lhs or rhs of
> +         the condition. */
> +      if (!operand_equal_p (lhs, gimple_assign_lhs (stmt))
> +          && !operand_equal_p (rhs, gimple_assign_lhs (stmt)))

I think you can use == here.

OK with that change but note this might conflict with the series from Alexandre.

Richard.

> +        return false;
> +      gsi_next_nondebug (&gsi);
> +      if (gsi_one_before_end_p (gsi))
> +       return true;
> +    }
> +  return false;
> +}
> +
>  /* If-convert on a and pattern with a common else block.  The inner
>     if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB.
>     inner_inv, outer_inv and result_inv indicate whether the conditions
> @@ -610,8 +642,11 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool 
> inner_inv,
>               = param_logical_op_non_short_circuit;
>           if (!logical_op_non_short_circuit || sanitize_coverage_p ())
>             return false;
> -         /* Only do this optimization if the inner bb contains only the 
> conditional. */
> -         if (!gsi_one_before_end_p (gsi_start_nondebug_after_labels_bb 
> (inner_cond_bb)))
> +         /* Only do this optimization if the inner bb contains only the 
> conditional
> +            or there is one or 2 statements which are nop conversion for the 
> comparison. */
> +         if (!can_combine_bbs_with_short_circuit (inner_cond_bb,
> +                                                  gimple_cond_lhs 
> (inner_cond),
> +                                                  gimple_cond_rhs 
> (inner_cond)))
>             return false;
>           t1 = fold_build2_loc (gimple_location (inner_cond),
>                                 inner_cond_code,
> --
> 2.43.0
>

Reply via email to