On Tue, Oct 29, 2024 at 10:10 AM Andrew Pinski <pins...@gmail.com> wrote:
>
> On Tue, Oct 29, 2024 at 5:59 AM Richard Biener
> <richard.guent...@gmail.com> wrote:
> >
> > On Tue, Oct 29, 2024 at 4:29 AM Andrew Pinski <quic_apin...@quicinc.com> 
> > wrote:
> > >
> > > r0-126134-g5d2a9da9a7f7c1 added support for circuiting and combing the ifs
> > > into using either AND or OR. But it only allowed the inner condition
> > > basic block having the conditional only. This changes to allow up to 2 
> > > defining
> > > statements as long as they are just nop conversions for either the lhs or 
> > > rhs
> > > of the conditional.
> > >
> > > This should allow to use ccmp on aarch64 and x86_64 (APX) slightly more 
> > > than before.
> > >
> > > Boootstrapped and tested on x86_64-linux-gnu.
> > >
> > >         PR tree-optimization/85605
> > >
> > > gcc/ChangeLog:
> > >
> > >         * tree-ssa-ifcombine.cc (can_combine_bbs_with_short_circuit): New 
> > > function.
> > >         (ifcombine_ifandif): Use can_combine_bbs_with_short_circuit 
> > > instead of checking
> > >         if iterator is one before the last statement.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >         * g++.dg/tree-ssa/ifcombine-ccmp-1.C: New test.
> > >         * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c: New test.
> > >         * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c: New test.
> > >
> > > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com>
> > > ---
> > >  .../g++.dg/tree-ssa/ifcombine-ccmp-1.C        | 27 +++++++++++++
> > >  .../gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c    | 18 +++++++++
> > >  .../gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c    | 19 +++++++++
> > >  gcc/tree-ssa-ifcombine.cc                     | 39 ++++++++++++++++++-
> > >  4 files changed, 101 insertions(+), 2 deletions(-)
> > >  create mode 100644 gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
> > >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
> > >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
> > >
> > > diff --git a/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C 
> > > b/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
> > > new file mode 100644
> > > index 00000000000..282cec8c628
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
> > > @@ -0,0 +1,27 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2 -g -fdump-tree-optimized --param 
> > > logical-op-non-short-circuit=1" } */
> > > +
> > > +/* PR tree-optimization/85605 */
> > > +#include <stdint.h>
> > > +
> > > +template<class T,class T2>
> > > +inline bool cmp(T a, T2 b) {
> > > +  return a<0 ? true : T2(a) < b;
> > > +}
> > > +
> > > +template<class T,class T2>
> > > +inline bool cmp2(T a, T2 b) {
> > > +  return (a<0) | (T2(a) < b);
> > > +}
> > > +
> > > +bool f(int a, int b) {
> > > +    return cmp(int64_t(a), unsigned(b));
> > > +}
> > > +
> > > +bool f2(int a, int b) {
> > > +    return cmp2(int64_t(a), unsigned(b));
> > > +}
> > > +
> > > +
> > > +/* Both of these functions should be optimized to the same, and have an 
> > > | in them. */
> > > +/* { dg-final { scan-tree-dump-times " \\\| " 2 "optimized" } } */
> > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c 
> > > b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
> > > new file mode 100644
> > > index 00000000000..1bdbb9358b4
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
> > > @@ -0,0 +1,18 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2 -g -fdump-tree-optimized --param 
> > > logical-op-non-short-circuit=1" } */
> > > +
> > > +/* PR tree-optimization/85605 */
> > > +/* Like ssa-ifcombine-ccmp-1.c but with conversion from unsigned to 
> > > signed in the
> > > +   inner bb which should be able to move too. */
> > > +
> > > +int t (int a, unsigned b)
> > > +{
> > > +  if (a > 0)
> > > +  {
> > > +    signed t = b;
> > > +    if (t > 0)
> > > +      return 0;
> > > +  }
> > > +  return 1;
> > > +}
> > > +/* { dg-final { scan-tree-dump "\&" "optimized" } } */
> > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c 
> > > b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
> > > new file mode 100644
> > > index 00000000000..8d74b4932c5
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
> > > @@ -0,0 +1,19 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2 -g -fdump-tree-optimized --param 
> > > logical-op-non-short-circuit=1" } */
> > > +
> > > +/* PR tree-optimization/85605 */
> > > +/* Like ssa-ifcombine-ccmp-2.c but with conversion from unsigned to 
> > > signed in the
> > > +   inner bb which should be able to move too. */
> > > +
> > > +int t (int a, unsigned b)
> > > +{
> > > +  if (a > 0)
> > > +    goto L1;
> > > +  signed t = b;
> > > +  if (t > 0)
> > > +    goto L1;
> > > +  return 0;
> > > +L1:
> > > +  return 1;
> > > +}
> > > +/* { dg-final { scan-tree-dump "\|" "optimized" } } */
> > > diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
> > > index 39702929fc0..3acecda31cc 100644
> > > --- a/gcc/tree-ssa-ifcombine.cc
> > > +++ b/gcc/tree-ssa-ifcombine.cc
> > > @@ -400,6 +400,38 @@ update_profile_after_ifcombine (basic_block 
> > > inner_cond_bb,
> > >    outer2->probability = profile_probability::never ();
> > >  }
> > >
> > > +/* Returns true if inner_cond_bb contains just the condition or 1/2 
> > > statements
> > > +   that define lhs or rhs with a nop conversion. */
> > > +
> > > +static bool
> > > +can_combine_bbs_with_short_circuit (basic_block inner_cond_bb, tree lhs, 
> > > tree rhs)
> > > +{
> > > +  gimple_stmt_iterator gsi;
> > > +  gsi = gsi_start_nondebug_after_labels_bb (inner_cond_bb);
> > > +  /* If only the condition, this should be allowed. */
> > > +  if (gsi_one_before_end_p (gsi))
> > > +    return true;
> > > +  /* Can have up to 2 statements defining each of lhs/rhs. */
> > > +  for (int i = 0; i < 2; i++)
> > > +    {
> > > +      gimple *stmt = gsi_stmt (gsi);
> > > +      if (!gimple_assign_cast_p (stmt))
> > > +       return false;
> > > +      if (!tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (stmt)),
> > > +                                 TREE_TYPE (gimple_assign_rhs1 (stmt))))
> >
> > I would say non-nop conversions should be fine as well - not sure why
> > gimple_assign_cast_p covers FIX_TRUNC_EXPR but for example not
> > FLOAT_EXPR - so possibly is_gimple_assign && CONVERT_EXPR_CODE_P (..)
> > would be a better check?
>
> I will check on that in a little bit but I think so.

Just a quick update on this. Without checking for a nop_conversion,
gcc.dg/tree-ssa/pr111456-1.c fails.
I am looking into a way of fixing that still. I think there is a
missed optimization still, plus the way ifcombine uses fold_build2_loc
to build the full tree and then does force_gimple_operand_gsi_1 also
seems like it is not helping (though I could be wrong).

Thanks,
Andrew

>
> >
> > > +       return false;
> > > +      /* The defining statement needs to match either the lhs or rhs of
> > > +         the condition. */
> > > +      if (!operand_equal_p (lhs, gimple_assign_lhs (stmt))
> > > +          && !operand_equal_p (rhs, gimple_assign_lhs (stmt)))
> >
> > I think you can use == here.
>
> Yes I was thinking using `!=` here was fine but I was not 100% sure at
> the time I wrote it.
>
> >
> > OK with that change but note this might conflict with the series from 
> > Alexandre.
>
> I looked and I think it won't conflict, Alexandre's changes did not
> touch the logical_op_non_short_circuit case as far as I could see.
>
> Note I am going to wait on applying this until I get the patch for PR
> 117346 approved (I should be posting it later today) as this patch
> exposed a missed optimization with ccmp on aarch64.
>
> Thanks,
> Andrew
>
> >
> > Richard.
> >
> > > +        return false;
> > > +      gsi_next_nondebug (&gsi);
> > > +      if (gsi_one_before_end_p (gsi))
> > > +       return true;
> > > +    }
> > > +  return false;
> > > +}
> > > +
> > >  /* If-convert on a and pattern with a common else block.  The inner
> > >     if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB.
> > >     inner_inv, outer_inv and result_inv indicate whether the conditions
> > > @@ -610,8 +642,11 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool 
> > > inner_inv,
> > >               = param_logical_op_non_short_circuit;
> > >           if (!logical_op_non_short_circuit || sanitize_coverage_p ())
> > >             return false;
> > > -         /* Only do this optimization if the inner bb contains only the 
> > > conditional. */
> > > -         if (!gsi_one_before_end_p (gsi_start_nondebug_after_labels_bb 
> > > (inner_cond_bb)))
> > > +         /* Only do this optimization if the inner bb contains only the 
> > > conditional
> > > +            or there is one or 2 statements which are nop conversion for 
> > > the comparison. */
> > > +         if (!can_combine_bbs_with_short_circuit (inner_cond_bb,
> > > +                                                  gimple_cond_lhs 
> > > (inner_cond),
> > > +                                                  gimple_cond_rhs 
> > > (inner_cond)))
> > >             return false;
> > >           t1 = fold_build2_loc (gimple_location (inner_cond),
> > >                                 inner_cond_code,
> > > --
> > > 2.43.0
> > >

Reply via email to