On Tue, Oct 29, 2024 at 4:29 AM Andrew Pinski <quic_apin...@quicinc.com> wrote: > > r0-126134-g5d2a9da9a7f7c1 added support for circuiting and combing the ifs > into using either AND or OR. But it only allowed the inner condition > basic block having the conditional only. This changes to allow up to 2 > defining > statements as long as they are just nop conversions for either the lhs or rhs > of the conditional. > > This should allow to use ccmp on aarch64 and x86_64 (APX) slightly more than > before. > > Boootstrapped and tested on x86_64-linux-gnu. > > PR tree-optimization/85605 > > gcc/ChangeLog: > > * tree-ssa-ifcombine.cc (can_combine_bbs_with_short_circuit): New > function. > (ifcombine_ifandif): Use can_combine_bbs_with_short_circuit instead > of checking > if iterator is one before the last statement. > > gcc/testsuite/ChangeLog: > > * g++.dg/tree-ssa/ifcombine-ccmp-1.C: New test. > * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c: New test. > * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c: New test. > > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> > --- > .../g++.dg/tree-ssa/ifcombine-ccmp-1.C | 27 +++++++++++++ > .../gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c | 18 +++++++++ > .../gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c | 19 +++++++++ > gcc/tree-ssa-ifcombine.cc | 39 ++++++++++++++++++- > 4 files changed, 101 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c > > diff --git a/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C > b/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C > new file mode 100644 > index 00000000000..282cec8c628 > --- /dev/null > +++ b/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C > @@ -0,0 +1,27 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -g -fdump-tree-optimized --param > logical-op-non-short-circuit=1" } */ > + > +/* PR tree-optimization/85605 */ > +#include <stdint.h> > + > +template<class T,class T2> > +inline bool cmp(T a, T2 b) { > + return a<0 ? true : T2(a) < b; > +} > + > +template<class T,class T2> > +inline bool cmp2(T a, T2 b) { > + return (a<0) | (T2(a) < b); > +} > + > +bool f(int a, int b) { > + return cmp(int64_t(a), unsigned(b)); > +} > + > +bool f2(int a, int b) { > + return cmp2(int64_t(a), unsigned(b)); > +} > + > + > +/* Both of these functions should be optimized to the same, and have an | in > them. */ > +/* { dg-final { scan-tree-dump-times " \\\| " 2 "optimized" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c > b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c > new file mode 100644 > index 00000000000..1bdbb9358b4 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -g -fdump-tree-optimized --param > logical-op-non-short-circuit=1" } */ > + > +/* PR tree-optimization/85605 */ > +/* Like ssa-ifcombine-ccmp-1.c but with conversion from unsigned to signed > in the > + inner bb which should be able to move too. */ > + > +int t (int a, unsigned b) > +{ > + if (a > 0) > + { > + signed t = b; > + if (t > 0) > + return 0; > + } > + return 1; > +} > +/* { dg-final { scan-tree-dump "\&" "optimized" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c > b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c > new file mode 100644 > index 00000000000..8d74b4932c5 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c > @@ -0,0 +1,19 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -g -fdump-tree-optimized --param > logical-op-non-short-circuit=1" } */ > + > +/* PR tree-optimization/85605 */ > +/* Like ssa-ifcombine-ccmp-2.c but with conversion from unsigned to signed > in the > + inner bb which should be able to move too. */ > + > +int t (int a, unsigned b) > +{ > + if (a > 0) > + goto L1; > + signed t = b; > + if (t > 0) > + goto L1; > + return 0; > +L1: > + return 1; > +} > +/* { dg-final { scan-tree-dump "\|" "optimized" } } */ > diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc > index 39702929fc0..3acecda31cc 100644 > --- a/gcc/tree-ssa-ifcombine.cc > +++ b/gcc/tree-ssa-ifcombine.cc > @@ -400,6 +400,38 @@ update_profile_after_ifcombine (basic_block > inner_cond_bb, > outer2->probability = profile_probability::never (); > } > > +/* Returns true if inner_cond_bb contains just the condition or 1/2 > statements > + that define lhs or rhs with a nop conversion. */ > + > +static bool > +can_combine_bbs_with_short_circuit (basic_block inner_cond_bb, tree lhs, > tree rhs) > +{ > + gimple_stmt_iterator gsi; > + gsi = gsi_start_nondebug_after_labels_bb (inner_cond_bb); > + /* If only the condition, this should be allowed. */ > + if (gsi_one_before_end_p (gsi)) > + return true; > + /* Can have up to 2 statements defining each of lhs/rhs. */ > + for (int i = 0; i < 2; i++) > + { > + gimple *stmt = gsi_stmt (gsi); > + if (!gimple_assign_cast_p (stmt)) > + return false; > + if (!tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (stmt)), > + TREE_TYPE (gimple_assign_rhs1 (stmt))))
I would say non-nop conversions should be fine as well - not sure why gimple_assign_cast_p covers FIX_TRUNC_EXPR but for example not FLOAT_EXPR - so possibly is_gimple_assign && CONVERT_EXPR_CODE_P (..) would be a better check? > + return false; > + /* The defining statement needs to match either the lhs or rhs of > + the condition. */ > + if (!operand_equal_p (lhs, gimple_assign_lhs (stmt)) > + && !operand_equal_p (rhs, gimple_assign_lhs (stmt))) I think you can use == here. OK with that change but note this might conflict with the series from Alexandre. Richard. > + return false; > + gsi_next_nondebug (&gsi); > + if (gsi_one_before_end_p (gsi)) > + return true; > + } > + return false; > +} > + > /* If-convert on a and pattern with a common else block. The inner > if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB. > inner_inv, outer_inv and result_inv indicate whether the conditions > @@ -610,8 +642,11 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool > inner_inv, > = param_logical_op_non_short_circuit; > if (!logical_op_non_short_circuit || sanitize_coverage_p ()) > return false; > - /* Only do this optimization if the inner bb contains only the > conditional. */ > - if (!gsi_one_before_end_p (gsi_start_nondebug_after_labels_bb > (inner_cond_bb))) > + /* Only do this optimization if the inner bb contains only the > conditional > + or there is one or 2 statements which are nop conversion for the > comparison. */ > + if (!can_combine_bbs_with_short_circuit (inner_cond_bb, > + gimple_cond_lhs > (inner_cond), > + gimple_cond_rhs > (inner_cond))) > return false; > t1 = fold_build2_loc (gimple_location (inner_cond), > inner_cond_code, > -- > 2.43.0 >