On Tue, Oct 29, 2024 at 10:10 AM Andrew Pinski <pins...@gmail.com> wrote: > > On Tue, Oct 29, 2024 at 5:59 AM Richard Biener > <richard.guent...@gmail.com> wrote: > > > > On Tue, Oct 29, 2024 at 4:29 AM Andrew Pinski <quic_apin...@quicinc.com> > > wrote: > > > > > > r0-126134-g5d2a9da9a7f7c1 added support for circuiting and combing the ifs > > > into using either AND or OR. But it only allowed the inner condition > > > basic block having the conditional only. This changes to allow up to 2 > > > defining > > > statements as long as they are just nop conversions for either the lhs or > > > rhs > > > of the conditional. > > > > > > This should allow to use ccmp on aarch64 and x86_64 (APX) slightly more > > > than before. > > > > > > Boootstrapped and tested on x86_64-linux-gnu. > > > > > > PR tree-optimization/85605 > > > > > > gcc/ChangeLog: > > > > > > * tree-ssa-ifcombine.cc (can_combine_bbs_with_short_circuit): New > > > function. > > > (ifcombine_ifandif): Use can_combine_bbs_with_short_circuit > > > instead of checking > > > if iterator is one before the last statement. > > > > > > gcc/testsuite/ChangeLog: > > > > > > * g++.dg/tree-ssa/ifcombine-ccmp-1.C: New test. > > > * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c: New test. > > > * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c: New test. > > > > > > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> > > > --- > > > .../g++.dg/tree-ssa/ifcombine-ccmp-1.C | 27 +++++++++++++ > > > .../gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c | 18 +++++++++ > > > .../gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c | 19 +++++++++ > > > gcc/tree-ssa-ifcombine.cc | 39 ++++++++++++++++++- > > > 4 files changed, 101 insertions(+), 2 deletions(-) > > > create mode 100644 gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C > > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c > > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c > > > > > > diff --git a/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C > > > b/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C > > > new file mode 100644 > > > index 00000000000..282cec8c628 > > > --- /dev/null > > > +++ b/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C > > > @@ -0,0 +1,27 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-options "-O2 -g -fdump-tree-optimized --param > > > logical-op-non-short-circuit=1" } */ > > > + > > > +/* PR tree-optimization/85605 */ > > > +#include <stdint.h> > > > + > > > +template<class T,class T2> > > > +inline bool cmp(T a, T2 b) { > > > + return a<0 ? true : T2(a) < b; > > > +} > > > + > > > +template<class T,class T2> > > > +inline bool cmp2(T a, T2 b) { > > > + return (a<0) | (T2(a) < b); > > > +} > > > + > > > +bool f(int a, int b) { > > > + return cmp(int64_t(a), unsigned(b)); > > > +} > > > + > > > +bool f2(int a, int b) { > > > + return cmp2(int64_t(a), unsigned(b)); > > > +} > > > + > > > + > > > +/* Both of these functions should be optimized to the same, and have an > > > | in them. */ > > > +/* { dg-final { scan-tree-dump-times " \\\| " 2 "optimized" } } */ > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c > > > b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c > > > new file mode 100644 > > > index 00000000000..1bdbb9358b4 > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c > > > @@ -0,0 +1,18 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-options "-O2 -g -fdump-tree-optimized --param > > > logical-op-non-short-circuit=1" } */ > > > + > > > +/* PR tree-optimization/85605 */ > > > +/* Like ssa-ifcombine-ccmp-1.c but with conversion from unsigned to > > > signed in the > > > + inner bb which should be able to move too. */ > > > + > > > +int t (int a, unsigned b) > > > +{ > > > + if (a > 0) > > > + { > > > + signed t = b; > > > + if (t > 0) > > > + return 0; > > > + } > > > + return 1; > > > +} > > > +/* { dg-final { scan-tree-dump "\&" "optimized" } } */ > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c > > > b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c > > > new file mode 100644 > > > index 00000000000..8d74b4932c5 > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c > > > @@ -0,0 +1,19 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-options "-O2 -g -fdump-tree-optimized --param > > > logical-op-non-short-circuit=1" } */ > > > + > > > +/* PR tree-optimization/85605 */ > > > +/* Like ssa-ifcombine-ccmp-2.c but with conversion from unsigned to > > > signed in the > > > + inner bb which should be able to move too. */ > > > + > > > +int t (int a, unsigned b) > > > +{ > > > + if (a > 0) > > > + goto L1; > > > + signed t = b; > > > + if (t > 0) > > > + goto L1; > > > + return 0; > > > +L1: > > > + return 1; > > > +} > > > +/* { dg-final { scan-tree-dump "\|" "optimized" } } */ > > > diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc > > > index 39702929fc0..3acecda31cc 100644 > > > --- a/gcc/tree-ssa-ifcombine.cc > > > +++ b/gcc/tree-ssa-ifcombine.cc > > > @@ -400,6 +400,38 @@ update_profile_after_ifcombine (basic_block > > > inner_cond_bb, > > > outer2->probability = profile_probability::never (); > > > } > > > > > > +/* Returns true if inner_cond_bb contains just the condition or 1/2 > > > statements > > > + that define lhs or rhs with a nop conversion. */ > > > + > > > +static bool > > > +can_combine_bbs_with_short_circuit (basic_block inner_cond_bb, tree lhs, > > > tree rhs) > > > +{ > > > + gimple_stmt_iterator gsi; > > > + gsi = gsi_start_nondebug_after_labels_bb (inner_cond_bb); > > > + /* If only the condition, this should be allowed. */ > > > + if (gsi_one_before_end_p (gsi)) > > > + return true; > > > + /* Can have up to 2 statements defining each of lhs/rhs. */ > > > + for (int i = 0; i < 2; i++) > > > + { > > > + gimple *stmt = gsi_stmt (gsi); > > > + if (!gimple_assign_cast_p (stmt)) > > > + return false; > > > + if (!tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (stmt)), > > > + TREE_TYPE (gimple_assign_rhs1 (stmt)))) > > > > I would say non-nop conversions should be fine as well - not sure why > > gimple_assign_cast_p covers FIX_TRUNC_EXPR but for example not > > FLOAT_EXPR - so possibly is_gimple_assign && CONVERT_EXPR_CODE_P (..) > > would be a better check? > > I will check on that in a little bit but I think so.
Just a quick update on this. Without checking for a nop_conversion, gcc.dg/tree-ssa/pr111456-1.c fails. I am looking into a way of fixing that still. I think there is a missed optimization still, plus the way ifcombine uses fold_build2_loc to build the full tree and then does force_gimple_operand_gsi_1 also seems like it is not helping (though I could be wrong). Thanks, Andrew > > > > > > + return false; > > > + /* The defining statement needs to match either the lhs or rhs of > > > + the condition. */ > > > + if (!operand_equal_p (lhs, gimple_assign_lhs (stmt)) > > > + && !operand_equal_p (rhs, gimple_assign_lhs (stmt))) > > > > I think you can use == here. > > Yes I was thinking using `!=` here was fine but I was not 100% sure at > the time I wrote it. > > > > > OK with that change but note this might conflict with the series from > > Alexandre. > > I looked and I think it won't conflict, Alexandre's changes did not > touch the logical_op_non_short_circuit case as far as I could see. > > Note I am going to wait on applying this until I get the patch for PR > 117346 approved (I should be posting it later today) as this patch > exposed a missed optimization with ccmp on aarch64. > > Thanks, > Andrew > > > > > Richard. > > > > > + return false; > > > + gsi_next_nondebug (&gsi); > > > + if (gsi_one_before_end_p (gsi)) > > > + return true; > > > + } > > > + return false; > > > +} > > > + > > > /* If-convert on a and pattern with a common else block. The inner > > > if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB. > > > inner_inv, outer_inv and result_inv indicate whether the conditions > > > @@ -610,8 +642,11 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool > > > inner_inv, > > > = param_logical_op_non_short_circuit; > > > if (!logical_op_non_short_circuit || sanitize_coverage_p ()) > > > return false; > > > - /* Only do this optimization if the inner bb contains only the > > > conditional. */ > > > - if (!gsi_one_before_end_p (gsi_start_nondebug_after_labels_bb > > > (inner_cond_bb))) > > > + /* Only do this optimization if the inner bb contains only the > > > conditional > > > + or there is one or 2 statements which are nop conversion for > > > the comparison. */ > > > + if (!can_combine_bbs_with_short_circuit (inner_cond_bb, > > > + gimple_cond_lhs > > > (inner_cond), > > > + gimple_cond_rhs > > > (inner_cond))) > > > return false; > > > t1 = fold_build2_loc (gimple_location (inner_cond), > > > inner_cond_code, > > > -- > > > 2.43.0 > > >