Hi Richard, I accidentally pushed this patch earlier in the mistaken belief that you'd already approved it. It looks uncontroversial to me - it just adds IFN support to build_popcount_expr, analogous to the changes you suggested and approved for build_cltz_expr (and adjusts testcases accordingly). I might have incorporated it into an earlier patch in this series if I hadn't already pushed that earlier patch.
Is this OK to leave in master now? Thanks, Andrew On Thu, Dec 22, 2022 at 05:43:21PM +0000, Andrew Carlotti via Gcc-patches wrote: > Bootstrapped and regression tested on aarch64-unknown-linux-gnu and > x86_64-pc-linux-gnu - ok to merge? > > gcc/ChangeLog: > > * tree-ssa-loop-niter.cc (build_popcount_expr): Add IFN support. > > gcc/testsuite/ChangeLog: > > * g++.dg/tree-ssa/pr86544.C: Add .POPCOUNT to tree scan regex. > * gcc.dg/tree-ssa/popcount.c: Likewise. > * gcc.dg/tree-ssa/popcount2.c: Likewise. > * gcc.dg/tree-ssa/popcount3.c: Likewise. > * gcc.target/aarch64/popcount4.c: Likewise. > * gcc.target/i386/pr95771.c: Likewise, and... > * gcc.target/i386/pr95771-2.c: ...split int128 test from above, > since this would emit just a single IFN if a TI optab is added. > > --- > > diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr86544.C > b/gcc/testsuite/g++.dg/tree-ssa/pr86544.C > index > ef438916a8019320564f444ace08e2f4b4190684..50befb36bac75de1cfa282e38358278b3288bd1c > 100644 > --- a/gcc/testsuite/g++.dg/tree-ssa/pr86544.C > +++ b/gcc/testsuite/g++.dg/tree-ssa/pr86544.C > @@ -12,5 +12,5 @@ int PopCount (long b) { > return c; > } > > -/* { dg-final { scan-tree-dump-times "__builtin_popcount" 1 "optimized" } } > */ > +/* { dg-final { scan-tree-dump-times "__builtin_popcount|\\.POPCOUNT" 1 > "optimized" } } */ > /* { dg-final { scan-tree-dump-times "if" 0 "phiopt4" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount.c > b/gcc/testsuite/gcc.dg/tree-ssa/popcount.c > index > b4694109411a4631697463519acbe7d9df65bf6e..efd906a0f5447f0beb3752eded3756999b02e6e6 > 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/popcount.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount.c > @@ -39,4 +39,4 @@ void PopCount3 (long b1) { > } > } > > -/* { dg-final { scan-tree-dump-times "__builtin_popcount" 3 "optimized" } } > */ > +/* { dg-final { scan-tree-dump-times "__builtin_popcount|\\.POPCOUNT" 3 > "optimized" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount2.c > 
b/gcc/testsuite/gcc.dg/tree-ssa/popcount2.c > index > ef73e345573de721833e98e89c252640a55f7c60..ae38a329bd4d868a762300d3218d68864c0fc4be > 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/popcount2.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount2.c > @@ -26,4 +26,4 @@ int main() > return 0; > } > > -/* { dg-final { scan-tree-dump-times "__builtin_popcount" 1 "optimized" } } > */ > +/* { dg-final { scan-tree-dump-times "__builtin_popcount|\\.POPCOUNT" 1 > "optimized" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount3.c > b/gcc/testsuite/gcc.dg/tree-ssa/popcount3.c > index > ef438916a8019320564f444ace08e2f4b4190684..50befb36bac75de1cfa282e38358278b3288bd1c > 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/popcount3.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount3.c > @@ -12,5 +12,5 @@ int PopCount (long b) { > return c; > } > > -/* { dg-final { scan-tree-dump-times "__builtin_popcount" 1 "optimized" } } > */ > +/* { dg-final { scan-tree-dump-times "__builtin_popcount|\\.POPCOUNT" 1 > "optimized" } } */ > /* { dg-final { scan-tree-dump-times "if" 0 "phiopt4" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/popcount4.c > b/gcc/testsuite/gcc.target/aarch64/popcount4.c > index > ee55b2e335223053ca024e95b7a13aa4af32550e..8aa15ff018d4b5fc6bb59e52af20d5c33cea2ee0 > 100644 > --- a/gcc/testsuite/gcc.target/aarch64/popcount4.c > +++ b/gcc/testsuite/gcc.target/aarch64/popcount4.c > @@ -11,4 +11,4 @@ int PopCount (long b) { > return c; > } > > -/* { dg-final { scan-tree-dump-times "__builtin_popcount" 0 "optimized" } } > */ > +/* { dg-final { scan-tree-dump-times "__builtin_popcount|\\.POPCOUNT" 0 > "optimized" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr95771-2.c > b/gcc/testsuite/gcc.target/i386/pr95771-2.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..1db9dc94d0b66477667624012221d6844c141a26 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr95771-2.c > @@ -0,0 +1,17 @@ > +/* PR tree-optimization/95771 */ > +/* { dg-do 
compile } */ > +/* { dg-require-effective-target int128 } */ > +/* { dg-options "-O2 -mpopcnt -fdump-tree-optimized" } */ > +/* { dg-final { scan-tree-dump " = __builtin_popcount| = \\.POPCOUNT" > "optimized" } } */ > + > +int > +corge (unsigned __int128 x) > +{ > + int i = 0; > + while (x) > + { > + x &= x - 1; > + ++i; > + } > + return i; > +} > diff --git a/gcc/testsuite/gcc.target/i386/pr95771.c > b/gcc/testsuite/gcc.target/i386/pr95771.c > index > d7b67017800b705b9854f561916c20901ea76803..d41be445f4a68613a082b8956fea3ceaf33d7e0f > 100644 > --- a/gcc/testsuite/gcc.target/i386/pr95771.c > +++ b/gcc/testsuite/gcc.target/i386/pr95771.c > @@ -1,8 +1,7 @@ > /* PR tree-optimization/95771 */ > /* { dg-do compile } */ > /* { dg-options "-O2 -mpopcnt -fdump-tree-optimized" } */ > -/* { dg-final { scan-tree-dump-times " = __builtin_popcount" 6 "optimized" { > target int128 } } } */ > -/* { dg-final { scan-tree-dump-times " = __builtin_popcount" 4 "optimized" { > target { ! int128 } } } } */ > +/* { dg-final { scan-tree-dump-times " = __builtin_popcount| = \\.POPCOUNT" > 4 "optimized" } } */ > > int > foo (unsigned char x) > @@ -51,17 +50,3 @@ qux (unsigned long long x) > } > return i; > } > - > -#ifdef __SIZEOF_INT128__ > -int > -corge (unsigned __int128 x) > -{ > - int i = 0; > - while (x) > - { > - x &= x - 1; > - ++i; > - } > - return i; > -} > -#endif > diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc > index > 9c2f9f3d5f6205bb5e7f490257800c660fdd0b8d..cc53b27329f8518bc2cacef1830768a140331b31 > 100644 > --- a/gcc/tree-ssa-loop-niter.cc > +++ b/gcc/tree-ssa-loop-niter.cc > @@ -2033,11 +2033,18 @@ static tree > build_popcount_expr (tree src) > { > tree fn; > + bool use_ifn = false; > int prec = TYPE_PRECISION (TREE_TYPE (src)); > int i_prec = TYPE_PRECISION (integer_type_node); > int li_prec = TYPE_PRECISION (long_integer_type_node); > int lli_prec = TYPE_PRECISION (long_long_integer_type_node); > - if (prec <= i_prec) > + > + tree utype = 
unsigned_type_for (TREE_TYPE (src)); > + src = fold_convert (utype, src); > + > + if (direct_internal_fn_supported_p (IFN_POPCOUNT, utype, > OPTIMIZE_FOR_BOTH)) > + use_ifn = true; > + else if (prec <= i_prec) > fn = builtin_decl_implicit (BUILT_IN_POPCOUNT); > else if (prec == li_prec) > fn = builtin_decl_implicit (BUILT_IN_POPCOUNTL); > @@ -2046,12 +2053,11 @@ build_popcount_expr (tree src) > else > return NULL_TREE; > > - tree utype = unsigned_type_for (TREE_TYPE (src)); > - src = fold_convert (utype, src); > - if (prec < i_prec) > - src = fold_convert (unsigned_type_node, src); > tree call; > - if (prec == 2 * lli_prec) > + if (use_ifn) > + call = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_POPCOUNT, > + integer_type_node, 1, src); > + else if (prec == 2 * lli_prec) > { > tree src1 = fold_convert (long_long_unsigned_type_node, > fold_build2 (RSHIFT_EXPR, TREE_TYPE (src), > @@ -2064,7 +2070,12 @@ build_popcount_expr (tree src) > call = fold_build2 (PLUS_EXPR, integer_type_node, call1, call2); > } > else > - call = build_call_expr (fn, 1, src); > + { > + if (prec < i_prec) > + src = fold_convert (unsigned_type_node, src); > + > + call = build_call_expr (fn, 1, src); > + } > > return call; > }