On Sun, 3 Jan 2021, Jakub Jelinek wrote: > Hi! > > As the testcase shows, we punt unnecessarily on popcount loop idioms if > the type is smaller than int or larger than long long. > Smaller type than int can be handled by zero-extending the argument to > unsigned int, and types twice as long as long long by doing > __builtin_popcountll on both halves of the __int128. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK. Thanks, Richard. > 2020-01-03 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/95771 > * tree-ssa-loop-niter.c (number_of_iterations_popcount): Handle types > with precision smaller than int's precision and types with precision > twice as large as long long. Formatting fixes. > > * gcc.target/i386/pr95771.c: New test. > > --- gcc/tree-ssa-loop-niter.c.jj 2020-10-12 12:30:34.348813027 +0200 > +++ gcc/tree-ssa-loop-niter.c 2021-01-02 19:38:17.858170445 +0100 > @@ -2666,27 +2666,45 @@ number_of_iterations_popcount (loop_p lo > > /* We found a match. Get the corresponding popcount builtin. */ > tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx); > - if (TYPE_PRECISION (TREE_TYPE (src)) == TYPE_PRECISION (integer_type_node)) > + if (TYPE_PRECISION (TREE_TYPE (src)) <= TYPE_PRECISION (integer_type_node)) > fn = builtin_decl_implicit (BUILT_IN_POPCOUNT); > - else if (TYPE_PRECISION (TREE_TYPE (src)) == TYPE_PRECISION > - (long_integer_type_node)) > + else if (TYPE_PRECISION (TREE_TYPE (src)) > + == TYPE_PRECISION (long_integer_type_node)) > fn = builtin_decl_implicit (BUILT_IN_POPCOUNTL); > - else if (TYPE_PRECISION (TREE_TYPE (src)) == TYPE_PRECISION > - (long_long_integer_type_node)) > + else if (TYPE_PRECISION (TREE_TYPE (src)) > + == TYPE_PRECISION (long_long_integer_type_node) > + || (TYPE_PRECISION (TREE_TYPE (src)) > + == 2 * TYPE_PRECISION (long_long_integer_type_node))) > fn = builtin_decl_implicit (BUILT_IN_POPCOUNTLL); > > - /* ??? Support promoting char/short to int. */ > if (!fn) > return false; > > /* Update NITER params accordingly */ > tree utype = unsigned_type_for (TREE_TYPE (src)); > src = fold_convert (utype, src); > - tree call = fold_convert (utype, build_call_expr (fn, 1, src)); > + if (TYPE_PRECISION (TREE_TYPE (src)) < TYPE_PRECISION (integer_type_node)) > + src = fold_convert (unsigned_type_node, src); > + tree call; > + if (TYPE_PRECISION (TREE_TYPE (src)) > + == 2 * TYPE_PRECISION (long_long_integer_type_node)) > + { > + int prec = TYPE_PRECISION (long_long_integer_type_node); > + tree src1 = fold_convert (long_long_unsigned_type_node, > + fold_build2 (RSHIFT_EXPR, TREE_TYPE (src), > + unshare_expr (src), > + build_int_cst (integer_type_node, > + prec))); > + tree src2 = fold_convert (long_long_unsigned_type_node, src); > + call = build_call_expr (fn, 1, src1); > + call = fold_build2 (PLUS_EXPR, TREE_TYPE (call), call, > + build_call_expr (fn, 1, src2)); > + call = fold_convert (utype, call); > + } > + else > + call = fold_convert (utype, build_call_expr (fn, 1, src)); > if (adjust) > - iter = fold_build2 (MINUS_EXPR, utype, > - call, > - build_int_cst (utype, 1)); > + iter = fold_build2 (MINUS_EXPR, utype, call, build_int_cst (utype, 1)); > else > iter = call; > > @@ -2703,10 +2721,9 @@ number_of_iterations_popcount (loop_p lo > if (adjust) > { > tree may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src, > - build_zero_cst > - (TREE_TYPE (src))); > - niter->may_be_zero = > - simplify_using_initial_conditions (loop, may_be_zero); > + build_zero_cst (TREE_TYPE (src))); > + niter->may_be_zero > + = simplify_using_initial_conditions (loop, may_be_zero); > } > else > niter->may_be_zero = boolean_false_node; > --- gcc/testsuite/gcc.target/i386/pr95771.c.jj 2021-01-02 > 19:53:27.499768266 +0100 > +++ gcc/testsuite/gcc.target/i386/pr95771.c 2021-01-02 19:53:12.782936545 > +0100 > @@ -0,0 +1,67 @@ > +/* PR tree-optimization/95771 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mpopcnt -fdump-tree-optimized" } */ > +/* { dg-final { scan-tree-dump-times " = __builtin_popcount" 6 "optimized" { > target int128 } } } */ > +/* { dg-final { scan-tree-dump-times " = __builtin_popcount" 4 "optimized" { > target { ! int128 } } } } */ > + > +int > +foo (unsigned char x) > +{ > + int i = 0; > + while (x) > + { > + x &= x - 1; > + ++i; > + } > + return i; > +} > + > +int > +bar (unsigned short x) > +{ > + int i = 0; > + while (x) > + { > + x &= x - 1; > + ++i; > + } > + return i; > +} > + > +int > +baz (unsigned int x) > +{ > + int i = 0; > + while (x) > + { > + x &= x - 1; > + ++i; > + } > + return i; > +} > + > +int > +qux (unsigned long long x) > +{ > + int i = 0; > + while (x) > + { > + x &= x - 1; > + ++i; > + } > + return i; > +} > + > +#ifdef __SIZEOF_INT128__ > +int > +corge (unsigned __int128 x) > +{ > + int i = 0; > + while (x) > + { > + x &= x - 1; > + ++i; > + } > + return i; > +} > +#endif > > Jakub > > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)