On Sun, 3 Jan 2021, Jakub Jelinek wrote:

> Hi!
> 
> As the testcase shows, we punt unnecessarily on popcount loop idioms if
> the type is smaller than int or larger than long long.
> Smaller type than int can be handled by zero-extending the argument to
> unsigned int, and types twice as long as long long by doing
> __builtin_popcountll on both halves of the __int128.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2020-01-03  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR tree-optimization/95771
>       * tree-ssa-loop-niter.c (number_of_iterations_popcount): Handle types
>       with precision smaller than int's precision and types with precision
>       twice as large as long long.  Formatting fixes.
> 
>       * gcc.target/i386/pr95771.c: New test.
> 
> --- gcc/tree-ssa-loop-niter.c.jj      2020-10-12 12:30:34.348813027 +0200
> +++ gcc/tree-ssa-loop-niter.c 2021-01-02 19:38:17.858170445 +0100
> @@ -2666,27 +2666,45 @@ number_of_iterations_popcount (loop_p lo
>  
>    /* We found a match. Get the corresponding popcount builtin.  */
>    tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx);
> -  if (TYPE_PRECISION (TREE_TYPE (src)) == TYPE_PRECISION (integer_type_node))
> +  if (TYPE_PRECISION (TREE_TYPE (src)) <= TYPE_PRECISION (integer_type_node))
>      fn = builtin_decl_implicit (BUILT_IN_POPCOUNT);
> -  else if (TYPE_PRECISION (TREE_TYPE (src)) == TYPE_PRECISION
> -        (long_integer_type_node))
> +  else if (TYPE_PRECISION (TREE_TYPE (src))
> +        == TYPE_PRECISION (long_integer_type_node))
>      fn = builtin_decl_implicit (BUILT_IN_POPCOUNTL);
> -  else if (TYPE_PRECISION (TREE_TYPE (src)) == TYPE_PRECISION
> -        (long_long_integer_type_node))
> +  else if (TYPE_PRECISION (TREE_TYPE (src))
> +        == TYPE_PRECISION (long_long_integer_type_node)
> +        || (TYPE_PRECISION (TREE_TYPE (src))
> +            == 2 * TYPE_PRECISION (long_long_integer_type_node)))
>      fn = builtin_decl_implicit (BUILT_IN_POPCOUNTLL);
>  
> -  /* ??? Support promoting char/short to int.  */
>    if (!fn)
>      return false;
>  
>    /* Update NITER params accordingly  */
>    tree utype = unsigned_type_for (TREE_TYPE (src));
>    src = fold_convert (utype, src);
> -  tree call = fold_convert (utype, build_call_expr (fn, 1, src));
> +  if (TYPE_PRECISION (TREE_TYPE (src)) < TYPE_PRECISION (integer_type_node))
> +    src = fold_convert (unsigned_type_node, src);
> +  tree call;
> +  if (TYPE_PRECISION (TREE_TYPE (src))
> +      == 2 * TYPE_PRECISION (long_long_integer_type_node))
> +    {
> +      int prec = TYPE_PRECISION (long_long_integer_type_node);
> +      tree src1 = fold_convert (long_long_unsigned_type_node,
> +                             fold_build2 (RSHIFT_EXPR, TREE_TYPE (src),
> +                                          unshare_expr (src),
> +                                          build_int_cst (integer_type_node,
> +                                                         prec)));
> +      tree src2 = fold_convert (long_long_unsigned_type_node, src);
> +      call = build_call_expr (fn, 1, src1);
> +      call = fold_build2 (PLUS_EXPR, TREE_TYPE (call), call,
> +                       build_call_expr (fn, 1, src2));
> +      call = fold_convert (utype, call);
> +    }
> +  else
> +    call = fold_convert (utype, build_call_expr (fn, 1, src));
>    if (adjust)
> -    iter = fold_build2 (MINUS_EXPR, utype,
> -                     call,
> -                     build_int_cst (utype, 1));
> +    iter = fold_build2 (MINUS_EXPR, utype, call, build_int_cst (utype, 1));
>    else
>      iter = call;
>  
> @@ -2703,10 +2721,9 @@ number_of_iterations_popcount (loop_p lo
>    if (adjust)
>      {
>        tree may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src,
> -                                   build_zero_cst
> -                                   (TREE_TYPE (src)));
> -      niter->may_be_zero =
> -     simplify_using_initial_conditions (loop, may_be_zero);
> +                                   build_zero_cst (TREE_TYPE (src)));
> +      niter->may_be_zero
> +     = simplify_using_initial_conditions (loop, may_be_zero);
>      }
>    else
>      niter->may_be_zero = boolean_false_node;
> --- gcc/testsuite/gcc.target/i386/pr95771.c.jj        2021-01-02 
> 19:53:27.499768266 +0100
> +++ gcc/testsuite/gcc.target/i386/pr95771.c   2021-01-02 19:53:12.782936545 
> +0100
> @@ -0,0 +1,67 @@
> +/* PR tree-optimization/95771 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mpopcnt -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-times " = __builtin_popcount" 6 "optimized" { 
> target int128 } } } */
> +/* { dg-final { scan-tree-dump-times " = __builtin_popcount" 4 "optimized" { 
> target { ! int128 } } } } */
> +
> +int
> +foo (unsigned char x)
> +{
> +  int i = 0;
> +  while (x)
> +    {
> +      x &= x - 1;
> +      ++i;
> +    }
> +  return i;
> +}
> +
> +int
> +bar (unsigned short x)
> +{
> +  int i = 0;
> +  while (x)
> +    {
> +      x &= x - 1;
> +      ++i;
> +    }
> +  return i;
> +}
> +
> +int
> +baz (unsigned int x)
> +{
> +  int i = 0;
> +  while (x)
> +    {
> +      x &= x - 1;
> +      ++i;
> +    }
> +  return i;
> +}
> +
> +int
> +qux (unsigned long long x)
> +{
> +  int i = 0;
> +  while (x)
> +    {
> +      x &= x - 1;
> +      ++i;
> +    }
> +  return i;
> +}
> +
> +#ifdef __SIZEOF_INT128__
> +int
> +corge (unsigned __int128 x)
> +{
> +  int i = 0;
> +  while (x)
> +    {
> +      x &= x - 1;
> +      ++i;
> +    }
> +  return i;
> +}
> +#endif
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Reply via email to