On Wed, 21 Oct 2020, Jakub Jelinek wrote:

> Hi!
> 
> While we have at the RTL level noce_try_ifelse_collapse combined with
> simplify_cond_clz_ctz, that optimization doesn't always trigger because
> e.g. on powerpc there is an define_insn to compare a reg against zero and
> copy that register to another one and so we end up with a different pseudo
> in the simplify_cond_clz_ctz test and punt.
> 
> For targets that define C?Z_DEFINED_VALUE_AT_ZERO to 2 for certain modes,
> we can optimize it already in phiopt though, just need to ensure that
> we transform the __builtin_c?z* calls into .C?Z ifns because my recent
> VRP changes codified that the builtin calls are always undefined at zero,
> while ifns honor C?Z_DEFINED_VALUE_AT_ZERO equal to 2.
> And, in phiopt we already have popcount handling that does pretty much the
> same thing, except for always using a zero value rather than the one set
> by C?Z_DEFINED_VALUE_AT_ZERO.
> 
> So, this patch extends that function to handle not just popcount, but also
> clz and ctz.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2020-10-20  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR tree-optimization/97503
>       * tree-ssa-phiopt.c (cond_removal_in_popcount_pattern): Rename to ...
>       (cond_removal_in_popcount_clz_ctz_pattern): ... this.  Handle not just
>       popcount, but also clz and ctz if it has C?Z_DEFINED_VALUE_AT_ZERO 2.
> 
>       * gcc.dg/tree-ssa/pr97503.c: New test.
> 
> --- gcc/tree-ssa-phiopt.c.jj  2020-07-28 15:39:10.075755306 +0200
> +++ gcc/tree-ssa-phiopt.c     2020-10-20 17:46:16.971329154 +0200
> @@ -61,8 +61,9 @@ static bool minmax_replacement (basic_bl
>                               edge, edge, gimple *, tree, tree);
>  static bool abs_replacement (basic_block, basic_block,
>                            edge, edge, gimple *, tree, tree);
> -static bool cond_removal_in_popcount_pattern (basic_block, basic_block,
> -                                           edge, edge, gimple *, tree, tree);
> +static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, 
> basic_block,
> +                                                   edge, edge, gimple *,
> +                                                   tree, tree);
>  static bool cond_store_replacement (basic_block, basic_block, edge, edge,
>                                   hash_set<tree> *);
>  static bool cond_if_else_store_replacement (basic_block, basic_block, 
> basic_block);
> @@ -344,8 +345,9 @@ tree_ssa_phiopt_worker (bool do_store_el
>         else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
>           cfgchanged = true;
>         else if (!early_p
> -                && cond_removal_in_popcount_pattern (bb, bb1, e1, e2,
> -                                                     phi, arg0, arg1))
> +                && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
> +                                                             e2, phi, arg0,
> +                                                             arg1))
>           cfgchanged = true;
>         else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
>           cfgchanged = true;
> @@ -1777,16 +1779,20 @@ minmax_replacement (basic_block cond_bb,
>  
>     <bb 4>
>     c_12 = PHI <_9(2)>
> -*/
> +
> +   Similarly for __builtin_clz or __builtin_ctz if
> +   C?Z_DEFINED_VALUE_AT_ZERO is 2, optab is present and
> +   instead of 0 above it uses the value from that macro.  */
>  
>  static bool
> -cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
> -                               edge e1, edge e2,
> -                               gimple *phi, tree arg0, tree arg1)
> +cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
> +                                       basic_block middle_bb,
> +                                       edge e1, edge e2, gimple *phi,
> +                                       tree arg0, tree arg1)
>  {
>    gimple *cond;
>    gimple_stmt_iterator gsi, gsi_from;
> -  gimple *popcount;
> +  gimple *call;
>    gimple *cast = NULL;
>    tree lhs, arg;
>  
> @@ -1804,35 +1810,65 @@ cond_removal_in_popcount_pattern (basic_
>    gsi_next_nondebug (&gsi);
>    if (!gsi_end_p (gsi))
>      {
> -      popcount = gsi_stmt (gsi);
> +      call = gsi_stmt (gsi);
>        gsi_next_nondebug (&gsi);
>        if (!gsi_end_p (gsi))
>       return false;
>      }
>    else
>      {
> -      popcount = cast;
> +      call = cast;
>        cast = NULL;
>      }
>  
> -  /* Check that we have a popcount builtin.  */
> -  if (!is_gimple_call (popcount))
> +  /* Check that we have a popcount/clz/ctz builtin.  */
> +  if (!is_gimple_call (call) || gimple_call_num_args (call) != 1)
>      return false;
> -  combined_fn cfn = gimple_call_combined_fn (popcount);
> +
> +  arg = gimple_call_arg (call, 0);
> +  lhs = gimple_get_lhs (call);
> +
> +  if (lhs == NULL_TREE)
> +    return false;
> +
> +  combined_fn cfn = gimple_call_combined_fn (call);
> +  internal_fn ifn = IFN_LAST;
> +  int val = 0;
>    switch (cfn)
>      {
>      CASE_CFN_POPCOUNT:
>        break;
> +    CASE_CFN_CLZ:
> +      if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
> +     {
> +       scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
> +       if (optab_handler (clz_optab, mode) != CODE_FOR_nothing

I think it's better to use

        direct_internal_fn_supported_p (IFN_CLZ, TREE_TYPE (arg),
                                        OPTIMIZE_FOR_BOTH);

for these checks nowadays.  Otherwise OK.

Thanks,
Richard.


> +           && CLZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
> +         {
> +           ifn = IFN_CLZ;
> +           break;
> +         }
> +     }
> +      return false;
> +    CASE_CFN_CTZ:
> +      if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
> +     {
> +       scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
> +       if (optab_handler (ctz_optab, mode) != CODE_FOR_nothing
> +           && CTZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
> +         {
> +           ifn = IFN_CTZ;
> +           break;
> +         }
> +     }
> +      return false;
>      default:
>        return false;
>      }
>  
> -  arg = gimple_call_arg (popcount, 0);
> -  lhs = gimple_get_lhs (popcount);
> -
>    if (cast)
>      {
> -      /* We have a cast stmt feeding popcount builtin.  */
> +      /* We have a cast stmt feeding popcount/clz/ctz builtin.  */
>        /* Check that we have a cast prior to that.  */
>        if (gimple_code (cast) != GIMPLE_ASSIGN
>         || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (cast)))
> @@ -1845,7 +1881,7 @@ cond_removal_in_popcount_pattern (basic_
>  
>    cond = last_stmt (cond_bb);
>  
> -  /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount
> +  /* Cond_bb has a check for b_4 [!=|==] 0 before calling the 
> popcount/clz/ctz
>       builtin.  */
>    if (gimple_code (cond) != GIMPLE_COND
>        || (gimple_cond_code (cond) != NE_EXPR
> @@ -1865,10 +1901,13 @@ cond_removal_in_popcount_pattern (basic_
>      }
>  
>    /* Check PHI arguments.  */
> -  if (lhs != arg0 || !integer_zerop (arg1))
> +  if (lhs != arg0
> +      || TREE_CODE (arg1) != INTEGER_CST
> +      || wi::to_wide (arg1) != val)
>      return false;
>  
> -  /* And insert the popcount builtin and cast stmt before the cond_bb.  */
> +  /* And insert the popcount/clz/ctz builtin and cast stmt before the
> +     cond_bb.  */
>    gsi = gsi_last_bb (cond_bb);
>    if (cast)
>      {
> @@ -1876,9 +1915,19 @@ cond_removal_in_popcount_pattern (basic_
>        gsi_move_before (&gsi_from, &gsi);
>        reset_flow_sensitive_info (gimple_get_lhs (cast));
>      }
> -  gsi_from = gsi_for_stmt (popcount);
> -  gsi_move_before (&gsi_from, &gsi);
> -  reset_flow_sensitive_info (gimple_get_lhs (popcount));
> +  gsi_from = gsi_for_stmt (call);
> +  if (ifn == IFN_LAST || gimple_call_internal_p (call))
> +    gsi_move_before (&gsi_from, &gsi);
> +  else
> +    {
> +      /* For __builtin_c[lt]z* force .C[LT]Z ifn, because only
> +      the latter is well defined at zero.  */
> +      call = gimple_build_call_internal (ifn, 1, gimple_call_arg (call, 0));
> +      gimple_call_set_lhs (call, lhs);
> +      gsi_insert_before (&gsi, call, GSI_SAME_STMT);
> +      gsi_remove (&gsi_from, true);
> +    }
> +  reset_flow_sensitive_info (lhs);
>  
>    /* Now update the PHI and remove unneeded bbs.  */
>    replace_phi_edge_with_variable (cond_bb, e2, phi, lhs);
> --- gcc/testsuite/gcc.dg/tree-ssa/pr97503.c.jj        2020-10-20 
> 18:14:46.862749687 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr97503.c   2020-10-20 18:17:13.618640295 
> +0200
> @@ -0,0 +1,19 @@
> +/* PR tree-optimization/97503 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +/* { dg-additional-options "-mbmi -mlzcnt" { target i?86-*-* x86_64-*-* } } 
> */
> +/* { dg-final { scan-tree-dump-times "\.CLZ" 2 "optimized" { target { { 
> i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-* } && lp64 } } } } */
> +/* { dg-final { scan-tree-dump-not "__builtin_clz" "optimized" { target { { 
> i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-*} && lp64 } } } } */
> +/* { dg-final { scan-tree-dump-not "PHI <" "optimized" { target { { i?86-*-* 
> x86_64-*-* aarch64-*-* powerpc*-*-*} && lp64 } } } } */
> +
> +int
> +foo (int x)
> +{
> +  return x ? __builtin_clz (x) : 32;
> +}
> +
> +int
> +bar (unsigned long long x)
> +{
> +  return x ? __builtin_clzll (x) : 64;
> +}
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imend

Reply via email to