On Sat, 24 Aug 2024, Filip Kastl wrote:

> Hi,
> 
> bootstrapped and regtested on x86_64-linux.  Ok to push?
> 
> Cheers,
> Filip Kastl
> 
> 
> -- 8< --
> 
> 
> The gen_pow2p function generates (a & -a) == a as a fallback for
> POPCOUNT (a) == 1.  Not only is the bitmagic not equivalent to
> POPCOUNT (a) == 1 but it also introduces UB (consider signed
> a = INT_MIN).
> 
> This patch rewrites gen_pow2p to always use __builtin_popcount instead.
> This means that what the end result GIMPLE code is gets decided by an
> already existing machinery in a later pass.  That is a cleaner solution
> I think.  This existing machinery also uses a ^ (a - 1) > a - 1 which is
> the correct bitmagic.
> 
> While rewriting gen_pow2p I had to add logic for converting the
> operand's type to a type that __builtin_popcount accepts.  I naturally
> also added this logic to gen_log2.  Thanks to this, exponential index
> transform gains the capability to handle all operand types with
> precision at most that of long long int.
> 
>             PR tree-optimization/116355
> 
> gcc/ChangeLog:
> 
>       * tree-switch-conversion.cc (can_log2): Take into account the
>       conversion added to gen_log2.
>       (gen_log2): Add a conversion to a type compatible with FFS.
>       (can_pow2p): New function.
>       (gen_pow2p): Rewrite to use __builtin_popcount instead of
>       manually inserting an internal fn call or bitmagic.
>       (switch_conversion::is_exp_index_transform_viable): Call
>       can_pow2p.
>       (switch_conversion::exp_index_transform): Params of gen_pow2p
>       changed so update its call.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.target/i386/switch-exp-transform-1.c: Don't test for
>       presence of POPCOUNT internal fn after switch conversion.  Test
>       for it after __builtin_popcount has had a chance to get
>       expanded.
>       * gcc.target/i386/switch-exp-transform-3.c: Also test char and
>       short.
> 
> Signed-off-by: Filip Kastl <fka...@suse.cz>
> ---
>  .../gcc.target/i386/switch-exp-transform-1.c  |   7 +-
>  .../gcc.target/i386/switch-exp-transform-3.c  |  98 ++++++++++++++-
>  gcc/tree-switch-conversion.cc                 | 117 ++++++++++++++----
>  3 files changed, 192 insertions(+), 30 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c 
> b/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c
> index 53d31460ba3..a8c9e03e515 100644
> --- a/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c
> +++ b/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c
> @@ -1,9 +1,10 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fdump-tree-switchconv -mpopcnt -mbmi" } */
> +/* { dg-options "-O2 -fdump-tree-switchconv -fdump-tree-widening_mul 
> -mpopcnt -mbmi" } */
>  
>  /* Checks that exponential index transform enables switch conversion to 
> convert
>     this switch into an array lookup.  Also checks that the "index variable 
> is a
> -   power of two" check has been generated.  */
> +   power of two" check has been generated and that it has been later expanded
> +   into an internal function.  */
>  
>  int foo(unsigned bar)
>  {
> @@ -29,4 +30,4 @@ int foo(unsigned bar)
>  }
>  
>  /* { dg-final { scan-tree-dump "CSWTCH" "switchconv" } } */
> -/* { dg-final { scan-tree-dump "POPCOUNT" "switchconv" } } */
> +/* { dg-final { scan-tree-dump "POPCOUNT" "widening_mul" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/switch-exp-transform-3.c 
> b/gcc/testsuite/gcc.target/i386/switch-exp-transform-3.c
> index 64a7b146172..5011d1ebb0e 100644
> --- a/gcc/testsuite/gcc.target/i386/switch-exp-transform-3.c
> +++ b/gcc/testsuite/gcc.target/i386/switch-exp-transform-3.c
> @@ -3,10 +3,104 @@
>  
>  /* Checks that the exponential index transformation is done for all these 
> types
>     of the index variable:
> +   - (unsigned) char
> +   - (unsigned) short
>     - (unsigned) int
>     - (unsigned) long
>     - (unsigned) long long  */
>  
> +int unopt_char(char bit_position)
> +{
> +    switch (bit_position)
> +    {
> +        case (1 << 0):
> +            return 0;
> +        case (1 << 1):
> +            return 1;
> +        case (1 << 2):
> +            return 2;
> +        case (1 << 3):
> +            return 3;
> +        case (1 << 4):
> +            return 4;
> +        case (1 << 5):
> +            return 5;
> +        case (1 << 6):
> +            return 6;
> +        default:
> +            return 0;
> +    }
> +}
> +
> +int unopt_unsigned_char(unsigned char bit_position)
> +{
> +    switch (bit_position)
> +    {
> +        case (1 << 0):
> +            return 0;
> +        case (1 << 1):
> +            return 1;
> +        case (1 << 2):
> +            return 2;
> +        case (1 << 3):
> +            return 3;
> +        case (1 << 4):
> +            return 4;
> +        case (1 << 5):
> +            return 5;
> +        case (1 << 6):
> +            return 6;
> +        default:
> +            return 0;
> +    }
> +}
> +
> +int unopt_short(short bit_position)
> +{
> +    switch (bit_position)
> +    {
> +        case (1 << 0):
> +            return 0;
> +        case (1 << 1):
> +            return 1;
> +        case (1 << 2):
> +            return 2;
> +        case (1 << 3):
> +            return 3;
> +        case (1 << 4):
> +            return 4;
> +        case (1 << 5):
> +            return 5;
> +        case (1 << 6):
> +            return 6;
> +        default:
> +            return 0;
> +    }
> +}
> +
> +int unopt_unsigned_short(unsigned short bit_position)
> +{
> +    switch (bit_position)
> +    {
> +        case (1 << 0):
> +            return 0;
> +        case (1 << 1):
> +            return 1;
> +        case (1 << 2):
> +            return 2;
> +        case (1 << 3):
> +            return 3;
> +        case (1 << 4):
> +            return 4;
> +        case (1 << 5):
> +            return 5;
> +        case (1 << 6):
> +            return 6;
> +        default:
> +            return 0;
> +    }
> +}
> +
>  int unopt_int(int bit_position)
>  {
>      switch (bit_position)
> @@ -149,5 +243,5 @@ int unopt_unsigned_long_long(unsigned long long 
> bit_position)
>  
>  #endif
>  
> -/* { dg-final { scan-tree-dump-times "Applying exponential index transform" 
> 4 "switchconv" { target ia32 } } } */
> -/* { dg-final { scan-tree-dump-times "Applying exponential index transform" 
> 6 "switchconv" { target { ! ia32 } } } } */
> +/* { dg-final { scan-tree-dump-times "Applying exponential index transform" 
> 8 "switchconv" { target ia32 } } } */
> +/* { dg-final { scan-tree-dump-times "Applying exponential index transform" 
> 10 "switchconv" { target { ! ia32 } } } } */
> diff --git a/gcc/tree-switch-conversion.cc b/gcc/tree-switch-conversion.cc
> index 4b11c8d25f4..9dc703f737c 100644
> --- a/gcc/tree-switch-conversion.cc
> +++ b/gcc/tree-switch-conversion.cc
> @@ -66,63 +66,131 @@ using namespace tree_switch_conversion;
>  /* Does the target have optabs needed to efficiently compute exact base two
>     logarithm of a value with type TYPE?
>  
> -   See gen_log2.  */
> +   Also see gen_log2.  */
>  
>  static bool
>  can_log2 (tree type, optimization_type opt_type)
>  {
>    /* Check if target supports FFS.  */
> -  return direct_internal_fn_supported_p (IFN_FFS, type, opt_type);
> +  int prec = TYPE_PRECISION (type);
> +  int i_prec = TYPE_PRECISION (integer_type_node);
> +  int li_prec = TYPE_PRECISION (long_integer_type_node);
> +  int lli_prec = TYPE_PRECISION (long_long_integer_type_node);
> +  tree new_type;
> +  if (prec <= i_prec)
> +    new_type = integer_type_node;

If you are supporting a cast how about doing

     if (prec <= i_prec
         && direct_internal_fn_supported_p (...))
       *type = integer_type_node;
...

so that if prec <= i_prec but the target only supports
FFS on long int we can do that?  Also report back the actual type
that's supported here ...

> +  else if (prec <= li_prec)
> +    new_type = long_integer_type_node;
> +  else if (prec <= lli_prec)
> +    new_type = long_long_integer_type_node;
> +  else
> +    return false;
> +  return direct_internal_fn_supported_p (IFN_FFS, new_type, opt_type);
>  }
>  
>  /* Assume that OP is a power of two.  Build a sequence of gimple statements
>     efficiently computing the base two logarithm of OP using special optabs.
>     Return the ssa name represeting the result of the logarithm through 
> RESULT.
>  
> -   Should only be used if target supports the needed optabs.  See can_log2.  
> */
> +   Should only be used if can_log2 returns true for type of OP.  */
>  
>  static gimple_seq
>  gen_log2 (tree op, location_t loc, tree *result)

... and pass it here

>  {
> -  tree type = TREE_TYPE (op);
>    gimple_seq stmts = NULL;
>    gimple_stmt_iterator gsi = gsi_last (stmts);
> -  tree tmp1 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, IFN_FFS, type, 
> op);
> -  tree tmp2 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, MINUS_EXPR, type,
> -                         tmp1, build_one_cst (type));
> -  *result = tmp2;
> +
> +  tree orig_type = TREE_TYPE (op);
> +  int prec = TYPE_PRECISION (orig_type);
> +  int i_prec = TYPE_PRECISION (integer_type_node);
> +  int li_prec = TYPE_PRECISION (long_integer_type_node);
> +
> +  tree type;
> +  if (prec <= i_prec)
> +    type = integer_type_node;
> +  else if (prec <= li_prec)
> +    type = long_integer_type_node;
> +  else
> +    type = long_long_integer_type_node;
> +  gcc_checking_assert (prec <= TYPE_PRECISION (long_long_integer_type_node));
> +
> +  /* Convert op to one of the types that FFS accepts.  */
> +  tree tmp1 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, CONVERT_EXPR, 
> type,
> +                         op);

Use gimple_convert (...)

> +  /* Build FFS (op) - 1.  */
> +  tree tmp2 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, IFN_FFS, 
> orig_type,
> +                         tmp1);
> +  tree tmp3 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, MINUS_EXPR,
> +                         orig_type, tmp2, build_one_cst (orig_type));
> +  *result = tmp3;
>    return stmts;
>  }
>  
> +/* Is it possible to efficiently check that a value of TYPE is a power of 2?
> +
> +   Also see gen_pow2p.  */
> +
> +static bool
> +can_pow2p (tree type)
> +{
> +  /* Check that we can express "is power of 2" using __builtin_popcount.  */
> +  int lli_prec = TYPE_PRECISION (long_long_unsigned_type_node);
> +  return TYPE_PRECISION (type) <= lli_prec;
> +}
> +
>  /* Build a sequence of gimple statements checking that OP is a power of 2.  
> Use
>     special optabs if target supports them.  Return the result as a
> -   boolen_type_node ssa name through RESULT.  */
> +   boolean_type_node ssa name through RESULT.  Assumes that OP's value will
> +   be non-negative.  The generated check may give arbitrary answer for 
> negative
> +   values.
> +
> +   Should only be used if can_pow2p returns true for type of OP.  */
>  
>  static gimple_seq
> -gen_pow2p (tree op, location_t loc, optimization_type opt_type, tree *result)
> +gen_pow2p (tree op, location_t loc, tree *result)
>  {
> -  tree type = TREE_TYPE (op);
>    gimple_seq stmts = NULL;
>    gimple_stmt_iterator gsi = gsi_last (stmts);
> -  if (direct_internal_fn_supported_p (IFN_POPCOUNT, type, opt_type))
> +
> +  int prec = TYPE_PRECISION (TREE_TYPE (op));
> +  int i_prec = TYPE_PRECISION (unsigned_type_node);
> +  int li_prec = TYPE_PRECISION (long_unsigned_type_node);
> +
> +  tree fn;
> +  tree type;
> +  if (prec <= i_prec)
>      {
> -      tree tmp = gimple_build (&gsi, false, GSI_NEW_STMT, loc, IFN_POPCOUNT,
> -                            type, op);
> -      *result = gimple_build (&gsi, false, GSI_NEW_STMT, loc, EQ_EXPR,
> -                           boolean_type_node, tmp, build_one_cst (type));
> +      type = unsigned_type_node;
> +      fn = builtin_decl_implicit (BUILT_IN_POPCOUNT);
> +    }
> +  else if (prec <= li_prec)
> +    {
> +      type = long_unsigned_type_node;
> +      fn = builtin_decl_implicit (BUILT_IN_POPCOUNTL);
>      }
>    else
>      {
> -      tree tmp1 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, NEGATE_EXPR,
> -                             type, op);
> -      tree tmp2 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, BIT_AND_EXPR,
> -                             type, op, tmp1);
> -      *result = gimple_build (&gsi, false, GSI_NEW_STMT, loc, EQ_EXPR,
> -                           boolean_type_node, tmp2, op);
> +      type = long_long_unsigned_type_node;
> +      fn = builtin_decl_implicit (BUILT_IN_POPCOUNTLL);
>      }
> +  gcc_checking_assert (prec <= TYPE_PRECISION 
> (long_long_unsigned_type_node));
> +
> +  /* Convert op to one of the types that __builtin_popcount{l,ll} accepts.  
> */
> +  tree tmp1 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, CONVERT_EXPR, 
> type,
> +                         op);

Please use NOP_EXPR or easier, gimple_convert (...) which has a similar
API as gimple_build.

> +  /* Build __builtin_popcount{l,ll} (op) == 1.  */
> +  gcall *call = gimple_build_call (fn, 1, tmp1);
> +  tree tmp2 = make_ssa_name (integer_type_node);
> +  gimple_set_lhs (call, tmp2);

You should be able to use gimple_build with builtin functions as well

Otherwise OK.

Thanks,
Richard.

> +  gsi_insert_after (&gsi, call, GSI_NEW_STMT);
> +  *result = gimple_build (&gsi, false, GSI_NEW_STMT, loc, EQ_EXPR,
> +                       boolean_type_node, tmp2,
> +                       build_one_cst (integer_type_node));
> +
>    return stmts;
>  }
>  
> +
>  /* Constructor.  */
>  
>  switch_conversion::switch_conversion (): m_final_bb (NULL),
> @@ -285,7 +353,7 @@ switch_conversion::is_exp_index_transform_viable (gswitch 
> *swtch)
>    unsigned num_labels = gimple_switch_num_labels (swtch);
>  
>    optimization_type opt_type = bb_optimization_type (swtch_bb);
> -  if (!can_log2 (index_type, opt_type))
> +  if (!can_log2 (index_type, opt_type) || !can_pow2p (index_type))
>      return false;
>  
>    /* Check that each case label corresponds only to one value
> @@ -380,8 +448,7 @@ switch_conversion::exp_index_transform (gswitch *swtch)
>    new_edge2->probability = profile_probability::even ();
>  
>    tree tmp;
> -  optimization_type opt_type = bb_optimization_type (cond_bb);
> -  gimple_seq stmts = gen_pow2p (index, UNKNOWN_LOCATION, opt_type, &tmp);
> +  gimple_seq stmts = gen_pow2p (index, UNKNOWN_LOCATION, &tmp);
>    gsi = gsi_last_bb (cond_bb);
>    gsi_insert_seq_after (&gsi, stmts, GSI_LAST_NEW_STMT);
>    gcond *stmt_cond = gimple_build_cond (NE_EXPR, tmp, boolean_false_node,
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to