Ping!

Please review.

Thanks & Regards
Jeevitha

On 18/09/25 3:25 pm, jeevitha wrote:
> Hi All,
> 
> The following patch has been bootstrapped and regtested on powerpc64le-linux.
> 
> PowerPC vector shift left instructions (vslb, vslh, vslw, vsld) use modulo
> semantics for the shift amount: only the low log2(element_bit_width) bits
> of each element are used. A shift by (element_bit_width - 1) can therefore
> also be expressed with an all-ones shift amount (0xFF..FF). On Power8, the
> all-ones vector can be materialized with a single vspltis[bhw] instruction,
> avoiding a constant load from memory.
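> 
> For illustration (this snippet is not part of the patch; x stands for an
> arbitrary vector unsigned int): since vslw uses only the low five bits of
> each shift-amount element, the following two calls are equivalent, and the
> second needs no constant load:
> 
>   /* Shift amount 31: the splat constant is typically loaded with lvx.  */
>   vector unsigned int r1 = vec_sl (x, vec_splats (31U));
>   /* 0xFFFFFFFF % 32 == 31, and the all-ones splat can be generated
>      with a single vspltisw.  */
>   vector unsigned int r2 = vec_sl (x, vec_splats (0xFFFFFFFFU));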
> 
> This patch adds rs6000_optimize_vector_bitwidth_shift to detect splat
> constants of (element_bit_width - 1) and replace them with a vector of
> all -1s, thereby avoiding unnecessary memory loads.
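> 
> As a sketch of the transformation (hand-written GIMPLE, not actual
> compiler dump output), for vslw the fold turns
> 
>   _1 = __builtin_altivec_vslw (x_2(D), { 31, 31, 31, 31 });
> 
> into
> 
>   _1 = x_2(D) << { 4294967295, 4294967295, 4294967295, 4294967295 };
> 
> after which the backend emits vspltisw ...,-1 for the all-ones operand.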
> 
> 2025-09-18  Jeevitha Palanisamy  <[email protected]>
> 
> gcc/
>       PR target/119912
>       * config/rs6000/rs6000-builtin.cc
>       (rs6000_optimize_vector_bitwidth_shift): New function to optimize
>       vector shifts by splat of (element_bit_width - 1).
>       (rs6000_gimple_fold_builtin): Call it for the vector shift left
>       built-ins.
> 
> gcc/testsuite/
>       PR target/119912
>       * gcc.target/powerpc/pr119912.c: New test.
> 
> diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
> index bc1580f051b..517c99bfcfb 100644
> --- a/gcc/config/rs6000/rs6000-builtin.cc
> +++ b/gcc/config/rs6000/rs6000-builtin.cc
> @@ -1264,6 +1264,77 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
>    return true;
>  }
>  
> +/* Try to optimize a vector shift by a splat of (element_bit_width - 1)
> +   by replacing the shift amount with an all-ones vector, which can be
> +   materialized with a single splat-immediate instruction instead of a
> +   constant-pool load.  Returns true if handled, false otherwise.  */
> +static bool
> +rs6000_optimize_vector_bitwidth_shift (gimple_stmt_iterator *gsi,
> +                                       tree arg0, tree arg1,
> +                                       tree lhs, location_t loc,
> +                                       enum tree_code subcode)
> +{
> +  int element_bit_width = 128 / VECTOR_CST_NELTS (arg1);
> +  tree arg1_type = TREE_TYPE (arg1);
> +  tree unsigned_arg1_type = unsigned_type_for (arg1_type);
> +  tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
> +  tree check_arg = arg1;
> +
> +  /* Power9 can splat these immediates directly, and for byte/halfword
> +     elements vspltis[bh] already handles the original constant.  */
> +  if (TARGET_P9_VECTOR || TYPE_PRECISION (unsigned_element_type) <= 16)
> +    return false;
> +
> +  /* Look through SSA names and VIEW_CONVERT_EXPRs to find the
> +     underlying vector constant, if any.  */
> +  while (TREE_CODE (check_arg) == SSA_NAME
> +         || TREE_CODE (check_arg) == VIEW_CONVERT_EXPR)
> +    {
> +      if (TREE_CODE (check_arg) == SSA_NAME)
> +        {
> +          gimple *def_stmt = SSA_NAME_DEF_STMT (check_arg);
> +          if (!def_stmt || !is_gimple_assign (def_stmt))
> +            break;
> +          check_arg = gimple_assign_rhs1 (def_stmt);
> +        }
> +      else
> +        check_arg = TREE_OPERAND (check_arg, 0);
> +    }
> +
> +  /* Optimize if splat of (element_bit_width - 1).  */
> +  if (TREE_CODE (check_arg) == VECTOR_CST)
> +    {
> +      tree first_elt = vector_cst_elt (check_arg, 0);
> +      bool is_splat = true;
> +
> +      if (wi::to_widest (first_elt) != element_bit_width - 1)
> +        return false;
> +
> +      for (size_t i = 1; i < VECTOR_CST_NELTS (check_arg); i++)
> +        if (!operand_equal_p (vector_cst_elt (check_arg, i), first_elt, 0))
> +          {
> +            is_splat = false;
> +            break;
> +          }
> +
> +      if (is_splat)
> +        {
> +          /* The hardware uses the shift amount modulo the element width,
> +             so an all-ones vector also shifts by element_bit_width - 1.  */
> +          int n_elts = VECTOR_CST_NELTS (arg1);
> +          tree_vector_builder elts (unsigned_arg1_type, n_elts, 1);
> +          for (int i = 0; i < n_elts; i++)
> +            elts.safe_push (build_int_cst (unsigned_element_type, -1));
> +          tree new_arg1 = elts.build ();
> +          gimple *g = gimple_build_assign (lhs, subcode, arg0, new_arg1);
> +          gimple_set_location (g, loc);
> +          gsi_replace (gsi, g, true);
> +          return true;
> +        }
> +    }
> +  return false;
> +}
> +
>  /* Fold a machine-dependent built-in in GIMPLE.  (For folding into
>     a constant, use rs6000_fold_builtin.)  */
>  bool
> @@ -1720,6 +1791,10 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>       tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
>       loc = gimple_location (stmt);
>       lhs = gimple_call_lhs (stmt);
> +
> +     if (rs6000_optimize_vector_bitwidth_shift (gsi, arg0, arg1, lhs,
> +                                                loc, LSHIFT_EXPR))
> +       return true;
>       /* Force arg1 into the range valid matching the arg0 type.  */
>       /* Build a vector consisting of the max valid bit-size values.  */
>       int n_elts = VECTOR_CST_NELTS (arg1);
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr119912.c b/gcc/testsuite/gcc.target/powerpc/pr119912.c
> new file mode 100644
> index 00000000000..d1802bba801
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr119912.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mdejagnu-cpu=power8 -mvsx -O2" } */
> +
> +#include <altivec.h>
> +
> +vector unsigned int shlw(vector unsigned int in)
> +{
> +    return vec_sl(in, (vector unsigned int)vec_splats((unsigned char)31));
> +}
> +
> +vector unsigned long long shld(vector unsigned long long in)
> +{
> +    return vec_sl(in, (vector unsigned long long)vec_splats(63));
> +}
> +
> +/* { dg-final { scan-assembler-times {\mvspltis[bhwd] [0-9]+,-1\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mlvx\M} 0 } } */
> 
