Ping! Please review.
Thanks & Regards Jeevitha On 18/09/25 3:25 pm, jeevitha wrote: > Hi All, > > The following patch has been bootstrapped and regtested on powerpc64le-linux. > > PowerPC vector shift left instructions (vslb, vslh, vslw, vsld) use modulo > semantics for the shift amount. Shifts by (element_bit_width - 1) can be > optimized by replacing the shift amount splat with a vector of 0xFF..FF. On > Power8, this reduces instruction overhead by using vspltis[wd]. > > This patch adds rs6000_optimize_vector_bitwidth_shift to detect splat > constants > of (element_bit_width - 1) and replace them with a vector of all -1s, thereby > avoiding unnecessary memory loads. > > 2025-09-18 Jeevitha Palanisamy <[email protected]> > > gcc/ > PR target/119912 > * config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Call > to new function. > (rs6000_optimize_vector_bitwidth_shift): New function to optimize > vector immediate shifts. > > gcc/testsuite/ > PR target/119912 > * gcc.target/powerpc/pr119912.c: New test. > > diff --git a/gcc/config/rs6000/rs6000-builtin.cc > b/gcc/config/rs6000/rs6000-builtin.cc > index bc1580f051b..517c99bfcfb 100644 > --- a/gcc/config/rs6000/rs6000-builtin.cc > +++ b/gcc/config/rs6000/rs6000-builtin.cc > @@ -1264,6 +1264,68 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator > *gsi, > return true; > } > > +/* Try to optimize shift by splat(element_bit_width - 1). > + Returns true if handled, false otherwise. 
*/ > +static bool > +rs6000_optimize_vector_bitwidth_shift (gimple_stmt_iterator *gsi, > + tree arg0, tree arg1, > + tree lhs, location_t loc, enum tree_code > subcode) > +{ > + int element_bit_width = 128 / VECTOR_CST_NELTS (arg1); > + tree arg1_type = TREE_TYPE (arg1); > + tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1)); > + tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type)); > + tree check_arg = arg1; > + > + if (TARGET_P9_VECTOR || TYPE_PRECISION (unsigned_element_type) <= 16) > + return false; > + > + while (TREE_CODE (check_arg) == SSA_NAME > + || TREE_CODE (check_arg) == VIEW_CONVERT_EXPR) > + { > + if (TREE_CODE (check_arg) == SSA_NAME) > + { > + gimple *def_stmt = SSA_NAME_DEF_STMT (check_arg); > + if (!def_stmt || !gimple_assign_lhs (def_stmt)) > + break; > + check_arg = gimple_assign_rhs1 (def_stmt); > + } > + else > + check_arg = TREE_OPERAND (check_arg, 0); > + } > + > + /* Optimize if splat of (element_bit_width - 1). */ > + if (TREE_CODE (check_arg) == VECTOR_CST) > + { > + tree first_elt = vector_cst_elt (check_arg, 0); > + bool is_splat = true; > + > + if (wi::to_widest (first_elt) != element_bit_width - 1) > + return false; > + > + for (size_t i = 1; i < VECTOR_CST_NELTS (check_arg); i++) > + if (!operand_equal_p (vector_cst_elt (check_arg, i), first_elt, 0)) > + { > + is_splat = false; > + break; > + } > + > + if (is_splat) > + { > + int n_elts = VECTOR_CST_NELTS (arg1); > + tree_vector_builder elts (unsigned_arg1_type, n_elts, 1); > + for (int i = 0; i < n_elts; i++) > + elts.safe_push (build_int_cst (unsigned_element_type, -1)); > + tree new_arg1 = elts.build (); > + gimple *g = gimple_build_assign (lhs, subcode, arg0, new_arg1); > + gimple_set_location (g, loc); > + gsi_replace (gsi, g, true); > + return true; > + } > + } > + return false; > +} > + > /* Fold a machine-dependent built-in in GIMPLE. (For folding into > a constant, use rs6000_fold_builtin.) 
*/ > bool > @@ -1720,6 +1782,11 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type)); > loc = gimple_location (stmt); > lhs = gimple_call_lhs (stmt); > + > + if (rs6000_optimize_vector_bitwidth_shift (gsi, arg0, arg1, lhs, loc, > LSHIFT_EXPR)) > + { > + return true; > + } > /* Force arg1 into the range valid matching the arg0 type. */ > /* Build a vector consisting of the max valid bit-size values. */ > int n_elts = VECTOR_CST_NELTS (arg1); > diff --git a/gcc/testsuite/gcc.target/powerpc/pr119912.c > b/gcc/testsuite/gcc.target/powerpc/pr119912.c > new file mode 100644 > index 00000000000..d1802bba801 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr119912.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mdejagnu-cpu=power8 -mvsx -O2" } */ > + > +#include <altivec.h> > + > +vector unsigned int shlw(vector unsigned int in) > +{ > + return vec_sl(in, (vector unsigned int)vec_splats((unsigned char)31)); > +} > + > +vector unsigned long long shld(vector unsigned long long in) > +{ > + return vec_sl(in, (vector unsigned long long)vec_splats(63)); > +} > + > +/* { dg-final { scan-assembler-times {\mvspltis[bhwd] [0-9]+,-1\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mlvx\M} 0 } } */ >
