This patch extends get_nonzero_bits to handle POLY_INT_CSTs. The easiest (but also most useful) case is that the number of trailing zeros in the runtime value is at least the number of trailing zeros in each individual component.
In principle, we could do this for coeffs 1 and above only, and then OR in coeff 0. This would give ~0x11 for [14, 32], say. But that's future work. gcc/ * tree-ssanames.cc (get_nonzero_bits): Handle POLY_INT_CSTs. * match.pd (with_possible_nonzero_bits): Likewise. gcc/testsuite/ * gcc.target/aarch64/sve/cnt_fold_4.c: New test. --- gcc/match.pd | 2 + .../gcc.target/aarch64/sve/cnt_fold_4.c | 61 +++++++++++++++++++ gcc/tree-ssanames.cc | 3 + 3 files changed, 66 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_4.c diff --git a/gcc/match.pd b/gcc/match.pd index 540582dc984..41903554478 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -2893,6 +2893,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) possibly set. */ (match with_possible_nonzero_bits INTEGER_CST@0) +(match with_possible_nonzero_bits + POLY_INT_CST@0) (match with_possible_nonzero_bits SSA_NAME@0 (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE (@0))))) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_4.c new file mode 100644 index 00000000000..b7a53701993 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_4.c @@ -0,0 +1,61 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <arm_sve.h> + +/* +** f1: +** cnth x0 +** ret +*/ +uint64_t +f1 () +{ + uint64_t x = svcntw (); + x >>= 2; + return x << 3; +} + +/* +** f2: +** [^\n]+ +** [^\n]+ +** ... +** ret +*/ +uint64_t +f2 () +{ + uint64_t x = svcntd (); + x >>= 2; + return x << 3; +} + +/* +** f3: +** cntb x0, all, mul #4 +** ret +*/ +uint64_t +f3 () +{ + uint64_t x = svcntd (); + x >>= 1; + return x << 6; +} + +/* +** f4: +** [^\n]+ +** [^\n]+ +** ... 
+** ret +*/ +uint64_t +f4 () +{ + uint64_t x = svcntd (); + x >>= 2; + return x << 2; +} diff --git a/gcc/tree-ssanames.cc b/gcc/tree-ssanames.cc index 4f83fcbb517..d2d1ec18797 100644 --- a/gcc/tree-ssanames.cc +++ b/gcc/tree-ssanames.cc @@ -505,6 +505,9 @@ get_nonzero_bits (const_tree name) /* Use element_precision instead of TYPE_PRECISION so complex and vector types get a non-zero precision. */ unsigned int precision = element_precision (TREE_TYPE (name)); + if (POLY_INT_CST_P (name)) + return -known_alignment (wi::to_poly_wide (name)); + if (POINTER_TYPE_P (TREE_TYPE (name))) { struct ptr_info_def *pi = SSA_NAME_PTR_INFO (name); -- 2.25.1