optabs.c:expand_unop_direct can expand a popcount builtin without a call under certain conditions even without a popcount pattern of the required data width:
if (unoptab == popcount_optab && is_a <scalar_int_mode> (mode, &int_mode) && GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD && optab_handler (unoptab, word_mode) != CODE_FOR_nothing && optimize_insn_for_speed_p ()) { temp = expand_doubleword_popcount (int_mode, op0, target); if (temp) return temp; } However, the match.pd recognition of popcount arithmetic using & / + is tied to having an exactly matching operation. This causes a failure for gcc.dg/tree-ssa/popcount4l.c for 16-bit targets that have a 16 bit popcount operation (and no wider). Likewise, not recognizing a 64 bit popcount for a 32 bit target with 32 bit popcount could be rectified by synthesizing the wide popcount operations with two narrower popcount operations. The attached patch implements this.
2020-07-30 Joern Rennecke <joern.renne...@riscy-ip.com> gcc: * gimple-match-head.c (langhooks.h): Include. * match.pd <popcount & / + pattern matching>: When generating popcount directly fails, try doing it in two halves. * gcc.dg/tree-ssa/popcount4ll.c: Remove lp64 condition. Adjust scanning pattern for !lp64. testsuite: * gcc.dg/tree-ssa/popcount5ll.c: Likewise. * gcc.dg/tree-ssa/popcount4l.c: Adjust scanning pattern for ! int32plus. diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c index d941b8b..e3342e3 100644 --- a/gcc/gimple-match-head.c +++ b/gcc/gimple-match-head.c @@ -43,6 +43,7 @@ along with GCC; see the file COPYING3. If not see #include "optabs-tree.h" #include "tree-eh.h" #include "dbgcnt.h" +#include "langhooks.h" /* Forward declarations of the private auto-generated matchers. They expect valueized operands in canonical order and do not diff --git a/gcc/match.pd b/gcc/match.pd index 17c35ee4..fa2e93e 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -6437,10 +6437,25 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) && tree_to_uhwi (@3) == c2 && tree_to_uhwi (@9) == c3 && tree_to_uhwi (@7) == c3 - && tree_to_uhwi (@11) == c4 - && direct_internal_fn_supported_p (IFN_POPCOUNT, type, - OPTIMIZE_FOR_BOTH)) - (convert (IFN_POPCOUNT:type @0))))) + && tree_to_uhwi (@11) == c4) + (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type, + OPTIMIZE_FOR_BOTH)) + (convert (IFN_POPCOUNT:type @0)) + /* Try to do popcount in two halves. PREC must be even, and at least + six bits for this to work without extension before adding. + If popcount is available, is should probably be available for + BITS_PER_WORD, so don't bother with smaller halves. */ + (with { tree half_type = (prec <= BITS_PER_WORD || (prec & 1) ? NULL_TREE + : lang_hooks.types.type_for_size (prec/2, 1)); + gcc_assert (prec > 2 || half_type == NULL_TREE); + } + (if (half_type != NULL_TREE + && direct_internal_fn_supported_p (IFN_POPCOUNT, half_type, + OPTIMIZE_FOR_BOTH)) + (convert (plus + (IFN_POPCOUNT:half_type (convert @0)) + (IFN_POPCOUNT:half_type (convert (rshift @0 + { wide_int_to_tree (half_type, prec/2); } ))))))))))) /* __builtin_ffs needs to deal on many targets with the possible zero argument. If we know the argument is always non-zero, __builtin_ctz + 1 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c index 69fb2d1..269e56e 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c @@ -25,6 +25,7 @@ int popcount64c(unsigned long x) return (x * h01) >> shift; } -/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" { target int32plus } } } */ +/* { dg-final { scan-tree-dump "\.POPCOUNT" "optimized" { target { ! int32plus } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c index c1588be..7abadf6 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c @@ -1,4 +1,4 @@ -/* { dg-do compile { target { lp64 } } } */ +/* { dg-do compile } */ /* { dg-require-effective-target popcountll } */ /* { dg-options "-O2 -fdump-tree-optimized" } */ @@ -16,4 +16,5 @@ int popcount64c(unsigned long long x) return (x * h01) >> shift; } -/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" { target { lp64 } } } } */ +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 2 "optimized" { target { ! lp64 } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c index edb191b..2afe081 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c @@ -1,5 +1,5 @@ /* PR tree-optimization/94800 */ -/* { dg-do compile { target { lp64 } } } */ +/* { dg-do compile } */ /* { dg-require-effective-target popcountll } */ /* { dg-options "-O2 -fdump-tree-optimized" } */ @@ -19,4 +19,5 @@ int popcount64c(unsigned long long x) return x >> shift; } -/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" { target { lp64 } } } } */ +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 2 "optimized" { target { ! lp64 } } } } */