Attached is the updated version of the patch. Bootstrapped and regtested on x86_64-pc-linux-gnu.
OK to apply?
Recognize popcount also when a double width operation is needed. 2021-01-27 Joern Rennecke <joern.renne...@riscy-ip.com> gcc/ * match.pd <popcount & / + pattern matching>: When generating popcount directly fails, try doing it in two halves. gcc/testsuite/ * gcc.dg/tree-ssa/popcount4ll.c: Remove lp64 condition. Adjust scanning pattern for !lp64. * gcc.dg/tree-ssa/popcount5ll.c: Likewise. * gcc.dg/tree-ssa/popcount4l.c: Adjust scanning pattern for ! int32plus. diff --git a/gcc/match.pd b/gcc/match.pd index cdb87636951..3cfa5e761a4 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -6550,10 +6550,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) && tree_to_uhwi (@3) == c2 && tree_to_uhwi (@9) == c3 && tree_to_uhwi (@7) == c3 - && tree_to_uhwi (@11) == c4 - && direct_internal_fn_supported_p (IFN_POPCOUNT, type, - OPTIMIZE_FOR_BOTH)) - (convert (IFN_POPCOUNT:type @0))))) + && tree_to_uhwi (@11) == c4) + (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type, + OPTIMIZE_FOR_BOTH)) + (convert (IFN_POPCOUNT:type @0)) + /* Try to do popcount in two halves. PREC must be at least + five bits for this to work without extension before adding. */ + (with { + tree half_type = NULL_TREE; + machine_mode m = mode_for_size ((prec + 1) / 2, MODE_INT, 1).require (); + int half_prec = GET_MODE_PRECISION (as_a <scalar_int_mode> (m)); + if (m != TYPE_MODE (type)) + half_type = build_nonstandard_integer_type (half_prec, 1); + gcc_assert (half_prec > 2); + } + (if (half_type != NULL_TREE + && direct_internal_fn_supported_p (IFN_POPCOUNT, half_type, + OPTIMIZE_FOR_BOTH)) + (convert (plus + (IFN_POPCOUNT:half_type (convert @0)) + (IFN_POPCOUNT:half_type (convert (rshift @0 + { build_int_cst (integer_type_node, half_prec); } ))))))))))) /* __builtin_ffs needs to deal on many targets with the possible zero argument. If we know the argument is always non-zero, __builtin_ctz + 1 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c index 69fb2d1134d..269e56e90f9 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c @@ -25,6 +25,7 @@ int popcount64c(unsigned long x) return (x * h01) >> shift; } -/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" { target int32plus } } } */ +/* { dg-final { scan-tree-dump "\.POPCOUNT" "optimized" { target { ! int32plus } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c index c1588be68e4..7abadf6df04 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c @@ -1,4 +1,4 @@ -/* { dg-do compile { target { lp64 } } } */ +/* { dg-do compile } */ /* { dg-require-effective-target popcountll } */ /* { dg-options "-O2 -fdump-tree-optimized" } */ @@ -16,4 +16,5 @@ int popcount64c(unsigned long long x) return (x * h01) >> shift; } -/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" { target { lp64 } } } } */ +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 2 "optimized" { target { ! lp64 } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c index edb191bf894..2afe08124fe 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c @@ -1,5 +1,5 @@ /* PR tree-optimization/94800 */ -/* { dg-do compile { target { lp64 } } } */ +/* { dg-do compile } */ /* { dg-require-effective-target popcountll } */ /* { dg-options "-O2 -fdump-tree-optimized" } */ @@ -19,4 +19,5 @@ int popcount64c(unsigned long long x) return x >> shift; } -/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" { target { lp64 } } } } */ +/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 2 "optimized" { target { ! lp64 } } } } */