optabs.c:expand_unop_direct can expand a popcount builtin without a call
under certain conditions even without a popcount pattern of the required
data width:

  if (unoptab == popcount_optab
      && is_a <scalar_int_mode> (mode, &int_mode)
      && GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
      && optab_handler (unoptab, word_mode) != CODE_FOR_nothing
      && optimize_insn_for_speed_p ())
    {
      temp = expand_doubleword_popcount (int_mode, op0, target);
      if (temp)
        return temp;
    }


However, the match.pd recognition of popcount arithmetic using & / + is
tied to having an exactly matching operation.  This causes a failure of
gcc.dg/tree-ssa/popcount4l.c for 16-bit targets that have a 16-bit
popcount operation (and no wider).
Likewise, the failure to recognize a 64-bit popcount on a 32-bit target
that has a 32-bit popcount could be rectified by synthesizing the wide
popcount operation from two narrower popcount operations.
The attached patch implements this.
2020-07-30  Joern Rennecke  <joern.renne...@riscy-ip.com>

gcc:
        * gimple-match-head.c (langhooks.h): Include.
        * match.pd <popcount & / + pattern matching>:
        When generating popcount directly fails, try doing it in two halves.
testsuite:
        * gcc.dg/tree-ssa/popcount4ll.c: Remove lp64 condition.
        Adjust scanning pattern for !lp64.
        * gcc.dg/tree-ssa/popcount5ll.c: Likewise.
        * gcc.dg/tree-ssa/popcount4l.c: Adjust scanning pattern
        for ! int32plus.
        
diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
index d941b8b..e3342e3 100644
--- a/gcc/gimple-match-head.c
+++ b/gcc/gimple-match-head.c
@@ -43,6 +43,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "optabs-tree.h"
 #include "tree-eh.h"
 #include "dbgcnt.h"
+#include "langhooks.h"
 
 /* Forward declarations of the private auto-generated matchers.
    They expect valueized operands in canonical order and do not
diff --git a/gcc/match.pd b/gcc/match.pd
index 17c35ee4..fa2e93e 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6437,10 +6437,25 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
        && tree_to_uhwi (@3) == c2
        && tree_to_uhwi (@9) == c3
        && tree_to_uhwi (@7) == c3
-       && tree_to_uhwi (@11) == c4
-       && direct_internal_fn_supported_p (IFN_POPCOUNT, type,
-                                          OPTIMIZE_FOR_BOTH))
-    (convert (IFN_POPCOUNT:type @0)))))
+       && tree_to_uhwi (@11) == c4)
+    (if (direct_internal_fn_supported_p (IFN_POPCOUNT, type,
+                                        OPTIMIZE_FOR_BOTH))
+     (convert (IFN_POPCOUNT:type @0))
+     /* Try to do popcount in two halves.  PREC must be even, and at least
+       six bits for this to work without extension before adding.
+       If popcount is available, it should probably be available for
+       BITS_PER_WORD, so don't bother with smaller halves.  */
+     (with { tree half_type = (prec <= BITS_PER_WORD || (prec & 1) ? NULL_TREE
+                              : lang_hooks.types.type_for_size (prec/2, 1));
+          gcc_assert (prec > 2 || half_type == NULL_TREE);
+      }
+      (if (half_type != NULL_TREE
+          && direct_internal_fn_supported_p (IFN_POPCOUNT, half_type,
+                                             OPTIMIZE_FOR_BOTH))
+       (convert (plus
+        (IFN_POPCOUNT:half_type (convert @0))
+        (IFN_POPCOUNT:half_type (convert (rshift @0
+           { wide_int_to_tree (half_type, prec/2); } )))))))))))
 
 /* __builtin_ffs needs to deal on many targets with the possible zero
    argument.  If we know the argument is always non-zero, __builtin_ctz + 1
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c
index 69fb2d1..269e56e 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4l.c
@@ -25,6 +25,7 @@ int popcount64c(unsigned long x)
     return (x * h01) >> shift;
 }
 
-/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" { target int32plus } } } */
+/* { dg-final { scan-tree-dump "\.POPCOUNT" "optimized" { target { ! int32plus } } } } */
 
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c
index c1588be..7abadf6 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount4ll.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { lp64 } } } */
+/* { dg-do compile } */
 /* { dg-require-effective-target popcountll } */
 /* { dg-options "-O2 -fdump-tree-optimized" } */
 
@@ -16,4 +16,5 @@ int popcount64c(unsigned long long x)
     return (x * h01) >> shift;
 }
 
-/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" { target { lp64 } } } } */
+/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 2 "optimized" { target { ! lp64 } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c
index edb191b..2afe081 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c
@@ -1,5 +1,5 @@
 /* PR tree-optimization/94800 */
-/* { dg-do compile { target { lp64 } } } */
+/* { dg-do compile } */
 /* { dg-require-effective-target popcountll } */
 /* { dg-options "-O2 -fdump-tree-optimized" } */
 
@@ -19,4 +19,5 @@ int popcount64c(unsigned long long x)
     return x >> shift;
 }
 
-/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" { target { lp64 } } } } */
+/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 2 "optimized" { target { ! lp64 } } } } */

Reply via email to