This patch implements several constant folding optimizations
for __builtin_parity and friends. We canonicalize popcount(x)&1 as parity(x) in gimple, and potentially convert back again when we expand to RTL. parity(~x) is simplified to parity(x), which is true for all integer modes with an even number of bits. But probably most usefully, parity(x)^parity(y) can be simplified to parity(x^y), requiring only a single libcall or popcount. This idiom is seen in PR target/44481 and elsewhere in the Linux kernel. This patch has been tested with "make bootstrap" and "make -k check" on x86_64-pc-linux-gnu with no regressions. If approved, I'd very much appreciate it if someone could commit this change for me. 2020-06-12 Roger Sayle <ro...@nextmovesoftware.com> * match.pd (popcount(x)&1 -> parity(x)): New simplification. (parity(~x) -> parity(x)): New simplification. (parity(x&1) -> x&1): New simplification. (parity(x)^parity(y) -> parity(x^y)): New simplification. 2020-06-12 Roger Sayle <ro...@nextmovesoftware.com> * gcc.dg/fold-parity-1.c: New test. * gcc.dg/fold-parity-2.c: Likewise. * gcc.dg/fold-parity-3.c: Likewise. * gcc.dg/fold-parity-4.c: Likewise. Thanks in advance, Roger -- Roger Sayle NextMove Software Cambridge, UK
diff --git a/gcc/match.pd b/gcc/match.pd index 2b8c37e..ddb0650 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -5949,6 +5949,32 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (cmp (popcount @0) integer_zerop) (rep @0 { build_zero_cst (TREE_TYPE (@0)); })))) +/* Canonicalize POPCOUNT(x)&1 as PARITY(X). */ +(for popcount (BUILT_IN_POPCOUNT BUILT_IN_POPCOUNTL BUILT_IN_POPCOUNTLL + BUILT_IN_POPCOUNTIMAX) + parity (BUILT_IN_PARITY BUILT_IN_PARITYL BUILT_IN_PARITYLL + BUILT_IN_PARITYIMAX) + (simplify + (bit_and (popcount @0) integer_onep) + (parity @0))) + +/* PARITY simplifications. */ +(for parity (BUILT_IN_PARITY BUILT_IN_PARITYL BUILT_IN_PARITYLL + BUILT_IN_PARITYIMAX) + /* parity(~X) is parity(X). */ + (simplify + (parity (bit_not @0)) + (parity @0)) + /* parity(X&1) is nop_expr(X&1). */ + (simplify + (parity @0) + (if (tree_nonzero_bits (@0) == 1) + (convert @0))) + /* parity(X)^parity(Y) is parity(X^Y). */ + (simplify + (bit_xor (parity:s @0) (parity:s @1)) + (parity (bit_xor @0 @1)))) + #if GIMPLE /* 64- and 32-bits branchless implementations of popcount are detected:
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-original" } */

/* Check that popcount(x) & 1 is canonicalized to parity(x) at each
   builtin width, so no popcount call survives into the original dump.  */

int foo(unsigned int x)
{
  return __builtin_popcount(x) & 1;
}

int fool(unsigned long x)
{
  return __builtin_popcountl(x) & 1;
}

int fooll(unsigned long long x)
{
  return __builtin_popcountll(x) & 1;
}

/* { dg-final { scan-tree-dump-times "__builtin_popcount" 0 "original" } } */
/* { dg-final { scan-tree-dump-times "__builtin_parity" 3 "original" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-optimized" } */

/* Check that parity(~x) folds to parity(x): the bit_not must be gone
   from the optimized dump for every builtin width.  */

int foo(unsigned int x)
{
  return __builtin_parity(~x);
}

int fool(unsigned long x)
{
  return __builtin_parityl(~x);
}

int fooll(unsigned long long x)
{
  return __builtin_parityll(~x);
}

/* { dg-final { scan-tree-dump-times "~" 0 "optimized" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-optimized" } */

/* Check that parity of a single-bit value is just that bit: no
   __builtin_parity call should remain in the optimized dump.  */

int foo(unsigned int x)
{
  return __builtin_parity(x & 1);
}

int fool(unsigned long x)
{
  return __builtin_parityl(x & 1);
}

int fooll(unsigned long long x)
{
  return __builtin_parityll(x & 1);
}

/* { dg-final { scan-tree-dump-times "__builtin_parity" 0 "optimized" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-optimized" } */

/* Check that parity(x) ^ parity(y) is merged into parity(x ^ y):
   each function should keep exactly one parity call, three total.  */

int foo(unsigned int x, unsigned int y)
{
  return __builtin_parity(x) ^ __builtin_parity(y);
}

int fool(unsigned long x, unsigned long y)
{
  return __builtin_parityl(x) ^ __builtin_parityl(y);
}

int fooll(unsigned long long x, unsigned long long y)
{
  return __builtin_parityll(x) ^ __builtin_parityll(y);
}

/* { dg-final { scan-tree-dump-times "__builtin_parity" 3 "optimized" } } */