Hello, those have been on my TODO-list for a long time (found in LLVM IIRC). We were not doing any of those transformations, even in GENERIC, so nothing to remove from fold-const.c. The idea is that any expression involving only 2 variables and the operators &|^~ should simplify to at most 2 insns (modulo some single_use issues); not sure if we are there yet. The second transformation became almost redundant when I added the last canonicalization, but since the (rather arbitrary) single_use checks are not the same, I left it. The number of transformations on bit operations in match.pd is large (and I still have some on that TODO-list), so if someone can think of a nice way to organize them, please go ahead.
I was tempted to use C++ templates for the testcase... Bootstrap+regtest on powerpc64le-unknown-linux-gnu. 2017-11-07 Marc Glisse <marc.gli...@inria.fr> gcc/ * match.pd ((a&~b)|(a^b),(a&~b)^~a,(a|b)&~(a^b),a|~(a^b), (a|b)|(a&^b),(a&b)|~(a^b),~(~a&b),~X^Y): New transformations. gcc/testsuite/ * gcc.dg/tree-ssa/bitops-1.c: New file. -- Marc Glisse
Index: gcc/match.pd =================================================================== --- gcc/match.pd (revision 254446) +++ gcc/match.pd (working copy) @@ -678,20 +678,56 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (simplify (op:c (bit_and @0 INTEGER_CST@2) (bit_and (bit_not @0) INTEGER_CST@1)) (if (~wi::to_wide (@2) == wi::to_wide (@1)) (bit_xor @0 @1)))) /* PR53979: Transform ((a ^ b) | a) -> (a | b) */ (simplify (bit_ior:c (bit_xor:c @0 @1) @0) (bit_ior @0 @1)) +/* (a & ~b) | (a ^ b) --> a ^ b */ +(simplify + (bit_ior:c (bit_and:c @0 (bit_not @1)) (bit_xor:c@2 @0 @1)) + @2) + +/* (a & ~b) ^ ~a --> ~(a & b) */ +(simplify + (bit_xor:c (bit_and:cs @0 (bit_not @1)) (bit_not @0)) + (bit_not (bit_and @0 @1))) + +/* (a | b) & ~(a ^ b) --> a & b */ +(simplify + (bit_and:c (bit_ior @0 @1) (bit_not (bit_xor:c @0 @1))) + (bit_and @0 @1)) + +/* a | ~(a ^ b) --> a | ~b */ +(simplify + (bit_ior:c @0 (bit_not:s (bit_xor:c @0 @1))) + (bit_ior @0 (bit_not @1))) + +/* (a | b) | (a &^ b) --> a | b */ +(for op (bit_and bit_xor) + (simplify + (bit_ior:c (bit_ior@2 @0 @1) (op:c @0 @1)) + @2)) + +/* (a & b) | ~(a ^ b) --> ~(a ^ b) */ +(simplify + (bit_ior:c (bit_and:c @0 @1) (bit_not@2 (bit_xor @0 @1))) + @2) + +/* ~(~a & b) --> a | ~b */ +(simplify + (bit_not (bit_and:cs (bit_not @0) @1)) + (bit_ior @0 (bit_not @1))) + /* Simplify (~X & Y) to X ^ Y if we know that (X & ~Y) is 0. */ #if GIMPLE (simplify (bit_and (bit_not SSA_NAME@0) INTEGER_CST@1) (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) && wi::bit_and_not (get_nonzero_bits (@0), wi::to_wide (@1)) == 0) (bit_xor @0 @1))) #endif /* X % Y is smaller than Y. */ @@ -1097,20 +1133,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) /* Part of convert ~(X ^ Y) to ~X ^ Y or X ^ ~Y if ~X or ~Y simplify. */ (simplify (bit_not (convert? (bit_xor @0 INTEGER_CST@1))) (if (tree_nop_conversion_p (type, TREE_TYPE (@0))) (convert (bit_xor @0 (bit_not @1))))) (simplify (bit_not (convert? 
(bit_xor:c (bit_not @0) @1))) (if (tree_nop_conversion_p (type, TREE_TYPE (@0))) (convert (bit_xor @0 @1)))) +/* Otherwise prefer ~(X ^ Y) to ~X ^ Y as more canonical. */ +(simplify + (bit_xor:c (nop_convert:s (bit_not:s @0)) @1) + (if (tree_nop_conversion_p (type, TREE_TYPE (@0))) + (bit_not (bit_xor (view_convert @0) @1)))) + /* (x & ~m) | (y & m) -> ((x ^ y) & m) ^ x */ (simplify (bit_ior:c (bit_and:cs @0 (bit_not @2)) (bit_and:cs @1 @2)) (bit_xor (bit_and (bit_xor @0 @1) @2) @0)) /* Fold A - (A & B) into ~B & A. */ (simplify (minus (convert1? @0) (convert2?:s (bit_and:cs @@0 @1))) (if (tree_nop_conversion_p (type, TREE_TYPE (@0)) && tree_nop_conversion_p (type, TREE_TYPE (@1))) Index: gcc/testsuite/gcc.dg/tree-ssa/bitops-1.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/bitops-1.c (nonexistent) +++ gcc/testsuite/gcc.dg/tree-ssa/bitops-1.c (working copy) @@ -0,0 +1,72 @@ +/* { dg-do run } */ +/* { dg-options "-O -fdump-tree-optimized-raw" } */ + +#define DECLS(n,VOL) \ +__attribute__((noinline,noclone)) \ +int f##n(int A,int B){ \ + VOL int C = A & ~B; \ + VOL int D = A ^ B; \ + return C | D; \ +} \ +__attribute__((noinline,noclone)) \ +int g##n(int A,int B){ \ + VOL int C = A & ~B; \ + return C ^ ~A; \ +} \ +__attribute__((noinline,noclone)) \ +int h##n(int A,int B){ \ + VOL int C = A | B; \ + VOL int D = A ^ B; \ + return C & ~D; \ +} \ +__attribute__((noinline,noclone)) \ +int i##n(int A,int B){ \ + VOL int C = A ^ B; \ + return A | ~C; \ +} \ +__attribute__((noinline,noclone)) \ +int J##n(int A,int B){ \ + VOL int C = A | B; \ + VOL int D = A & B; \ + return C | D; \ +} \ +__attribute__((noinline,noclone)) \ +int k##n(int A,int B){ \ + VOL int C = A & B; \ + VOL int D = A ^ B; \ + return C | ~D; \ +} \ +__attribute__((noinline,noclone)) \ +int l##n(int A,int B){ \ + VOL int C = A & ~B; \ + return ~C; \ +} \ +__attribute__((noinline,noclone)) \ +int m##n(int A,int B){ \ + VOL int C = A & B; \ + VOL int D 
= A ^ B; \ + return C ^ D; \ +} + +DECLS(0,) +DECLS(1,volatile) + +int main(){ + for(int A = 0; A <= 1; ++A) + for(int B = 0; B <= 1; ++B) + { + if (f0 (A, B) != f1 (A, B)) __builtin_abort(); + if (g0 (A, B) != g1 (A, B)) __builtin_abort(); + if (h0 (A, B) != h1 (A, B)) __builtin_abort(); + if (i0 (A, B) != i1 (A, B)) __builtin_abort(); + if (J0 (A, B) != J1 (A, B)) __builtin_abort(); + if (k0 (A, B) != k1 (A, B)) __builtin_abort(); + if (l0 (A, B) != l1 (A, B)) __builtin_abort(); + if (m0 (A, B) != m1 (A, B)) __builtin_abort(); + } +} + +/* { dg-final { scan-tree-dump-times "bit_not_expr" 12 "optimized"} } */ +/* { dg-final { scan-tree-dump-times "bit_and_expr" 9 "optimized"} } */ +/* { dg-final { scan-tree-dump-times "bit_ior_expr" 10 "optimized"} } */ +/* { dg-final { scan-tree-dump-times "bit_xor_expr" 9 "optimized"} } */