Hi! This patch moves this optimization from fold-const.c to match.pd, where it is actually much shorter to express and where it lets us optimize even code that does not appear together in a single expression in the source, as a first step towards fixing the PR.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for stage1? 2020-04-24 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/94718 * fold-const.c (fold_binary_loc): Move (X & C) eqne (Y & C) -> (X ^ Y) & C eqne 0 optimization to ... * match.pd ((X & C) op (Y & C) into (X ^ Y) & C op 0): ... here. * gcc.dg/tree-ssa/pr94718-1.c: New test. * gcc.dg/tree-ssa/pr94718-2.c: New test. --- gcc/fold-const.c.jj 2020-04-23 19:50:11.693805797 +0200 +++ gcc/fold-const.c 2020-04-24 18:34:04.245559495 +0200 @@ -11631,50 +11631,6 @@ fold_binary_loc (location_t loc, enum tr return omit_one_operand_loc (loc, type, res, arg0); } - /* Fold (X & C) op (Y & C) as (X ^ Y) & C op 0", and symmetries. */ - if (TREE_CODE (arg0) == BIT_AND_EXPR - && TREE_CODE (arg1) == BIT_AND_EXPR) - { - tree arg00 = TREE_OPERAND (arg0, 0); - tree arg01 = TREE_OPERAND (arg0, 1); - tree arg10 = TREE_OPERAND (arg1, 0); - tree arg11 = TREE_OPERAND (arg1, 1); - tree itype = TREE_TYPE (arg0); - - if (operand_equal_p (arg01, arg11, 0)) - { - tem = fold_convert_loc (loc, itype, arg10); - tem = fold_build2_loc (loc, BIT_XOR_EXPR, itype, arg00, tem); - tem = fold_build2_loc (loc, BIT_AND_EXPR, itype, tem, arg01); - return fold_build2_loc (loc, code, type, tem, - build_zero_cst (itype)); - } - if (operand_equal_p (arg01, arg10, 0)) - { - tem = fold_convert_loc (loc, itype, arg11); - tem = fold_build2_loc (loc, BIT_XOR_EXPR, itype, arg00, tem); - tem = fold_build2_loc (loc, BIT_AND_EXPR, itype, tem, arg01); - return fold_build2_loc (loc, code, type, tem, - build_zero_cst (itype)); - } - if (operand_equal_p (arg00, arg11, 0)) - { - tem = fold_convert_loc (loc, itype, arg10); - tem = fold_build2_loc (loc, BIT_XOR_EXPR, itype, arg01, tem); - tem = fold_build2_loc (loc, BIT_AND_EXPR, itype, tem, arg00); - return fold_build2_loc (loc, code, type, tem, - build_zero_cst (itype)); - } - if (operand_equal_p (arg00, arg10, 0)) - { - tem = fold_convert_loc (loc, itype, arg11); - tem = fold_build2_loc (loc, BIT_XOR_EXPR, 
itype, arg01, tem); - tem = fold_build2_loc (loc, BIT_AND_EXPR, itype, tem, arg00); - return fold_build2_loc (loc, code, type, tem, - build_zero_cst (itype)); - } - } - if (TREE_CODE (arg0) == BIT_XOR_EXPR && TREE_CODE (arg1) == BIT_XOR_EXPR) { --- gcc/match.pd.jj 2020-04-23 19:50:11.710805542 +0200 +++ gcc/match.pd 2020-04-24 18:34:04.247559465 +0200 @@ -4335,7 +4335,12 @@ (define_operator_list COND_TERNARY (simplify (cmp (convert? addr@0) integer_zerop) (if (tree_single_nonzero_warnv_p (@0, NULL)) - { constant_boolean_node (cmp == NE_EXPR, type); }))) + { constant_boolean_node (cmp == NE_EXPR, type); })) + + /* (X & C) op (Y & C) into (X ^ Y) & C op 0. */ + (simplify + (cmp (bit_and:cs @0 @2) (bit_and:cs @1 @2)) + (cmp (bit_and (bit_xor @0 @1) @2) { build_zero_cst (TREE_TYPE (@2)); }))) /* If we have (A & C) == C where C is a power of 2, convert this into (A & C) != 0. Similarly for NE_EXPR. */ --- gcc/testsuite/gcc.dg/tree-ssa/pr94718-1.c.jj 2020-04-24 17:31:10.392472875 +0200 +++ gcc/testsuite/gcc.dg/tree-ssa/pr94718-1.c 2020-04-24 18:43:35.829119496 +0200 @@ -0,0 +1,49 @@ +/* PR tree-optimization/94718 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-ipa-icf -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times "= \[xy]_\[0-9]+\\\(D\\\) \\^ \[xy]_\[0-9]+\\\(D\\\);" 6 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\[0-9]+ < 0;" 6 "optimized" } } */ + +#define I (-__INT_MAX__ - 1) + +int +f1 (int x, int y) +{ + return (x & I) != (y & I); +} + +int +f2 (int x, int y) +{ + return (~x & I) != (~y & I); +} + +int +f3 (int x, int y) +{ + return ((x & I) ^ I) != ((y & I) ^ I); +} + +int +f4 (int x, int y) +{ + int s = (x & I); + int t = (y & I); + return s != t; +} + +int +f5 (int x, int y) +{ + int s = (~x & I); + int t = (~y & I); + return s != t; +} + +int +f6 (int x, int y) +{ + int s = ((x & I) ^ I); + int t = ((y & I) ^ I); + return s != t; +} --- gcc/testsuite/gcc.dg/tree-ssa/pr94718-2.c.jj 2020-04-24 17:31:13.673424244 +0200 +++ 
gcc/testsuite/gcc.dg/tree-ssa/pr94718-2.c 2020-04-24 18:43:44.669988955 +0200 @@ -0,0 +1,49 @@ +/* PR tree-optimization/94718 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-ipa-icf -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times "= \[xy]_\[0-9]+\\\(D\\\) \\^ \[xy]_\[0-9]+\\\(D\\\);" 6 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\[0-9]+ >= 0;" 6 "optimized" } } */ + +#define I (-__INT_MAX__ - 1) + +int +f1 (int x, int y) +{ + return (x & I) == (y & I); +} + +int +f2 (int x, int y) +{ + return (~x & I) == (~y & I); +} + +int +f3 (int x, int y) +{ + return ((x & I) ^ I) == ((y & I) ^ I); +} + +int +f4 (int x, int y) +{ + int s = (x & I); + int t = (y & I); + return s == t; +} + +int +f5 (int x, int y) +{ + int s = (~x & I); + int t = (~y & I); + return s == t; +} + +int +f6 (int x, int y) +{ + int s = ((x & I) ^ I); + int t = ((y & I) ^ I); + return s == t; +} Jakub