Thank you for the feedback on the v1 patch. As requested, I added detailed tests for various signed and unsigned integer types in the test file bitops-11.c. I also included more complex expressions to observe how everything behaves at the GIMPLE level, and added vector test examples as well. Since vector expressions are matched as (minus @0 1) instead of (plus @0 -1), I added a simplification for the minus case in match.pd.
Additionally, I introduced simplifications for the expressions (a - 1) & -a, (a - 1) | -a, and (a - 1) ^ -a to 0, -1, and -1, respectively, in simplify-rtx.cc. For each of the three expressions, I added two if statements. The first matches the typical (BIT_OP (plus A -1) (neg A)) form, while the second recognizes the presence of a SUBREG within the RTL expression; for example, when A is of type short, the second if statement is triggered. I did not observe match.pd missing any of these simplifications, but if that ever happens, the code in simplify-rtx.cc should catch them. Bootstrapped and tested on x86_64-linux-gnu with no regressions.

2024-11-26  Jovan Vukic  <jovan.vu...@rt-rk.com>

gcc/ChangeLog:

	* match.pd: New pattern.
	* simplify-rtx.cc (simplify_context::simplify_binary_operation_1):
	New code to handle (a - 1) & -a, (a - 1) | -a and (a - 1) ^ -a.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/bitops-11.c: New test.

CONFIDENTIALITY: The contents of this e-mail are confidential and intended only for the above addressee(s). If you are not the intended recipient, or the person responsible for delivering it to the intended recipient, copying or delivering it to anyone else or using it in any unauthorized manner is prohibited and may be unlawful. If you receive this e-mail by mistake, please notify the sender and the systems administrator at straym...@rt-rk.com immediately.
---
 gcc/match.pd                              |  19 ++++
 gcc/simplify-rtx.cc                       |  49 +++++++++
 gcc/testsuite/gcc.dg/tree-ssa/bitops-11.c | 116 ++++++++++++++++++++++
 3 files changed, 184 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitops-11.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 0ac5674f24b..c85d4b9ae6c 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1472,6 +1472,25 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (bit_and:c @0 (bit_not (bit_xor:c @0 @1)))
  (bit_and @0 @1))
 
+/* Transform:
+   (a - 1) & -a -> 0.
+   (a - 1) | -a -> -1.
+   (a - 1) ^ -a -> -1.
+   The second pattern below matches the vector form, where the
+   subtraction is kept as (minus @0 1) rather than being
+   canonicalized to (plus @0 -1).  */
+(for bit_op (bit_ior bit_xor bit_and)
+ (simplify
+  (bit_op:c (plus @0 integer_minus_onep) (negate @0))
+  (if (bit_op == BIT_AND_EXPR)
+   { build_zero_cst (type); }
+   { build_minus_one_cst (type); }))
+ (simplify
+  (bit_op:c (minus @0 integer_onep) (negate @0))
+  (if (bit_op == BIT_AND_EXPR)
+   { build_zero_cst (type); }
+   { build_minus_one_cst (type); })))
+
 /* a & (a == b) --> a & b (boolean version of the above). */
 (simplify
  (bit_and:c @0 (nop_convert? (eq:c @0 @1)))
diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index 893c5f6e1ae..514d21c6ef5 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -3490,6 +3490,40 @@ simplify_context::simplify_binary_operation (rtx_code code, machine_mode mode,
   return NULL_RTX;
 }
 
+/* Return true when the operand pair (OP0, OP1) has the shape
+   ((plus A -1), (neg A)), in either order, possibly with both
+   operands wrapped in matching lowpart SUBREGs of the same A.
+   For such pairs IOR and XOR simplify to -1 and AND simplifies
+   to 0.  */
+
+static bool
+match_plus_neg_pattern (rtx op0, rtx op1)
+{
+  /* When A is narrower than the operation mode (e.g. A of type short),
+     both operands appear as lowpart SUBREGs; look through a matching
+     pair of them.  */
+  if (GET_CODE (op0) == SUBREG
+      && GET_CODE (op1) == SUBREG
+      && subreg_lowpart_p (op0)
+      && subreg_lowpart_p (op1))
+    {
+      op0 = XEXP (op0, 0);
+      op1 = XEXP (op1, 0);
+    }
+
+  /* Canonicalize so the PLUS, if any, is OP0.  */
+  if (GET_CODE (op0) == NEG)
+    std::swap (op0, op1);
+
+  /* rtx_equal_p also verifies the modes of the two A's, and since the
+     operands are equal a single side_effects_p check suffices.  */
+  return (GET_CODE (op0) == PLUS
+	  && GET_CODE (op1) == NEG
+	  && XEXP (op0, 1) == constm1_rtx
+	  && rtx_equal_p (XEXP (op0, 0), XEXP (op1, 0))
+	  && !side_effects_p (XEXP (op0, 0)));
+}
+
 /* Subroutine of simplify_binary_operation.  Simplify a binary operation
    CODE with result mode MODE, operating on OP0 and OP1.  */
 
@@ -3530,6 +3570,11 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
 	  && GET_MODE_CLASS (mode) != MODE_CC)
 	return CONSTM1_RTX (mode);
 
+      /* (ior (plus A -1) (neg A)) is -1, also when both operands are
+	 wrapped in matching lowpart SUBREGs.  */
+      if (match_plus_neg_pattern (op0, op1))
+	return CONSTM1_RTX (mode);
+
       /* (ior A C) is C if all bits of A that might be nonzero are on in C.  */
       if (CONST_INT_P (op1)
 	  && HWI_COMPUTABLE_MODE_P (mode)
@@ -3691,6 +3736,11 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
 	  & nonzero_bits (op1, mode)) == 0)
 	return (simplify_gen_binary (IOR, mode, op0, op1));
 
+      /* (xor (plus A -1) (neg A)) is -1, also when both operands are
+	 wrapped in matching lowpart SUBREGs.  */
+      if (match_plus_neg_pattern (op0, op1))
+	return CONSTM1_RTX (mode);
+
       /* Convert (XOR (NOT x) (NOT y)) to (XOR x y).
 	 Also convert (XOR (NOT x) y) to (NOT (XOR x y)), similarly for
 	 (NOT y).  */
@@ -3958,6 +4008,11 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
 	  && GET_MODE_CLASS (mode) != MODE_CC)
 	return CONST0_RTX (mode);
 
+      /* (and (plus A -1) (neg A)) is 0, also when both operands are
+	 wrapped in matching lowpart SUBREGs.  */
+      if (match_plus_neg_pattern (op0, op1))
+	return CONST0_RTX (mode);
+
       /* Transform (and (extend X) C) into (zero_extend (and X C)) if
 	 there are no nonzero bits of C outside of X's mode.  */
       if ((GET_CODE (op0) == SIGN_EXTEND
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-11.c b/gcc/testsuite/gcc.dg/tree-ssa/bitops-11.c
new file mode 100644
index 00000000000..365a113f671
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-11.c
@@ -0,0 +1,116 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-optimized-raw -Wno-psabi" } */
+
+typedef int v4si __attribute__((vector_size(4 * sizeof(int))));
+
+/* Generic */
+#define BIT_OPERATIONS(result_type, operand_type, suffix) \
+result_type foo_generic_1##suffix(operand_type a) { \
+  return (a - 1) & -a; \
+} \
+result_type foo_generic_2##suffix(operand_type a) { \
+  return (a - 1) | -a; \
+} \
+result_type foo_generic_3##suffix(operand_type a) { \
+  return (a - 1) ^ -a; \
+}
+
+BIT_OPERATIONS(signed char, signed char, 11)
+BIT_OPERATIONS(unsigned char, unsigned char, 12)
+
+BIT_OPERATIONS(signed short, signed short, 21)
+BIT_OPERATIONS(unsigned short, unsigned short, 22)
+
+BIT_OPERATIONS(signed int, signed int, 31)
+BIT_OPERATIONS(unsigned int, unsigned int, 32)
+
+BIT_OPERATIONS(signed long, signed long, 41)
+BIT_OPERATIONS(unsigned long, unsigned long, 42)
+
+/* Gimple */
+int
+foo_gimple_1(int a)
+{
+  int t1 = a - 1;
+  int t2 = -a;
+  int t3 = t1 & t2;
+  return t3;
+}
+
+short
+foo_gimple_2(short a)
+{
+  short t1 = a - 1;
+  short t2 = -a;
+  short t3 = t1 | t2;
+  return t3;
+}
+
+unsigned long
+foo_gimple_3(unsigned long a)
+{
+  unsigned long t1 = a - 1;
+  unsigned long t2 = -a;
+  unsigned long t3 = t1 ^ t2;
+  return t3;
+}
+
+int
+foo_gimple_4(int a, unsigned char b)
+{
+  /* The return expression should simplify to b + 7.  */
+  int t1 = b;
+  t1 |= (a - 1) | -a;
+  t1 |= b & (a >> 3);
+
+  int t2 = b + 7;
+  t2 &= ~((b - 1) & -b);
+  t2 &= (a - 1) ^ -a;
+
+  int t3 = t1 & t2;
+  return t3;
+}
+
+/* Vectors */
+v4si
+foo_vector_1(v4si a)
+{
+  return (a - (v4si) {1, 1, 1, 1}) & -a;
+}
+
+v4si
+foo_vector_2(v4si a)
+{
+  v4si t0 = (v4si) {1, 1, 1, 1};
+  v4si t1 = a - t0;
+  v4si t2 = -a;
+  v4si t3 = t1 | t2;
+  return t3;
+}
+
+v4si
+foo_vector_3(v4si a)
+{
+  v4si t0 = (v4si) {1, 1, 1, 1};
+  v4si t1 = a - t0;
+  v4si t2 = -a;
+  v4si t3 = t1 ^ t2;
+  return t3;
+}
+
+v4si
+foo_vector_4(v4si a)
+{
+  v4si t0 = (v4si) {1, 1, 1, 1};
+  v4si t1 = (a - t0) & -a;
+  v4si t2 = (a - t0) | -a;
+  v4si t3 = (a - t0) ^ -a;
+  v4si t4 = t1 - t2 + t3;
+  return t4;
+}
+
+/* { dg-final { scan-tree-dump-not "bit_and_expr, " "optimized" } } */
+/* { dg-final { scan-tree-dump-not "bit_ior_expr, " "optimized" } } */
+/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "optimized" } } */
+/* { dg-final { scan-tree-dump-not "negate_expr, " "optimized" } } */
+/* { dg-final { scan-tree-dump-times "plus_expr, " 1 "optimized" } } */
-- 
2.43.0