From: kelefth <konstantinos.elefther...@vrull.eu> The pattern `(A * B) + (-C) -> (B - C/A) * A` fails to match on ILP32 targets because the upper bits of CST0 are zero in some cases.
This patch adds the following pattern to match.pd: (A + CST0) * CST1 -> (A + CST0') * CST1, where CST1 is a power-of-2 constant and CST0' is CST0 with the log2(CST1) MS bits sign-extended. The pattern sign-extends the log2(CST1) MS bits of CST0 when the bit immediately below them is set. These bits are shifted out by the multiplication with CST1, so they do not affect the result. Bootstrapped/regtested on x86 and AArch64. PR tree-optimization/116845 gcc/ChangeLog: * match.pd: New pattern. gcc/testsuite/ChangeLog: * gcc.target/i386/pr116845.c: New test. --- gcc/match.pd | 21 ++++++++++++++++++++- gcc/testsuite/gcc.target/i386/pr116845.c | 23 +++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr116845.c diff --git a/gcc/match.pd b/gcc/match.pd index 83eca8b2e0a..aa2108726f9 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4400,7 +4400,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) /* Calculate @2 / @0 in order to factorize the expression. */ wide_int div_res = wi::sdiv_trunc (c2, c0); tree div_cst = wide_int_to_tree (type, div_res); } - (mult (plus @1 { div_cst; }) @0)))))))) + (mult (plus @1 { div_cst; }) @0))))))) + /* (A + CST0) * CST1 -> (A + CST0') * CST1, where CST1 is a power of 2 + constant and CST0' is CST0 with the log2(CST1) MS bits sign-extended. */ + (simplify + (mult (plus:c @0 INTEGER_CST@1) integer_pow2p@2) + (with { wide_int c1 = wi::to_wide (@1); } + (if (!wi::neg_p (c1)) + (with { + int bits_to_extend = wi::exact_log2 (wi::to_wide (@2)); + uint rest_bits = tree_to_uhwi (TYPE_SIZE (type)) - bits_to_extend; + /* Get the value of the MSB of the rest_bits. */ + wide_int bitmask = wi::set_bit_in_zero (rest_bits - 1, + TYPE_PRECISION (type)); + wide_int bit_value = wi::lrshift (c1 & bitmask, rest_bits - 1); } + /* If the bit is set, extend c1 with its value. 
*/ + (if (!wi::cmp (bit_value, 1, TYPE_SIGN (type))) + (with { + c1 |= wi::mask (rest_bits, true, TYPE_PRECISION (type)); + tree c1_ext = wide_int_to_tree (type, c1); } + (mult (plus @0 { c1_ext; }) @2)))))))) #if GIMPLE /* Canonicalize X + (X << C) into X * (1 + (1 << C)) and diff --git a/gcc/testsuite/gcc.target/i386/pr116845.c b/gcc/testsuite/gcc.target/i386/pr116845.c new file mode 100644 index 00000000000..230e4d5f8b5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr116845.c @@ -0,0 +1,23 @@ +/* PR tree-optimization/116845 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized -m32" } */ + +int foo(int *a, int j) +{ + int k = j - 1; + return a[j - 1] == a[k]; +} + +int foo2(int *a, int j) +{ + int k = j - 5; + return a[j - 5] == a[k]; +} + +int bar(int *a, int j) +{ + int k = j - 1; + return (&a[j + 1] - 2) == &a[k]; +} + +/* { dg-final { scan-tree-dump-times "return 1;" 3 "optimized" } } */ -- 2.47.0