Folks, we'd appreciate it if someone could take the time to review this fix for PR116845.
Thanks, Philipp. On Tue, 31 Dec 2024 at 10:03, Konstantinos Eleftheriou <konstantinos.elefther...@vrull.eu> wrote: > > From: kelefth <konstantinos.elefther...@vrull.eu> > > `(A * B) + (-C) to (B - C/A) * A` fails to match on ILP32 targets due to > the upper bits of CST0 being zeros in some cases. > > This patch adds the following pattern in match.pd: > (A + CST0) * CST1 -> (A + CST0') * CST1, where CST1 is a power of 2 > constant and CST0' is CST0 with the log2(CST1) MS bits sign-extended. > > This pattern sign-extends the log2(CST1) MS bits of CST0, in case that > the bit that follows them is set. These bits will be pushed out by > the multiplication with CST1. > > Bootstrapped/regtested on x86 and AArch64. > > PR tree-optimization/116845 > > gcc/ChangeLog: > > * match.pd: New pattern. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/pr116845.c: New test. > --- > gcc/match.pd | 21 ++++++++++++++++++++- > gcc/testsuite/gcc.target/i386/pr116845.c | 23 +++++++++++++++++++++++ > 2 files changed, 43 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr116845.c > > diff --git a/gcc/match.pd b/gcc/match.pd > index 83eca8b2e0a..aa2108726f9 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -4400,7 +4400,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > /* Calculate @2 / @0 in order to factorize the expression. */ > wide_int div_res = wi::sdiv_trunc (c2, c0); > tree div_cst = wide_int_to_tree (type, div_res); } > - (mult (plus @1 { div_cst; }) @0)))))))) > + (mult (plus @1 { div_cst; }) @0))))))) > + /* (A + CST0) * CST1 -> (A + CST0') * CST1, where CST1 is a power of 2 > + constant and CST0' is CST0 with the log2(CST1) MS bits sign-extended. */ > + (simplify > + (mult (plus:c @0 INTEGER_CST@1) integer_pow2p@2) > + (with { wide_int c1 = wi::to_wide (@1); } > + (if (!wi::neg_p (c1)) > + (with { > + int bits_to_extend = wi::exact_log2 (wi::to_wide (@2)); > + uint rest_bits = tree_to_uhwi (TYPE_SIZE (type)) - bits_to_extend; > + /* Get the value of the MSB of the rest_bits. */ > + wide_int bitmask = wi::set_bit_in_zero (rest_bits - 1, > + TYPE_PRECISION (type)); > + wide_int bit_value = wi::lrshift (c1 & bitmask, rest_bits - 1); } > + /* If the bit is set, extend c1 with its value. */ > + (if (!wi::cmp (bit_value, 1, TYPE_SIGN (type))) > + (with { > + c1 |= wi::mask (rest_bits, true, TYPE_PRECISION (type)); > + tree c1_ext = wide_int_to_tree (type, c1); } > + (mult (plus @0 { c1_ext; }) @2)))))))) > > #if GIMPLE > /* Canonicalize X + (X << C) into X * (1 + (1 << C)) and > diff --git a/gcc/testsuite/gcc.target/i386/pr116845.c > b/gcc/testsuite/gcc.target/i386/pr116845.c > new file mode 100644 > index 00000000000..230e4d5f8b5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr116845.c > @@ -0,0 +1,23 @@ > +/* PR tree-optimization/116845 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-optimized -m32" } */ > + > +int foo(int *a, int j) > +{ > + int k = j - 1; > + return a[j - 1] == a[k]; > +} > + > +int foo2(int *a, int j) > +{ > + int k = j - 5; > + return a[j - 5] == a[k]; > +} > + > +int bar(int *a, int j) > +{ > + int k = j - 1; > + return (&a[j + 1] - 2) == &a[k]; > +} > + > +/* { dg-final { scan-tree-dump-times "return 1;" 3 "optimized" } } */ > -- > 2.47.0 >