From: kelefth <konstantinos.elefther...@vrull.eu>

The pattern `(A * B) + (-C) -> (B - C/A) * A` fails to match on ILP32
targets because the upper bits of CST0 are zeros in some cases.

This patch adds the following pattern in match.pd:
(A + CST0) * CST1 -> (A + CST0') * CST1, where CST1 is a power of 2
constant and CST0' is CST0 with the log2(CST1) MS bits sign-extended.

This pattern sign-extends the log2(CST1) most significant bits of CST0
if the bit that follows them is set.  These bits are shifted out by the
multiplication with CST1, so changing them does not alter the product.

Bootstrapped/regtested on x86 and AArch64.

        PR tree-optimization/116845

gcc/ChangeLog:

        * match.pd: New pattern.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/pr116845.c: New test.
---
 gcc/match.pd                             | 21 ++++++++++++++++++++-
 gcc/testsuite/gcc.target/i386/pr116845.c | 23 +++++++++++++++++++++++
 2 files changed, 43 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr116845.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 83eca8b2e0a..aa2108726f9 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4400,7 +4400,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
          /* Calculate @2 / @0 in order to factorize the expression.  */
          wide_int div_res = wi::sdiv_trunc (c2, c0);
          tree div_cst = wide_int_to_tree (type, div_res); }
-       (mult (plus @1 { div_cst; }) @0))))))))
+       (mult (plus @1 { div_cst; }) @0)))))))
+ /* (A + CST0) * CST1 -> (A + CST0') * CST1, where CST1 is a power of 2
+    constant and CST0' is CST0 with the log2(CST1) MS bits sign-extended.  */
+ (simplify
+   (mult (plus:c @0 INTEGER_CST@1) integer_pow2p@2)
+     (with { wide_int c1 = wi::to_wide (@1); }
+     (if (!wi::neg_p (c1))
+     (with {
+       unsigned prec = TYPE_PRECISION (type);
+       int bits_to_extend = wi::exact_log2 (wi::to_wide (@2));
+       unsigned rest_bits = prec - bits_to_extend;
+       /* Test the highest bit of c1 that survives the multiply.  */
+       wide_int bitmask = wi::set_bit_in_zero (rest_bits - 1, prec);
+       wide_int bit_value = wi::lrshift (c1 & bitmask, rest_bits - 1); }
+       /* If that bit is set, set every bit of c1 above it as well.  */
+       (if (!wi::cmp (bit_value, 1, TYPE_SIGN (type)))
+        (with {
+          c1 |= wi::mask (rest_bits, true, prec);
+          tree c1_ext = wide_int_to_tree (type, c1); }
+        (mult (plus @0 { c1_ext; }) @2))))))))
 
 #if GIMPLE
 /* Canonicalize X + (X << C) into X * (1 + (1 << C)) and
diff --git a/gcc/testsuite/gcc.target/i386/pr116845.c b/gcc/testsuite/gcc.target/i386/pr116845.c
new file mode 100644
index 00000000000..230e4d5f8b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr116845.c
@@ -0,0 +1,23 @@
+/* PR tree-optimization/116845 */
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int foo(int *a, int j)
+{
+  int k = j - 1;
+  return a[j - 1] == a[k];
+}
+
+int foo2(int *a, int j)
+{
+  int k = j - 5;
+  return a[j - 5] == a[k];
+}
+
+int bar(int *a, int j)
+{
+  int k = j - 1;
+  return (&a[j + 1] - 2) == &a[k];  /* &a[j + 1] - 2 is &a[j - 1].  */
+}
+
+/* { dg-final { scan-tree-dump-times "return 1;" 3 "optimized" } } */
-- 
2.47.0

Reply via email to