Odd numbers are invertible in Z / 2^n Z, so X * C1 == C2 can be rewritten
as X == C2 * inv(C1) when overflow wraps.
mod_inv should probably be updated to better match the other wide_int
functions, but that's a separate issue.
Bootstrap+regtest on x86_64-pc-linux-gnu.
2020-08-10 Marc Glisse <marc.gli...@inria.fr>
PR tree-optimization/95433
* match.pd (X * C1 == C2): Handle wrapping overflow.
* expr.c (maybe_optimize_mod_cmp): Qualify call to mod_inv.
(mod_inv): Move...
* wide-int.cc (mod_inv): ... here.
* wide-int.h (mod_inv): Declare it.
* gcc.dg/tree-ssa/pr95433-2.c: New file.
--
Marc Glisse
diff --git a/gcc/expr.c b/gcc/expr.c
index a150fa0d3b5..ebf0c9e4797 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -11859,38 +11859,6 @@ string_constant (tree arg, tree *ptr_offset, tree *mem_size, tree *decl)
return init;
}
-/* Compute the modular multiplicative inverse of A modulo M
- using extended Euclid's algorithm. Assumes A and M are coprime. */
-static wide_int
-mod_inv (const wide_int &a, const wide_int &b)
-{
- /* Verify the assumption. */
- gcc_checking_assert (wi::eq_p (wi::gcd (a, b), 1));
-
- unsigned int p = a.get_precision () + 1;
- gcc_checking_assert (b.get_precision () + 1 == p);
- wide_int c = wide_int::from (a, p, UNSIGNED);
- wide_int d = wide_int::from (b, p, UNSIGNED);
- wide_int x0 = wide_int::from (0, p, UNSIGNED);
- wide_int x1 = wide_int::from (1, p, UNSIGNED);
-
- if (wi::eq_p (b, 1))
- return wide_int::from (1, p, UNSIGNED);
-
- while (wi::gt_p (c, 1, UNSIGNED))
- {
- wide_int t = d;
- wide_int q = wi::divmod_trunc (c, d, UNSIGNED, &d);
- c = t;
- wide_int s = x0;
- x0 = wi::sub (x1, wi::mul (q, x0));
- x1 = s;
- }
- if (wi::lt_p (x1, 0, SIGNED))
- x1 += d;
- return x1;
-}
-
/* Optimize x % C1 == C2 for signed modulo if C1 is a power of two and C2
is non-zero and C3 ((1<<(prec-1)) | (C1 - 1)):
for C2 > 0 to x & C3 == C2
@@ -12101,7 +12069,7 @@ maybe_optimize_mod_cmp (enum tree_code code, tree *arg0, tree *arg1)
w = wi::lrshift (w, shift);
wide_int a = wide_int::from (w, prec + 1, UNSIGNED);
wide_int b = wi::shifted_mask (prec, 1, false, prec + 1);
- wide_int m = wide_int::from (mod_inv (a, b), prec, UNSIGNED);
+ wide_int m = wide_int::from (wi::mod_inv (a, b), prec, UNSIGNED);
tree c3 = wide_int_to_tree (type, m);
tree c5 = NULL_TREE;
wide_int d, e;
diff --git a/gcc/match.pd b/gcc/match.pd
index 7e5c5a6eae6..c3b88168ac4 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3828,7 +3828,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(cmp @0 @2))))))
/* For integral types with undefined overflow fold
- x * C1 == C2 into x == C2 / C1 or false. */
+ x * C1 == C2 into x == C2 / C1 or false.
+ If overflow wraps and C1 is odd, simplify to x == C2 / C1 in the ring
+ Z / 2^n Z. */
(for cmp (eq ne)
(simplify
(cmp (mult @0 INTEGER_CST@1) INTEGER_CST@2)
@@ -3839,7 +3841,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if (wi::multiple_of_p (wi::to_widest (@2), wi::to_widest (@1),
TYPE_SIGN (TREE_TYPE (@0)), "))
(cmp @0 { wide_int_to_tree (TREE_TYPE (@0), quot); })
- { constant_boolean_node (cmp == NE_EXPR, type); })))))
+ { constant_boolean_node (cmp == NE_EXPR, type); }))
+ (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))
+ && (wi::bit_and (wi::to_wide (@1), 1) == 1))
+ (cmp @0
+ {
+ tree itype = TREE_TYPE (@0);
+ int p = TYPE_PRECISION (itype);
+ wide_int m = wi::one (p + 1) << p;
+ wide_int a = wide_int::from (wi::to_wide (@1), p + 1, UNSIGNED);
+ wide_int i = wide_int::from (wi::mod_inv (a, m),
+ p, TYPE_SIGN (itype));
+ wide_int_to_tree (itype, wi::mul (i, wi::to_wide (@2)));
+ })))))
/* Simplify comparison of something with itself. For IEEE
floating-point, we can only do some of these simplifications. */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c
new file mode 100644
index 00000000000..c830a3d195f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fwrapv -fdump-tree-gimple" } */
+
+typedef __INT32_TYPE__ int32_t;
+typedef unsigned __INT32_TYPE__ uint32_t;
+
+int e(int32_t x){return 3*x==5;}
+int f(int32_t x){return 3*x==-5;}
+int g(int32_t x){return -3*x==5;}
+int h(int32_t x){return 7*x==3;}
+int i(uint32_t x){return 7*x==3;}
+
+/* { dg-final { scan-tree-dump-times "== 1431655767" 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "== -1431655767" 2 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "== 613566757" 2 "gimple" } } */
diff --git a/gcc/wide-int.cc b/gcc/wide-int.cc
index 03be0074816..f4d949c38a0 100644
--- a/gcc/wide-int.cc
+++ b/gcc/wide-int.cc
@@ -2223,6 +2223,39 @@ wi::round_up_for_mask (const wide_int &val, const wide_int &mask)
return (val | tmp) & -tmp;
}
+/* Compute the modular multiplicative inverse of A modulo B
+ using extended Euclid's algorithm. Assumes A and B are coprime,
+ and that A and B have the same precision. */
+wide_int
+wi::mod_inv (const wide_int &a, const wide_int &b)
+{
+ /* Verify the assumption. */
+ gcc_checking_assert (wi::eq_p (wi::gcd (a, b), 1));
+
+ unsigned int p = a.get_precision () + 1;
+ gcc_checking_assert (b.get_precision () + 1 == p);
+ wide_int c = wide_int::from (a, p, UNSIGNED);
+ wide_int d = wide_int::from (b, p, UNSIGNED);
+ wide_int x0 = wide_int::from (0, p, UNSIGNED);
+ wide_int x1 = wide_int::from (1, p, UNSIGNED);
+
+ if (wi::eq_p (b, 1))
+ return wide_int::from (1, p, UNSIGNED);
+
+ while (wi::gt_p (c, 1, UNSIGNED))
+ {
+ wide_int t = d;
+ wide_int q = wi::divmod_trunc (c, d, UNSIGNED, &d);
+ c = t;
+ wide_int s = x0;
+ x0 = wi::sub (x1, wi::mul (q, x0));
+ x1 = s;
+ }
+ if (wi::lt_p (x1, 0, SIGNED))
+ x1 += d;
+ return x1;
+}
+
/*
* Private utilities.
*/
diff --git a/gcc/wide-int.h b/gcc/wide-int.h
index bb0d16b99b1..39cd5b9bd17 100644
--- a/gcc/wide-int.h
+++ b/gcc/wide-int.h
@@ -3389,6 +3389,8 @@ namespace wi
wide_int round_down_for_mask (const wide_int &, const wide_int &);
wide_int round_up_for_mask (const wide_int &, const wide_int &);
+ wide_int mod_inv (const wide_int &a, const wide_int &b);
+
template <typename T>
T mask (unsigned int, bool);