Range-ops uses wi_fold (implemented by each opcode) to individually
fold subranges one at a time and then combines them. This patch first
calls wi_fold_in_parts which checks if one of the subranges is small,
and if so, further splits that subrange into constants.
Currently, if a subrange has 4 or fewer values, then we fold it
individually for each of those values. 4 was chosen as a reasonable
tradeoff between excess work vs benefit. We could consider
increasing that number for -O3 perhaps.
This allows us under some circumstances to generate much more precise
values. For instance:
tmp_5 = x_4(D) != 0;
_1 = (int) tmp_5;
_2 = 255 >> _1;
_3 = (unsigned char) _2;
_6 = _3 * 2;
return _6;
_1 : int [0, 1]
_2 : int [127, 255]
_3 : unsigned char [127, +INF]
we currently produce a range of [127,255] for _2. The RHS of the shift
(_1) only has 2 values, so by further splitting that subrange into [0,0]
and [1,1], we can produce a more precise range, and do better:
y_5 = x_4(D) != 0;
_1 = (int) y_5;
_2 = 255 >> _1;
_3 = (unsigned char) _2;
return 254;
_1 : int [0, 1]
_2 : int [127, 127][255, 255]
_3 : unsigned char [127, 127][+INF, +INF]
Now _2 is a perfect 127 or 255, and we can fold this function to just
the return value now.
This will work for many different opcodes without having to go and
rework range-ops for them.
Bootstraps on x86_64-pc-linux-gnu with no regressions and fixes all 3
testcases in the PR. Pushed.
Andrew
commit 704e8a825c78b9a8424c291509413bbb48e602c7
Author: Andrew MacLeod <amacl...@redhat.com>
Date: Fri Jul 16 11:42:14 2021 -0400
Add wi_fold_in_parts.
range-ops uses wi_fold to individually fold subranges one at a time and
then combines them. This patch first calls wi_fold_in_parts which checks if
one of the subranges is small, and if so, further splits that subrange
into constants.
gcc/
PR tree-optimization/96542
* range-op.cc (range_operator::wi_fold_in_parts): New.
(range_operator::fold_range): Call wi_fold_in_parts.
(operator_lshift::wi_fold): Fix broken lshift by [0,0].
* range-op.h (wi_fold_in_parts): Add prototype.
gcc/testsuite/
* gcc.dg/pr96542.c: New.
diff --git a/gcc/range-op.cc b/gcc/range-op.cc
index 08000465fd9..e0be51dbc90 100644
--- a/gcc/range-op.cc
+++ b/gcc/range-op.cc
@@ -133,6 +133,65 @@ range_operator::wi_fold (irange &r, tree type,
r.set_varying (type);
}
+// Call wi_fold, except further split small subranges into constants.
+// This can provide better precision. For something like 8 << [0,1],
+// instead of [8, 16] we will produce [8,8][16,16].
+
+void
+range_operator::wi_fold_in_parts (irange &r, tree type,
+ const wide_int &lh_lb,
+ const wide_int &lh_ub,
+ const wide_int &rh_lb,
+ const wide_int &rh_ub) const
+{
+ wi::overflow_type ov_rh, ov_lh;
+ int_range_max tmp;
+ wide_int rh_range = wi::sub (rh_ub, rh_lb, TYPE_SIGN (type), &ov_rh);
+ wide_int lh_range = wi::sub (lh_ub, lh_lb, TYPE_SIGN (type), &ov_lh);
+ signop sign = TYPE_SIGN (type);
+ // If there are 2, 3, or 4 values in the RH range, do them separately.
+ // Call wi_fold_in_parts to check the RH side.
+ if (wi::gt_p (rh_range, 0, sign) && wi::lt_p (rh_range, 4, sign)
+ && ov_rh == wi::OVF_NONE)
+ {
+ wi_fold_in_parts (r, type, lh_lb, lh_ub, rh_lb, rh_lb);
+ if (wi::gt_p (rh_range, 1, sign))
+ {
+ wi_fold_in_parts (tmp, type, lh_lb, lh_ub, rh_lb + 1, rh_lb + 1);
+ r.union_ (tmp);
+ if (wi::eq_p (rh_range, 3))
+ {
+ wi_fold_in_parts (tmp, type, lh_lb, lh_ub, rh_lb + 2, rh_lb + 2);
+ r.union_ (tmp);
+ }
+ }
+ wi_fold_in_parts (tmp, type, lh_lb, lh_ub, rh_ub, rh_ub);
+ r.union_ (tmp);
+ }
+ // Otherwise check for 2, 3, or 4 values in the LH range and split them up.
+ // The RH side has been checked, so no recursion needed.
+ else if (wi::gt_p (lh_range, 0, sign) && wi::lt_p (lh_range, 4, sign)
+ && ov_lh == wi::OVF_NONE)
+ {
+ wi_fold (r, type, lh_lb, lh_lb, rh_lb, rh_ub);
+ if (wi::gt_p (lh_range, 1, sign))
+ {
+ wi_fold (tmp, type, lh_lb + 1, lh_lb + 1, rh_lb, rh_ub);
+ r.union_ (tmp);
+ if (wi::eq_p (lh_range, 3))
+ {
+ wi_fold (tmp, type, lh_lb + 2, lh_lb + 2, rh_lb, rh_ub);
+ r.union_ (tmp);
+ }
+ }
+ wi_fold (tmp, type, lh_ub, lh_ub, rh_lb, rh_ub);
+ r.union_ (tmp);
+ }
+ // Otherwise just call wi_fold.
+ else
+ wi_fold (r, type, lh_lb, lh_ub, rh_lb, rh_ub);
+}
+
+
// The default for fold is to break all ranges into sub-ranges and
// invoke the wi_fold method on each sub-range pair.
@@ -152,8 +211,8 @@ range_operator::fold_range (irange &r, tree type,
// If both ranges are single pairs, fold directly into the result range.
if (num_lh == 1 && num_rh == 1)
{
- wi_fold (r, type, lh.lower_bound (0), lh.upper_bound (0),
- rh.lower_bound (0), rh.upper_bound (0));
+ wi_fold_in_parts (r, type, lh.lower_bound (0), lh.upper_bound (0),
+ rh.lower_bound (0), rh.upper_bound (0));
op1_op2_relation_effect (r, type, lh, rh, rel);
return true;
}
@@ -167,7 +226,7 @@ range_operator::fold_range (irange &r, tree type,
wide_int lh_ub = lh.upper_bound (x);
wide_int rh_lb = rh.lower_bound (y);
wide_int rh_ub = rh.upper_bound (y);
- wi_fold (tmp, type, lh_lb, lh_ub, rh_lb, rh_ub);
+ wi_fold_in_parts (tmp, type, lh_lb, lh_ub, rh_lb, rh_ub);
r.union_ (tmp);
if (r.varying_p ())
{
@@ -1915,8 +1974,14 @@ operator_lshift::wi_fold (irange &r, tree type,
int bound_shift = overflow_pos - rh_ub.to_shwi ();
// If bound_shift == HOST_BITS_PER_WIDE_INT, the llshift can
// overflow. However, for that to happen, rh.max needs to be zero,
- // which means rh is a singleton range of zero, which means it
- // should be handled by the lshift fold_range above.
+ // which means rh is a singleton range of zero, which means we simply return
+ // [lh_lb, lh_ub] as the range.
+ if (wi::eq_p (rh_ub, rh_lb) && wi::eq_p (rh_ub, 0))
+ {
+ r = int_range<2> (type, lh_lb, lh_ub);
+ return;
+ }
+
wide_int bound = wi::set_bit_in_zero (bound_shift, prec);
wide_int complement = ~(bound - 1);
wide_int low_bound, high_bound;
diff --git a/gcc/range-op.h b/gcc/range-op.h
index 2b5db64bb98..17be9e00c08 100644
--- a/gcc/range-op.h
+++ b/gcc/range-op.h
@@ -97,6 +97,12 @@ protected:
const irange &op1_range,
const irange &op2_range,
relation_kind rel) const;
+ // Called by fold range to split small subranges into parts.
+ void wi_fold_in_parts (irange &r, tree type,
+ const wide_int &lh_lb,
+ const wide_int &lh_ub,
+ const wide_int &rh_lb,
+ const wide_int &rh_ub) const;
};
extern range_operator *range_op_handler (enum tree_code code, tree type);
diff --git a/gcc/testsuite/gcc.dg/pr96542.c b/gcc/testsuite/gcc.dg/pr96542.c
new file mode 100644
index 00000000000..5014f2acad8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr96542.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-evrp" } */
+
+
+unsigned char
+foo (unsigned int x)
+{
+ _Bool y = x;
+ return (((unsigned char) ~0) >> y) * 2;
+}
+
+unsigned char
+bar (unsigned int x)
+{
+ return (((unsigned char) ~0) >> (_Bool) x) * 2;
+}
+
+unsigned
+baz (unsigned int x)
+{
+ if (x >= 4) return 32;
+ return (-1U >> x) * 16;
+}
+
+/* { dg-final { scan-tree-dump-times "254" 2 "evrp" } } */
+/* { dg-final { scan-tree-dump "= PHI <32.*, 4294967280" "evrp" } } */
+