Thanks,
Richard.
2021-05-28 Richard Biener <rguent...@suse.de>
PR tree-optimization/100801
* tree-scalar-evolution.h (final_value_at_exit): Declare.
* tree-scalar-evolution.c (final_value_at_exit): New API,
split out from ...
(final_value_replacement_loop): ... here.
* tree-vrp.c (execute_vrp): Build loop-closed SSA only
after inserting assert expressions.
(vrp_asserts::process_assert_insertions_for): Avoid inserting
asserts for RESULT_DECLs; when building loop-closed SSA
after assert insertion we are not able to propagate out all
PHIs and would thus trigger IL validation failures.
* vr-values.c (vr_values::extract_range_from_phi_node):
For loop exit PHIs also perform final value analysis using
SCEV.
* gcc.target/i386/pr100801.c: New testcase.
---
gcc/testsuite/gcc.target/i386/pr100801.c | 29 ++++++++++++++++++++++++
gcc/tree-scalar-evolution.c | 28 +++++++++++++++--------
gcc/tree-scalar-evolution.h | 1 +
gcc/tree-vrp.c | 12 ++++++++--
gcc/vr-values.c | 23 +++++++++++++++----
5 files changed, 77 insertions(+), 16 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr100801.c
diff --git a/gcc/testsuite/gcc.target/i386/pr100801.c
b/gcc/testsuite/gcc.target/i386/pr100801.c
new file mode 100644
index 00000000000..c58f9be6898
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr100801.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+
+#include <x86intrin.h>
+
+void copy_32_unaligned(unsigned int* dest, const unsigned int* src, size_t
count)
+{
+ // loop-invariant mask; the shuffle using it below is a nop
+ __m128i shufmask = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4,
3, 2, 1, 0);
+
+ size_t i;
+ for (i = 0; i + 4 <= count; i += 4) {
+ __m128i input = _mm_loadu_si128((const __m128i*)(&src[i]));
+ __m128i output = input;
+ // the bogus warning is not emitted without this shuffle:
+ output = _mm_shuffle_epi8(input, shufmask);
+ _mm_storeu_si128((__m128i*)(&dest[i]), output);
+ }
+ for (; i < count; ++i) { // handle the residual (count % 4) elements
+ dest[i] = src[i]; /* { dg-bogus "invokes undefined" } */
+ }
+}
+
+void test(unsigned int* buf1, unsigned int* buf2)
+{
+ copy_32_unaligned(buf2, buf1,
+ // multiples of 4 that are greater than or equal to 12 trigger it:
+ 12);
+}
diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
index b22d49a0ab6..6917b8c9dab 100644
--- a/gcc/tree-scalar-evolution.c
+++ b/gcc/tree-scalar-evolution.c
@@ -3491,6 +3491,23 @@ expression_expensive_p (tree expr)
|| expanded_size > cache.elements ());
}
+/* Compute the final value of DEF, a DEF inside the loop E exits from. Yields chrec_dont_know when the evolution still has chrecs or symbols defined in the exited loop. */
+
+tree
+final_value_at_exit (tree def, edge e, bool *folded_casts)
+{
+ class loop *loop = e->src->loop_father;
+ gcc_assert (loop_exit_edge_p (loop, e));
+ class loop *ex_loop
+ = superloop_at_depth (loop, loop_depth (e->dest->loop_father) + 1);
+ tree ev = analyze_scalar_evolution_in_loop (ex_loop, loop, def,
folded_casts);
+ ev = compute_overall_effect_of_inner_loop (ex_loop, ev);
+ if (!tree_does_not_contain_chrecs (ev)
+ || chrec_contains_symbols_defined_in_loop (ev, ex_loop->num))
+ ev = chrec_dont_know;
+ return ev;
+}
+
/* Do final value replacement for LOOP, return true if we did anything. */
bool
@@ -3513,10 +3530,6 @@ final_value_replacement_loop (class loop *loop)
/* Set stmt insertion pointer. All stmts are inserted before this point.
*/
gimple_stmt_iterator gsi = gsi_after_labels (exit->dest);
- class loop *ex_loop
- = superloop_at_depth (loop,
- loop_depth (exit->dest->loop_father) + 1);
-
bool any = false;
gphi_iterator psi;
for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); )
@@ -3538,11 +3551,8 @@ final_value_replacement_loop (class loop *loop)
}
bool folded_casts;
- def = analyze_scalar_evolution_in_loop (ex_loop, loop, def,
- &folded_casts);
- def = compute_overall_effect_of_inner_loop (ex_loop, def);
- if (!tree_does_not_contain_chrecs (def)
- || chrec_contains_symbols_defined_in_loop (def, ex_loop->num)
+ def = final_value_at_exit (def, exit, &folded_casts);
+ if (def == chrec_dont_know
/* Moving the computation from the loop may prolong life range
of some ssa names, which may cause problems if they appear
on abnormal edges. */
diff --git a/gcc/tree-scalar-evolution.h b/gcc/tree-scalar-evolution.h
index d679f7285b3..18186c32d3d 100644
--- a/gcc/tree-scalar-evolution.h
+++ b/gcc/tree-scalar-evolution.h
@@ -33,6 +33,7 @@ extern tree analyze_scalar_evolution (class loop *, tree);
extern tree instantiate_scev (edge, class loop *, tree);
extern tree resolve_mixers (class loop *, tree, bool *);
extern void gather_stats_on_scev_database (void);
+extern tree final_value_at_exit (tree, edge, bool *);
extern bool final_value_replacement_loop (class loop *);
extern unsigned int scev_const_prop (void);
extern bool expression_expensive_p (tree);
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 450926d5f9b..b3d937cb27f 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -3416,6 +3416,13 @@ vrp_asserts::process_assert_insertions_for (tree name,
assert_locus *loc)
if (loc->expr == loc->val)
return false;
+ /* Do not insert assertions for RESULT_DECLs, those are asserted to be
+ read-only in the IL and are always non-NULL. */
+ if (SSA_NAME_VAR (name)
+ && TREE_CODE (SSA_NAME_VAR (name)) == RESULT_DECL
+ && DECL_BY_REFERENCE (SSA_NAME_VAR (name)))
+ return false;
+
cond = build2 (loc->comp_code, boolean_type_node, loc->expr, loc->val);
assert_stmt = build_assert_expr_for (cond, name);
if (loc->e)
@@ -4471,8 +4478,6 @@ static unsigned int
execute_vrp (struct function *fun, bool warn_array_bounds_p)
{
loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
- rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
- scev_initialize ();
/* ??? This ends up using stale EDGE_DFS_BACK for liveness computation.
Inserting assertions may split edges which will invalidate
@@ -4480,6 +4485,9 @@ execute_vrp (struct function *fun, bool
warn_array_bounds_p)
vrp_asserts assert_engine (fun);
assert_engine.insert_range_assertions ();
+ rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
+ scev_initialize ();
+
/* For visiting PHI nodes we need EDGE_DFS_BACK computed. */
mark_dfs_back_edges ();
diff --git a/gcc/vr-values.c b/gcc/vr-values.c
index 3d0be8edb3b..1ce421e7bef 100644
--- a/gcc/vr-values.c
+++ b/gcc/vr-values.c
@@ -2882,7 +2882,7 @@ vr_values::extract_range_from_phi_node (gphi *phi,
goto infinite_check;
}
- goto update_range;
+ goto scev_check;
varying:
vr_result->set_varying (TREE_TYPE (lhs));
@@ -2892,10 +2892,23 @@ scev_check:
scev_check can be reached from two paths, one is a fall through from
above
"varying" label, the other is direct goto from code block which tries to
avoid infinite simulation. */
- if (scev_initialized_p ()
- && (l = loop_containing_stmt (phi))
- && l->header == gimple_bb (phi))
- adjust_range_with_scev (vr_result, l, phi, lhs);
+ if (scev_initialized_p ())
+ {
+ edge e;
+ if ((l = loop_containing_stmt (phi))
+ && l->header == gimple_bb (phi))
+ adjust_range_with_scev (vr_result, l, phi, lhs);
+ else if (single_pred_p (gimple_bb (phi))
+ && ((e = single_pred_edge (gimple_bb (phi))), true)
+ && loop_exit_edge_p (e->src->loop_father, e))
+ {
+ bool folded_casts;
+ tree ev = final_value_at_exit (PHI_ARG_DEF_FROM_EDGE (phi, e), e,
+ &folded_casts);
+ if (is_gimple_min_invariant (ev) || TREE_CODE (ev) == SSA_NAME)
+ vr_result->update (ev, ev, VR_RANGE);
+ }
+ }
infinite_check:
/* If we will end up with a (-INF, +INF) range, set it to