This makes sure to perform final value replacement of constants when we also are sure to propagate those, like in VRP. This avoids spurious diagnostics when doing so only from within SCCP which can leave unreachable loops in the IL triggering bogus diagnostics.
The choice is VRP because it already defers to SCEV for PHI nodes so the following makes it analyze final values for loop exit PHIs. To make that work it arranges for loop-closed SSA to be built only after inserting ASSERT_EXPRs. Bootstrapped and tested on x86_64-unknown-linux-gnu. It does FAIL: gcc.dg/ubsan/pr94423.c -O2 (internal compiler error) where VRP somehow propagates abnormals in a bogus way. It also needs adjustment for a couple of fails like FAIL: gcc.dg/vect/no-scevccp-outer-11.c scan-tree-dump-times vect "OUTER LOOP VECTORIZED." 1 where the testcases do -fno-tree-scev-cprop but that no longer has the desired effect then (might just guard the final value replacement in VRP with this). Any comments? I probably can plug final_value_at_exit elsewhere than VRP (to avoid the issues with asserts) but it somewhat felt naturally because that already uses SCEV. Thanks, Richard. 2021-05-28 Richard Biener <rguent...@suse.de> PR tree-optimization/100801 * tree-scalar-evolution.h (final_value_at_exit): Declare. * tree-scalar-evolution.c (final_value_at_exit): New API, split out from ... (final_value_replacement_loop): ... here. * tree-vrp.c (execute_vrp): Build loop-closed SSA only after inserting assert expressions. (vrp_asserts::process_assert_insertions_for): Avoid inserting asserts for RESULT_DECLs, when building loop-closed SSA after assert insertion we're not able to propagate out all PHIs and thus trigger IL validation. * vr-values.c (vr_values::extract_range_from_phi_node): For loop exit PHIs also perform final value analysis using SCEV. * gcc.target/i386/pr100801.c: New testcase. --- gcc/testsuite/gcc.target/i386/pr100801.c | 29 ++++++++++++++++++++++++ gcc/tree-scalar-evolution.c | 28 +++++++++++++++-------- gcc/tree-scalar-evolution.h | 1 + gcc/tree-vrp.c | 12 ++++++++-- gcc/vr-values.c | 23 +++++++++++++++---- 5 files changed, 77 insertions(+), 16 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr100801.c diff --git a/gcc/testsuite/gcc.target/i386/pr100801.c b/gcc/testsuite/gcc.target/i386/pr100801.c new file mode 100644 index 00000000000..c58f9be6898 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr100801.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ + +#include <x86intrin.h> + +void copy_32_unaligned(unsigned int* dest, const unsigned int* src, size_t count) +{ + // invariant/nop + __m128i shufmask = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + + size_t i; + for (i = 0; i + 4 <= count; i += 4) { + __m128i input = _mm_loadu_si128((const __m128i*)(&src[i])); + __m128i output = input; + // no warning without the shuffle: + output = _mm_shuffle_epi8(input, shufmask); + _mm_storeu_si128((__m128i*)(&dest[i]), output); + } + for (; i < count; ++i) { // handle residual elements + dest[i] = src[i]; /* { dg-bogus "invokes undefined" } */ + } +} + +void test(unsigned int* buf1, unsigned int* buf2) +{ + copy_32_unaligned(buf2, buf1, + // multiples of 4 and greater or equal then 12 trigger it: + 12); +} diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c index b22d49a0ab6..6917b8c9dab 100644 --- a/gcc/tree-scalar-evolution.c +++ b/gcc/tree-scalar-evolution.c @@ -3491,6 +3491,23 @@ expression_expensive_p (tree expr) || expanded_size > cache.elements ()); } +/* Compute the final value of DEF, a DEF inside the loop E exits from. */ + +tree +final_value_at_exit (tree def, edge e, bool *folded_casts) +{ + class loop *loop = e->src->loop_father; + gcc_assert (loop_exit_edge_p (loop, e)); + class loop *ex_loop + = superloop_at_depth (loop, loop_depth (e->dest->loop_father) + 1); + tree ev = analyze_scalar_evolution_in_loop (ex_loop, loop, def, folded_casts); + ev = compute_overall_effect_of_inner_loop (ex_loop, ev); + if (!tree_does_not_contain_chrecs (def) + || chrec_contains_symbols_defined_in_loop (ev, ex_loop->num)) + ev = chrec_dont_know; + return ev; +} + /* Do final value replacement for LOOP, return true if we did anything. */ bool @@ -3513,10 +3530,6 @@ final_value_replacement_loop (class loop *loop) /* Set stmt insertion pointer. All stmts are inserted before this point. */ gimple_stmt_iterator gsi = gsi_after_labels (exit->dest); - class loop *ex_loop - = superloop_at_depth (loop, - loop_depth (exit->dest->loop_father) + 1); - bool any = false; gphi_iterator psi; for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); ) @@ -3538,11 +3551,8 @@ final_value_replacement_loop (class loop *loop) } bool folded_casts; - def = analyze_scalar_evolution_in_loop (ex_loop, loop, def, - &folded_casts); - def = compute_overall_effect_of_inner_loop (ex_loop, def); - if (!tree_does_not_contain_chrecs (def) - || chrec_contains_symbols_defined_in_loop (def, ex_loop->num) + def = final_value_at_exit (def, exit, &folded_casts); + if (def == chrec_dont_know /* Moving the computation from the loop may prolong life range of some ssa names, which may cause problems if they appear on abnormal edges. */ diff --git a/gcc/tree-scalar-evolution.h b/gcc/tree-scalar-evolution.h index d679f7285b3..18186c32d3d 100644 --- a/gcc/tree-scalar-evolution.h +++ b/gcc/tree-scalar-evolution.h @@ -33,6 +33,7 @@ extern tree analyze_scalar_evolution (class loop *, tree); extern tree instantiate_scev (edge, class loop *, tree); extern tree resolve_mixers (class loop *, tree, bool *); extern void gather_stats_on_scev_database (void); +extern tree final_value_at_exit (tree, edge, bool *); extern bool final_value_replacement_loop (class loop *); extern unsigned int scev_const_prop (void); extern bool expression_expensive_p (tree); diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c index 450926d5f9b..b3d937cb27f 100644 --- a/gcc/tree-vrp.c +++ b/gcc/tree-vrp.c @@ -3416,6 +3416,13 @@ vrp_asserts::process_assert_insertions_for (tree name, assert_locus *loc) if (loc->expr == loc->val) return false; + /* Do not insert assertions for RESULT_DECLs, those are asserted to be + read-only in the IL and are always non-NULL. */ + if (SSA_NAME_VAR (name) + && TREE_CODE (SSA_NAME_VAR (name)) == RESULT_DECL + && DECL_BY_REFERENCE (SSA_NAME_VAR (name))) + return false; + cond = build2 (loc->comp_code, boolean_type_node, loc->expr, loc->val); assert_stmt = build_assert_expr_for (cond, name); if (loc->e) @@ -4471,8 +4478,6 @@ static unsigned int execute_vrp (struct function *fun, bool warn_array_bounds_p) { loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); - rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); - scev_initialize (); /* ??? This ends up using stale EDGE_DFS_BACK for liveness computation. Inserting assertions may split edges which will invalidate @@ -4480,6 +4485,9 @@ execute_vrp (struct function *fun, bool warn_array_bounds_p) vrp_asserts assert_engine (fun); assert_engine.insert_range_assertions (); + rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); + scev_initialize (); + /* For visiting PHI nodes we need EDGE_DFS_BACK computed. */ mark_dfs_back_edges (); diff --git a/gcc/vr-values.c b/gcc/vr-values.c index 3d0be8edb3b..1ce421e7bef 100644 --- a/gcc/vr-values.c +++ b/gcc/vr-values.c @@ -2882,7 +2882,7 @@ vr_values::extract_range_from_phi_node (gphi *phi, goto infinite_check; } - goto update_range; + goto scev_check; varying: vr_result->set_varying (TREE_TYPE (lhs)); @@ -2892,10 +2892,23 @@ scev_check: scev_check can be reached from two paths, one is a fall through from above "varying" label, the other is direct goto from code block which tries to avoid infinite simulation. */ - if (scev_initialized_p () - && (l = loop_containing_stmt (phi)) - && l->header == gimple_bb (phi)) - adjust_range_with_scev (vr_result, l, phi, lhs); + if (scev_initialized_p ()) + { + edge e; + if ((l = loop_containing_stmt (phi)) + && l->header == gimple_bb (phi)) + adjust_range_with_scev (vr_result, l, phi, lhs); + else if (single_pred_p (gimple_bb (phi)) + && ((e = single_pred_edge (gimple_bb (phi))), true) + && loop_exit_edge_p (e->src->loop_father, e)) + { + bool folded_casts; + tree ev = final_value_at_exit (PHI_ARG_DEF_FROM_EDGE (phi, e), e, + &folded_casts); + if (is_gimple_min_invariant (ev) || TREE_CODE (ev) == SSA_NAME) + vr_result->update (ev, ev, VR_RANGE); + } + } infinite_check: /* If we will end up with a (-INF, +INF) range, set it to -- 2.26.2