This fixes another testcase that shows that our ANTIC iteration can fail to converge. The fix continues what we did with previous fixes, avoid spuriously removing stuff from the expression side of the sets. This time going full-in and allowing multiple expressions for the same value in the sets. I've added verification that the value-sets never grow during iteration which barfed on some of the related existing testcases and thus helped finishing this and fixing all places where we end up somewhat randomly choosing one or another expression to drop.
I've verified with a GIMPLE testcase that if at insertion time we have multiple partially redundant expressions for the same value we insert/hoist it only once. So the only "bad" effect of the patch is that the expression part of the ANTIC sets grows -- by not throwing away random stuff we might also arrive at larger (value) solutions for ANTIC which means we may catch previously missed optimizations (or pessimizations as you know PRE...). I'm not entirely happy with the timing but I'm quite confident in the approach also (again) having heavily discussed this with Micha. Re-boostrap and regtest running on x86_64-unknown-linux-gnu. I managed to go into a different solution at the beginning asking for that pred/phiblock->edge cleanup and decided to leave that in... The new verification is guarded with flag_checking so if it would trigger but wouldn't result in not converging a release build should be not affected. Richard. 2018-01-03 Richard Biener <rguent...@suse.de> PR tree-optimization/84427 * tree-ssa-pre.c (bitmap_remove_expr_from_set): Remove. (bitmap_set_subtract_values): Rewrite to handle multiple exprs per value. (clean): Likewise. (prune_clobbered_mems): Likewise. (phi_translate): Take edge instead of pred/phiblock. (phi_translate_1): Likewise. (phi_translate_set): Likewise. Insert all translated exprs for a value into the set, keeping possibly multiple expressions per value. (compute_antic_aux): Adjust for phi_translate changes. When intersecting union the expressions and prune those not in the final value set, keeping possibly multiple expressions per value. Do not use value-insertion for unioning ANTIC_OUT U EXP_GEN - TMP_GEN but merge all expressions. Add verification that the value-sets only shrink during iteration. (compute_partial_antic_aux): Adjust for the phi_translate changes. (do_pre_regular_insertion): Likewise. (do_pre_partial_partial_insertion): Likewise. * gcc.dg/torture/pr84427.c: New testcase. Index: gcc/tree-ssa-pre.c =================================================================== --- gcc/tree-ssa-pre.c (revision 258097) +++ gcc/tree-ssa-pre.c (working copy) @@ -696,16 +696,6 @@ sccvn_valnum_from_value_id (unsigned int return NULL_TREE; } -/* Remove an expression EXPR from a bitmapped set. */ - -static void -bitmap_remove_expr_from_set (bitmap_set_t set, pre_expr expr) -{ - unsigned int val = get_expr_value_id (expr); - bitmap_clear_bit (&set->values, val); - bitmap_clear_bit (&set->expressions, get_expression_id (expr)); -} - /* Insert an expression EXPR into a bitmapped set. */ static void @@ -805,20 +795,21 @@ bitmap_set_subtract_values (bitmap_set_t { unsigned int i; bitmap_iterator bi; - pre_expr to_remove = NULL; + unsigned to_remove = -1U; + bitmap_and_compl_into (&a->values, &b->values); FOR_EACH_EXPR_ID_IN_SET (a, i, bi) { - if (to_remove) + if (to_remove != -1U) { - bitmap_remove_expr_from_set (a, to_remove); - to_remove = NULL; + bitmap_clear_bit (&a->expressions, to_remove); + to_remove = -1U; } pre_expr expr = expression_for_id (i); - if (bitmap_bit_p (&b->values, get_expr_value_id (expr))) - to_remove = expr; + if (! bitmap_bit_p (&a->values, get_expr_value_id (expr))) + to_remove = i; } - if (to_remove) - bitmap_remove_expr_from_set (a, to_remove); + if (to_remove != -1U) + bitmap_clear_bit (&a->expressions, to_remove); } @@ -1335,17 +1326,17 @@ get_representative_for (const pre_expr e static pre_expr -phi_translate (pre_expr expr, bitmap_set_t set1, bitmap_set_t set2, - basic_block pred, basic_block phiblock); +phi_translate (pre_expr expr, bitmap_set_t set1, bitmap_set_t set2, edge e); /* Translate EXPR using phis in PHIBLOCK, so that it has the values of the phis in PRED. Return NULL if we can't find a leader for each part of the translated expression. */ static pre_expr -phi_translate_1 (pre_expr expr, bitmap_set_t set1, bitmap_set_t set2, - basic_block pred, basic_block phiblock) +phi_translate_1 (pre_expr expr, bitmap_set_t set1, bitmap_set_t set2, edge e) { + basic_block pred = e->src; + basic_block phiblock = e->dest; switch (expr->kind) { case NARY: @@ -1366,7 +1357,7 @@ phi_translate_1 (pre_expr expr, bitmap_s pre_expr leader, result; unsigned int op_val_id = VN_INFO (newnary->op[i])->value_id; leader = find_leader_in_sets (op_val_id, set1, set2); - result = phi_translate (leader, set1, set2, pred, phiblock); + result = phi_translate (leader, set1, set2, e); if (result && result != leader) /* Force a leader as well as we are simplifying this expression. */ @@ -1397,7 +1388,7 @@ phi_translate_1 (pre_expr expr, bitmap_s to be inserted and increased register pressure. See PR77498 - this avoids doing predcoms work in a less efficient way. */ - if (find_edge (pred, phiblock)->flags & EDGE_DFS_BACK) + if (e->flags & EDGE_DFS_BACK) ; else { @@ -1488,7 +1479,7 @@ phi_translate_1 (pre_expr expr, bitmap_s } op_val_id = VN_INFO (op[n])->value_id; leader = find_leader_in_sets (op_val_id, set1, set2); - opresult = phi_translate (leader, set1, set2, pred, phiblock); + opresult = phi_translate (leader, set1, set2, e); if (opresult && opresult != leader) { tree name = get_representative_for (opresult); @@ -1616,7 +1607,6 @@ phi_translate_1 (pre_expr expr, bitmap_s if (gimple_code (def_stmt) == GIMPLE_PHI && gimple_bb (def_stmt) == phiblock) { - edge e = find_edge (pred, gimple_bb (def_stmt)); tree def = PHI_ARG_DEF (def_stmt, e->dest_idx); /* Handle constant. */ @@ -1639,8 +1629,7 @@ phi_translate_1 (pre_expr expr, bitmap_s /* Wrapper around phi_translate_1 providing caching functionality. */ static pre_expr -phi_translate (pre_expr expr, bitmap_set_t set1, bitmap_set_t set2, - basic_block pred, basic_block phiblock) +phi_translate (pre_expr expr, bitmap_set_t set1, bitmap_set_t set2, edge e) { expr_pred_trans_t slot = NULL; pre_expr phitrans; @@ -1658,7 +1647,7 @@ phi_translate (pre_expr expr, bitmap_set /* Don't add translations of NAMEs as those are cheap to translate. */ if (expr->kind != NAME) { - if (phi_trans_add (&slot, expr, pred)) + if (phi_trans_add (&slot, expr, e->src)) return slot->v; /* Store NULL for the value we want to return in the case of recursing. */ @@ -1666,7 +1655,7 @@ phi_translate (pre_expr expr, bitmap_set } /* Translate. */ - phitrans = phi_translate_1 (expr, set1, set2, pred, phiblock); + phitrans = phi_translate_1 (expr, set1, set2, e); if (slot) { @@ -1687,14 +1676,13 @@ phi_translate (pre_expr expr, bitmap_set expressions in DEST. */ static void -phi_translate_set (bitmap_set_t dest, bitmap_set_t set, basic_block pred, - basic_block phiblock) +phi_translate_set (bitmap_set_t dest, bitmap_set_t set, edge e) { vec<pre_expr> exprs; pre_expr expr; int i; - if (gimple_seq_empty_p (phi_nodes (phiblock))) + if (gimple_seq_empty_p (phi_nodes (e->dest))) { bitmap_set_copy (dest, set); return; @@ -1704,18 +1692,11 @@ phi_translate_set (bitmap_set_t dest, bi FOR_EACH_VEC_ELT (exprs, i, expr) { pre_expr translated; - translated = phi_translate (expr, set, NULL, pred, phiblock); + translated = phi_translate (expr, set, NULL, e); if (!translated) continue; - /* We might end up with multiple expressions from SET being - translated to the same value. In this case we do not want - to retain the NARY or REFERENCE expression but prefer a NAME - which would be the leader. */ - if (translated->kind == NAME) - bitmap_value_replace_in_set (dest, translated); - else - bitmap_value_insert_into_set (dest, translated); + bitmap_insert_into_set (dest, translated); } exprs.release (); } @@ -1918,7 +1899,15 @@ clean (bitmap_set_t set1, bitmap_set_t s FOR_EACH_VEC_ELT (exprs, i, expr) { if (!valid_in_sets (set1, set2, expr)) - bitmap_remove_expr_from_set (set1, expr); + { + unsigned int val = get_expr_value_id (expr); + bitmap_clear_bit (&set1->expressions, get_expression_id (expr)); + /* We are entered with possibly multiple expressions for a value + so before removing a value from the set see if there's an + expression for it left. */ + if (! bitmap_find_leader (set1, val)) + bitmap_clear_bit (&set1->values, val); + } } exprs.release (); } @@ -1931,15 +1920,17 @@ prune_clobbered_mems (bitmap_set_t set, { bitmap_iterator bi; unsigned i; - pre_expr to_remove = NULL; + unsigned to_remove = -1U; + bool any_removed = false; FOR_EACH_EXPR_ID_IN_SET (set, i, bi) { /* Remove queued expr. */ - if (to_remove) + if (to_remove != -1U) { - bitmap_remove_expr_from_set (set, to_remove); - to_remove = NULL; + bitmap_clear_bit (&set->expressions, to_remove); + any_removed = true; + to_remove = -1U; } pre_expr expr = expression_for_id (i); @@ -1955,7 +1946,7 @@ prune_clobbered_mems (bitmap_set_t set, block, gimple_bb (def_stmt))) || (gimple_bb (def_stmt) == block && value_dies_in_block_x (expr, block)))) - to_remove = expr; + to_remove = i; } } else if (expr->kind == NARY) @@ -1967,13 +1958,36 @@ prune_clobbered_mems (bitmap_set_t set, as the available expression might be after the exit point. */ if (BB_MAY_NOTRETURN (block) && vn_nary_may_trap (nary)) - to_remove = expr; + to_remove = i; } } /* Remove queued expr. */ - if (to_remove) - bitmap_remove_expr_from_set (set, to_remove); + if (to_remove != -1U) + { + bitmap_clear_bit (&set->expressions, to_remove); + any_removed = true; + } + + /* Above we only removed expressions, now clean the set of values + which no longer have any corresponding expression. We cannot + clear the value at the time we remove an expression since there + may be multiple expressions per value. + If we'd queue possibly to be removed values we could use + the bitmap_find_leader way to see if there's still an expression + for it. For some ratio of to be removed values and number of + values/expressions in the set this might be faster than rebuilding + the value-set. */ + if (any_removed) + { + bitmap_clear (&set->values); + FOR_EACH_EXPR_ID_IN_SET (set, i, bi) + { + pre_expr expr = expression_for_id (i); + unsigned int value_id = get_expr_value_id (expr); + bitmap_set_bit (&set->values, value_id); + } + } } static sbitmap has_abnormal_preds; @@ -1993,11 +2007,10 @@ static bool compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge) { bitmap_set_t S, old, ANTIC_OUT; - bitmap_iterator bi; - unsigned int bii; edge e; edge_iterator ei; + bool was_visited = BB_VISITED (block); bool changed = ! BB_VISITED (block); BB_VISITED (block) = 1; old = ANTIC_OUT = S = NULL; @@ -2017,9 +2030,9 @@ compute_antic_aux (basic_block block, bo translate through. */ else if (single_succ_p (block)) { - basic_block succ_bb = single_succ (block); - gcc_assert (BB_VISITED (succ_bb)); - phi_translate_set (ANTIC_OUT, ANTIC_IN (succ_bb), block, succ_bb); + e = single_succ_edge (block); + gcc_assert (BB_VISITED (e->dest)); + phi_translate_set (ANTIC_OUT, ANTIC_IN (e->dest), e); } /* If we have multiple successors, we take the intersection of all of them. Note that in the case of loop exit phi nodes, we may have @@ -2027,16 +2040,16 @@ compute_antic_aux (basic_block block, bo else { size_t i; - basic_block bprime, first = NULL; + edge first = NULL; - auto_vec<basic_block> worklist (EDGE_COUNT (block->succs)); + auto_vec<edge> worklist (EDGE_COUNT (block->succs)); FOR_EACH_EDGE (e, ei, block->succs) { if (!first && BB_VISITED (e->dest)) - first = e->dest; + first = e; else if (BB_VISITED (e->dest)) - worklist.quick_push (e->dest); + worklist.quick_push (e); else { /* Unvisited successors get their ANTIC_IN replaced by the @@ -2053,7 +2066,7 @@ compute_antic_aux (basic_block block, bo which is guaranteed by iteration order. */ gcc_assert (first != NULL); - phi_translate_set (ANTIC_OUT, ANTIC_IN (first), block, first); + phi_translate_set (ANTIC_OUT, ANTIC_IN (first->dest), first); /* If we have multiple successors we need to intersect the ANTIC_OUT sets. For values that's a simple intersection but for @@ -2062,31 +2075,29 @@ compute_antic_aux (basic_block block, bo Avoid randomness and running into cycles like for PR82129 and canonicalize the expression we choose to the one with the lowest id. This requires we actually compute the union first. */ - FOR_EACH_VEC_ELT (worklist, i, bprime) + FOR_EACH_VEC_ELT (worklist, i, e) { - if (!gimple_seq_empty_p (phi_nodes (bprime))) + if (!gimple_seq_empty_p (phi_nodes (e->dest))) { bitmap_set_t tmp = bitmap_set_new (); - phi_translate_set (tmp, ANTIC_IN (bprime), block, bprime); + phi_translate_set (tmp, ANTIC_IN (e->dest), e); bitmap_and_into (&ANTIC_OUT->values, &tmp->values); bitmap_ior_into (&ANTIC_OUT->expressions, &tmp->expressions); bitmap_set_free (tmp); } else { - bitmap_and_into (&ANTIC_OUT->values, &ANTIC_IN (bprime)->values); + bitmap_and_into (&ANTIC_OUT->values, &ANTIC_IN (e->dest)->values); bitmap_ior_into (&ANTIC_OUT->expressions, - &ANTIC_IN (bprime)->expressions); + &ANTIC_IN (e->dest)->expressions); } } if (! worklist.is_empty ()) { - /* Prune expressions not in the value set, canonicalizing to - expression with lowest ID. */ + /* Prune expressions not in the value set. */ bitmap_iterator bi; unsigned int i; unsigned int to_clear = -1U; - bitmap seen_value = BITMAP_ALLOC (NULL); FOR_EACH_EXPR_ID_IN_SET (ANTIC_OUT, i, bi) { if (to_clear != -1U) @@ -2096,13 +2107,11 @@ compute_antic_aux (basic_block block, bo } pre_expr expr = expression_for_id (i); unsigned int value_id = get_expr_value_id (expr); - if (!bitmap_bit_p (&ANTIC_OUT->values, value_id) - || !bitmap_set_bit (seen_value, value_id)) + if (!bitmap_bit_p (&ANTIC_OUT->values, value_id)) to_clear = i; } if (to_clear != -1U) bitmap_clear_bit (&ANTIC_OUT->expressions, to_clear); - BITMAP_FREE (seen_value); } } @@ -2119,15 +2128,26 @@ compute_antic_aux (basic_block block, bo /* Then union in the ANTIC_OUT - TMP_GEN values, to get ANTIC_OUT U EXP_GEN - TMP_GEN */ - FOR_EACH_EXPR_ID_IN_SET (S, bii, bi) - bitmap_value_insert_into_set (ANTIC_IN (block), - expression_for_id (bii)); + bitmap_ior_into (&ANTIC_IN (block)->values, &S->values); + bitmap_ior_into (&ANTIC_IN (block)->expressions, &S->expressions); /* clean (ANTIC_IN (block)) is defered to after the iteration converged because it can cause non-convergence, see for example PR81181. */ if (!bitmap_set_equal (old, ANTIC_IN (block))) - changed = true; + { + changed = true; + /* After the initial value set computation the value set may + only shrink during the iteration. */ + if (was_visited && flag_checking) + { + bitmap_iterator bi; + unsigned int i; + EXECUTE_IF_AND_COMPL_IN_BITMAP (&ANTIC_IN (block)->values, + &old->values, 0, i, bi) + gcc_unreachable (); + } + } maybe_dump_sets: if (dump_file && (dump_flags & TDF_DETAILS)) @@ -2202,45 +2222,44 @@ compute_partial_antic_aux (basic_block b VH.1 + 1 (VH.2), VH.2 + 1 (VH.3), etc), forever. */ else if (single_succ_p (block)) { - basic_block succ = single_succ (block); - if (!(single_succ_edge (block)->flags & EDGE_DFS_BACK)) - phi_translate_set (PA_OUT, PA_IN (succ), block, succ); + e = single_succ_edge (block); + if (!(e->flags & EDGE_DFS_BACK)) + phi_translate_set (PA_OUT, PA_IN (e->dest), e); } /* If we have multiple successors, we take the union of all of them. */ else { size_t i; - basic_block bprime; - auto_vec<basic_block> worklist (EDGE_COUNT (block->succs)); + auto_vec<edge> worklist (EDGE_COUNT (block->succs)); FOR_EACH_EDGE (e, ei, block->succs) { if (e->flags & EDGE_DFS_BACK) continue; - worklist.quick_push (e->dest); + worklist.quick_push (e); } if (worklist.length () > 0) { - FOR_EACH_VEC_ELT (worklist, i, bprime) + FOR_EACH_VEC_ELT (worklist, i, e) { unsigned int i; bitmap_iterator bi; - FOR_EACH_EXPR_ID_IN_SET (ANTIC_IN (bprime), i, bi) + FOR_EACH_EXPR_ID_IN_SET (ANTIC_IN (e->dest), i, bi) bitmap_value_insert_into_set (PA_OUT, expression_for_id (i)); - if (!gimple_seq_empty_p (phi_nodes (bprime))) + if (!gimple_seq_empty_p (phi_nodes (e->dest))) { bitmap_set_t pa_in = bitmap_set_new (); - phi_translate_set (pa_in, PA_IN (bprime), block, bprime); + phi_translate_set (pa_in, PA_IN (e->dest), e); FOR_EACH_EXPR_ID_IN_SET (pa_in, i, bi) bitmap_value_insert_into_set (PA_OUT, expression_for_id (i)); bitmap_set_free (pa_in); } else - FOR_EACH_EXPR_ID_IN_SET (PA_IN (bprime), i, bi) + FOR_EACH_EXPR_ID_IN_SET (PA_IN (e->dest), i, bi) bitmap_value_insert_into_set (PA_OUT, expression_for_id (i)); } @@ -3158,8 +3177,7 @@ do_pre_regular_insertion (basic_block bl gcc_assert (!(pred->flags & EDGE_FAKE)); bprime = pred->src; /* We are looking at ANTIC_OUT of bprime. */ - eprime = phi_translate (expr, ANTIC_IN (block), NULL, - bprime, block); + eprime = phi_translate (expr, ANTIC_IN (block), NULL, pred); /* eprime will generally only be NULL if the value of the expression, translated @@ -3315,8 +3333,7 @@ do_pre_partial_partial_insertion (basic_ gcc_assert (!(pred->flags & EDGE_FAKE)); bprime = pred->src; eprime = phi_translate (expr, ANTIC_IN (block), - PA_IN (block), - bprime, block); + PA_IN (block), pred); /* eprime will generally only be NULL if the value of the expression, translated Index: gcc/testsuite/gcc.dg/torture/pr84427.c =================================================================== --- gcc/testsuite/gcc.dg/torture/pr84427.c (nonexistent) +++ gcc/testsuite/gcc.dg/torture/pr84427.c (working copy) @@ -0,0 +1,22 @@ +/* { dg-do compile } */ + +short a, d, e; +unsigned char b; +int c, f; +char g, h; +void fn2(int, int); +void fn1() { fn2(e, a); } +void fn2(int p1, int p2) +{ +l1: + b = a; + for (; h; h--) + if (p1) + g = p2 * c; + else + { + c = d; + if (f) + goto l1; + } +}