Robin Dapp <rd...@linux.ibm.com> writes: > A swap-style idiom like > tmp = a > a = b > b = tmp > would be transformed like > tmp_tmp = cond ? a : tmp > tmp_a = cond ? b : a > tmp_b = cond ? tmp_tmp : b > [...] > including rewiring the first source operand to previous writes (e.g. tmp -> > tmp_tmp). > > The code would recognize this, though, and change the last line to > tmp_b = cond ? a : b. > > Without additional temporaries we can now emit the following sequence: > tmp = a // (no condition here) > a = cond ? b : a > b = cond ? tmp : b > avoiding any rewiring which would break things now. > > check_need_cmovs () finds swap-style idioms and marks the first of the > three instructions as not needing a cmove.
Looks like a nice optimisation, but could we just test whether the destination of a set isn't live on exit from the then block? I think we could do that on the fly during the main noce_convert_multiple_sets loop. Thanks, Richard > --- > gcc/ifcvt.c | 97 ++++++++++++++++++++++++++++++++++++++++++----------- > 1 file changed, 78 insertions(+), 19 deletions(-) > > diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c > index 955f9541f60..09bf443656c 100644 > --- a/gcc/ifcvt.c > +++ b/gcc/ifcvt.c > @@ -103,6 +103,7 @@ static rtx_insn *block_has_only_trap (basic_block); > static void check_need_temps (basic_block bb, > hash_map<rtx, bool> *need_temp, > rtx cond); > +static void check_need_cmovs (basic_block bb, hash_map<rtx, bool> > *need_cmov); > > > /* Count the number of non-jump active insns in BB. */ > @@ -3207,6 +3208,7 @@ noce_convert_multiple_sets (struct noce_if_info > *if_info) > int count = 0; > > hash_map<rtx, bool> need_temps; > + hash_map<rtx, bool> need_no_cmovs; > > /* If we newly set a CC before a cmov, we might need a temporary > even though the compare will be removed by a later pass. Costing > @@ -3214,6 +3216,9 @@ noce_convert_multiple_sets (struct noce_if_info > *if_info) > > check_need_temps (then_bb, &need_temps, cond); > > + /* Identify swap-style idioms. */ > + check_need_cmovs (then_bb, &need_no_cmovs); > + > hash_map<rtx, bool> temps_created; > > FOR_BB_INSNS (then_bb, insn) > @@ -3229,10 +3234,8 @@ noce_convert_multiple_sets (struct noce_if_info > *if_info) > rtx new_val = SET_SRC (set); > rtx old_val = target; > > - rtx dest = SET_DEST (set); > - > rtx temp; > - if (need_temps.get (dest)) > + if (need_temps.get (target)) > { > temp = gen_reg_rtx (GET_MODE (target)); > temps_created.put (target, true); > @@ -3241,18 +3244,11 @@ noce_convert_multiple_sets (struct noce_if_info > *if_info) > temp = target; > > /* If we were supposed to read from an earlier write in this block, > - we've changed the register allocation. Rewire the read. While > - we are looking, also try to catch a swap idiom. */ > + we've changed the register allocation. Rewire the read. */ > for (int i = count - 1; i >= 0; --i) > if (reg_overlap_mentioned_p (new_val, targets[i])) > { > - /* Catch a "swap" style idiom. */ > - if (find_reg_note (insn, REG_DEAD, new_val) != NULL_RTX) > - /* The write to targets[i] is only live until the read > - here. As the condition codes match, we can propagate > - the set to here. */ > - new_val = SET_SRC (single_set (unmodified_insns[i])); > - else > + if (find_reg_note (insn, REG_DEAD, new_val) == NULL_RTX) > new_val = temporaries[i]; > break; > } > @@ -3324,8 +3320,11 @@ noce_convert_multiple_sets (struct noce_if_info > *if_info) > > { > start_sequence (); > - temp_dest1 = noce_emit_cmove (if_info, temp, cond_code, > - x, y, new_val, old_val, NULL_RTX); > + if (!need_no_cmovs.get (insn)) > + temp_dest1 = noce_emit_cmove (if_info, temp, cond_code, > + x, y, new_val, old_val, NULL_RTX); > + else > + noce_emit_move_insn (target, new_val); > > /* If we failed to expand the conditional move, drop out and don't > try to continue. */ > @@ -3346,13 +3345,16 @@ noce_convert_multiple_sets (struct noce_if_info > *if_info) > end_sequence (); > } > > - /* Now try emitting one passing a non-canonicalized cc comparison. > - This allows the backend to emit a cmov directly without additional > + /* Now try emitting a cmov passing a non-canonicalized cc comparison. > + This allows backends to emit a cmov directly without additional > compare. */ > { > start_sequence (); > - temp_dest2 = noce_emit_cmove (if_info, target, cond_code, > - x, y, new_val, old_val, cc_cmp); > + if (!need_no_cmovs.get (insn)) > + temp_dest2 = noce_emit_cmove (if_info, target, cond_code, > + x, y, new_val, old_val, cc_cmp); > + else > + noce_emit_move_insn (target, new_val); > > /* If we failed to expand the conditional move, drop out and don't > try to continue. */ > @@ -3931,6 +3933,7 @@ check_cond_move_block (basic_block bb, > > /* Check for which sets we need to emit temporaries to hold the destination > of > a conditional move. */ > + > static void > check_need_temps (basic_block bb, hash_map<rtx, bool> *need_temp, rtx cond) > { > @@ -3938,7 +3941,7 @@ check_need_temps (basic_block bb, hash_map<rtx, bool> > *need_temp, rtx cond) > > FOR_BB_INSNS (bb, insn) > { > - rtx set, dest; > + rtx set, src, dest; > > if (!active_insn_p (insn)) > continue; > @@ -3947,12 +3950,68 @@ check_need_temps (basic_block bb, hash_map<rtx, bool> > *need_temp, rtx cond) > if (set == NULL_RTX) > continue; > > + src = SET_SRC (set); > dest = SET_DEST (set); > > /* noce_emit_cmove will emit the condition check every time it is > called > so we need a temp register if the destination is modified. */ > if (reg_overlap_mentioned_p (dest, cond)) > need_temp->put (dest, true); > + > + } > +} > + > +/* Find local swap-style idioms in BB and mark the first insn (1) > + that is only a temporary as not needing a conditional move as > + it is going to be dead afterwards anyway. > + > + (1) int tmp = a; > + a = b; > + b = tmp; > + > + > + ifcvt > + --> > + > + load tmp,a > + cmov a,b > + cmov b,tmp */ > + > +static void > +check_need_cmovs (basic_block bb, hash_map<rtx, bool> *need_cmov) > +{ > + rtx_insn *insn; > + int count = 0; > + auto_vec<rtx> insns; > + auto_vec<rtx> dests; > + > + FOR_BB_INSNS (bb, insn) > + { > + rtx set, src, dest; > + > + if (!active_insn_p (insn)) > + continue; > + > + set = single_set (insn); > + if (set == NULL_RTX) > + continue; > + > + src = SET_SRC (set); > + dest = SET_DEST (set); > + > + for (int i = count - 1; i >= 0; --i) > + { > + if (reg_overlap_mentioned_p (src, dests[i]) > + && find_reg_note (insn, REG_DEAD, src) != NULL_RTX) > + { > + need_cmov->put (insns[i], false); > + } > + } > + > + insns.safe_push (insn); > + dests.safe_push (dest); > + > + count++; > } > }