So gcc.target/aarch64/rev16_2.c started to fail after r15-268-g9dbff9c05520a7, the problem is combine now rejects the instruction combine. This happens because after a different combine which uses a define_split and that define_split creates a new pseudo register. That new pseudo register is dead after the last instruction in the stream BUT combine never creates a REG_DEAD for it. So combine thinks it is still needed after and now with the i2 not changing, combine rejects the combine.
Now combine should be creating a REG_DEAD for the new pseudo registers for the last instruction of the split. This fixes rev16_2.c and also improves the situtation in other cases by removing other dead instructions. Bootstrapped and tested on aarch64-linux-gnu and x86_64-linux-gnu. gcc/ChangeLog: PR rtl-optimization/118914 * combine.cc (recog_for_combine): Add old_nregs and new_nregs argument (defaulting to 0). Update call to recog_for_combine_1. (combine_split_insns): Add old_nregs and new_nregs arguments, store the old and new max registers to them. (try_combine): Update calls to combine_split_insns and pass old_nregs and new_nregs for the i3 call to recog_for_combine. (find_split_point): Update call to combine_split_insns; ignoring the values there. (recog_for_combine_1): Add old_nregs and new_nregs arguments, if the insn was recognized (and not to no-op move), add the REG_DEAD notes to pnotes argument. Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> --- gcc/combine.cc | 46 +++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/gcc/combine.cc b/gcc/combine.cc index 3beeb514b81..a687c7c2015 100644 --- a/gcc/combine.cc +++ b/gcc/combine.cc @@ -454,7 +454,7 @@ static bool merge_outer_ops (enum rtx_code *, HOST_WIDE_INT *, enum rtx_code, static rtx simplify_shift_const_1 (enum rtx_code, machine_mode, rtx, int); static rtx simplify_shift_const (rtx, enum rtx_code, machine_mode, rtx, int); -static int recog_for_combine (rtx *, rtx_insn *, rtx *); +static int recog_for_combine (rtx *, rtx_insn *, rtx *, unsigned = 0, unsigned = 0); static rtx gen_lowpart_for_combine (machine_mode, rtx); static enum rtx_code simplify_compare_const (enum rtx_code, machine_mode, rtx *, rtx *); @@ -515,18 +515,22 @@ target_canonicalize_comparison (enum rtx_code *code, rtx *op0, rtx *op1, /* Try to split PATTERN found in INSN. This returns NULL_RTX if PATTERN cannot be split. Otherwise, it returns an insn sequence. + Updates OLD_NREGS with the max number of regs before the split + and NEW_NREGS after the split. This is a wrapper around split_insns which ensures that the reg_stat vector is made larger if the splitter creates a new register. */ static rtx_insn * -combine_split_insns (rtx pattern, rtx_insn *insn) +combine_split_insns (rtx pattern, rtx_insn *insn, + unsigned int *old_nregs, + unsigned int *new_regs) { rtx_insn *ret; unsigned int nregs; - + *old_nregs = max_reg_num (); ret = split_insns (pattern, insn); - nregs = max_reg_num (); + *new_regs = nregs = max_reg_num (); if (nregs > reg_stat.length ()) reg_stat.safe_grow_cleared (nregs, true); return ret; @@ -3566,12 +3570,13 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0, { rtx parallel, *split; rtx_insn *m_split_insn; + unsigned int old_nregs, new_nregs; /* See if the MD file can split NEWPAT. If it can't, see if letting it use I2DEST as a scratch register will help. In the latter case, convert I2DEST to the mode of the source of NEWPAT if we can. */ - m_split_insn = combine_split_insns (newpat, i3); + m_split_insn = combine_split_insns (newpat, i3, &old_nregs, &new_nregs); /* We can only use I2DEST as a scratch reg if it doesn't overlap any inputs of NEWPAT. */ @@ -3599,7 +3604,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0, gen_rtvec (2, newpat, gen_rtx_CLOBBER (VOIDmode, i2dest))); - m_split_insn = combine_split_insns (parallel, i3); + m_split_insn = combine_split_insns (parallel, i3, &old_nregs, &new_nregs); /* If that didn't work, try changing the mode of I2DEST if we can. */ @@ -3624,7 +3629,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0, gen_rtvec (2, newpat, gen_rtx_CLOBBER (VOIDmode, ni2dest)))); - m_split_insn = combine_split_insns (parallel, i3); + m_split_insn = combine_split_insns (parallel, i3, &old_nregs, &new_nregs); if (m_split_insn == 0 && REGNO (i2dest) >= FIRST_PSEUDO_REGISTER) @@ -3647,13 +3652,14 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0, if (m_split_insn == 0 && newpat_vec_with_clobbers) { parallel = gen_rtx_PARALLEL (VOIDmode, newpat_vec_with_clobbers); - m_split_insn = combine_split_insns (parallel, i3); + m_split_insn = combine_split_insns (parallel, i3, &old_nregs, &new_nregs); } if (m_split_insn && NEXT_INSN (m_split_insn) == NULL_RTX) { rtx m_split_pat = PATTERN (m_split_insn); - insn_code_number = recog_for_combine (&m_split_pat, i3, &new_i3_notes); + insn_code_number = recog_for_combine (&m_split_pat, i3, &new_i3_notes, + old_nregs, new_nregs); if (insn_code_number >= 0) newpat = m_split_pat; } @@ -3678,7 +3684,8 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0, && (next_nonnote_nondebug_insn (i2) == i3 || ! reg_used_between_p (SET_DEST (i2set), i2, i3))) insn_code_number = recog_for_combine (&newi3pat, i3, - &new_i3_notes); + &new_i3_notes, + old_nregs, new_nregs); if (insn_code_number >= 0) newpat = newi3pat; @@ -4915,8 +4922,9 @@ find_split_point (rtx *loc, rtx_insn *insn, bool set_src) MEM_ADDR_SPACE (x))) { rtx reg = regno_reg_rtx[FIRST_PSEUDO_REGISTER]; + unsigned int old_nregs, new_nregs; rtx_insn *seq = combine_split_insns (gen_rtx_SET (reg, XEXP (x, 0)), - subst_insn); + subst_insn, &old_nregs, &new_nregs); /* This should have produced two insns, each of which sets our placeholder. If the source of the second is a valid address, @@ -11418,7 +11426,8 @@ simplify_shift_const (rtx x, enum rtx_code code, machine_mode result_mode, return value. */ static int -recog_for_combine_1 (rtx *pnewpat, rtx_insn *insn, rtx *pnotes) +recog_for_combine_1 (rtx *pnewpat, rtx_insn *insn, rtx *pnotes, + unsigned old_nregs, unsigned new_nregs) { rtx pat = *pnewpat; rtx pat_without_clobbers; @@ -11534,6 +11543,9 @@ recog_for_combine_1 (rtx *pnewpat, rtx_insn *insn, rtx *pnotes) if (insn_code_number >= 0 && insn_code_number != NOOP_MOVE_INSN_CODE) { + /* Create the reg dead notes if needed for the regs that were created via split. */ + for (; old_nregs < new_nregs; old_nregs++) + notes = alloc_reg_note (REG_DEAD, regno_reg_rtx[old_nregs], notes); old_pat = PATTERN (insn); old_notes = REG_NOTES (insn); old_icode = INSN_CODE (insn); @@ -11705,15 +11717,18 @@ change_zero_ext (rtx pat) PNOTES is a pointer to a location where any REG_UNUSED notes added for the CLOBBERs are placed. + If OLD_NREGS != NEW_NREGS, then PNOTES also includes REG_DEAD notes added. The value is the final insn code from the pattern ultimately matched, or -1. */ static int -recog_for_combine (rtx *pnewpat, rtx_insn *insn, rtx *pnotes) +recog_for_combine (rtx *pnewpat, rtx_insn *insn, rtx *pnotes, + unsigned int old_nregs, unsigned int new_nregs) { rtx pat = *pnewpat; - int insn_code_number = recog_for_combine_1 (pnewpat, insn, pnotes); + int insn_code_number = recog_for_combine_1 (pnewpat, insn, pnotes, + old_nregs, new_nregs); if (insn_code_number >= 0 || check_asm_operands (pat)) return insn_code_number; @@ -11755,7 +11770,8 @@ recog_for_combine (rtx *pnewpat, rtx_insn *insn, rtx *pnotes) if (changed) { - insn_code_number = recog_for_combine_1 (pnewpat, insn, pnotes); + insn_code_number = recog_for_combine_1 (pnewpat, insn, pnotes, + old_nregs, new_nregs); if (insn_code_number < 0) undo_to_marker (marker); -- 2.43.0