> -----Original Message----- > From: Andrew Pinski (QUIC) <quic_apin...@quicinc.com> > Sent: Wednesday, February 19, 2025 8:14 PM > To: gcc-patches@gcc.gnu.org > Cc: Andrew Pinski (QUIC) <quic_apin...@quicinc.com> > Subject: [PATCH] combine: Add REG_DEAD notes to the last > instruction after a split [PR118914] > > So gcc.target/aarch64/rev16_2.c started to fail after r15-268- > g9dbff9c05520a7, the problem is combine now rejects the > instruction combine. This happens because after a different > combine which uses a define_split and that define_split > creates a new pseudo register. That new pseudo register is > dead after the last instruction in the stream BUT combine > never creates a REG_DEAD for it. So combine thinks it is still > needed after and now with the i2 not changing, combine > rejects the combine. > > Now combine should be creating a REG_DEAD for the new > pseudo registers for the last instruction of the split. This fixes > rev16_2.c and also improves the situtation in other cases by > removing other dead instructions. > > Bootstrapped and tested on aarch64-linux-gnu and x86_64- > linux-gnu.
Ping? This fixes a regression on aarch64. > > gcc/ChangeLog: > > PR rtl-optimization/118914 > * combine.cc (recog_for_combine): Add old_nregs and > new_nregs > argument (defaulting to 0). Update call to > recog_for_combine_1. > (combine_split_insns): Add old_nregs and new_nregs > arguments, > store the old and new max registers to them. > (try_combine): Update calls to combine_split_insns and > pass old_nregs and new_nregs for the i3 call to > recog_for_combine. > (find_split_point): Update call to combine_split_insns; > ignoring > the values there. > (recog_for_combine_1): Add old_nregs and new_nregs > arguments, > if the insn was recognized (and not to no-op move), add > the > REG_DEAD notes to pnotes argument. > > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> > --- > gcc/combine.cc | 46 +++++++++++++++++++++++++++++++---- > ----------- > 1 file changed, 31 insertions(+), 15 deletions(-) > > diff --git a/gcc/combine.cc b/gcc/combine.cc index > 3beeb514b81..a687c7c2015 100644 > --- a/gcc/combine.cc > +++ b/gcc/combine.cc > @@ -454,7 +454,7 @@ static bool merge_outer_ops (enum > rtx_code *, HOST_WIDE_INT *, enum rtx_code, static rtx > simplify_shift_const_1 (enum rtx_code, machine_mode, rtx, > int); static rtx simplify_shift_const (rtx, enum rtx_code, > machine_mode, rtx, > int); > -static int recog_for_combine (rtx *, rtx_insn *, rtx *); > +static int recog_for_combine (rtx *, rtx_insn *, rtx *, unsigned > = 0, > +unsigned = 0); > static rtx gen_lowpart_for_combine (machine_mode, rtx); > static enum rtx_code simplify_compare_const (enum > rtx_code, machine_mode, > rtx *, rtx *); > @@ -515,18 +515,22 @@ target_canonicalize_comparison > (enum rtx_code *code, rtx *op0, rtx *op1, > > /* Try to split PATTERN found in INSN. This returns NULL_RTX > if > PATTERN cannot be split. Otherwise, it returns an insn > sequence. > + Updates OLD_NREGS with the max number of regs before > the split > + and NEW_NREGS after the split. > This is a wrapper around split_insns which ensures that the > reg_stat vector is made larger if the splitter creates a new > register. */ > > static rtx_insn * > -combine_split_insns (rtx pattern, rtx_insn *insn) > +combine_split_insns (rtx pattern, rtx_insn *insn, > + unsigned int *old_nregs, > + unsigned int *new_regs) > { > rtx_insn *ret; > unsigned int nregs; > - > + *old_nregs = max_reg_num (); > ret = split_insns (pattern, insn); > - nregs = max_reg_num (); > + *new_regs = nregs = max_reg_num (); > if (nregs > reg_stat.length ()) > reg_stat.safe_grow_cleared (nregs, true); > return ret; > @@ -3566,12 +3570,13 @@ try_combine (rtx_insn *i3, > rtx_insn *i2, rtx_insn *i1, rtx_insn *i0, > { > rtx parallel, *split; > rtx_insn *m_split_insn; > + unsigned int old_nregs, new_nregs; > > /* See if the MD file can split NEWPAT. If it can't, see if > letting it > use I2DEST as a scratch register will help. In the latter > case, > convert I2DEST to the mode of the source of NEWPAT if we > can. */ > > - m_split_insn = combine_split_insns (newpat, i3); > + m_split_insn = combine_split_insns (newpat, i3, > &old_nregs, > + &new_nregs); > > /* We can only use I2DEST as a scratch reg if it doesn't > overlap any > inputs of NEWPAT. */ > @@ -3599,7 +3604,7 @@ try_combine (rtx_insn *i3, rtx_insn > *i2, rtx_insn *i1, rtx_insn *i0, > gen_rtvec (2, newpat, > gen_rtx_CLOBBER (VOIDmode, > i2dest))); > - m_split_insn = combine_split_insns (parallel, i3); > + m_split_insn = combine_split_insns (parallel, i3, > &old_nregs, > +&new_nregs); > > /* If that didn't work, try changing the mode of I2DEST if > we can. */ > @@ -3624,7 +3629,7 @@ try_combine (rtx_insn *i3, rtx_insn > *i2, rtx_insn *i1, rtx_insn *i0, > gen_rtvec (2, newpat, > gen_rtx_CLOBBER (VOIDmode, > ni2dest)))); > - m_split_insn = combine_split_insns (parallel, i3); > + m_split_insn = combine_split_insns (parallel, i3, > &old_nregs, > +&new_nregs); > > if (m_split_insn == 0 > && REGNO (i2dest) >= FIRST_PSEUDO_REGISTER) @@ > -3647,13 +3652,14 @@ try_combine (rtx_insn *i3, rtx_insn > *i2, rtx_insn *i1, rtx_insn *i0, > if (m_split_insn == 0 && newpat_vec_with_clobbers) > { > parallel = gen_rtx_PARALLEL (VOIDmode, > newpat_vec_with_clobbers); > - m_split_insn = combine_split_insns (parallel, i3); > + m_split_insn = combine_split_insns (parallel, i3, > &old_nregs, > +&new_nregs); > } > > if (m_split_insn && NEXT_INSN (m_split_insn) == > NULL_RTX) > { > rtx m_split_pat = PATTERN (m_split_insn); > - insn_code_number = recog_for_combine (&m_split_pat, > i3, &new_i3_notes); > + insn_code_number = recog_for_combine (&m_split_pat, > i3, &new_i3_notes, > + old_nregs, new_nregs); > if (insn_code_number >= 0) > newpat = m_split_pat; > } > @@ -3678,7 +3684,8 @@ try_combine (rtx_insn *i3, rtx_insn > *i2, rtx_insn *i1, rtx_insn *i0, > && (next_nonnote_nondebug_insn (i2) == i3 > || ! reg_used_between_p (SET_DEST (i2set), i2, i3))) > insn_code_number = recog_for_combine (&newi3pat, > i3, > - &new_i3_notes); > + &new_i3_notes, > + old_nregs, new_nregs); > if (insn_code_number >= 0) > newpat = newi3pat; > > @@ -4915,8 +4922,9 @@ find_split_point (rtx *loc, rtx_insn > *insn, bool set_src) > MEM_ADDR_SPACE (x))) > { > rtx reg = regno_reg_rtx[FIRST_PSEUDO_REGISTER]; > + unsigned int old_nregs, new_nregs; > rtx_insn *seq = combine_split_insns (gen_rtx_SET (reg, > XEXP (x, 0)), > - subst_insn); > + subst_insn, &old_nregs, > &new_nregs); > > /* This should have produced two insns, each of which > sets our > placeholder. If the source of the second is a valid > address, @@ -11418,7 +11426,8 @@ simplify_shift_const (rtx > x, enum rtx_code code, machine_mode result_mode, > return value. */ > > static int > -recog_for_combine_1 (rtx *pnewpat, rtx_insn *insn, rtx > *pnotes) > +recog_for_combine_1 (rtx *pnewpat, rtx_insn *insn, rtx > *pnotes, > + unsigned old_nregs, unsigned new_nregs) > { > rtx pat = *pnewpat; > rtx pat_without_clobbers; > @@ -11534,6 +11543,9 @@ recog_for_combine_1 (rtx > *pnewpat, rtx_insn *insn, rtx *pnotes) > if (insn_code_number >= 0 > && insn_code_number != NOOP_MOVE_INSN_CODE) > { > + /* Create the reg dead notes if needed for the regs that > were created via split. */ > + for (; old_nregs < new_nregs; old_nregs++) > + notes = alloc_reg_note (REG_DEAD, > regno_reg_rtx[old_nregs], notes); > old_pat = PATTERN (insn); > old_notes = REG_NOTES (insn); > old_icode = INSN_CODE (insn); > @@ -11705,15 +11717,18 @@ change_zero_ext (rtx pat) > > PNOTES is a pointer to a location where any REG_UNUSED > notes added for > the CLOBBERs are placed. > + If OLD_NREGS != NEW_NREGS, then PNOTES also includes > REG_DEAD notes added. > > The value is the final insn code from the pattern ultimately > matched, > or -1. */ > > static int > -recog_for_combine (rtx *pnewpat, rtx_insn *insn, rtx > *pnotes) > +recog_for_combine (rtx *pnewpat, rtx_insn *insn, rtx > *pnotes, > + unsigned int old_nregs, unsigned int new_nregs) > { > rtx pat = *pnewpat; > - int insn_code_number = recog_for_combine_1 (pnewpat, > insn, pnotes); > + int insn_code_number = recog_for_combine_1 (pnewpat, > insn, pnotes, > + old_nregs, new_nregs); > if (insn_code_number >= 0 || check_asm_operands (pat)) > return insn_code_number; > > @@ -11755,7 +11770,8 @@ recog_for_combine (rtx > *pnewpat, rtx_insn *insn, rtx *pnotes) > > if (changed) > { > - insn_code_number = recog_for_combine_1 (pnewpat, > insn, pnotes); > + insn_code_number = recog_for_combine_1 (pnewpat, > insn, pnotes, > + old_nregs, new_nregs); > > if (insn_code_number < 0) > undo_to_marker (marker); > -- > 2.43.0