> -----Original Message-----
> From: Andrew Pinski (QUIC) <quic_apin...@quicinc.com>
> Sent: Wednesday, February 19, 2025 8:14 PM
> To: gcc-patches@gcc.gnu.org
> Cc: Andrew Pinski (QUIC) <quic_apin...@quicinc.com>
> Subject: [PATCH] combine: Add REG_DEAD notes to the last
> instruction after a split [PR118914]
> 
> So gcc.target/aarch64/rev16_2.c started to fail after r15-268-
> g9dbff9c05520a7, the problem is combine now rejects the
> instruction combine. This happens because after a different
> combine which uses a define_split and that define_split
> creates a new pseudo register. That new pseudo register is
> dead after the last instruction in the stream BUT combine
> never creates a REG_DEAD for it. So combine thinks it is still
> needed after and now with the i2 not changing, combine
> rejects the combine.
> 
> Now combine should be creating a REG_DEAD for the new
> pseudo registers for the last instruction of the split. This fixes
> rev16_2.c and also improves the situtation in other cases by
> removing other dead instructions.
> 
> Bootstrapped and tested on aarch64-linux-gnu and x86_64-
> linux-gnu.

Ping? This fixes a regression on aarch64.

> 
> gcc/ChangeLog:
> 
>       PR rtl-optimization/118914
>       * combine.cc (recog_for_combine): Add old_nregs and
> new_nregs
>       argument (defaulting to 0). Update call to
> recog_for_combine_1.
>       (combine_split_insns): Add old_nregs and new_nregs
> arguments,
>       store the old and new max registers to them.
>       (try_combine): Update calls to combine_split_insns and
>       pass old_nregs and new_nregs for the i3 call to
> recog_for_combine.
>       (find_split_point): Update call to combine_split_insns;
> ignoring
>       the values there.
>       (recog_for_combine_1): Add old_nregs and new_nregs
> arguments,
>       if the insn was recognized (and not to no-op move), add
> the
>       REG_DEAD notes to pnotes argument.
> 
> Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com>
> ---
>  gcc/combine.cc | 46 +++++++++++++++++++++++++++++++----
> -----------
>  1 file changed, 31 insertions(+), 15 deletions(-)
> 
> diff --git a/gcc/combine.cc b/gcc/combine.cc index
> 3beeb514b81..a687c7c2015 100644
> --- a/gcc/combine.cc
> +++ b/gcc/combine.cc
> @@ -454,7 +454,7 @@ static bool merge_outer_ops (enum
> rtx_code *, HOST_WIDE_INT *, enum rtx_code,  static rtx
> simplify_shift_const_1 (enum rtx_code, machine_mode, rtx,
> int);  static rtx simplify_shift_const (rtx, enum rtx_code,
> machine_mode, rtx,
>                                int);
> -static int recog_for_combine (rtx *, rtx_insn *, rtx *);
> +static int recog_for_combine (rtx *, rtx_insn *, rtx *, unsigned
> = 0,
> +unsigned = 0);
>  static rtx gen_lowpart_for_combine (machine_mode, rtx);
> static enum rtx_code simplify_compare_const (enum
> rtx_code, machine_mode,
>                                            rtx *, rtx *);
> @@ -515,18 +515,22 @@ target_canonicalize_comparison
> (enum rtx_code *code, rtx *op0, rtx *op1,
> 
>  /* Try to split PATTERN found in INSN.  This returns NULL_RTX
> if
>     PATTERN cannot be split.  Otherwise, it returns an insn
> sequence.
> +   Updates OLD_NREGS with the max number of regs before
> the split
> +   and NEW_NREGS after the split.
>     This is a wrapper around split_insns which ensures that the
>     reg_stat vector is made larger if the splitter creates a new
>     register.  */
> 
>  static rtx_insn *
> -combine_split_insns (rtx pattern, rtx_insn *insn)
> +combine_split_insns (rtx pattern, rtx_insn *insn,
> +                  unsigned int *old_nregs,
> +                  unsigned int *new_regs)
>  {
>    rtx_insn *ret;
>    unsigned int nregs;
> -
> +  *old_nregs = max_reg_num ();
>    ret = split_insns (pattern, insn);
> -  nregs = max_reg_num ();
> +  *new_regs = nregs = max_reg_num ();
>    if (nregs > reg_stat.length ())
>      reg_stat.safe_grow_cleared (nregs, true);
>    return ret;
> @@ -3566,12 +3570,13 @@ try_combine (rtx_insn *i3,
> rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
>      {
>        rtx parallel, *split;
>        rtx_insn *m_split_insn;
> +      unsigned int old_nregs, new_nregs;
> 
>        /* See if the MD file can split NEWPAT.  If it can't, see if
> letting it
>        use I2DEST as a scratch register will help.  In the latter
> case,
>        convert I2DEST to the mode of the source of NEWPAT if we
> can.  */
> 
> -      m_split_insn = combine_split_insns (newpat, i3);
> +      m_split_insn = combine_split_insns (newpat, i3,
> &old_nregs,
> + &new_nregs);
> 
>        /* We can only use I2DEST as a scratch reg if it doesn't
> overlap any
>        inputs of NEWPAT.  */
> @@ -3599,7 +3604,7 @@ try_combine (rtx_insn *i3, rtx_insn
> *i2, rtx_insn *i1, rtx_insn *i0,
>                                      gen_rtvec (2, newpat,
>                                                 gen_rtx_CLOBBER (VOIDmode,
>                                                                  i2dest)));
> -       m_split_insn = combine_split_insns (parallel, i3);
> +       m_split_insn = combine_split_insns (parallel, i3,
> &old_nregs,
> +&new_nregs);
> 
>         /* If that didn't work, try changing the mode of I2DEST if
>            we can.  */
> @@ -3624,7 +3629,7 @@ try_combine (rtx_insn *i3, rtx_insn
> *i2, rtx_insn *i1, rtx_insn *i0,
>                          gen_rtvec (2, newpat,
>                                     gen_rtx_CLOBBER (VOIDmode,
>                                                      ni2dest))));
> -           m_split_insn = combine_split_insns (parallel, i3);
> +           m_split_insn = combine_split_insns (parallel, i3,
> &old_nregs,
> +&new_nregs);
> 
>             if (m_split_insn == 0
>                 && REGNO (i2dest) >= FIRST_PSEUDO_REGISTER) @@
> -3647,13 +3652,14 @@ try_combine (rtx_insn *i3, rtx_insn
> *i2, rtx_insn *i1, rtx_insn *i0,
>        if (m_split_insn == 0 && newpat_vec_with_clobbers)
>       {
>         parallel = gen_rtx_PARALLEL (VOIDmode,
> newpat_vec_with_clobbers);
> -       m_split_insn = combine_split_insns (parallel, i3);
> +       m_split_insn = combine_split_insns (parallel, i3,
> &old_nregs,
> +&new_nregs);
>       }
> 
>        if (m_split_insn && NEXT_INSN (m_split_insn) ==
> NULL_RTX)
>       {
>         rtx m_split_pat = PATTERN (m_split_insn);
> -       insn_code_number = recog_for_combine (&m_split_pat,
> i3, &new_i3_notes);
> +       insn_code_number = recog_for_combine (&m_split_pat,
> i3, &new_i3_notes,
> +                                             old_nregs, new_nregs);
>         if (insn_code_number >= 0)
>           newpat = m_split_pat;
>       }
> @@ -3678,7 +3684,8 @@ try_combine (rtx_insn *i3, rtx_insn
> *i2, rtx_insn *i1, rtx_insn *i0,
>             && (next_nonnote_nondebug_insn (i2) == i3
>                 || ! reg_used_between_p (SET_DEST (i2set), i2, i3)))
>           insn_code_number = recog_for_combine (&newi3pat,
> i3,
> -                                               &new_i3_notes);
> +                                               &new_i3_notes,
> +                                               old_nregs, new_nregs);
>         if (insn_code_number >= 0)
>           newpat = newi3pat;
> 
> @@ -4915,8 +4922,9 @@ find_split_point (rtx *loc, rtx_insn
> *insn, bool set_src)
>                                           MEM_ADDR_SPACE (x)))
>       {
>         rtx reg = regno_reg_rtx[FIRST_PSEUDO_REGISTER];
> +       unsigned int old_nregs, new_nregs;
>         rtx_insn *seq = combine_split_insns (gen_rtx_SET (reg,
> XEXP (x, 0)),
> -                                            subst_insn);
> +                                            subst_insn, &old_nregs, 
> &new_nregs);
> 
>         /* This should have produced two insns, each of which
> sets our
>            placeholder.  If the source of the second is a valid
> address, @@ -11418,7 +11426,8 @@ simplify_shift_const (rtx
> x, enum rtx_code code, machine_mode result_mode,
>     return value.  */
> 
>  static int
> -recog_for_combine_1 (rtx *pnewpat, rtx_insn *insn, rtx
> *pnotes)
> +recog_for_combine_1 (rtx *pnewpat, rtx_insn *insn, rtx
> *pnotes,
> +                  unsigned old_nregs, unsigned new_nregs)
>  {
>    rtx pat = *pnewpat;
>    rtx pat_without_clobbers;
> @@ -11534,6 +11543,9 @@ recog_for_combine_1 (rtx
> *pnewpat, rtx_insn *insn, rtx *pnotes)
>    if (insn_code_number >= 0
>        && insn_code_number != NOOP_MOVE_INSN_CODE)
>      {
> +      /* Create the reg dead notes if needed for the regs that
> were created via split.   */
> +      for (; old_nregs < new_nregs; old_nregs++)
> +     notes = alloc_reg_note (REG_DEAD,
> regno_reg_rtx[old_nregs], notes);
>        old_pat = PATTERN (insn);
>        old_notes = REG_NOTES (insn);
>        old_icode = INSN_CODE (insn);
> @@ -11705,15 +11717,18 @@ change_zero_ext (rtx pat)
> 
>     PNOTES is a pointer to a location where any REG_UNUSED
> notes added for
>     the CLOBBERs are placed.
> +   If OLD_NREGS != NEW_NREGS, then PNOTES also includes
> REG_DEAD notes added.
> 
>     The value is the final insn code from the pattern ultimately
> matched,
>     or -1.  */
> 
>  static int
> -recog_for_combine (rtx *pnewpat, rtx_insn *insn, rtx
> *pnotes)
> +recog_for_combine (rtx *pnewpat, rtx_insn *insn, rtx
> *pnotes,
> +                unsigned int old_nregs, unsigned int new_nregs)
>  {
>    rtx pat = *pnewpat;
> -  int insn_code_number = recog_for_combine_1 (pnewpat,
> insn, pnotes);
> +  int insn_code_number = recog_for_combine_1 (pnewpat,
> insn, pnotes,
> +                                           old_nregs, new_nregs);
>    if (insn_code_number >= 0 || check_asm_operands (pat))
>      return insn_code_number;
> 
> @@ -11755,7 +11770,8 @@ recog_for_combine (rtx
> *pnewpat, rtx_insn *insn, rtx *pnotes)
> 
>    if (changed)
>      {
> -      insn_code_number = recog_for_combine_1 (pnewpat,
> insn, pnotes);
> +      insn_code_number = recog_for_combine_1 (pnewpat,
> insn, pnotes,
> +                                           old_nregs, new_nregs);
> 
>        if (insn_code_number < 0)
>       undo_to_marker (marker);
> --
> 2.43.0

Reply via email to