We've sent a new version for this (
https://gcc.gnu.org/pipermail/gcc-patches/2025-November/700641.html).
This is causing a bootstrap failure on RISC-V, so we changed the pass to be
enabled by default on AArch64 and x86 only, for now.

> > > +/* Get the single reaching definition of REG used in INSN within a
BB.
> > > +   Return the definition or NULL if there's no definition with the
desired
> > > +   criteria.  If SINGLE_USE is set to true the DEF must have exactly
one
> > > +   USE resulting in a 1:1 DEF-USE relationship.  If set to false,
then a
> > > +   1:n DEF-USE relationship is accepted and the caller must take
care to
> > > +   ensure all USEs are safe for folding.  */
> > > +static set_info *
> > > +get_single_def_in_bb (insn_info *insn, rtx reg, bool single_use)
> > >  {
> > > -  df_ref use;
> > > -  struct df_link *ref_chain, *ref_link;
> > > -
> > > -  FOR_EACH_INSN_USE (use, insn)
> > > +  /* Get the use_info of the base register.  */
> > > +  for (use_info *use : insn->uses ())
> > >      {
> > > -      if (GET_CODE (DF_REF_REG (use)) == SUBREG)
> > > +      /* Other USEs can be ignored and multiple equal USEs are fine.
 */
> > > +      if (use->regno () != REGNO (reg))
> > > +     continue;
> >
> > This can use:
> >
> >   use_info *use = find_access (insn->uses (), REGNO (reg));
> >   if (!use)
> >     return NULL;
> >
> > and so avoid the loop.

I kept this as is, as the suggested change caused some errors.

> Will be done.
>
> > > +
> > > +      /* Don't handle subregs for now.  */
> > > +      if (use->includes_subregs ())
> > >       return NULL;
> > > -      if (REGNO (DF_REF_REG (use)) == REGNO (reg))
> > > -     break;
> > > -    }
> > >
> > > -  if (!use)
> > > -    return NULL;
> > > +      /* Get the DEF of the register.  */
> > > +      set_info *def = use->def ();
> > > +      if (!def)
> > > +     return NULL;
> > >
> > > -  ref_chain = DF_REF_CHAIN (use);
> > > +      /* Limit the amount of USEs of DEF to 1.  */
> > > +      auto iter = def->nondebug_insn_uses ();
> > > +      if (single_use && ++iter.begin () != iter.end ())
> >
> > This can use def->single_nondebug_use ()
>
> Will be done.
>
> > > +     return NULL;
> > >
> > > -  if (!ref_chain)
> > > -    return NULL;
> > > +      /* Don't handle multiregs for now.  */
> > > +      if (def->includes_multiregs ())
> > > +     return NULL;
> > >
> > > -  for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
> > > -    {
> > > -      /* Problem getting some definition for this instruction.  */
> > > -      if (ref_link->ref == NULL)
> > > +      /* Only consider uses whose definition comes from a real
instruction
> > > +      and has no notes attached.  */
> > > +      insn_info *def_insn = def->insn ();
> > > +      if (def_insn->is_artificial () || def_insn->first_note ())
> > >       return NULL;
> >
> > What's the reason for the first_note check?
>
> That's a mistake.
> I mixed up insn_notes and REG_NOTES.
>
> The upstream code does this in get_uses():
>   /* We do not handle REG_EQUIV/REG_EQ notes for now.  */
>   if (DF_REF_FLAGS (ref_link->ref) & DF_REF_IN_NOTE)
>     return NULL;
>
> I'll filter out such cases in get_single_def_in_bb() with:
>   find_reg_note (def_rtl, REG_EQUIV, NULL_RTX)
>   find_reg_note (def_rtl, REG_EQUAL, NULL_RTX)
>
> > > -      if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
> > > +
> > > +      /* No parallel expressions or clobbers.  */
> > > +      if (def_insn->num_defs () != 1)
> > >       return NULL;
> > > -      if (global_regs[REGNO (reg)]
> > > -       && !set_of (reg, DF_REF_INSN (ref_link->ref)))
> > > +
> > > +      rtx_insn *def_rtl = def_insn->rtl ();
> > > +      if (!NONJUMP_INSN_P (def_rtl) || RTX_FRAME_RELATED_P (def_rtl))
> > >       return NULL;
> > > -    }
> > >
> > > -  if (ref_chain->next)
> > > -    return NULL;
> > > +      /* Check if the DEF is a SET of the expected form.  */
> > > +      rtx def_set = simple_regno_set (PATTERN (def_rtl), def->regno
());
> > > +      if (!def_set)
> > > +     return NULL;
> > >
> > > -  rtx_insn *def = DF_REF_INSN (ref_chain->ref);
> > > +      /* Ensure DEF and USE are in the same BB.  */
> > > +      if (def->bb () != insn->bb ())
> > > +     return NULL;
> > >
> > > -  if (BLOCK_FOR_INSN (def) != BLOCK_FOR_INSN (insn))
> > > -    return NULL;
> > > +      /* Ensure the definition comes before the insn.  */
> > > +      if (*use->insn () < *def_insn)
> > > +     return NULL;
> >
> > This is redundant: the definition always comes earlier (in RPO) than
the use.
>
> This was added upstream in response to a bootstrap issue (PR111601).
> I kept it for that reason. I'll drop it.
>
> > > -  if (DF_INSN_LUID (def) > DF_INSN_LUID (insn))
> > > -    return NULL;
> > > +      return def;
> > > +    }
> > >
> > > -  return def;
> > > +  return NULL;
> > >  }
> > >
> > > -/* Get all uses of REG which is set in INSN.  Return the use list or
NULL if a
> > > -   use is missing / irregular.  If SUCCESS is not NULL then set it
to false if
> > > -   there are missing / irregular uses and true otherwise.  */
> > > -static df_link *
> > > -get_uses (rtx_insn *insn, rtx reg, bool *success)
> > > +/* Test if all USEs of DEF (which defines REG) meet certain criteria
to be
> > > +   foldable.  Returns true iff all USEs are fine or false otherwise.
 */
> > > +static bool
> > > +has_foldable_uses_p (set_info *def, rtx reg)
> > >  {
> > > -  df_ref def;
> > > -
> > > -  if (success)
> > > -    *success = false;
> > > -
> > > -  FOR_EACH_INSN_DEF (def, insn)
> > > -    if (REGNO (DF_REF_REG (def)) == REGNO (reg))
> > > -      break;
> > > -
> > > -  if (!def)
> > > -    return NULL;
> > > +  /* We only fold through instructions that are transitively used as
> > > +     memory addresses and do not have other uses.  Use the same logic
> > > +     from offset calculation to visit instructions that can propagate
> > > +     offsets and keep track of them in CAN_FOLD_INSNS.  */
> > > +  for (use_info *use : def->nondebug_insn_uses ())
> > > +    {
> >
> > Would use->includes_address_uses () be useful here, as a short-cut?
>
> After thinking about has_foldable_uses_p () again, I conclude that this
function
> is pointless in the new implementation because now all USEs must be part
of
> instructions that are successfully identified by fold_offsets_1 (and
> thus are foldable).
>
> The original code used these tests (in the loop in fold_offsets ()) to
> filter out
> Instructions that are not safe to modify, as it did not require all
> USEs to be part
> of INSNs that have been identified.
>
> > > +      insn_info *use_insn = use->insn ();
> > > +      if (use_insn->is_artificial ())
> > > +     return false;
> > >
> > > -  df_link *ref_chain = DF_REF_CHAIN (def);
> > > -  int insn_luid = DF_INSN_LUID (insn);
> > > -  basic_block insn_bb = BLOCK_FOR_INSN (insn);
> > > +      /* Punt if the use is anything more complicated than a set
> > > +      (clobber, use, etc).  */
> > > +      rtx_insn *use_rtl = use_insn->rtl ();
> > > +      if (!NONJUMP_INSN_P (use_rtl) || GET_CODE (PATTERN (use_rtl))
!= SET)
> > > +     return false;
> > >
> > > -  for (df_link *ref_link = ref_chain; ref_link; ref_link =
ref_link->next)
> > > -    {
> > > -      /* Problem getting a use for this instruction.  */
> > > -      if (ref_link->ref == NULL)
> > > -     return NULL;
> > > -      if (DF_REF_CLASS (ref_link->ref) != DF_REF_REGULAR)
> > > -     return NULL;
> > > +      /* Special case: A foldable memory store is not foldable if it
> > > +      mentions DEST outside of the address calculation.  */
> > > +      rtx use_set = PATTERN (use_rtl);
> > > +      if (use_set && MEM_P (SET_DEST (use_set))
> > > +       && reg_mentioned_p (reg, SET_SRC (use_set)))
> > > +     return false;
> > >
> > > -      rtx_insn *use = DF_REF_INSN (ref_link->ref);
> > > -      if (DEBUG_INSN_P (use))
> > > -     continue;
> > > +      if (use->bb () != def->bb ())
> > > +     return false;
> > >
> > > -      /* We do not handle REG_EQUIV/REG_EQ notes for now.  */
> > > -      if (DF_REF_FLAGS (ref_link->ref) & DF_REF_IN_NOTE)
> > > -     return NULL;
> > > -      if (BLOCK_FOR_INSN (use) != insn_bb)
> > > -     return NULL;
> > >        /* Punt if use appears before def in the basic block.  See
PR111601.  */
> > > -      if (DF_INSN_LUID (use) < insn_luid)
> > > -     return NULL;
> > > +      if (*use_insn < *def->insn ())
> > > +     return false;
> >
> > As above, this check is redundant.
>
> Will be done.
>
> >
> > > [...]
> > > @@ -856,69 +897,82 @@ pass_fold_mem_offsets::execute (function *fn)
> > >              "fold-mem-offsets: %d basic blocks and %d edges/basic
block",
> > >              n_basic_blocks_for_fn (cfun),
> > >              n_edges_for_fn (cfun) / n_basic_blocks_for_fn (cfun));
> > > -      return 0;
> > > +      multi_use_mode = false;
> > >      }
> > >
> > > -  df_set_flags (DF_EQ_NOTES + DF_RD_PRUNE_DEAD_DEFS +
DF_DEFER_INSN_RESCAN);
> > > -  df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
> > > +  /* There is a conflict between this pass and RISCV's
shorten-memrefs
> > > +     pass.  For now disable folding if optimizing for size because
> > > +     otherwise this cancels the effects of shorten-memrefs.  */
> > > +  cgraph_node *n = cgraph_node::get (fn->decl);
> > > +  if (n && n->optimize_for_size_p ())
> > > +    return 0;
> > > +
> > > +  /* Initialise RTL SSA.  */
> > > +  calculate_dominance_info (CDI_DOMINATORS);
> > >    df_analyze ();
> > > +  crtl->ssa = new rtl_ssa::function_info (cfun);
> > >
> > > -  bitmap_initialize (&can_fold_insns, NULL);
> > > -  bitmap_initialize (&candidate_fold_insns, NULL);
> > > -  bitmap_initialize (&cannot_fold_insns, NULL);
> > > +  /* The number of instructions that were simplified or eliminated.
 */
> > > +  int stats_fold_count = 0;
> > >
> > > -  stats_fold_count = 0;
> > > +  /* Fold mem offsets with DEFs that have a single USE.  */
> > > +  stats_fold_count += fold_mem_offsets_single_use ();
> > >
> > > -  basic_block bb;
> > > -  rtx_insn *insn;
> > > -  FOR_ALL_BB_FN (bb, fn)
> > > +  /* Fold mem offsets with DEFs that have multiple USEs.  */
> > > +  if (multi_use_mode || flag_expensive_optimizations)
> > >      {
> > > -      /* There is a conflict between this pass and RISCV's
shorten-memrefs
> > > -      pass.  For now disable folding if optimizing for size because
> > > -      otherwise this cancels the effects of shorten-memrefs.  */
> > > -      if (optimize_bb_for_size_p (bb))
> > > -     continue;
> > > -
> > > -      fold_info_map fold_info;
> > > +      if (dump_file)
> > > +     fprintf (dump_file, "Starting multi-use analysis\n");
> > > +      stats_fold_count += fold_mem_offsets_multi_use ();
> > > +    }
> > >
> > > -      bitmap_clear (&can_fold_insns);
> > > -      bitmap_clear (&candidate_fold_insns);
> > > -      bitmap_clear (&cannot_fold_insns);
> > > +  statistics_counter_event (cfun, "Number of folded instructions",
> > > +                         stats_fold_count);
> > >
> > > -      FOR_BB_INSNS (bb, insn)
> > > -     do_analysis (insn);
> > > +  free_dominance_info (CDI_DOMINATORS);
> > > +  cleanup_cfg (0);
> >
> > With the changes suggested in the general comments, this would become:
> >
> >   if (crtl->ssa->perform_pending_updates ())
> >     cleanup_cfg (0);
>
> Will be done.
>
> >
> > >
> > > -      FOR_BB_INSNS (bb, insn)
> > > -     do_fold_info_calculation (insn, &fold_info);
> > > +  delete crtl->ssa;
> > > +  crtl->ssa = nullptr;
> > >
> > > -      FOR_BB_INSNS (bb, insn)
> > > -     if (fold_mem_info **info = fold_info.get (insn))
> > > -       do_check_validity (insn, *info);
> > > +  delete_trivially_dead_insns (get_insns (), max_reg_num ());
> >
> > Which cases is this for?   Going back to the example:
>
> There are none (this was copied from fwprop, but it is indeed useless).
> I'll drop this.
>
>
> >    For example it can transform code like this:
> >
> >      add  t4, sp, 16
> >      add  t2, a6, t4
> >      shl  t3, t2, 1
> >      ld   a2, 0(t3)
> >      add  a2, 1
> >      sd   a2, 8(t2)
> >
> >    into the following (one instruction less):
> >
> >      add  t2, a6, sp
> >      shl  t3, t2, 1
> >      ld   a2, 32(t3)
> >      add  a2, 1
> >      sd   a2, 24(t2)
> >
> > I would have expected the pass to be able to delete the t4 instruction
> > on the fly, rather than need a separate instruction walk.
> >
> > Thanks,
> > Richard
Thanks,
Konstantinos

Reply via email to