On Tue, Nov 28, 2023 at 12:53 PM Jakub Jelinek <ja...@redhat.com> wrote:
>
> On Tue, Nov 28, 2023 at 11:45:58AM +0200, Manolis Tsamis wrote:
> > > But, while get_single_def_in_bb checks for
> > >   if (DF_INSN_LUID (def) > DF_INSN_LUID (insn))
> > >     return NULL;
> > > (OT, why not DF_INSN_INFO_LUID (DF_REF_INSN_INFO (ref_chain->ref))
> > > instead of DF_INSN_LUID (def), then it doesn't need to look up
> > > DF_INSN_INFO_GET (def)?), nothing when walking all uses of def does such
> > > a luid check.
> >
> > I didn't actually know about the difference between DF_INSN_LUID (def)
> > and DF_INSN_INFO_LUID (DF_REF_INSN_INFO (ref_chain->ref)), so I just
> > used the more concise version.
> > Thanks for pointing this out, I could make this a separate change.
>
> Note, in the end in the actually tested patch (see the follow-up)
> I've just used DF_INSN_LUID, after looking up that DF_INSN_INFO_GET is
> fairly cheap -
> #define DF_INSN_INFO_GET(INSN) (df->insns[(INSN_UID (INSN))])
> I was afraid it could be a hash table lookup.
> Because, while using that DF_INSN_INFO_LUID (DF_REF_INSN_INFO (ref_chain->ref))
> etc. is a tiny bit faster (it doesn't have to read the u2.insn_uid from ref
> and look it up in the table), it is less readable, and because it is
> just a tiny bit, I guess readability/maintainability should win.
>
Ok, I agree it's more readable and the overhead is small.
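
For anyone reading along, here's a tiny sketch of the equivalence being
discussed (hand-written for this mail, not part of the patch; it assumes
df is initialized and that ref_chain->ref is a regular ref with insn
info attached):

  /* Both expressions compute the same luid; the second merely skips
     the df->insns[INSN_UID (insn)] array lookup that DF_INSN_LUID
     performs via DF_INSN_INFO_GET, plus the read of u2.insn_uid
     from the ref.  */
  rtx_insn *def_insn = DF_REF_INSN (ref_chain->ref);
  int luid_a = DF_INSN_LUID (def_insn);
  int luid_b = DF_INSN_INFO_LUID (DF_REF_INSN_INFO (ref_chain->ref));
  gcc_checking_assert (luid_a == luid_b);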

> > Indeed. Your observations got me thinking about this issue and I see
> > two main weaknesses with my current implementation:
>
> > > --- gcc/fold-mem-offsets.cc.jj  2023-11-02 07:49:17.060865772 +0100
> > > +++ gcc/fold-mem-offsets.cc     2023-11-27 21:35:35.089007365 +0100
> > > @@ -511,6 +511,7 @@ fold_offsets (rtx_insn *insn, rtx reg, b
> > >        if (!success)
> > >         return 0;
> > >
> > > +      unsigned luid = DF_INSN_LUID (def);
> > >        for (ref_link = uses; ref_link; ref_link = ref_link->next)
> > >         {
> > >           rtx_insn *use = DF_REF_INSN (ref_link->ref);
> > > @@ -534,6 +535,11 @@ fold_offsets (rtx_insn *insn, rtx reg, b
> > >           if (use_set && MEM_P (SET_DEST (use_set))
> > >               && reg_mentioned_p (dest, SET_SRC (use_set)))
> > >             return 0;
> > > +
> > > +         /* Punt if use appears before def in the basic block.  See
> > > +            PR111601.  */
> > > +         if (DF_INSN_INFO_LUID (DF_REF_INSN_INFO (ref_link->ref)) < luid)
> > > +           return 0;
> > I think it would be fitting to put this condition within the get_uses
> > function. This way it would mirror what is done in
> > get_single_def_in_bb.
>
> get_uses is where I initially added it, but to do it there
> one also needs to copy the DEBUG_INSN_P check (because we
> shouldn't be doing such tests on debug insns).
>
> If we put it into get_uses, maybe we should also check that the use
> is in the same bb, in sync with get_single_def_in_bb.
>
I have omitted the "use is in the same bb" check from get_uses because
we only mark instructions as foldable within a BB, so if there were
any uses outside they wouldn't be foldable anyway.
But this check is harmless and probably makes the intention more
clear. Also it could be a small performance improvement as it punts
earlier than otherwise.
So it looks good to add the check imo.
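
For the archives, a hand-made sketch of the situation the luid punt
guards against (illustrative only, not the actual RTL from PR111601):
in a loop, the reaching def of a use can sit later in the same bb,
flowing in through the backedge, so the use ends up with a smaller
luid than its def:

  ;; bb 3 (loop body):
  ;;   insn 10: (set (reg:SI 100) (mem:SI (reg:DI 105)))  ; use of r105, luid 1
  ;;   ...
  ;;   insn 20: (set (reg:DI 105)
  ;;                 (plus:DI (reg:DI 105) (const_int 8))) ; def of r105, luid 7
  ;;   jump_insn: -> bb 3

Folding an offset into insn 10 based on that use/def pair would rewrite
an instruction that executes before the def on the first iteration,
hence the punt.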

> So, like this (so far untested)?
>
> Note, the earlier posted patch passed bootstrap/regtest on
> {powerpc64le,x86_64,i686}-linux.
>
> 2023-11-28  Jakub Jelinek  <ja...@redhat.com>
>
>         PR bootstrap/111601
>         * fold-mem-offsets.cc (get_uses): Ignore DEBUG_INSN uses.  Otherwise,
>         punt if use is in a different basic block from INSN or appears before
>         INSN in the same basic block.  Formatting fixes.
>         (get_single_def_in_bb): Formatting fixes.
>         (fold_offsets_1, pass_fold_mem_offsets::execute): Comment formatting
>         fixes.
>
>         * g++.dg/opt/pr111601.C: New test.
>
> --- gcc/fold-mem-offsets.cc.jj  2023-11-02 07:49:17.060865772 +0100
> +++ gcc/fold-mem-offsets.cc     2023-11-28 11:47:34.941679105 +0100
> @@ -154,7 +154,7 @@ static int stats_fold_count;
>     The definition is desired for REG used in INSN.
>     Return the definition insn or NULL if there's no definition with
>     the desired criteria.  */
> -static rtx_insn*
> +static rtx_insn *
>  get_single_def_in_bb (rtx_insn *insn, rtx reg)
>  {
>    df_ref use;
> @@ -205,11 +205,10 @@ get_single_def_in_bb (rtx_insn *insn, rt
>  /* Get all uses of REG which is set in INSN.  Return the use list or NULL if a
>     use is missing / irregular.  If SUCCESS is not NULL then set it to false if
>     there are missing / irregular uses and true otherwise.  */
> -static struct df_link*
> +static df_link *
>  get_uses (rtx_insn *insn, rtx reg, bool *success)
>  {
>    df_ref def;
> -  struct df_link *ref_chain, *ref_link;
>
>    if (success)
>      *success = false;
> @@ -221,18 +220,30 @@ get_uses (rtx_insn *insn, rtx reg, bool
>    if (!def)
>      return NULL;
>
> -  ref_chain = DF_REF_CHAIN (def);
> +  df_link *ref_chain = DF_REF_CHAIN (def);
> +  int insn_luid = DF_INSN_LUID (insn);
> +  basic_block insn_bb = BLOCK_FOR_INSN (insn);
>
> -  for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
> +  for (df_link *ref_link = ref_chain; ref_link; ref_link = ref_link->next)
>      {
>        /* Problem getting a use for this instruction.  */
>        if (ref_link->ref == NULL)
>         return NULL;
> +
> +      rtx_insn *use = DF_REF_INSN (ref_link->ref);
> +      if (DEBUG_INSN_P (use))
> +       continue;
> +
>        if (DF_REF_CLASS (ref_link->ref) != DF_REF_REGULAR)
>         return NULL;
>        /* We do not handle REG_EQUIV/REG_EQ notes for now.  */
>        if (DF_REF_FLAGS (ref_link->ref) & DF_REF_IN_NOTE)
>         return NULL;
> +      if (BLOCK_FOR_INSN (use) != insn_bb)
> +       return NULL;
> +      /* Punt if use appears before def in the basic block.  See PR111601.  */
> +      if (DF_INSN_LUID (use) < insn_luid)
> +       return NULL;
>      }
>
>    if (success)
> @@ -255,8 +266,7 @@ fold_offsets (rtx_insn *insn, rtx reg, b
>
>      If DO_RECURSION is true and ANALYZE is false then offset that would result
>      from folding is computed and is returned through the pointer OFFSET_OUT.
> -    The instructions that can be folded are recorded in FOLDABLE_INSNS.
> -*/
> +    The instructions that can be folded are recorded in FOLDABLE_INSNS.  */
>  static bool
>  fold_offsets_1 (rtx_insn *insn, bool analyze, bool do_recursion,
>                 HOST_WIDE_INT *offset_out, bitmap foldable_insns)
> @@ -846,8 +856,8 @@ pass_fold_mem_offsets::execute (function
>    FOR_ALL_BB_FN (bb, fn)
>      {
>        /* There is a conflict between this pass and RISCV's shorten-memrefs
> -         pass.  For now disable folding if optimizing for size because
> -         otherwise this cancels the effects of shorten-memrefs.  */
> +        pass.  For now disable folding if optimizing for size because
> +        otherwise this cancels the effects of shorten-memrefs.  */
>        if (optimize_bb_for_size_p (bb))
>         continue;
>
> --- gcc/testsuite/g++.dg/opt/pr111601.C.jj      2023-11-27 21:33:12.605006881 +0100
> +++ gcc/testsuite/g++.dg/opt/pr111601.C 2023-11-27 21:34:47.267678510 +0100
> @@ -0,0 +1,86 @@
> +// PR bootstrap/111601
> +// { dg-do run { target c++11 } }
> +// { dg-options "-O2 -fno-exceptions -fno-rtti -fprofile-generate" }
> +// { dg-require-profiling "-fprofile-generate" }
> +// { dg-final { cleanup-coverage-files } }
> +
> +struct tree_base
> +{
> +  int code:16;
> +};
> +struct saved_scope
> +{
> +  void *pad[14];
> +  int x_processing_template_decl;
> +};
> +struct saved_scope *scope_chain;
> +struct z_candidate
> +{
> +  tree_base *fn;
> +  void *pad[11];
> +  z_candidate *next;
> +  int viable;
> +  int flags;
> +};
> +
> +__attribute__((noipa)) struct z_candidate *
> +splice_viable (struct z_candidate *cands, bool strict_p, bool *any_viable_p)
> +{
> +  struct z_candidate *viable;
> +  struct z_candidate **last_viable;
> +  struct z_candidate **cand;
> +  bool found_strictly_viable = false;
> +  if (scope_chain->x_processing_template_decl)
> +    strict_p = true;
> +  viable = (z_candidate *) 0;
> +  last_viable = &viable;
> +  *any_viable_p = false;
> +  cand = &cands;
> +  while (*cand)
> +    {
> +      struct z_candidate *c = *cand;
> +      if (!strict_p && (c->viable == 1 || ((int) (c->fn)->code) == 273))
> +       {
> +         strict_p = true;
> +         if (viable && !found_strictly_viable)
> +           {
> +             *any_viable_p = false;
> +             *last_viable = cands;
> +             cands = viable;
> +             viable = (z_candidate *) 0;
> +             last_viable = &viable;
> +           }
> +       }
> +      if (strict_p ? c->viable == 1 : c->viable)
> +       {
> +         *last_viable = c;
> +         *cand = c->next;
> +         c->next = (z_candidate *) 0;
> +         last_viable = &c->next;
> +         *any_viable_p = true;
> +         if (c->viable == 1)
> +           found_strictly_viable = true;
> +       }
> +      else
> +       cand = &c->next;
> +    }
> +  return viable ? viable : cands;
> +}
> +
> +int
> +main ()
> +{
> +  saved_scope s{};
> +  scope_chain = &s;
> +  z_candidate z[4] = {};
> +  z[0].next = &z[1];
> +  z[1].viable = 1;
> +  z[1].next = &z[2];
> +  z[2].viable = 1;
> +  z[2].next = &z[3];
> +  bool b;
> +  z_candidate *c = splice_viable (&z[0], true, &b);
> +  if (c != &z[1] || z[1].next != &z[2] || z[2].next)
> +    __builtin_abort ();
> +  return 0;
> +}
>
>
>         Jakub
>
