On Tue, Nov 28, 2023 at 11:45:58AM +0200, Manolis Tsamis wrote:
> > But, while get_single_def_in_bb checks for
> >   if (DF_INSN_LUID (def) > DF_INSN_LUID (insn))
> >     return NULL;
> > (OT, why not DF_INSN_INFO_LUID (DF_REF_INSN_INFO (ref_chain->ref))
> > instead of DF_INSN_LUID (def), then it doesn't need to look up
> > DF_INSN_INFO_GET (def)?), nothing when walking all uses of def does such
> > luid check.
> 
> I didn't actually know about the difference between DF_INSN_LUID (def)
> and DF_INSN_INFO_LUID (DF_REF_INSN_INFO (ref_chain->ref)), so I just
> used the more concise version.
> Thanks for pointing this out, I could make this a separate change.

Note, in the end the actually tested patch (see the follow-up) just uses
DF_INSN_LUID, after I looked up that DF_INSN_INFO_GET is fairly cheap -
#define DF_INSN_INFO_GET(INSN) (df->insns[(INSN_UID (INSN))])
I was afraid it could be a hash table lookup.
While DF_INSN_INFO_LUID (DF_REF_INSN_INFO (ref_chain->ref)) etc. is a
tiny bit faster (it doesn't have to read the u2.insn_uid from the ref
and look it up in the table), it is also less readable, and because the
difference is just a tiny bit, I guess readability/maintainability
should win.
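
For reference, the relationship between the two spellings (paraphrasing
the df.h macros from memory, so treat this as a sketch rather than a
verbatim copy):

  #define DF_REF_INSN_INFO(REF) ((REF)->base.insn_info)
  #define DF_REF_INSN(REF) ((REF)->base.insn_info->insn)
  #define DF_INSN_INFO_LUID(II) ((II)->luid)
  #define DF_INSN_LUID(INSN) (DF_INSN_INFO_LUID (DF_INSN_INFO_GET (INSN)))
  /* So DF_INSN_LUID (DF_REF_INSN (ref)) goes ref -> insn_info -> insn
     -> INSN_UID -> df->insns[] -> luid, while
     DF_INSN_INFO_LUID (DF_REF_INSN_INFO (ref)) reads ref -> insn_info
     -> luid directly, saving the round trip through the uid table.  */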

> Indeed. Your observations got me thinking about this issue and I see
> two main weaknesses with my current implementation:

> > --- gcc/fold-mem-offsets.cc.jj  2023-11-02 07:49:17.060865772 +0100
> > +++ gcc/fold-mem-offsets.cc     2023-11-27 21:35:35.089007365 +0100
> > @@ -511,6 +511,7 @@ fold_offsets (rtx_insn *insn, rtx reg, b
> >        if (!success)
> >         return 0;
> >
> > +      unsigned luid = DF_INSN_LUID (def);
> >        for (ref_link = uses; ref_link; ref_link = ref_link->next)
> >         {
> >           rtx_insn *use = DF_REF_INSN (ref_link->ref);
> > @@ -534,6 +535,11 @@ fold_offsets (rtx_insn *insn, rtx reg, b
> >           if (use_set && MEM_P (SET_DEST (use_set))
> >               && reg_mentioned_p (dest, SET_SRC (use_set)))
> >             return 0;
> > +
> > +         /* Punt if use appears before def in the basic block.  See
> > +            PR111601.  */
> > +         if (DF_INSN_INFO_LUID (DF_REF_INSN_INFO (ref_link->ref)) < luid)
> > +           return 0;
> I think it would be fitting to put this condition within the get_uses
> function? This way it would reflect what exists in
> get_single_def_in_bb.

get_uses is where I initially added it, but doing it there also means
copying the DEBUG_INSN_P check into that function (because we shouldn't
be doing such tests on debug insns).

If we put it into get_uses, maybe we should, in sync with
get_single_def_in_bb, also check that the use is in the same bb.
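
To illustrate why the luid check matters, here is my own sketch (not
taken from the PR) of how a use can precede its def within the same
basic block, namely a loop whose body is a single bb:

  L:
    ... = *a;         /* luid 1: use of a from the previous iteration */
    ...
    a = b + 8;        /* luid 5: the def whose uses get_uses walks */
    if (cond) goto L;

The DU chain of the luid-5 def includes the luid-1 use, so code that
assumes every use executes after its def would be wrong here; hence the
DF_INSN_LUID (use) < insn_luid punt.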

So, like this (so far untested)?

Note, the earlier posted patch passed bootstrap/regtest on
{powerpc64le,x86_64,i686}-linux.

2023-11-28  Jakub Jelinek  <ja...@redhat.com>

        PR bootstrap/111601
        * fold-mem-offsets.cc (get_uses): Ignore DEBUG_INSN uses.  Otherwise,
        punt if use is in a different basic block from INSN or appears before
        INSN in the same basic block.  Formatting fixes.
        (get_single_def_in_bb): Formatting fixes.
        (fold_offsets_1, pass_fold_mem_offsets::execute): Comment formatting
        fixes.

        * g++.dg/opt/pr111601.C: New test.

--- gcc/fold-mem-offsets.cc.jj  2023-11-02 07:49:17.060865772 +0100
+++ gcc/fold-mem-offsets.cc     2023-11-28 11:47:34.941679105 +0100
@@ -154,7 +154,7 @@ static int stats_fold_count;
    The definition is desired for REG used in INSN.
    Return the definition insn or NULL if there's no definition with
    the desired criteria.  */
-static rtx_insn*
+static rtx_insn *
 get_single_def_in_bb (rtx_insn *insn, rtx reg)
 {
   df_ref use;
@@ -205,11 +205,10 @@ get_single_def_in_bb (rtx_insn *insn, rt
 /* Get all uses of REG which is set in INSN.  Return the use list or NULL if a
    use is missing / irregular.  If SUCCESS is not NULL then set it to false if
    there are missing / irregular uses and true otherwise.  */
-static struct df_link*
+static df_link *
 get_uses (rtx_insn *insn, rtx reg, bool *success)
 {
   df_ref def;
-  struct df_link *ref_chain, *ref_link;
 
   if (success)
     *success = false;
@@ -221,18 +220,30 @@ get_uses (rtx_insn *insn, rtx reg, bool
   if (!def)
     return NULL;
 
-  ref_chain = DF_REF_CHAIN (def);
+  df_link *ref_chain = DF_REF_CHAIN (def);
+  int insn_luid = DF_INSN_LUID (insn);
+  basic_block insn_bb = BLOCK_FOR_INSN (insn);
 
-  for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
+  for (df_link *ref_link = ref_chain; ref_link; ref_link = ref_link->next)
     {
       /* Problem getting a use for this instruction.  */
       if (ref_link->ref == NULL)
        return NULL;
+
+      rtx_insn *use = DF_REF_INSN (ref_link->ref);
+      if (DEBUG_INSN_P (use))
+       continue;
+
       if (DF_REF_CLASS (ref_link->ref) != DF_REF_REGULAR)
        return NULL;
       /* We do not handle REG_EQUIV/REG_EQ notes for now.  */
       if (DF_REF_FLAGS (ref_link->ref) & DF_REF_IN_NOTE)
        return NULL;
+      if (BLOCK_FOR_INSN (use) != insn_bb)
+       return NULL;
+      /* Punt if use appears before def in the basic block.  See PR111601.  */
+      if (DF_INSN_LUID (use) < insn_luid)
+       return NULL;
     }
 
   if (success)
@@ -255,8 +266,7 @@ fold_offsets (rtx_insn *insn, rtx reg, b
 
     If DO_RECURSION is true and ANALYZE is false then offset that would result
     from folding is computed and is returned through the pointer OFFSET_OUT.
-    The instructions that can be folded are recorded in FOLDABLE_INSNS.
-*/
+    The instructions that can be folded are recorded in FOLDABLE_INSNS.  */
 static bool
 fold_offsets_1 (rtx_insn *insn, bool analyze, bool do_recursion,
                HOST_WIDE_INT *offset_out, bitmap foldable_insns)
@@ -846,8 +856,8 @@ pass_fold_mem_offsets::execute (function
   FOR_ALL_BB_FN (bb, fn)
     {
       /* There is a conflict between this pass and RISCV's shorten-memrefs
-         pass.  For now disable folding if optimizing for size because
-         otherwise this cancels the effects of shorten-memrefs.  */
+        pass.  For now disable folding if optimizing for size because
+        otherwise this cancels the effects of shorten-memrefs.  */
       if (optimize_bb_for_size_p (bb))
        continue;
 
--- gcc/testsuite/g++.dg/opt/pr111601.C.jj      2023-11-27 21:33:12.605006881 +0100
+++ gcc/testsuite/g++.dg/opt/pr111601.C 2023-11-27 21:34:47.267678510 +0100
@@ -0,0 +1,86 @@
+// PR bootstrap/111601
+// { dg-do run { target c++11 } }
+// { dg-options "-O2 -fno-exceptions -fno-rtti -fprofile-generate" }
+// { dg-require-profiling "-fprofile-generate" }
+// { dg-final { cleanup-coverage-files } }
+
+struct tree_base
+{
+  int code:16;
+};
+struct saved_scope
+{
+  void *pad[14];
+  int x_processing_template_decl;
+};
+struct saved_scope *scope_chain;
+struct z_candidate
+{
+  tree_base *fn;
+  void *pad[11];
+  z_candidate *next;
+  int viable;
+  int flags;
+};
+
+__attribute__((noipa)) struct z_candidate *
+splice_viable (struct z_candidate *cands, bool strict_p, bool *any_viable_p)
+{
+  struct z_candidate *viable;
+  struct z_candidate **last_viable;
+  struct z_candidate **cand;
+  bool found_strictly_viable = false;
+  if (scope_chain->x_processing_template_decl)
+    strict_p = true;
+  viable = (z_candidate *) 0;
+  last_viable = &viable;
+  *any_viable_p = false;
+  cand = &cands;
+  while (*cand)
+    {
+      struct z_candidate *c = *cand;
+      if (!strict_p && (c->viable == 1 || ((int) (c->fn)->code) == 273))
+       {
+         strict_p = true;
+         if (viable && !found_strictly_viable)
+           {
+             *any_viable_p = false;
+             *last_viable = cands;
+             cands = viable;
+             viable = (z_candidate *) 0;
+             last_viable = &viable;
+           }
+       }
+      if (strict_p ? c->viable == 1 : c->viable)
+       {
+         *last_viable = c;
+         *cand = c->next;
+         c->next = (z_candidate *) 0;
+         last_viable = &c->next;
+         *any_viable_p = true;
+         if (c->viable == 1)
+           found_strictly_viable = true;
+       }
+      else
+       cand = &c->next;
+    }
+  return viable ? viable : cands;
+}
+
+int
+main ()
+{
+  saved_scope s{};
+  scope_chain = &s;
+  z_candidate z[4] = {};
+  z[0].next = &z[1];
+  z[1].viable = 1;
+  z[1].next = &z[2];
+  z[2].viable = 1;
+  z[2].next = &z[3];
+  bool b;
+  z_candidate *c = splice_viable (&z[0], true, &b);
+  if (c != &z[1] || z[1].next != &z[2] || z[2].next)
+    __builtin_abort ();
+  return 0;
+}


        Jakub
