On Mon, Oct 16, 2023 at 01:11:01PM -0600, Jeff Law wrote:
> > gcc/ChangeLog:
> > 
> >     * Makefile.in: Add fold-mem-offsets.o.
> >     * passes.def: Schedule a new pass.
> >     * tree-pass.h (make_pass_fold_mem_offsets): Declare.
> >     * common.opt: New options.
> >     * doc/invoke.texi: Document new option.
> >     * fold-mem-offsets.cc: New file.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> >     * gcc.target/riscv/fold-mem-offsets-1.c: New test.
> >     * gcc.target/riscv/fold-mem-offsets-2.c: New test.
> >     * gcc.target/riscv/fold-mem-offsets-3.c: New test.
> Thanks, I've pushed this to the trunk.

This breaks profiledbootstrap on powerpc64le-linux.
>From what I can see, the pass works one basic block at a time and
will punt on any non-DEBUG_INSN uses outside of the current block
(I believe because of the
          /* This use affects instructions outside of CAN_FOLD_INSNS.  */
          if (!bitmap_bit_p (&can_fold_insns, INSN_UID (use)))
            return 0;
test and can_fold_insns only set in do_analysis (when processing insns in
current bb, cleared at the end) or results of get_single_def_in_bb
(which are checked to be in the same bb).
But, while get_single_def_in_bb checks for
  if (DF_INSN_LUID (def) > DF_INSN_LUID (insn))
    return NULL;
(OT, why not DF_INSN_INFO_LUID (DF_REF_INSN_INFO (ref_chain->ref))
instead of DF_INSN_LUID (def), then it doesn't need to look up
DF_INSN_INFO_GET (def)?), nothing when walking all uses of def does such
luid check.
The basic block in the PR in question has:
...
(insn 212 210 215 25 (set (mem/f:DI (reg/v/f:DI 10 10 [orig:152 last_viable ] 
[152]) [2 *last_viable_336+0 S8 A64])
        (reg/f:DI 9 9 [orig:155 _342 ] [155])) "pr111601.ii":50:17 683 
{*movdi_internal64}
     (expr_list:REG_DEAD (reg/v/f:DI 10 10 [orig:152 last_viable ] [152])
        (nil)))
(insn 215 212 484 25 (set (reg:DI 5 5 [226])
        (const_int 0 [0])) "pr111601.ii":52:12 683 {*movdi_internal64}
     (expr_list:REG_EQUIV (const_int 0 [0])
        (nil)))
(insn 484 215 218 25 (set (reg/v/f:DI 10 10 [orig:152 last_viable ] [152])
        (reg/f:DI 9 9 [orig:155 _342 ] [155])) "pr111601.ii":52:12 683 
{*movdi_internal64}
     (nil))
...
(insn 564 214 216 25 (set (reg/v/f:DI 10 10 [orig:152 last_viable ] [152])
        (plus:DI (reg/v/f:DI 10 10 [orig:152 last_viable ] [152])
            (const_int 96 [0x60]))) "pr111601.ii":52:12 66 {*adddi3}
     (nil))
(insn 216 564 219 25 (set (mem/f:DI (reg/v/f:DI 10 10 [orig:152 last_viable ] 
[152]) [2 _343->next+0 S8 A64])
        (reg:DI 5 5 [226])) "pr111601.ii":52:12 683 {*movdi_internal64}
     (expr_list:REG_DEAD (reg:DI 5 5 [226])
        (nil)))
...
and when asking for all uses of %r10 from def 564, it will see uses
in 216 and 212; the former is after the += 96 addition and gets changed
to load from %r10+96 with the addition being dropped, but there is
the other store which is a use across the backedge and when reached
from other edges certainly doesn't have the + 96 addition anywhere,
so the pass doesn't actually change that location.

Haven't bootstrapped/regtested this yet, will start momentarily,
posting here just in case I'm missing something important.

2023-11-27  Jakub Jelinek  <ja...@redhat.com>

        PR bootstrap/111601
        * fold-mem-offsets.cc (fold_offsets): Punt if use appears before
        def in the basic block.

        * g++.dg/opt/pr111601.C: New test.

--- gcc/fold-mem-offsets.cc.jj  2023-11-02 07:49:17.060865772 +0100
+++ gcc/fold-mem-offsets.cc     2023-11-27 21:35:35.089007365 +0100
@@ -511,6 +511,7 @@ fold_offsets (rtx_insn *insn, rtx reg, b
       if (!success)
        return 0;
 
+      unsigned luid = DF_INSN_LUID (def);
       for (ref_link = uses; ref_link; ref_link = ref_link->next)
        {
          rtx_insn *use = DF_REF_INSN (ref_link->ref);
@@ -534,6 +535,11 @@ fold_offsets (rtx_insn *insn, rtx reg, b
          if (use_set && MEM_P (SET_DEST (use_set))
              && reg_mentioned_p (dest, SET_SRC (use_set)))
            return 0;
+
+         /* Punt if use appears before def in the basic block.  See
+            PR111601.  */
+         if (DF_INSN_INFO_LUID (DF_REF_INSN_INFO (ref_link->ref)) < luid)
+           return 0;
        }
 
       bitmap_set_bit (&can_fold_insns, INSN_UID (def));
--- gcc/testsuite/g++.dg/opt/pr111601.C.jj      2023-11-27 21:33:12.605006881 
+0100
+++ gcc/testsuite/g++.dg/opt/pr111601.C 2023-11-27 21:34:47.267678510 +0100
@@ -0,0 +1,86 @@
+// PR bootstrap/111601
+// { dg-do run { target c++11 } }
+// { dg-options "-O2 -fno-exceptions -fno-rtti -fprofile-generate" }
+// { dg-require-profiling "-fprofile-generate" }
+// { dg-final { cleanup-coverage-files } }
+
+struct tree_base
+{
+  int code:16;
+};
+struct saved_scope
+{
+  void *pad[14];
+  int x_processing_template_decl;
+};
+struct saved_scope *scope_chain;
+struct z_candidate
+{
+  tree_base *fn;
+  void *pad[11];
+  z_candidate *next;
+  int viable;
+  int flags;
+};
+
+__attribute__((noipa)) struct z_candidate *
+splice_viable (struct z_candidate *cands, bool strict_p, bool *any_viable_p)
+{
+  struct z_candidate *viable;
+  struct z_candidate **last_viable;
+  struct z_candidate **cand;
+  bool found_strictly_viable = false;
+  if (scope_chain->x_processing_template_decl)
+    strict_p = true;
+  viable = (z_candidate *) 0;
+  last_viable = &viable;
+  *any_viable_p = false;
+  cand = &cands;
+  while (*cand)
+    {
+      struct z_candidate *c = *cand;
+      if (!strict_p && (c->viable == 1 || ((int) (c->fn)->code) == 273))
+       {
+         strict_p = true;
+         if (viable && !found_strictly_viable)
+           {
+             *any_viable_p = false;
+             *last_viable = cands;
+             cands = viable;
+             viable = (z_candidate *) 0;
+             last_viable = &viable;
+           }
+       }
+      if (strict_p ? c->viable == 1 : c->viable)
+       {
+         *last_viable = c;
+         *cand = c->next;
+         c->next = (z_candidate *) 0;
+         last_viable = &c->next;
+         *any_viable_p = true;
+         if (c->viable == 1)
+           found_strictly_viable = true;
+       }
+      else
+       cand = &c->next;
+    }
+  return viable ? viable : cands;
+}
+
+int
+main ()
+{
+  saved_scope s{};
+  scope_chain = &s;
+  z_candidate z[4] = {};
+  z[0].next = &z[1];
+  z[1].viable = 1;
+  z[1].next = &z[2];
+  z[2].viable = 1;
+  z[2].next = &z[3];
+  bool b;
+  z_candidate *c = splice_viable (&z[0], true, &b);
+  if (c != &z[1] || z[1].next != &z[2] || z[2].next)
+    __builtin_abort ();
+  return 0;
+}


        Jakub

Reply via email to