https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111601

--- Comment #21 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
Reduced testcase (though just the function in question, not a runnable
testcase):
/* Reduced stand-in for a tree node header.  Only the 16-bit `code'
   bit-field is kept; splice_viable compares it against the constant 273.  */
struct tree_base
{
  int code:16;
};
/* Reduced stand-in for the scope record reached through scope_chain.
   Only x_processing_template_decl is read by splice_viable.  */
struct saved_scope
{
  /* Fourteen pointer-sized slots ahead of the one field that is used;
     presumably kept so the field stays at the offset it has in the
     unreduced structure -- TODO confirm.  */
  void *pad[14];
  /* When nonzero, splice_viable forces strict_p to true.  */
  int x_processing_template_decl;
};
/* Declared here, defined elsewhere; splice_viable dereferences it
   without a null check.  */
extern struct saved_scope *scope_chain;
/* Node of the singly linked candidate list processed by splice_viable.
   With 8-byte pointers the layout puts `fn' at offset 0, `next' at
   offset 96 (1 + 11 pointers) and `viable' at offset 104, which matches
   the 96(...) and 104(...) displacements in the assembly quoted after the
   testcase.  */
struct z_candidate
{
  /* Node whose `code' field is tested against 273 in splice_viable.  */
  tree_base *fn;
  /* Eleven pointer-sized slots that are not accessed by the testcase.  */
  void *pad[11];
  /* Link to the following candidate; null terminates the list.  */
  z_candidate *next;
  /* Viability state: splice_viable treats the value 1 as strictly viable
     and any other nonzero value as viable only while strict_p is false.  */
  int viable;
  /* Not accessed by splice_viable.  */
  int flags;
};

/* Split the candidate list CANDS into the candidates that pass the
   viability test and those that do not.

   CANDS is the head of a null-terminated list linked through `next'.
   STRICT_P selects the test: when true only candidates with viable == 1
   pass, when false any nonzero `viable' passes.  It is forced to true if
   scope_chain->x_processing_template_decl is nonzero, and also as soon as
   a candidate with viable == 1 or with fn->code == 273 is seen.
   *ANY_VIABLE_P is written unconditionally: it starts out false, becomes
   true whenever a candidate is moved to the viable list, and is reset to
   false if previously collected candidates are put back.

   Returns the list of passing candidates if it is non-empty, otherwise
   the remaining list.  The list is relinked in place; nothing is
   allocated or freed.

   The noipa attribute is there so GCC does not apply interprocedural
   optimizations to this function.  */
__attribute__((noipa)) struct z_candidate *
splice_viable (struct z_candidate *cands, bool strict_p, bool *any_viable_p)
{
  /* Head of the list of candidates that passed the test so far.  */
  struct z_candidate *viable;
  /* Address of the link slot where the next passing candidate is stored:
     &viable while the list is empty, otherwise &tail->next.  */
  struct z_candidate **last_viable;
  /* Address of the link slot that holds the candidate being examined.  */
  struct z_candidate **cand;
  /* Set once a candidate with viable == 1 has been moved to `viable'.  */
  bool found_strictly_viable = false;
  if (scope_chain->x_processing_template_decl)
    strict_p = true;
  viable = (z_candidate *) 0;
  last_viable = &viable;
  *any_viable_p = false;
  cand = &cands;
  while (*cand)
    {
      struct z_candidate *c = *cand;
      /* 273 is a raw code value left over from the reduction; its symbolic
         name is not visible in this testcase.  */
      if (!strict_p && (c->viable == 1 || ((int) (c->fn)->code) == 273))
        {
          /* From here on only candidates with viable == 1 are accepted.  */
          strict_p = true;
          if (viable && !found_strictly_viable)
            {
              /* Candidates were collected under the non-strict test and
                 none of them had viable == 1: chain the remaining list
                 after them, make the result the new remaining list and
                 start again with an empty viable list.  `cand' is left
                 unchanged.  */
              *any_viable_p = false;
              *last_viable = cands;
              cands = viable;
              viable = (z_candidate *) 0;
              last_viable = &viable;
            }
        }
      if (strict_p ? c->viable == 1 : c->viable)
        {
          /* Unlink C from the remaining list and append it to the viable
             list.  `cand' is not advanced because *cand now already holds
             C's former successor.  The store of null to c->next and the
             later store through last_viable (now &c->next) are the memory
             accesses discussed in the assembly comparison that follows
             the testcase.  */
          *last_viable = c;
          *cand = c->next;
          c->next = (z_candidate *) 0;
          last_viable = &c->next;
          *any_viable_p = true;
          if (c->viable == 1)
            found_strictly_viable = true;
        }
      else
        /* C stays on the remaining list; step to its `next' slot.  */
        cand = &c->next;
    }
  return viable ? viable : cands;
}
With this and
./cc1plus -quiet -fpreprocessed -O2 -fprofile-generate -fno-exceptions
-fno-rtti -fasynchronous-unwind-tables -fno-common -fno-PIE -mcpu=power8
pr111601.ii -o pr111601.s3 -ffold-mem-offsets -da
vs.
./cc1plus -quiet -fpreprocessed -O2 -fprofile-generate -fno-exceptions
-fno-rtti -fasynchronous-unwind-tables -fno-common -fno-PIE -mcpu=power8
pr111601.ii -o pr111601.s4 -fno-fold-mem-offsets -da
the assembly difference is just
 .L13:
        std 9,0(10)
        mr 10,9
        li 5,0
+       addi 10,10,96
        li 7,1
        addi 4,4,1
        addi 6,6,1
        ld 9,96(9)
        std 9,0(8)
-       std 5,96(10)
+       std 5,0(10)
        stb 7,0(31)
        ori 2,2,0
        ld 9,0(8)
        cmpdi 0,9,0
        beq 0,.L18
        lwz 7,104(9)
        li 12,1
        li 5,1
        cmpwi 0,7,1
        beq 0,.L13
which shows the problem in a single loop.  Without the pass, %r10 is set to
%r9 + 96, %r5 (NULL) is stored through it first, and if the loop iterates
again, %r9 is stored through it.  With the pass, %r10 is set to just %r9 and
%r5 (NULL) is stored to %r10 + 96, but the next iteration's store to 0(%r10)
then overwrites the fn pointer in the structure rather than next.

Reply via email to