https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111601
--- Comment #21 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
Reduced testcase (though just the function in question, not a runnable testcase):

struct tree_base { int code:16; };
struct saved_scope { void *pad[14]; int x_processing_template_decl; };
extern struct saved_scope *scope_chain;
struct z_candidate { tree_base *fn; void *pad[11]; z_candidate *next; int viable; int flags; };

__attribute__((noipa)) struct z_candidate *
splice_viable (struct z_candidate *cands, bool strict_p, bool *any_viable_p)
{
  struct z_candidate *viable;
  struct z_candidate **last_viable;
  struct z_candidate **cand;
  bool found_strictly_viable = false;

  if (scope_chain->x_processing_template_decl)
    strict_p = true;

  viable = (z_candidate *) 0;
  last_viable = &viable;
  *any_viable_p = false;

  cand = &cands;
  while (*cand)
    {
      struct z_candidate *c = *cand;
      if (!strict_p && (c->viable == 1 || ((int) (c->fn)->code) == 273))
        {
          strict_p = true;
          if (viable && !found_strictly_viable)
            {
              *any_viable_p = false;
              *last_viable = cands;
              cands = viable;
              viable = (z_candidate *) 0;
              last_viable = &viable;
            }
        }

      if (strict_p ? c->viable == 1 : c->viable)
        {
          *last_viable = c;
          *cand = c->next;
          c->next = (z_candidate *) 0;
          last_viable = &c->next;
          *any_viable_p = true;
          if (c->viable == 1)
            found_strictly_viable = true;
        }
      else
        cand = &c->next;
    }

  return viable ? viable : cands;
}

With this and
./cc1plus -quiet -fpreprocessed -O2 -fprofile-generate -fno-exceptions -fno-rtti -fasynchronous-unwind-tables -fno-common -fno-PIE -mcpu=power8 pr111601.ii -o pr111601.s3 -ffold-mem-offsets -da
vs.
./cc1plus -quiet -fpreprocessed -O2 -fprofile-generate -fno-exceptions -fno-rtti -fasynchronous-unwind-tables -fno-common -fno-PIE -mcpu=power8 pr111601.ii -o pr111601.s4 -fno-fold-mem-offsets -da
the assembly difference is just

 .L13:
 	std 9,0(10)
 	mr 10,9
 	li 5,0
+	addi 10,10,96
 	li 7,1
 	addi 4,4,1
 	addi 6,6,1
 	ld 9,96(9)
 	std 9,0(8)
-	std 5,96(10)
+	std 5,0(10)
 	stb 7,0(31)
 	ori 2,2,0
 	ld 9,0(8)
 	cmpdi 0,9,0
 	beq 0,.L18
 	lwz 7,104(9)
 	li 12,1
 	li 5,1
 	cmpwi 0,7,1
 	beq 0,.L13

which shows the problem in a single loop.
Without the pass, %r10 is set to %r9 + 96, %r5 (NULL) is stored to it first, and if the loop iterates again, %r9 is stored to it.
With the pass, %r10 is set to %r9 and %r5 (NULL) is stored to %r10 + 96, but then the next iteration's store through %r10 overwrites the fn pointer (offset 0) in the structure rather than next (offset 96).