----------------------------------- void bar (unsigned int); unsigned int foo (void) { unsigned int i, j; for (i = 1; i < 30; i++) { j = 2 + 3*i; bar (j); } return j; } ----------------------------------- --> .optimized: foo () { unsigned intD.3 ivtmp.10D.1589; unsigned intD.3 ivtmp.2D.1581; unsigned intD.3 pretmp.1D.1580; unsigned intD.3 pretmp.0D.1579; unsigned intD.3 jD.1567; unsigned intD.3 iD.1566; unsigned intD.3 D.1572; unsigned intD.3 D.1571; # BLOCK 0 # PRED: ENTRY [100.0%] (fallthru,exec) # SUCC: 1 [100.0%] (fallthru,exec) # BLOCK 1 # PRED: 1 [96.5%] (dfs_back,true,exec) 0 [100.0%] (fallthru,exec) # jD.1567_2 = PHI <jD.1567_13(1), 5(0)>; <L0>:; bar (jD.1567_2); jD.1567_13 = jD.1567_2 + 3; if (jD.1567_13 != 92) goto <L0>; else goto <L2>; # SUCC: 1 [96.5%] (dfs_back,true,exec) 2 [3.5%] (loop_exit,false,exec) # BLOCK 2 # PRED: 1 [3.5%] (loop_exit,false,exec) # jD.1567_3 = PHI <jD.1567_2(1)>; <L2>:; return jD.1567_3; # SUCC: EXIT [100.0%] } --> .vars foo () { unsigned intD.3 j.13D.1592; unsigned intD.3 jD.1567; # BLOCK 0 # PRED: ENTRY [100.0%] (fallthru,exec) jD.1567 = 5; # SUCC: 1 [100.0%] (fallthru,exec) # BLOCK 1 # PRED: 3 [100.0%] (fallthru) 0 [100.0%] (fallthru,exec) <L0>:; bar (jD.1567); j.13D.1592 = jD.1567 + 3; if (j.13D.1592 != 92) goto <L8>; else goto <L2>; # SUCC: 3 [96.5%] (dfs_back,true,exec) 2 [3.5%] (loop_exit,false,exec) # BLOCK 3 # PRED: 1 [96.5%] (dfs_back,true,exec) <L8>:; jD.1567 = j.13D.1592; goto <bb 1> (<L0>); # SUCC: 1 [100.0%] (fallthru) # BLOCK 2 # PRED: 1 [3.5%] (loop_exit,false,exec) <L2>:; return jD.1567; # SUCC: EXIT [100.0%] } Note the copy inside the loop, and the extra basic block that is needed for it. This happens because j_2 and j_13 are not coalesced when going out of SSA form. 
The copy is never removed again later on:

foo:
        pushq   %rbx
        movl    $5, %ebx
        jmp     .L2
        .p2align 4,,7
.L7:
        movl    %eax, %ebx
.L2:
        movl    %ebx, %edi
        call    bar
        leal    3(%rbx), %eax
        cmpl    $92, %eax
        jne     .L7
        movl    %ebx, %eax
        popq    %rbx
        ret

This is a regression from GCC 4.0, most likely caused by the copy propagation pass that Diego merged in from the tree-cleanup-branch. It causes a number of performance regressions on various targets. The bug looks somewhat similar to the issue previously discussed in PR18048, but I haven't checked whether that bug is also affected by this problem.
-- Summary: [4.1 regression] Not copy propagating single-argument PHIs causes out-of-ssa coalescing failure Product: gcc Version: 4.1.0 Status: UNCONFIRMED Keywords: missed-optimization, TREE Severity: normal Priority: P2 Component: tree-optimization AssignedTo: unassigned at gcc dot gnu dot org ReportedBy: steven at gcc dot gnu dot org CC: amacleod at redhat dot com,dnovillo at redhat dot com,gcc-bugs at gcc dot gnu dot org http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21488