----------------------------------- 
/* External function; only its declaration is given in this test case.  */
void bar (unsigned int); 
 
/* Reduced test case: calls bar with j = 2 + 3*i for i = 1 .. 29
   (i.e. 5, 8, ..., 89) and returns the last value assigned to j (89).  */
unsigned int 
foo (void) 
{ 
  unsigned int i, j; 
 
  for (i = 1; i < 30; i++) 
    { 
      j = 2 + 3*i; 
      bar (j); 
    } 
  /* j is read after the loop, so its value must survive the loop exit.  */
  return j; 
} 
----------------------------------- 
 
--> .optimized: 
 
foo () 
{ 
  unsigned intD.3 ivtmp.10D.1589; 
  unsigned intD.3 ivtmp.2D.1581; 
  unsigned intD.3 pretmp.1D.1580; 
  unsigned intD.3 pretmp.0D.1579; 
  unsigned intD.3 jD.1567; 
  unsigned intD.3 iD.1566; 
  unsigned intD.3 D.1572; 
  unsigned intD.3 D.1571; 
 
  # BLOCK 0 
  # PRED: ENTRY [100.0%]  (fallthru,exec) 
  # SUCC: 1 [100.0%]  (fallthru,exec) 
 
  # BLOCK 1 
  # PRED: 1 [96.5%]  (dfs_back,true,exec) 0 [100.0%]  (fallthru,exec) 
  # jD.1567_2 = PHI <jD.1567_13(1), 5(0)>; 
<L0>:; 
  bar (jD.1567_2); 
  jD.1567_13 = jD.1567_2 + 3; 
  if (jD.1567_13 != 92) goto <L0>; else goto <L2>; 
  # SUCC: 1 [96.5%]  (dfs_back,true,exec) 2 [3.5%]  (loop_exit,false,exec) 
 
  # BLOCK 2 
  # PRED: 1 [3.5%]  (loop_exit,false,exec) 
  # jD.1567_3 = PHI <jD.1567_2(1)>; 
<L2>:; 
  return jD.1567_3; 
  # SUCC: EXIT [100.0%] 
 
} 
 
 
--> .vars 
 
foo () 
{ 
  unsigned intD.3 j.13D.1592; 
  unsigned intD.3 jD.1567; 
 
  # BLOCK 0 
  # PRED: ENTRY [100.0%]  (fallthru,exec) 
  jD.1567 = 5; 
  # SUCC: 1 [100.0%]  (fallthru,exec) 
 
  # BLOCK 1 
  # PRED: 3 [100.0%]  (fallthru) 0 [100.0%]  (fallthru,exec) 
<L0>:; 
  bar (jD.1567); 
  j.13D.1592 = jD.1567 + 3; 
  if (j.13D.1592 != 92) goto <L8>; else goto <L2>; 
  # SUCC: 3 [96.5%]  (dfs_back,true,exec) 2 [3.5%]  (loop_exit,false,exec) 
 
  # BLOCK 3 
  # PRED: 1 [96.5%]  (dfs_back,true,exec) 
<L8>:; 
  jD.1567 = j.13D.1592; 
  goto <bb 1> (<L0>); 
  # SUCC: 1 [100.0%]  (fallthru) 
 
  # BLOCK 2 
  # PRED: 1 [3.5%]  (loop_exit,false,exec) 
<L2>:; 
  return jD.1567; 
  # SUCC: EXIT [100.0%] 
 
} 
 
Note the copy inside the loop (jD.1567 = j.13D.1592 in block 3), and the
extra basic block that is needed for it.  This happens because j_2 and
j_13 are not coalesced when going out of SSA form.  No later pass gets
rid of the copy, as the final assembly shows:
 
foo: 
        pushq   %rbx 
        movl    $5, %ebx 
        jmp     .L2 
        .p2align 4,,7 
.L7: 
        movl    %eax, %ebx 
.L2: 
        movl    %ebx, %edi 
        call    bar 
        leal    3(%rbx), %eax 
        cmpl    $92, %eax 
        jne     .L7 
        movl    %ebx, %eax 
        popq    %rbx 
        ret 
 
This is a regression from GCC 4.0, most likely caused by the
copy propagation pass that Diego merged in from the tree-cleanup-branch.
It causes a number of performance regressions on various targets.
 
The bug looks somewhat similar to the issue previously discussed in 
PR18048, but I haven't checked if that bug is also affected by this 
problem.

-- 
           Summary: [4.1 regression] Not copy propagating single-argument
                    PHIs causes out-of-ssa coalescing failure
           Product: gcc
           Version: 4.1.0
            Status: UNCONFIRMED
          Keywords: missed-optimization, TREE
          Severity: normal
          Priority: P2
         Component: tree-optimization
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: steven at gcc dot gnu dot org
                CC: amacleod at redhat dot com,dnovillo at redhat dot
                    com,gcc-bugs at gcc dot gnu dot org


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21488

Reply via email to