I found that gcc 4.1.1 (and 4.2) cannot optimize this simple code well.

void foo(int *a)
{
        int i;
        for (i = 0; i < 100; i++)
                a[0] += a[1];
}

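If I compile this code with -O2, the instructions that load from a[1]
and store to a[0] both stay inside the loop, even though the load could
be done once before the loop and the store once after it (as gcc 3.4
does in the outputs below).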

Is this a known issue?  PR20463, PR21676, or some other PR?


The following are the outputs from several gcc versions on mips and i386.

gcc 4.1.1 on mips: (no good)
foo:
        .set    noreorder
        .set    nomacro
        
        lw      $3,0($4)
        addiu   $6,$4,4
        move    $5,$0
        li      $7,100                  # 0x64
$L2:
        lw      $2,0($6)
        addiu   $5,$5,1
        addu    $2,$3,$2
        move    $3,$2
        bne     $5,$7,$L2
        sw      $2,0($4)

        j       $31
        nop

gcc 4.2 on mips: (no good)
foo:
        .set    noreorder
        .set    nomacro
        
        lw      $3,0($4)
        lw      $2,4($4)
        addiu   $6,$4,4
        addu    $3,$3,$2
        sw      $3,0($4)
        li      $5,1                    # 0x1
$L2:
        lw      $2,0($6)
        addiu   $5,$5,1
        addu    $3,$3,$2
        li      $2,100                  # 0x64
        bne     $5,$2,$L2
        sw      $3,0($4)

        j       $31
        nop

gcc 4.2 on i386: (no good)
foo:
        pushl   %ebp
        movl    $1, %edx
        movl    %esp, %ebp
        pushl   %ebx
        movl    8(%ebp), %ebx
        movl    4(%ebx), %eax
        leal    4(%ebx), %ecx
        addl    (%ebx), %eax
        movl    %eax, (%ebx)
        .p2align 4,,7
.L2:
        addl    (%ecx), %eax
        addl    $1, %edx
        cmpl    $100, %edx
        movl    %eax, (%ebx)
        jne     .L2
        popl    %ebx
        popl    %ebp
        ret

gcc 3.4.6 on mips: (good)
foo:
        .set    noreorder
        .set    nomacro
        
        lw      $5,0($4)
        lw      $6,4($4)
        li      $2,99                   # 0x63
$L5:
        addu    $3,$5,$6
        addiu   $2,$2,-1
        bgez    $2,$L5
        move    $5,$3

        j       $31
        sw      $3,0($4)

gcc 3.4.4 on i386: (good)
foo:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %esi
        pushl   %ebx
        movl    8(%ebp), %esi
        movl    (%esi), %ecx
        movl    4(%esi), %ebx
        movl    $99, %eax
        .p2align 4,,15
.L5:
        leal    (%ecx,%ebx), %edx
        decl    %eax
        movl    %edx, %ecx
        jns     .L5
        movl    %edx, (%esi)
        popl    %ebx
        popl    %esi
        popl    %ebp
        ret

---
Atsushi Nemoto

Reply via email to