I found that gcc 4.1.1 (and 4.2) cannot optimize this simple code well: void foo(int *a) { int i; for (i = 0; i < 100; i++) a[0] += a[1]; }
If I compile this code with -O2, the instructions to load from a[1] and store to a[0] are both inside the loop. Is this a known issue? PR20463 or PR21676 or any other? The following are outputs from some gcc versions on mips and i386. gcc 4.1.1 on mips: (no good) foo: .set noreorder .set nomacro lw $3,0($4) addiu $6,$4,4 move $5,$0 li $7,100 # 0x64 $L2: lw $2,0($6) addiu $5,$5,1 addu $2,$3,$2 move $3,$2 bne $5,$7,$L2 sw $2,0($4) j $31 nop gcc 4.2 on mips: (no good) foo: .set noreorder .set nomacro lw $3,0($4) lw $2,4($4) addiu $6,$4,4 addu $3,$3,$2 sw $3,0($4) li $5,1 # 0x1 $L2: lw $2,0($6) addiu $5,$5,1 addu $3,$3,$2 li $2,100 # 0x64 bne $5,$2,$L2 sw $3,0($4) j $31 nop gcc 4.2 on i386: foo: pushl %ebp movl $1, %edx movl %esp, %ebp pushl %ebx movl 8(%ebp), %ebx movl 4(%ebx), %eax leal 4(%ebx), %ecx addl (%ebx), %eax movl %eax, (%ebx) .p2align 4,,7 .L2: addl (%ecx), %eax addl $1, %edx cmpl $100, %edx movl %eax, (%ebx) jne .L2 popl %ebx popl %ebp ret gcc 3.4.6 on mips: (good) foo: .set noreorder .set nomacro lw $5,0($4) lw $6,4($4) li $2,99 # 0x63 $L5: addu $3,$5,$6 addiu $2,$2,-1 bgez $2,$L5 move $5,$3 j $31 sw $3,0($4) gcc 3.4.4 on i386: (good) foo: pushl %ebp movl %esp, %ebp pushl %esi pushl %ebx movl 8(%ebp), %esi movl (%esi), %ecx movl 4(%esi), %ebx movl $99, %eax .p2align 4,,15 .L5: leal (%ecx,%ebx), %edx decl %eax movl %edx, %ecx jns .L5 movl %edx, (%esi) popl %ebx popl %esi popl %ebp ret --- Atsushi Nemoto