http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46306
           Summary: inefficient code generated for array accesses
           Product: gcc
           Version: 4.6.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: normal
          Priority: P3
         Component: tree-optimization
        AssignedTo: unassig...@gcc.gnu.org
        ReportedBy: davi...@gcc.gnu.org

//Example:

int foo (int i, int *p, int t)
{
  int p2 = p[i];
  int temp = 0;
  int temp2 = 1;
  int temp3 = 4;
  if (p[i+1] > t)
    {
      temp = p2;
      temp2 = p2 + 2;
      temp3 = p2 + 3;
    }
  return p[temp] + p [temp2] + p[temp3];
}

Two problems are seen in the code generated by trunk gcc at -O2:

1) All the shift operations are redundant and should be folded into the scale (stride) of the memory operand.
2) Unnecessary code duplication (may be handled by a pass that converts memory accesses with linear addresses into target memrefs in straight-line code).

foo:
.LFB0:
        .cfi_startproc
        movslq  %edi, %rdi
        movl    (%rsi,%rdi,4), %eax
        cmpl    %edx, 4(%rsi,%rdi,4)
        jle     .L3
        movslq  %eax, %rdi
        leal    2(%rax), %ecx
        salq    $2, %rdi
        leal    3(%rax), %edx
        movslq  %ecx, %rcx
        movl    (%rsi,%rdi), %eax
        salq    $2, %rcx
        movslq  %edx, %rdx
        addl    (%rsi,%rcx), %eax
        salq    $2, %rdx
        addl    (%rsi,%rdx), %eax
        ret
        .p2align 4,,10
        .p2align 3
.L3:
        movl    $16, %edx
        movl    $4, %ecx
        xorl    %edi, %edi
        movl    (%rsi,%rdi), %eax
        addl    (%rsi,%rcx), %eax
        addl    (%rsi,%rdx), %eax
        ret

// The following code is generated by another compiler -- not ideal, but better:

foo:
.Leh_func_begin0:
        pushq   %rbp
.Ltmp0:
        movq    %rsp, %rbp
.Ltmp1:
        movslq  %edi, %rax
        leal    1(%rax), %ecx
        movslq  %ecx, %rcx
        cmpl    %edx, (%rsi,%rcx,4)
        jg      .LBB0_2
        movl    $1, %eax
        xorl    %ecx, %ecx
        movl    $4, %edx
        jmp     .LBB0_3
.LBB0_2:
        movslq  (%rsi,%rax,4), %rcx
        leal    3(%rcx), %eax
        movslq  %eax, %rdx
        leal    2(%rcx), %eax
        movslq  %eax, %rax
.LBB0_3:
        movl    (%rsi,%rax,4), %eax
        addl    (%rsi,%rcx,4), %eax
        addl    (%rsi,%rdx,4), %eax
        popq    %rbp
        ret