[Bug tree-optimization/46306] New: inefficient code generated for array accesses

davidxl at gcc dot gnu.org Thu, 04 Nov 2010 12:15:24 -0700

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46306


           Summary: inefficient code generated for array accesses
           Product: gcc
           Version: 4.6.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: normal
          Priority: P3
         Component: tree-optimization
        AssignedTo: unassig...@gcc.gnu.org
        ReportedBy: davi...@gcc.gnu.org


//Example:

// Reproducer: sums three elements of p.  The three indices are either the
// constants 0, 1 and 4, or values derived from p[i], depending on whether
// p[i+1] is greater than t.
int foo (int i, int *p, int t)
{
    int p2 = p[i];
    // Default indices, used when the condition below is false.
    int temp = 0;
    int temp2 = 1;
    int temp3 = 4;
    if (p[i+1] > t)
     {
       // Indices computed from the loaded value p2.
       temp = p2;
       temp2 = p2 + 2;
       temp3 = p2 + 3;
     }
    // The same three loads are performed whichever branch was taken;
    // only the index values differ.
    return p[temp] + p [temp2] + p[temp3];
}

Two problems are seen in the code generated by trunk gcc at -O2:

1) all the shift operations are redundant and should be folded into the
scale (stride) of the memory operand
2) unnecessary code duplication 

(This may be handled by a pass that converts memory accesses with linear
addresses into target memrefs in straight-line code.)

# Output of trunk gcc at -O2 for foo (x86-64, AT&T syntax).
# Register use matches the SysV AMD64 argument order for
# foo (int i, int *p, int t): i in %edi, p in %rsi, t in %edx.
foo:
.LFB0:
    .cfi_startproc
    # Sign-extend i, load p2 = p[i] and compare p[i+1] with t.  Both
    # memory operands here use the scale-by-4 addressing form.
    movslq    %edi, %rdi
    movl    (%rsi,%rdi,4), %eax
    cmpl    %edx, 4(%rsi,%rdi,4)
    # Signed p[i+1] <= t: go to the constant-index path at .L3.
    jle    .L3
    # Taken-branch path: indices p2, p2+2 and p2+3.  Each index is
    # sign-extended and then multiplied by 4 with an explicit salq, and
    # the loads use an unscaled (%rsi,%reg) operand -- problem 1 in the
    # report.
    movslq    %eax, %rdi
    leal    2(%rax), %ecx
    salq    $2, %rdi
    leal    3(%rax), %edx
    movslq    %ecx, %rcx
    movl    (%rsi,%rdi), %eax
    salq    $2, %rcx
    movslq    %edx, %rdx
    addl    (%rsi,%rcx), %eax
    salq    $2, %rdx
    addl    (%rsi,%rdx), %eax
    ret
    .p2align 4,,10
    .p2align 3
.L3:
    # Constant-index path: the registers hold byte offsets 16, 4 and 0,
    # i.e. elements p[4], p[1] and p[0] (temp3, temp2 and temp).
    movl    $16, %edx
    movl    $4, %ecx
    xorl    %edi, %edi
    # Second copy of the same load/add/add/ret sequence as above --
    # problem 2 in the report (code duplication).
    movl    (%rsi,%rdi), %eax
    addl    (%rsi,%rcx), %eax
    addl    (%rsi,%rdx), %eax
    ret


// The following code is generated by another compiler -- not ideal, but
better:
# Output of another compiler for the same foo (x86-64, AT&T syntax).
# i is in %edi, p in %rsi, t in %edx, as in the listing above.
foo:
.Leh_func_begin0:
    # Frame-pointer prologue.
    pushq    %rbp
.Ltmp0:
    movq    %rsp, %rbp
.Ltmp1:
    # rax = sign-extended i; i + 1 is formed in 32 bits and then
    # sign-extended into rcx before being used as a scaled index.
    movslq    %edi, %rax
    leal    1(%rax), %ecx
    movslq    %ecx, %rcx
    cmpl    %edx, (%rsi,%rcx,4)
    # Signed p[i+1] > t: compute the indices from p[i] at .LBB0_2.
    jg    .LBB0_2
    # Fall-through: constant element indices 1, 0 and 4 in
    # rax, rcx and rdx (temp2, temp and temp3).
    movl    $1, %eax
    xorl    %ecx, %ecx
    movl    $4, %edx
    jmp    .LBB0_3
.LBB0_2:
    # rcx = p2 = p[i]; rdx = p2 + 3; rax = p2 + 2 (each sign-extended).
    movslq    (%rsi,%rax,4), %rcx
    leal    3(%rcx), %eax
    movslq    %eax, %rdx
    leal    2(%rcx), %eax
    movslq    %eax, %rax
.LBB0_3:
    # Single shared copy of the three loads; the registers hold element
    # indices and the multiply by 4 is done by the addressing mode, so
    # no shift instructions are needed.
    movl    (%rsi,%rax,4), %eax
    addl    (%rsi,%rcx,4), %eax
    addl    (%rsi,%rdx,4), %eax
    popq    %rbp
    ret

[Bug tree-optimization/46306] New: inefficient code generated for array accesses

Reply via email to