How does the i386 backend optimise the stack slot assignment to minimize
the displacement offset?

What code should I look at?

Or is there some other optimisation at work here...?

I.e.:

; -O0 => large offset
        leal    8268(%esp), %eax
        incl    (%eax)

; -O3 => small offset
        incl    40(%esp)



The source for a test case and the resulting output are attached:

gcc-4.0 -S stackframe.c -fomit-frame-pointer -O0 -o stackframe-O0.s
gcc-4.0 -S stackframe.c -fomit-frame-pointer -O3 -o stackframe-O3.s


This thread has a stack slot assignment optimisation patch that has
never been committed to GCC CVS, but the above indicates that there is
already some sort of mechanism in GCC to mitigate this problem...

http://gcc.gnu.org/ml/gcc-patches/2003-01/msg00019.html




-- 
Øyvind Harboe
http://www.zylin.com
/* Helpers defined outside this file: bar() supplies every loop bound in
   foo(), and test1() is handed each of foo()'s local arrays. */
int bar(int a);
int test1(int *);

/*
 * Stack-frame test case: two 1024-element int arrays plus a set of scalar
 * loop counters, exercised by thirteen nested loops whose bounds are all
 * calls to bar() on the loop counter itself.
 *
 * The parameters a, b, c and d are never referenced, and the function
 * falls off the end without a return statement although it is declared
 * to return int.
 */
int foo(int a, int b, int c, int d)
{
  int abc[1024];
  /* q, x, y and z are declared but never used below. */
  int j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z;
  int def[1024];
  for (j=0; j<bar(j); j++)
    {
      /* abc is only ever used here, as the argument to test1(). */
      test1(abc);
  for (k=0; k<bar(k); k++)
    {
  for (l=0; l<bar(l); l++)
    {
  for (m=0; m<bar(m); m++)
    {
      /* def is only ever used here, as the argument to test1(). */
      test1(def);
  /* NOTE(review): this loop resets m but tests and increments n, so n is
     read without ever being assigned; probably meant n=0.  Left as is
     because the attached compiler output was generated from this text. */
  for (m=0; n<bar(n); n++)
    {
  for (o=0; o<bar(o); o++)
    {
  for (p=0; p<bar(p); p++)
    {
  /* The sequence goes from p straight to r; there is no q loop. */
  for (r=0; r<bar(r); r++)
    {
  for (s=0; s<bar(s); s++)
    {
  for (t=0; t<bar(t); t++)
    {
  for (u=0; u<bar(u); u++)
    {
  for (v=0; v<bar(v); v++)
    {
  for (w=0; w<bar(w); w++)
    {
    }
    }
    }
    }
    }
    }
    }
    }
    }
    }
    }
    }
    }
}
        # Listing produced from stackframe.c by the -O0 command quoted in the
        # message (gcc-4.0 -S -fomit-frame-pointer -O0).  AT&T syntax, i386;
        # arguments to bar and test1 are pushed on the stack.
        .file   "stackframe.c"
        .text
.globl foo
        .type   foo, @function
foo:
        # Frame layout once the subl below has run (offsets from %esp, names
        # obtained by matching the C loop order):
        #   def[] starts at 24 and abc[] at 4120; the scalar counters sit
        #   above both arrays, from 8216 (j) up to 8268 (w).
        # Every counter access therefore needs a 32-bit displacement.
        subl    $8284, %esp
        # j = 0, then jump to the j loop test at the bottom of the function.
        movl    $0, 8216(%esp)
        jmp     .L2
# Body of the j loop: test1(abc), then k = 0.
.L3:
        # Around each call %esp is lowered by 12 plus one 4-byte push and
        # restored by the addl of 16 afterwards, so between the subl and the
        # push a displacement is 12 larger than the frame offset it names.
        subl    $12, %esp
        leal    4132(%esp), %eax
        pushl   %eax
        call    test1
        addl    $16, %esp
        movl    $0, 8220(%esp)
        jmp     .L4
# Body of the k loop: l = 0.
.L5:
        movl    $0, 8224(%esp)
        jmp     .L6
# Body of the l loop: m = 0.
.L7:
        movl    $0, 8228(%esp)
        jmp     .L8
# Body of the m loop: test1(def), then the next loop's initialiser.
.L9:
        subl    $12, %esp
        leal    36(%esp), %eax
        pushl   %eax
        call    test1
        addl    $16, %esp
        # This store goes to 8228 again (the m slot), matching "m=0" in the C
        # source; the slot tested at .L10 (8232) is never initialised here.
        movl    $0, 8228(%esp)
        jmp     .L10
.L11:
        movl    $0, 8236(%esp)
        jmp     .L12
.L13:
        movl    $0, 8240(%esp)
        jmp     .L14
.L15:
        # Frame offset 8244 is skipped: the stores go from 8240 to 8248.
        # Presumably that is the slot of the unused variable q.
        movl    $0, 8248(%esp)
        jmp     .L16
.L17:
        movl    $0, 8252(%esp)
        jmp     .L18
.L19:
        movl    $0, 8256(%esp)
        jmp     .L20
.L21:
        movl    $0, 8260(%esp)
        jmp     .L22
.L23:
        movl    $0, 8264(%esp)
        jmp     .L24
.L25:
        movl    $0, 8268(%esp)
        jmp     .L26
# Innermost loop increment; this is the leal/incl pair quoted in the message.
.L27:
        leal    8268(%esp), %eax
        incl    (%eax)
# Innermost loop test: call bar on the counter at frame offset 8268 and loop
# while the result is greater than the counter.  The same test/increment
# shape repeats below for each enclosing loop, one slot lower each time.
.L26:
        subl    $12, %esp
        pushl   8280(%esp)
        call    bar
        addl    $16, %esp
        cmpl    8268(%esp), %eax
        jg      .L27
        leal    8264(%esp), %eax
        incl    (%eax)
.L24:
        subl    $12, %esp
        pushl   8276(%esp)
        call    bar
        addl    $16, %esp
        cmpl    8264(%esp), %eax
        jg      .L25
        leal    8260(%esp), %eax
        incl    (%eax)
.L22:
        subl    $12, %esp
        pushl   8272(%esp)
        call    bar
        addl    $16, %esp
        cmpl    8260(%esp), %eax
        jg      .L23
        leal    8256(%esp), %eax
        incl    (%eax)
.L20:
        subl    $12, %esp
        pushl   8268(%esp)
        call    bar
        addl    $16, %esp
        cmpl    8256(%esp), %eax
        jg      .L21
        leal    8252(%esp), %eax
        incl    (%eax)
.L18:
        subl    $12, %esp
        pushl   8264(%esp)
        call    bar
        addl    $16, %esp
        cmpl    8252(%esp), %eax
        jg      .L19
        leal    8248(%esp), %eax
        incl    (%eax)
.L16:
        subl    $12, %esp
        pushl   8260(%esp)
        call    bar
        addl    $16, %esp
        cmpl    8248(%esp), %eax
        jg      .L17
        leal    8240(%esp), %eax
        incl    (%eax)
.L14:
        subl    $12, %esp
        pushl   8252(%esp)
        call    bar
        addl    $16, %esp
        cmpl    8240(%esp), %eax
        jg      .L15
        leal    8236(%esp), %eax
        incl    (%eax)
.L12:
        subl    $12, %esp
        pushl   8248(%esp)
        call    bar
        addl    $16, %esp
        cmpl    8236(%esp), %eax
        jg      .L13
        leal    8232(%esp), %eax
        incl    (%eax)
# Test of the loop whose counter lives at frame offset 8232 (n in the C code).
.L10:
        subl    $12, %esp
        pushl   8244(%esp)
        call    bar
        addl    $16, %esp
        cmpl    8232(%esp), %eax
        jg      .L11
        leal    8228(%esp), %eax
        incl    (%eax)
.L8:
        subl    $12, %esp
        pushl   8240(%esp)
        call    bar
        addl    $16, %esp
        cmpl    8228(%esp), %eax
        jg      .L9
        leal    8224(%esp), %eax
        incl    (%eax)
.L6:
        subl    $12, %esp
        pushl   8236(%esp)
        call    bar
        addl    $16, %esp
        cmpl    8224(%esp), %eax
        jg      .L7
        leal    8220(%esp), %eax
        incl    (%eax)
.L4:
        subl    $12, %esp
        pushl   8232(%esp)
        call    bar
        addl    $16, %esp
        cmpl    8220(%esp), %eax
        jg      .L5
        leal    8216(%esp), %eax
        incl    (%eax)
# Outermost (j) loop test; falling through releases the frame and returns.
.L2:
        subl    $12, %esp
        pushl   8228(%esp)
        call    bar
        addl    $16, %esp
        cmpl    8216(%esp), %eax
        jg      .L3
        addl    $8284, %esp
        ret
        .size   foo, .-foo
        .ident  "GCC: (GNU) 4.0.0 20050410 (prerelease) (Debian 4.0-0pre10)"
        .section        .note.GNU-stack,"",@progbits
        # Listing produced from stackframe.c by the -O3 command quoted in the
        # message (gcc-4.0 -S -fomit-frame-pointer -O3).  AT&T syntax, i386;
        # arguments to bar and test1 are pushed on the stack.
        .file   "stackframe.c"
        .text
        .p2align 4,,15
.globl foo
        .type   foo, @function
foo:
        # Four registers are saved and restored around the body because the
        # loops below use them as counters.
        pushl   %ebp
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        # Frame layout once the subl below has run (offsets from %esp, names
        # obtained by matching the C loop order):
        #   counters kept in memory occupy 12 (j) through 40 (s), below both
        #   arrays; def[] starts at 44 and abc[] at 4140.
        # Counter accesses therefore fit an 8-bit displacement, unlike the
        # -O0 layout where they sit above the arrays.
        subl    $8236, %esp
        movl    $0, 12(%esp)
# j loop test.  As in the -O0 listing, %esp is lowered by 12 plus one push
# around each call, so 24(%esp) here names frame offset 12.
.L2:
        subl    $12, %esp
        movl    24(%esp), %eax
        pushl   %eax
        call    bar
        addl    $16, %esp
        cmpl    %eax, 12(%esp)
        jge     .L43
        subl    $12, %esp
        leal    4152(%esp), %eax
        pushl   %eax
        call    test1
        # Stored before %esp is restored, so this is frame offset 16 (k = 0).
        movl    $0, 32(%esp)
        addl    $16, %esp
# k loop test.
.L4:
        subl    $12, %esp
        movl    28(%esp), %edx
        pushl   %edx
        call    bar
        addl    $16, %esp
        cmpl    %eax, 16(%esp)
        jge     .L38
        movl    $0, 20(%esp)
# l loop test.
.L36:
        subl    $12, %esp
        movl    32(%esp), %ecx
        pushl   %ecx
        call    bar
        addl    $16, %esp
        cmpl    %eax, 20(%esp)
        jge     .L44
        # m is kept in %ebx rather than in a stack slot.
        xorl    %ebx, %ebx
# m loop test.
.L33:
        subl    $12, %esp
        pushl   %ebx
        call    bar
        addl    $16, %esp
        cmpl    %eax, %ebx
        jge     .L45
        subl    $12, %esp
        leal    56(%esp), %eax
        pushl   %eax
        call    test1
        addl    $16, %esp
# Test of the loop whose counter lives at frame offset 24 (n in the C code).
# No instruction in this listing stores an initial value to that slot.
.L6:
        subl    $12, %esp
        movl    36(%esp), %ebx
        pushl   %ebx
        call    bar
        addl    $16, %esp
        cmpl    24(%esp), %eax
        jle     .L46
        movl    $0, 28(%esp)
# o loop test.
.L28:
        subl    $12, %esp
        movl    40(%esp), %esi
        pushl   %esi
        call    bar
        addl    $16, %esp
        cmpl    %eax, 28(%esp)
        jge     .L29
        movl    $0, 32(%esp)
# p loop test.
.L25:
        subl    $12, %esp
        movl    44(%esp), %edi
        pushl   %edi
        call    bar
        addl    $16, %esp
        cmpl    %eax, 32(%esp)
        jge     .L26
        movl    $0, 36(%esp)
# r loop test.
.L22:
        subl    $12, %esp
        movl    48(%esp), %ebp
        pushl   %ebp
        call    bar
        addl    $16, %esp
        cmpl    %eax, 36(%esp)
        jge     .L23
        movl    $0, 40(%esp)
# s loop test.
.L19:
        subl    $12, %esp
        movl    52(%esp), %eax
        pushl   %eax
        call    bar
        addl    $16, %esp
        cmpl    %eax, 40(%esp)
        jge     .L20
        # The four innermost counters live in registers:
        # t in %ebp, u in %edi, v in %esi and w in %ebx.
        xorl    %ebp, %ebp
        subl    $12, %esp
        pushl   %ebp
        call    bar
        addl    $16, %esp
        cmpl    %eax, %ebp
        jge     .L17
.L49:
        xorl    %edi, %edi
        subl    $12, %esp
        pushl   %edi
        call    bar
        addl    $16, %esp
        cmpl    %eax, %edi
        jge     .L14
.L48:
        xorl    %esi, %esi
        subl    $12, %esp
        pushl   %esi
        call    bar
        addl    $16, %esp
        cmpl    %eax, %esi
        jge     .L11
.L47:
        xorl    %ebx, %ebx
        jmp     .L8
        .p2align 4,,15
# Innermost (w) loop: increment at .L7, test at .L8.
.L7:
        incl    %ebx
.L8:
        subl    $12, %esp
        pushl   %ebx
        call    bar
        addl    $16, %esp
        cmpl    %eax, %ebx
        jl      .L7
        incl    %esi
        subl    $12, %esp
        pushl   %esi
        call    bar
        addl    $16, %esp
        cmpl    %eax, %esi
        jl      .L47
.L11:
        incl    %edi
        subl    $12, %esp
        pushl   %edi
        call    bar
        addl    $16, %esp
        cmpl    %eax, %edi
        jl      .L48
.L14:
        incl    %ebp
        subl    $12, %esp
        pushl   %ebp
        call    bar
        addl    $16, %esp
        cmpl    %eax, %ebp
        jl      .L49
# Loop-exit stubs: each one increments the counter of the enclosing loop and
# jumps back to that loop's test.  .L17 holds the "incl 40(%esp)" quoted in
# the message.
.L17:
        incl    40(%esp)
        jmp     .L19
.L20:
        incl    36(%esp)
        jmp     .L22
.L23:
        incl    32(%esp)
        jmp     .L25
.L26:
        incl    28(%esp)
        jmp     .L28
.L29:
        incl    24(%esp)
        jmp     .L6
# Exit of the loop tested at .L6: m is set to 1 directly (the C source resets
# m to 0 in that loop's initialiser and the m loop then increments it).
.L46:
        movl    $1, %ebx
        jmp     .L33
.L45:
        incl    20(%esp)
        jmp     .L36
.L44:
        incl    16(%esp)
        jmp     .L4
.L38:
        incl    12(%esp)
        jmp     .L2
# Function exit: release the frame and restore the saved registers.
.L43:
        addl    $8236, %esp
        popl    %ebx
        popl    %esi
        popl    %edi
        popl    %ebp
        ret
        .size   foo, .-foo
        .ident  "GCC: (GNU) 4.0.0 20050410 (prerelease) (Debian 4.0-0pre10)"
        .section        .note.GNU-stack,"",@progbits

Reply via email to