On Monday 18 April 2005 20:53, Nicholas Nethercote wrote:
> Hi,
>
> I've been looking at GCC's use of sign-extensions when dealing with
> integers smaller than a machine word size.  It looks like there is room
> for improvement.
>
> Consider this C function:
>
>      short g(short x)
>      {
>         short i;
>         for (i = 0; i < 10; i++) {
>            x += i;
>         }
>         return x;
>      }
>
> On x86, using GCC 4.0.0 20050130 with -O2, I get this code:
>
> g:
>          pushl   %ebp
>          xorl    %edx, %edx
>          movl    %esp, %ebp
>          movswl  8(%ebp),%ecx
>          .p2align 4,,15
> .L2:
>          leal    (%ecx,%edx), %eax
>          movswl  %ax,%ecx                # 1
>          leal    1(%edx), %eax
>          movzwl  %ax, %eax               # 2
>          cmpw    $10, %ax
>          movswl  %ax,%edx                # 3
>          jne     .L2
>
>          popl    %ebp
>          movl    %ecx, %eax
>          ret
>          .size   g, .-g
>          .p2align 4,,15
>
> The three extensions (#1, #2, #3) here are unnecessarily conservative.
> This would be better:
>
> g:
>          pushl   %ebp
>          xorl    %edx, %edx
>          movl    %esp, %ebp
>          movswl  8(%ebp),%ecx
>          .p2align 4,,15
> .L2:
>          leal    (%ecx,%edx), %ecx       # x += i
>          leal    1(%edx), %edx           # i++
>          cmpw    $10, %dx                # i < 10 ?
>          jne     .L2
>
>          popl    %ebp
>          movswl  %cx, %eax
>          ret

Here is what I get on amd64 with -m32, using GCC 4.1.0 20050412 (experimental):

        .file   "t.c"
        .text
        .p2align 4,,15
.globl g
        .type   g, @function
g:
        pushl   %ebp
        xorl    %edx, %edx
        movl    %esp, %ebp
        movswl  8(%ebp),%eax
        .p2align 4,,15
.L2:
        addl    %edx, %eax
        incl    %edx
        cmpl    $10, %edx
        cwtl
        jne     .L2
        popl    %ebp
        ret
        .size   g, .-g
        .ident  "GCC: (GNU) 4.1.0 20050412 (experimental)"
        .section        .note.GNU-stack,"",@progbits

Looks a bit more like your optimal code ;-)  Only one sign extension
(the cwtl) is left in the loop, instead of the three in your 4.0.0 output.

Gr.
Steven

Reply via email to