On Monday 18 April 2005 20:53, Nicholas Nethercote wrote:
> Hi,
>
> I've been looking at GCC's use of sign-extensions when dealing with
> integers smaller than a machine word size. It looks like there is room
> for improvement.
>
> Consider this C function:
>
>   short g(short x)
>   {
>      short i;
>      for (i = 0; i < 10; i++) {
>         x += i;
>      }
>      return x;
>   }
>
> On x86, using GCC 4.0.0 20050130 with -O2, I get this code:
>
> g:
>         pushl   %ebp
>         xorl    %edx, %edx
>         movl    %esp, %ebp
>         movswl  8(%ebp),%ecx
>         .p2align 4,,15
> .L2:
>         leal    (%ecx,%edx), %eax
>         movswl  %ax,%ecx                # 1
>         leal    1(%edx), %eax
>         movzwl  %ax, %eax               # 2
>         cmpw    $10, %ax
>         movswl  %ax,%edx                # 3
>         jne     .L2
>
>         popl    %ebp
>         movl    %ecx, %eax
>         ret
>         .size   g, .-g
>         .p2align 4,,15
>
> The three extensions (#1, #2, #3) here are unnecessarily conservative.
> This would be better:
>
> g:
>         pushl   %ebp
>         xorl    %edx, %edx
>         movl    %esp, %ebp
>         movswl  8(%ebp),%ecx
>         .p2align 4,,15
> .L2:
>         leal    (%ecx,%edx), %ecx       # x += i
>         leal    1(%edx), %edx           # i++
>         cmpw    $10, %dx                # i < 10 ?
>         jne     .L2
>
>         popl    %ebp
>         movswl  %cx, %eax
>         ret
Here is what I get on amd64 with -m32:

        .file   "t.c"
        .text
        .p2align 4,,15
.globl g
        .type   g, @function
g:
        pushl   %ebp
        xorl    %edx, %edx
        movl    %esp, %ebp
        movswl  8(%ebp),%eax
        .p2align 4,,15
.L2:
        addl    %edx, %eax
        incl    %edx
        cmpl    $10, %edx
        cwtl
        jne     .L2
        popl    %ebp
        ret
        .size   g, .-g
        .ident  "GCC: (GNU) 4.1.0 20050412 (experimental)"
        .section        .note.GNU-stack,"",@progbits

Looks a bit more like your optimal code ;-)

Gr.
Steven