Since x86 extends 16-bit arguments to 32 bits when passing them, the caller stores 4 bytes where the callee loads only 2 bytes, and we get a memory mismatch stall:

[...@gnu-6 899]$ cat call-1.c
short
__attribute__((noinline))
foo (short x, short y, short z)
{
  return x + y + z;
}

short
bar (short z, short x, short y)
{
  return foo (x, y, z);
}
[...@gnu-6 899]$ gcc -O2 -m32 call-1.c -S
[...@gnu-6 899]$ cat call-1.s
        .file   "call-1.c"
        .text
        .p2align 4,,15
.globl foo
        .type   foo, @function
foo:
        pushl   %ebp
        movl    %esp, %ebp
        movzwl  12(%ebp), %eax        << 2bytes
        addw    8(%ebp), %ax
        addw    16(%ebp), %ax
        popl    %ebp
        ret
        .size   foo, .-foo
        .p2align 4,,15
.globl bar
        .type   bar, @function
bar:
        pushl   %ebp
        movl    %esp, %ebp
        movswl  12(%ebp), %eax
        movswl  16(%ebp), %edx
        movswl  8(%ebp), %ecx
        movl    %eax, 8(%ebp)         << 4bytes
        movl    %edx, 12(%ebp)
        movl    %ecx, 16(%ebp)
        popl    %ebp
        jmp     foo

--
           Summary: X86 memory mismatch stall
           Product: gcc
           Version: 4.6.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: hjl dot tools at gmail dot com

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=44490