#include <mmintrin.h>

/*
 * Test case: the same byte-vector addition written two ways.
 *
 * The #include must sit on its own line: a preprocessor directive extends to
 * the end of the line, so definitions placed after it on the same line would
 * not be compiled.
 */

/*
 * Adds x and y using the generic vector `+` operator on the __v8qi type,
 * leaving instruction selection to the compiler.
 */
__v8qi foo(const __v8qi x, const __v8qi y)
{
    return x + y;
}

/*
 * Adds x and y by calling the paddb builtin explicitly.
 */
__v8qi bar(const __v8qi x, const __v8qi y)
{
    return __builtin_ia32_paddb(x, y);
}
$ gcc tmp.c -S -O2 -mmmx

GCC 3.3.6 produces the same code for foo() and bar():

foo:
        movq    4(%esp), %mm0
        paddb   12(%esp), %mm0
        movq    %mm0, (%eax)
        ret
bar:
        movq    4(%esp), %mm0
        paddb   12(%esp), %mm0
        movq    %mm0, (%eax)
        ret

GCC 4.2.0 produces horrible code for foo():

foo:
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        subl    $24, %esp
        movq    %mm0, (%esp)
        movl    (%esp), %ebx
        movl    4(%esp), %esi
        movl    %ebx, %edi
        movq    %mm1, (%esp)
        movl    (%esp), %eax
        movl    4(%esp), %edx
        movl    %eax, %ecx
        movl    %esi, %ebx
        movl    %edi, %eax
        xorl    %ecx, %eax
        andl    $-2139062144, %eax
        andl    $2139062143, %ecx
        andl    $2139062143, %edi
        addl    %edi, %ecx
        xorl    %ecx, %eax
        pxor    %mm0, %mm0
        movq    %mm0, 16(%esp)
        movl    %eax, 16(%esp)
        movl    %esi, %ecx
        xorl    %edx, %ecx
        andl    $-2139062144, %ecx
        andl    $2139062143, %edx
        andl    $2139062143, %ebx
        addl    %ebx, %edx
        xorl    %edx, %ecx
        movl    16(%esp), %eax
        movl    %eax, 8(%esp)
        movl    %ecx, 12(%esp)
        movq    8(%esp), %mm0
        addl    $24, %esp
        popl    %ebx
        popl    %esi
        popl    %edi
        ret
bar:
        paddb   %mm1, %mm0
        ret

--
           Summary: missed optimization for simple MMX code.
           Product: gcc
           Version: 4.2.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: other
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: pluto at agmk dot net
 GCC build triplet: i686-pld-linux
  GCC host triplet: i686-pld-linux
GCC target triplet: i686-pld-linux

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=25277