------- Additional Comments From pinskia at gcc dot gnu dot org 2004-12-01
14:27 -------
Actually the most optimal code would be:
# Hand-written target code for foobar(): multiply the four packed
# single-precision floats at `a` by those at `b` with one SSE multiply,
# then add the four products together on the x87 stack.
# Syntax: AT&T / GAS, 32-bit x86 (operands are `src, dst`).
_Z6foobarv:
.LFB2:
# Save the caller's frame pointer.
pushl %ebp
.LCFI0:
# Establish this function's frame pointer.
movl %esp, %ebp
.LCFI1:
# Reserve 24 bytes of stack for the spilled vector.
subl $24, %esp
.LCFI2:
# xmm0 = the four packed singles stored at `a`.
movaps a, %xmm0
# xmm0 = element-wise product with the four singles at `b`.
mulps b, %xmm0
# Spill the four products to -24(%ebp)..-9(%ebp) so the x87 unit can read them.
# NOTE(review): movaps needs a 16-byte-aligned address; this assumes the
# caller keeps the stack aligned so that %ebp-24 is a multiple of 16 - confirm.
movaps %xmm0, -24(%ebp)
# st(0) = 0.0, the running sum.
fldz
# Accumulate the four products, one 4-byte float at a time.
fadds -24(%ebp)
fadds -20(%ebp)
fadds -16(%ebp)
fadds -12(%ebp)
# Tear down the frame (restore %esp and %ebp); the sum stays in st(0).
leave
ret

But to do that we need the tree vectorizer to become better and also split the loop into two. -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17619
