When the code below is compiled with -march=pentium4 -msse2 -O3
-fomit-frame-pointer and N == 4, the code generated for f() is correct:

# f() as compiled for N == 4: the whole vector add is a single packed
# 32-bit addition on one 128-bit SSE2 register.
f:
        # %xmm0 = b (128-bit load)
        movdqa  b, %xmm0
        # %xmm0 += a, added as packed 32-bit elements
        paddd   a, %xmm0
        # c = %xmm0 (128-bit store)
        movdqa  %xmm0, c
        ret

However, for N == 2 gcc 4.2.0 doesn't use %mm0 any more (gcc 4.0.0 worked OK).
OK, for some reason gcc decides to synthesize the operation using scalar
instructions.  But why is the code for f() so scary, not even close to that of
g()?  My wild guess is that gcc copies the vectors onto the stack because it
thinks the vectors may overlap.  But is that really possible?  If so, how can I
say that they never overlap?  Perhaps this is a bug too, because they are
declared as different objects and so should not overlap.


#define N  2

typedef signed int data_type;


typedef data_type __attribute__((vector_size(sizeof(data_type) * N))) vector;

extern vector a, b, c;

/* Vector-extension version: add the global vectors a and b with a single
   vector expression and store the result in the global vector c.  Each
   vector holds N elements of data_type.  */
void
f()
{
  c = a + b;
}


typedef data_type vector2[N];

extern vector2 a2, b2, c2;

/* Plain-array version of the same computation: add the global arrays a2
   and b2 element by element into c2 with an explicit loop over the N
   elements.  Its generated code is the baseline f() is compared against.  */
void
g()
{
  int i;
  for (i = 0; i < N; ++i)
    c2[i] = a2[i] + b2[i];
}


For the code above gcc 4.2.0 produces:

# f() as compiled by gcc 4.2.0 for N == 2: the addition is done with
# scalar 32-bit instructions, and both operands are copied to the stack
# once per element before being added.
f:
        # Save %esi and %ebx (restored before returning) and reserve
        # 36 bytes of stack for the temporary copies.
        pushl   %esi
        pushl   %ebx
        subl    $36, %esp
        # Load both 32-bit halves of a into %ebx/%esi and of b into %edx/%ecx.
        movl    a, %ebx
        movl    a+4, %esi
        movl    b, %edx
        movl    b+4, %ecx
        # First element: copy all of b to 24(%esp) and all of a to 16(%esp),
        # then compute c = b + a from the stack copies of the low halves.
        movl    %edx, 24(%esp)
        movl    %ecx, 28(%esp)
        movl    24(%esp), %eax
        movl    %ebx, 16(%esp)
        movl    %esi, 20(%esp)
        addl    16(%esp), %eax
        movl    %eax, c
        # Second element: copy all of b again to 8(%esp) and all of a to
        # (%esp), then compute c+4 = (b+4) + (a+4) from the high halves.
        movl    %edx, 8(%esp)
        movl    %ecx, 12(%esp)
        movl    12(%esp), %eax
        movl    %ebx, (%esp)
        movl    %esi, 4(%esp)
        addl    4(%esp), %eax
        movl    %eax, c+4
        # Release the stack area and restore the saved registers.
        addl    $36, %esp
        popl    %ebx
        popl    %esi
        ret

# g() as compiled by gcc 4.2.0: the N == 2 loop appears as straight-line
# code, one load/add/store sequence per element, with no stack use.
g:
        # c2[0] = a2[0] + b2[0]
        movl    a2, %eax
        addl    b2, %eax
        movl    %eax, c2
        # c2[1] = a2[1] + b2[1]
        movl    a2+4, %eax
        addl    b2+4, %eax
        movl    %eax, c2+4
        ret


-- 
           Summary: int __attribute__((vector_size(8))) doesn't use %mm0,
                    produces ugly code
           Product: gcc
           Version: 4.2.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: tomash dot brechko at gmail dot com
 GCC build triplet: i686-pc-linux-gnu
  GCC host triplet: i686-pc-linux-gnu
GCC target triplet: i686-pc-linux-gnu


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=32301

Reply via email to