https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79151

--- Comment #2 from Thomas Koenig <tkoenig at gcc dot gnu.org> ---
Another test case.

It might even be profitable to look only for divisions: they are so
expensive that the cost of packing and unpacking the operands should
always pay off.

/* Scalar test case: returns the sum of the reciprocals of a and b,
   written as two separate scalar divisions. */
double foo(double a, double b)
{
  /* The integer literal 1 is converted to double by the usual arithmetic
     conversions, so both divisions are floating-point. */
  return 1/a + 1/b;
}

/* Hand-vectorized test case: packs a and b into one two-element vector,
   performs a single vector division, and returns the sum of both lanes
   (1/a + 1/b). */
double v_foo (double a, double b)
{
   /* GCC vector extension: a 16-byte vector of double
      (two lanes, given 8-byte doubles). */
   typedef double v2do __attribute__((vector_size (16)));
   v2do x, y;

   /* Pack the two scalar operands into the lanes of x. */
   x[0] = a;
   x[1] = b;
   /* Element-wise reciprocal: the scalar 1 is applied to every lane,
      giving y[i] = 1/x[i]. */
   y = 1/x;
   /* Unpack both lanes and add them. */
   return y[0] + y[1];
}

Assembly: foo is

        # Compiler output for foo, AT&T syntax (op src, dst).
        # Presumably System V AMD64: a in %xmm0, b in %xmm1, result in %xmm0.
        # .LC0 is not shown here; presumably the double constant 1.0.
        movsd   .LC0(%rip), %xmm2       # xmm2 = scalar double loaded from .LC0
        movapd  %xmm2, %xmm3            # xmm3 = copy of the constant, one dividend per division
        divsd   %xmm1, %xmm2            # xmm2 = xmm2 / xmm1   (first scalar division, 1/b)
        divsd   %xmm0, %xmm3            # xmm3 = xmm3 / xmm0   (second scalar division, 1/a)
        movapd  %xmm3, %xmm0            # xmm0 = 1/a
        addsd   %xmm2, %xmm0            # xmm0 = 1/a + 1/b
        ret

and v_foo is

        # Compiler output for v_foo, AT&T syntax (op src, dst).
        # Presumably System V AMD64: a in %xmm0, b in %xmm1, result in %xmm0.
        # .LC1 is not shown here; presumably the packed constant {1.0, 1.0}.
        unpcklpd        %xmm1, %xmm0    # pack: xmm0 = { low(xmm0), low(xmm1) } = { a, b }
        movapd  .LC1(%rip), %xmm1       # xmm1 = 16-byte packed constant from .LC1 (aligned load)
        divpd   %xmm0, %xmm1            # xmm1 = xmm1 / xmm0 per lane: one packed division = y
        movapd  %xmm1, %xmm2            # xmm2 = copy of y; its low lane is y[0]
        unpckhpd        %xmm1, %xmm1    # unpack: xmm1 = { y[1], y[1] }, moving the high lane down
        movapd  %xmm1, %xmm0            # xmm0 low lane = y[1]
        addsd   %xmm2, %xmm0            # xmm0 low lane = y[1] + y[0]
        ret

Reply via email to