https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65832

            Bug ID: 65832
           Summary: Inefficient vector construction
           Product: gcc
           Version: 6.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: rguenth at gcc dot gnu.org
            Target: x86_64-*-*, i?86-*-*

/* 16-byte GCC vector type with int elements.  */
typedef int v4si __attribute__((vector_size(16)));

/* Reproducer for this report: construct a v4si directly from the four
   scalar arguments, with i as the first element and l as the last.
   The vector initializer form is kept exactly as reported, since it is
   the construct whose generated code is being discussed.  */
v4si foo (int i, int j, int k, int l)
{
  return (v4si) { i, j, k, l };
}

produces

        movl    %edx, -12(%rsp)
        movd    -12(%rsp), %xmm1
        movl    %ecx, -12(%rsp)
        movd    -12(%rsp), %xmm2
        movl    %edi, -12(%rsp)
        movd    -12(%rsp), %xmm0
        movl    %esi, -12(%rsp)
        movd    -12(%rsp), %xmm3
        punpckldq       %xmm2, %xmm1
        punpckldq       %xmm3, %xmm0
        punpcklqdq      %xmm1, %xmm0
        ret

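As we spill everything to the stack anyway, we could just as well use a single
vector load, i.e. something like the following (the stack offsets are only
illustrative; the load has to start at the lowest address that was stored to):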

        movl    %edx, -12(%rsp)
        movl    %ecx, -16(%rsp)
        movl    %edi, -20(%rsp)
        movl    %esi, -24(%rsp)
        movdqu  -12(%rsp), %xmm0
        ret

Reply via email to