https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66119

            Bug ID: 66119
           Summary: Regression in optimization of avx-code
           Product: gcc
           Version: 5.1.1
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c++
          Assignee: unassigned at gcc dot gnu.org
          Reporter: joachim.schoeberl at tuwien dot ac.at
  Target Milestone: ---

Created attachment 35525
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=35525&action=edit
testcode

gcc 5.1 produces a lot of scalar moves for the attached vector-class code,
whereas gcc 4.9 generates compact code for the same source (both listings are shown below).

compiled using:
gcc -O3 -mavx -S -std=c++11 testgcc.cpp

compiler version:
gcc (GCC) 5.1.1 20150505



gcc 5.1 works fine in any of the following cases:

- we use a manual copy constructor instead of '= default' (line 37):
MyTSIMD (const MyTSIMD & s2) : data(s2.data) { ; } 

- we use the concrete vector-class instead of the template (line 45):
using MyVec = MyAVX;

- we do not use __attribute__ ((__always_inline__)) for ComputeSomething (line 58)



Cheers, Joachim




code generated by gcc 5.1:

        .globl  _Z12TestFunction4Vec2S_
        .type   _Z12TestFunction4Vec2S_, @function
_Z12TestFunction4Vec2S_:
.LFB4604:
        .cfi_startproc
        movq    72(%rsp), %rdx
        vmovapd 40(%rsp), %ymm0
        movq    %rdi, %rax
        vmovapd 8(%rsp), %ymm1
        movq    %rdx, -88(%rsp)
        movq    80(%rsp), %rdx
        movq    %rdx, -80(%rsp)
        movq    88(%rsp), %rdx
        movq    %rdx, -72(%rsp)
        movq    96(%rsp), %rdx
        movq    %rdx, -64(%rsp)
        movq    104(%rsp), %rdx
        vaddpd  -88(%rsp), %ymm1, %ymm1
        movq    %rdx, -56(%rsp)
        movq    112(%rsp), %rdx
        movq    %rdx, -48(%rsp)
        movq    120(%rsp), %rdx
        vmovapd %ymm1, (%rdi)
        movq    %rdx, -40(%rsp)
        movq    128(%rsp), %rdx
        movq    %rdx, -32(%rsp)
        vaddpd  -56(%rsp), %ymm0, %ymm0
        vmovapd %ymm0, 32(%rdi)
        vzeroupper
        ret
        .cfi_endproc




code generated by gcc 4.9.2:

        .type   _Z12TestFunction4Vec2S_, @function
_Z12TestFunction4Vec2S_:
.LFB2234:
        .cfi_startproc
        vmovapd 40(%rsp), %ymm0
        movq    %rdi, %rax
        vmovapd 8(%rsp), %ymm1
        vaddpd  104(%rsp), %ymm0, %ymm0
        vaddpd  72(%rsp), %ymm1, %ymm1
        vmovapd %ymm0, 32(%rdi)
        vmovapd %ymm1, (%rdi)
        vzeroupper
        ret
        .cfi_endproc

Reply via email to