http://gcc.gnu.org/bugzilla/show_bug.cgi?id=57858
Bug ID: 57858
Summary: AVX2: ymm used for div, not for sqrt
Product: gcc
Version: 4.9.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: vincenzo.innocente at cern dot ch

In the following example, div uses ymm registers while sqr uses only xmm ones.

gcc version 4.9.0 20130630 (experimental) [trunk revision 200570] (GCC)

cat avx2sqrt.cc
#include<math.h>
double div() {
  double s=0;
  for (int i=0; i!=1024; ++i) s+=1./(i+1);
  return s;
}
double sqr() {
  double s=0;
  for (int i=0; i!=1024; ++i) s+=sqrt(i+1);
  return s;
}

c++ -std=c++11 -Ofast -S avx2sqrt.cc -march=corei7-avx -mavx2 -ftree-vectorizer-verbose=1 -Wall ; cat avx2sqrt.s
_Z3divv:
.LFB3:
        .cfi_startproc
        vmovdqa .LC1(%rip), %ymm6
        xorl    %eax, %eax
        vxorpd  %xmm1, %xmm1, %xmm1
        vmovdqa .LC0(%rip), %ymm0
        vmovdqa .LC2(%rip), %ymm5
        vmovapd .LC3(%rip), %ymm2
        jmp     .L2
        .p2align 4,,10
        .p2align 3
.L3:
        vmovdqa %ymm4, %ymm0
.L2:
        vpaddd  %ymm6, %ymm0, %ymm4
        vpaddd  %ymm5, %ymm0, %ymm0
        addl    $1, %eax
        vextracti128 $0x1, %ymm0, %xmm3
        vcvtdq2pd %xmm0, %ymm0
        vcvtdq2pd %xmm3, %ymm3
        vdivpd  %ymm0, %ymm2, %ymm0
        vdivpd  %ymm3, %ymm2, %ymm3
        vaddpd  %ymm0, %ymm3, %ymm0
        cmpl    $128, %eax
        vaddpd  %ymm0, %ymm1, %ymm1
        jne     .L3
        vhaddpd %ymm1, %ymm1, %ymm1
        vperm2f128 $1, %ymm1, %ymm1, %ymm0
        vaddpd  %ymm0, %ymm1, %ymm0
        vzeroupper
        ret
        .cfi_endproc
.LFE3:
        .size   _Z3divv, .-_Z3divv
        .p2align 4,,15
        .globl  _Z3sqrv
        .type   _Z3sqrv, @function
_Z3sqrv:
.LFB4:
        .cfi_startproc
        movl    $1, %eax
        vmovsd  .LC4(%rip), %xmm1
        vxorpd  %xmm0, %xmm0, %xmm0
        jmp     .L6
        .p2align 4,,10
        .p2align 3
.L7:
        vcvtsi2sd %eax, %xmm1, %xmm1
        vsqrtsd %xmm1, %xmm1, %xmm1
.L6:
        addl    $1, %eax
        vaddsd  %xmm1, %xmm0, %xmm0
        cmpl    $1025, %eax
        jne     .L7
        rep; ret
        .cfi_endproc
.LFE4:
        .size   _Z3sqrv, .-_Z3sqrv