https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65701
--- Comment #8 from Jan Hubicka <hubicka at ucw dot cz> ---
With spaces removed to be readable
>
> 1.11 │3682: mov 0x60(%rsp),%rdx
> 9.32 │3687:┌─→vmovss (%rax,%r12,2),%xmm5
> 1.44 │ │ vmovss (%rax),%xmm6
> 4.46 │ │ inc %rdi
> 0.01 │ │ add $0x10,%rcx
> 1.17 │ │ vinser $0x10,(%rax,%r13,1),%xmm5,%xmm0
> 1.92 │ │ vinser $0x10,(%rax,%r12,1),%xmm6,%xmm1
> 0.28 │ │ add %r14,%rax
> 0.07 │ │ vmovlh %xmm0,%xmm1,%xmm0
> 2.48 │ │ vfmadd %xmm3,-0x10(%rcx),%xmm0,%xmm3
> 5.15 │ │ cmp %rdi,%rdx
> 0.01 │ └──ja 3687
> 1.21 │ vhaddp %xmm3,%xmm3,%xmm3
> 10.30 │ mov 0x58(%rsp),%rax
> 0.03 │ mov %r13,0x10(%rsp)
> 0.00 │ add %rax,%rsi
> 1.18 │ vhaddp %xmm3,%xmm3,%xmm3
> 10.80 │ vaddss %xmm3,%xmm4,%xmm4
> 4.47 │ cmp 0x68(%rsp),%rax
>
> (the slower variant) to:
>
> 1.38 │ xor %ecx,%ecx
> 6.04 │17c0:┌─→vmovss (%rax,%r11,2),%xmm3
> 0.18 │ │ mov 0x90(%rsp),%rsi
> 1.43 │ │ inc %rcx
> 1.42 │ │ vmovss (%rax),%xmm5
> 0.36 │ │ vmovss (%rdx,%rbx,2),%xmm6
> 2.81 │ │ vmovss (%rdx),%xmm7
> 0.90 │ │ vinser $0x10,(%rax,%rsi,1),%xmm3,%xmm2
> 2.96 │ │ mov 0x88(%rsp),%rsi
> 0.04 │ │ vinser $0x10,(%rax,%r11,1),%xmm5,%xmm4
> 2.76 │ │ add 0x70(%rsp),%rax
> 0.07 │ │ vinser $0x10,(%rdx,%rbx,1),%xmm7,%xmm3
> 0.02 │ │ vmovlh %xmm2,%xmm4,%xmm4
> 2.69 │ │ vinser $0x10,(%rdx,%rsi,1),%xmm6,%xmm2
> 1.13 │ │ add 0x78(%rsp),%rdx
> 0.04 │ │ vmovlh %xmm2,%xmm3,%xmm2
> 0.01 │ │ vfmadd %xmm0,%xmm2,%xmm4,%xmm0
> 2.74 │ │ cmp %rcx,0x80(%rsp)
> 0.07 │ └──ja 17c0
> 1.39 │ vhaddp %xmm0,%xmm0,%xmm0
> 4.45 │ mov 0x48(%rsp),%rsi
> 1.42 │ vhaddp %xmm0,%xmm0,%xmm0
> 7.96 │ vaddss %xmm0,%xmm1,%xmm1
> 4.09 │ cmp %r15,0x60(%rsp)
> 0.01 │ ↓ je 18b1
>
> (the faster variant, dunno why)