https://llvm.org/bugs/show_bug.cgi?id=15077
Simon Pilgrim <llvm-...@redking.me.uk> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|NEW                         |RESOLVED
         Resolution|---                         |FIXED

--- Comment #4 from Simon Pilgrim <llvm-...@redking.me.uk> ---
Resolving this. With rL284939 we have accurate vector shift costs for SSE4.1+,
which means that we can correctly vectorize the inner loop as 4 * <4 x i32>:

LBB0_8:
        vpmovsxbd -12(%ebx), %xmm5
        vmovdqu -48(%edi), %xmm3
        vmovdqu -32(%edi), %xmm4
        vmovdqu -16(%edi), %xmm2
        vmovdqu (%edi), %xmm1
        vpsrldq $12, %xmm5, %xmm6 # xmm6 = xmm5[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
        vpsrlq $32, %xmm5, %xmm7
        vpsrld %xmm6, %xmm3, %xmm6
        vpsrld %xmm7, %xmm3, %xmm7
        vpblendw $240, %xmm6, %xmm7, %xmm6 # xmm6 = xmm7[0,1,2,3],xmm6[4,5,6,7]
        vpunpckhdq %xmm0, %xmm5, %xmm7 # xmm7 = xmm5[2],xmm0[2],xmm5[3],xmm0[3]
        vpmovzxdq %xmm5, %xmm5 # xmm5 = xmm5[0],zero,xmm5[1],zero
        vpsrld %xmm7, %xmm3, %xmm7
        vpsrld %xmm5, %xmm3, %xmm3
        vpmovsxbd -8(%ebx), %xmm5
        vpblendw $240, %xmm7, %xmm3, %xmm3 # xmm3 = xmm3[0,1,2,3],xmm7[4,5,6,7]
        vpblendw $204, %xmm6, %xmm3, %xmm3 # xmm3 = xmm3[0,1],xmm6[2,3],xmm3[4,5],xmm6[6,7]
        vpsrldq $12, %xmm5, %xmm6 # xmm6 = xmm5[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
        vpsrlq $32, %xmm5, %xmm7
        vpsrld %xmm6, %xmm4, %xmm6
        vpsrld %xmm7, %xmm4, %xmm7
        vpblendw $240, %xmm6, %xmm7, %xmm6 # xmm6 = xmm7[0,1,2,3],xmm6[4,5,6,7]
        vpunpckhdq %xmm0, %xmm5, %xmm7 # xmm7 = xmm5[2],xmm0[2],xmm5[3],xmm0[3]
        vpmovzxdq %xmm5, %xmm5 # xmm5 = xmm5[0],zero,xmm5[1],zero
        vpsrld %xmm7, %xmm4, %xmm7
        vpsrld %xmm5, %xmm4, %xmm4
        vpmovsxbd -4(%ebx), %xmm5
        vpblendw $240, %xmm7, %xmm4, %xmm4 # xmm4 = xmm4[0,1,2,3],xmm7[4,5,6,7]
        vpblendw $204, %xmm6, %xmm4, %xmm4 # xmm4 = xmm4[0,1],xmm6[2,3],xmm4[4,5],xmm6[6,7]
        vpsrldq $12, %xmm5, %xmm6 # xmm6 = xmm5[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
        vpsrlq $32, %xmm5, %xmm7
        vpsrld %xmm6, %xmm2, %xmm6
        vpsrld %xmm7, %xmm2, %xmm7
        vpblendw $240, %xmm6, %xmm7, %xmm6 # xmm6 = xmm7[0,1,2,3],xmm6[4,5,6,7]
        vpunpckhdq %xmm0, %xmm5, %xmm7 # xmm7 = xmm5[2],xmm0[2],xmm5[3],xmm0[3]
        vpmovzxdq %xmm5, %xmm5 # xmm5 = xmm5[0],zero,xmm5[1],zero
        vpsrld %xmm7, %xmm2, %xmm7
        vpsrld %xmm5, %xmm2, %xmm2
        vpmovsxbd (%ebx), %xmm5
        vmovdqu %xmm3, -48(%edi)
        vmovdqu %xmm4, -32(%edi)
        addl $16, %ebx
        vpblendw $240, %xmm7, %xmm2, %xmm2 # xmm2 = xmm2[0,1,2,3],xmm7[4,5,6,7]
        vpblendw $204, %xmm6, %xmm2, %xmm2 # xmm2 = xmm2[0,1],xmm6[2,3],xmm2[4,5],xmm6[6,7]
        vmovdqu %xmm2, -16(%edi)
        vpsrldq $12, %xmm5, %xmm6 # xmm6 = xmm5[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
        vpsrlq $32, %xmm5, %xmm7
        vpsrld %xmm6, %xmm1, %xmm6
        vpsrld %xmm7, %xmm1, %xmm7
        vpblendw $240, %xmm6, %xmm7, %xmm6 # xmm6 = xmm7[0,1,2,3],xmm6[4,5,6,7]
        vpunpckhdq %xmm0, %xmm5, %xmm7 # xmm7 = xmm5[2],xmm0[2],xmm5[3],xmm0[3]
        vpmovzxdq %xmm5, %xmm5 # xmm5 = xmm5[0],zero,xmm5[1],zero
        vpsrld %xmm7, %xmm1, %xmm7
        vpsrld %xmm5, %xmm1, %xmm1
        vpblendw $240, %xmm7, %xmm1, %xmm1 # xmm1 = xmm1[0,1,2,3],xmm7[4,5,6,7]
        vpblendw $204, %xmm6, %xmm1, %xmm1 # xmm1 = xmm1[0,1],xmm6[2,3],xmm1[4,5],xmm6[6,7]
        vmovdqu %xmm1, (%edi)
        addl $64, %edi
        addl $-16, %ebp
        jne LBB0_8
        movl %esi, %edi
        cmpl %eax, %esi
        je LBB0_10
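The original testcase isn't quoted in this comment, but judging from the codegen
above (vpmovsxbd loads of per-element byte shift amounts feeding vpsrld logical
right shifts of dword data, 16 elements per iteration), the inner loop is of
roughly the following shape. This is a sketch for illustration only; the
function and parameter names are assumed, not taken from the bug:

        /* Hypothetical reconstruction of the kind of loop this bug covers:
           each unsigned 32-bit element is shifted right by its own 8-bit
           amount. With rL284939's SSE4.1+ shift costs, the loop vectorizer
           can now profitably emit this as 4 * <4 x i32> per iteration. */
        void shift_right(unsigned int *data, const signed char *amounts, int n)
        {
            for (int i = 0; i < n; i++)
                data[i] >>= amounts[i];
        }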