https://llvm.org/bugs/show_bug.cgi?id=28136
Bug ID: 28136
Summary: Regression in x86 codegen using _mm256 intrinsic
Product: clang
Version: 3.7
Hardware: PC
OS: All
Status: NEW
Severity: normal
Priority: P
Component: LLVM Codegen
Assignee: unassignedclangb...@nondot.org
Reporter: deadal...@gmail.com
CC: llvm-bugs@lists.llvm.org
Classification: Unclassified

Sample code:

#include <x86intrin.h>

__m256i foo(__m256i a, __m256i b) {
    a = _mm256_unpacklo_epi8(a, b);
    return _mm256_permute4x64_epi64(a, _MM_SHUFFLE(3, 1, 2, 0));
}

Assembly output on 3.6:

foo(long long __vector(4), long long __vector(4)): # @foo(long long __vector(4), long long __vector(4))
        vpunpcklbw ymm0, ymm0, ymm1 # ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
        vpermq ymm0, ymm0, -40 # ymm0 = ymm0[0,2,1,3]
        ret

Assembly output on 3.7 onward:

.LCPI0_0:
        .zero 1
        .zero 1
        .zero 1
        .zero 1
        .zero 1
        .zero 1
        .zero 1
        .zero 1
        .byte 0 # 0x0
        .zero 1
        .byte 1 # 0x1
        .zero 1
        .byte 2 # 0x2
        .zero 1
        .byte 3 # 0x3
        .zero 1
        .byte 4 # 0x4
        .zero 1
        .byte 5 # 0x5
        .zero 1
        .byte 6 # 0x6
        .zero 1
        .byte 7 # 0x7
        .zero 1
        .zero 1
        .zero 1
        .zero 1
        .zero 1
        .zero 1
        .zero 1
        .zero 1
        .zero 1
.LCPI0_1:
        .zero 1
        .zero 1
        .zero 1
        .zero 1
        .zero 1
        .zero 1
        .zero 1
        .zero 1
        .zero 1
        .byte 0 # 0x0
        .zero 1
        .byte 1 # 0x1
        .zero 1
        .byte 2 # 0x2
        .zero 1
        .byte 3 # 0x3
        .zero 1
        .byte 4 # 0x4
        .zero 1
        .byte 5 # 0x5
        .zero 1
        .byte 6 # 0x6
        .zero 1
        .byte 7 # 0x7
        .zero 1
        .zero 1
        .zero 1
        .zero 1
        .zero 1
        .zero 1
        .zero 1
        .zero 1
.LCPI0_2:
        .byte 255 # 0xff
        .byte 0 # 0x0
        .byte 255 # 0xff
        .byte 0 # 0x0
        .byte 255 # 0xff
        .byte 0 # 0x0
        .byte 255 # 0xff
        .byte 0 # 0x0
        .byte 255 # 0xff
        .byte 0 # 0x0
        .byte 255 # 0xff
        .byte 0 # 0x0
        .byte 255 # 0xff
        .byte 0 # 0x0
        .byte 255 # 0xff
        .byte 0 # 0x0
        .byte 255 # 0xff
        .byte 0 # 0x0
        .byte 255 # 0xff
        .byte 0 # 0x0
        .byte 255 # 0xff
        .byte 0 # 0x0
        .byte 255 # 0xff
        .byte 0 # 0x0
        .byte 255 # 0xff
        .byte 0 # 0x0
        .byte 255 # 0xff
        .byte 0 # 0x0
        .byte 255 # 0xff
        .byte 0 # 0x0
        .byte 255 # 0xff
        .byte 0 # 0x0
foo(long long __vector(4), long long __vector(4)): # @foo(long long __vector(4), long long __vector(4))
        vpunpcklbw ymm2, ymm0, ymm0 # ymm2 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
        vperm2i128 ymm0, ymm0, ymm0, 35 # ymm0 = ymm0[2,3,0,1]
        vpshufb ymm0, ymm0, ymmword ptr [rip + .LCPI0_0] # ymm0 = ymm0[u,u,u,u,u,u,u,u,0,u,1,u,2,u,3,u,20,u,21,u,22,u,23,u,u,u,u,u,u,u,u,u]
        vpblendd ymm0, ymm2, ymm0, 60 # ymm0 = ymm2[0,1],ymm0[2,3,4,5],ymm2[6,7]
        vpunpcklbw ymm2, ymm0, ymm1 # ymm2 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
        vperm2i128 ymm1, ymm0, ymm1, 35 # ymm1 = ymm1[2,3,0,1]
        vpshufb ymm1, ymm1, ymmword ptr [rip + .LCPI0_1] # ymm1 = ymm1[u,u,u,u,u,u,u,u,u,0,u,1,u,2,u,3,u,20,u,21,u,22,u,23,u,u,u,u,u,u,u,u]
        vpblendd ymm1, ymm2, ymm1, 60 # ymm1 = ymm2[0,1],ymm1[2,3,4,5],ymm2[6,7]
        vmovdqa ymm2, ymmword ptr [rip + .LCPI0_2] # ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
        vpblendvb ymm0, ymm1, ymm0, ymm2
        ret

--
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs