Author: Simon Pilgrim
Date: 2020-12-04T11:29:23Z
New Revision: d073805be644d86f1bf885ada3d8e7548226ca6e
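[Editorial note] The change described in the LOG below rewrites vXi16 rotates as funnel shifts on VBMI2 targets: a rotate-left becomes ISD::FSHL with both data operands set to the rotated value (and likewise ROTR becomes FSHR), which is why a single vpshldvw/vpshldw can replace the shift/shift/or sequences removed from the tests. A minimal scalar sketch of the identity being exploited, rotl(x, n) == fshl(x, x, n) for 16-bit lanes, is shown here; rotl16 and fshl16 are hypothetical helper names used only for illustration, not LLVM APIs.

// Illustrative sketch only: checks rotl(x, n) == fshl(x, x, n) on scalar
// 16-bit values. rotl16/fshl16 are hypothetical helpers mirroring the
// semantics of ISD::ROTL and ISD::FSHL; they are not LLVM APIs.
#include <cassert>
#include <cstdint>

static uint16_t rotl16(uint16_t x, unsigned n) {
  n &= 15; // rotate amount is taken modulo the element width
  return n ? (uint16_t)((x << n) | (x >> (16 - n))) : x;
}

static uint16_t fshl16(uint16_t hi, uint16_t lo, unsigned n) {
  n &= 15; // funnel-shift amount is also modulo the element width
  // Shift the concatenation hi:lo left by n and keep the high 16 bits.
  return n ? (uint16_t)((hi << n) | (lo >> (16 - n))) : hi;
}

int main() {
  for (unsigned n = 0; n < 32; ++n)
    for (uint32_t x = 0; x <= 0xFFFF; ++x)
      assert(rotl16((uint16_t)x, n) == fshl16((uint16_t)x, (uint16_t)x, n));
  return 0;
}

With that equivalence, making the FSHL/FSHR opcodes legal for these types (as the LOG suggests) would let DAGCombine form the funnel shift directly instead of special-casing it in LowerRotate.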
URL: https://github.com/llvm/llvm-project/commit/d073805be644d86f1bf885ada3d8e7548226ca6e
DIFF: https://github.com/llvm/llvm-project/commit/d073805be644d86f1bf885ada3d8e7548226ca6e.diff

LOG: [X86] LowerRotate - VBMI2 targets can lower vXi16 rotates using funnel shifts.

Ideally we'd do this inside DAGCombine but until we can make the FSHL/FSHR opcodes legal for VBMI2 it won't help us.

Added: 

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
    llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
    llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
    llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
    llvm/test/CodeGen/X86/vector-rotate-128.ll
    llvm/test/CodeGen/X86/vector-rotate-256.ll

Removed: 

################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9bfd7ca80701..04075e924eab 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -28217,6 +28217,12 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
     return Op;
   }
 
+  // AVX512 VBMI2 vXi16 - lower to funnel shifts.
+  if (Subtarget.hasVBMI2() && 16 == EltSizeInBits) {
+    unsigned FunnelOpc = (Opcode == ISD::ROTL ? ISD::FSHL : ISD::FSHR);
+    return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt);
+  }
+
   assert((Opcode == ISD::ROTL) && "Only ROTL supported");
 
   // XOP has 128-bit vector variable + immediate rotates.
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
index e48a9321a8fa..0aed42319252 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
@@ -439,24 +439,16 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind {
 ;
 ; AVX512VBMI2-LABEL: var_funnnel_v8i16:
 ; AVX512VBMI2: # %bb.0:
+; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512VBMI2-NEXT: vpsllvw %zmm1, %zmm0, %zmm2
-; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VBMI2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512VBMI2-NEXT: vzeroupper
 ; AVX512VBMI2-NEXT: retq
 ;
 ; AVX512VLVBMI2-LABEL: var_funnnel_v8i16:
 ; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512VLVBMI2-NEXT: vpsllvw %xmm1, %xmm0, %xmm2
-; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VLVBMI2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512VLVBMI2-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX512VLVBMI2-NEXT: vpshldvw %xmm1, %xmm0, %xmm0
 ; AVX512VLVBMI2-NEXT: retq
 ;
 ; XOP-LABEL: var_funnnel_v8i16:
@@ -1033,17 +1025,68 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind
 ; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0
 ; AVX-NEXT: retq
 ;
-; AVX512-LABEL: splatvar_funnnel_v8i16:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsllw %xmm2, %xmm0, %xmm2
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX512-NEXT: vpmovzxwq
{{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpor %xmm0, %xmm2, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: splatvar_funnnel_v8i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512F-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: splatvar_funnnel_v8i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: splatvar_funnnel_v8i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512BW-NEXT: retq +; +; AVX512VLBW-LABEL: splatvar_funnnel_v8i16: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512VLBW-NEXT: retq +; +; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16: +; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper +; AVX512VBMI2-NEXT: retq +; +; AVX512VLVBMI2-LABEL: splatvar_funnnel_v8i16: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512VLVBMI2-NEXT: vpshldvw %xmm1, %xmm0, %xmm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_funnnel_v8i16: ; XOPAVX1: # %bb.0: @@ -1618,19 +1661,15 @@ define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x) nounwind { ; AVX512VBMI2-LABEL: constant_funnnel_v8i16: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm1 = [16,15,14,13,12,11,10,9] -; AVX512VBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm1 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7] -; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpor 
%xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7] +; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: constant_funnnel_v8i16: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpsrlvw {{.*}}(%rip), %xmm0, %xmm1 -; AVX512VLVBMI2-NEXT: vpsllvw {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VLVBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VLVBMI2-NEXT: vpshldvw {{.*}}(%rip), %xmm0, %xmm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOP-LABEL: constant_funnnel_v8i16: @@ -1995,12 +2034,46 @@ define <8 x i16> @splatconstant_funnnel_v8i16(<8 x i16> %x) nounwind { ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; -; AVX512-LABEL: splatconstant_funnnel_v8i16: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsrlw $9, %xmm0, %xmm1 -; AVX512-NEXT: vpsllw $7, %xmm0, %xmm0 -; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: splatconstant_funnnel_v8i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsrlw $9, %xmm0, %xmm1 +; AVX512F-NEXT: vpsllw $7, %xmm0, %xmm0 +; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: splatconstant_funnnel_v8i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsrlw $9, %xmm0, %xmm1 +; AVX512VL-NEXT: vpsllw $7, %xmm0, %xmm0 +; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: splatconstant_funnnel_v8i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsrlw $9, %xmm0, %xmm1 +; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0 +; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: retq +; +; AVX512VLBW-LABEL: splatconstant_funnnel_v8i16: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpsrlw $9, %xmm0, %xmm1 +; AVX512VLBW-NEXT: vpsllw $7, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: retq +; +; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i16: +; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshldw $7, %zmm0, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper +; AVX512VBMI2-NEXT: retq +; +; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i16: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpshldw $7, %xmm0, %xmm0, %xmm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOP-LABEL: splatconstant_funnnel_v8i16: ; XOP: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll index eb07daa0801c..e40268a69b8a 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll @@ -344,23 +344,15 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind { ; ; AVX512VBMI2-LABEL: var_funnnel_v16i16: ; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VBMI2-NEXT: vpsubw %ymm1, %ymm3, %ymm1 -; AVX512VBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: var_funnnel_v16i16: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), 
%ymm1, %ymm1 -; AVX512VLVBMI2-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 -; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VLVBMI2-NEXT: vpsubw %ymm1, %ymm3, %ymm1 -; AVX512VLVBMI2-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 -; AVX512VLVBMI2-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512VLVBMI2-NEXT: vpshldvw %ymm1, %ymm0, %ymm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: var_funnnel_v16i16: @@ -821,18 +813,71 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw ; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: splatvar_funnnel_v16i16: -; AVX512: # %bb.0: -; AVX512-NEXT: vpbroadcastw %xmm1, %xmm1 -; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512-NEXT: vpsllw %xmm2, %ymm0, %ymm2 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512-NEXT: vpor %ymm0, %ymm2, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: splatvar_funnnel_v16i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: splatvar_funnnel_v16i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: splatvar_funnnel_v16i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm2 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512BW-NEXT: retq +; +; AVX512VLBW-LABEL: splatvar_funnnel_v16i16: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm2 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vpsrlw %xmm1, 
%ymm0, %ymm0 +; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512VLBW-NEXT: retq +; +; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16: +; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vpbroadcastw %xmm1, %ymm1 +; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512VBMI2-NEXT: retq +; +; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i16: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm1, %ymm1 +; AVX512VLVBMI2-NEXT: vpshldvw %ymm1, %ymm0, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_funnnel_v16i16: ; XOPAVX1: # %bb.0: @@ -1280,18 +1325,14 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x) nounwind { ; AVX512VBMI2-LABEL: constant_funnnel_v16i16: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1] -; AVX512VBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm1 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: constant_funnnel_v16i16: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpsrlvw {{.*}}(%rip), %ymm0, %ymm1 -; AVX512VLVBMI2-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512VLVBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VLVBMI2-NEXT: vpshldvw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: constant_funnnel_v16i16: @@ -1660,12 +1701,45 @@ define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x) nounwind { ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: splatconstant_funnnel_v16i16: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsrlw $9, %ymm0, %ymm1 -; AVX512-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: splatconstant_funnnel_v16i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsrlw $9, %ymm0, %ymm1 +; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0 +; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: splatconstant_funnnel_v16i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsrlw $9, %ymm0, %ymm1 +; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm0 +; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: splatconstant_funnnel_v16i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsrlw $9, %ymm0, %ymm1 +; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0 +; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512BW-NEXT: retq +; +; AVX512VLBW-LABEL: splatconstant_funnnel_v16i16: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpsrlw $9, %ymm0, %ymm1 +; AVX512VLBW-NEXT: vpsllw $7, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: retq +; +; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i16: +; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshldw $7, %zmm0, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512VBMI2-NEXT: retq +; +; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i16: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpshldw $7, %ymm0, %ymm0, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: 
splatconstant_funnnel_v16i16: ; XOPAVX1: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll index 8da9d47f9b06..c2ea45d6013c 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll @@ -472,12 +472,8 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind { ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpsubw %xmm1, %xmm2, %xmm1 -; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512VBMI2-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512VBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; @@ -485,12 +481,7 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind { ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512VLVBMI2-NEXT: vpsubw %xmm1, %xmm2, %xmm1 -; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX512VLVBMI2-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 -; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512VLVBMI2-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512VLVBMI2-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 -; AVX512VLVBMI2-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512VLVBMI2-NEXT: vpshldvw %xmm1, %xmm0, %xmm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOP-LABEL: var_funnnel_v8i16: @@ -1099,19 +1090,80 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind ; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX-NEXT: retq ; -; AVX512-LABEL: splatvar_funnnel_v8i16: -; AVX512: # %bb.0: -; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vpsubw %xmm1, %xmm2, %xmm1 -; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512-NEXT: vpsllw %xmm2, %xmm0, %xmm2 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpor %xmm0, %xmm2, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: splatvar_funnnel_v8i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512F-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512F-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: splatvar_funnnel_v8i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512VL-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; 
AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: splatvar_funnnel_v8i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512BW-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512BW-NEXT: retq +; +; AVX512VLBW-LABEL: splatvar_funnnel_v8i16: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512VLBW-NEXT: retq +; +; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16: +; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512VBMI2-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX512VBMI2-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper +; AVX512VBMI2-NEXT: retq +; +; AVX512VLVBMI2-LABEL: splatvar_funnnel_v8i16: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512VLVBMI2-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512VLVBMI2-NEXT: vpshldvw %xmm1, %xmm0, %xmm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_funnnel_v8i16: ; XOPAVX1: # %bb.0: @@ -1702,19 +1754,15 @@ define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x) nounwind { ; AVX512VBMI2-LABEL: constant_funnnel_v8i16: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm1 = [16,1,2,3,4,5,6,7] -; AVX512VBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm1 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,15,14,13,12,11,10,9] -; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,65535,65534,65533,65532,65531,65530,65529] +; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: constant_funnnel_v8i16: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpsrlvw {{.*}}(%rip), %xmm0, %xmm1 -; AVX512VLVBMI2-NEXT: vpsllvw {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VLVBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VLVBMI2-NEXT: vpshldvw {{.*}}(%rip), %xmm0, %xmm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOP-LABEL: constant_funnnel_v8i16: @@ -2079,12 +2127,46 @@ 
define <8 x i16> @splatconstant_funnnel_v8i16(<8 x i16> %x) nounwind { ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; -; AVX512-LABEL: splatconstant_funnnel_v8i16: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsrlw $7, %xmm0, %xmm1 -; AVX512-NEXT: vpsllw $9, %xmm0, %xmm0 -; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: splatconstant_funnnel_v8i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsrlw $7, %xmm0, %xmm1 +; AVX512F-NEXT: vpsllw $9, %xmm0, %xmm0 +; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: splatconstant_funnnel_v8i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsrlw $7, %xmm0, %xmm1 +; AVX512VL-NEXT: vpsllw $9, %xmm0, %xmm0 +; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: splatconstant_funnnel_v8i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsrlw $7, %xmm0, %xmm1 +; AVX512BW-NEXT: vpsllw $9, %xmm0, %xmm0 +; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: retq +; +; AVX512VLBW-LABEL: splatconstant_funnnel_v8i16: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpsrlw $7, %xmm0, %xmm1 +; AVX512VLBW-NEXT: vpsllw $9, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: retq +; +; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i16: +; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshldw $9, %zmm0, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper +; AVX512VBMI2-NEXT: retq +; +; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i16: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpshldw $9, %xmm0, %xmm0, %xmm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOP-LABEL: splatconstant_funnnel_v8i16: ; XOP: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll index 856ac9468e14..ad6214413f66 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -374,24 +374,15 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind { ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpsubw %ymm1, %ymm2, %ymm1 -; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VBMI2-NEXT: vpsubw %ymm1, %ymm3, %ymm1 -; AVX512VBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: var_funnnel_v16i16: ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512VLVBMI2-NEXT: vpsubw %ymm1, %ymm2, %ymm1 -; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 -; AVX512VLVBMI2-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 -; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VLVBMI2-NEXT: vpsubw %ymm1, %ymm3, %ymm1 -; AVX512VLVBMI2-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 -; AVX512VLVBMI2-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512VLVBMI2-NEXT: vpshldvw %ymm1, %ymm0, %ymm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: var_funnnel_v16i16: @@ -887,20 +878,83 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw ; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: 
retq ; -; AVX512-LABEL: splatvar_funnnel_v16i16: -; AVX512: # %bb.0: -; AVX512-NEXT: vpbroadcastw %xmm1, %xmm1 -; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vpsubw %xmm1, %xmm2, %xmm1 -; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512-NEXT: vpsllw %xmm2, %ymm0, %ymm2 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512-NEXT: vpor %ymm0, %ymm2, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: splatvar_funnnel_v16i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512F-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: splatvar_funnnel_v16i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512VL-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: splatvar_funnnel_v16i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512BW-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm2 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512BW-NEXT: retq +; +; AVX512VLBW-LABEL: splatvar_funnnel_v16i16: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm2 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512VLBW-NEXT: retq +; +; AVX512VBMI2-LABEL: splatvar_funnnel_v16i16: +; 
AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512VBMI2-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX512VBMI2-NEXT: vpbroadcastw %xmm1, %ymm1 +; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512VBMI2-NEXT: retq +; +; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i16: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512VLVBMI2-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm1, %ymm1 +; AVX512VLVBMI2-NEXT: vpshldvw %ymm1, %ymm0, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_funnnel_v16i16: ; XOPAVX1: # %bb.0: @@ -1362,18 +1416,14 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x) nounwind { ; AVX512VBMI2-LABEL: constant_funnnel_v16i16: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [16,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; AVX512VBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm1 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1] -; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,65535,65534,65533,65532,65531,65530,65529,65528,65527,65526,65525,65524,65523,65522,65521] +; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: constant_funnnel_v16i16: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpsrlvw {{.*}}(%rip), %ymm0, %ymm1 -; AVX512VLVBMI2-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512VLVBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VLVBMI2-NEXT: vpshldvw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: constant_funnnel_v16i16: @@ -1742,12 +1792,45 @@ define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x) nounwind { ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: splatconstant_funnnel_v16i16: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsrlw $7, %ymm0, %ymm1 -; AVX512-NEXT: vpsllw $9, %ymm0, %ymm0 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: splatconstant_funnnel_v16i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm1 +; AVX512F-NEXT: vpsllw $9, %ymm0, %ymm0 +; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: splatconstant_funnnel_v16i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm1 +; AVX512VL-NEXT: vpsllw $9, %ymm0, %ymm0 +; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: splatconstant_funnnel_v16i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsrlw $7, %ymm0, %ymm1 +; AVX512BW-NEXT: vpsllw $9, %ymm0, %ymm0 +; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512BW-NEXT: retq +; +; AVX512VLBW-LABEL: splatconstant_funnnel_v16i16: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpsrlw $7, %ymm0, %ymm1 +; AVX512VLBW-NEXT: vpsllw $9, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: retq +; +; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i16: +; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshldw $9, %zmm0, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512VBMI2-NEXT: retq +; +; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i16: +; AVX512VLVBMI2: # 
%bb.0: +; AVX512VLVBMI2-NEXT: vpshldw $9, %ymm0, %ymm0, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v16i16: ; XOPAVX1: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll index fced1af3b793..a00df716657b 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-128.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll @@ -425,24 +425,16 @@ define <8 x i16> @var_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; ; AVX512VBMI2-LABEL: var_rotate_v8i16: ; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512VBMI2-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512VBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: var_rotate_v8i16: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX512VLVBMI2-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 -; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512VLVBMI2-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512VLVBMI2-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 -; AVX512VLVBMI2-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512VLVBMI2-NEXT: vpshldvw %xmm1, %xmm0, %xmm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOP-LABEL: var_rotate_v8i16: @@ -1000,17 +992,68 @@ define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX-NEXT: retq ; -; AVX512-LABEL: splatvar_rotate_v8i16: -; AVX512: # %bb.0: -; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512-NEXT: vpsllw %xmm2, %xmm0, %xmm2 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpor %xmm0, %xmm2, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: splatvar_rotate_v8i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512F-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: splatvar_rotate_v8i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: splatvar_rotate_v8i16: +; 
AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512BW-NEXT: retq +; +; AVX512VLBW-LABEL: splatvar_rotate_v8i16: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512VLBW-NEXT: retq +; +; AVX512VBMI2-LABEL: splatvar_rotate_v8i16: +; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper +; AVX512VBMI2-NEXT: retq +; +; AVX512VLVBMI2-LABEL: splatvar_rotate_v8i16: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512VLVBMI2-NEXT: vpshldvw %xmm1, %xmm0, %xmm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_rotate_v8i16: ; XOPAVX1: # %bb.0: @@ -1570,18 +1613,14 @@ define <8 x i16> @constant_rotate_v8i16(<8 x i16> %a) nounwind { ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7] -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [16,15,14,13,12,11,10,9] -; AVX512VBMI2-NEXT: vpsrlvw %zmm2, %zmm0, %zmm2 -; AVX512VBMI2-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: constant_rotate_v8i16: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpsrlvw {{.*}}(%rip), %xmm0, %xmm1 -; AVX512VLVBMI2-NEXT: vpsllvw {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VLVBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VLVBMI2-NEXT: vpshldvw {{.*}}(%rip), %xmm0, %xmm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOP-LABEL: constant_rotate_v8i16: @@ -1952,12 +1991,46 @@ define <8 x i16> @splatconstant_rotate_v8i16(<8 x i16> %a) nounwind { ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; -; AVX512-LABEL: splatconstant_rotate_v8i16: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsrlw $9, %xmm0, %xmm1 -; AVX512-NEXT: vpsllw $7, %xmm0, %xmm0 -; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: splatconstant_rotate_v8i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsrlw $9, %xmm0, %xmm1 +; AVX512F-NEXT: vpsllw $7, %xmm0, %xmm0 +; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: splatconstant_rotate_v8i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsrlw $9, %xmm0, %xmm1 +; AVX512VL-NEXT: vpsllw $7, %xmm0, %xmm0 +; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: 
splatconstant_rotate_v8i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsrlw $9, %xmm0, %xmm1 +; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0 +; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: retq +; +; AVX512VLBW-LABEL: splatconstant_rotate_v8i16: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpsrlw $9, %xmm0, %xmm1 +; AVX512VLBW-NEXT: vpsllw $7, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: retq +; +; AVX512VBMI2-LABEL: splatconstant_rotate_v8i16: +; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshldw $7, %zmm0, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper +; AVX512VBMI2-NEXT: retq +; +; AVX512VLVBMI2-LABEL: splatconstant_rotate_v8i16: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpshldw $7, %xmm0, %xmm0, %xmm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOP-LABEL: splatconstant_rotate_v8i16: ; XOP: # %bb.0: @@ -2275,17 +2348,16 @@ define <8 x i16> @splatconstant_rotate_mask_v8i16(<8 x i16> %a) nounwind { ; ; AVX512VBMI2-LABEL: splatconstant_rotate_mask_v8i16: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlw $11, %xmm0, %xmm1 -; AVX512VBMI2-NEXT: vpsllw $5, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshldw $5, %zmm0, %zmm0, %zmm0 ; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_rotate_mask_v8i16: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpsllw $5, %xmm0, %xmm1 -; AVX512VLVBMI2-NEXT: vpsrlw $11, %xmm0, %xmm0 -; AVX512VLVBMI2-NEXT: vpternlogq $168, {{.*}}(%rip), %xmm1, %xmm0 +; AVX512VLVBMI2-NEXT: vpshldw $5, %xmm0, %xmm0, %xmm0 +; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOP-LABEL: splatconstant_rotate_mask_v8i16: diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll index d5b747ec2100..cb358ccc1b04 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-256.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll @@ -337,23 +337,15 @@ define <16 x i16> @var_rotate_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind { ; ; AVX512VBMI2-LABEL: var_rotate_v16i16: ; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VBMI2-NEXT: vpsubw %ymm1, %ymm3, %ymm1 -; AVX512VBMI2-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: var_rotate_v16i16: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 -; AVX512VLVBMI2-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 -; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VLVBMI2-NEXT: vpsubw %ymm1, %ymm3, %ymm1 -; AVX512VLVBMI2-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 -; AVX512VLVBMI2-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512VLVBMI2-NEXT: vpshldvw %ymm1, %ymm0, %ymm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: var_rotate_v16i16: @@ -802,18 +794,71 @@ define <16 x i16> 
@splatvar_rotate_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind ; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: splatvar_rotate_v16i16: -; AVX512: # %bb.0: -; AVX512-NEXT: vpbroadcastw %xmm1, %xmm1 -; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512-NEXT: vpsllw %xmm2, %ymm0, %ymm2 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512-NEXT: vpor %ymm0, %ymm2, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: splatvar_rotate_v16i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: splatvar_rotate_v16i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: splatvar_rotate_v16i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm2 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512BW-NEXT: retq +; +; AVX512VLBW-LABEL: splatvar_rotate_v16i16: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm2 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512VLBW-NEXT: retq +; +; AVX512VBMI2-LABEL: splatvar_rotate_v16i16: +; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vpbroadcastw %xmm1, %ymm1 +; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512VBMI2-NEXT: retq +; +; AVX512VLVBMI2-LABEL: splatvar_rotate_v16i16: +; 
AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm1, %ymm1 +; AVX512VLVBMI2-NEXT: vpshldvw %ymm1, %ymm0, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatvar_rotate_v16i16: ; XOPAVX1: # %bb.0: @@ -1260,17 +1305,13 @@ define <16 x i16> @constant_rotate_v16i16(<16 x i16> %a) nounwind { ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1] -; AVX512VBMI2-NEXT: vpsrlvw %zmm2, %zmm0, %zmm2 -; AVX512VBMI2-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpor %ymm2, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpshldvw %zmm1, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: constant_rotate_v16i16: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpsrlvw {{.*}}(%rip), %ymm0, %ymm1 -; AVX512VLVBMI2-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512VLVBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VLVBMI2-NEXT: vpshldvw {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: constant_rotate_v16i16: @@ -1647,12 +1688,45 @@ define <16 x i16> @splatconstant_rotate_v16i16(<16 x i16> %a) nounwind { ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: splatconstant_rotate_v16i16: -; AVX512: # %bb.0: -; AVX512-NEXT: vpsrlw $9, %ymm0, %ymm1 -; AVX512-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: splatconstant_rotate_v16i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpsrlw $9, %ymm0, %ymm1 +; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0 +; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: splatconstant_rotate_v16i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpsrlw $9, %ymm0, %ymm1 +; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm0 +; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: splatconstant_rotate_v16i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpsrlw $9, %ymm0, %ymm1 +; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0 +; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512BW-NEXT: retq +; +; AVX512VLBW-LABEL: splatconstant_rotate_v16i16: +; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpsrlw $9, %ymm0, %ymm1 +; AVX512VLBW-NEXT: vpsllw $7, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: retq +; +; AVX512VBMI2-LABEL: splatconstant_rotate_v16i16: +; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshldw $7, %zmm0, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512VBMI2-NEXT: retq +; +; AVX512VLVBMI2-LABEL: splatconstant_rotate_v16i16: +; AVX512VLVBMI2: # %bb.0: +; AVX512VLVBMI2-NEXT: vpshldw $7, %ymm0, %ymm0, %ymm0 +; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_rotate_v16i16: ; XOPAVX1: # %bb.0: @@ -1995,17 +2069,15 @@ define <16 x i16> @splatconstant_rotate_mask_v16i16(<16 x i16> %a) nounwind { ; ; AVX512VBMI2-LABEL: splatconstant_rotate_mask_v16i16: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlw $11, %ymm0, %ymm1 -; AVX512VBMI2-NEXT: vpsllw $5, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VBMI2-NEXT: vpshldw $5, %zmm0, %zmm0, %zmm0 ; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: 
splatconstant_rotate_mask_v16i16: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpsllw $5, %ymm0, %ymm1 -; AVX512VLVBMI2-NEXT: vpsrlw $11, %ymm0, %ymm0 -; AVX512VLVBMI2-NEXT: vpternlogq $168, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: vpshldw $5, %ymm0, %ymm0, %ymm0 +; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_rotate_mask_v16i16: _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits