Author: Simon Pilgrim
Date: 2021-01-15T16:25:25Z
New Revision: be69e66b1cd826f499566e1c3dadbf04e872baa0
URL: https://github.com/llvm/llvm-project/commit/be69e66b1cd826f499566e1c3dadbf04e872baa0
DIFF: https://github.com/llvm/llvm-project/commit/be69e66b1cd826f499566e1c3dadbf04e872baa0.diff

LOG: [X86][SSE] Attempt to fold shuffle(binop(),binop()) -> binop(shuffle(),shuffle())

If this helps us fold shuffles together, push the shuffle through the merged binops.

Ideally this would be performed in DAGCombiner::visitVECTOR_SHUFFLE, but getting an efficient and legal merged shuffle can be tricky; on SSE we can be confident that shuffles of vectors with 32/64-bit elements should fold easily.

Added: 


Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/haddsub-shuf.ll

Removed: 


################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a84250782c19..d2cc2395576a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37939,6 +37939,33 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
 
     if (SDValue HAddSub = foldShuffleOfHorizOp(N, DAG))
       return HAddSub;
+
+    // Merge shuffles through binops if it's likely we'll be able to merge it
+    // with other shuffles.
+    // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
+    // TODO: We might be able to move this to DAGCombiner::visitVECTOR_SHUFFLE.
+    if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N)) {
+      unsigned SrcOpcode = N->getOperand(0).getOpcode();
+      if (SrcOpcode == N->getOperand(1).getOpcode() && TLI.isBinOp(SrcOpcode) &&
+          N->isOnlyUserOf(N->getOperand(0).getNode()) &&
+          N->isOnlyUserOf(N->getOperand(1).getNode()) &&
+          VT.getScalarSizeInBits() >= 32) {
+        SDValue Op00 = N->getOperand(0).getOperand(0);
+        SDValue Op10 = N->getOperand(1).getOperand(0);
+        SDValue Op01 = N->getOperand(0).getOperand(1);
+        SDValue Op11 = N->getOperand(1).getOperand(1);
+        if ((Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
+             Op10.getOpcode() == ISD::VECTOR_SHUFFLE) &&
+            (Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
+             Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
+          SDLoc DL(N);
+          ArrayRef<int> Mask = SVN->getMask();
+          SDValue LHS = DAG.getVectorShuffle(VT, DL, Op00, Op10, Mask);
+          SDValue RHS = DAG.getVectorShuffle(VT, DL, Op01, Op11, Mask);
+          return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
+        }
+      }
+    }
   }
 
   // Attempt to combine into a vector load/broadcast.
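To illustrate why pushing the shuffle through the binops is legal (this is not part of the committed diff): for any lane-wise binop, shuffling the results of two binops with a common mask gives the same vector as applying the binop to the two operand shuffles, which is the form the combine builds with DAG.getVectorShuffle. Below is a minimal standalone C++ sketch of that identity; Vec4, shuffle and fadd are illustrative helpers for this example, not LLVM APIs.

// Standalone sketch (not LLVM code): checks the identity the combine relies on,
//   shuffle(bop(x0,y0), bop(x1,y1), M) == bop(shuffle(x0,x1,M), shuffle(y0,y1,M))
// for a lane-wise binop, using a shufflevector-style two-input mask where
// indices 0-3 select from the first operand and 4-7 from the second.
#include <array>
#include <cassert>
#include <cstdio>

using Vec4 = std::array<float, 4>;

// Two-input shuffle: Mask[i] in [0,8) indexes the concatenation of A and B.
static Vec4 shuffle(const Vec4 &A, const Vec4 &B, const std::array<int, 4> &Mask) {
  Vec4 R{};
  for (int I = 0; I != 4; ++I)
    R[I] = Mask[I] < 4 ? A[Mask[I]] : B[Mask[I] - 4];
  return R;
}

// Lane-wise binop (fadd stands in for any element-wise binop).
static Vec4 fadd(const Vec4 &A, const Vec4 &B) {
  Vec4 R{};
  for (int I = 0; I != 4; ++I)
    R[I] = A[I] + B[I];
  return R;
}

int main() {
  Vec4 X0{1.0f, 2.0f, 3.0f, 4.0f}, Y0{5.0f, 6.0f, 7.0f, 8.0f};
  Vec4 X1{9.0f, 10.0f, 11.0f, 12.0f}, Y1{13.0f, 14.0f, 15.0f, 16.0f};
  std::array<int, 4> Mask{0, 2, 5, 7};

  // Before: shuffle of two binops.
  Vec4 Before = shuffle(fadd(X0, Y0), fadd(X1, Y1), Mask);
  // After: binop of two shuffles (the form the combine produces).
  Vec4 After = fadd(shuffle(X0, X1, Mask), shuffle(Y0, Y1, Mask));

  for (int I = 0; I != 4; ++I)
    assert(Before[I] == After[I]);
  std::puts("shuffle(binop,binop) == binop(shuffle,shuffle)");
  return 0;
}

As the guards in the patch show, the combine only fires when the shuffle is the sole user of both binops, the scalar type is at least 32 bits wide, and at least one first operand and at least one second operand of the binops is itself a shuffle, so the two freshly created shuffles are likely to fold away rather than add new work.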
diff --git a/llvm/test/CodeGen/X86/haddsub-shuf.ll b/llvm/test/CodeGen/X86/haddsub-shuf.ll
index 9b2dfc1ce0cb..37eedcd54441 100644
--- a/llvm/test/CodeGen/X86/haddsub-shuf.ll
+++ b/llvm/test/CodeGen/X86/haddsub-shuf.ll
@@ -923,45 +923,15 @@ define <4 x float> @PR34724_1(<4 x float> %a, <4 x float> %b) {
 }
 
 define <4 x float> @PR34724_2(<4 x float> %a, <4 x float> %b) {
-; SSSE3_SLOW-LABEL: PR34724_2:
-; SSSE3_SLOW:       # %bb.0:
-; SSSE3_SLOW-NEXT:    haddps %xmm1, %xmm0
-; SSSE3_SLOW-NEXT:    movsldup {{.*#+}} xmm2 = xmm1[0,0,2,2]
-; SSSE3_SLOW-NEXT:    addps %xmm1, %xmm2
-; SSSE3_SLOW-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,0],xmm0[2,0]
-; SSSE3_SLOW-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,0]
-; SSSE3_SLOW-NEXT:    retq
-;
-; SSSE3_FAST-LABEL: PR34724_2:
-; SSSE3_FAST:       # %bb.0:
-; SSSE3_FAST-NEXT:    haddps %xmm1, %xmm0
-; SSSE3_FAST-NEXT:    retq
-;
-; AVX1_SLOW-LABEL: PR34724_2:
-; AVX1_SLOW:       # %bb.0:
-; AVX1_SLOW-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
-; AVX1_SLOW-NEXT:    vmovsldup {{.*#+}} xmm2 = xmm1[0,0,2,2]
-; AVX1_SLOW-NEXT:    vaddps %xmm1, %xmm2, %xmm1
-; AVX1_SLOW-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
-; AVX1_SLOW-NEXT:    retq
-;
-; AVX1_FAST-LABEL: PR34724_2:
-; AVX1_FAST:       # %bb.0:
-; AVX1_FAST-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
-; AVX1_FAST-NEXT:    retq
-;
-; AVX2_SLOW-LABEL: PR34724_2:
-; AVX2_SLOW:       # %bb.0:
-; AVX2_SLOW-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
-; AVX2_SLOW-NEXT:    vmovsldup {{.*#+}} xmm2 = xmm1[0,0,2,2]
-; AVX2_SLOW-NEXT:    vaddps %xmm1, %xmm2, %xmm1
-; AVX2_SLOW-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
-; AVX2_SLOW-NEXT:    retq
+; SSSE3-LABEL: PR34724_2:
+; SSSE3:       # %bb.0:
+; SSSE3-NEXT:    haddps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
 ;
-; AVX2_FAST-LABEL: PR34724_2:
-; AVX2_FAST:       # %bb.0:
-; AVX2_FAST-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
-; AVX2_FAST-NEXT:    retq
+; AVX-LABEL: PR34724_2:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %t0 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 4, i32 undef, i32 undef>
   %t1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 5, i32 undef, i32 undef>
   %t2 = fadd <4 x float> %t0, %t1

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits