Author: evancheng
Date: Fri Dec 14 21:00:47 2007
New Revision: 45058

URL: http://llvm.org/viewvc/llvm-project?rev=45058&view=rev
Log:
Make better use of instructions that clear high bits; fix various 2-wide shuffle bugs.
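For readers skimming the diff: the key transform is RewriteAsNarrowerShuffle in the X86ISelLowering.cpp hunks below. A v8i16 or v16i8 shuffle whose mask moves whole pairs or quads of elements can be re-expressed as a v4i32 shuffle, and a v4i32 / v4f32 shuffle that moves whole pairs can be re-expressed as a 2-wide one, at which point zero-extending moves such as movq become usable. The following standalone C++ sketch models just the mask-narrowing test; the helper name is hypothetical, and the alignment check on the first defined element of each group is inferred from context the hunk does not show (the committed code operates on SDOperand mask nodes, not plain integers):

    #include <cstdio>
    #include <vector>

    // Returns true and fills NarrowMask if every group of Scale mask entries
    // selects Scale consecutive source elements starting at a Scale-aligned
    // index; -1 plays the role of an undef mask element.
    static bool narrowShuffleMask(const std::vector<int> &Mask, unsigned Scale,
                                  std::vector<int> &NarrowMask) {
      NarrowMask.clear();
      for (unsigned i = 0, e = Mask.size(); i < e; i += Scale) {
        int StartIdx = -1;
        for (unsigned j = 0; j != Scale; ++j) {
          int Elt = Mask[i + j];
          if (Elt < 0)                    // undef is compatible with anything
            continue;
          if (StartIdx < 0)               // round down to the group base
            StartIdx = Elt - (Elt % Scale);
          if (Elt != StartIdx + (int)j)   // must stay consecutive in-group
            return false;
        }
        NarrowMask.push_back(StartIdx < 0 ? -1 : StartIdx / (int)Scale);
      }
      return true;
    }

    int main() {
      std::vector<int> Narrow;
      // v8i16 mask <2,3, 10,11, 0,1, 14,15> viewed as v4i32 becomes <1,5,0,7>.
      std::vector<int> M1 = {2, 3, 10, 11, 0, 1, 14, 15};
      if (narrowShuffleMask(M1, 2, Narrow))
        printf("narrowed to <%d,%d,%d,%d>\n",
               Narrow[0], Narrow[1], Narrow[2], Narrow[3]);
      // <0,2,...> mixes two 32-bit lanes inside one pair, so it cannot narrow.
      std::vector<int> M2 = {0, 2, 4, 5, 8, 9, 12, 13};
      printf("second mask narrowable: %s\n",
             narrowShuffleMask(M2, 2, Narrow) ? "yes" : "no");
      return 0;
    }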
Added:
    llvm/trunk/test/CodeGen/X86/vec_shuffle-14.ll
    llvm/trunk/test/CodeGen/X86/vec_shuffle-15.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp
    llvm/trunk/test/CodeGen/X86/vec_set-5.ll
    llvm/trunk/test/CodeGen/X86/vec_set-7.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=45058&r1=45057&r2=45058&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Dec 14 21:00:47 2007
@@ -3138,8 +3138,6 @@
   return V;
 }
 
-/// is4WideVector - Returns true if the specific v8i16 or v16i8 vector is
-/// actually just a 4 wide vector. e.g. <a, a, y, y, d, d, x, x>
 SDOperand
 X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
   // All zero's are handled with pxor, all one's are handled with pcmpeqd.
@@ -3562,17 +3560,35 @@
   }
 }
 
-/// RewriteAs4WideShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
-/// ones if possible. This can be done when every pair / quad of shuffle mask
-/// elements point to elements in the right sequence. e.g.
+/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
+/// ones, or rewriting v4i32 / v4f32 shuffles as 2 wide ones if possible. This
+/// can be done when every pair / quad of shuffle mask elements points to
+/// elements in the right sequence. e.g.
 /// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
 static
-SDOperand RewriteAs4WideShuffle(SDOperand V1, SDOperand V2,
+SDOperand RewriteAsNarrowerShuffle(SDOperand V1, SDOperand V2,
+                                   MVT::ValueType VT,
                                 SDOperand PermMask, SelectionDAG &DAG,
                                 TargetLowering &TLI) {
   unsigned NumElems = PermMask.getNumOperands();
-  unsigned Scale = NumElems / 4;
-  SmallVector<SDOperand, 4> MaskVec;
+  unsigned NewWidth = (NumElems == 4) ? 2 : 4;
+  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
+  MVT::ValueType NewVT = MaskVT;
+  switch (VT) {
+  case MVT::v4f32: NewVT = MVT::v2f64; break;
+  case MVT::v4i32: NewVT = MVT::v2i64; break;
+  case MVT::v8i16: NewVT = MVT::v4i32; break;
+  case MVT::v16i8: NewVT = MVT::v4i32; break;
+  default: assert(false && "Unexpected!");
+  }
+
+  if (NewWidth == 2)
+    if (MVT::isInteger(VT))
+      NewVT = MVT::v2i64;
+    else
+      NewVT = MVT::v2f64;
+  unsigned Scale = NumElems / NewWidth;
+  SmallVector<SDOperand, 8> MaskVec;
   for (unsigned i = 0; i < NumElems; i += Scale) {
     unsigned StartIdx = ~0U;
     for (unsigned j = 0; j < Scale; ++j) {
@@ -3591,10 +3607,11 @@
     MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MVT::i32));
   }
 
-  V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
-  V2 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V2);
-  return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, V2,
-                     DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, &MaskVec[0],4));
+  V1 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V1);
+  V2 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V2);
+  return DAG.getNode(ISD::VECTOR_SHUFFLE, NewVT, V1, V2,
+                     DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+                                 &MaskVec[0], MaskVec.size()));
 }
 
 SDOperand
@@ -3626,6 +3643,35 @@
     return PromoteSplat(Op, DAG);
   }
 
+  // If the shuffle can be profitably rewritten as a narrower shuffle, then
+  // do it!
+  if (VT == MVT::v8i16 || VT == MVT::v16i8) {
+    SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
+    if (NewOp.Val)
+      return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
+  } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
+    // FIXME: Figure out a cleaner way to do this.
+    // Try to make use of movq to zero out the top part.
+    if (ISD::isBuildVectorAllZeros(V2.Val)) {
+      SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
+      if (NewOp.Val) {
+        SDOperand NewV1 = NewOp.getOperand(0);
+        SDOperand NewV2 = NewOp.getOperand(1);
+        SDOperand NewMask = NewOp.getOperand(2);
+        if (isCommutedMOVL(NewMask.Val, true, false)) {
+          NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
+          NewOp = DAG.getNode(ISD::VECTOR_SHUFFLE, NewOp.getValueType(),
+                              NewV1, NewV2, getMOVLMask(2, DAG));
+          return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
+        }
+      }
+    } else if (ISD::isBuildVectorAllZeros(V1.Val)) {
+      SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
+      if (NewOp.Val && X86::isMOVLMask(NewOp.getOperand(2).Val))
+        return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
+    }
+  }
+
   if (X86::isMOVLMask(PermMask.Val))
     return (V1IsUndef) ? V2 : Op;
 
@@ -3654,6 +3700,7 @@
     Commuted = true;
   }
 
+  // FIXME: Figure out a cleaner way to do this.
   if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
     if (V2IsUndef) return V1;
     Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
@@ -3735,13 +3782,6 @@
     }
   }
 
-  // If the shuffle can be rewritten as a 4 wide shuffle, then do it!
-  if (VT == MVT::v8i16 || VT == MVT::v16i8) {
-    SDOperand NewOp = RewriteAs4WideShuffle(V1, V2, PermMask, DAG, *this);
-    if (NewOp.Val)
-      return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
-  }
-
   // Handle v8i16 specifically since SSE can do byte extraction and insertion.
   if (VT == MVT::v8i16) {
     SDOperand NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);
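Why the new LowerVECTOR_SHUFFLE block above looks for a MOVL-shaped mask once one shuffle operand is all zeros: after narrowing to 2 wide, a mask such as <2, 1> takes the low element from the non-zero operand and the remaining element from the zero vector, which is exactly a zero-extending movq. Below is a simplified standalone model of the X86::isMOVLMask predicate; the real predicate walks SDNode operands and also accepts undef elements, so treat this as a sketch of the shape it matches, not the committed code:

    #include <cstdio>

    // A shuffle mask picks from the concatenation (V1, V2): indices 0..N-1
    // choose from V1, indices N..2N-1 choose from V2. The MOVL pattern is
    // <N, 1, 2, ..., N-1>: lowest element taken from V2, the rest passed
    // through from V1 -- movss/movsd/movq-style behaviour.
    static bool isMOVLMask(const int *Mask, unsigned N) {
      if (Mask[0] != (int)N)
        return false;
      for (unsigned i = 1; i != N; ++i)
        if (Mask[i] != (int)i)
          return false;
      return true;
    }

    int main() {
      // Narrowed 2-wide shuffle: V1 = <0, 0>, V2 = <x, y>, mask = <2, 1>.
      // Element 0 is V2[0] (= x) and element 1 is V1[1] (= 0), so the result
      // is "x zero-extended into the low qword": a single movq.
      int Mask[2] = {2, 1};
      printf("mask <2,1> is MOVL-shaped: %s\n",
             isMOVLMask(Mask, 2) ? "yes" : "no");
      return 0;
    }

When the zero vector is the second operand instead, the mask comes out commuted, which is what the isCommutedMOVL path above normalizes before emitting the same movq.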
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=45058&r1=45057&r2=45058&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri Dec 14 21:00:47 2007
@@ -2224,35 +2224,56 @@
                                                  (loadf64 addr:$src))),
                            MOVL_shuffle_mask)))]>;
 
-let AddedComplexity = 15 in // movd / movq to XMM register zero-extends
+let AddedComplexity = 15 in {
 def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v4i32 (vector_shuffle immAllZerosV,
                                    (v4i32 (scalar_to_vector GR32:$src)),
                                    MOVL_shuffle_mask)))]>;
-let AddedComplexity = 20 in
+// This is X86-64 only.
+def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+                        "mov{d|q}\t{$src, $dst|$dst, $src}",
+                        [(set VR128:$dst,
+                          (v2i64 (vector_shuffle immAllZerosV_bc,
+                                    (v2i64 (scalar_to_vector GR64:$src)),
+                                    MOVL_shuffle_mask)))]>;
+}
+
+let AddedComplexity = 20 in {
 def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v4i32 (vector_shuffle immAllZerosV,
                                    (v4i32 (scalar_to_vector (loadi32 addr:$src))),
                                    MOVL_shuffle_mask)))]>;
-
-// Moving from XMM to XMM but still clear upper 64 bits.
-let AddedComplexity = 15 in
-def MOVZQI2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                     "movq\t{$src, $dst|$dst, $src}",
-                     [(set VR128:$dst, (int_x86_sse2_movl_dq VR128:$src))]>,
-                   XS, Requires<[HasSSE2]>;
-let AddedComplexity = 20 in
 def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                      "movq\t{$src, $dst|$dst, $src}",
-                     [(set VR128:$dst, (int_x86_sse2_movl_dq
-                                        (bitconvert (memopv2i64 addr:$src))))]>,
-                   XS, Requires<[HasSSE2]>;
+                     [(set VR128:$dst,
+                       (v2i64 (vector_shuffle immAllZerosV_bc,
+                                 (v2i64 (scalar_to_vector (loadi64 addr:$src))),
+                                 MOVL_shuffle_mask)))]>, XS,
+                   Requires<[HasSSE2]>;
+}
 
+// Moving from XMM to XMM and clearing the upper 64 bits. Note: there is a bug
+// in the IA-32 documentation; movq xmm1, xmm2 does clear the high bits.
+let AddedComplexity = 15 in
+def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                        "movq\t{$src, $dst|$dst, $src}",
+                        [(set VR128:$dst, (v2i64 (vector_shuffle immAllZerosV_bc,
+                                                    VR128:$src,
+                                                    MOVL_shuffle_mask)))]>,
+                      XS, Requires<[HasSSE2]>;
+
+let AddedComplexity = 20 in
+def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+                        "movq\t{$src, $dst|$dst, $src}",
+                        [(set VR128:$dst, (v2i64 (vector_shuffle immAllZerosV_bc,
+                                                    (memopv2i64 addr:$src),
+                                                    MOVL_shuffle_mask)))]>,
+                      XS, Requires<[HasSSE2]>;
 
 //===----------------------------------------------------------------------===//
 // SSE3 Instructions
@@ -2763,13 +2784,13 @@
 
 // Special unary SHUFPSrri case.
 // FIXME: when we want non two-address code, then we should use PSHUFD?
-def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef),
-           SHUFP_unary_shuffle_mask:$sm),
+def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef),
+                  SHUFP_unary_shuffle_mask:$sm)),
           (SHUFPSrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>,
       Requires<[HasSSE1]>;
 // Special unary SHUFPDrri case.
-def : Pat<(vector_shuffle (v2f64 VR128:$src1), (undef),
-           SHUFP_unary_shuffle_mask:$sm),
+def : Pat<(v2f64 (vector_shuffle VR128:$src1, (undef),
+                  SHUFP_unary_shuffle_mask:$sm)),
           (SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>,
       Requires<[HasSSE2]>;
 // Unary v4f32 shuffle with PSHUF* in order to fold a load.
@@ -2778,14 +2799,24 @@
           (PSHUFDmi addr:$src1, SHUFP_unary_shuffle_mask:$sm)>,
       Requires<[HasSSE2]>;
 // Special binary v4i32 shuffle cases with SHUFPS.
-def : Pat<(vector_shuffle (v4i32 VR128:$src1), (v4i32 VR128:$src2),
-           PSHUFD_binary_shuffle_mask:$sm),
+def : Pat<(v4i32 (vector_shuffle VR128:$src1, (v4i32 VR128:$src2),
+                  PSHUFD_binary_shuffle_mask:$sm)),
           (SHUFPSrri VR128:$src1, VR128:$src2, PSHUFD_binary_shuffle_mask:$sm)>,
           Requires<[HasSSE2]>;
-def : Pat<(vector_shuffle (v4i32 VR128:$src1),
-           (bc_v4i32 (memopv2i64 addr:$src2)), PSHUFD_binary_shuffle_mask:$sm),
+def : Pat<(v4i32 (vector_shuffle VR128:$src1,
+                  (bc_v4i32 (memopv2i64 addr:$src2)), PSHUFD_binary_shuffle_mask:$sm)),
           (SHUFPSrmi VR128:$src1, addr:$src2, PSHUFD_binary_shuffle_mask:$sm)>,
           Requires<[HasSSE2]>;
+// Special binary v2i64 shuffle cases using SHUFPDrri.
+def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
+                  SHUFP_shuffle_mask:$sm)),
+          (SHUFPDrri VR128:$src1, VR128:$src2, SHUFP_shuffle_mask:$sm)>,
+      Requires<[HasSSE2]>;
+// Special unary SHUFPDrri case.
+def : Pat<(v2i64 (vector_shuffle VR128:$src1, (undef),
+                  SHUFP_unary_shuffle_mask:$sm)),
+          (SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>,
+      Requires<[HasSSE2]>;
 
 // vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
 let AddedComplexity = 10 in {
@@ -2888,11 +2919,11 @@
 }
 
 // Set lowest element and zero upper elements.
-let AddedComplexity = 20 in
-def : Pat<(bc_v2i64 (vector_shuffle immAllZerosV_bc,
-                     (v2f64 (scalar_to_vector (loadf64 addr:$src))),
-                     MOVL_shuffle_mask)),
-          (MOVZQI2PQIrm addr:$src)>, Requires<[HasSSE2]>;
+let AddedComplexity = 15 in
+def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc, VR128:$src,
+                  MOVL_shuffle_mask)),
+          (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
+
 
 // FIXME: Temporary workaround since 2-wide shuffle is broken.
 def : Pat<(int_x86_sse2_movs_d VR128:$src1, VR128:$src2),

Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=45058&r1=45057&r2=45058&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Fri Dec 14 21:00:47 2007
@@ -409,6 +409,9 @@
     { X86::MOVSX64rr8,      X86::MOVSX64rm8 },
     { X86::MOVUPDrr,        X86::MOVUPDrm },
     { X86::MOVUPSrr,        X86::MOVUPSrm },
+    { X86::MOVZDI2PDIrr,    X86::MOVZDI2PDIrm },
+    { X86::MOVZQI2PQIrr,    X86::MOVZQI2PQIrm },
+    { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm },
     { X86::MOVZX16rr8,      X86::MOVZX16rm8 },
     { X86::MOVZX32rr16,     X86::MOVZX32rm16 },
     { X86::MOVZX32rr8,      X86::MOVZX32rm8 },

Modified: llvm/trunk/test/CodeGen/X86/vec_set-5.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_set-5.ll?rev=45058&r1=45057&r2=45058&view=diff

==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_set-5.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_set-5.ll Fri Dec 14 21:00:47 2007
@@ -1,7 +1,8 @@
 ; RUN: llvm-upgrade < %s | llvm-as | llc -march=x86 -mattr=+sse2 -o %t -f
-; RUN: grep movlhps   %t | count 2
+; RUN: grep movlhps   %t | count 1
 ; RUN: grep unpcklps  %t | count 1
 ; RUN: grep punpckldq %t | count 1
+; RUN: grep movq %t | count 1
 
 <4 x float> %test1(float %a, float %b) {
 	%tmp = insertelement <4 x float> zeroinitializer, float %a, uint 0

Modified: llvm/trunk/test/CodeGen/X86/vec_set-7.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_set-7.ll?rev=45058&r1=45057&r2=45058&view=diff

==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_set-7.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_set-7.ll Fri Dec 14 21:00:47 2007
@@ -1,4 +1,4 @@
-; RUN: llvm-upgrade < %s | llvm-as | llc -march=x86 -mattr=+sse2 | grep movq | count 1
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=x86 -mattr=+sse2 | grep movsd | count 1
 
 <2 x long> %test(<2 x long>* %p) {
 	%tmp = cast <2 x long>* %p to double*

Added: llvm/trunk/test/CodeGen/X86/vec_shuffle-14.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-14.ll?rev=45058&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_shuffle-14.ll (added)
+++ llvm/trunk/test/CodeGen/X86/vec_shuffle-14.ll Fri Dec 14 21:00:47 2007
@@ -0,0 +1,42 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movd | count 1
+; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep movd | count 2
+; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep movq | count 3
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep xor
+
+define <4 x i32> @t1(i32 %a) nounwind {
+entry:
+  %tmp = insertelement <4 x i32> undef, i32 %a, i32 0
+  %tmp6 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %tmp, <4 x i32> < i32 4, i32 1, i32 2, i32 3 >		; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %tmp6
+}
+
+define <2 x i64> @t2(i64 %a) nounwind {
+entry:
+  %tmp = insertelement <2 x i64> undef, i64 %a, i32 0
+  %tmp6 = shufflevector <2 x i64> zeroinitializer, <2 x i64> %tmp, <2 x i32> < i32 2, i32 1 >		; <<4 x i32>> [#uses=1]
+  ret <2 x i64> %tmp6
+}
+
+define <2 x i64> @t3(<2 x i64>* %a) nounwind {
+entry:
+  %tmp4 = load <2 x i64>* %a, align 16		; <<2 x i64>> [#uses=1]
+  %tmp6 = bitcast <2 x i64> %tmp4 to <4 x i32>		; <<4 x i32>> [#uses=1]
+  %tmp7 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %tmp6, <4 x i32> < i32 4, i32 5, i32 2, i32 3 >		; <<4 x i32>> [#uses=1]
+  %tmp8 = bitcast <4 x i32> %tmp7 to <2 x i64>		; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %tmp8
+}
+
+define <2 x i64> @t4(<2 x i64> %a) nounwind {
+entry:
+  %tmp5 = bitcast <2 x i64> %a to <4 x i32>		; <<4 x i32>> [#uses=1]
+  %tmp6 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %tmp5, <4 x i32> < i32 4, i32 5, i32 2, i32 3 >		; <<4 x i32>> [#uses=1]
+  %tmp7 = bitcast <4 x i32> %tmp6 to <2 x i64>		; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %tmp7
+}
+
+define <2 x i64> @t5(<2 x i64> %a) nounwind {
+entry:
+  %tmp6 = shufflevector <2 x i64> zeroinitializer, <2 x i64> %a, <2 x i32> < i32 2, i32 1 >		; <<4 x i32>> [#uses=1]
+  ret <2 x i64> %tmp6
+}

Added: llvm/trunk/test/CodeGen/X86/vec_shuffle-15.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-15.ll?rev=45058&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_shuffle-15.ll (added)
+++ llvm/trunk/test/CodeGen/X86/vec_shuffle-15.ll Fri Dec 14 21:00:47 2007
@@ -0,0 +1,81 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+
+define <2 x i64> @t00(<2 x i64> %a, <2 x i64> %b) nounwind {
+  %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 0, i32 0 >
+  ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t01(<2 x i64> %a, <2 x i64> %b) nounwind {
+  %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 0, i32 1 >
+  ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t02(<2 x i64> %a, <2 x i64> %b) nounwind {
+  %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 0, i32 2 >
+  ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t03(<2 x i64> %a, <2 x i64> %b) nounwind {
+  %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 0, i32 3 >
+  ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t10(<2 x i64> %a, <2 x i64> %b) nounwind {
+  %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 1, i32 0 >
+  ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t11(<2 x i64> %a, <2 x i64> %b) nounwind {
+  %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 1, i32 1 >
+  ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t12(<2 x i64> %a, <2 x i64> %b) nounwind {
+  %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 1, i32 2 >
+  ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t13(<2 x i64> %a, <2 x i64> %b) nounwind {
+  %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 1, i32 3 >
+  ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t20(<2 x i64> %a, <2 x i64> %b) nounwind {
+  %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 2, i32 0 >
+  ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t21(<2 x i64> %a, <2 x i64> %b) nounwind {
+  %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 2, i32 1 >
+  ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t22(<2 x i64> %a, <2 x i64> %b) nounwind {
+  %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 2, i32 2 >
+  ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t23(<2 x i64> %a, <2 x i64> %b) nounwind {
+  %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 2, i32 3 >
+  ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t30(<2 x i64> %a, <2 x i64> %b) nounwind {
+  %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 3, i32 0 >
+  ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t31(<2 x i64> %a, <2 x i64> %b) nounwind {
+  %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 3, i32 1 >
+  ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t32(<2 x i64> %a, <2 x i64> %b) nounwind {
+  %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 3, i32 2 >
+  ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t33(<2 x i64> %a, <2 x i64> %b) nounwind {
+  %tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 3, i32 3 >
+  ret <2 x i64> %tmp
+}
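A closing note on the hardware fact the new MOVZPQILo2PQIrr pattern (and its comment about the IA-32 documentation) depends on: register-to-register movq really does clear bits 127:64 of the destination. Assuming an SSE2-capable x86 compiler, this is easy to confirm with the _mm_move_epi64 intrinsic, which compiles to exactly that movq:

    #include <emmintrin.h>  // SSE2 intrinsics
    #include <stdint.h>
    #include <stdio.h>

    int main() {
      // Build a vector with junk in the high qword.
      __m128i v = _mm_set_epi64x((int64_t)0xDEADBEEFCAFEBABEULL,   // high qword
                                 (int64_t)0x0123456789ABCDEFULL);  // low qword
      // movq xmm, xmm: keep the low qword, zero the high qword.
      __m128i z = _mm_move_epi64(v);

      uint64_t out[2];
      _mm_storeu_si128((__m128i *)out, z);
      printf("low  = 0x%016llx\n", (unsigned long long)out[0]);
      printf("high = 0x%016llx\n", (unsigned long long)out[1]);  // prints 0
      return 0;
    }

If the high qword printed nonzero, the pattern above would be unsound; on real hardware it prints zero, matching the behavior the comment describes.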