Author: evancheng Date: Mon Dec 10 19:46:18 2007 New Revision: 44836 URL: http://llvm.org/viewvc/llvm-project?rev=44836&view=rev Log: - Improved v8i16 shuffle lowering. It now uses pshuflw and pshufhw as much as possible before resorting to pextrw and pinsrw. - Better codegen for v4i32 shuffles masquerading as v8i16 or v16i8 shuffles. - Improved (i16 extract_vector_element 0) codegen by rewriting it as a truncate of (i32 extract_vector_element 0), which does not require a pextrw.
Added: llvm/trunk/test/CodeGen/X86/vec_shuffle-13.ll Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/test/CodeGen/X86/vec_shuffle-12.ll Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=44836&r1=44835&r2=44836&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Dec 10 19:46:18 2007 @@ -23,6 +23,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/Function.h" #include "llvm/Intrinsics.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -35,6 +36,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ParameterAttributes.h" using namespace llvm; @@ -2714,7 +2716,7 @@ if (Arg.getOpcode() == ISD::UNDEF) continue; assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); unsigned Val = cast<ConstantSDNode>(Arg)->getValue(); - if (Val > 4) + if (Val >= 4) return false; } @@ -3130,6 +3132,8 @@ return V; } +/// is4WideVector - Returns true if the specific v8i16 or v16i8 vector is +/// actually just a 4 wide vector. e.g. <a, a, y, y, d, d, x, x> SDOperand X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { // All zero's are handled with pxor, all one's are handled with pcmpeqd. 
@@ -3154,7 +3158,7 @@ unsigned NumNonZero = 0; unsigned NonZeros = 0; unsigned NumNonZeroImms = 0; - std::set<SDOperand> Values; + SmallSet<SDOperand, 8> Values; for (unsigned i = 0; i < NumElems; ++i) { SDOperand Elt = Op.getOperand(i); if (Elt.getOpcode() != ISD::UNDEF) { @@ -3314,59 +3318,179 @@ SDOperand LowerVECTOR_SHUFFLEv8i16(SDOperand V1, SDOperand V2, SDOperand PermMask, SelectionDAG &DAG, TargetLowering &TLI) { + SDOperand NewV; MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(8); MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); - if (isPSHUFHW_PSHUFLWMask(PermMask.Val)) { - // Handle v8i16 shuffle high / low shuffle node pair. + MVT::ValueType PtrVT = TLI.getPointerTy(); + SmallVector<SDOperand, 8> MaskElts(PermMask.Val->op_begin(), + PermMask.Val->op_end()); + + // First record which half of which vector the low elements come from. + SmallVector<unsigned, 4> LowQuad(4); + for (unsigned i = 0; i < 4; ++i) { + SDOperand Elt = MaskElts[i]; + if (Elt.getOpcode() == ISD::UNDEF) + continue; + unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); + int QuadIdx = EltIdx / 4; + ++LowQuad[QuadIdx]; + } + int BestLowQuad = -1; + unsigned MaxQuad = 1; + for (unsigned i = 0; i < 4; ++i) { + if (LowQuad[i] > MaxQuad) { + BestLowQuad = i; + MaxQuad = LowQuad[i]; + } + } + + // Record which half of which vector the high elements come from. + SmallVector<unsigned, 4> HighQuad(4); + for (unsigned i = 4; i < 8; ++i) { + SDOperand Elt = MaskElts[i]; + if (Elt.getOpcode() == ISD::UNDEF) + continue; + unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); + int QuadIdx = EltIdx / 4; + ++HighQuad[QuadIdx]; + } + int BestHighQuad = -1; + MaxQuad = 1; + for (unsigned i = 0; i < 4; ++i) { + if (HighQuad[i] > MaxQuad) { + BestHighQuad = i; + MaxQuad = HighQuad[i]; + } + } + + // If it's possible to sort parts of either half with PSHUF{H|L}W, then do it. + if (BestLowQuad != -1 || BestHighQuad != -1) { + // First sort the 4 chunks in order using shufpd. 
SmallVector<SDOperand, 8> MaskVec; - for (unsigned i = 0; i != 4; ++i) - MaskVec.push_back(PermMask.getOperand(i)); - for (unsigned i = 4; i != 8; ++i) - MaskVec.push_back(DAG.getConstant(i, MaskEVT)); - SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8); - V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V2, Mask); - MaskVec.clear(); - for (unsigned i = 0; i != 4; ++i) - MaskVec.push_back(DAG.getConstant(i, MaskEVT)); - for (unsigned i = 4; i != 8; ++i) - MaskVec.push_back(PermMask.getOperand(i)); - Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8); - return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V2, Mask); + if (BestLowQuad != -1) + MaskVec.push_back(DAG.getConstant(BestLowQuad, MVT::i32)); + else + MaskVec.push_back(DAG.getConstant(0, MVT::i32)); + if (BestHighQuad != -1) + MaskVec.push_back(DAG.getConstant(BestHighQuad, MVT::i32)); + else + MaskVec.push_back(DAG.getConstant(1, MVT::i32)); + SDOperand Mask= DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec[0],2); + NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64, + DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V1), + DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V2), Mask); + NewV = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, NewV); + + // Now sort high and low parts separately. + BitVector InOrder(8); + if (BestLowQuad != -1) { + // Sort lower half in order using PSHUFLW. + MaskVec.clear(); + bool AnyOutOrder = false; + for (unsigned i = 0; i != 4; ++i) { + SDOperand Elt = MaskElts[i]; + if (Elt.getOpcode() == ISD::UNDEF) { + MaskVec.push_back(Elt); + InOrder.set(i); + } else { + unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); + if (EltIdx != i) + AnyOutOrder = true; + MaskVec.push_back(DAG.getConstant(EltIdx % 4, MaskEVT)); + // If this element is in the right place after this shuffle, then + // remember it. 
+ if ((int)(EltIdx / 4) == BestLowQuad) + InOrder.set(i); + } + } + if (AnyOutOrder) { + for (unsigned i = 4; i != 8; ++i) + MaskVec.push_back(DAG.getConstant(i, MaskEVT)); + SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8); + NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask); + } + } + + if (BestHighQuad != -1) { + // Sort high half in order using PSHUFHW if possible. + MaskVec.clear(); + for (unsigned i = 0; i != 4; ++i) + MaskVec.push_back(DAG.getConstant(i, MaskEVT)); + bool AnyOutOrder = false; + for (unsigned i = 4; i != 8; ++i) { + SDOperand Elt = MaskElts[i]; + if (Elt.getOpcode() == ISD::UNDEF) { + MaskVec.push_back(Elt); + InOrder.set(i); + } else { + unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); + if (EltIdx != i) + AnyOutOrder = true; + MaskVec.push_back(DAG.getConstant((EltIdx % 4) + 4, MaskEVT)); + // If this element is in the right place after this shuffle, then + // remember it. + if ((int)(EltIdx / 4) == BestHighQuad) + InOrder.set(i); + } + } + if (AnyOutOrder) { + SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8); + NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask); + } + } + + // The other elements are put in the right place using pextrw and pinsrw. + for (unsigned i = 0; i != 8; ++i) { + if (InOrder[i]) + continue; + SDOperand Elt = MaskElts[i]; + unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); + if (EltIdx == i) + continue; + SDOperand ExtOp = (EltIdx < 8) + ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1, + DAG.getConstant(EltIdx, PtrVT)) + : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2, + DAG.getConstant(EltIdx - 8, PtrVT)); + NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp, + DAG.getConstant(i, PtrVT)); + } + return NewV; } - // Lower than into extracts and inserts but try to do as few as possible. + // PSHUF{H|L}W are not used. Lower into extracts and inserts but try to use + ///as few as possible. 
// First, let's find out how many elements are already in the right order. unsigned V1InOrder = 0; unsigned V1FromV1 = 0; unsigned V2InOrder = 0; unsigned V2FromV2 = 0; - SmallVector<unsigned, 8> V1Elts; - SmallVector<unsigned, 8> V2Elts; + SmallVector<SDOperand, 8> V1Elts; + SmallVector<SDOperand, 8> V2Elts; for (unsigned i = 0; i < 8; ++i) { - SDOperand Elt = PermMask.getOperand(i); + SDOperand Elt = MaskElts[i]; if (Elt.getOpcode() == ISD::UNDEF) { - V1Elts.push_back(i); - V2Elts.push_back(i); + V1Elts.push_back(Elt); + V2Elts.push_back(Elt); ++V1InOrder; ++V2InOrder; + continue; + } + unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); + if (EltIdx == i) { + V1Elts.push_back(Elt); + V2Elts.push_back(DAG.getConstant(i+8, MaskEVT)); + ++V1InOrder; + } else if (EltIdx == i+8) { + V1Elts.push_back(Elt); + V2Elts.push_back(DAG.getConstant(i, MaskEVT)); + ++V2InOrder; + } else if (EltIdx < 8) { + V1Elts.push_back(Elt); + ++V1FromV1; } else { - unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); - if (EltIdx == i) { - V1Elts.push_back(i); - V2Elts.push_back(i+8); - ++V1InOrder; - } else if (EltIdx == i+8) { - V1Elts.push_back(i+8); - V2Elts.push_back(i); - ++V2InOrder; - } else { - V1Elts.push_back(EltIdx); - V2Elts.push_back(EltIdx); - if (EltIdx < 8) - ++V1FromV1; - else - ++V2FromV2; - } + V2Elts.push_back(DAG.getConstant(EltIdx-8, MaskEVT)); + ++V2FromV2; } } @@ -3377,33 +3501,92 @@ std::swap(V1FromV1, V2FromV2); } - MVT::ValueType PtrVT = TLI.getPointerTy(); - if (V1FromV1) { - // If there are elements that are from V1 but out of place, - // then first sort them in place - SmallVector<SDOperand, 8> MaskVec; + if ((V1FromV1 + V1InOrder) != 8) { + // Some elements are from V2. 
+ if (V1FromV1) { + // If there are elements that are from V1 but out of place, + // then first sort them in place + SmallVector<SDOperand, 8> MaskVec; + for (unsigned i = 0; i < 8; ++i) { + SDOperand Elt = V1Elts[i]; + if (Elt.getOpcode() == ISD::UNDEF) { + MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); + continue; + } + unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); + if (EltIdx >= 8) + MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); + else + MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT)); + } + SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8); + V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask); + } + + NewV = V1; for (unsigned i = 0; i < 8; ++i) { - unsigned EltIdx = V1Elts[i]; - if (EltIdx >= 8) - MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT)); - else - MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT)); + SDOperand Elt = V1Elts[i]; + if (Elt.getOpcode() == ISD::UNDEF) + continue; + unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); + if (EltIdx < 8) + continue; + SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2, + DAG.getConstant(EltIdx - 8, PtrVT)); + NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp, + DAG.getConstant(i, PtrVT)); } - SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8); - V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask); + return NewV; + } else { + // All elements are from V1. + NewV = V1; + for (unsigned i = 0; i < 8; ++i) { + SDOperand Elt = V1Elts[i]; + if (Elt.getOpcode() == ISD::UNDEF) + continue; + unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); + SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1, + DAG.getConstant(EltIdx, PtrVT)); + NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp, + DAG.getConstant(i, PtrVT)); + } + return NewV; } +} - // Now let's insert elements from the other vector. 
- for (unsigned i = 0; i < 8; ++i) { - unsigned EltIdx = V1Elts[i]; - if (EltIdx < 8) - continue; - SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2, - DAG.getConstant(EltIdx - 8, PtrVT)); - V1 = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V1, ExtOp, - DAG.getConstant(i, PtrVT)); +/// RewriteAs4WideShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide +/// ones if possible. This can be done when every pair / quad of shuffle mask +/// elements point to elements in the right sequence. e.g. +/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15> +static +SDOperand RewriteAs4WideShuffle(SDOperand V1, SDOperand V2, + SDOperand PermMask, SelectionDAG &DAG, + TargetLowering &TLI) { + unsigned NumElems = PermMask.getNumOperands(); + unsigned Scale = NumElems / 4; + SmallVector<SDOperand, 4> MaskVec; + for (unsigned i = 0; i < NumElems; i += Scale) { + unsigned StartIdx = ~0U; + for (unsigned j = 0; j < Scale; ++j) { + SDOperand Elt = PermMask.getOperand(i+j); + if (Elt.getOpcode() == ISD::UNDEF) + continue; + unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue(); + if (StartIdx == ~0U) + StartIdx = EltIdx - (EltIdx % Scale); + if (EltIdx != StartIdx + j) + return SDOperand(); + } + if (StartIdx == ~0U) + MaskVec.push_back(DAG.getNode(ISD::UNDEF, MVT::i32)); + else + MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MVT::i32)); } - return V1; + + V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1); + V2 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V2); + return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, V2, + DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, &MaskVec[0],4)); } SDOperand @@ -3544,18 +3727,31 @@ } } + // If the shuffle can be rewritten as a 4 wide shuffle, then do it! 
+ if (VT == MVT::v8i16 || VT == MVT::v16i8) { + SDOperand NewOp = RewriteAs4WideShuffle(V1, V2, PermMask, DAG, *this); + if (NewOp.Val) + return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG)); + } + // Handle v8i16 specifically since SSE can do byte extraction and insertion. - if (VT == MVT::v8i16) - return LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this); + if (VT == MVT::v8i16) { + SDOperand NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this); + if (NewOp.Val) + return NewOp; + } - if (NumElems == 4 && MVT::getSizeInBits(VT) != 64) { + // Handle all 4 wide cases with a number of shuffles. + if (NumElems == 4 && MVT::getSizeInBits(VT) != 64) { // Don't do this for MMX. MVT::ValueType MaskVT = PermMask.getValueType(); MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); SmallVector<std::pair<int, int>, 8> Locs; Locs.reserve(NumElems); - SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); - SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); + SmallVector<SDOperand, 8> Mask1(NumElems, + DAG.getNode(ISD::UNDEF, MaskEVT)); + SmallVector<SDOperand, 8> Mask2(NumElems, + DAG.getNode(ISD::UNDEF, MaskEVT)); unsigned NumHi = 0; unsigned NumLo = 0; // If no more than two elements come from either vector. This can be @@ -3661,6 +3857,13 @@ MVT::ValueType VT = Op.getValueType(); // TODO: handle v16i8. if (MVT::getSizeInBits(VT) == 16) { + SDOperand Vec = Op.getOperand(0); + unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); + if (Idx == 0) + return DAG.getNode(ISD::TRUNCATE, MVT::i16, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, + DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Vec), + Op.getOperand(1))); // Transform it so it match pextrw which produces a 32-bit result. 
MVT::ValueType EVT = (MVT::ValueType)(VT+1); SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT, @@ -3669,7 +3872,6 @@ DAG.getValueType(VT)); return DAG.getNode(ISD::TRUNCATE, VT, Assert); } else if (MVT::getSizeInBits(VT) == 32) { - SDOperand Vec = Op.getOperand(0); unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); if (Idx == 0) return Op; @@ -3686,12 +3888,12 @@ push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &IdxVec[0], IdxVec.size()); + SDOperand Vec = Op.getOperand(0); Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, DAG.getConstant(0, getPointerTy())); } else if (MVT::getSizeInBits(VT) == 64) { - SDOperand Vec = Op.getOperand(0); unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); if (Idx == 0) return Op; @@ -3706,6 +3908,7 @@ push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &IdxVec[0], IdxVec.size()); + SDOperand Vec = Op.getOperand(0); Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, Modified: llvm/trunk/test/CodeGen/X86/vec_shuffle-12.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-12.ll?rev=44836&r1=44835&r2=44836&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/vec_shuffle-12.ll (original) +++ llvm/trunk/test/CodeGen/X86/vec_shuffle-12.ll Mon Dec 10 19:46:18 2007 @@ -1,37 +1,28 @@ ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep punpck -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pextrw | count 7 -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep 
pinsrw | count 7 -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshuf | count 2 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pextrw | count 4 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pinsrw | count 6 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshuflw | count 3 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufhw | count 2 -define void @t1(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) { +define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) { %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 > - store <8 x i16> %tmp3, <8 x i16>* %res - ret void + ret <8 x i16> %tmp3 } -define void @t2(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) { - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B - %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 13, i32 4, i32 5, i32 6, i32 7 > - store <8 x i16> %tmp3, <8 x i16>* %res - ret void +define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) { + %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7 > + ret <8 x i16> %tmp } -define void @t3(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) { - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B - %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 > - store <8 x i16> %tmp3, <8 x i16>* %res - ret void +define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) { + %tmp = shufflevector <8 x i16> %A, <8 x i16> %A, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 > + ret <8 x i16> %tmp } -define void @t4(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) { - %tmp1 = load <8 x i16>* %A - %tmp2 = load <8 x i16>* %B - %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 
5, i32 6, i32 7 > - store <8 x i16> %tmp3, <8 x i16>* %res - ret void +define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) { + %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 7, i32 2, i32 3, i32 1, i32 5, i32 6, i32 5 > + ret <8 x i16> %tmp } Added: llvm/trunk/test/CodeGen/X86/vec_shuffle-13.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-13.ll?rev=44836&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/vec_shuffle-13.ll (added) +++ llvm/trunk/test/CodeGen/X86/vec_shuffle-13.ll Mon Dec 10 19:46:18 2007 @@ -0,0 +1,21 @@ +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movlhps | count 1 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movss | count 1 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufd | count 1 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshuflw | count 1 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufhw | count 1 + +define <8 x i16> @t1(<8 x i16> %A, <8 x i16> %B) { + %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 0, i32 1, i32 10, i32 11, i32 2, i32 3 > + ret <8 x i16> %tmp +} + +define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) { + %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 > + ret <8 x i16> %tmp +} + +define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) { + %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 0, i32 3, i32 2, i32 4, i32 6, i32 4, i32 7 > + ret <8 x i16> %tmp +} _______________________________________________ llvm-commits mailing list llvm-commits@cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits