Changes in directory llvm/lib/Target/X86:
X86ISelLowering.cpp updated: 1.131 -> 1.132 X86ISelLowering.h updated: 1.42 -> 1.43 X86InstrSSE.td updated: 1.30 -> 1.31 --- Log message: Build arbitrary vector with more than 2 distinct scalar elements with a series of unpack and interleave ops. --- Diffs of the changes: (+104 -5) X86ISelLowering.cpp | 29 +++++++++++++++++-- X86ISelLowering.h | 4 ++ X86InstrSSE.td | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 104 insertions(+), 5 deletions(-) Index: llvm/lib/Target/X86/X86ISelLowering.cpp diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.131 llvm/lib/Target/X86/X86ISelLowering.cpp:1.132 --- llvm/lib/Target/X86/X86ISelLowering.cpp:1.131 Fri Mar 24 19:33:37 2006 +++ llvm/lib/Target/X86/X86ISelLowering.cpp Sat Mar 25 03:37:23 2006 @@ -2376,7 +2376,9 @@ abort(); } case ISD::BUILD_VECTOR: { + std::set<SDOperand> Values; SDOperand Elt0 = Op.getOperand(0); + Values.insert(Elt0); bool Elt0IsZero = (isa<ConstantSDNode>(Elt0) && cast<ConstantSDNode>(Elt0)->getValue() == 0) || (isa<ConstantFPSDNode>(Elt0) && @@ -2384,15 +2386,16 @@ bool RestAreZero = true; unsigned NumElems = Op.getNumOperands(); for (unsigned i = 1; i < NumElems; ++i) { - SDOperand V = Op.getOperand(i); - if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(V)) { + SDOperand Elt = Op.getOperand(i); + if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Elt)) { if (!FPC->isExactlyValue(+0.0)) RestAreZero = false; - } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V)) { + } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { if (!C->isNullValue()) RestAreZero = false; } else RestAreZero = false; + Values.insert(Elt); } if (RestAreZero) { @@ -2402,6 +2405,25 @@ return DAG.getNode(X86ISD::ZEXT_S2VEC, Op.getValueType(), Elt0); } + if (Values.size() > 2) { + // Expand into a number of unpckl*. + // e.g. 
for v4f32 + // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> + // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> + // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> + MVT::ValueType VT = Op.getValueType(); + std::vector<SDOperand> V(NumElems); + for (unsigned i = 0; i < NumElems; ++i) + V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); + NumElems >>= 1; + while (NumElems != 0) { + for (unsigned i = 0; i < NumElems; ++i) + V[i] = DAG.getNode(X86ISD::UNPCKL, VT, V[i], V[i + NumElems]); + NumElems >>= 1; + } + return V[0]; + } + return SDOperand(); } } @@ -2439,6 +2461,7 @@ case X86ISD::Wrapper: return "X86ISD::Wrapper"; case X86ISD::S2VEC: return "X86ISD::S2VEC"; case X86ISD::ZEXT_S2VEC: return "X86ISD::ZEXT_S2VEC"; + case X86ISD::UNPCKL: return "X86ISD::UNPCKL"; } } Index: llvm/lib/Target/X86/X86ISelLowering.h diff -u llvm/lib/Target/X86/X86ISelLowering.h:1.42 llvm/lib/Target/X86/X86ISelLowering.h:1.43 --- llvm/lib/Target/X86/X86ISelLowering.h:1.42 Fri Mar 24 17:15:12 2006 +++ llvm/lib/Target/X86/X86ISelLowering.h Sat Mar 25 03:37:23 2006 @@ -153,6 +153,10 @@ /// ZEXT_S2VEC - SCALAR_TO_VECTOR with zero extension. The destination base /// does not have to match the operand type. ZEXT_S2VEC, + + /// UNPCKL - Unpack and interleave low. This corresponds to X86::UNPCKLPS, + /// X86::PUNPCKL*. + UNPCKL, }; // X86 specific condition code. 
These correspond to X86_*_COND in Index: llvm/lib/Target/X86/X86InstrSSE.td diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.30 llvm/lib/Target/X86/X86InstrSSE.td:1.31 --- llvm/lib/Target/X86/X86InstrSSE.td:1.30 Sat Mar 25 00:03:26 2006 +++ llvm/lib/Target/X86/X86InstrSSE.td Sat Mar 25 03:37:23 2006 @@ -28,6 +28,11 @@ def X86zexts2vec : SDNode<"X86ISD::ZEXT_S2VEC", SDTypeProfile<1, 1, []>, []>; +def SDTUnpckl : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>; +def X86unpckl : SDNode<"X86ISD::UNPCKL", SDTUnpckl, + []>; + //===----------------------------------------------------------------------===// // SSE pattern fragments //===----------------------------------------------------------------------===// @@ -787,10 +792,14 @@ "unpckhpd {$src2, $dst|$dst, $src2}", []>; def UNPCKLPSrr : PSI<0x14, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), - "unpcklps {$src2, $dst|$dst, $src2}", []>; + "unpcklps {$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v4f32 (X86unpckl VR128:$src1, + VR128:$src2)))]>; def UNPCKLPSrm : PSI<0x14, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), - "unpcklps {$src2, $dst|$dst, $src2}", []>; + "unpcklps {$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v4f32 (X86unpckl VR128:$src1, + (load addr:$src2))))]>; def UNPCKLPDrr : PDI<0x14, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), "unpcklpd {$src2, $dst|$dst, $src2}", []>; @@ -885,6 +894,69 @@ "psubd {$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4i32 (sub VR128:$src1, (load addr:$src2))))]>; + +// Unpack and interleave +def PUNPCKLBWrr : PDI<0x60, MRMSrcReg, + (ops VR128:$dst, VR128:$src1, VR128:$src2), + "punpcklbw {$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v16i8 (X86unpckl VR128:$src1, + VR128:$src2)))]>; +def PUNPCKLBWrm : PDI<0x60, MRMSrcMem, + (ops VR128:$dst, VR128:$src1, i128mem:$src2), + "punpcklbw {$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v16i8 (X86unpckl VR128:$src1, + (load addr:$src2))))]>; +def PUNPCKLWDrr : PDI<0x61, MRMSrcReg, + 
(ops VR128:$dst, VR128:$src1, VR128:$src2), + "punpcklwd {$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v8i16 (X86unpckl VR128:$src1, + VR128:$src2)))]>; +def PUNPCKLWDrm : PDI<0x61, MRMSrcMem, + (ops VR128:$dst, VR128:$src1, i128mem:$src2), + "punpcklwd {$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v8i16 (X86unpckl VR128:$src1, + (load addr:$src2))))]>; +def PUNPCKLDQrr : PDI<0x62, MRMSrcReg, + (ops VR128:$dst, VR128:$src1, VR128:$src2), + "punpckldq {$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v4i32 (X86unpckl VR128:$src1, + VR128:$src2)))]>; +def PUNPCKLDQrm : PDI<0x62, MRMSrcMem, + (ops VR128:$dst, VR128:$src1, i128mem:$src2), + "punpckldq {$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v4i32 (X86unpckl VR128:$src1, + (load addr:$src2))))]>; +def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, + (ops VR128:$dst, VR128:$src1, VR128:$src2), + "punpcklqdq {$src2, $dst|$dst, $src2}", []>; +def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, + (ops VR128:$dst, VR128:$src1, i128mem:$src2), + "punpcklqdq {$src2, $dst|$dst, $src2}", []>; + +def PUNPCKHBWrr : PDI<0x68, MRMSrcReg, + (ops VR128:$dst, VR128:$src1, VR128:$src2), + "punpckhbw {$src2, $dst|$dst, $src2}", []>; +def PUNPCKHBWrm : PDI<0x68, MRMSrcMem, + (ops VR128:$dst, VR128:$src1, i128mem:$src2), + "punpckhbw {$src2, $dst|$dst, $src2}", []>; +def PUNPCKHWDrr : PDI<0x69, MRMSrcReg, + (ops VR128:$dst, VR128:$src1, VR128:$src2), + "punpckhwd {$src2, $dst|$dst, $src2}", []>; +def PUNPCKHWDrm : PDI<0x69, MRMSrcMem, + (ops VR128:$dst, VR128:$src1, i128mem:$src2), + "punpckhwd {$src2, $dst|$dst, $src2}", []>; +def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg, + (ops VR128:$dst, VR128:$src1, VR128:$src2), + "punpckhdq {$src2, $dst|$dst, $src2}", []>; +def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem, + (ops VR128:$dst, VR128:$src1, i128mem:$src2), + "punpckhdq {$src2, $dst|$dst, $src2}", []>; +def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, + (ops VR128:$dst, VR128:$src1, VR128:$src2), + "punpckhqdq {$src2, $dst|$dst, $src2}", []>; +def PUNPCKHQDQrm : PDI<0x6D, 
MRMSrcMem, + (ops VR128:$dst, VR128:$src1, i128mem:$src2), + "punpckhqdq {$src2, $dst|$dst, $src2}", []>; } //===----------------------------------------------------------------------===// _______________________________________________ llvm-commits mailing list llvm-commits@cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits