Changes in directory llvm/lib/Target/X86:

X86ISelLowering.cpp updated: 1.151 -> 1.152
X86InstrInfo.cpp updated: 1.47 -> 1.48
X86InstrInfo.td updated: 1.259 -> 1.260
X86InstrSSE.td updated: 1.53 -> 1.54
---
Log message:

- More efficient extract_vector_elt with shuffle and movss, movsd, movd, etc.
- Some bug fixes and naming inconsistency fixes.

---
Diffs of the changes:  (+151 -70)

 X86ISelLowering.cpp |   62 +++++++++++++++++++--
 X86InstrInfo.cpp    |    6 +-
 X86InstrInfo.td     |    1
 X86InstrSSE.td      |  152 ++++++++++++++++++++++++++++++----------------------
 4 files changed, 151 insertions(+), 70 deletions(-)


Index: llvm/lib/Target/X86/X86ISelLowering.cpp
diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.151 llvm/lib/Target/X86/X86ISelLowering.cpp:1.152
--- llvm/lib/Target/X86/X86ISelLowering.cpp:1.151	Fri Mar 31 15:55:24 2006
+++ llvm/lib/Target/X86/X86ISelLowering.cpp	Mon Apr  3 15:53:28 2006
@@ -280,6 +280,7 @@
     setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
     setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
   }

   if (Subtarget->hasSSE2()) {
@@ -316,7 +317,9 @@
     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v8i16, Custom);
     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4i32, Custom);
     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
   }

@@ -1484,11 +1487,20 @@
     // Dest { 2, 1 } <= shuffle( Dest { 1, 0 }, Src { 3, 2 }
     // Expect bit 0 == 1, bit1 == 2
     SDOperand Bit0 = N->getOperand(0);
+    if (Bit0.getOpcode() != ISD::UNDEF) {
+      assert(isa<ConstantSDNode>(Bit0) && "Invalid VECTOR_SHUFFLE mask!");
+      if (cast<ConstantSDNode>(Bit0)->getValue() != 1)
+        return false;
+    }
+
     SDOperand Bit1 = N->getOperand(1);
-    assert(isa<ConstantSDNode>(Bit0) && isa<ConstantSDNode>(Bit1) &&
-           "Invalid VECTOR_SHUFFLE mask!");
-    return (cast<ConstantSDNode>(Bit0)->getValue() == 1 &&
-            cast<ConstantSDNode>(Bit1)->getValue() == 2);
+    if (Bit1.getOpcode() != ISD::UNDEF) {
+      assert(isa<ConstantSDNode>(Bit1) && "Invalid VECTOR_SHUFFLE mask!");
+      if (cast<ConstantSDNode>(Bit1)->getValue() != 2)
+        return false;
+    }
+
+    return true;
   }

   if (NumElems != 4) return false;
@@ -2660,15 +2672,55 @@
     return SDOperand();
   }
   case ISD::EXTRACT_VECTOR_ELT: {
-    // Transform it so it match pextrw which produces a 32-bit result.
+    if (!isa<ConstantSDNode>(Op.getOperand(1)))
+      return SDOperand();
+
     MVT::ValueType VT = Op.getValueType();
     if (MVT::getSizeInBits(VT) == 16) {
+      // Transform it so it matches pextrw, which produces a 32-bit result.
       MVT::ValueType EVT = (MVT::ValueType)(VT+1);
       SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
                                       Op.getOperand(0), Op.getOperand(1));
       SDOperand Assert  = DAG.getNode(ISD::AssertZext, EVT, Extract,
                                       DAG.getValueType(VT));
       return DAG.getNode(ISD::TRUNCATE, VT, Assert);
+    } else if (MVT::getSizeInBits(VT) == 32) {
+      SDOperand Vec = Op.getOperand(0);
+      unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+      if (Idx == 0)
+        return Op;
+
+      // TODO: if Idx == 2, we can use unpckhps.
+      // SHUFPS the element to the lowest double word, then movss.
+      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
+      SDOperand IdxNode = DAG.getConstant((Idx < 2) ? Idx : Idx+4,
+                                          MVT::getVectorBaseType(MaskVT));
+      std::vector<SDOperand> IdxVec;
+      IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT)));
+      IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
+      IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
+      IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
+      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec);
+      Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
+                        Vec, Vec, Mask);
+      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
+                         DAG.getConstant(0, MVT::i32));
+    } else if (MVT::getSizeInBits(VT) == 64) {
+      SDOperand Vec = Op.getOperand(0);
+      unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+      if (Idx == 0)
+        return Op;
+
+      // UNPCKHPD the element to the lowest double word, then movsd.
+      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
+      std::vector<SDOperand> IdxVec;
+      IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT)));
+      IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
+      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec);
+      Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
+                        Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
+      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
+                         DAG.getConstant(0, MVT::i32));
     }

     return SDOperand();
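
To make the new lowering concrete: for a non-zero constant index, the code
above no longer needs a round trip through memory; it shuffles the wanted
element into slot 0 and then reads slot 0 with a scalar move. A minimal
standalone C++ sketch of that index logic (illustrative only, not LLVM code;
the function names are made up):

    #include <cstdio>

    // v4f32 path: VECTOR_SHUFFLE with mask <Idx, undef, undef, undef> moves
    // element Idx into slot 0; extracting index 0 is then a plain movss.
    static float extractF32(const float V[4], unsigned Idx) {
      if (Idx == 0)
        return V[0];        // already in the low element
      float Slot0 = V[Idx]; // the shuffle's only defined result slot
      return Slot0;         // EXTRACT_VECTOR_ELT of index 0 -> movss
    }

    // v2f64 path: unpckhpd puts the high element into slot 0 (the <1, undef>
    // mask built above); extracting index 0 is then a plain movsd.
    static double extractF64(const double V[2], unsigned Idx) {
      if (Idx == 0)
        return V[0];
      double Slot0 = V[1];
      return Slot0;
    }

    int main() {
      const float  F[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
      const double D[2] = { 10.0, 11.0 };
      std::printf("%g %g\n", extractF32(F, 3), extractF64(D, 1)); // 3 11
      return 0;
    }

Either way the wanted element ends up in the low lane, where the MOVPS2SS /
MOVPD2SD / MOVPDI2DI patterns added further down can grab it.
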
Index: llvm/lib/Target/X86/X86InstrInfo.cpp
diff -u llvm/lib/Target/X86/X86InstrInfo.cpp:1.47 llvm/lib/Target/X86/X86InstrInfo.cpp:1.48
--- llvm/lib/Target/X86/X86InstrInfo.cpp:1.47	Fri Mar 24 17:15:12 2006
+++ llvm/lib/Target/X86/X86InstrInfo.cpp	Mon Apr  3 15:53:28 2006
@@ -31,8 +31,10 @@
       oc == X86::FpMOV  || oc == X86::MOVSSrr || oc == X86::MOVSDrr ||
       oc == X86::FsMOVAPSrr || oc == X86::FsMOVAPDrr ||
       oc == X86::MOVAPSrr || oc == X86::MOVAPDrr ||
-      oc == X86::MOVSS128rr || oc == X86::MOVSD128rr ||
-      oc == X86::MOVD128rr || oc == X86::MOVQ128rr) {
+      oc == X86::MOVSS2PSrr || oc == X86::MOVSD2PDrr ||
+      oc == X86::MOVPS2SSrr || oc == X86::MOVPD2SDrr ||
+      oc == X86::MOVDI2PDIrr || oc == X86::MOVQI2PQIrr ||
+      oc == X86::MOVPDI2DIrr) {
     assert(MI.getNumOperands() == 2 &&
            MI.getOperand(0).isRegister() &&
            MI.getOperand(1).isRegister() &&

Index: llvm/lib/Target/X86/X86InstrInfo.td
diff -u llvm/lib/Target/X86/X86InstrInfo.td:1.259 llvm/lib/Target/X86/X86InstrInfo.td:1.260
--- llvm/lib/Target/X86/X86InstrInfo.td:1.259	Sat Mar 25 03:45:48 2006
+++ llvm/lib/Target/X86/X86InstrInfo.td	Mon Apr  3 15:53:28 2006
@@ -284,6 +284,7 @@
 def loadi8  : PatFrag<(ops node:$ptr), (i8  (load node:$ptr))>;
 def loadi16 : PatFrag<(ops node:$ptr), (i16 (load node:$ptr))>;
 def loadi32 : PatFrag<(ops node:$ptr), (i32 (load node:$ptr))>;
+def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
 def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
 def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;

Index: llvm/lib/Target/X86/X86InstrSSE.td
diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.53 llvm/lib/Target/X86/X86InstrSSE.td:1.54
--- llvm/lib/Target/X86/X86InstrSSE.td:1.53	Fri Mar 31 15:55:24 2006
+++ llvm/lib/Target/X86/X86InstrSSE.td	Mon Apr  3 15:53:28 2006
@@ -226,24 +226,6 @@
                   "movsd {$src, $dst|$dst, $src}",
                   [(store FR64:$src, addr:$dst)]>;

-// FR32 / FR64 to 128-bit vector conversion.
-def MOVSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, FR32:$src),
-                     "movss {$src, $dst|$dst, $src}",
-                     [(set VR128:$dst,
-                       (v4f32 (scalar_to_vector FR32:$src)))]>;
-def MOVSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
-                     "movss {$src, $dst|$dst, $src}",
-                     [(set VR128:$dst,
-                       (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
-def MOVSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src),
-                     "movsd {$src, $dst|$dst, $src}",
-                     [(set VR128:$dst,
-                       (v2f64 (scalar_to_vector FR64:$src)))]>;
-def MOVSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
-                     "movsd {$src, $dst|$dst, $src}",
-                     [(set VR128:$dst,
-                       (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
-
 // Arithmetic instructions
 let isTwoAddress = 1 in {
 let isCommutable = 1 in {
@@ -1122,18 +1104,6 @@
 //===----------------------------------------------------------------------===//

 // Move Instructions
-def MOVD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
-                    "movd {$src, $dst|$dst, $src}",
-                    [(set VR128:$dst,
-                      (v4i32 (scalar_to_vector R32:$src)))]>;
-def MOVD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
-                    "movd {$src, $dst|$dst, $src}",
-                    [(set VR128:$dst,
-                      (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
-
-def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
-                    "movd {$src, $dst|$dst, $src}", []>;
-
 def MOVDQArr : PDI<0x6F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                    "movdqa {$src, $dst|$dst, $src}", []>;
 def MOVDQArm : PDI<0x6F, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
@@ -1143,18 +1113,6 @@
                    "movdqa {$src, $dst|$dst, $src}",
                    [(store (v4i32 VR128:$src), addr:$dst)]>;

-// SSE2 instructions with XS prefix
-def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
-                  "movq {$src, $dst|$dst, $src}",
-                  [(set VR128:$dst,
-                    (v2i64 (scalar_to_vector VR64:$src)))]>, XS,
-                Requires<[HasSSE2]>;
-def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
-                  "movq {$src, $dst|$dst, $src}", []>, XS,
-                Requires<[HasSSE2]>;
-def MOVQ128mr : PDI<0xD6, MRMSrcMem, (ops i64mem:$dst, VR128:$src),
-                    "movq {$src, $dst|$dst, $src}", []>;
-
 // 128-bit Integer Arithmetic
 let isTwoAddress = 1 in {
 let isCommutable = 1 in {
@@ -1549,32 +1507,102 @@
                       "pcmpeqd $dst, $dst",
                       [(set VR128:$dst, (v2f64 immAllOnesV))]>;

-// Scalar to 128-bit vector with zero extension.
+// FR32 / FR64 to 128-bit vector conversion.
+def MOVSS2PSrr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, FR32:$src),
+                     "movss {$src, $dst|$dst, $src}",
+                     [(set VR128:$dst,
+                       (v4f32 (scalar_to_vector FR32:$src)))]>;
+def MOVSS2PSrm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
+                     "movss {$src, $dst|$dst, $src}",
+                     [(set VR128:$dst,
+                       (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
+def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src),
+                     "movsd {$src, $dst|$dst, $src}",
+                     [(set VR128:$dst,
+                       (v2f64 (scalar_to_vector FR64:$src)))]>;
+def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
+                     "movsd {$src, $dst|$dst, $src}",
+                     [(set VR128:$dst,
+                       (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
+
+def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
+                      "movd {$src, $dst|$dst, $src}",
+                      [(set VR128:$dst,
+                        (v4i32 (scalar_to_vector R32:$src)))]>;
+def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
+                      "movd {$src, $dst|$dst, $src}",
+                      [(set VR128:$dst,
+                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+
+// SSE2 instructions with XS prefix
+def MOVQI2PQIrr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
+                    "movq {$src, $dst|$dst, $src}",
+                    [(set VR128:$dst,
+                      (v2i64 (scalar_to_vector VR64:$src)))]>, XS,
+                  Requires<[HasSSE2]>;
+def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
+                    "movq {$src, $dst|$dst, $src}",
+                    [(set VR128:$dst,
+                      (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+                  Requires<[HasSSE2]>;
+
+// FIXME: may not be able to eliminate this movss with coalescing since the
+// src and dest register classes are different. We really want to write this
+// pattern like this:
+// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (i32 0))),
+//           (f32 FR32:$src)>;
+def MOVPS2SSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, VR128:$src),
+                     "movss {$src, $dst|$dst, $src}",
+                     [(set FR32:$dst, (vector_extract (v4f32 VR128:$src),
+                                       (i32 0)))]>;
+def MOVPS2SSmr : SSI<0x10, MRMDestMem, (ops f32mem:$dst, VR128:$src),
+                     "movss {$src, $dst|$dst, $src}",
+                     [(store (f32 (vector_extract (v4f32 VR128:$src),
+                                   (i32 0))), addr:$dst)]>;
+def MOVPD2SDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, VR128:$src),
+                     "movsd {$src, $dst|$dst, $src}",
+                     [(set FR64:$dst, (vector_extract (v2f64 VR128:$src),
+                                       (i32 0)))]>;
+def MOVPD2SDmr : SDI<0x10, MRMDestMem, (ops f64mem:$dst, VR128:$src),
+                     "movsd {$src, $dst|$dst, $src}",
+                     [(store (f64 (vector_extract (v2f64 VR128:$src),
+                                   (i32 0))), addr:$dst)]>;
+def MOVPDI2DIrr : PDI<0x7E, MRMSrcReg, (ops R32:$dst, VR128:$src),
+                      "movd {$src, $dst|$dst, $src}",
+                      [(set R32:$dst, (vector_extract (v4i32 VR128:$src),
+                                       (i32 0)))]>;
+def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
+                      "movd {$src, $dst|$dst, $src}",
+                      [(store (i32 (vector_extract (v4i32 VR128:$src),
+                                    (i32 0))), addr:$dst)]>;
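
These vector_extract-of-index-0 patterns work because element 0 of a value in
an XMM register is exactly its low scalar bits, so the movss/movsd/movd is
only needed to cross register classes (VR128 to FR32/FR64/R32); that is what
the FIXME above is getting at. A small standalone C++ check of that layout
assumption (illustrative only):

    #include <cstdio>
    #include <cstring>

    int main() {
      float V[4] = { 1.5f, 2.5f, 3.5f, 4.5f };
      float Lo;
      // Element 0 occupies the lowest 32 bits of the vector, so extracting
      // it is a reinterpretation of those bits, not a data movement.
      std::memcpy(&Lo, &V[0], sizeof(Lo));
      std::printf("%g\n", Lo); // prints 1.5
      return 0;
    }
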
+
+// Move to lower bits of a VR128, leaving upper bits alone.
 // Three operand (but two address) aliases.
 let isTwoAddress = 1 in {
-def MOVZSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR32:$src2),
+def MOVLSS2PSrr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR32:$src2),
                       "movss {$src2, $dst|$dst, $src2}", []>;
-def MOVZSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR64:$src2),
+def MOVLSD2PDrr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR64:$src2),
                       "movsd {$src2, $dst|$dst, $src2}", []>;
-def MOVZD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, R32:$src2),
+def MOVLDI2PDIrr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, R32:$src2),
                       "movd {$src2, $dst|$dst, $src2}", []>;
-def MOVZQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR64:$src2),
-                   "movq {$src2, $dst|$dst, $src2}", []>;
 }

+// Move to lower bits of a VR128, zeroing the upper bits.
 // Loading from memory automatically zeroing upper bits.
-def MOVZSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
+def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
                       "movss {$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v4f32 (X86zexts2vec (loadf32 addr:$src))))]>;
-def MOVZSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
+def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                       "movsd {$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v2f64 (X86zexts2vec (loadf64 addr:$src))))]>;
-def MOVZD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
-                     "movd {$src, $dst|$dst, $src}",
-                     [(set VR128:$dst,
-                       (v4i32 (X86zexts2vec (loadi32 addr:$src))))]>;
+def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
+                       "movd {$src, $dst|$dst, $src}",
+                       [(set VR128:$dst,
+                         (v4i32 (X86zexts2vec (loadi32 addr:$src))))]>;
+def MOVZQI2PQIrm : PDI<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
+                       "movd {$src, $dst|$dst, $src}",
+                       [(set VR128:$dst,
+                         (v2i64 (X86zexts2vec (loadi64 addr:$src))))]>;

 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
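
For reference, the X86zexts2vec node used above means "place the scalar in
element 0 and zero every other element". The register form is selected by the
patterns in the last hunk below, which first zero the whole register
(V_SET0_*) and then overwrite the low element with one of the MOVL* moves. A
standalone C++ sketch of that two-step sequence (illustrative only, not LLVM
code):

    #include <cstdio>

    int main() {
      float X = 3.25f;
      float Vec[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; // V_SET0_PS: zero the register
      Vec[0] = X;                                // MOVLSS2PSrr: write element 0
      std::printf("{%g, %g, %g, %g}\n", Vec[0], Vec[1], Vec[2], Vec[3]);
      return 0; // prints {3.25, 0, 0, 0}
    }
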
@@ -1621,9 +1649,9 @@

 // Scalar to v8i16 / v16i8. The source may be a R32, but only the lower 8 or
 // 16-bits matter.
-def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
+def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVDI2PDIrr R32:$src)>,
       Requires<[HasSSE2]>;
-def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
+def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVDI2PDIrr R32:$src)>,
       Requires<[HasSSE2]>;

 // bit_convert
@@ -1659,17 +1687,15 @@

 // Zeroing a VR128 then do a MOVS* to the lower bits.
 def : Pat<(v2f64 (X86zexts2vec FR64:$src)),
-          (MOVZSD128rr (V_SET0_PD), FR64:$src)>, Requires<[HasSSE2]>;
+          (MOVLSD2PDrr (V_SET0_PD), FR64:$src)>, Requires<[HasSSE2]>;
 def : Pat<(v4f32 (X86zexts2vec FR32:$src)),
-          (MOVZSS128rr (V_SET0_PS), FR32:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (X86zexts2vec VR64:$src)),
-          (MOVZQ128rr (V_SET0_PI), VR64:$src)>, Requires<[HasSSE2]>;
+          (MOVLSS2PSrr (V_SET0_PS), FR32:$src)>, Requires<[HasSSE2]>;
 def : Pat<(v4i32 (X86zexts2vec R32:$src)),
-          (MOVZD128rr (V_SET0_PI), R32:$src)>, Requires<[HasSSE2]>;
+          (MOVLDI2PDIrr (V_SET0_PI), R32:$src)>, Requires<[HasSSE2]>;
 def : Pat<(v8i16 (X86zexts2vec R16:$src)),
-          (MOVZD128rr (V_SET0_PI), (MOVZX32rr16 R16:$src))>, Requires<[HasSSE2]>;
+          (MOVLDI2PDIrr (V_SET0_PI), (MOVZX32rr16 R16:$src))>, Requires<[HasSSE2]>;
 def : Pat<(v16i8 (X86zexts2vec R8:$src)),
-          (MOVZD128rr (V_SET0_PI), (MOVZX32rr8 R8:$src))>, Requires<[HasSSE2]>;
+          (MOVLDI2PDIrr (V_SET0_PI), (MOVZX32rr8 R8:$src))>, Requires<[HasSSE2]>;

 // Splat v2f64 / v2i64
 def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_mask:$sm),


_______________________________________________
llvm-commits mailing list
llvm-commits@cs.uiuc.edu
http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits