Changes in directory llvm/lib/Target/X86:
X86ISelLowering.cpp updated: 1.141 -> 1.142 X86InstrSSE.td updated: 1.40 -> 1.41 --- Log message: - Only use pshufd for v4i32 vector shuffles. - Other shuffle related fixes. --- Diffs of the changes: (+83 -61) X86ISelLowering.cpp | 51 +++++++++++++++++++++------- X86InstrSSE.td | 93 +++++++++++++++++++++++++--------------------------- 2 files changed, 83 insertions(+), 61 deletions(-) Index: llvm/lib/Target/X86/X86ISelLowering.cpp diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.141 llvm/lib/Target/X86/X86ISelLowering.cpp:1.142 --- llvm/lib/Target/X86/X86ISelLowering.cpp:1.141 Tue Mar 28 17:41:33 2006 +++ llvm/lib/Target/X86/X86ISelLowering.cpp Tue Mar 28 19:30:51 2006 @@ -1583,15 +1583,21 @@ return Mask; } -/// CommuteVectorShuffleIfNeeded - Swap vector_shuffle operands (as well as -/// values in ther permute mask if needed. Return an empty SDOperand is it is -/// already well formed. -static SDOperand CommuteVectorShuffleIfNeeded(SDOperand V1, SDOperand V2, - SDOperand Mask, MVT::ValueType VT, - SelectionDAG &DAG) { +/// NormalizeVectorShuffle - Swap vector_shuffle operands (as well as +/// values in ther permute mask if needed. Use V1 as second vector if it is +/// undef. Return an empty SDOperand is it is already well formed. +static SDOperand NormalizeVectorShuffle(SDOperand V1, SDOperand V2, + SDOperand Mask, MVT::ValueType VT, + SelectionDAG &DAG) { unsigned NumElems = Mask.getNumOperands(); SDOperand Half1 = Mask.getOperand(0); SDOperand Half2 = Mask.getOperand(NumElems/2); + bool V2Undef = false; + if (V2.getOpcode() == ISD::UNDEF) { + V2Undef = true; + V2 = V1; + } + if (cast<ConstantSDNode>(Half1)->getValue() >= NumElems && cast<ConstantSDNode>(Half2)->getValue() < NumElems) { // Swap the operands and change mask. @@ -1604,6 +1610,10 @@ DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), MaskVec); return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask); } + + if (V2Undef) + return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); + return SDOperand(); } @@ -2387,8 +2397,26 @@ MVT::ValueType VT = Op.getValueType(); unsigned NumElems = PermMask.getNumOperands(); - if (NumElems == 2) - return CommuteVectorShuffleIfNeeded(V1, V2, PermMask, VT, DAG); + if (X86::isUNPCKLMask(PermMask.Val) || + X86::isUNPCKHMask(PermMask.Val)) + // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*. + return SDOperand(); + + // PSHUFD's 2nd vector must be undef. + if (MVT::isInteger(VT) && X86::isPSHUFDMask(PermMask.Val)) + if (V2.getOpcode() == ISD::UNDEF) + return SDOperand(); + else + return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, + DAG.getNode(ISD::UNDEF, V1.getValueType()), + PermMask); + + if (NumElems == 2 || + X86::isSplatMask(PermMask.Val) || + X86::isSHUFPMask(PermMask.Val)) { + return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG); + } +#if 0 else if (X86::isSplatMask(PermMask.Val)) { // Handle splat cases. if (V2.getOpcode() == ISD::UNDEF) @@ -2400,10 +2428,6 @@ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, DAG.getNode(ISD::UNDEF, V1.getValueType()), PermMask); - } else if (X86::isUNPCKLMask(PermMask.Val) || - X86::isUNPCKHMask(PermMask.Val)) { - // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*. - return SDOperand(); } else if (X86::isPSHUFDMask(PermMask.Val)) { if (V2.getOpcode() == ISD::UNDEF) // Leave the VECTOR_SHUFFLE alone. It matches PSHUFD. @@ -2414,7 +2438,8 @@ DAG.getNode(ISD::UNDEF, V1.getValueType()), PermMask); } else if (X86::isSHUFPMask(PermMask.Val)) - return CommuteVectorShuffleIfNeeded(V1, V2, PermMask, VT, DAG); + return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG); +#endif assert(0 && "Unexpected VECTOR_SHUFFLE to lower"); abort(); Index: llvm/lib/Target/X86/X86InstrSSE.td diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.40 llvm/lib/Target/X86/X86InstrSSE.td:1.41 --- llvm/lib/Target/X86/X86InstrSSE.td:1.40 Tue Mar 28 17:51:43 2006 +++ llvm/lib/Target/X86/X86InstrSSE.td Tue Mar 28 19:30:51 2006 @@ -79,9 +79,8 @@ return X86::isUNPCKHMask(N); }]>; -// Only use PSHUF if it is not a splat. def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{ - return !X86::isSplatMask(N) && X86::isPSHUFDMask(N); + return X86::isPSHUFDMask(N); }], SHUFFLE_get_shuf_imm>; def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{ @@ -918,86 +917,92 @@ "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>; def PSHUFDrr : PDIi8<0x70, MRMDestReg, (ops VR128:$dst, VR128:$src1, i8imm:$src2), - "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>; + "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (v4i32 (vector_shuffle + VR128:$src1, (undef), + PSHUFD_shuffle_mask:$src2)))]>; def PSHUFDrm : PDIi8<0x70, MRMSrcMem, (ops VR128:$dst, i128mem:$src1, i8imm:$src2), - "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", []>; + "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (v4i32 (vector_shuffle + (load addr:$src1), (undef), + PSHUFD_shuffle_mask:$src2)))]>; let isTwoAddress = 1 in { def SHUFPSrr : PSIi8<0xC6, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3), "shufps {$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, (vector_shuffle - (v4f32 VR128:$src1), (v4f32 VR128:$src2), - SHUFP_shuffle_mask:$src3))]>; + [(set VR128:$dst, (v4f32 (vector_shuffle + VR128:$src1, VR128:$src2, + SHUFP_shuffle_mask:$src3)))]>; def SHUFPSrm : PSIi8<0xC6, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2, i32i8imm:$src3), "shufps {$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, (vector_shuffle - (v4f32 VR128:$src1), (load addr:$src2), - SHUFP_shuffle_mask:$src3))]>; + [(set VR128:$dst, (v4f32 (vector_shuffle + VR128:$src1, (load addr:$src2), + SHUFP_shuffle_mask:$src3)))]>; def SHUFPDrr : PDIi8<0xC6, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3), "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, (vector_shuffle - (v2f64 VR128:$src1), (v2f64 VR128:$src2), - SHUFP_shuffle_mask:$src3))]>; + [(set VR128:$dst, (v2f64 (vector_shuffle + VR128:$src1, VR128:$src2, + SHUFP_shuffle_mask:$src3)))]>; def SHUFPDrm : PDIi8<0xC6, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3), "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, (vector_shuffle - (v2f64 VR128:$src1), (load addr:$src2), - SHUFP_shuffle_mask:$src3))]>; + [(set VR128:$dst, (v2f64 (vector_shuffle + VR128:$src1, (load addr:$src2), + SHUFP_shuffle_mask:$src3)))]>; def UNPCKHPSrr : PSI<0x15, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), "unpckhps {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (vector_shuffle VR128:$src1, VR128:$src2, - UNPCKH_shuffle_mask)))]>; + [(set VR128:$dst, (v4f32 (vector_shuffle + VR128:$src1, VR128:$src2, + UNPCKH_shuffle_mask)))]>; def UNPCKHPSrm : PSI<0x15, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), "unpckhps {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (vector_shuffle VR128:$src1, (load addr:$src2), - UNPCKH_shuffle_mask)))]>; + [(set VR128:$dst, (v4f32 (vector_shuffle + VR128:$src1, (load addr:$src2), + UNPCKH_shuffle_mask)))]>; def UNPCKHPDrr : PDI<0x15, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), "unpckhpd {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2f64 (vector_shuffle VR128:$src1, VR128:$src2, - UNPCKH_shuffle_mask)))]>; + [(set VR128:$dst, (v2f64 (vector_shuffle + VR128:$src1, VR128:$src2, + UNPCKH_shuffle_mask)))]>; def UNPCKHPDrm : PDI<0x15, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), "unpckhpd {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2f64 (vector_shuffle VR128:$src1, (load addr:$src2), - UNPCKH_shuffle_mask)))]>; + [(set VR128:$dst, (v2f64 (vector_shuffle + VR128:$src1, (load addr:$src2), + UNPCKH_shuffle_mask)))]>; def UNPCKLPSrr : PSI<0x14, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), "unpcklps {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (vector_shuffle VR128:$src1, VR128:$src2, - UNPCKL_shuffle_mask)))]>; + [(set VR128:$dst, (v4f32 (vector_shuffle + VR128:$src1, VR128:$src2, + UNPCKL_shuffle_mask)))]>; def UNPCKLPSrm : PSI<0x14, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), "unpcklps {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (vector_shuffle VR128:$src1, (load addr:$src2), - UNPCKL_shuffle_mask)))]>; + [(set VR128:$dst, (v4f32 (vector_shuffle + VR128:$src1, (load addr:$src2), + UNPCKL_shuffle_mask)))]>; def UNPCKLPDrr : PDI<0x14, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), "unpcklpd {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2f64 (vector_shuffle VR128:$src1, VR128:$src2, - UNPCKL_shuffle_mask)))]>; + [(set VR128:$dst, (v2f64 (vector_shuffle + VR128:$src1, VR128:$src2, + UNPCKL_shuffle_mask)))]>; def UNPCKLPDrm : PDI<0x14, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), "unpcklpd {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2f64 (vector_shuffle VR128:$src1, (load addr:$src2), - UNPCKL_shuffle_mask)))]>; + [(set VR128:$dst, (v2f64 (vector_shuffle + VR128:$src1, (load addr:$src2), + UNPCKL_shuffle_mask)))]>; } //===----------------------------------------------------------------------===// @@ -1354,11 +1359,3 @@ (v2f64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>; def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), MOVLHPS_splat_mask:$sm), (v2i64 (MOVLHPSrr VR128:$src, VR128:$src))>, Requires<[HasSSE2]>; - -// Shuffle v4f32 / v4i32, undef. These should only match if splat cases do not. -def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm), - (v4f32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>, - Requires<[HasSSE2]>; -def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef), PSHUFD_shuffle_mask:$sm), - (v4i32 (PSHUFDrr VR128:$src, PSHUFD_shuffle_mask:$sm))>, - Requires<[HasSSE2]>; _______________________________________________ llvm-commits mailing list llvm-commits@cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits