Changes in directory llvm/lib/Target/X86:

README.txt updated: 1.85 -> 1.86
X86ISelLowering.cpp updated: 1.163 -> 1.164
X86InstrSSE.td updated: 1.79 -> 1.80
---
Log message:

All "integer" logical ops (pand, por, pxor) are now promoted to v2i64.
Clean up and fix various logical ops issues.


---
Diffs of the changes:  (+71 -146)

 README.txt          |    4 +
 X86ISelLowering.cpp |   45 ++++++++-----
 X86InstrSSE.td      |  168 ++++++++++++----------------------------------------
 3 files changed, 71 insertions(+), 146 deletions(-)
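As a quick standalone illustration of why the promotion is safe (plain C++, not from this patch or the LLVM tree; the values are made up for the demo): pand, por, and pxor work bit by bit, so the result of a 128-bit logical op does not depend on whether the register is viewed as v16i8, v8i16, v4i32, or v2i64. That is also what the README note below is getting at.

// Standalone sketch: a bitwise AND computed on four 32-bit lanes matches the
// same AND computed on two 64-bit lanes of the identical 128 bits.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  const uint32_t a32[4] = {0xDEADBEEFu, 0x12345678u, 0xFFFF0000u, 0x0F0F0F0Fu};
  const uint32_t b32[4] = {0xCAFEBABEu, 0x87654321u, 0x00FF00FFu, 0xF0F0F0F0u};

  // AND performed as four 32-bit lanes (the v4i32 view).
  uint32_t r32[4];
  for (int i = 0; i < 4; ++i)
    r32[i] = a32[i] & b32[i];

  // The same 128 bits reinterpreted as two 64-bit lanes (the v2i64 view).
  uint64_t a64[2], b64[2], r64[2];
  std::memcpy(a64, a32, 16);
  std::memcpy(b64, b32, 16);
  for (int i = 0; i < 2; ++i)
    r64[i] = a64[i] & b64[i];

  // Both views produce the identical 128-bit result.
  assert(std::memcmp(r32, r64, 16) == 0);
  return 0;
}
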
Index: llvm/lib/Target/X86/README.txt
diff -u llvm/lib/Target/X86/README.txt:1.85 llvm/lib/Target/X86/README.txt:1.86
--- llvm/lib/Target/X86/README.txt:1.85	Mon Apr 10 16:51:03 2006
+++ llvm/lib/Target/X86/README.txt	Wed Apr 12 16:21:57 2006
@@ -794,3 +794,7 @@
 X86RegisterInfo::copyRegToReg() returns X86::MOVAPSrr for VR128. Is it possible
 to choose between movaps, movapd, and movdqa based on types of source and
 destination?
+
+How about andps, andpd, and pand? Do we really care about the type of the packed
+elements? If not, why not always use the "ps" variants which are likely to be
+shorter.


Index: llvm/lib/Target/X86/X86ISelLowering.cpp
diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.163 llvm/lib/Target/X86/X86ISelLowering.cpp:1.164
--- llvm/lib/Target/X86/X86ISelLowering.cpp:1.163	Wed Apr 12 12:12:36 2006
+++ llvm/lib/Target/X86/X86ISelLowering.cpp	Wed Apr 12 16:21:57 2006
@@ -275,6 +275,9 @@
   if (Subtarget->hasSSE1()) {
     addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
 
+    setOperationAction(ISD::AND,                MVT::v4f32, Legal);
+    setOperationAction(ISD::OR,                 MVT::v4f32, Legal);
+    setOperationAction(ISD::XOR,                MVT::v4f32, Legal);
     setOperationAction(ISD::ADD,                MVT::v4f32, Legal);
     setOperationAction(ISD::SUB,                MVT::v4f32, Legal);
     setOperationAction(ISD::MUL,                MVT::v4f32, Legal);
@@ -301,36 +304,43 @@
     setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
     setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
     setOperationAction(ISD::MUL,                MVT::v2f64, Legal);
-    setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
+    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
     setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
+    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
+
+    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
+    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
+      setOperationAction(ISD::BUILD_VECTOR,        (MVT::ValueType)VT, Custom);
+      setOperationAction(ISD::VECTOR_SHUFFLE,      (MVT::ValueType)VT, Custom);
+      setOperationAction(ISD::EXTRACT_VECTOR_ELT,  (MVT::ValueType)VT, Custom);
+    }
     setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
-    setOperationAction(ISD::BUILD_VECTOR,       MVT::v16i8, Custom);
-    setOperationAction(ISD::BUILD_VECTOR,       MVT::v8i16, Custom);
-    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4i32, Custom);
     setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
-    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v16i8, Custom);
-    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v8i16, Custom);
-    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4i32, Custom);
     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
-    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
-    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
-    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
 
-    // Promote v16i8, v8i16, v4i32 selects to v2i64. Custom lower v2i64, v2f64,
-    // and v4f32 selects.
-    for (unsigned VT = (unsigned)MVT::v16i8;
-         VT != (unsigned)MVT::v2i64; VT++) {
-      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
-      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
+    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
+    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
+      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
+      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
+      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
+      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
+      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
+      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
       setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
       AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
+      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
+      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
     }
+
+    // Custom lower v2i64 and v2f64 selects.
+    setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
     setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
-    setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
     setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
+    setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
   }
 
   // We want to custom lower some of our intrinsics.
@@ -2827,6 +2837,7 @@
     return SDOperand();
 
   MVT::ValueType VT = Op.getValueType();
+  // TODO: handle v16i8.
   if (MVT::getSizeInBits(VT) == 16) {
     // Transform it so it match pextrw which produces a 32-bit result.
     MVT::ValueType EVT = (MVT::ValueType)(VT+1);
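Similarly, the andnps/pandn patterns in the X86InstrSSE.td diff below now spell "and-not" as (and (xor x, all-ones), y) instead of (and (vnot x), y). The two forms compute the same thing; here is a standalone sanity check of that identity in plain C++ (not LLVM code, with a single 64-bit lane standing in for the vector):

// Standalone sketch: xor with all-ones followed by and is the same
// operation that pandn/andnps implement, namely (~x) & y.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t ones = ~0ULL;  // one-lane analogue of immAllOnesV
  const uint64_t x = 0x0123456789ABCDEFULL;
  const uint64_t y = 0xFEDCBA9876543210ULL;

  assert(((x ^ ones) & y) == (~x & y));  // and-not via xor-with-all-ones
  return 0;
}
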
Index: llvm/lib/Target/X86/X86InstrSSE.td
diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.79 llvm/lib/Target/X86/X86InstrSSE.td:1.80
--- llvm/lib/Target/X86/X86InstrSSE.td:1.79	Wed Apr 12 12:12:36 2006
+++ llvm/lib/Target/X86/X86InstrSSE.td	Wed Apr 12 16:21:57 2006
@@ -1019,9 +1019,7 @@
 let isCommutable = 1 in {
 def ANDPSrr : PSI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                 "andps {$src2, $dst|$dst, $src2}",
-                [(set VR128:$dst,
-                  (and (bc_v4i32 (v4f32 VR128:$src1)),
-                   (bc_v4i32 (v4f32 VR128:$src2))))]>;
+                [(set VR128:$dst, (v2i64 (and VR128:$src1, VR128:$src2)))]>;
 def ANDPDrr : PDI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                 "andpd {$src2, $dst|$dst, $src2}",
                 [(set VR128:$dst,
@@ -1029,9 +1027,7 @@
                    (bc_v2i64 (v2f64 VR128:$src2))))]>;
 def ORPSrr : PSI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                 "orps {$src2, $dst|$dst, $src2}",
-                [(set VR128:$dst,
-                  (or (bc_v4i32 (v4f32 VR128:$src1)),
-                   (bc_v4i32 (v4f32 VR128:$src2))))]>;
+                [(set VR128:$dst, (v2i64 (or VR128:$src1, VR128:$src2)))]>;
 def ORPDrr : PDI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                 "orpd {$src2, $dst|$dst, $src2}",
                 [(set VR128:$dst,
@@ -1039,9 +1035,7 @@
                    (bc_v2i64 (v2f64 VR128:$src2))))]>;
 def XORPSrr : PSI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                 "xorps {$src2, $dst|$dst, $src2}",
-                [(set VR128:$dst,
-                  (xor (bc_v4i32 (v4f32 VR128:$src1)),
-                   (bc_v4i32 (v4f32 VR128:$src2))))]>;
+                [(set VR128:$dst, (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
 def XORPDrr : PDI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                 "xorpd {$src2, $dst|$dst, $src2}",
                 [(set VR128:$dst,
@@ -1050,9 +1044,8 @@
 }
 def ANDPSrm : PSI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                 "andps {$src2, $dst|$dst, $src2}",
-                [(set VR128:$dst,
-                  (and (bc_v4i32 (v4f32 VR128:$src1)),
-                   (bc_v4i32 (loadv4f32 addr:$src2))))]>;
+                [(set VR128:$dst, (and VR128:$src1,
+                                   (bc_v2i64 (loadv4f32 addr:$src2))))]>;
 def ANDPDrm : PDI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                 "andpd {$src2, $dst|$dst, $src2}",
                 [(set VR128:$dst,
@@ -1060,9 +1053,8 @@
                    (bc_v2i64 (loadv2f64 addr:$src2))))]>;
 def ORPSrm : PSI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                 "orps {$src2, $dst|$dst, $src2}",
-                [(set VR128:$dst,
-                  (or (bc_v4i32 (v4f32 VR128:$src1)),
-                   (bc_v4i32 (loadv4f32 addr:$src2))))]>;
+                [(set VR128:$dst, (or VR128:$src1,
+                                   (bc_v2i64 (loadv4f32 addr:$src2))))]>;
 def ORPDrm : PDI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                 "orpd {$src2, $dst|$dst, $src2}",
                 [(set VR128:$dst,
@@ -1070,9 +1062,8 @@
                    (bc_v2i64 (loadv2f64 addr:$src2))))]>;
 def XORPSrm : PSI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                 "xorps {$src2, $dst|$dst, $src2}",
-                [(set VR128:$dst,
-                  (xor (bc_v4i32 (v4f32 VR128:$src1)),
-                   (bc_v4i32 (loadv4f32 addr:$src2))))]>;
+                [(set VR128:$dst, (xor VR128:$src1,
+                                   (bc_v2i64 (loadv4f32 addr:$src2))))]>;
 def XORPDrm : PDI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
                 "xorpd {$src2, $dst|$dst, $src2}",
                 [(set VR128:$dst,
@@ -1080,14 +1071,14 @@
                    (bc_v2i64 (loadv2f64 addr:$src2))))]>;
 def ANDNPSrr : PSI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                 "andnps {$src2, $dst|$dst, $src2}",
-                [(set VR128:$dst,
-                  (and (vnot (bc_v4i32 (v4f32 VR128:$src1))),
-                   (bc_v4i32 (v4f32 VR128:$src2))))]>;
+                [(set VR128:$dst, (v2i64 (and (xor VR128:$src1,
+                                           (bc_v2i64 (v4i32 immAllOnesV))),
+                                       VR128:$src2)))]>;
 def ANDNPSrm : PSI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1,f128mem:$src2),
                 "andnps {$src2, $dst|$dst, $src2}",
-                [(set VR128:$dst,
-                  (and (vnot (bc_v4i32 (v4f32 VR128:$src1))),
-                   (bc_v4i32 (loadv4f32 addr:$src2))))]>;
+                [(set VR128:$dst, (v2i64 (and (xor VR128:$src1,
+                                           (bc_v2i64 (v4i32 immAllOnesV))),
+                                       (bc_v2i64 (loadv4f32 addr:$src2)))))]>;
 def ANDNPDrr : PDI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
                 "andnpd {$src2, $dst|$dst, $src2}",
                 [(set VR128:$dst,
@@ -1922,110 +1913,29 @@
 
 // 128-bit logical shifts
 def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
-          (v2i64 (PSLLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
+          (v2i64 (PSLLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>,
+      Requires<[HasSSE2]>;
 def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
-          (v2i64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
+          (v2i64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>,
+      Requires<[HasSSE2]>;
 
-// Logical ops
-def : Pat<(and (bc_v4i32 (v4f32 VR128:$src1)), (loadv4i32 addr:$src2)),
-          (ANDPSrm VR128:$src1, addr:$src2)>;
-def : Pat<(and (bc_v2i64 (v2f64 VR128:$src1)), (loadv2i64 addr:$src2)),
-          (ANDPDrm VR128:$src1, addr:$src2)>;
-def : Pat<(or (bc_v4i32 (v4f32 VR128:$src1)), (loadv4i32 addr:$src2)),
-          (ORPSrm VR128:$src1, addr:$src2)>;
-def : Pat<(or (bc_v2i64 (v2f64 VR128:$src1)), (loadv2i64 addr:$src2)),
-          (ORPDrm VR128:$src1, addr:$src2)>;
-def : Pat<(xor (bc_v4i32 (v4f32 VR128:$src1)), (loadv4i32 addr:$src2)),
-          (XORPSrm VR128:$src1, addr:$src2)>;
-def : Pat<(xor (bc_v2i64 (v2f64 VR128:$src1)), (loadv2i64 addr:$src2)),
-          (XORPDrm VR128:$src1, addr:$src2)>;
-def : Pat<(and (vnot (bc_v4i32 (v4f32 VR128:$src1))), (loadv4i32 addr:$src2)),
-          (ANDNPSrm VR128:$src1, addr:$src2)>;
-def : Pat<(and (vnot (bc_v2i64 (v2f64 VR128:$src1))), (loadv2i64 addr:$src2)),
-          (ANDNPDrm VR128:$src1, addr:$src2)>;
-
-def : Pat<(bc_v4f32 (v4i32 (and VR128:$src1, VR128:$src2))),
-          (ANDPSrr VR128:$src1, VR128:$src2)>;
-def : Pat<(bc_v4f32 (v4i32 (or VR128:$src1, VR128:$src2))),
-          (ORPSrr VR128:$src1, VR128:$src2)>;
-def : Pat<(bc_v4f32 (v4i32 (xor VR128:$src1, VR128:$src2))),
-          (XORPSrr VR128:$src1, VR128:$src2)>;
-def : Pat<(bc_v4f32 (v4i32 (and (vnot VR128:$src1), VR128:$src2))),
-          (ANDNPSrr VR128:$src1, VR128:$src2)>;
-
-def : Pat<(bc_v4f32 (v4i32 (and VR128:$src1, (load addr:$src2)))),
-          (ANDPSrm (v4i32 VR128:$src1), addr:$src2)>;
-def : Pat<(bc_v4f32 (v4i32 (or VR128:$src1, (load addr:$src2)))),
-          (ORPSrm VR128:$src1, addr:$src2)>;
-def : Pat<(bc_v4f32 (v4i32 (xor VR128:$src1, (load addr:$src2)))),
-          (XORPSrm VR128:$src1, addr:$src2)>;
-def : Pat<(bc_v4f32 (v4i32 (and (vnot VR128:$src1), (load addr:$src2)))),
-          (ANDNPSrm VR128:$src1, addr:$src2)>;
-
-def : Pat<(bc_v2f64 (v2i64 (and VR128:$src1, VR128:$src2))),
-          (ANDPDrr VR128:$src1, VR128:$src2)>;
-def : Pat<(bc_v2f64 (v2i64 (or VR128:$src1, VR128:$src2))),
-          (ORPDrr VR128:$src1, VR128:$src2)>;
-def : Pat<(bc_v2f64 (v2i64 (xor VR128:$src1, VR128:$src2))),
-          (XORPDrr VR128:$src1, VR128:$src2)>;
-def : Pat<(bc_v2f64 (v2i64 (and (vnot VR128:$src1), VR128:$src2))),
-          (ANDNPDrr VR128:$src1, VR128:$src2)>;
-
-def : Pat<(bc_v2f64 (v2i64 (and VR128:$src1, (load addr:$src2)))),
-          (ANDPSrm (v2i64 VR128:$src1), addr:$src2)>;
-def : Pat<(bc_v2f64 (v2i64 (or VR128:$src1, (load addr:$src2)))),
-          (ORPSrm VR128:$src1, addr:$src2)>;
-def : Pat<(bc_v2f64 (v2i64 (xor VR128:$src1, (load addr:$src2)))),
-          (XORPSrm VR128:$src1, addr:$src2)>;
-def : Pat<(bc_v2f64 (v2i64 (and (vnot VR128:$src1), (load addr:$src2)))),
-          (ANDNPSrm VR128:$src1, addr:$src2)>;
-
-def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
-          (PANDrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
-          (PANDrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
-          (PANDrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
-          (PORrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
-          (PORrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
-          (PORrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
-          (PXORrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
-          (PXORrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
-          (PXORrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v4i32 (and (vnot VR128:$src1), VR128:$src2)),
-          (PANDNrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v8i16 (and (vnot VR128:$src1), VR128:$src2)),
-          (PANDNrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v16i8 (and (vnot VR128:$src1), VR128:$src2)),
-          (PANDNrr VR128:$src1, VR128:$src2)>;
-
-def : Pat<(v4i32 (and VR128:$src1, (load addr:$src2))),
-          (PANDrm VR128:$src1, addr:$src2)>;
-def : Pat<(v8i16 (and VR128:$src1, (load addr:$src2))),
-          (PANDrm VR128:$src1, addr:$src2)>;
-def : Pat<(v16i8 (and VR128:$src1, (load addr:$src2))),
-          (PANDrm VR128:$src1, addr:$src2)>;
-def : Pat<(v4i32 (or VR128:$src1, (load addr:$src2))),
-          (PORrm VR128:$src1, addr:$src2)>;
-def : Pat<(v8i16 (or VR128:$src1, (load addr:$src2))),
-          (PORrm VR128:$src1, addr:$src2)>;
-def : Pat<(v16i8 (or VR128:$src1, (load addr:$src2))),
-          (PORrm VR128:$src1, addr:$src2)>;
-def : Pat<(v4i32 (xor VR128:$src1, (load addr:$src2))),
-          (PXORrm VR128:$src1, addr:$src2)>;
-def : Pat<(v8i16 (xor VR128:$src1, (load addr:$src2))),
-          (PXORrm VR128:$src1, addr:$src2)>;
-def : Pat<(v16i8 (xor VR128:$src1, (load addr:$src2))),
-          (PXORrm VR128:$src1, addr:$src2)>;
-def : Pat<(v4i32 (and (vnot VR128:$src1), (load addr:$src2))),
-          (PANDNrm VR128:$src1, addr:$src2)>;
-def : Pat<(v8i16 (and (vnot VR128:$src1), (load addr:$src2))),
-          (PANDNrm VR128:$src1, addr:$src2)>;
-def : Pat<(v16i8 (and (vnot VR128:$src1), (load addr:$src2))),
-          (PANDNrm VR128:$src1, addr:$src2)>;
+// Some special case pandn patterns.
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
+                  VR128:$src2)),
+          (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
+                  VR128:$src2)),
+          (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
+                  VR128:$src2)),
+          (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
+                  (load addr:$src2))),
+          (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
+                  (load addr:$src2))),
+          (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
+                  (load addr:$src2))),
+          (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;

_______________________________________________
llvm-commits mailing list
llvm-commits@cs.uiuc.edu
http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits