Changes in directory llvm/lib/Target/X86:
X86ISelLowering.cpp updated: 1.310 -> 1.311 X86ISelLowering.h updated: 1.81 -> 1.82 X86InstrSSE.td updated: 1.174 -> 1.175 --- Log message: With SSE2, expand FCOPYSIGN to a series of SSE bitwise operations. --- Diffs of the changes: (+93 -8) X86ISelLowering.cpp | 65 ++++++++++++++++++++++++++++++++++++++++++++++++---- X86ISelLowering.h | 11 ++++++++ X86InstrSSE.td | 25 ++++++++++++++++---- 3 files changed, 93 insertions(+), 8 deletions(-) Index: llvm/lib/Target/X86/X86ISelLowering.cpp diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.310 llvm/lib/Target/X86/X86ISelLowering.cpp:1.311 --- llvm/lib/Target/X86/X86ISelLowering.cpp:1.310 Wed Jan 3 11:24:59 2007 +++ llvm/lib/Target/X86/X86ISelLowering.cpp Fri Jan 5 01:55:56 2007 @@ -250,9 +250,6 @@ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); - if (X86ScalarSSE) { // Set up the FP register classes. addRegisterClass(MVT::f32, X86::FR32RegisterClass); @@ -266,6 +263,10 @@ setOperationAction(ISD::FNEG , MVT::f64, Custom); setOperationAction(ISD::FNEG , MVT::f32, Custom); + // Use ANDPD and ORPD to simulate FCOPYSIGN. + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); + // We don't support sin/cos/fmod setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); @@ -283,7 +284,9 @@ // Set up the FP register classes. 
addRegisterClass(MVT::f64, X86::RFPRegisterClass); - setOperationAction(ISD::UNDEF, MVT::f64, Expand); + setOperationAction(ISD::UNDEF, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); if (!UnsafeFPMath) { setOperationAction(ISD::FSIN , MVT::f64 , Expand); @@ -4123,6 +4126,56 @@ return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); } +SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { + MVT::ValueType VT = Op.getValueType(); + MVT::ValueType SrcVT = Op.getOperand(1).getValueType(); + const Type *SrcTy = MVT::getTypeForValueType(SrcVT); + // First get the sign bit of second operand. + std::vector<Constant*> CV; + if (SrcVT == MVT::f64) { + CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(1ULL << 63))); + CV.push_back(ConstantFP::get(SrcTy, 0.0)); + } else { + CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(1U << 31))); + CV.push_back(ConstantFP::get(SrcTy, 0.0)); + CV.push_back(ConstantFP::get(SrcTy, 0.0)); + CV.push_back(ConstantFP::get(SrcTy, 0.0)); + } + Constant *CS = ConstantStruct::get(CV); + SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); + std::vector<MVT::ValueType> Tys; + Tys.push_back(VT); + Tys.push_back(MVT::Other); + SmallVector<SDOperand, 3> Ops; + Ops.push_back(DAG.getEntryNode()); + Ops.push_back(CPIdx); + Ops.push_back(DAG.getSrcValue(NULL)); + SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size()); + SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op.getOperand(1), Mask); + + // Shift sign bit right or left if the two operands have different types. + if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) { + // Op0 is MVT::f32, Op1 is MVT::f64. 
+ SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit); + SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit, + DAG.getConstant(32, MVT::i32)); + SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit); + SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit, + DAG.getConstant(0, getPointerTy())); + } else if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) { + // Op0 is MVT::f64, Op1 is MVT::f32. + SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, SignBit); + SignBit = DAG.getNode(X86ISD::FSHL, MVT::v4f32, SignBit, + DAG.getConstant(32, MVT::i32)); + SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, SignBit); + SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f64, SignBit, + DAG.getConstant(0, getPointerTy())); + } + + // Or the first operand with the sign bit. + return DAG.getNode(X86ISD::FOR, VT, Op.getOperand(0), SignBit); +} + SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG, SDOperand Chain) { assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); @@ -4955,6 +5008,7 @@ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::FABS: return LowerFABS(Op, DAG); case ISD::FNEG: return LowerFNEG(Op, DAG); + case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG, DAG.getEntryNode()); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); @@ -4976,7 +5030,10 @@ case X86ISD::SHLD: return "X86ISD::SHLD"; case X86ISD::SHRD: return "X86ISD::SHRD"; case X86ISD::FAND: return "X86ISD::FAND"; + case X86ISD::FOR: return "X86ISD::FOR"; case X86ISD::FXOR: return "X86ISD::FXOR"; + case X86ISD::FSHL: return "X86ISD::FSHL"; + case X86ISD::FSRL: return "X86ISD::FSRL"; case X86ISD::FILD: return "X86ISD::FILD"; case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM"; Index: llvm/lib/Target/X86/X86ISelLowering.h diff -u 
llvm/lib/Target/X86/X86ISelLowering.h:1.81 llvm/lib/Target/X86/X86ISelLowering.h:1.82 --- llvm/lib/Target/X86/X86ISelLowering.h:1.81 Thu Nov 30 15:55:46 2006 +++ llvm/lib/Target/X86/X86ISelLowering.h Fri Jan 5 01:55:56 2007 @@ -35,10 +35,20 @@ /// to X86::ANDPS or X86::ANDPD. FAND, + /// FOR - Bitwise logical OR of floating point values. This corresponds + /// to X86::ORPS or X86::ORPD. + FOR, + /// FXOR - Bitwise logical XOR of floating point values. This corresponds /// to X86::XORPS or X86::XORPD. FXOR, + /// FSHL, FSRL - Shift a floating point value (in SSE register) by n bits + /// while shifting in 0's. These correspond to X86::PSLLDQ or + /// X86::PSRLDQ. + FSHL, + FSRL, + /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the /// integer source in memory and FP reg result. This corresponds to the /// X86::FILD*m instructions. It has three inputs (token chain, address, @@ -389,6 +399,7 @@ SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG); SDOperand LowerFABS(SDOperand Op, SelectionDAG &DAG); SDOperand LowerFNEG(SDOperand Op, SelectionDAG &DAG); + SDOperand LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG); SDOperand LowerSETCC(SDOperand Op, SelectionDAG &DAG, SDOperand Chain); SDOperand LowerSELECT(SDOperand Op, SelectionDAG &DAG); SDOperand LowerBRCOND(SDOperand Op, SelectionDAG &DAG); Index: llvm/lib/Target/X86/X86InstrSSE.td diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.174 llvm/lib/Target/X86/X86InstrSSE.td:1.175 --- llvm/lib/Target/X86/X86InstrSSE.td:1.174 Thu Dec 14 13:43:11 2006 +++ llvm/lib/Target/X86/X86InstrSSE.td Fri Jan 5 01:55:56 2007 @@ -18,14 +18,21 @@ // SSE specific DAG Nodes. 
//===----------------------------------------------------------------------===// +def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, + SDTCisFP<0>, SDTCisInt<2> ]>; + def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad, [SDNPHasChain]>; def X86loadu : SDNode<"X86ISD::LOAD_UA", SDTLoad, [SDNPHasChain]>; def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>; def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>; def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; +def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp, + [SDNPCommutative, SDNPAssociative]>; def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; +def X86fshl : SDNode<"X86ISD::FSHL", SDTX86FPShiftOp>; +def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>; def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest, [SDNPHasChain, SDNPOutFlag]>; def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest, @@ -607,9 +614,11 @@ "andpd {$src2, $dst|$dst, $src2}", [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>; def FsORPSrr : PSI<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), - "orps {$src2, $dst|$dst, $src2}", []>; + "orps {$src2, $dst|$dst, $src2}", + [(set FR32:$dst, (X86for FR32:$src1, FR32:$src2))]>; def FsORPDrr : PDI<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), - "orpd {$src2, $dst|$dst, $src2}", []>; + "orpd {$src2, $dst|$dst, $src2}", + [(set FR64:$dst, (X86for FR64:$src1, FR64:$src2))]>; def FsXORPSrr : PSI<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), "xorps {$src2, $dst|$dst, $src2}", [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>; @@ -626,9 +635,13 @@ [(set FR64:$dst, (X86fand FR64:$src1, (X86loadpf64 addr:$src2)))]>; def FsORPSrm : PSI<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2), - "orps {$src2, $dst|$dst, $src2}", []>; + "orps {$src2, $dst|$dst, $src2}", + [(set FR32:$dst, (X86for FR32:$src1, + (X86loadpf32 addr:$src2)))]>; def FsORPDrm : PDI<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, 
f128mem:$src2), - "orpd {$src2, $dst|$dst, $src2}", []>; + "orpd {$src2, $dst|$dst, $src2}", + [(set FR64:$dst, (X86for FR64:$src1, + (X86loadpf64 addr:$src2)))]>; def FsXORPSrm : PSI<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2), "xorps {$src2, $dst|$dst, $src2}", [(set FR32:$dst, (X86fxor FR32:$src1, @@ -1364,6 +1377,10 @@ (v2i64 (PSLLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>; def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2), (v2i64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>; + def : Pat<(v4f32 (X86fshl VR128:$src1, i32immSExt8:$src2)), + (v4f32 (PSLLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>; + def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), + (v2f64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>; } // Logical _______________________________________________ llvm-commits mailing list llvm-commits@cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits