Changes in directory llvm/lib/Target/X86:

X86ATTAsmPrinter.h updated: 1.4 -> 1.5
X86ISelDAGToDAG.cpp updated: 1.37 -> 1.38
X86ISelLowering.cpp updated: 1.71 -> 1.72
X86ISelLowering.h updated: 1.23 -> 1.24
X86InstrInfo.td updated: 1.225 -> 1.226
X86IntelAsmPrinter.h updated: 1.5 -> 1.6
---
Log message:

- Allow XMM load (for scalar use) to be folded into ANDP* and XORP*.
- Use XORP* to implement fneg.

---
Diffs of the changes: (+127 -26)

 X86ATTAsmPrinter.h | 2 -
 X86ISelDAGToDAG.cpp | 3 +
 X86ISelLowering.cpp | 54 +++++++++++++++++++++++++++++----
 X86ISelLowering.h | 8 ++++
 X86InstrInfo.td | 82 +++++++++++++++++++++++++++++++++++++++++----------
 X86IntelAsmPrinter.h | 4 +-
 6 files changed, 127 insertions(+), 26 deletions(-)

Index: llvm/lib/Target/X86/X86ATTAsmPrinter.h
diff -u llvm/lib/Target/X86/X86ATTAsmPrinter.h:1.4 llvm/lib/Target/X86/X86ATTAsmPrinter.h:1.5
--- llvm/lib/Target/X86/X86ATTAsmPrinter.h:1.4 Fri Dec 16 19:03:57 2005
+++ llvm/lib/Target/X86/X86ATTAsmPrinter.h Tue Jan 31 16:28:30 2006
@@ -59,7 +59,7 @@
   void printf64mem(const MachineInstr *MI, unsigned OpNo) {
     printMemReference(MI, OpNo);
   }
-  void printf80mem(const MachineInstr *MI, unsigned OpNo) {
+  void printf128mem(const MachineInstr *MI, unsigned OpNo) {
     printMemReference(MI, OpNo);
   }

Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
diff -u llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.37 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.38
--- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.37 Fri Jan 27 02:10:46 2006
+++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp Tue Jan 31 16:28:30 2006
@@ -253,7 +253,8 @@
   if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) {
     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N)) {
       AM.BaseType = X86ISelAddressMode::ConstantPoolBase;
-      AM.Base.Reg = CurDAG->getTargetConstantPool(CP->get(), MVT::i32);
+      AM.Base.Reg = CurDAG->getTargetConstantPool(CP->get(), MVT::i32,
+                                                  CP->getAlignment());
       return false;
     }
   }

Index: llvm/lib/Target/X86/X86ISelLowering.cpp
diff -u
llvm/lib/Target/X86/X86ISelLowering.cpp:1.71 llvm/lib/Target/X86/X86ISelLowering.cpp:1.72 --- llvm/lib/Target/X86/X86ISelLowering.cpp:1.71 Tue Jan 31 13:43:35 2006 +++ llvm/lib/Target/X86/X86ISelLowering.cpp Tue Jan 31 16:28:30 2006 @@ -17,6 +17,7 @@ #include "X86ISelLowering.h" #include "X86TargetMachine.h" #include "llvm/CallingConv.h" +#include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -208,16 +209,20 @@ setOperationAction(ISD::EXTLOAD, MVT::f32, Expand); setOperationAction(ISD::ZEXTLOAD, MVT::f32, Expand); + // Use ANDPD to simulate FABS. + setOperationAction(ISD::FABS , MVT::f64, Custom); + setOperationAction(ISD::FABS , MVT::f32, Custom); + + // Use XORP to simulate FNEG. + setOperationAction(ISD::FNEG , MVT::f64, Custom); + setOperationAction(ISD::FNEG , MVT::f32, Custom); + // We don't support sin/cos/sqrt/fmod setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); - setOperationAction(ISD::FABS , MVT::f64, Custom); - setOperationAction(ISD::FNEG , MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); - setOperationAction(ISD::FABS , MVT::f32, Custom); - setOperationAction(ISD::FNEG , MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); // Expand FP immediates into loads from the stack, except for the special @@ -1567,11 +1572,44 @@ } case ISD::FABS: { MVT::ValueType VT = Op.getValueType(); - SDOperand Mask = (VT == MVT::f64) - ? 
DAG.getConstantFP(BitsToDouble(~(1ULL << 63)), MVT::f64) - : DAG.getConstantFP(BitsToFloat (~(1U << 31)), MVT::f32); + const Type *OpNTy = MVT::getTypeForValueType(VT); + std::vector<Constant*> CV; + if (VT == MVT::f64) { + CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)))); + CV.push_back(ConstantFP::get(OpNTy, 0.0)); + } else { + CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)))); + CV.push_back(ConstantFP::get(OpNTy, 0.0)); + CV.push_back(ConstantFP::get(OpNTy, 0.0)); + CV.push_back(ConstantFP::get(OpNTy, 0.0)); + } + Constant *CS = ConstantStruct::get(CV); + SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); + SDOperand Mask + = DAG.getNode(X86ISD::LOAD_PACK, + VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask); } + case ISD::FNEG: { + MVT::ValueType VT = Op.getValueType(); + const Type *OpNTy = MVT::getTypeForValueType(VT); + std::vector<Constant*> CV; + if (VT == MVT::f64) { + CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63))); + CV.push_back(ConstantFP::get(OpNTy, 0.0)); + } else { + CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31))); + CV.push_back(ConstantFP::get(OpNTy, 0.0)); + CV.push_back(ConstantFP::get(OpNTy, 0.0)); + CV.push_back(ConstantFP::get(OpNTy, 0.0)); + } + Constant *CS = ConstantStruct::get(CV); + SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4); + SDOperand Mask + = DAG.getNode(X86ISD::LOAD_PACK, + VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL)); + return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask); + } case ISD::SETCC: { assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); SDOperand Cond; @@ -1923,6 +1961,7 @@ case X86ISD::SHLD: return "X86ISD::SHLD"; case X86ISD::SHRD: return "X86ISD::SHRD"; case X86ISD::FAND: return "X86ISD::FAND"; + case X86ISD::FXOR: return "X86ISD::FXOR"; case X86ISD::FILD: return "X86ISD::FILD"; case X86ISD::FP_TO_INT16_IN_MEM: return 
"X86ISD::FP_TO_INT16_IN_MEM"; case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM"; @@ -1942,6 +1981,7 @@ case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; case X86ISD::REP_STOS: return "X86ISD::RET_STOS"; case X86ISD::REP_MOVS: return "X86ISD::RET_MOVS"; + case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK"; } } Index: llvm/lib/Target/X86/X86ISelLowering.h diff -u llvm/lib/Target/X86/X86ISelLowering.h:1.23 llvm/lib/Target/X86/X86ISelLowering.h:1.24 --- llvm/lib/Target/X86/X86ISelLowering.h:1.23 Tue Jan 31 13:43:35 2006 +++ llvm/lib/Target/X86/X86ISelLowering.h Tue Jan 31 16:28:30 2006 @@ -45,6 +45,10 @@ /// to X86::ANDPS or X86::ANDPD. FAND, + /// FXOR - Bitwise logical XOR of floating point values. This corresponds + /// to X86::XORPS or X86::XORPD. + FXOR, + /// FILD - This instruction implements SINT_TO_FP with the integer source /// in memory and FP reg result. This corresponds to the X86::FILD*m /// instructions. It has three inputs (token chain, address, and source @@ -137,6 +141,10 @@ /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx. REP_MOVS, + + /// LOAD_PACK Load a 128-bit packed float / double value. It has the same + /// operands as a normal load. + LOAD_PACK, }; // X86 specific condition code. 
These correspond to X86_*_COND in Index: llvm/lib/Target/X86/X86InstrInfo.td diff -u llvm/lib/Target/X86/X86InstrInfo.td:1.225 llvm/lib/Target/X86/X86InstrInfo.td:1.226 --- llvm/lib/Target/X86/X86InstrInfo.td:1.225 Tue Jan 31 13:43:35 2006 +++ llvm/lib/Target/X86/X86InstrInfo.td Tue Jan 31 16:28:30 2006 @@ -70,6 +70,8 @@ def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; +def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp, + [SDNPCommutative, SDNPAssociative]>; def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest, [SDNPOutFlag]>; @@ -122,6 +124,9 @@ def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG",SDTX86RdTsc, [SDNPHasChain, SDNPOutFlag]>; +def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad, + [SDNPHasChain]>; + //===----------------------------------------------------------------------===// // X86 Operand Definitions. // @@ -140,7 +145,7 @@ def i64mem : X86MemOperand<"printi64mem">; def f32mem : X86MemOperand<"printf32mem">; def f64mem : X86MemOperand<"printf64mem">; -def f80mem : X86MemOperand<"printf80mem">; +def f128mem : X86MemOperand<"printf128mem">; def SSECC : Operand<i8> { let PrintMethod = "printSSECC"; @@ -357,6 +362,9 @@ def extloadi8i1 : PatFrag<(ops node:$ptr), (i8 (extload node:$ptr, i1))>; def extloadf64f32 : PatFrag<(ops node:$ptr), (f64 (extload node:$ptr, f32))>; +def X86loadpf32 : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>; +def X86loadpf64 : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>; + //===----------------------------------------------------------------------===// // Instruction templates... 
@@ -2566,43 +2574,51 @@ "orpd {$src2, $dst|$dst, $src2}", []>, Requires<[HasSSE2]>, TB, OpSize; def XORPSrr : I<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), - "xorps {$src2, $dst|$dst, $src2}", []>, + "xorps {$src2, $dst|$dst, $src2}", + [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>, Requires<[HasSSE1]>, TB; def XORPDrr : I<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), - "xorpd {$src2, $dst|$dst, $src2}", []>, + "xorpd {$src2, $dst|$dst, $src2}", + [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>, Requires<[HasSSE2]>, TB, OpSize; } -def ANDPSrm : I<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2), +def ANDPSrm : I<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2), "andps {$src2, $dst|$dst, $src2}", - []>, + [(set FR32:$dst, (X86fand FR32:$src1, + (X86loadpf32 addr:$src2)))]>, Requires<[HasSSE1]>, TB; -def ANDPDrm : I<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2), +def ANDPDrm : I<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2), "andpd {$src2, $dst|$dst, $src2}", - []>, + [(set FR64:$dst, (X86fand FR64:$src1, + (X86loadpf64 addr:$src2)))]>, Requires<[HasSSE2]>, TB, OpSize; -def ORPSrm : I<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2), +def ORPSrm : I<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2), "orps {$src2, $dst|$dst, $src2}", []>, Requires<[HasSSE1]>, TB; -def ORPDrm : I<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2), +def ORPDrm : I<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2), "orpd {$src2, $dst|$dst, $src2}", []>, Requires<[HasSSE2]>, TB, OpSize; -def XORPSrm : I<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2), - "xorps {$src2, $dst|$dst, $src2}", []>, +def XORPSrm : I<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2), + "xorps {$src2, $dst|$dst, $src2}", + [(set FR32:$dst, (X86fxor FR32:$src1, + (X86loadpf32 addr:$src2)))]>, Requires<[HasSSE1]>, TB; -def XORPDrm : I<0x57, MRMSrcMem, (ops FR64:$dst, 
FR64:$src1, f64mem:$src2), - "xorpd {$src2, $dst|$dst, $src2}", []>, +def XORPDrm : I<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2), + "xorpd {$src2, $dst|$dst, $src2}", + [(set FR64:$dst, (X86fxor FR64:$src1, + (X86loadpf64 addr:$src2)))]>, Requires<[HasSSE2]>, TB, OpSize; def ANDNPSrr : I<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), "andnps {$src2, $dst|$dst, $src2}", []>, Requires<[HasSSE1]>, TB; -def ANDNPSrm : I<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2), +def ANDNPSrm : I<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2), "andnps {$src2, $dst|$dst, $src2}", []>, Requires<[HasSSE1]>, TB; def ANDNPDrr : I<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), "andnpd {$src2, $dst|$dst, $src2}", []>, Requires<[HasSSE2]>, TB, OpSize; -def ANDNPDrm : I<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2), +def ANDNPDrm : I<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2), "andnpd {$src2, $dst|$dst, $src2}", []>, Requires<[HasSSE2]>, TB, OpSize; @@ -2982,6 +2998,42 @@ //===----------------------------------------------------------------------===// +// XMM Packed Floating point support (requires SSE / SSE2) +//===----------------------------------------------------------------------===// + +def MOVAPSrr : I<0x28, MRMSrcMem, (ops V4F4:$dst, V4F4:$src), + "movaps {$src, $dst|$dst, $src}", []>, + Requires<[HasSSE1]>, XS; +def MOVAPDrr : I<0x28, MRMSrcMem, (ops V2F8:$dst, V2F8:$src), + "movapd {$src, $dst|$dst, $src}", []>, + Requires<[HasSSE2]>, XD; + +def MOVAPSrm : I<0x28, MRMSrcMem, (ops V4F4:$dst, f128mem:$src), + "movaps {$src, $dst|$dst, $src}", []>, + Requires<[HasSSE1]>, XS; +def MOVAPSmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V4F4:$src), + "movaps {$src, $dst|$dst, $src}",[]>, + Requires<[HasSSE1]>, XD; +def MOVAPDrm : I<0x28, MRMSrcMem, (ops V2F8:$dst, f128mem:$src), + "movapd {$src, $dst|$dst, $src}", []>, + Requires<[HasSSE1]>, XD; +def MOVAPDmr : I<0x29, MRMDestMem, (ops 
f128mem:$dst, V2F8:$src), + "movapd {$src, $dst|$dst, $src}",[]>, + Requires<[HasSSE2]>, XD; + +// Pseudo-instructions to load FR32 / FR64 from f128mem using movaps / movapd. +// Upper bits are disregarded. +def MOVSAPSrm : I<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src), + "movaps {$src, $dst|$dst, $src}", + [(set FR32:$dst, (X86loadpf32 addr:$src))]>, + Requires<[HasSSE1]>, XS; +def MOVSAPDrm : I<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src), + "movapd {$src, $dst|$dst, $src}", + [(set FR64:$dst, (X86loadpf64 addr:$src))]>, + Requires<[HasSSE1]>, XD; + + +//===----------------------------------------------------------------------===// // Miscellaneous Instructions //===----------------------------------------------------------------------===// Index: llvm/lib/Target/X86/X86IntelAsmPrinter.h diff -u llvm/lib/Target/X86/X86IntelAsmPrinter.h:1.5 llvm/lib/Target/X86/X86IntelAsmPrinter.h:1.6 --- llvm/lib/Target/X86/X86IntelAsmPrinter.h:1.5 Fri Dec 16 19:03:57 2005 +++ llvm/lib/Target/X86/X86IntelAsmPrinter.h Tue Jan 31 16:28:30 2006 @@ -76,8 +76,8 @@ O << "QWORD PTR "; printMemReference(MI, OpNo); } - void printf80mem(const MachineInstr *MI, unsigned OpNo) { - O << "XWORD PTR "; + void printf128mem(const MachineInstr *MI, unsigned OpNo) { + O << "XMMWORD PTR "; printMemReference(MI, OpNo); } _______________________________________________ llvm-commits mailing list llvm-commits@cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits