--- lib/Target/R600/AMDGPU.h | 1 + lib/Target/R600/AMDGPUTargetMachine.cpp | 1 + lib/Target/R600/AMDIL.h | 19 +++- lib/Target/R600/AMDILISelDAGToDAG.cpp | 104 +++++---------------- lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 3 +- lib/Target/R600/R600ISelLowering.cpp | 103 ++++++++++++++++++++ lib/Target/R600/R600ISelLowering.h | 1 + lib/Target/R600/R600Instructions.td | 74 +++++++++++++++ lib/Target/R600/R600LowerConstCopy.cpp | 74 +++++++++++++++ 9 files changed, 296 insertions(+), 84 deletions(-) create mode 100644 lib/Target/R600/R600LowerConstCopy.cpp
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index 0f5125d..22351bf 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -23,6 +23,7 @@ class AMDGPUTargetMachine; // R600 Passes FunctionPass* createR600KernelParametersPass(const DataLayout *TD); FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); +FunctionPass *createR600LowerConstCopy(TargetMachine &tm); // SI Passes FunctionPass *createSIAnnotateControlFlowPass(); diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index d09dc2e..ad0d434 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -131,6 +131,7 @@ bool AMDGPUPassConfig::addPreEmitPass() { addPass(createAMDGPUCFGPreparationPass(*TM)); addPass(createAMDGPUCFGStructurizerPass(*TM)); addPass(createR600ExpandSpecialInstrsPass(*TM)); + addPass(createR600LowerConstCopy(*TM)); addPass(&FinalizeMachineBundlesID); } else { addPass(createSILowerLiteralConstantsPass(*TM)); diff --git a/lib/Target/R600/AMDIL.h b/lib/Target/R600/AMDIL.h index 4e577dc..dee51bc 100644 --- a/lib/Target/R600/AMDIL.h +++ b/lib/Target/R600/AMDIL.h @@ -90,14 +90,29 @@ namespace AMDGPUAS { enum AddressSpaces { PRIVATE_ADDRESS = 0, ///< Address space for private memory. GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). - CONSTANT_ADDRESS = 2, ///< Address space for constant memory. + CONSTANT_ADDRESS = 2, ///< Address space for constant memory (aka CONSTANT_BUFFER_0) LOCAL_ADDRESS = 3, ///< Address space for local memory. REGION_ADDRESS = 4, ///< Address space for region memory. ADDRESS_NONE = 5, ///< Address space for unknown memory. 
PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0) PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1) USER_SGPR_ADDRESS = 8, ///< Address space for USER_SGPRS on SI - LAST_ADDRESS = 9 + CONSTANT_BUFFER_1 = 9, + CONSTANT_BUFFER_2 = 10, + CONSTANT_BUFFER_3 = 11, + CONSTANT_BUFFER_4 = 12, + CONSTANT_BUFFER_5 = 13, + CONSTANT_BUFFER_6 = 14, + CONSTANT_BUFFER_7 = 15, + CONSTANT_BUFFER_8 = 16, + CONSTANT_BUFFER_9 = 17, + CONSTANT_BUFFER_10 = 18, + CONSTANT_BUFFER_11 = 19, + CONSTANT_BUFFER_12 = 20, + CONSTANT_BUFFER_13 = 21, + CONSTANT_BUFFER_14 = 22, + CONSTANT_BUFFER_15 = 23, + LAST_ADDRESS = 24 }; } // namespace AMDGPUAS diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index 725a5e4..8fbf153 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Compiler.h" +#include "llvm/CodeGen/SelectionDAG.h" #include <list> #include <queue> @@ -67,6 +68,9 @@ private: static bool isLocalLoad(const LoadSDNode *N); static bool isRegionLoad(const LoadSDNode *N); + bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); + bool SelectGlobalValueVariableOffset(SDValue Addr, + SDValue &BaseReg, SDValue& Offset); bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset); bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset); bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); @@ -258,87 +262,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { } break; } - - case ISD::INTRINSIC_WO_CHAIN: { - const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); - if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { - unsigned IntrinsicID = N->getConstantOperandVal(0); - switch (IntrinsicID) { - case AMDGPUIntrinsic::AMDGPU_load_const: { - const 
R600InstrInfo *TII = - static_cast<const R600InstrInfo*>(CurDAG->getTarget().getInstrInfo()); - int CSel = N->getConstantOperandVal(1); - std::vector<SDValue> Ops; - - // We'll try to fold the const in the alu insts with native operands - for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use); - Use != SDNode::use_end(); Use = Next) { - Next = llvm::next(Use); - - if (!Use->isMachineOpcode()) - continue; - - unsigned Opcode = Use->getMachineOpcode(); - - const MCInstrDesc & MCDesc = TII->get(Opcode); - if (HAS_NATIVE_OPERANDS(MCDesc.TSFlags)) { - - unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 : - MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1; - - const unsigned SrcOps[3][2] = { - {R600Operands::SRC0, R600Operands::SRC0_SEL}, - {R600Operands::SRC1, R600Operands::SRC1_SEL}, - {R600Operands::SRC2, R600Operands::SRC2_SEL} - }; - - Ops.clear(); - for (unsigned i = 0; i < Use->getNumOperands(); ++i) { - Ops.push_back(Use->getOperand(i)); - } - - int RegOpIdx = Use.getOperandNo(); - unsigned SrcIdx; - - for (SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) { - if (R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]] == - RegOpIdx + 1) - break; - } - - assert(SrcIdx < SrcNum && "ALU const folding: invalid operand"); - - int SelOpIdx = - R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]] - 1; - - Ops[RegOpIdx] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32); - Ops[SelOpIdx] = CurDAG->getTargetConstant(CSel, MVT::i32); - - CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands()); - } - } - - Ops.clear(); - Ops.push_back(CurDAG->getTargetConstant(1, MVT::i32)); - Ops.push_back(CurDAG->getTargetConstant(0, MVT::i32)); - Ops.push_back(CurDAG->getTargetConstant(0, MVT::i32)); - Ops.push_back(CurDAG->getTargetConstant(0, MVT::i32)); - Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32)); - Ops.push_back(CurDAG->getTargetConstant(0, MVT::i32)); - Ops.push_back(CurDAG->getTargetConstant(0, MVT::i32)); - 
Ops.push_back(CurDAG->getTargetConstant(0, MVT::i32)); - Ops.push_back(CurDAG->getTargetConstant(CSel, MVT::i32)); - Ops.push_back(CurDAG->getTargetConstant(1, MVT::i32)); - Ops.push_back(CurDAG->getRegister(AMDGPU::PRED_SEL_OFF, MVT::f32)); - Ops.push_back(CurDAG->getTargetConstant(0, MVT::i32)); - - return CurDAG->SelectNodeTo(N, AMDGPU::MOV, MVT::f32, Ops.data(), - Ops.size()); - - } - } - } - } } return SelectCode(N); } @@ -487,6 +410,25 @@ const char *AMDGPUDAGToDAGISel::getPassName() const { ///==== AMDGPU Functions ====/// +bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, + SDValue& IntPtr) { + if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) { + IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true); + return true; + } + return false; +} + +bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, + SDValue& BaseReg, SDValue &Offset) { + if (!isa<ConstantSDNode>(Addr)) { + BaseReg = Addr; + Offset = CurDAG->getIntPtrConstant(0, true); + return true; + } + return false; +} + bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset) { if (Addr.getOpcode() == ISD::TargetExternalSymbol || diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index e357598..e061b18 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -164,7 +164,8 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, case AMDGPU::VTX_READ_PARAM_32_eg: case AMDGPU::VTX_READ_GLOBAL_8_eg: case AMDGPU::VTX_READ_GLOBAL_32_eg: - case AMDGPU::VTX_READ_GLOBAL_128_eg: { + case AMDGPU::VTX_READ_GLOBAL_128_eg: + case AMDGPU::TEX_VTX_CONSTBUF: { uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 
cd6170e..6a3c57d 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -74,6 +74,10 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::STORE, MVT::i32, Custom); setOperationAction(ISD::STORE, MVT::v4i32, Custom); + setOperationAction(ISD::LOAD, MVT::f32, Custom); + setOperationAction(ISD::LOAD, MVT::i32, Custom); + setOperationAction(ISD::LOAD, MVT::v4f32, Custom); + setOperationAction(ISD::LOAD, MVT::v4i32, Custom); setTargetDAGCombine(ISD::FP_ROUND); setSchedulingPreference(Sched::VLIW); @@ -355,6 +359,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::FPOW: return LowerFPOW(Op, DAG); case ISD::INTRINSIC_VOID: { SDValue Chain = Op.getOperand(0); @@ -518,6 +523,16 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N, switch (N->getOpcode()) { default: return; case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG)); + return; + case ISD::LOAD: { + SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode(); + if (!Node) return; // Not custom lowered: leave Results empty. + Results.push_back(SDValue(Node, 0)); + Results.push_back(SDValue(Node, 1)); + // XXX: CustomWidenLowerNode seems not to replace the Chain value itself. + DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1)); + return; + } } } @@ -823,6 +838,94 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } +// Return 512 + (kc_bank << 12) for a constant address space, -1 otherwise. +static int +ConstantAddressBlock(unsigned AddressSpace) { + switch (AddressSpace) { + case AMDGPUAS::CONSTANT_ADDRESS: + return 512; + case AMDGPUAS::CONSTANT_BUFFER_1: + return 512 + 4096; + case AMDGPUAS::CONSTANT_BUFFER_2: + return 512 + 4096 * 2; + case AMDGPUAS::CONSTANT_BUFFER_3: + return 512 + 4096 * 3; + case AMDGPUAS::CONSTANT_BUFFER_4: 
+ return 512 + 4096 * 4; + case AMDGPUAS::CONSTANT_BUFFER_5: + return 512 + 4096 * 5; + case AMDGPUAS::CONSTANT_BUFFER_6: + return 512 + 4096 * 6; + case AMDGPUAS::CONSTANT_BUFFER_7: + return 512 + 4096 * 7; + case AMDGPUAS::CONSTANT_BUFFER_8: + return 512 + 4096 * 8; + case AMDGPUAS::CONSTANT_BUFFER_9: + return 512 + 4096 * 9; + case AMDGPUAS::CONSTANT_BUFFER_10: + return 512 + 4096 * 10; + case AMDGPUAS::CONSTANT_BUFFER_11: + return 512 + 4096 * 11; + case AMDGPUAS::CONSTANT_BUFFER_12: + return 512 + 4096 * 12; + case AMDGPUAS::CONSTANT_BUFFER_13: + return 512 + 4096 * 13; + case AMDGPUAS::CONSTANT_BUFFER_14: + return 512 + 4096 * 14; + case AMDGPUAS::CONSTANT_BUFFER_15: + return 512 + 4096 * 15; + default: + return -1; + } +} + +/// Custom-lower loads from the constant address spaces to CONST_ADDRESS nodes; +/// loads from any other address space return SDValue(). +SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const +{ + EVT VT = Op.getValueType(); + DebugLoc DL = Op.getDebugLoc(); + LoadSDNode *LoadNode = cast<LoadSDNode>(Op); + SDValue Chain = Op.getOperand(0); + SDValue Ptr = Op.getOperand(1); + + int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace()); + if (ConstantBlock > -1) { + SDValue Result; + // Guard against a null source value before classifying it. + const Value *SrcValue = LoadNode->getSrcValue(); + if (SrcValue && isa<Constant>(SrcValue)) { + SDValue Slots[4]; + for (unsigned i = 0; i < 4; i++) { + // We want Const position encoded with the following formula : + // (((512 + (kc_bank << 12) + const_index) << 2) + chan) + // const_index is Ptr computed by llvm using an alignment of 16. 
+ // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and + // then div by 4 at the ISel step + SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32)); + Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::f32, NewPtr); + } + Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, Slots, 4); + } else { + // Ptr is GA + Reg : it can't be folded, keep it as a v4f32 load + Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4f32, + DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)) + ); + } + + // NOTE(review): Result stays f32/v4f32 even for i32/v4i32 loads - confirm. + if (!VT.isVector()) { + Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Result, + DAG.getConstant(0, MVT::i32)); + } + + SDValue MergedValues[2] = { Result, Chain }; + return DAG.getMergeValues(MergedValues, 2, DL); + } + + return SDValue(); +} SDValue R600TargetLowering::LowerFPOW(SDValue Op, SelectionDAG &DAG) const { diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h index 2b954da..c141d50 100644 --- a/lib/Target/R600/R600ISelLowering.h +++ b/lib/Target/R600/R600ISelLowering.h @@ -63,6 +63,7 @@ private: SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; bool isZero(SDValue Op) const; }; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 41afb06..372ed6f 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -94,6 +94,8 @@ def LAST : InstFlag<"printLast", 1>; def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>; def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>; def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>; +def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>; +def 
ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>; class R600ALU_Word0 { field bits<32> Word0; @@ -1572,6 +1574,78 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in { "RETURN", [(IL_retflag)]>; } + +//===----------------------------------------------------------------------===// +// Constant Buffer Addressing Support +//===----------------------------------------------------------------------===// + +let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in { +def CONST_COPY : Instruction { + let OutOperandList = (outs R600_Reg32:$dst); + let InOperandList = (ins i32imm:$src); + let Pattern = [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))]; + let AsmString = "CONST_COPY"; + let neverHasSideEffects = 1; + let isAsCheapAsAMove = 1; + let Itinerary = NullALU; +} +} // end isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" + +def TEX_VTX_CONSTBUF : + InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr", + [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr))]>, + VTX_WORD1_GPR, VTX_WORD0 { + + let VC_INST = 0; + let FETCH_TYPE = 2; + let FETCH_WHOLE_QUAD = 0; + let BUFFER_ID = 0; + let SRC_REL = 0; + let SRC_SEL_X = 0; + let DST_REL = 0; + let USE_CONST_FIELDS = 0; + let NUM_FORMAT_ALL = 2; + let FORMAT_COMP_ALL = 1; + let SRF_MODE_ALL = 1; + let MEGA_FETCH_COUNT = 16; + let DST_SEL_X = 0; + let DST_SEL_Y = 1; + let DST_SEL_Z = 2; + let DST_SEL_W = 3; + let DATA_FORMAT = 35; + + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; + +// LLVM can only encode 64-bit instructions, so these fields are manually +// encoded in R600MCCodeEmitter +// +// bits<16> OFFSET; +// bits<2> ENDIAN_SWAP = 0; +// bits<1> CONST_BUF_NO_STRIDE = 0; +// bits<1> MEGA_FETCH = 0; +// bits<1> ALT_CONST = 0; +// bits<2> BUFFER_INDEX_MODE = 0; + + + +// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding +// is done in R600MCCodeEmitter) +// +// Inst{79-64} = OFFSET; 
+// Inst{81-80} = ENDIAN_SWAP; +// Inst{82} = CONST_BUF_NO_STRIDE; +// Inst{83} = MEGA_FETCH; +// Inst{84} = ALT_CONST; +// Inst{86-85} = BUFFER_INDEX_MODE; +// Inst{95-87} = 0; Reserved + +// VTX_WORD3 (Padding) +// +// Inst{127-96} = 0; +} + + //===--------------------------------------------------------------------===// // Instructions support //===--------------------------------------------------------------------===// diff --git a/lib/Target/R600/R600LowerConstCopy.cpp b/lib/Target/R600/R600LowerConstCopy.cpp new file mode 100644 index 0000000..d14ae20 --- /dev/null +++ b/lib/Target/R600/R600LowerConstCopy.cpp @@ -0,0 +1,74 @@ +//===-- R600LowerConstCopy.cpp - Propagate ConstCopy / lower them to MOV---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass is intended to handle remaining ConstCopy pseudo MachineInstr. +/// ISel will fold each Const Buffer read inside scalar ALU. However it cannot +/// fold them inside vector instruction, like DOT4 or Cube; ISel emits +/// ConstCopy instead. This pass (executed after ExpandSpecialInstrs) will try +/// to fold them if possible or replace them by MOV otherwise. +/// TODO: Implement the folding part, using Copy Propagation algorithm. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "R600InstrInfo.h" +#include "llvm/GlobalValue.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" + +namespace llvm { + +class R600LowerConstCopy : public MachineFunctionPass { +private: + static char ID; + const R600InstrInfo *TII; +public: + R600LowerConstCopy(TargetMachine &tm); + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { return "R600 Lower Const Copy"; } +}; + +char R600LowerConstCopy::ID = 0; + +R600LowerConstCopy::R600LowerConstCopy(TargetMachine &tm) : + MachineFunctionPass(ID), + TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) +{ +} + +/// Rewrite each CONST_COPY as a MOV from ALU_CONST; true if MF changed. +bool R600LowerConstCopy::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + MachineBasicBlock &MBB = *BB; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E;) { + MachineInstr &MI = *I; + I = llvm::next(I); + if (MI.getOpcode() != AMDGPU::CONST_COPY) + continue; + MachineInstr *NewMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::MOV, + MI.getOperand(0).getReg(), AMDGPU::ALU_CONST); + NewMI->getOperand(9).setImm(MI.getOperand(1).getImm()); + MI.eraseFromParent(); + Changed = true; + } + } + return Changed; +} + +FunctionPass *createR600LowerConstCopy(TargetMachine &tm) { + return new R600LowerConstCopy(tm); +} + +} -- 1.8.0.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev