From: Vadim Girlin <vadimgir...@gmail.com> Remove the Cxxx constant registers and add a new special register, "ALU_CONST", plus a new "sel" operand for each ALU source. When a source register is ALU_CONST, its "sel" operand holds the value the driver uses to override src.sel, src.kc_bank and src.chan for constants.
v2[Vincent Lejeune]: -Parse load const address space pointer semantic --- lib/Target/AMDGPU/AMDGPU.h | 1 + lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 1 + lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp | 79 +++++++++++++ .../AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp | 81 +++++++------- lib/Target/AMDGPU/R600Defines.h | 15 +++ lib/Target/AMDGPU/R600EliminateSymbolicOperand.cpp | 122 +++++++++++++++++++++ lib/Target/AMDGPU/R600ISelLowering.cpp | 107 ++++++++++++++++-- lib/Target/AMDGPU/R600ISelLowering.h | 1 + lib/Target/AMDGPU/R600InstrInfo.cpp | 18 +-- lib/Target/AMDGPU/R600Instructions.td | 84 +++++++++++++- lib/Target/AMDGPU/R600RegisterInfo.cpp | 7 +- lib/Target/AMDGPU/R600RegisterInfo.td | 12 +- 12 files changed, 448 insertions(+), 80 deletions(-) create mode 100644 lib/Target/AMDGPU/R600EliminateSymbolicOperand.cpp diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h index 40864b0..2263b15 100644 --- a/lib/Target/AMDGPU/AMDGPU.h +++ b/lib/Target/AMDGPU/AMDGPU.h @@ -23,6 +23,7 @@ class AMDGPUTargetMachine; // R600 Passes FunctionPass* createR600KernelParametersPass(const DataLayout *TD); FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); +FunctionPass *createR600EliminateSymbolicOperandPass(TargetMachine &tm); // SI Passes FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm); diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index e42fa8a..679a0fe 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -130,6 +130,7 @@ bool AMDGPUPassConfig::addPreEmitPass() { const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { addPass(createR600ExpandSpecialInstrsPass(*TM)); + addPass(createR600EliminateSymbolicOperandPass(*TM)); addPass(&FinalizeMachineBundlesID); } else { addPass(createSILowerLiteralConstantsPass(*TM)); diff --git 
a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp index a765438..6f156df 100644 --- a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Compiler.h" +#include "llvm/CodeGen/SelectionDAG.h" #include <list> #include <queue> @@ -66,6 +67,9 @@ private: static bool isLocalLoad(const LoadSDNode *N); static bool isRegionLoad(const LoadSDNode *N); + bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& TGA); + bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue& Offset, + SDValue &TGA); bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset); bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset); bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); @@ -400,6 +404,81 @@ const char *AMDGPUDAGToDAGISel::getPassName() const { ///==== AMDGPU Functions ====/// +class R600Address { +public: + const GlobalValue *Global; + unsigned Offset; + SDValue BaseReg; + + R600Address(const GlobalValue *GV, unsigned Off, const SDValue &BR) : + Global(GV), Offset(Off), BaseReg(BR) + { } + +}; + +static +const R600Address SelectAddr(SDValue Addr, SelectionDAG &DAG) { + switch (Addr.getOpcode()) { + case ISD::Constant: { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Addr); + return R600Address(0, Const->getZExtValue(), SDValue()); + } + case ISD::GlobalAddress: { + GlobalAddressSDNode * G = dyn_cast<GlobalAddressSDNode>(Addr); + return R600Address(G->getGlobal(), 0, SDValue()); + } + case ISD::OR: + if (!DAG.isBaseWithConstantOffset(Addr)) + break; + // Else OR and ADD code is the same + case ISD::ADD: { + const R600Address &LHSAddr = SelectAddr(Addr.getOperand(0), DAG); + const R600Address &RHSAddr = SelectAddr(Addr.getOperand(1), DAG); + + if (LHSAddr.BaseReg.getNode() && RHSAddr.BaseReg.getNode()) { + break; + } + + const GlobalValue 
*NewGV = LHSAddr.Global?LHSAddr.Global:RHSAddr.Global; + if (LHSAddr.BaseReg.getNode()) { + return R600Address(NewGV, LHSAddr.Offset + RHSAddr.Offset, + LHSAddr.BaseReg); + } else if (RHSAddr.BaseReg.getNode()) { + return R600Address(NewGV, LHSAddr.Offset + RHSAddr.Offset, + RHSAddr.BaseReg); + } else { + return R600Address(NewGV, LHSAddr.Offset + RHSAddr.Offset, SDValue()); + } + } + default: + break; + } + return R600Address(0, 0, Addr); +} + +bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, + SDValue& TGA) { + const R600Address R600Addr = SelectAddr(Addr, *CurDAG); + if (!R600Addr.BaseReg.getNode()) { + TGA = CurDAG->getTargetGlobalAddress(R600Addr.Global, Addr.getDebugLoc(), + Addr.getValueType(), R600Addr.Offset); + return true; + } + return false; +} + +bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, + SDValue& Offset, SDValue &TGA) { + const R600Address R600Addr = SelectAddr(Addr, *CurDAG); + if (R600Addr.BaseReg.getNode()) { + TGA = CurDAG->getTargetGlobalAddress(R600Addr.Global, Addr.getDebugLoc(), + Addr.getValueType(), R600Addr.Offset); + Offset = R600Addr.BaseReg; + return true; + } + return false; +} + bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset) { if (Addr.getOpcode() == ISD::TargetExternalSymbol || diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp index 018234a..4875dac 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp @@ -64,8 +64,8 @@ private: void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, raw_ostream &OS) const; void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const; - void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value, - raw_ostream &OS) const; + void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx, + raw_ostream &OS) const; void EmitDst(const 
MCInst &MI, raw_ostream &OS) const; void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, raw_ostream &OS) const; @@ -165,7 +165,8 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, case AMDGPU::VTX_READ_GLOBAL_i32_eg: case AMDGPU::VTX_READ_GLOBAL_f32_eg: case AMDGPU::VTX_READ_GLOBAL_v4i32_eg: - case AMDGPU::VTX_READ_GLOBAL_v4f32_eg: { + case AMDGPU::VTX_READ_GLOBAL_v4f32_eg: + case AMDGPU::TEX_VTX_CONSTBUF: { uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset @@ -195,7 +196,6 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, raw_ostream &OS) const { const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode()); - unsigned NumOperands = MI.getNumOperands(); // Emit instruction type EmitByte(INSTR_ALU, OS); @@ -211,19 +211,21 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI, InstWord01 |= ISAOpCode << 1; } - unsigned SrcIdx = 0; - for (unsigned int OpIdx = 1; OpIdx < NumOperands; ++OpIdx) { - if (MI.getOperand(OpIdx).isImm() || MI.getOperand(OpIdx).isFPImm() || - OpIdx == (unsigned)MCDesc.findFirstPredOperandIdx()) { - continue; - } - EmitSrcISA(MI, OpIdx, InstWord01, OS); - SrcIdx++; - } + unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 : + MCDesc.TSFlags & R600_InstFlag::OP2 ? 
2 : 1; - // Emit zeros for unused sources - for ( ; SrcIdx < 3; SrcIdx++) { - EmitNullBytes(SRC_BYTE_COUNT - 6, OS); + EmitByte(SrcNum, OS); + + const unsigned SrcOps[3][2] = { + {R600Operands::SRC0, R600Operands::SRC0_SEL}, + {R600Operands::SRC1, R600Operands::SRC1_SEL}, + {R600Operands::SRC2, R600Operands::SRC2_SEL} + }; + + for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) { + unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]]; + unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]]; + EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS); } Emit(InstWord01, OS); @@ -294,34 +296,37 @@ void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx, } -void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx, - uint64_t &Value, raw_ostream &OS) const { - const MCOperand &MO = MI.getOperand(OpIdx); +void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, + unsigned SelOpIdx, raw_ostream &OS) const { + const MCOperand &RegMO = MI.getOperand(RegOpIdx); + const MCOperand &SelMO = MI.getOperand(SelOpIdx); + union { float f; uint32_t i; } InlineConstant; InlineConstant.i = 0; - // Emit the source select (2 bytes). For GPRs, this is the register index. - // For other potential instruction operands, (e.g. constant registers) the - // value of the source select is defined in the r600isa docs. - if (MO.isReg()) { - unsigned Reg = MO.getReg(); - if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) { - EmitByte(1, OS); - } else { - EmitByte(0, OS); - } + // Emit source type (1 byte) and source select (4 bytes). For GPRs type is 0 + // and select is 0 (GPR index is encoded in the instr encoding. For constants + // type is 1 and select is the original const select passed from the driver. 
+ unsigned Reg = RegMO.getReg(); + if (Reg == AMDGPU::ALU_CONST) { + EmitByte(1, OS); + uint32_t Sel = SelMO.getImm(); + Emit(Sel, OS); + } else { + EmitByte(0, OS); + Emit((uint32_t)0, OS); + } - if (Reg == AMDGPU::ALU_LITERAL_X) { - unsigned ImmOpIndex = MI.getNumOperands() - 1; - MCOperand ImmOp = MI.getOperand(ImmOpIndex); - if (ImmOp.isFPImm()) { - InlineConstant.f = ImmOp.getFPImm(); - } else { - assert(ImmOp.isImm()); - InlineConstant.i = ImmOp.getImm(); - } + if (Reg == AMDGPU::ALU_LITERAL_X) { + unsigned ImmOpIndex = MI.getNumOperands() - 1; + MCOperand ImmOp = MI.getOperand(ImmOpIndex); + if (ImmOp.isFPImm()) { + InlineConstant.f = ImmOp.getFPImm(); + } else { + assert(ImmOp.isImm()); + InlineConstant.i = ImmOp.getImm(); } } diff --git a/lib/Target/AMDGPU/R600Defines.h b/lib/Target/AMDGPU/R600Defines.h index 7dea8e4..e19eea3 100644 --- a/lib/Target/AMDGPU/R600Defines.h +++ b/lib/Target/AMDGPU/R600Defines.h @@ -62,18 +62,33 @@ namespace R600Operands { SRC0_NEG, SRC0_REL, SRC0_ABS, + SRC0_SEL, SRC1, SRC1_NEG, SRC1_REL, SRC1_ABS, + SRC1_SEL, SRC2, SRC2_NEG, SRC2_REL, + SRC2_SEL, LAST, PRED_SEL, IMM, COUNT }; + + const static int ALUOpTable[3][R600Operands::COUNT] = { +// W C S S S S S S S S S S S +// R O D L S R R R R S R R R R S R R R L P +// D U I M R A R C C C C R C C C C R C C C A R I +// S E U T O E M C 0 0 0 0 C 1 1 1 1 C 2 2 2 S E M +// T M P E D L P 0 N R A S 1 N R A S 2 N R S T D M + {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12}, + {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19}, + {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17} + }; + } #endif // R600DEFINES_H_ diff --git a/lib/Target/AMDGPU/R600EliminateSymbolicOperand.cpp b/lib/Target/AMDGPU/R600EliminateSymbolicOperand.cpp new file mode 100644 index 0000000..3d01582 --- /dev/null +++ b/lib/Target/AMDGPU/R600EliminateSymbolicOperand.cpp @@ -0,0 +1,122 @@ +//===-- R600EliminateSymbolicOperand.cpp - Eliminate Symbolic 
Operands-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Before this pass backend can manipulate symbolic operands like +/// GlobalAddress for data read from Const Buffers or FrameIndex for stack +/// allocated array. This pass is used to change these operands by a value +/// that can be passed to MCInstrEmitter. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "R600InstrInfo.h" +#include "llvm/GlobalValue.h" + +namespace llvm { + +class R600EliminateSymbolicOperand : public MachineFunctionPass { +private: + static char ID; + const R600InstrInfo *TII; +public: + R600EliminateSymbolicOperand(TargetMachine &tm); + virtual bool runOnMachineFunction(MachineFunction &MF); + const char *getPassName() const { return "R600 Eliminate Symbolic Operand"; } +}; + +char R600EliminateSymbolicOperand::ID = 0; + +R600EliminateSymbolicOperand::R600EliminateSymbolicOperand(TargetMachine &tm) : + MachineFunctionPass(ID), + TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) +{ +} + +/// Map a constant buffer global name ("const0" .. "const15") to its index. +static unsigned +getConstBufferIdxFromName(StringRef Name) { + if (Name == "const0") + return 0; + if (Name == "const1") + return 1; + if (Name == "const2") + return 2; + if (Name == "const3") + return 3; + if (Name == "const4") + return 4; + if (Name == "const5") + return 5; + if (Name == "const6") + return 6; + if (Name == "const7") + return 7; + if (Name == "const8") + return 8; + if (Name == "const9") + return 9; + if (Name == "const10") + return 10; + if (Name == "const11") + return 11; + if (Name == "const12") + return 12; + if (Name == "const13") + return 13; + if (Name == "const14") + return 14; + if
(Name == "const15") + return 15; + assert(0 && "Unknown constant buffer name"); + return 0; +} + +/// Replace every global-address ALU source with ALU_CONST and set its sel. +bool R600EliminateSymbolicOperand::runOnMachineFunction(MachineFunction &MF) { + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + MachineBasicBlock &MBB = *BB; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + MachineInstr &MI = *I; + if (!TII->isALUInstr(MI.getOpcode())) + continue; + for (int i = 0, e = MI.getNumOperands(); i < e; i++) { + MachineOperand &MO = MI.getOperand(i); + if (MO.isGlobal()) { + unsigned ConstSel = MO.getOffset() / 4 + + 2048 + + 16384 * getConstBufferIdxFromName(MO.getGlobal()->getName()); + int SelIdx = -1; + if (i == TII->getOperandIdx(MI, R600Operands::SRC0)) { + SelIdx = TII->getOperandIdx(MI, R600Operands::SRC0_SEL); + } else if (i == TII->getOperandIdx(MI, R600Operands::SRC1)) { + SelIdx = TII->getOperandIdx(MI, R600Operands::SRC1_SEL); + } else if (i == TII->getOperandIdx(MI, R600Operands::SRC2)) { + SelIdx = TII->getOperandIdx(MI, R600Operands::SRC2_SEL); + } + assert(SelIdx >= 0 && "Wrong operand location for Global Address"); + if (SelIdx < 0) continue; + MI.getOperand(SelIdx).setImm(ConstSel); + MO.ChangeToRegister(AMDGPU::ALU_CONST, false); + } + } + } + } + return false; +} + +FunctionPass *createR600EliminateSymbolicOperandPass(TargetMachine &tm) { + return new R600EliminateSymbolicOperand(tm); +} + +} diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index d09f8c0e..6ff99c3 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -16,6 +16,7 @@ #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" +#include "AMDGPURegisterInfo.h" #include "llvm/Argument.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -97,7 +98,12 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::SELECT, MVT::v4i32, Expand); setOperationAction(ISD::VSELECT, MVT::v4f32,
Expand); setOperationAction(ISD::VSELECT, MVT::v4i32, Expand); + setOperationAction(ISD::LOAD, MVT::f32, Custom); + setOperationAction(ISD::LOAD, MVT::i32, Custom); + setOperationAction(ISD::LOAD, MVT::v4f32, Custom); + setOperationAction(ISD::LOAD, MVT::v4i32, Custom); setTargetDAGCombine(ISD::FP_ROUND); + setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setSchedulingPreference(Sched::VLIW); } @@ -138,13 +144,9 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( break; } - case AMDGPU::R600_LOAD_CONST: { - int64_t RegIndex = MI->getOperand(1).getImm(); - unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex); - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY)) - .addOperand(MI->getOperand(0)) - .addReg(ConstantReg); - break; + case AMDGPU::TEX_VTX_CONSTBUF: { + MI->getOperand(2).ChangeToImmediate(MI->getOperand(2).getOffset() * 16); + return BB; } case AMDGPU::MASK_WRITE: { @@ -417,6 +419,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::FPOW: return LowerFPOW(Op, DAG); case ISD::INTRINSIC_VOID: { SDValue Chain = Op.getOperand(0); @@ -580,6 +583,16 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N, switch (N->getOpcode()) { default: return; case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG)); + return; + case ISD::LOAD: { + SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode(); + if (!Node) return; // LowerLOAD declined (address space is not constant). + Results.push_back(SDValue(Node, 0)); + Results.push_back(SDValue(Node, 1)); + // XXX: LLVM seems not to replace the chain value in CustomWidenLowerNode. + DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1)); + return; + } } } @@ -861,6 +874,75 @@ SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return Cond; } +/// Fold a right shift by Amount into the address expression Addr. +const SDValue
PtrSRL(SDValue Addr, unsigned Amount, SelectionDAG &DAG) { + switch (Addr.getOpcode()) { + case ISD::GlobalAddress: + case ISD::BITCAST: + return Addr; + case ISD::ADD: + case ISD::OR: + return DAG.getNode(Addr.getOpcode(), Addr.getDebugLoc(), Addr.getValueType(), + PtrSRL(Addr.getOperand(0), Amount, DAG), + PtrSRL(Addr.getOperand(1), Amount, DAG)); + case ISD::SHL: { + ConstantSDNode *SHLPad = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); + assert(SHLPad && "Not Constant SHLPad!"); + unsigned PadAmount = SHLPad->getZExtValue(); + assert(PadAmount >= Amount && "SHL amount too small to absorb the SRL"); + return DAG.getNode(ISD::SHL, Addr.getDebugLoc(), Addr.getValueType(), + Addr.getOperand(0), + DAG.getConstant(PadAmount - Amount, MVT::i32)); + } + case ISD::Constant: { + ConstantSDNode *SHLPad = cast<ConstantSDNode>(Addr); + return DAG.getConstant(SHLPad->getZExtValue() >> Amount, MVT::i32); + } + default: + assert(0 && "Invalid ptr format!"); + return Addr; + } +} + +/// Custom lower constant address space loads into CONST_ADDRESS nodes. +SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const +{ + EVT VT = Op.getValueType(); + DebugLoc DL = Op.getDebugLoc(); + LoadSDNode *LoadNode = cast<LoadSDNode>(Op); + SDValue Chain = Op.getOperand(0); + SDValue Ptr = Op.getOperand(1); + + if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) { + SDValue Result; + if (dyn_cast_or_null<ConstantExpr>(LoadNode->getSrcValue())) { + // Ptr is GA + Cst : it can be folded, turns it into 4x f32 + SDValue Slots[4]; + for (unsigned i = 0; i < 4; i++) { + SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(4 * i, MVT::i32)); + Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::f32, NewPtr); + } + Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, Slots, 4); + } else { + // Ptr is GA + Reg : it cant be folded, keeps it as a v4f32 load + Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4f32, + PtrSRL(Ptr, 4, DAG)); + } + + if (!VT.isVector()) { + Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Result, +
DAG.getConstant(0, MVT::i32)); + } + + SDValue MergedValues[2] = { Result, Chain }; + return DAG.getMergeValues(MergedValues, 2, DL); + } + + // Loads from other address spaces are not custom lowered here. + return SDValue(); +} + SDValue R600TargetLowering::LowerFPOW(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); @@ -918,6 +1000,17 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, } break; } + // Extract_vec (Build_vector) generated by custom lowering + // also needs to be customly combined + case ISD::EXTRACT_VECTOR_ELT: { + SDValue Arg = N->getOperand(0); + if (Arg.getOpcode() == ISD::BUILD_VECTOR) { + if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) { + unsigned Element = Const->getZExtValue(); + return Arg->getOperand(Element); + } + } + } } return SDValue(); } diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h index bdb0a55..70ece84 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.h +++ b/lib/Target/AMDGPU/R600ISelLowering.h @@ -62,6 +62,7 @@ private: SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; bool isZero(SDValue Op) const; }; diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp index a60a180..6c1c50a 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -484,13 +484,15 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB .addReg(Src0Reg) // $src0 .addImm(0) // $src0_neg .addImm(0) // $src0_rel - .addImm(0); // $src0_abs + .addImm(0) // $src0_abs + .addImm(0); // $src0_sel if (Src1Reg) { MIB.addReg(Src1Reg) // $src1 .addImm(0) // $src1_neg .addImm(0) // $src1_rel - .addImm(0); // $src1_abs + .addImm(0) // $src1_abs + .addImm(0); // $src1_sel } //XXX: The r600g finalizer expects this to be 1, once we've moved the @@ -519,16 +521,6 @@ int
R600InstrInfo::getOperandIdx(const MachineInstr &MI, int R600InstrInfo::getOperandIdx(unsigned Opcode, R600Operands::Ops Op) const { - const static int OpTable[3][R600Operands::COUNT] = { -// W C S S S S S S S S -// R O D L S R R R S R R R S R R L P -// D U I M R A R C C C C C C C R C C A R I -// S E U T O E M C 0 0 0 C 1 1 1 C 2 2 S E M -// T M P E D L P 0 N R A 1 N R A 2 N R T D M - {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8,-1,-1,-1,-1,-1,-1,-1, 9,10,11}, - {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,-1,-1,-1,13,14,15,16,17}, - {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8,-1, 9,10,11,12,13,14} - }; unsigned TargetFlags = get(Opcode).TSFlags; unsigned OpTableIdx; @@ -554,7 +546,7 @@ int R600InstrInfo::getOperandIdx(unsigned Opcode, OpTableIdx = 2; } - return OpTable[OpTableIdx][Op]; + return R600Operands::ALUOpTable[OpTableIdx][Op]; } void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op, diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td index dc32368..691b1c8 100644 --- a/lib/Target/AMDGPU/R600Instructions.td +++ b/lib/Target/AMDGPU/R600Instructions.td @@ -69,6 +69,8 @@ class InstFlag<string PM = "printOperand", int Default = 0> let PrintMethod = PM; } +def SEL : OperandWithDefaultOps <i32, (ops (i32 0))>; + def LITERAL : InstFlag<"printLiteral">; def WRITE : InstFlag <"printWrite", 1>; @@ -88,6 +90,8 @@ def LAST : InstFlag<"printLast", 1>; def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>; def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>; def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>; +def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>; +def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>; class R600ALU_Word0 { field bits<32> Word0; @@ -262,7 +266,7 @@ class R600_1OP <bits<11> inst, string opName, list<dag> pattern, InstR600 <0, (outs R600_Reg32:$dst), (ins WRITE:$write, OMOD:$omod, 
REL:$dst_rel, CLAMP:$clamp, - R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, + R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel, LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), !strconcat(opName, "$clamp $dst$write$dst_rel$omod, " @@ -302,8 +306,8 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern, (outs R600_Reg32:$dst), (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp, - R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, - R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, + R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel, + R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel, LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), !strconcat(opName, "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, " @@ -339,9 +343,9 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern, InstR600 <0, (outs R600_Reg32:$dst), (ins REL:$dst_rel, CLAMP:$clamp, - R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, - R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, - R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, + R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel, + R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel, + R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel, LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), !strconcat(opName, "$clamp $dst$dst_rel, " "$src0_neg$src0$src0_rel, " @@ -1596,6 +1600,74 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in { "RETURN", [(IL_retflag)]>; } + +//===----------------------------------------------------------------------===// +// Constant Buffer Addressing Support +//===----------------------------------------------------------------------===// + +def CONST_COPY : R600_1OP <0x19, "CONST_COPY", + [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src0))]> +{ + let mayLoad 
= 1; +} + +def TEX_VTX_CONSTBUF : + InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr", + [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr))]>, + VTX_WORD1_GPR, VTX_WORD0 { + + let usesCustomInserter = 1; + + let VC_INST = 0; + let FETCH_TYPE = 2; + let FETCH_WHOLE_QUAD = 0; + let BUFFER_ID = 0; + let SRC_REL = 0; + let SRC_SEL_X = 0; + let DST_REL = 0; + let USE_CONST_FIELDS = 0; + let NUM_FORMAT_ALL = 2; + let FORMAT_COMP_ALL = 1; + let SRF_MODE_ALL = 1; + let MEGA_FETCH_COUNT = 16; + let DST_SEL_X = 0; + let DST_SEL_Y = 1; + let DST_SEL_Z = 2; + let DST_SEL_W = 3; + let DATA_FORMAT = 35; + + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; + +// LLVM can only encode 64-bit instructions, so these fields are manually +// encoded in R600CodeEmitter +// +// bits<16> OFFSET; +// bits<2> ENDIAN_SWAP = 0; +// bits<1> CONST_BUF_NO_STRIDE = 0; +// bits<1> MEGA_FETCH = 0; +// bits<1> ALT_CONST = 0; +// bits<2> BUFFER_INDEX_MODE = 0; + + + +// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding +// is done in R600CodeEmitter +// +// Inst{79-64} = OFFSET; +// Inst{81-80} = ENDIAN_SWAP; +// Inst{82} = CONST_BUF_NO_STRIDE; +// Inst{83} = MEGA_FETCH; +// Inst{84} = ALT_CONST; +// Inst{86-85} = BUFFER_INDEX_MODE; +// Inst{95-86} = 0; Reserved + +// VTX_WORD3 (Padding) +// +// Inst{127-96} = 0; +} + + //===----------------------------------------------------------------------===// // ISel Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/R600RegisterInfo.cpp b/lib/Target/AMDGPU/R600RegisterInfo.cpp index a39f83d..397fbaf 100644 --- a/lib/Target/AMDGPU/R600RegisterInfo.cpp +++ b/lib/Target/AMDGPU/R600RegisterInfo.cpp @@ -15,6 +15,7 @@ #include "R600RegisterInfo.h" #include "AMDGPUTargetMachine.h" #include "R600Defines.h" +#include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" using namespace llvm; @@ -38,16 +39,12 @@ BitVector 
R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(AMDGPU::NEG_ONE); Reserved.set(AMDGPU::PV_X); Reserved.set(AMDGPU::ALU_LITERAL_X); + Reserved.set(AMDGPU::ALU_CONST); Reserved.set(AMDGPU::PREDICATE_BIT); Reserved.set(AMDGPU::PRED_SEL_OFF); Reserved.set(AMDGPU::PRED_SEL_ZERO); Reserved.set(AMDGPU::PRED_SEL_ONE); - for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(), - E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) { - Reserved.set(*I); - } - for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(), E = MFI->ReservedRegs.end(); I != E; ++I) { Reserved.set(*I); diff --git a/lib/Target/AMDGPU/R600RegisterInfo.td b/lib/Target/AMDGPU/R600RegisterInfo.td index d3d6d25..10fe858 100644 --- a/lib/Target/AMDGPU/R600RegisterInfo.td +++ b/lib/Target/AMDGPU/R600RegisterInfo.td @@ -28,9 +28,6 @@ foreach Index = 0-127 in { // 32-bit Temporary Registers def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>; - // 32-bit Constant Registers (There are more than 128, this the number - // that is currently supported. 
- def C#Index#_#Chan : R600RegWithChan <"C"#Index#"."#Chan, Index, Chan>; } // 128-bit Temporary Registers def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW", @@ -46,7 +43,6 @@ foreach Index = 448-464 in { def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>; } - // Special Registers def ZERO : R600Reg<"0.0", 248>; @@ -61,16 +57,11 @@ def PREDICATE_BIT : R600Reg<"PredicateBit", 0>; def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>; def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>; def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>; +def ALU_CONST : R600Reg<"Const", 0>; def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32, (add (sequence "ArrayBase%u", 448, 464))>; -def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, - (add (interleave - (interleave (sequence "C%u_X", 0, 127), - (sequence "C%u_Z", 0, 127)), - (interleave (sequence "C%u_Y", 0, 127), - (sequence "C%u_W", 0, 127))))>; def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32, (add (sequence "T%u_X", 0, 127))>; @@ -91,7 +82,6 @@ def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add R600_TReg32, - R600_CReg32, R600_ArrayBase, ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>; -- 1.8.0.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev