On Wed, Dec 26, 2012 at 05:37:20PM +0100, Vincent Lejeune wrote: > From: Vadim Girlin <vadimgir...@gmail.com> > > Remove Cxxx registers, add new special register - "ALU_CONST" and new > operand for each alu src - "sel". ALU_CONST is used to designate that the > new operand contains the value to override src.sel, src.kc_bank, src.chan > for constants in the driver. > > v2[Vincent Lejeune]: > -Parse load const address space pointer semantic
Just one coding style mistake I saw, otherwise: Reviewed-by: Tom Stellard <thomas.stell...@amd.com> > --- > lib/Target/AMDGPU/AMDGPU.h | 1 + > lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 1 + > lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp | 79 +++++++++++++ > .../AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp | 81 +++++++------- > lib/Target/AMDGPU/R600Defines.h | 15 +++ > lib/Target/AMDGPU/R600EliminateSymbolicOperand.cpp | 122 > +++++++++++++++++++++ > lib/Target/AMDGPU/R600ISelLowering.cpp | 107 ++++++++++++++++-- > lib/Target/AMDGPU/R600ISelLowering.h | 1 + > lib/Target/AMDGPU/R600InstrInfo.cpp | 18 +-- > lib/Target/AMDGPU/R600Instructions.td | 84 +++++++++++++- > lib/Target/AMDGPU/R600RegisterInfo.cpp | 7 +- > lib/Target/AMDGPU/R600RegisterInfo.td | 12 +- > 12 files changed, 448 insertions(+), 80 deletions(-) > create mode 100644 lib/Target/AMDGPU/R600EliminateSymbolicOperand.cpp > > diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h > index 40864b0..2263b15 100644 > --- a/lib/Target/AMDGPU/AMDGPU.h > +++ b/lib/Target/AMDGPU/AMDGPU.h > @@ -23,6 +23,7 @@ class AMDGPUTargetMachine; > // R600 Passes > FunctionPass* createR600KernelParametersPass(const DataLayout *TD); > FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); > +FunctionPass *createR600EliminateSymbolicOperandPass(TargetMachine &tm); > > // SI Passes > FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm); > diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp > b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp > index e42fa8a..679a0fe 100644 > --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp > +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp > @@ -130,6 +130,7 @@ bool AMDGPUPassConfig::addPreEmitPass() { > const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); > if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { > addPass(createR600ExpandSpecialInstrsPass(*TM)); > + addPass(createR600EliminateSymbolicOperandPass(*TM)); > 
addPass(&FinalizeMachineBundlesID); > } else { > addPass(createSILowerLiteralConstantsPass(*TM)); > diff --git a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp > b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp > index a765438..6f156df 100644 > --- a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp > +++ b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp > @@ -20,6 +20,7 @@ > #include "llvm/CodeGen/PseudoSourceValue.h" > #include "llvm/CodeGen/SelectionDAGISel.h" > #include "llvm/Support/Compiler.h" > +#include "llvm/CodeGen/SelectionDAG.h" > #include <list> > #include <queue> > > @@ -66,6 +67,9 @@ private: > static bool isLocalLoad(const LoadSDNode *N); > static bool isRegionLoad(const LoadSDNode *N); > > + bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& TGA); > + bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue& Offset, > + SDValue &TGA); > bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset); > bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset); > bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); > @@ -400,6 +404,81 @@ const char *AMDGPUDAGToDAGISel::getPassName() const { > > ///==== AMDGPU Functions ====/// > > +class R600Address { > +public: > + const GlobalValue *Global; > + unsigned Offset; > + SDValue BaseReg; > + > + R600Address(const GlobalValue *GV, unsigned Off, const SDValue &BR) : > + Global(GV), Offset(Off), BaseReg(BR) > + { } > + > +}; > + > +static > +const R600Address SelectAddr(SDValue Addr, SelectionDAG &DAG) { > + switch (Addr.getOpcode()) { > + case ISD::Constant: { > + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Addr); > + return R600Address(0, Const->getZExtValue(), SDValue()); > + } > + case ISD::GlobalAddress: { > + GlobalAddressSDNode * G = dyn_cast<GlobalAddressSDNode>(Addr); > + return R600Address(G->getGlobal(), 0, SDValue()); > + } > + case ISD::OR: > + if (!DAG.isBaseWithConstantOffset(Addr)) > + break; > + // Else OR and ADD code is the same > + case ISD::ADD: { > + const R600Address 
&LHSAddr = SelectAddr(Addr.getOperand(0), DAG); > + const R600Address &RHSAddr = SelectAddr(Addr.getOperand(1), DAG); > + > + if (LHSAddr.BaseReg.getNode() && RHSAddr.BaseReg.getNode()) { > + break; > + } > + > + const GlobalValue *NewGV = LHSAddr.Global?LHSAddr.Global:RHSAddr.Global; > + if (LHSAddr.BaseReg.getNode()) { > + return R600Address(NewGV, LHSAddr.Offset + RHSAddr.Offset, > + LHSAddr.BaseReg); > + } else if (RHSAddr.BaseReg.getNode()) { > + return R600Address(NewGV, LHSAddr.Offset + RHSAddr.Offset, > + RHSAddr.BaseReg); > + } else { > + return R600Address(NewGV, LHSAddr.Offset + RHSAddr.Offset, SDValue()); > + } > + } > + default: > + break; > + } > + return R600Address(0, 0, Addr); > +} > + > +bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, > + SDValue& TGA) { > + const R600Address R600Addr = SelectAddr(Addr, *CurDAG); > + if (!R600Addr.BaseReg.getNode()) { > + TGA = CurDAG->getTargetGlobalAddress(R600Addr.Global, Addr.getDebugLoc(), > + Addr.getValueType(), R600Addr.Offset); > + return true; > + } > + return false; > +} > + > +bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, > + SDValue& Offset, SDValue &TGA) { > + const R600Address R600Addr = SelectAddr(Addr, *CurDAG); > + if (R600Addr.BaseReg.getNode()) { > + TGA = CurDAG->getTargetGlobalAddress(R600Addr.Global, Addr.getDebugLoc(), > + Addr.getValueType(), R600Addr.Offset); > + Offset = R600Addr.BaseReg; > + return true; > + } > + return false; > +} > + > bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base, > SDValue& Offset) { > if (Addr.getOpcode() == ISD::TargetExternalSymbol || > diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp > b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp > index 018234a..4875dac 100644 > --- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp > +++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp > @@ -64,8 +64,8 @@ private: > void EmitALUInstr(const MCInst &MI, 
SmallVectorImpl<MCFixup> &Fixups, > raw_ostream &OS) const; > void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const; > - void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value, > - raw_ostream &OS) const; > + void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx, > + raw_ostream &OS) const; > void EmitDst(const MCInst &MI, raw_ostream &OS) const; > void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, > raw_ostream &OS) const; > @@ -165,7 +165,8 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst > &MI, raw_ostream &OS, > case AMDGPU::VTX_READ_GLOBAL_i32_eg: > case AMDGPU::VTX_READ_GLOBAL_f32_eg: > case AMDGPU::VTX_READ_GLOBAL_v4i32_eg: > - case AMDGPU::VTX_READ_GLOBAL_v4f32_eg: { > + case AMDGPU::VTX_READ_GLOBAL_v4f32_eg: > + case AMDGPU::TEX_VTX_CONSTBUF: { > uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); > uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset > > @@ -195,7 +196,6 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI, > SmallVectorImpl<MCFixup> &Fixups, > raw_ostream &OS) const { > const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode()); > - unsigned NumOperands = MI.getNumOperands(); > > // Emit instruction type > EmitByte(INSTR_ALU, OS); > @@ -211,19 +211,21 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI, > InstWord01 |= ISAOpCode << 1; > } > > - unsigned SrcIdx = 0; > - for (unsigned int OpIdx = 1; OpIdx < NumOperands; ++OpIdx) { > - if (MI.getOperand(OpIdx).isImm() || MI.getOperand(OpIdx).isFPImm() || > - OpIdx == (unsigned)MCDesc.findFirstPredOperandIdx()) { > - continue; > - } > - EmitSrcISA(MI, OpIdx, InstWord01, OS); > - SrcIdx++; > - } > + unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 : > + MCDesc.TSFlags & R600_InstFlag::OP2 ? 
2 : 1; > > - // Emit zeros for unused sources > - for ( ; SrcIdx < 3; SrcIdx++) { > - EmitNullBytes(SRC_BYTE_COUNT - 6, OS); > + EmitByte(SrcNum, OS); > + > + const unsigned SrcOps[3][2] = { > + {R600Operands::SRC0, R600Operands::SRC0_SEL}, > + {R600Operands::SRC1, R600Operands::SRC1_SEL}, > + {R600Operands::SRC2, R600Operands::SRC2_SEL} > + }; > + > + for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) { > + unsigned RegOpIdx = > R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]]; > + unsigned SelOpIdx = > R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]]; > + EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS); > } > > Emit(InstWord01, OS); > @@ -294,34 +296,37 @@ void R600MCCodeEmitter::EmitSrc(const MCInst &MI, > unsigned OpIdx, > > } > > -void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx, > - uint64_t &Value, raw_ostream &OS) const { > - const MCOperand &MO = MI.getOperand(OpIdx); > +void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, > + unsigned SelOpIdx, raw_ostream &OS) const > { > + const MCOperand &RegMO = MI.getOperand(RegOpIdx); > + const MCOperand &SelMO = MI.getOperand(SelOpIdx); > + > union { > float f; > uint32_t i; > } InlineConstant; > InlineConstant.i = 0; > - // Emit the source select (2 bytes). For GPRs, this is the register index. > - // For other potential instruction operands, (e.g. constant registers) the > - // value of the source select is defined in the r600isa docs. > - if (MO.isReg()) { > - unsigned Reg = MO.getReg(); > - if > (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) { > - EmitByte(1, OS); > - } else { > - EmitByte(0, OS); > - } > + // Emit source type (1 byte) and source select (4 bytes). For GPRs type is > 0 > + // and select is 0 (GPR index is encoded in the instr encoding. For > constants > + // type is 1 and select is the original const select passed from the > driver. 
> + unsigned Reg = RegMO.getReg(); > + if (Reg == AMDGPU::ALU_CONST) { > + EmitByte(1, OS); > + uint32_t Sel = SelMO.getImm(); > + Emit(Sel, OS); > + } else { > + EmitByte(0, OS); > + Emit((uint32_t)0, OS); > + } > > - if (Reg == AMDGPU::ALU_LITERAL_X) { > - unsigned ImmOpIndex = MI.getNumOperands() - 1; > - MCOperand ImmOp = MI.getOperand(ImmOpIndex); > - if (ImmOp.isFPImm()) { > - InlineConstant.f = ImmOp.getFPImm(); > - } else { > - assert(ImmOp.isImm()); > - InlineConstant.i = ImmOp.getImm(); > - } > + if (Reg == AMDGPU::ALU_LITERAL_X) { > + unsigned ImmOpIndex = MI.getNumOperands() - 1; > + MCOperand ImmOp = MI.getOperand(ImmOpIndex); > + if (ImmOp.isFPImm()) { > + InlineConstant.f = ImmOp.getFPImm(); > + } else { > + assert(ImmOp.isImm()); > + InlineConstant.i = ImmOp.getImm(); > } > } > > diff --git a/lib/Target/AMDGPU/R600Defines.h b/lib/Target/AMDGPU/R600Defines.h > index 7dea8e4..e19eea3 100644 > --- a/lib/Target/AMDGPU/R600Defines.h > +++ b/lib/Target/AMDGPU/R600Defines.h > @@ -62,18 +62,33 @@ namespace R600Operands { > SRC0_NEG, > SRC0_REL, > SRC0_ABS, > + SRC0_SEL, > SRC1, > SRC1_NEG, > SRC1_REL, > SRC1_ABS, > + SRC1_SEL, > SRC2, > SRC2_NEG, > SRC2_REL, > + SRC2_SEL, > LAST, > PRED_SEL, > IMM, > COUNT > }; > + > + const static int ALUOpTable[3][R600Operands::COUNT] = { > +// W C S S S S S S S S S S S > +// R O D L S R R R R S R R R R S R R R L P > +// D U I M R A R C C C C R C C C C R C C C A R I > +// S E U T O E M C 0 0 0 0 C 1 1 1 1 C 2 2 2 S E M > +// T M P E D L P 0 N R A S 1 N R A S 2 N R S T D M > + {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12}, > + {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19}, > + {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17} > + }; > + > } > > #endif // R600DEFINES_H_ > diff --git a/lib/Target/AMDGPU/R600EliminateSymbolicOperand.cpp > b/lib/Target/AMDGPU/R600EliminateSymbolicOperand.cpp > new file mode 100644 > index 0000000..3d01582 > --- 
/dev/null > +++ b/lib/Target/AMDGPU/R600EliminateSymbolicOperand.cpp > @@ -0,0 +1,122 @@ > +//===-- R600EliminateSymbolicOperand.cpp - Eliminate Symbolic > Operands-----===// > +// > +// The LLVM Compiler Infrastructure > +// > +// This file is distributed under the University of Illinois Open Source > +// License. See LICENSE.TXT for details. > +// > +//===----------------------------------------------------------------------===// > +// > +/// \file > +/// Before this pass backend can manipulate symbolic operands like > +/// GlobalAddress for data read from Const Buffers or FrameIndex for stack > +/// allocated array. This pass is used to change these operands by a value > +/// that can be passed to MCInstrEmitter. > +// > +//===----------------------------------------------------------------------===// > + > +#include "AMDGPU.h" > +#include "llvm/CodeGen/MachineFunction.h" > +#include "llvm/CodeGen/MachineFunctionPass.h" > +#include "R600InstrInfo.h" > +#include "llvm/GlobalValue.h" > + > +namespace llvm { > + > +class R600EliminateSymbolicOperand : public MachineFunctionPass { > +private: > + static char ID; > + const R600InstrInfo *TII; > +public: > + R600EliminateSymbolicOperand(TargetMachine &tm); > + virtual bool runOnMachineFunction(MachineFunction &MF); > + > + const char *getPassName() const { return "R600 Eliminate Symbolic > Operand"; } > +}; > + > +char R600EliminateSymbolicOperand::ID = 0; > + > + > +R600EliminateSymbolicOperand::R600EliminateSymbolicOperand(TargetMachine > &tm) : > + MachineFunctionPass(ID), > + TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) > +{ > +} > + > +static unsigned > +getConstBufferIdxFromName(StringRef Name) { > + if (Name == "const0") > + return 0; > + if (Name == "const1") > + return 1; > + if (Name == "const2") > + return 2; > + if (Name == "const3") > + return 3; > + if (Name == "const4") > + return 4; > + if (Name == "const5") > + return 5; > + if (Name == "const6") > + return 6; > + if (Name == "const7") 
> + return 7; > + if (Name == "const8") > + return 8; > + if (Name == "const9") > + return 9; > + if (Name == "const10") > + return 10; > + if (Name == "const11") > + return 11; > + if (Name == "const12") > + return 12; > + if (Name == "const13") > + return 13; > + if (Name == "const14") > + return 14; > + if (Name == "const15") > + return 15; > +} > + > +bool R600EliminateSymbolicOperand::runOnMachineFunction(MachineFunction &MF) > { > + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); > + BB != BB_E; ++BB) { > + MachineBasicBlock &MBB = *BB; > + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); > + I != E; ++I) { > + MachineInstr &MI = *I; > + if (!TII->isALUInstr(MI.getOpcode())) > + continue; > + for (int i = 0; i < MI.getNumOperands(); i++) { > + MachineOperand &MO = MI.getOperand(i); > + if (MO.isGlobal()) { > + unsigned ConstSel = MO.getOffset() / 4 > + + 2048 + > + 16384 * getConstBufferIdxFromName(MO.getGlobal()->getName()); > + int SelIdx; > + if (i == TII->getOperandIdx(MI, R600Operands::SRC0)) { > + SelIdx = TII->getOperandIdx(MI, R600Operands::SRC0_SEL); > + } else if (i == TII->getOperandIdx(MI, R600Operands::SRC1)) { > + SelIdx = TII->getOperandIdx(MI, R600Operands::SRC1_SEL); > + } else if (i == TII->getOperandIdx(MI, R600Operands::SRC2)) { > + SelIdx = TII->getOperandIdx(MI, R600Operands::SRC2_SEL); > + } else { > + assert(0 && "Wrong operand location for Global Address"); > + } > + MI.getOperand(SelIdx).setImm(ConstSel); > + MO.ChangeToRegister(AMDGPU::ALU_CONST, false); > + } > + } > + } > + } > + return false; > +} > + > +FunctionPass *createR600EliminateSymbolicOperandPass(TargetMachine &tm) { > + return new R600EliminateSymbolicOperand(tm); > +} > + > +} > + > + > diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp > b/lib/Target/AMDGPU/R600ISelLowering.cpp > index d09f8c0e..6ff99c3 100644 > --- a/lib/Target/AMDGPU/R600ISelLowering.cpp > +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp > @@ -16,6 +16,7 @@ > #include 
"R600Defines.h" > #include "R600InstrInfo.h" > #include "R600MachineFunctionInfo.h" > +#include "AMDGPURegisterInfo.h" > #include "llvm/Argument.h" > #include "llvm/CodeGen/MachineInstrBuilder.h" > #include "llvm/CodeGen/MachineRegisterInfo.h" > @@ -97,7 +98,12 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : > setOperationAction(ISD::SELECT, MVT::v4i32, Expand); > setOperationAction(ISD::VSELECT, MVT::v4f32, Expand); > setOperationAction(ISD::VSELECT, MVT::v4i32, Expand); > + setOperationAction(ISD::LOAD, MVT::f32, Custom); > + setOperationAction(ISD::LOAD, MVT::i32, Custom); > + setOperationAction(ISD::LOAD, MVT::v4f32, Custom); > + setOperationAction(ISD::LOAD, MVT::v4i32, Custom); > setTargetDAGCombine(ISD::FP_ROUND); > + setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); > > setSchedulingPreference(Sched::VLIW); > } > @@ -138,13 +144,9 @@ MachineBasicBlock * > R600TargetLowering::EmitInstrWithCustomInserter( > break; > } > > - case AMDGPU::R600_LOAD_CONST: { > - int64_t RegIndex = MI->getOperand(1).getImm(); > - unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex); > - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY)) > - .addOperand(MI->getOperand(0)) > - .addReg(ConstantReg); > - break; > + case AMDGPU::TEX_VTX_CONSTBUF: { > + MI->getOperand(2).ChangeToImmediate(MI->getOperand(2).getOffset() * 16); > + return BB; > } > > case AMDGPU::MASK_WRITE: { > @@ -417,6 +419,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, > SelectionDAG &DAG) const > case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); > case ISD::SELECT: return LowerSELECT(Op, DAG); > case ISD::SETCC: return LowerSETCC(Op, DAG); > + case ISD::LOAD: return LowerLOAD(Op, DAG); > case ISD::FPOW: return LowerFPOW(Op, DAG); > case ISD::INTRINSIC_VOID: { > SDValue Chain = Op.getOperand(0); > @@ -580,6 +583,16 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N, > switch (N->getOpcode()) { > default: return; > case ISD::FP_TO_UINT: 
Results.push_back(LowerFPTOUINT(N->getOperand(0), > DAG)); > + return; > + case ISD::LOAD: { > + SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode(); > + Results.push_back(SDValue(Node, 0)); > + Results.push_back(SDValue(Node, 1)); > + // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode > + // function > + DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1)); > + return; > + } > } > } > > @@ -861,6 +874,75 @@ SDValue R600TargetLowering::LowerSETCC(SDValue Op, > SelectionDAG &DAG) const { > return Cond; > } > > +const SDValue PtrSRL(SDValue Addr, unsigned Amount, SelectionDAG &DAG) { > + switch (Addr.getOpcode()) { > + case ISD::GlobalAddress: > + case ISD::BITCAST: > + return Addr; > + case ISD::ADD: > + case ISD::OR: > + return DAG.getNode(Addr.getOpcode(), Addr.getDebugLoc(), > Addr.getValueType(), > + PtrSRL(Addr.getOperand(0), Amount, DAG), > + PtrSRL(Addr.getOperand(1), Amount, DAG)); > + case ISD::SHL: { > + ConstantSDNode *SHLPad = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); > + assert(SHLPad && "Not Constant SHLPad!"); > + unsigned PadAmount = SHLPad->getZExtValue(); > + //We suppose PadAmout > Amount > + return DAG.getNode(ISD::SHL, Addr.getDebugLoc(), Addr.getValueType(), > + Addr.getOperand(0), > + DAG.getConstant(PadAmount - Amount, MVT::i32)); > + } > + case ISD::Constant: { > + ConstantSDNode *SHLPad = dyn_cast<ConstantSDNode>(Addr); > + return DAG.getConstant(SHLPad->getZExtValue() >> Amount, MVT::i32); > + } > + default: > + assert(0 && "Invalid ptr format!"); > + } > +} > + > +SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const > +{ Coding style: Brace must go on the same line as the function. 
> + EVT VT = Op.getValueType(); > + DebugLoc DL = Op.getDebugLoc(); > + LoadSDNode *LoadNode = cast<LoadSDNode>(Op); > + SDValue Chain = Op.getOperand(0); > + SDValue Ptr = Op.getOperand(1); > + SDValue LoweredLoad; > + > + if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) { > + SDValue Result; > + if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue())) { > + // Ptr is GA + Cst : it can be folded, turns it into 4x f32 > + SDValue Slots[4]; > + for (unsigned i = 0; i < 4; i++) { > + SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, > + DAG.getConstant(4 * i, MVT::i32)); > + Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::f32, > NewPtr); > + } > + Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, Slots, 4); > + } else { > + // Ptr is GA + Reg : it cant be folded, keeps it as a v4f32 load > + Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4f32, > + PtrSRL(Ptr, 4, DAG)); > + } > + > + if (!VT.isVector()) { > + Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Result, > + DAG.getConstant(0, MVT::i32)); > + } > + > + SDValue MergedValues[2] = { > + Result, > + Chain > + }; > + return DAG.getMergeValues(MergedValues, 2, DL); > + } > + > + return SDValue(); > +} > + > SDValue R600TargetLowering::LowerFPOW(SDValue Op, > SelectionDAG &DAG) const { > DebugLoc DL = Op.getDebugLoc(); > @@ -918,6 +1000,17 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, > } > break; > } > + // Extract_vec (Build_vector) generated by custom lowering > + // also needs to be customly combined > + case ISD::EXTRACT_VECTOR_ELT: { > + SDValue Arg = N->getOperand(0); > + if (Arg.getOpcode() == ISD::BUILD_VECTOR) { > + if (ConstantSDNode *Const = > dyn_cast<ConstantSDNode>(N->getOperand(1))) { > + unsigned Element = Const->getZExtValue(); > + return Arg->getOperand(Element); > + } > + } > + } > } > return SDValue(); > } > diff --git a/lib/Target/AMDGPU/R600ISelLowering.h > b/lib/Target/AMDGPU/R600ISelLowering.h > index 
bdb0a55..70ece84 100644 > --- a/lib/Target/AMDGPU/R600ISelLowering.h > +++ b/lib/Target/AMDGPU/R600ISelLowering.h > @@ -62,6 +62,7 @@ private: > SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; > SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; > SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const; > + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; > > bool isZero(SDValue Op) const; > }; > diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp > b/lib/Target/AMDGPU/R600InstrInfo.cpp > index a60a180..6c1c50a 100644 > --- a/lib/Target/AMDGPU/R600InstrInfo.cpp > +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp > @@ -484,13 +484,15 @@ MachineInstrBuilder > R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB > .addReg(Src0Reg) // $src0 > .addImm(0) // $src0_neg > .addImm(0) // $src0_rel > - .addImm(0); // $src0_abs > + .addImm(0) // $src0_abs > + .addImm(0); // $src0_sel > > if (Src1Reg) { > MIB.addReg(Src1Reg) // $src1 > .addImm(0) // $src1_neg > .addImm(0) // $src1_rel > - .addImm(0); // $src1_abs > + .addImm(0) // $src1_abs > + .addImm(0); // $src1_sel > } > > //XXX: The r600g finalizer expects this to be 1, once we've moved the > @@ -519,16 +521,6 @@ int R600InstrInfo::getOperandIdx(const MachineInstr &MI, > > int R600InstrInfo::getOperandIdx(unsigned Opcode, > R600Operands::Ops Op) const { > - const static int OpTable[3][R600Operands::COUNT] = { > -// W C S S S S S S S S > -// R O D L S R R R S R R R S R R L P > -// D U I M R A R C C C C C C C R C C A R I > -// S E U T O E M C 0 0 0 C 1 1 1 C 2 2 S E M > -// T M P E D L P 0 N R A 1 N R A 2 N R T D M > - {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8,-1,-1,-1,-1,-1,-1,-1, 9,10,11}, > - {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,-1,-1,-1,13,14,15,16,17}, > - {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8,-1, 9,10,11,12,13,14} > - }; > unsigned TargetFlags = get(Opcode).TSFlags; > unsigned OpTableIdx; > > @@ -554,7 +546,7 @@ int R600InstrInfo::getOperandIdx(unsigned Opcode, > OpTableIdx = 2; > } > > - return 
OpTable[OpTableIdx][Op]; > + return R600Operands::ALUOpTable[OpTableIdx][Op]; > } > > void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op, > diff --git a/lib/Target/AMDGPU/R600Instructions.td > b/lib/Target/AMDGPU/R600Instructions.td > index dc32368..691b1c8 100644 > --- a/lib/Target/AMDGPU/R600Instructions.td > +++ b/lib/Target/AMDGPU/R600Instructions.td > @@ -69,6 +69,8 @@ class InstFlag<string PM = "printOperand", int Default = 0> > let PrintMethod = PM; > } > > +def SEL : OperandWithDefaultOps <i32, (ops (i32 0))>; > + > def LITERAL : InstFlag<"printLiteral">; > > def WRITE : InstFlag <"printWrite", 1>; > @@ -88,6 +90,8 @@ def LAST : InstFlag<"printLast", 1>; > def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>; > def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>; > def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>; > +def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, > "SelectGlobalValueConstantOffset", [], []>; > +def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, > "SelectGlobalValueVariableOffset", [], []>; > > class R600ALU_Word0 { > field bits<32> Word0; > @@ -262,7 +266,7 @@ class R600_1OP <bits<11> inst, string opName, list<dag> > pattern, > InstR600 <0, > (outs R600_Reg32:$dst), > (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp, > - R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, > ABS:$src0_abs, > + R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, > ABS:$src0_abs, SEL:$src0_sel, > LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), > !strconcat(opName, > "$clamp $dst$write$dst_rel$omod, " > @@ -302,8 +306,8 @@ class R600_2OP <bits<11> inst, string opName, list<dag> > pattern, > (outs R600_Reg32:$dst), > (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write, > OMOD:$omod, REL:$dst_rel, CLAMP:$clamp, > - R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, > - R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, > + R600_Reg32:$src0, NEG:$src0_neg, 
REL:$src0_rel, > ABS:$src0_abs, SEL:$src0_sel, > + R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, > ABS:$src1_abs, SEL:$src1_sel, > LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), > !strconcat(opName, > "$clamp > $update_exec_mask$update_pred$dst$write$dst_rel$omod, " > @@ -339,9 +343,9 @@ class R600_3OP <bits<5> inst, string opName, list<dag> > pattern, > InstR600 <0, > (outs R600_Reg32:$dst), > (ins REL:$dst_rel, CLAMP:$clamp, > - R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, > - R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, > - R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, > + R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel, > + R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel, > + R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel, > LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), > !strconcat(opName, "$clamp $dst$dst_rel, " > "$src0_neg$src0$src0_rel, " > @@ -1596,6 +1600,74 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, > hasCtrlDep = 1 in { > "RETURN", [(IL_retflag)]>; > } > > + > +//===----------------------------------------------------------------------===// > +// Constant Buffer Addressing Support > +//===----------------------------------------------------------------------===// > + > +def CONST_COPY : R600_1OP <0x19, "CONST_COPY", > + [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src0))]> > +{ > + let mayLoad = 1; > +} > + > +def TEX_VTX_CONSTBUF : > + InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr), "VTX_READ_eg $dst, > $ptr", > + [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr))]>, > + VTX_WORD1_GPR, VTX_WORD0 { > + > + let usesCustomInserter = 1; > + > + let VC_INST = 0; > + let FETCH_TYPE = 2; > + let FETCH_WHOLE_QUAD = 0; > + let BUFFER_ID = 0; > + let SRC_REL = 0; > + let SRC_SEL_X = 0; > + let DST_REL = 0; > + let USE_CONST_FIELDS = 0; > + let NUM_FORMAT_ALL = 2; > + let FORMAT_COMP_ALL = 1; > + let SRF_MODE_ALL = 1; > + let MEGA_FETCH_COUNT = 16; > + 
let DST_SEL_X = 0; > + let DST_SEL_Y = 1; > + let DST_SEL_Z = 2; > + let DST_SEL_W = 3; > + let DATA_FORMAT = 35; > + > + let Inst{31-0} = Word0; > + let Inst{63-32} = Word1; > + > +// LLVM can only encode 64-bit instructions, so these fields are manually > +// encoded in R600CodeEmitter > +// > +// bits<16> OFFSET; > +// bits<2> ENDIAN_SWAP = 0; > +// bits<1> CONST_BUF_NO_STRIDE = 0; > +// bits<1> MEGA_FETCH = 0; > +// bits<1> ALT_CONST = 0; > +// bits<2> BUFFER_INDEX_MODE = 0; > + > + > + > +// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding > +// is done in R600CodeEmitter > +// > +// Inst{79-64} = OFFSET; > +// Inst{81-80} = ENDIAN_SWAP; > +// Inst{82} = CONST_BUF_NO_STRIDE; > +// Inst{83} = MEGA_FETCH; > +// Inst{84} = ALT_CONST; > +// Inst{86-85} = BUFFER_INDEX_MODE; > +// Inst{95-86} = 0; Reserved > + > +// VTX_WORD3 (Padding) > +// > +// Inst{127-96} = 0; > +} > + > + > > //===----------------------------------------------------------------------===// > // ISel Patterns > > //===----------------------------------------------------------------------===// > diff --git a/lib/Target/AMDGPU/R600RegisterInfo.cpp > b/lib/Target/AMDGPU/R600RegisterInfo.cpp > index a39f83d..397fbaf 100644 > --- a/lib/Target/AMDGPU/R600RegisterInfo.cpp > +++ b/lib/Target/AMDGPU/R600RegisterInfo.cpp > @@ -15,6 +15,7 @@ > #include "R600RegisterInfo.h" > #include "AMDGPUTargetMachine.h" > #include "R600Defines.h" > +#include "R600InstrInfo.h" > #include "R600MachineFunctionInfo.h" > > using namespace llvm; > @@ -38,16 +39,12 @@ BitVector R600RegisterInfo::getReservedRegs(const > MachineFunction &MF) const { > Reserved.set(AMDGPU::NEG_ONE); > Reserved.set(AMDGPU::PV_X); > Reserved.set(AMDGPU::ALU_LITERAL_X); > + Reserved.set(AMDGPU::ALU_CONST); > Reserved.set(AMDGPU::PREDICATE_BIT); > Reserved.set(AMDGPU::PRED_SEL_OFF); > Reserved.set(AMDGPU::PRED_SEL_ZERO); > Reserved.set(AMDGPU::PRED_SEL_ONE); > > - for (TargetRegisterClass::iterator I = 
AMDGPU::R600_CReg32RegClass.begin(), > - E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) { > - Reserved.set(*I); > - } > - > for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(), > E = MFI->ReservedRegs.end(); I != E; > ++I) { > Reserved.set(*I); > diff --git a/lib/Target/AMDGPU/R600RegisterInfo.td > b/lib/Target/AMDGPU/R600RegisterInfo.td > index d3d6d25..10fe858 100644 > --- a/lib/Target/AMDGPU/R600RegisterInfo.td > +++ b/lib/Target/AMDGPU/R600RegisterInfo.td > @@ -28,9 +28,6 @@ foreach Index = 0-127 in { > // 32-bit Temporary Registers > def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>; > > - // 32-bit Constant Registers (There are more than 128, this the number > - // that is currently supported. > - def C#Index#_#Chan : R600RegWithChan <"C"#Index#"."#Chan, Index, Chan>; > } > // 128-bit Temporary Registers > def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW", > @@ -46,7 +43,6 @@ foreach Index = 448-464 in { > def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>; > } > > - > // Special Registers > > def ZERO : R600Reg<"0.0", 248>; > @@ -61,16 +57,11 @@ def PREDICATE_BIT : R600Reg<"PredicateBit", 0>; > def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>; > def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>; > def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>; > +def ALU_CONST : R600Reg<"Const", 0>; > > def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32, > (add (sequence "ArrayBase%u", 448, 464))>; > > -def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, > - (add (interleave > - (interleave (sequence "C%u_X", 0, 127), > - (sequence "C%u_Z", 0, 127)), > - (interleave (sequence "C%u_Y", 0, 127), > - (sequence "C%u_W", 0, 127))))>; > > def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32, > (add (sequence "T%u_X", 0, 127))>; > @@ -91,7 +82,6 @@ def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, > > def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add > R600_TReg32, > - R600_CReg32, > 
R600_ArrayBase, > ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>; > > -- > 1.8.0.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev