--- lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp | 11 ------ lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp | 23 ++++++------ lib/Target/AMDGPU/R600ISelLowering.cpp | 53 +++++++++++++++++++++++----- lib/Target/AMDGPU/R600ISelLowering.h | 2 ++ lib/Target/AMDGPU/R600Instructions.td | 21 ++++++----- 5 files changed, 71 insertions(+), 39 deletions(-)
diff --git a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp index 9ea1212..a09796a 100644 --- a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp @@ -161,17 +161,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { } switch (Opc) { default: break; - case ISD::FrameIndex: - { - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) { - unsigned int FI = FIN->getIndex(); - EVT OpVT = N->getValueType(0); - unsigned int NewOpc = AMDGPU::COPY; - SDValue TFI = CurDAG->getRegister(AMDGPU::T0_X, MVT::i32); - return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI); - } - } - break; } return SelectCode(N); } diff --git a/lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp b/lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp index 18db422..a2cb43a 100644 --- a/lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp +++ b/lib/Target/AMDGPU/R600AllocateMemoryRegs.cpp @@ -70,15 +70,16 @@ bool R600AllocateMemoryRegsPass::runOnMachineFunction(MachineFunction &MF) { case AMDGPU::RegisterStore_i32: case AMDGPU::RegisterStore_f32: { - int64_t Offset = (MI.getOperand(2).getImm() * 4) + - MI.getOperand(3).getImm() + - (IndirectRegOffset * 4); - unsigned DstReg = AMDGPU::R600_TReg32RegClass.getRegister(Offset); + assert (MI.getOperand(1).isFI() && MI.getOperand(1).getIndex() == 0); R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); - + int64_t Offset = MI.getOperand(3).getImm() + + (IndirectRegOffset * 4); MFI->IndirectChannels.set(MI.getOperand(3).getImm()); - if (MI.getOperand(1).getReg() == AMDGPU::ZERO) { + if (MI.getOperand(2).isImm()) { + // Direct Addressing + Offset += MI.getOperand(2).getImm() * 4; + unsigned DstReg = AMDGPU::R600_TReg32RegClass.getRegister(Offset); MFI->ReservedRegs.push_back(DstReg); TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, DstReg, MI.getOperand(0).getReg()); @@ -86,7 +87,7 @@ bool R600AllocateMemoryRegsPass::runOnMachineFunction(MachineFunction &MF) { MachineInstr *MOVA = TII->buildDefaultInstruction(*BB, I, AMDGPU::MOVA_INT_eg, AMDGPU::AR_X, - MI.getOperand(1).getReg()); + MI.getOperand(2).getReg()); TII->setImmOperand(MOVA, R600Operands::WRITE, 0); unsigned OffsetReg = AMDGPU::R600_AddrRegClass.getRegister(Offset); MachineInstrBuilder MIBuilder = TII->buildDefaultInstruction(*BB, I, @@ -101,12 +102,14 @@ bool R600AllocateMemoryRegsPass::runOnMachineFunction(MachineFunction &MF) { case AMDGPU::RegisterLoad_i32: case AMDGPU::RegisterLoad_f32: { + assert (MI.getOperand(1).isFI() && MI.getOperand(1).getIndex() == 0); unsigned Channel = MI.getOperand(3).getImm(); - unsigned Offset = (MI.getOperand(2).getImm() * 4) + Channel + + unsigned Offset = Channel + (IndirectRegOffset * 4); unsigned OffsetReg; - if (MI.getOperand(1).getReg() == AMDGPU::ZERO) { + if (MI.getOperand(2).isImm()) { + Offset += MI.getOperand(2).getImm() * 4; OffsetReg = AMDGPU::R600_TReg32RegClass.getRegister(Offset); TII->buildDefaultInstruction(MBB, I, AMDGPU::MOV, MI.getOperand(0).getReg(), @@ -116,7 +119,7 @@ bool R600AllocateMemoryRegsPass::runOnMachineFunction(MachineFunction &MF) { MachineInstr *MOVA = TII->buildDefaultInstruction(*BB, I, AMDGPU::MOVA_INT_eg, AMDGPU::AR_X, - MI.getOperand(1).getReg()); + MI.getOperand(2).getReg()); TII->setImmOperand(MOVA, R600Operands::WRITE, 0); OffsetReg = AMDGPU::R600_AddrRegClass.getRegister(Offset); MachineInstrBuilder MIBuilder = TII->buildDefaultInstruction(*BB, I, diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 9bc2e03..02ab6d0 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -104,8 +104,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::STORE, MVT::i32, Custom); setOperationAction(ISD::STORE, MVT::v4f32, Custom); - setOperationAction(ISD::FrameIndex, MVT::i32, Custom); - setTargetDAGCombine(ISD::FP_ROUND); setSchedulingPreference(Sched::VLIW); @@ -360,7 +358,6 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::FPOW: return LowerFPOW(Op, DAG); - case ISD::FrameIndex: return DAG.getConstant(0, MVT::i32); case ISD::INTRINSIC_VOID: { SDValue Chain = Op.getOperand(0); unsigned IntrinsicID = @@ -804,6 +801,31 @@ SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const return Cond; } +bool R600TargetLowering::ReorderPointerArithm(SDValue Ptr, SelectionDAG &DAG, + SDValue &CleanPtr, SDValue &FrameIndex) const +{ + switch (Ptr.getOpcode()) { + default: return false; + case ISD::FrameIndex: + FrameIndex = Ptr; + CleanPtr = DAG.getConstant(0, MVT::i32); + return true; + case ISD::ADD: + case ISD::OR: + if (ReorderPointerArithm(Ptr.getOperand(0), DAG, CleanPtr, FrameIndex)) { + CleanPtr = DAG.getNode(Ptr.getOpcode(), Ptr.getDebugLoc(), MVT::i32, + CleanPtr, Ptr.getOperand(1)); + return true; + } else if (ReorderPointerArithm(Ptr.getOperand(1), DAG, CleanPtr, FrameIndex)){ + CleanPtr = DAG.getNode(Ptr.getOpcode(), Ptr.getDebugLoc(), MVT::i32, + Ptr.getOperand(0), CleanPtr); + return true; + } else { + return false; + } + } +} + SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); @@ -817,22 +839,28 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const return SDValue(); } + SDValue FI; + SDValue PtrArithm; + ReorderPointerArithm(Ptr, DAG, PtrArithm, FI); + FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(FI.getNode()); + SDValue TargetFI = DAG.getTargetFrameIndex(FIN->getIndex(), MVT::i32); + if (VT.isVector()) { EVT ElemVT = VT.getVectorElementType(); SDValue Loads[4]; - Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr, + Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), PtrArithm, DAG.getConstant(4, MVT::i32)); for (unsigned i = 0; i < 4; ++i) { Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT, - Chain, Ptr, + Chain, TargetFI, Ptr, DAG.getTargetConstant(i, MVT::i32), // Channel Op.getOperand(2)); } LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Loads, 4); } else { LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT, - Chain, Ptr, + Chain, TargetFI, PtrArithm, DAG.getTargetConstant(0, MVT::i32), // Channel Op.getOperand(2)); } @@ -859,12 +887,18 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const return SDValue(); } + SDValue FI; + SDValue PtrArithm; + ReorderPointerArithm(Ptr, DAG, PtrArithm, FI); + FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(FI.getNode()); + SDValue TargetFI = DAG.getTargetFrameIndex(FIN->getIndex(), MVT::i32); + if (VT.isVector()) { EVT ElemVT = VT.getVectorElementType(); SDValue Stores[4]; // XXX: I'm not sure how to explain this. - Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr, + Ptr = DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), PtrArithm, DAG.getConstant(4, MVT::i32)); for (unsigned i = 0; i < 4; ++i) { @@ -872,7 +906,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const Value, DAG.getConstant(i, MVT::i32)); Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, - Chain, Elem, Ptr, + Chain, Elem, TargetFI, Ptr, DAG.getTargetConstant(i, MVT::i32)); // Channel MFI->IndirectChannels.set(i); } @@ -881,7 +915,8 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const if (VT == MVT::i8) { Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value); } - Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr, + Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, + Value, TargetFI, PtrArithm, DAG.getTargetConstant(0, MVT::i32)); // Channel MFI->IndirectChannels.set(0); } diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h index a2d7934..ded30d2 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.h +++ b/lib/Target/AMDGPU/R600ISelLowering.h @@ -66,6 +66,8 @@ private: SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const; bool isZero(SDValue Op) const; + bool ReorderPointerArithm(SDValue Ptr, SelectionDAG &DAG, + SDValue &CleanPtr, SDValue &FrameIndex) const; }; } // End namespace llvm; diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td index d081824..13fe297 100644 --- a/lib/Target/AMDGPU/R600Instructions.td +++ b/lib/Target/AMDGPU/R600Instructions.td @@ -426,11 +426,11 @@ def isR600toCayman : Predicate< //===----------------------------------------------------------------------===// def REGISTER_LOAD : SDNode<"AMDGPUISD::REGISTER_LOAD", - SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>, + SDTypeProfile<1, 3, [SDTCisPtrTy<1>, SDTCisInt<2>, SDTCisInt<3>]>, [SDNPHasChain, SDNPMayLoad]>; def REGISTER_STORE : SDNode<"AMDGPUISD::REGISTER_STORE", - SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>, + SDTypeProfile<0, 4, [SDTCisPtrTy<1>, SDTCisInt<2>, SDTCisInt<3>]>, [SDNPHasChain, SDNPMayStore]>; def INTERP: SDNode<"AMDGPUISD::INTERP", @@ -1386,17 +1386,20 @@ def CONSTANT_LOAD_eg : VTX_READ_32_eg <1, let isPseudo = 1, isCodeGenOnly =1 in { class RegisterLoad <ValueType vt> : InstR600 <0x0, - (outs R600_Reg32:$dst), (ins FRAMEri:$addr, i32imm:$chan), - "RegisterLoad $dst, $addr", - [(set (vt R600_Reg32:$dst), (REGISTER_LOAD ADDRIndirect:$addr, - (i32 timm:$chan)))], + (outs R600_Reg32:$dst), + (ins i32imm:$frameidx, R600_Reg32:$offset, i32imm:$chan), + "RegisterLoad $dst, $offset", + [(set (vt R600_Reg32:$dst), (REGISTER_LOAD (i32 tframeindex:$frameidx), + (i32 R600_Reg32:$offset), (i32 timm:$chan)))], NullALU >; class RegisterStore <ValueType vt> : InstR600 <0x0, - (outs), (ins R600_Reg32:$val, FRAMEri:$addr, i32imm:$chan), - "RegisterStore_i32 $val, $addr", - [(REGISTER_STORE (vt R600_Reg32:$val), ADDRIndirect:$addr, (i32 timm:$chan))], + (outs), + (ins R600_Reg32:$val, i32imm:$frameidx, R600_Reg32:$offset, i32imm:$chan), + "RegisterStore_i32 $val, $offset", + [(REGISTER_STORE (vt R600_Reg32:$val), + (i32 tframeindex:$frameidx), (i32 R600_Reg32:$offset), (i32 timm:$chan))], NullALU >; -- 1.7.11.7 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev