On Mon, Sep 02, 2013 at 09:07:18PM +0200, Marek Olšák wrote: > For _XYZ, the type of VDATA is v4i32, because v3i32 doesn't exist. > > The ADDR64 bit is not exposed. A simpler intrinsic that doesn't take > a resource descriptor might be nicer. > > The maximum number of input SGPRs is bumped to 17. > > Signed-off-by: Marek Olšák <marek.ol...@amd.com> > --- > lib/Target/R600/AMDGPUCallingConv.td | 3 ++- > lib/Target/R600/AMDGPUISelLowering.cpp | 1 + > lib/Target/R600/AMDGPUISelLowering.h | 1 + > lib/Target/R600/SIISelLowering.cpp | 39 > ++++++++++++++++++++++++++++++++++ > lib/Target/R600/SIInstrInfo.td | 27 +++++++++++++++++++++++ > lib/Target/R600/SIInstructions.td | 29 +++++++++++++++++++++---- > lib/Target/R600/SIIntrinsics.td | 18 ++++++++++++++++ > 7 files changed, 113 insertions(+), 5 deletions(-) > > diff --git a/lib/Target/R600/AMDGPUCallingConv.td > b/lib/Target/R600/AMDGPUCallingConv.td > index 84d3118..d26be32 100644 > --- a/lib/Target/R600/AMDGPUCallingConv.td > +++ b/lib/Target/R600/AMDGPUCallingConv.td > @@ -19,7 +19,8 @@ def CC_SI : CallingConv<[ > > CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[ > SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7, > - SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15 > + SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15, > + SGPR16
Why is this necessary? Are we using all 16 user sgprs now? > ]>>>, > > CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow< > diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp > b/lib/Target/R600/AMDGPUISelLowering.cpp > index 1237323..30d9503 100644 > --- a/lib/Target/R600/AMDGPUISelLowering.cpp > +++ b/lib/Target/R600/AMDGPUISelLowering.cpp > @@ -718,5 +718,6 @@ const char* > AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { > NODE_NAME_CASE(SAMPLED) > NODE_NAME_CASE(SAMPLEL) > NODE_NAME_CASE(STORE_MSKOR) > + NODE_NAME_CASE(TBUFFER_STORE_FORMAT) > } > } > diff --git a/lib/Target/R600/AMDGPUISelLowering.h > b/lib/Target/R600/AMDGPUISelLowering.h > index 75ac4c2..8a68356 100644 > --- a/lib/Target/R600/AMDGPUISelLowering.h > +++ b/lib/Target/R600/AMDGPUISelLowering.h > @@ -160,6 +160,7 @@ enum { > FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, > STORE_MSKOR, > LOAD_CONSTANT, > + TBUFFER_STORE_FORMAT, > LAST_AMDGPU_ISD_NUMBER > }; > > diff --git a/lib/Target/R600/SIISelLowering.cpp > b/lib/Target/R600/SIISelLowering.cpp > index f196059..6fa0c85 100644 > --- a/lib/Target/R600/SIISelLowering.cpp > +++ b/lib/Target/R600/SIISelLowering.cpp > @@ -86,6 +86,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : > setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom); > setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); > > + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); > + > setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand); > > setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); > @@ -462,6 +464,43 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, > SelectionDAG &DAG) const { > Op.getOperand(3)); > } > } > + > + case ISD::INTRINSIC_VOID: > + SDValue Chain = Op.getOperand(0); > + unsigned IntrinsicID = > cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); > + > + switch (IntrinsicID) { > + case AMDGPUIntrinsic::SI_tbuffer_store: { > + SDLoc DL(Op); > + SDValue Ops [] = { > + Chain, > + ResourceDescriptorToi128(Op.getOperand(2), DAG), > + Op.getOperand(3), > + Op.getOperand(4), > + Op.getOperand(5), > + Op.getOperand(6), > + Op.getOperand(7), > + Op.getOperand(8), > + Op.getOperand(9), > + Op.getOperand(10), > + Op.getOperand(11), > + Op.getOperand(12), > + Op.getOperand(13), > + Op.getOperand(14) > + }; > + EVT VT = Op.getOperand(3).getValueType(); > + > + MachineMemOperand *MMO = MF.getMachineMemOperand( > + MachinePointerInfo(), > + MachineMemOperand::MOStore, > + VT.getSizeInBits() / 8, 4); > + return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL, > + Op->getVTList(), Ops, > + sizeof(Ops)/sizeof(Ops[0]), VT, MMO); > + } > + default: > + break; > + } > } > return SDValue(); > } > diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td > index ecc4718..c902feb 100644 > --- a/lib/Target/R600/SIInstrInfo.td > +++ b/lib/Target/R600/SIInstrInfo.td > @@ -21,6 +21,25 @@ def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT", > [SDNPMayLoad, SDNPMemOperand] > >; > > +def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", > + SDTypeProfile<0, 13, > + [SDTCisVT<0, i128>, // rsrc(SGPR) > + SDTCisVT<1, iAny>, // vdata(VGPR) > + SDTCisVT<2, i32>, // num_channels(imm) > + SDTCisVT<3, i32>, // vaddr(VGPR) > + SDTCisVT<4, i32>, // soffset(SGPR) > + SDTCisVT<5, i32>, // inst_offset(imm) > + SDTCisVT<6, i32>, // dfmt(imm) > + SDTCisVT<7, i32>, // nfmt(imm) > + SDTCisVT<8, i32>, // offen(imm) > + SDTCisVT<9, i32>, // idxen(imm) > + SDTCisVT<10, i32>, // glc(imm) > + SDTCisVT<11, i32>, // slc(imm) > + SDTCisVT<12, i32> // tfe(imm) > + ]>, > + [SDNPMayStore, SDNPMemOperand, SDNPHasChain] > +>; > + > def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT", > SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, i128>, SDTCisVT<2, > i16>, > SDTCisVT<3, i32>]> > @@ -65,6 +84,14 @@ def IMM8bitDWORD : ImmLeaf < > }]> > >; > > +def as_i1imm : SDNodeXForm<imm, [{ > + return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i1); > +}]>; > + > +def as_i8imm : SDNodeXForm<imm, [{ > + return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i8); > +}]>; > + > def as_i16imm : SDNodeXForm<imm, [{ > return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i16); > }]>; > diff --git a/lib/Target/R600/SIInstructions.td > b/lib/Target/R600/SIInstructions.td > index 136f69c..cd59c80 100644 > --- a/lib/Target/R600/SIInstructions.td > +++ b/lib/Target/R600/SIInstructions.td > @@ -475,10 +475,10 @@ def BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper < > //def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", > []>; > //def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, > "TBUFFER_LOAD_FORMAT_XYZ", []>; > def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, > "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>; > -//def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, "TBUFFER_STORE_FORMAT_X", > []>; > -//def TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, > "TBUFFER_STORE_FORMAT_XY", []>; > -//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, > "TBUFFER_STORE_FORMAT_XYZ", []>; > -//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, > "TBUFFER_STORE_FORMAT_XYZW", []>; > +def TBUFFER_STORE_FORMAT_X : MTBUF_Store_Helper <0x00000004, > "TBUFFER_STORE_FORMAT_X", VReg_32>; > +def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, > "TBUFFER_STORE_FORMAT_XY", VReg_64>; > +def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, > "TBUFFER_STORE_FORMAT_XYZ", VReg_128>; > +def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, > "TBUFFER_STORE_FORMAT_XYZW", VReg_128>; > > let mayLoad = 1 in { > > @@ -1873,6 +1873,27 @@ defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, > global_store>; > defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>; > defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>; > > +//===----------------------------------------------------------------------===// > +// MTBUF Patterns > +//===----------------------------------------------------------------------===// > + > +// TBUFFER_STORE_FORMAT_*, addr64=0 > +class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF opcode> : > Pat< > + (SItbuffer_store i128:$rsrc, vt:$vdata, num_channels, i32:$vaddr, > + i32:$soffset, imm:$inst_offset, imm:$dfmt, > + imm:$nfmt, imm:$offen, imm:$idxen, > + imm:$glc, imm:$slc, imm:$tfe), > + (opcode > + $vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen), > + (as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc, > + (as_i1imm $slc), (as_i1imm $tfe), $soffset) > +>; > + > +def : MTBUF_StoreResource <i32, 1, TBUFFER_STORE_FORMAT_X>; > +def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>; > +def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>; > +def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>; > + > /********** ====================== **********/ > /********** Indirect adressing **********/ > /********** ====================== **********/ > diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td > index d6e26ad..7fcc964 100644 > --- a/lib/Target/R600/SIIntrinsics.td > +++ b/lib/Target/R600/SIIntrinsics.td > @@ -20,6 +20,24 @@ let TargetPrefix = "SI", isTarget = 1 in { > def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, > llvm_i32_ty], [IntrNoMem]>; > def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_anyint_ty, > llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ; > > + // Fully-flexible TBUFFER_STORE_FORMAT_* except for the ADDR64 bit, which > is not exposed > + def int_SI_tbuffer_store : Intrinsic < > + [], > + [llvm_anyint_ty, // rsrc(SGPR) > + llvm_anyint_ty, // vdata(VGPR), overloaded for types i32, v2i32, v4i32 > + llvm_i32_ty, // num_channels(imm), selects opcode suffix: 1=X, 2=XY, > 3=XYZ, 4=XYZW > + llvm_i32_ty, // vaddr(VGPR) > + llvm_i32_ty, // soffset(SGPR) > + llvm_i32_ty, // inst_offset(imm) > + llvm_i32_ty, // dfmt(imm) > + llvm_i32_ty, // nfmt(imm) > + llvm_i32_ty, // offen(imm) > + llvm_i32_ty, // idxen(imm) > + llvm_i32_ty, // glc(imm) > + llvm_i32_ty, // slc(imm) > + llvm_i32_ty], // tfe(imm) > + []>; > + > class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, > llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; > > def int_SI_sample : Sample; > -- > 1.8.1.2 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev