No, we use 11 user data SGPRs for the vertex shader, but there are also 6 additional SGPRs loaded by the hardware based on the VGT state (4 streamout offsets, streamout_enable, and streamout_write_index), which brings the total number of input SGPRs to 17 (11 + 6). The 6 additional SGPRs can be enabled by setting the SPI_SHADER_PGM_RSRC2_VS.SO_* fields to 1.
Marek On Thu, Sep 5, 2013 at 5:44 PM, Tom Stellard <t...@stellard.net> wrote: > On Mon, Sep 02, 2013 at 09:07:18PM +0200, Marek Olšák wrote: >> For _XYZ, the type of VDATA is v4i32, because v3i32 doesn't exist. >> >> The ADDR64 bit is not exposed. A simpler intrinsic that doesn't take >> a resource descriptor might be nicer. >> >> The maximum number of input SGPRs is bumped to 17. >> >> Signed-off-by: Marek Olšák <marek.ol...@amd.com> >> --- >> lib/Target/R600/AMDGPUCallingConv.td | 3 ++- >> lib/Target/R600/AMDGPUISelLowering.cpp | 1 + >> lib/Target/R600/AMDGPUISelLowering.h | 1 + >> lib/Target/R600/SIISelLowering.cpp | 39 >> ++++++++++++++++++++++++++++++++++ >> lib/Target/R600/SIInstrInfo.td | 27 +++++++++++++++++++++++ >> lib/Target/R600/SIInstructions.td | 29 +++++++++++++++++++++---- >> lib/Target/R600/SIIntrinsics.td | 18 ++++++++++++++++ >> 7 files changed, 113 insertions(+), 5 deletions(-) >> >> diff --git a/lib/Target/R600/AMDGPUCallingConv.td >> b/lib/Target/R600/AMDGPUCallingConv.td >> index 84d3118..d26be32 100644 >> --- a/lib/Target/R600/AMDGPUCallingConv.td >> +++ b/lib/Target/R600/AMDGPUCallingConv.td >> @@ -19,7 +19,8 @@ def CC_SI : CallingConv<[ >> >> CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[ >> SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7, >> - SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15 >> + SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15, >> + SGPR16 > > Why is this necessary? Are we using all 16 user sgprs now? 
> >> ]>>>, >> >> CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow< >> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp >> b/lib/Target/R600/AMDGPUISelLowering.cpp >> index 1237323..30d9503 100644 >> --- a/lib/Target/R600/AMDGPUISelLowering.cpp >> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp >> @@ -718,5 +718,6 @@ const char* >> AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { >> NODE_NAME_CASE(SAMPLED) >> NODE_NAME_CASE(SAMPLEL) >> NODE_NAME_CASE(STORE_MSKOR) >> + NODE_NAME_CASE(TBUFFER_STORE_FORMAT) >> } >> } >> diff --git a/lib/Target/R600/AMDGPUISelLowering.h >> b/lib/Target/R600/AMDGPUISelLowering.h >> index 75ac4c2..8a68356 100644 >> --- a/lib/Target/R600/AMDGPUISelLowering.h >> +++ b/lib/Target/R600/AMDGPUISelLowering.h >> @@ -160,6 +160,7 @@ enum { >> FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, >> STORE_MSKOR, >> LOAD_CONSTANT, >> + TBUFFER_STORE_FORMAT, >> LAST_AMDGPU_ISD_NUMBER >> }; >> >> diff --git a/lib/Target/R600/SIISelLowering.cpp >> b/lib/Target/R600/SIISelLowering.cpp >> index f196059..6fa0c85 100644 >> --- a/lib/Target/R600/SIISelLowering.cpp >> +++ b/lib/Target/R600/SIISelLowering.cpp >> @@ -86,6 +86,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : >> setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom); >> setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); >> >> + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); >> + >> setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand); >> >> setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); >> @@ -462,6 +464,43 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, >> SelectionDAG &DAG) const { >> Op.getOperand(3)); >> } >> } >> + >> + case ISD::INTRINSIC_VOID: >> + SDValue Chain = Op.getOperand(0); >> + unsigned IntrinsicID = >> cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); >> + >> + switch (IntrinsicID) { >> + case AMDGPUIntrinsic::SI_tbuffer_store: { >> + SDLoc DL(Op); >> + SDValue Ops [] = { >> + Chain, >> + 
ResourceDescriptorToi128(Op.getOperand(2), DAG), >> + Op.getOperand(3), >> + Op.getOperand(4), >> + Op.getOperand(5), >> + Op.getOperand(6), >> + Op.getOperand(7), >> + Op.getOperand(8), >> + Op.getOperand(9), >> + Op.getOperand(10), >> + Op.getOperand(11), >> + Op.getOperand(12), >> + Op.getOperand(13), >> + Op.getOperand(14) >> + }; >> + EVT VT = Op.getOperand(3).getValueType(); >> + >> + MachineMemOperand *MMO = MF.getMachineMemOperand( >> + MachinePointerInfo(), >> + MachineMemOperand::MOStore, >> + VT.getSizeInBits() / 8, 4); >> + return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL, >> + Op->getVTList(), Ops, >> + sizeof(Ops)/sizeof(Ops[0]), VT, MMO); >> + } >> + default: >> + break; >> + } >> } >> return SDValue(); >> } >> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td >> index ecc4718..c902feb 100644 >> --- a/lib/Target/R600/SIInstrInfo.td >> +++ b/lib/Target/R600/SIInstrInfo.td >> @@ -21,6 +21,25 @@ def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT", >> [SDNPMayLoad, SDNPMemOperand] >> >; >> >> +def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", >> + SDTypeProfile<0, 13, >> + [SDTCisVT<0, i128>, // rsrc(SGPR) >> + SDTCisVT<1, iAny>, // vdata(VGPR) >> + SDTCisVT<2, i32>, // num_channels(imm) >> + SDTCisVT<3, i32>, // vaddr(VGPR) >> + SDTCisVT<4, i32>, // soffset(SGPR) >> + SDTCisVT<5, i32>, // inst_offset(imm) >> + SDTCisVT<6, i32>, // dfmt(imm) >> + SDTCisVT<7, i32>, // nfmt(imm) >> + SDTCisVT<8, i32>, // offen(imm) >> + SDTCisVT<9, i32>, // idxen(imm) >> + SDTCisVT<10, i32>, // glc(imm) >> + SDTCisVT<11, i32>, // slc(imm) >> + SDTCisVT<12, i32> // tfe(imm) >> + ]>, >> + [SDNPMayStore, SDNPMemOperand, SDNPHasChain] >> +>; >> + >> def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT", >> SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, i128>, SDTCisVT<2, >> i16>, >> SDTCisVT<3, i32>]> >> @@ -65,6 +84,14 @@ def IMM8bitDWORD : ImmLeaf < >> }]> >> >; >> >> +def as_i1imm : SDNodeXForm<imm, [{ >> + 
return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i1); >> +}]>; >> + >> +def as_i8imm : SDNodeXForm<imm, [{ >> + return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i8); >> +}]>; >> + >> def as_i16imm : SDNodeXForm<imm, [{ >> return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i16); >> }]>; >> diff --git a/lib/Target/R600/SIInstructions.td >> b/lib/Target/R600/SIInstructions.td >> index 136f69c..cd59c80 100644 >> --- a/lib/Target/R600/SIInstructions.td >> +++ b/lib/Target/R600/SIInstructions.td >> @@ -475,10 +475,10 @@ def BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper < >> //def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, >> "TBUFFER_LOAD_FORMAT_XY", []>; >> //def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, >> "TBUFFER_LOAD_FORMAT_XYZ", []>; >> def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, >> "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>; >> -//def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, >> "TBUFFER_STORE_FORMAT_X", []>; >> -//def TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, >> "TBUFFER_STORE_FORMAT_XY", []>; >> -//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, >> "TBUFFER_STORE_FORMAT_XYZ", []>; >> -//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, >> "TBUFFER_STORE_FORMAT_XYZW", []>; >> +def TBUFFER_STORE_FORMAT_X : MTBUF_Store_Helper <0x00000004, >> "TBUFFER_STORE_FORMAT_X", VReg_32>; >> +def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, >> "TBUFFER_STORE_FORMAT_XY", VReg_64>; >> +def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, >> "TBUFFER_STORE_FORMAT_XYZ", VReg_128>; >> +def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, >> "TBUFFER_STORE_FORMAT_XYZW", VReg_128>; >> >> let mayLoad = 1 in { >> >> @@ -1873,6 +1873,27 @@ defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, >> global_store>; >> defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>; >> defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>; >> >> 
+//===----------------------------------------------------------------------===// >> +// MTBUF Patterns >> +//===----------------------------------------------------------------------===// >> + >> +// TBUFFER_STORE_FORMAT_*, addr64=0 >> +class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF opcode> : >> Pat< >> + (SItbuffer_store i128:$rsrc, vt:$vdata, num_channels, i32:$vaddr, >> + i32:$soffset, imm:$inst_offset, imm:$dfmt, >> + imm:$nfmt, imm:$offen, imm:$idxen, >> + imm:$glc, imm:$slc, imm:$tfe), >> + (opcode >> + $vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen), >> + (as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc, >> + (as_i1imm $slc), (as_i1imm $tfe), $soffset) >> +>; >> + >> +def : MTBUF_StoreResource <i32, 1, TBUFFER_STORE_FORMAT_X>; >> +def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>; >> +def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>; >> +def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>; >> + >> /********** ====================== **********/ >> /********** Indirect adressing **********/ >> /********** ====================== **********/ >> diff --git a/lib/Target/R600/SIIntrinsics.td >> b/lib/Target/R600/SIIntrinsics.td >> index d6e26ad..7fcc964 100644 >> --- a/lib/Target/R600/SIIntrinsics.td >> +++ b/lib/Target/R600/SIIntrinsics.td >> @@ -20,6 +20,24 @@ let TargetPrefix = "SI", isTarget = 1 in { >> def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, >> llvm_i32_ty], [IntrNoMem]>; >> def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_anyint_ty, >> llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ; >> >> + // Fully-flexible TBUFFER_STORE_FORMAT_* except for the ADDR64 bit, which >> is not exposed >> + def int_SI_tbuffer_store : Intrinsic < >> + [], >> + [llvm_anyint_ty, // rsrc(SGPR) >> + llvm_anyint_ty, // vdata(VGPR), overloaded for types i32, v2i32, v4i32 >> + llvm_i32_ty, // num_channels(imm), selects opcode suffix: 1=X, >> 2=XY, 3=XYZ, 
4=XYZW >> + llvm_i32_ty, // vaddr(VGPR) >> + llvm_i32_ty, // soffset(SGPR) >> + llvm_i32_ty, // inst_offset(imm) >> + llvm_i32_ty, // dfmt(imm) >> + llvm_i32_ty, // nfmt(imm) >> + llvm_i32_ty, // offen(imm) >> + llvm_i32_ty, // idxen(imm) >> + llvm_i32_ty, // glc(imm) >> + llvm_i32_ty, // slc(imm) >> + llvm_i32_ty], // tfe(imm) >> + []>; >> + >> class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, >> llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; >> >> def int_SI_sample : Sample; >> -- >> 1.8.1.2 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev