On Fri, Jan 24, 2014 at 03:17:04PM +0900, Michel Dänzer wrote:
>
> The attached patches add two intrinsics to the R600 backend which are
> necessary for geometry shader support in the radeonsi driver.
>
Patch 1 and v2 of Patch 2 are: Reviewed-by: Tom Stellard <thomas.stell...@amd.com> -Tom > > -- > Earthling Michel Dänzer | http://www.amd.com > Libre software enthusiast | Mesa and X developer > From 8feb7201ac894e5a6731a157020ac807936f584d Mon Sep 17 00:00:00 2001 > From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daen...@amd.com> > Date: Fri, 29 Nov 2013 18:21:41 +0900 > Subject: [PATCH 1/2] R600/SI: Add intrinsic for S_SENDMSG instruction > MIME-Version: 1.0 > Content-Type: text/plain; charset=UTF-8 > Content-Transfer-Encoding: 8bit > > Signed-off-by: Michel Dänzer <michel.daen...@amd.com> > --- > lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 31 > +++++++++++++++++++++++ > lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h | 1 + > lib/Target/R600/SIInsertWaits.cpp | 6 +++++ > lib/Target/R600/SIInstructions.td | 16 ++++++++++-- > lib/Target/R600/SIIntrinsics.td | 2 ++ > test/CodeGen/R600/llvm.SI.sendmsg.ll | 21 +++++++++++++++ > 6 files changed, 75 insertions(+), 2 deletions(-) > create mode 100644 test/CodeGen/R600/llvm.SI.sendmsg.ll > > diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp > b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp > index 99e1377..7105879 100644 > --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp > +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp > @@ -316,6 +316,37 @@ void AMDGPUInstPrinter::printKCache(const MCInst *MI, > unsigned OpNo, > } > } > > +void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo, > + raw_ostream &O) { > + unsigned SImm16 = MI->getOperand(OpNo).getImm(); > + unsigned Msg = SImm16 & 0xF; > + if (Msg == 2 || Msg == 3) { > + unsigned Op = (SImm16 >> 4) & 0xF; > + if (Msg == 3) > + O << "Gs_done("; > + else > + O << "Gs("; > + if (Op == 0) { > + O << "nop"; > + } else { > + unsigned Stream = (SImm16 >> 8) & 0x3; > + if (Op == 1) > + O << "cut"; > + else if (Op == 2) > + O << "emit"; > + else if (Op == 3) > + O << "emit-cut"; > + O << " stream " << Stream; > + } > + O << "), 
[m0] "; > + } else if (Msg == 1) > + O << "interrupt "; > + else if (Msg == 15) > + O << "system "; > + else > + O << "unknown(" << Msg << ") "; > +} > + > void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo, > raw_ostream &O) { > // Note: Mask values are taken from SIInsertWaits.cpp and not from ISA docs > diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h > b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h > index 77af942..2876dd2 100644 > --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h > +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h > @@ -53,6 +53,7 @@ private: > void printRSel(const MCInst *MI, unsigned OpNo, raw_ostream &O); > void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O); > void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O); > + void printSendMsg(const MCInst *MI, unsigned OpNo, raw_ostream &O); > void printWaitFlag(const MCInst *MI, unsigned OpNo, raw_ostream &O); > }; > > diff --git a/lib/Target/R600/SIInsertWaits.cpp > b/lib/Target/R600/SIInsertWaits.cpp > index 7ef662e..695ec40 100644 > --- a/lib/Target/R600/SIInsertWaits.cpp > +++ b/lib/Target/R600/SIInsertWaits.cpp > @@ -314,6 +314,12 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) > { > > Counters Result = ZeroCounts; > > + // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish, > + // but we also want to wait for any other outstanding transfers before > + // signalling other hardware blocks > + if (MI.getOpcode() == AMDGPU::S_SENDMSG) > + return LastIssued; > + > // For each register affected by this > // instruction increase the result sequence > for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { > diff --git a/lib/Target/R600/SIInstructions.td > b/lib/Target/R600/SIInstructions.td > index 3baa4cd..c0ad398 100644 > --- a/lib/Target/R600/SIInstructions.td > +++ b/lib/Target/R600/SIInstructions.td > @@ -22,6 +22,10 @@ def InterpSlot : Operand<i32> { > let PrintMethod = "printInterpSlot"; > 
} > > +def SendMsgImm : Operand<i32> { > + let PrintMethod = "printSendMsg"; > +} > + > def isSI : Predicate<"Subtarget.getGeneration() " > ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">; > > @@ -826,17 +830,25 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER", > def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT > $simm16", > [] > >; > -} // End hasSideEffects > //def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>; > //def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>; > //def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>; > -//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>; > + > +let Uses = [EXEC] in { > + def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), > "S_SENDMSG $simm16", > + [(int_SI_sendmsg imm:$simm16, M0Reg:$m0)] > + > { > + let DisableEncoding = "$m0"; > + } > +} // End Uses = [EXEC] > + > //def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>; > //def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>; > //def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>; > //def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>; > //def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>; > //def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>; > +} // End hasSideEffects > > def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst), > (ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc), > diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td > index 7fcc964..efcdc84 100644 > --- a/lib/Target/R600/SIIntrinsics.td > +++ b/lib/Target/R600/SIIntrinsics.td > @@ -38,6 +38,8 @@ let TargetPrefix = "SI", isTarget = 1 in { > llvm_i32_ty], // tfe(imm) > []>; > > + def int_SI_sendmsg : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], > [IntrNoMem]>; > + > class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, > llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; > > def int_SI_sample : Sample; > diff --git a/test/CodeGen/R600/llvm.SI.sendmsg.ll > 
b/test/CodeGen/R600/llvm.SI.sendmsg.ll > new file mode 100644 > index 0000000..581d422 > --- /dev/null > +++ b/test/CodeGen/R600/llvm.SI.sendmsg.ll > @@ -0,0 +1,21 @@ > +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s > + > +; CHECK-LABEL: @main > +; CHECK: S_SENDMSG Gs(emit stream 0) > +; CHECK: S_SENDMSG Gs(cut stream 1) > +; CHECK: S_SENDMSG Gs(emit-cut stream 2) > +; CHECK: S_SENDMSG Gs_done(nop) > + > +define void @main() { > +main_body: > + call void @llvm.SI.sendmsg(i32 34, i32 0); > + call void @llvm.SI.sendmsg(i32 274, i32 0); > + call void @llvm.SI.sendmsg(i32 562, i32 0); > + call void @llvm.SI.sendmsg(i32 3, i32 0); > + ret void > +} > + > +; Function Attrs: nounwind > +declare void @llvm.SI.sendmsg(i32, i32) #0 > + > +attributes #0 = { nounwind } > -- > 1.8.5.3 > > From 6cf50f8fad5b81a3b109275704b050f805ed4ba1 Mon Sep 17 00:00:00 2001 > From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daen...@amd.com> > Date: Thu, 28 Nov 2013 13:33:00 +0900 > Subject: [PATCH 2/2] R600/SI: Add intrinsic for BUFFER_LOAD_DWORD* > instructions > MIME-Version: 1.0 > Content-Type: text/plain; charset=UTF-8 > Content-Transfer-Encoding: 8bit > > Signed-off-by: Michel Dänzer <michel.daen...@amd.com> > --- > lib/Target/R600/SIInstrInfo.td | 60 > ++++++++++++++++++++++----------- > lib/Target/R600/SIInstructions.td | 48 ++++++++++++++++++++++++-- > lib/Target/R600/SIIntrinsics.td | 14 ++++++++ > test/CodeGen/R600/llvm.SI.load.dword.ll | 40 ++++++++++++++++++++++ > 4 files changed, 141 insertions(+), 21 deletions(-) > create mode 100644 test/CodeGen/R600/llvm.SI.load.dword.ll > > diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td > index d0cc7ce..23b8b65 100644 > --- a/lib/Target/R600/SIInstrInfo.td > +++ b/lib/Target/R600/SIInstrInfo.td > @@ -425,26 +425,48 @@ class MTBUF_Store_Helper <bits<3> op, string asm, > RegisterClass regClass> : MTBU > > multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass > regClass> { > 
> - let glc = 0, lds = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */, > - mayLoad = 1 in { > - > - let offen = 1, idxen = 0, addr64 = 0, offset = 0 in { > - def _OFFEN : MUBUF <op, (outs regClass:$vdata), > - (ins SReg_128:$srsrc, VReg_32:$vaddr), > - asm#" $vdata, $srsrc + $vaddr", []>; > - } > - > - let offen = 0, idxen = 1, addr64 = 0 in { > - def _IDXEN : MUBUF <op, (outs regClass:$vdata), > - (ins SReg_128:$srsrc, VReg_32:$vaddr, > i16imm:$offset), > - asm#" $vdata, $srsrc[$vaddr] + $offset", []>; > - } > + let lds = 0, mayLoad = 1 in { > + > + let addr64 = 0 in { > + > + let offen = 0, idxen = 0 in { > + def _OFFSET : MUBUF <op, (outs regClass:$vdata), > + (ins SReg_128:$srsrc, VReg_32:$vaddr, > + i16imm:$offset, SSrc_32:$soffset, i1imm:$glc, > + i1imm:$slc, i1imm:$tfe), > + asm#" $vdata, $srsrc + $offset + $soffset, > glc=$glc, slc=$slc, tfe=$tfe", []>; > + } > + > + let offen = 1, idxen = 0, offset = 0 in { > + def _OFFEN : MUBUF <op, (outs regClass:$vdata), > + (ins SReg_128:$srsrc, VReg_32:$vaddr, > + i16imm:$offset, SSrc_32:$soffset, i1imm:$glc, > i1imm:$slc, > + i1imm:$tfe), > + asm#" $vdata, $srsrc + $vaddr + $offset + > $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>; > + } > + > + let offen = 0, idxen = 1 in { > + def _IDXEN : MUBUF <op, (outs regClass:$vdata), > + (ins SReg_128:$srsrc, VReg_32:$vaddr, > + i16imm:$offset, SSrc_32:$soffset, i1imm:$glc, > + i1imm:$slc, i1imm:$tfe), > + asm#" $vdata, $srsrc[$vaddr] + $offset + > $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>; > + } > + > + let offen = 1, idxen = 1 in { > + def _BOTHEN : MUBUF <op, (outs regClass:$vdata), > + (ins SReg_128:$srsrc, VReg_64:$vaddr, > + i16imm:$offset, SSrc_32:$soffset, i1imm:$glc, > + i1imm:$slc, i1imm:$tfe), > + asm#" $vdata, $srsrc[$vaddr[0]] + $vaddr[1] + > $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>; > + } > + } > > - let offen = 0, idxen = 0, addr64 = 1 in { > - def _ADDR64 : MUBUF <op, (outs regClass:$vdata), > - (ins SReg_128:$srsrc, VReg_64:$vaddr, > 
i16imm:$offset), > - asm#" $vdata, $srsrc + $vaddr + $offset", []>; > - } > + let offen = 0, idxen = 0, addr64 = 1, glc = 0, slc = 0, tfe = 0, soffset > = 128 /* ZERO */ in { > + def _ADDR64 : MUBUF <op, (outs regClass:$vdata), > + (ins SReg_128:$srsrc, VReg_64:$vaddr, > i16imm:$offset), > + asm#" $vdata, $srsrc + $vaddr + $offset", []>; > + } > } > } > > diff --git a/lib/Target/R600/SIInstructions.td > b/lib/Target/R600/SIInstructions.td > index c0ad398..03e7e32 100644 > --- a/lib/Target/R600/SIInstructions.td > +++ b/lib/Target/R600/SIInstructions.td > @@ -1428,7 +1428,7 @@ def : Pat < > /* int_SI_vs_load_input */ > def : Pat< > (SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr), > - (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset) > + (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, > 0, 0, 0) > >; > > /* int_SI_export */ > @@ -1834,7 +1834,7 @@ def : Pat < > // 3. Offset in an 32Bit VGPR > def : Pat < > (SIload_constant i128:$sbase, i32:$voff), > - (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff) > + (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0, 0) > >; > > // The multiplication scales from [0,1] to the unsigned integer range > @@ -1995,6 +1995,50 @@ defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, > global_store>; > defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>; > defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>; > > +// BUFFER_LOAD_DWORD*, addr64=0 > +multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF > idxen, > + MUBUF bothen> { > + > + def : Pat < > + (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset, > + imm:$offset, 0, 0, imm:$glc, imm:$slc, > + imm:$tfe)), > + (offset $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc), > + (as_i1imm $slc), (as_i1imm $tfe)) > + >; > + > + def : Pat < > + (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset, > + imm:$offset, 1, 0, imm:$glc, 
imm:$slc, > + imm:$tfe)), > + (offen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc), > + (as_i1imm $slc), (as_i1imm $tfe)) > + >; > + > + def : Pat < > + (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset, > + imm:$offset, 0, 1, imm:$glc, imm:$slc, > + imm:$tfe)), > + (idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc), > + (as_i1imm $slc), (as_i1imm $tfe)) > + >; > + > + def : Pat < > + (vt (int_SI_buffer_load_dword i128:$rsrc, v2i32:$vaddr, i32:$soffset, > + imm:$offset, 1, 1, imm:$glc, imm:$slc, > + imm:$tfe)), > + (bothen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc), > + (as_i1imm $slc), (as_i1imm $tfe)) > + >; > +} > + > +defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, > BUFFER_LOAD_DWORD_OFFEN, > + BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>; > +defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, > BUFFER_LOAD_DWORDX2_OFFEN, > + BUFFER_LOAD_DWORDX2_IDXEN, > BUFFER_LOAD_DWORDX2_BOTHEN>; > +defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, > BUFFER_LOAD_DWORDX4_OFFEN, > + BUFFER_LOAD_DWORDX4_IDXEN, > BUFFER_LOAD_DWORDX4_BOTHEN>; > + > > //===----------------------------------------------------------------------===// > // MTBUF Patterns > > //===----------------------------------------------------------------------===// > diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td > index efcdc84..00e32c0 100644 > --- a/lib/Target/R600/SIIntrinsics.td > +++ b/lib/Target/R600/SIIntrinsics.td > @@ -38,6 +38,20 @@ let TargetPrefix = "SI", isTarget = 1 in { > llvm_i32_ty], // tfe(imm) > []>; > > + // Fully-flexible BUFFER_LOAD_DWORD_* except for the ADDR64 bit, which is > not exposed > + def int_SI_buffer_load_dword : Intrinsic < > + [llvm_anyint_ty], // vdata(VGPR), overloaded for types i32, v2i32, v4i32 > + [llvm_anyint_ty, // rsrc(SGPR) > + llvm_anyint_ty, // vaddr(VGPR) > + llvm_i32_ty, // soffset(SGPR) > + llvm_i32_ty, // inst_offset(imm) > + 
llvm_i32_ty, // offen(imm) > + llvm_i32_ty, // idxen(imm) > + llvm_i32_ty, // glc(imm) > + llvm_i32_ty, // slc(imm) > + llvm_i32_ty], // tfe(imm) > + [IntrReadArgMem]>; > + > def int_SI_sendmsg : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], > [IntrNoMem]>; > > class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, > llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; > diff --git a/test/CodeGen/R600/llvm.SI.load.dword.ll > b/test/CodeGen/R600/llvm.SI.load.dword.ll > new file mode 100644 > index 0000000..a622775 > --- /dev/null > +++ b/test/CodeGen/R600/llvm.SI.load.dword.ll > @@ -0,0 +1,40 @@ > +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s > + > +; Example of a simple geometry shader loading vertex attributes from the > +; ESGS ring buffer > + > +; CHECK-LABEL: @main > +; CHECK: BUFFER_LOAD_DWORD > +; CHECK: BUFFER_LOAD_DWORD > +; CHECK: BUFFER_LOAD_DWORD > +; CHECK: BUFFER_LOAD_DWORD > + > +define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] > addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [2 x <16 x i8>] > addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] > addrspace(2)* inreg, i32, i32, i32, i32) #0 { > +main_body: > + %10 = getelementptr [2 x <16 x i8>] addrspace(2)* %3, i64 0, i32 1 > + %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0 > + %12 = shl i32 %6, 2 > + %13 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 0, > i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0) > + %14 = bitcast i32 %13 to float > + %15 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 %12, > i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0) > + %16 = bitcast i32 %15 to float > + %17 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 %12, > i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 0) > + %18 = bitcast i32 %17 to float > + %19 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %11, <2 x > i32> <i32 0, i32 0>, i32 0, i32 0, i32 1, 
i32 1, i32 1, i32 1, i32 0) > + %20 = bitcast i32 %19 to float > + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %14, > float %16, float %18, float %20) > + ret void > +} > + > +; Function Attrs: nounwind readonly > +declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, > i32, i32, i32, i32, i32) #1 > + > +; Function Attrs: nounwind readonly > +declare i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8>, <2 x i32>, i32, > i32, i32, i32, i32, i32, i32) #1 > + > +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, > float) > + > +attributes #0 = { "ShaderType"="1" } > +attributes #1 = { nounwind readonly } > + > +!0 = metadata !{metadata !"const", null, i32 1} > -- > 1.8.5.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev