The attached patches add two intrinsics to the R600 backend which are necessary for geometry shader support in the radeonsi driver.
-- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer
>From 8feb7201ac894e5a6731a157020ac807936f584d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daen...@amd.com> Date: Fri, 29 Nov 2013 18:21:41 +0900 Subject: [PATCH 1/2] R600/SI: Add intrinsic for S_SENDMSG instruction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michel Dänzer <michel.daen...@amd.com> --- lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 31 +++++++++++++++++++++++ lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h | 1 + lib/Target/R600/SIInsertWaits.cpp | 6 +++++ lib/Target/R600/SIInstructions.td | 16 ++++++++++-- lib/Target/R600/SIIntrinsics.td | 2 ++ test/CodeGen/R600/llvm.SI.sendmsg.ll | 21 +++++++++++++++ 6 files changed, 75 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/R600/llvm.SI.sendmsg.ll diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp index 99e1377..7105879 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -316,6 +316,37 @@ void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo, } } +void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned SImm16 = MI->getOperand(OpNo).getImm(); + unsigned Msg = SImm16 & 0xF; + if (Msg == 2 || Msg == 3) { + unsigned Op = (SImm16 >> 4) & 0xF; + if (Msg == 3) + O << "Gs_done("; + else + O << "Gs("; + if (Op == 0) { + O << "nop"; + } else { + unsigned Stream = (SImm16 >> 8) & 0x3; + if (Op == 1) + O << "cut"; + else if (Op == 2) + O << "emit"; + else if (Op == 3) + O << "emit-cut"; + O << " stream " << Stream; + } + O << "), [m0] "; + } else if (Msg == 1) + O << "interrupt "; + else if (Msg == 15) + O << "system "; + else + O << "unknown(" << Msg << ") "; +} + void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo, raw_ostream &O) { // Note: Mask values are taken from SIInsertWaits.cpp and not from ISA docs diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h index 77af942..2876dd2 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h @@ -53,6 +53,7 @@ private: void printRSel(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printSendMsg(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printWaitFlag(const MCInst *MI, unsigned OpNo, raw_ostream &O); }; diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp index 7ef662e..695ec40 100644 --- a/lib/Target/R600/SIInsertWaits.cpp +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -314,6 +314,12 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) { Counters Result = ZeroCounts; + // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish, + // but we also want to wait for any other outstanding transfers before + // signalling other hardware blocks + if (MI.getOpcode() == AMDGPU::S_SENDMSG) + return LastIssued; + // For each register affected by this // instruction increase the result sequence for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 3baa4cd..c0ad398 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -22,6 +22,10 @@ def InterpSlot : Operand<i32> { let PrintMethod = "printInterpSlot"; } +def SendMsgImm : Operand<i32> { + let PrintMethod = "printSendMsg"; +} + def isSI : Predicate<"Subtarget.getGeneration() " ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">; @@ -826,17 +830,25 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER", def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16", [] >; -} // End hasSideEffects //def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>; //def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>; //def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>; -//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>; + +let Uses = [EXEC] in { + def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "S_SENDMSG $simm16", + [(int_SI_sendmsg imm:$simm16, M0Reg:$m0)] + > { + let DisableEncoding = "$m0"; + } +} // End Uses = [EXEC] + //def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>; //def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>; //def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>; //def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>; //def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>; //def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>; +} // End hasSideEffects def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc), diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td index 7fcc964..efcdc84 100644 --- a/lib/Target/R600/SIIntrinsics.td +++ b/lib/Target/R600/SIIntrinsics.td @@ -38,6 +38,8 @@ let TargetPrefix = "SI", isTarget = 1 in { llvm_i32_ty], // tfe(imm) []>; + def int_SI_sendmsg : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; def int_SI_sample : Sample; diff --git a/test/CodeGen/R600/llvm.SI.sendmsg.ll b/test/CodeGen/R600/llvm.SI.sendmsg.ll new file mode 100644 index 0000000..581d422 --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.sendmsg.ll @@ -0,0 +1,21 @@ +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s + +; CHECK-LABEL: @main +; CHECK: S_SENDMSG Gs(emit stream 0) +; CHECK: S_SENDMSG Gs(cut stream 1) +; CHECK: S_SENDMSG Gs(emit-cut stream 2) +; CHECK: S_SENDMSG Gs_done(nop) + +define void @main() { +main_body: + call void @llvm.SI.sendmsg(i32 34, i32 0); + call void @llvm.SI.sendmsg(i32 274, i32 0); + call void @llvm.SI.sendmsg(i32 562, i32 0); + call void @llvm.SI.sendmsg(i32 3, i32 0); + ret void +} + +; Function Attrs: nounwind +declare void @llvm.SI.sendmsg(i32, i32) #0 + +attributes #0 = { nounwind } -- 1.8.5.3
>From 6cf50f8fad5b81a3b109275704b050f805ed4ba1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daen...@amd.com> Date: Thu, 28 Nov 2013 13:33:00 +0900 Subject: [PATCH 2/2] R600/SI: Add intrinsic for BUFFER_LOAD_DWORD* instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michel Dänzer <michel.daen...@amd.com> --- lib/Target/R600/SIInstrInfo.td | 60 ++++++++++++++++++++++----------- lib/Target/R600/SIInstructions.td | 48 ++++++++++++++++++++++++-- lib/Target/R600/SIIntrinsics.td | 14 ++++++++ test/CodeGen/R600/llvm.SI.load.dword.ll | 40 ++++++++++++++++++++++ 4 files changed, 141 insertions(+), 21 deletions(-) create mode 100644 test/CodeGen/R600/llvm.SI.load.dword.ll diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index d0cc7ce..23b8b65 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -425,26 +425,48 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> { - let glc = 0, lds = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */, - mayLoad = 1 in { - - let offen = 1, idxen = 0, addr64 = 0, offset = 0 in { - def _OFFEN : MUBUF <op, (outs regClass:$vdata), - (ins SReg_128:$srsrc, VReg_32:$vaddr), - asm#" $vdata, $srsrc + $vaddr", []>; - } - - let offen = 0, idxen = 1, addr64 = 0 in { - def _IDXEN : MUBUF <op, (outs regClass:$vdata), - (ins SReg_128:$srsrc, VReg_32:$vaddr, i16imm:$offset), - asm#" $vdata, $srsrc[$vaddr] + $offset", []>; - } + let lds = 0, mayLoad = 1 in { + + let addr64 = 0 in { + + let offen = 0, idxen = 0 in { + def _OFFSET : MUBUF <op, (outs regClass:$vdata), + (ins SReg_128:$srsrc, VReg_32:$vaddr, + i16imm:$offset, SSrc_32:$soffset, i1imm:$glc, + i1imm:$slc, i1imm:$tfe), + asm#" $vdata, $srsrc + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>; + } + + let offen = 1, idxen = 0, offset = 0 in { + def _OFFEN : MUBUF <op, (outs regClass:$vdata), + (ins SReg_128:$srsrc, VReg_32:$vaddr, + i16imm:$offset, SSrc_32:$soffset, i1imm:$glc, i1imm:$slc, + i1imm:$tfe), + asm#" $vdata, $srsrc + $vaddr + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>; + } + + let offen = 0, idxen = 1 in { + def _IDXEN : MUBUF <op, (outs regClass:$vdata), + (ins SReg_128:$srsrc, VReg_32:$vaddr, + i16imm:$offset, SSrc_32:$soffset, i1imm:$glc, + i1imm:$slc, i1imm:$tfe), + asm#" $vdata, $srsrc[$vaddr] + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>; + } + + let offen = 1, idxen = 1 in { + def _BOTHEN : MUBUF <op, (outs regClass:$vdata), + (ins SReg_128:$srsrc, VReg_64:$vaddr, + i16imm:$offset, SSrc_32:$soffset, i1imm:$glc, + i1imm:$slc, i1imm:$tfe), + asm#" $vdata, $srsrc[$vaddr[0]] + $vaddr[1] + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>; + } + } - let offen = 0, idxen = 0, addr64 = 1 in { - def _ADDR64 : MUBUF <op, (outs regClass:$vdata), - (ins SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset), - asm#" $vdata, $srsrc + $vaddr + $offset", []>; - } + let offen = 0, idxen = 0, addr64 = 1, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in { + def _ADDR64 : MUBUF <op, (outs regClass:$vdata), + (ins SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset), + asm#" $vdata, $srsrc + $vaddr + $offset", []>; + } } } diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index c0ad398..03e7e32 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1428,7 +1428,7 @@ def : Pat < /* int_SI_vs_load_input */ def : Pat< (SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr), - (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset) + (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0) >; /* int_SI_export */ @@ -1834,7 +1834,7 @@ def : Pat < // 3. Offset in an 32Bit VGPR def : Pat < (SIload_constant i128:$sbase, i32:$voff), - (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff) + (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0, 0) >; // The multiplication scales from [0,1] to the unsigned integer range @@ -1995,6 +1995,50 @@ defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, global_store>; defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>; defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>; +// BUFFER_LOAD_DWORD*, addr64=0 +multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen, + MUBUF bothen> { + + def : Pat < + (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset, + imm:$offset, 0, 0, imm:$glc, imm:$slc, + imm:$tfe)), + (offset $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc), + (as_i1imm $slc), (as_i1imm $tfe)) + >; + + def : Pat < + (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset, + imm:$offset, 1, 0, imm:$glc, imm:$slc, + imm:$tfe)), + (offen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc), + (as_i1imm $slc), (as_i1imm $tfe)) + >; + + def : Pat < + (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset, + imm:$offset, 0, 1, imm:$glc, imm:$slc, + imm:$tfe)), + (idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc), + (as_i1imm $slc), (as_i1imm $tfe)) + >; + + def : Pat < + (vt (int_SI_buffer_load_dword i128:$rsrc, v2i32:$vaddr, i32:$soffset, + imm:$offset, 1, 1, imm:$glc, imm:$slc, + imm:$tfe)), + (bothen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc), + (as_i1imm $slc), (as_i1imm $tfe)) + >; +} + +defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN, + BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>; +defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN, + BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>; +defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN, + BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>; + //===----------------------------------------------------------------------===// // MTBUF Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td index efcdc84..00e32c0 100644 --- a/lib/Target/R600/SIIntrinsics.td +++ b/lib/Target/R600/SIIntrinsics.td @@ -38,6 +38,20 @@ let TargetPrefix = "SI", isTarget = 1 in { llvm_i32_ty], // tfe(imm) []>; + // Fully-flexible BUFFER_LOAD_DWORD_* except for the ADDR64 bit, which is not exposed + def int_SI_buffer_load_dword : Intrinsic < + [llvm_anyint_ty], // vdata(VGPR), overloaded for types i32, v2i32, v4i32 + [llvm_anyint_ty, // rsrc(SGPR) + llvm_anyint_ty, // vaddr(VGPR) + llvm_i32_ty, // soffset(SGPR) + llvm_i32_ty, // inst_offset(imm) + llvm_i32_ty, // offen(imm) + llvm_i32_ty, // idxen(imm) + llvm_i32_ty, // glc(imm) + llvm_i32_ty, // slc(imm) + llvm_i32_ty], // tfe(imm) + [IntrReadArgMem]>; + def int_SI_sendmsg : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; diff --git a/test/CodeGen/R600/llvm.SI.load.dword.ll b/test/CodeGen/R600/llvm.SI.load.dword.ll new file mode 100644 index 0000000..a622775 --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.load.dword.ll @@ -0,0 +1,40 @@ +;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s + +; Example of a simple geometry shader loading vertex attributes from the +; ESGS ring buffer + +; CHECK-LABEL: @main +; CHECK: BUFFER_LOAD_DWORD +; CHECK: BUFFER_LOAD_DWORD +; CHECK: BUFFER_LOAD_DWORD +; CHECK: BUFFER_LOAD_DWORD + +define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [2 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32, i32, i32, i32) #0 { +main_body: + %10 = getelementptr [2 x <16 x i8>] addrspace(2)* %3, i64 0, i32 1 + %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0 + %12 = shl i32 %6, 2 + %13 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0) + %14 = bitcast i32 %13 to float + %15 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 %12, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0) + %16 = bitcast i32 %15 to float + %17 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 %12, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 0) + %18 = bitcast i32 %17 to float + %19 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %11, <2 x i32> <i32 0, i32 0>, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 0) + %20 = bitcast i32 %19 to float + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %14, float %16, float %18, float %20) + ret void +} + +; Function Attrs: nounwind readonly +declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +; Function Attrs: nounwind readonly +declare i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="1" } +attributes #1 = { nounwind readonly } + +!0 = metadata !{metadata !"const", null, i32 1} -- 1.8.5.3
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev