Off hand I don't know any use case exept constant buffers where we use S_BUFFER_LOAD, but anybody who uses it should be aware how to use it.

What are the symptoms of issuing a S_BUFFER_LOAD with NumRecords=0? Hangs or just undefined behaviour?

Christian.

Am 30.10.2013 17:00, schrieb Marek Olšák:
Yeah, it's unusual.

What if S_BUFFER_LOAD is also used by something else, like texture
buffers, or OpenCL? Will we have to fix that as well?

Marek

On Wed, Oct 30, 2013 at 3:32 PM, Christian König
<deathsim...@vodafone.de> wrote:
Mhm, I'm assumed that having NumRecord zero is actually something quite
unusual. E.g. a shader that accesses a not defined constant buffer or
something like that. So I would rather optimize for the common use case.

Anyway branch instructions are quite expensive, you can issue something
between 12 and 16 arithmetic instructions before they make sense to use
instead of a conditional move. Not sure how the relation is with memory
moves but I guess that it would be better to avoid them.

Christian.

Am 30.10.2013 14:59, schrieb Marek Olšák:

I thought that doing S_CMPK followed by S_CBRANCH has less overhead
than doing a memory read. If we used one of
S_BUFFER_LOAD_DWORDX2,4,8,16, it wouldn't be so bad. I don't know.

Marek

On Wed, Oct 30, 2013 at 2:48 PM, Christian König
<deathsim...@vodafone.de> wrote:
Am 30.10.2013 14:23, schrieb Marek Olšák:

From: Marek Olšák <marek.ol...@amd.com>

This also fixes scalar compare instructions which were always
eliminated,
because they didn't have a destination of SCC.

Uff, that looks like quite a bit of overhead, isn't there a simpler
approach? Like setting the the NumRecord to one and letting unused
constants
pointing to a dummy buffer or soemthing like this?

Christian.


Signed-off-by: Marek Olšák <marek.ol...@amd.com>
---
    lib/Target/R600/SIISelLowering.cpp | 30
++++++++++++++++++++++++++----
    lib/Target/R600/SIInsertWaits.cpp  |  6 ++++++
    lib/Target/R600/SIInstrInfo.td     |  5 +++++
    lib/Target/R600/SIInstructions.td  | 26 +++++++++++++++-----------
    4 files changed, 52 insertions(+), 15 deletions(-)

diff --git a/lib/Target/R600/SIISelLowering.cpp
b/lib/Target/R600/SIISelLowering.cpp
index 371572e..e9f4035 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -14,6 +14,7 @@
      #include "SIISelLowering.h"
    #include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
    #include "AMDILIntrinsicInfo.h"
    #include "SIInstrInfo.h"
    #include "SIMachineFunctionInfo.h"
@@ -302,14 +303,37 @@ MachineBasicBlock *
SITargetLowering::EmitInstrWithCustomInserter(
        MachineInstr * MI, MachineBasicBlock * BB) const {
        MachineBasicBlock::iterator I = *MI;
+  const SIInstrInfo *TII =
+    static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+
+  // Sea Islands must conditionally execute SMRD instructions depending
+  // on the value of SQ_BUF_RSRC_WORD2.NUM_RECORDS, because the
hardware
+  // doesn't skip the instructions if NUM_RECORDS is 0.
+  if (TII->isSMRD(MI->getOpcode())) {
+    if
(getTargetMachine().getSubtarget<AMDGPUSubtarget>().getGeneration() !=
+        AMDGPUSubtarget::SEA_ISLANDS)
+      return BB;
+
+    MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+    unsigned NumRecords =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+
+    // XXX should we save and restore the SCC register?
+    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::COPY),
NumRecords)
+        .addReg(MI->getOperand(1).getReg(), 0, AMDGPU::sub2);
+    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_CMPK_EQ_U32),
AMDGPU::SCC)
+        .addReg(NumRecords)
+        .addImm(0);
+    BuildMI(*BB, I, MI->getDebugLoc(),
TII->get(AMDGPU::S_CBRANCH_SCC1))
+        .addImm(1)
+        .addReg(AMDGPU::SCC);
+    return BB;
+  }
        switch (MI->getOpcode()) {
      default:
        return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
      case AMDGPU::BRANCH: return BB;
      case AMDGPU::SI_ADDR64_RSRC: {
-    const SIInstrInfo *TII =
-      static_cast<const
SIInstrInfo*>(getTargetMachine().getInstrInfo());
        MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
        unsigned SuperReg = MI->getOperand(0).getReg();
        unsigned SubRegLo =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
@@ -336,8 +360,6 @@ MachineBasicBlock *
SITargetLowering::EmitInstrWithCustomInserter(
        break;
      }
      case AMDGPU::V_SUB_F64: {
-    const SIInstrInfo *TII =
-      static_cast<const
SIInstrInfo*>(getTargetMachine().getInstrInfo());
        BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64),
                MI->getOperand(0).getReg())
                .addReg(MI->getOperand(1).getReg())
diff --git a/lib/Target/R600/SIInsertWaits.cpp
b/lib/Target/R600/SIInsertWaits.cpp
index 7e42fb7..2e47346 100644
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -294,6 +294,12 @@ bool SIInsertWaits::insertWait(MachineBasicBlock
&MBB,
      if (Counts.Named.EXP == 0)
        ExpInstrTypesSeen = 0;
    +  // Ensure S_WAITCNT is inserted before S_CBRANCH.
+  MachineBasicBlock::iterator beforeI = I;
+  --beforeI;
+  if (beforeI->getOpcode() == AMDGPU::S_CBRANCH_SCC1)
+    I = beforeI;
+
      // Build the wait instruction
      BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
              .addImm((Counts.Named.VM & 0xF) |
diff --git a/lib/Target/R600/SIInstrInfo.td
b/lib/Target/R600/SIInstrInfo.td
index ed42a2a..9567879 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -177,6 +177,11 @@ class SOPC_32 <bits<7> op, string opName, list<dag>
pattern> : SOPC <
      opName#" $dst, $src0, $src1", pattern
    >;
    +class SOPCK_32 <bits<7> op, string opName, list<dag> pattern> : SOPC
<
+  op, (outs SCCReg:$dst), (ins SReg_32:$src0, i16imm:$src1),
+  opName#" $dst, $src0, $src1", pattern
+>;
+
    class SOPC_64 <bits<7> op, string opName, list<dag> pattern> : SOPC <
      op, (outs SCCReg:$dst), (ins SSrc_64:$src0, SSrc_64:$src1),
      opName#" $dst, $src0, $src1", pattern
diff --git a/lib/Target/R600/SIInstructions.td
b/lib/Target/R600/SIInstructions.td
index 048c157..1b275a7 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -115,17 +115,17 @@ def S_CMPK_EQ_I32 : SOPK <
    */
      let isCompare = 1 in {
-def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>;
-def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>;
-def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>;
-def S_CMPK_LT_I32 : SOPK_32 <0x00000007, "S_CMPK_LT_I32", []>;
-def S_CMPK_LE_I32 : SOPK_32 <0x00000008, "S_CMPK_LE_I32", []>;
-def S_CMPK_EQ_U32 : SOPK_32 <0x00000009, "S_CMPK_EQ_U32", []>;
-def S_CMPK_LG_U32 : SOPK_32 <0x0000000a, "S_CMPK_LG_U32", []>;
-def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>;
-def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>;
-def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>;
-def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>;
+def S_CMPK_LG_I32 : SOPCK_32 <0x00000004, "S_CMPK_LG_I32", []>;
+def S_CMPK_GT_I32 : SOPCK_32 <0x00000005, "S_CMPK_GT_I32", []>;
+def S_CMPK_GE_I32 : SOPCK_32 <0x00000006, "S_CMPK_GE_I32", []>;
+def S_CMPK_LT_I32 : SOPCK_32 <0x00000007, "S_CMPK_LT_I32", []>;
+def S_CMPK_LE_I32 : SOPCK_32 <0x00000008, "S_CMPK_LE_I32", []>;
+def S_CMPK_EQ_U32 : SOPCK_32 <0x00000009, "S_CMPK_EQ_U32", []>;
+def S_CMPK_LG_U32 : SOPCK_32 <0x0000000a, "S_CMPK_LG_U32", []>;
+def S_CMPK_GT_U32 : SOPCK_32 <0x0000000b, "S_CMPK_GT_U32", []>;
+def S_CMPK_GE_U32 : SOPCK_32 <0x0000000c, "S_CMPK_GE_U32", []>;
+def S_CMPK_LT_U32 : SOPCK_32 <0x0000000d, "S_CMPK_LT_U32", []>;
+def S_CMPK_LE_U32 : SOPCK_32 <0x0000000e, "S_CMPK_LE_U32", []>;
    } // End isCompare = 1
      def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>;
@@ -492,6 +492,8 @@ defm S_LOAD_DWORDX4 : SMRD_Helper <0x02,
"S_LOAD_DWORDX4", SReg_64, SReg_128>;
    defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64,
SReg_256>;
    defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64,
SReg_512>;
    +let usesCustomInserter = 1 in {
+
    defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
      0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SReg_32
    >;
@@ -512,6 +514,8 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
      0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512
    >;
    +} // usesCustomInserter = 1
+
    } // mayLoad = 1
      //def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;


_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to