On 05/26/2016 05:51 PM, Samuel Pitoiset wrote:


On 05/26/2016 05:49 PM, Samuel Pitoiset wrote:
What about the Maxwell logic? LDS expects a GPR at def(0) and in case
the dst reg doesn't exist, you move the predicate to def(0).

Are you sure you don't need to check if dst(0) is not a predicate in
emitLDS()?

Oh, actually not because we don't lower on GM107, and not predicate are
needed, so you are right.

[...] and we don't need to add a predicate [...]


Just wondering how did you find the issue. :=

Reviewed-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>

On 05/26/2016 04:44 AM, Ilia Mirkin wrote:
For a load locked, we might not use the first result but the second
result is the predicate result of the locking. In that case the load
splitting logic doesn't apply (which is designed for splitting 128-bit
loads). Instead we take the predicate and move it into the first
position (as having a dead result in first def's position upsets all
sorts of things including RA). Update the emitters to deal with this as
well.

Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---
 .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 20
++++++++++++++---
 .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp  | 26
+++++++++++++++++-----
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 10 +++++++--
 3 files changed, 45 insertions(+), 11 deletions(-)

diff --git
a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 6a5981d..27d9b8e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -2080,15 +2080,29 @@ CodeEmitterGK110::emitLOAD(const Instruction *i)
    code[1] |= offset >> 9;

    // Locked store on shared memory can fail.
+   int r = 0, p = -1;
    if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
        i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
-      assert(i->defExists(1));
-      defId(i->def(1), 32 + 16);
+      if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
+         r = -1;
+         p = 0;
+      } else if (i->defExists(1)) { // r, p
+         p = 1;
+      } else {
+         assert(!"Expected predicate dest for load locked");
+      }
    }

    emitPredicate(i);

-   defId(i->def(0), 2);
+   if (r >= 0)
+      defId(i->def(r), 2);
+   else
+      code[0] |= 255 << 2;
+
+   if (p >= 0)
+      defId(i->def(p), 32 + 16);
+
    if (i->getIndirect(0, 0)) {
       srcId(i->src(0).getIndirect(0), 10);
       if (i->getIndirect(0, 0)->reg.size == 8)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index 596293e..1bb962f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -1874,17 +1874,31 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i)
    }
    code[1] = opc;

+   int r = 0, p = -1;
    if (i->src(0).getFile() == FILE_MEMORY_SHARED) {
       if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
-         assert(i->defExists(1));
-         if (targ->getChipset() >= NVISA_GK104_CHIPSET)
-            defId(i->def(1), 8);
-         else
-            defId(i->def(1), 32 + 18);
+         if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
+            r = -1;
+            p = 0;
+         } else if (i->defExists(1)) { // r, p
+            p = 1;
+         } else {
+            assert(!"Expected predicate dest for load locked");
+         }
       }
    }

-   defId(i->def(0), 14);
+   if (r >= 0)
+      defId(i->def(r), 14);
+   else
+      code[0] |= 63 << 14;
+
+   if (p >= 0) {
+      if (targ->getChipset() >= NVISA_GK104_CHIPSET)
+         defId(i->def(p), 8);
+      else
+         defId(i->def(p), 32 + 18);
+   }

    setAddressByFile(i->src(0));
    srcId(i->src(0).getIndirect(0), 20);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index cd801f3..3213188 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -3265,14 +3265,20 @@ DeadCodeElim::visit(BasicBlock *bb)
          ++deadCount;
          delete_Instruction(prog, i);
       } else
-      if (i->defExists(1) && (i->op == OP_VFETCH || i->op ==
OP_LOAD)) {
+      if (i->defExists(1) &&
+          i->subOp == 0 &&
+          (i->op == OP_VFETCH || i->op == OP_LOAD)) {
          checkSplitLoad(i);
       } else
       if (i->defExists(0) && !i->getDef(0)->refCount()) {
          if (i->op == OP_ATOM ||
              i->op == OP_SUREDP ||
-             i->op == OP_SUREDB)
+             i->op == OP_SUREDB) {
             i->setDef(0, NULL);
+         } else if (i->op == OP_LOAD && i->subOp ==
NV50_IR_SUBOP_LOAD_LOCKED) {
+            i->setDef(0, i->getDef(1));
+            i->setDef(1, NULL);
+         }
       }
    }
    return true;




--
-Samuel
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to