We used to pre-set a bunch of extra arguments to a texture instruction
in order to force the RA to allocate a register at the boundary of 4.
However with the levelZero optimization, which removes a LOD argument
when it's uniformly equal to zero, we undid that logic by removing an
extra argument. As a result, we could end up with insufficient alignment
on the second wide texture argument.

Instead we switch to a different method of achieving the same result.
The logic runs during the constraint analysis of the RA, and adds unset
sources as necessary right before being merged into a wide argument.
An additional benefit of this approach is that we don't have needless
stores of 0 into registers.

Fixes MISALIGNED_REG errors in Hitman when run with bindless textures
enabled on a GK208.

Fixes: 9145873b152 ("nvc0/ir: use levelZero flag when the lod is set to 0")
Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---

This should get some testing on Maxwell+. And the logic I added for
GM107+ should also be checked to see if it's needed at all, or could
perhaps be reduced in scope (perhaps just needs to be as wide as first
arg, etc).

 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 16 ---------
 .../drivers/nouveau/codegen/nv50_ir_ra.cpp    | 36 ++++++++++++++-----
 2 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 44c62820342..9e87c97b0f4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1063,22 +1063,6 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
       }
    }
 
-   if (chipset >= NVISA_GK104_CHIPSET) {
-      //
-      // If TEX requires more than 4 sources, the 2nd register tuple must be
-      // aligned to 4, even if it consists of just a single 4-byte register.
-      //
-      // XXX HACK: We insert 0 sources to avoid the 5 or 6 regs case.
-      //
-      int s = i->srcCount(0xff, true);
-      if (s > 4 && s < 7) {
-         if (i->srcExists(s)) // move potential predicate out of the way
-            i->moveSources(s, 7 - s);
-         while (s < 7)
-            i->setSrc(s++, bld.loadImm(NULL, 0));
-      }
-   }
-
    return true;
 }
 
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index cef1f6ffbe0..6b30e52efbe 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -2315,9 +2315,19 @@ 
RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex)
             if (!tex->tex.target.isArray() && tex->tex.useOffsets)
                s++;
          }
-         n = tex->srcCount(0xff) - s;
+         n = tex->srcCount(0xff, true) - s;
+         // TODO: Is this necessary? Perhaps just has to be aligned to the
+         // level that the first arg is, not necessarily to 4. This
+         // requirement has not been rigorously verified, as it has been on
+         // Kepler.
+         if (n > 0 && n < 3) {
+            if (tex->srcExists(n + s)) // move potential predicate out of the 
way
+               tex->moveSources(n + s, 3 - n);
+            while (n < 3)
+               tex->setSrc(s + n++, new_LValue(func, FILE_GPR));
+         }
       } else {
-         s = tex->srcCount(0xff);
+         s = tex->srcCount(0xff, true);
          n = 0;
       }
 
@@ -2340,14 +2350,18 @@ 
RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex)
    } else
    if (isTextureOp(tex->op)) {
       int n = tex->srcCount(0xff, true);
-      if (n > 4) {
-         condenseSrcs(tex, 0, 3);
-         if (n > 5) // NOTE: first call modified positions already
-            condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1));
-      } else
-      if (n > 1) {
-         condenseSrcs(tex, 0, n - 1);
+      int s = n > 4 ? 4 : n;
+      if (n > 4 && n < 7) {
+         if (tex->srcExists(n)) // move potential predicate out of the way
+            tex->moveSources(n, 7 - n);
+
+         while (n < 7)
+            tex->setSrc(n++, new_LValue(func, FILE_GPR));
       }
+      if (s > 1)
+         condenseSrcs(tex, 0, s - 1);
+      if (n > 4)
+         condenseSrcs(tex, 1, n - s);
    }
 }
 
@@ -2484,6 +2498,10 @@ 
RegAlloc::InsertConstraintsPass::insertConstraintMove(Instruction *cst, int s)
    assert(cst->getSrc(s)->defs.size() == 1); // still SSA
 
    Instruction *defi = cst->getSrc(s)->defs.front()->getInsn();
+
+   if (defi->op == OP_NOP)
+      return;
+
    bool imm = defi->op == OP_MOV &&
       defi->src(0).getFile() == FILE_IMMEDIATE;
    bool load = defi->op == OP_LOAD &&
-- 
2.19.2

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to