We used to pre-set a bunch of extra arguments to a texture instruction in order to force the register allocator (RA) to allocate a register aligned to a boundary of 4. However, with the levelZero optimization, which removes the LOD argument when it is uniformly equal to zero, we undid that logic by removing an extra argument. As a result, we could end up with insufficient alignment on the second wide texture argument.
Instead we switch to a different method of achieving the same result. The logic runs during the constraint analysis of the RA, and adds unset sources as necessary right before being merged into a wide argument. An additional benefit of this approach is that we don't have needless stores of 0 into registers. Fixes MISALIGNED_REG errors in Hitman when run with bindless textures enabled on a GK208. Fixes: 9145873b152 ("nvc0/ir: use levelZero flag when the lod is set to 0") Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> --- This should get some testing on Maxwell+. And the logic I added for GM107+ should also be checked to see if it's needed at all, or could perhaps be reduced in scope (perhaps just needs to be as wide as first arg, etc). .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 16 --------- .../drivers/nouveau/codegen/nv50_ir_ra.cpp | 36 ++++++++++++++----- 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 44c62820342..9e87c97b0f4 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1063,22 +1063,6 @@ NVC0LoweringPass::handleTEX(TexInstruction *i) } } - if (chipset >= NVISA_GK104_CHIPSET) { - // - // If TEX requires more than 4 sources, the 2nd register tuple must be - // aligned to 4, even if it consists of just a single 4-byte register. - // - // XXX HACK: We insert 0 sources to avoid the 5 or 6 regs case. 
- // - int s = i->srcCount(0xff, true); - if (s > 4 && s < 7) { - if (i->srcExists(s)) // move potential predicate out of the way - i->moveSources(s, 7 - s); - while (s < 7) - i->setSrc(s++, bld.loadImm(NULL, 0)); - } - } - return true; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index cef1f6ffbe0..6b30e52efbe 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -2315,9 +2315,19 @@ RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex) if (!tex->tex.target.isArray() && tex->tex.useOffsets) s++; } - n = tex->srcCount(0xff) - s; + n = tex->srcCount(0xff, true) - s; + // TODO: Is this necessary? Perhaps just has to be aligned to the + // level that the first arg is, not necessarily to 4. This + // requirement has not been rigorously verified, as it has been on + // Kepler. + if (n > 0 && n < 3) { + if (tex->srcExists(n + s)) // move potential predicate out of the way + tex->moveSources(n + s, 3 - n); + while (n < 3) + tex->setSrc(s + n++, new_LValue(func, FILE_GPR)); + } } else { - s = tex->srcCount(0xff); + s = tex->srcCount(0xff, true); n = 0; } @@ -2340,14 +2350,18 @@ RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex) } else if (isTextureOp(tex->op)) { int n = tex->srcCount(0xff, true); - if (n > 4) { - condenseSrcs(tex, 0, 3); - if (n > 5) // NOTE: first call modified positions already - condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1)); - } else - if (n > 1) { - condenseSrcs(tex, 0, n - 1); + int s = n > 4 ? 
4 : n; + if (n > 4 && n < 7) { + if (tex->srcExists(n)) // move potential predicate out of the way + tex->moveSources(n, 7 - n); + + while (n < 7) + tex->setSrc(n++, new_LValue(func, FILE_GPR)); } + if (s > 1) + condenseSrcs(tex, 0, s - 1); + if (n > 4) + condenseSrcs(tex, 1, n - s); } } @@ -2484,6 +2498,10 @@ RegAlloc::InsertConstraintsPass::insertConstraintMove(Instruction *cst, int s) assert(cst->getSrc(s)->defs.size() == 1); // still SSA Instruction *defi = cst->getSrc(s)->defs.front()->getInsn(); + + if (defi->op == OP_NOP) + return; + bool imm = defi->op == OP_MOV && defi->src(0).getFile() == FILE_IMMEDIATE; bool load = defi->op == OP_LOAD && -- 2.19.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev