In some cases, we need to emit ALU instructions with a certain stride
due to a HW limitation. When splitting that instruction, we need to
respect the original stride when creating the temporaries we load from
and store into. Otherwise, we'll reintroduce the problem we were trying
to work around.

Signed-off-by: Connor Abbott <connor.w.abb...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 812648f..386e9a2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2370,12 +2370,14 @@ fs_visitor::opt_register_renaming()
 
       if (depth == 0 &&
           inst->dst.file == GRF &&
-          alloc.sizes[inst->dst.reg] == inst->exec_size / 8 &&
+          alloc.sizes[inst->dst.reg] ==
+            inst->dst.component_size(inst->exec_size) &&
           !inst->is_partial_write()) {
          if (remap[dst] == -1) {
             remap[dst] = dst;
          } else {
-            remap[dst] = alloc.allocate(inst->exec_size / 8);
+            remap[dst] =
+               alloc.allocate(inst->dst.component_size(inst->exec_size));
             inst->dst.reg = remap[dst];
             progress = true;
          }
@@ -4334,6 +4336,8 @@ fs_visitor::lower_simd_width()
                    * temporary passed as source to the lowered instruction.
                    */
                   split_inst.src[j] = lbld.vgrf(inst->src[j].type, src_size);
+                  split_inst.src[j].subreg_offset = inst->src[j].subreg_offset;
+                  split_inst.src[j].stride = inst->src[j].stride;
                   emit_transpose(lbld.group(copy_width, 0),
                                  split_inst.src[j], &src, 1, src_size, n);
                }
@@ -4343,8 +4347,10 @@ fs_visitor::lower_simd_width()
                /* Allocate enough space to hold the result of the lowered
                 * instruction and fix up the number of registers written.
                 */
-               split_inst.dst = dsts[i] =
-                  lbld.vgrf(inst->dst.type, dst_size);
+               fs_reg dst = lbld.vgrf(inst->dst.type, dst_size);
+               dst.stride = inst->dst.stride;
+               dst.subreg_offset = inst->dst.subreg_offset;
+               split_inst.dst = dsts[i] = dst;
                split_inst.regs_written =
                   DIV_ROUND_UP(inst->regs_written * lower_width,
                                inst->exec_size);
-- 
2.4.3

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to