Due to 32-bit alignment requirements, 16-bit data still occupies the full 32-bit register.
This change allows copy propagation to work on those registers without making the optimization pass too complicated. --- src/intel/compiler/brw_fs_nir.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index c469baf42a1..eadd49ff13a 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -635,6 +635,20 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) } } + /* For 16-bit data, we are using the full 32-bit register, due alignment + * requirements. So it is safe to retype a 16-bit to 16-bit raw mov to + * UD, and that would make these movs easier to be optimized during copy + * propagation later. + */ + bool retype_to_ud = (nir_dest_bit_size(instr->dest.dest) == 16 && + nir_src_bit_size(instr->src[0].src) == 16 && + temp.stride == 2 && + op[0].stride == 2); + if (retype_to_ud) { + temp.type = BRW_REGISTER_TYPE_UD; + temp.stride = 1; + } + for (unsigned i = 0; i < 4; i++) { if (!(instr->dest.write_mask & (1 << i))) continue; @@ -643,6 +657,11 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) inst = bld.MOV(offset(temp, bld, i), offset(op[0], bld, instr->src[0].swizzle[i])); } else { + if (retype_to_ud) { + op[i].type = BRW_REGISTER_TYPE_UD; + op[i].stride = 1; + } + inst = bld.MOV(offset(temp, bld, i), offset(op[i], bld, instr->src[i].swizzle[0])); } -- 2.11.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev