This avoids emitting the few extra instructions that are otherwise required to take the dispatch mask into account when the stage's dispatch is known to be tightly packed.
---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp   | 4 +++-
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 8 ++++++--
 2 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index c510f42..bdeda3b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -2045,7 +2045,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case SHADER_OPCODE_FIND_LIVE_CHANNEL: { const struct brw_reg mask = - stage == MESA_SHADER_FRAGMENT ? brw_vmask_reg() : brw_dmask_reg(); + brw_stage_has_packed_dispatch(stage, prog_data) ? brw_imm_ud(~0u) : + stage == MESA_SHADER_FRAGMENT ? brw_vmask_reg() : + brw_dmask_reg(); brw_find_live_channel(p, dst, mask); break; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index f9e6d1c..2bef549 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1862,9 +1862,13 @@ generate_code(struct brw_codegen *p, brw_memory_fence(p, dst); break; - case SHADER_OPCODE_FIND_LIVE_CHANNEL: - brw_find_live_channel(p, dst, brw_dmask_reg()); + case SHADER_OPCODE_FIND_LIVE_CHANNEL: { + const struct brw_reg mask = + brw_stage_has_packed_dispatch(nir->stage, &prog_data->base) ? + brw_imm_ud(~0u) : brw_dmask_reg(); + brw_find_live_channel(p, dst, mask); break; + } case SHADER_OPCODE_BROADCAST: assert(inst->force_writemask_all); -- 2.9.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev