Instead of letting NIR lower nir_intrinsic_load_subgroup_all_mask, the
backend now generates the mask directly. This is more efficient because
the value can be computed at compile time from the dispatch width.

Sadly it’s still not ideal: the constant doesn’t appear to get
propagated, so a redundant MOV remains.
---
 src/intel/compiler/brw_compiler.c | 2 +-
 src/intel/compiler/brw_fs_nir.cpp | 7 ++++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c
index 8df0d2e..f02fceb 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -57,7 +57,7 @@ static const struct nir_shader_compiler_options scalar_nir_options = {
    .lower_unpack_snorm_4x8 = true,
    .lower_unpack_unorm_2x16 = true,
    .lower_unpack_unorm_4x8 = true,
-   .lower_subgroup_all_mask = true,
+   .lower_subgroup_all_mask = false,
    .lower_subgroup_masks = true,
    .max_subgroup_size = 32,
    .max_unroll_iterations = 32,
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 9202b0f..b73edc9 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4185,7 +4185,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
-   case nir_intrinsic_load_subgroup_all_mask:
+   case nir_intrinsic_load_subgroup_all_mask: {
+      uint32_t mask = ~UINT32_C(0) >> (32 - dispatch_width);
+      bld.MOV(retype(dest, BRW_REGISTER_TYPE_Q), brw_imm_d(mask));
+      break;
+   }
+
    case nir_intrinsic_load_subgroup_eq_mask:
    case nir_intrinsic_load_subgroup_ge_mask:
    case nir_intrinsic_load_subgroup_gt_mask:
-- 
2.9.5
