Similar to nir_intrinsic_load_subgroup_eq_mask and friends, this adds an intrinsic which contains a bit for every member of the group. This doesn’t have a corresponding GLSL builtin but it will be used to calculate nir_intrinsic_load_subgroup_g{t,e}_mask. It has its own nir option on whether to lower it. The idea is that this should be much easier to generate than the other masks because it will likely be a compile-time constant and if so it will generate more efficient code for the other masks. --- src/compiler/nir/nir.h | 1 + src/compiler/nir/nir_intrinsics.h | 1 + src/compiler/nir/nir_opt_intrinsics.c | 41 +++++++++++++++++++++++++++-------- src/intel/compiler/brw_compiler.c | 1 + src/intel/compiler/brw_fs_nir.cpp | 1 + 5 files changed, 36 insertions(+), 9 deletions(-)
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index dd833cf..edb02b9 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1836,6 +1836,7 @@ typedef struct nir_shader_compiler_options { bool lower_extract_word; bool lower_vote_trivial; + bool lower_subgroup_all_mask; bool lower_subgroup_masks; /** diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h index cefd18b..de362a8 100644 --- a/src/compiler/nir/nir_intrinsics.h +++ b/src/compiler/nir/nir_intrinsics.h @@ -350,6 +350,7 @@ SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx) SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx) SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx) SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx) +SYSTEM_VALUE(subgroup_all_mask, 1, 0, xx, xx, xx) SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx) SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx) SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx) diff --git a/src/compiler/nir/nir_opt_intrinsics.c b/src/compiler/nir/nir_opt_intrinsics.c index d5fdc51..71d79d7 100644 --- a/src/compiler/nir/nir_opt_intrinsics.c +++ b/src/compiler/nir/nir_opt_intrinsics.c @@ -29,23 +29,39 @@ */ static nir_ssa_def * -high_subgroup_mask(nir_builder *b, - nir_ssa_def *count, - uint64_t base_mask) +subgroup_all_mask(nir_builder *b, + nir_ssa_def *count) { /* group_mask could probably be calculated more efficiently but we want to * be sure not to shift by 64 if the subgroup size is 64 because the GLSL - * shift operator is undefined in that case. In any case if we were worried - * about efficency this should probably be done further down because the - * subgroup size is likely to be known at compile time. + * shift operator is undefined in that case. In any case if the driver is + * worried about efficency this should probably be done further down + * because the subgroup size is likely to be known at compile time. */ nir_ssa_def *subgroup_size = nir_load_subgroup_size(b); nir_ssa_def *all_bits = nir_imm_int64(b, ~0ull); nir_ssa_def *shift = nir_isub(b, nir_imm_int(b, 64), subgroup_size); - nir_ssa_def *group_mask = nir_ushr(b, all_bits, shift); - nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, base_mask), count); + return nir_ushr(b, all_bits, shift); +} - return nir_iand(b, higher_bits, group_mask); +static nir_ssa_def * +high_subgroup_mask(nir_builder *b, + nir_ssa_def *count, + uint64_t base_mask) +{ + nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, base_mask), count); + nir_intrinsic_instr *load_group_mask = + nir_intrinsic_instr_create(b->shader, + nir_intrinsic_load_subgroup_all_mask); + load_group_mask->num_components = 1; + nir_ssa_dest_init(&load_group_mask->instr, + &load_group_mask->dest, + 1 /* num_components */, + 64 /* bit_size */, + NULL /* name */); + nir_builder_instr_insert(b, &load_group_mask->instr); + + return nir_iand(b, higher_bits, &load_group_mask->dest.ssa); } static bool @@ -100,6 +116,10 @@ opt_intrinsics_impl(nir_function_impl *impl) nir_imm_int(&b, 0)); break; } + case nir_intrinsic_load_subgroup_all_mask: + if (!b.shader->options->lower_subgroup_all_mask) + break; + /* flow through */ case nir_intrinsic_load_subgroup_eq_mask: case nir_intrinsic_load_subgroup_ge_mask: case nir_intrinsic_load_subgroup_gt_mask: @@ -111,6 +131,9 @@ opt_intrinsics_impl(nir_function_impl *impl) nir_ssa_def *count = nir_load_subgroup_invocation(&b); switch (intrin->intrinsic) { + case nir_intrinsic_load_subgroup_all_mask: + replacement = subgroup_all_mask(&b, count); + break; case nir_intrinsic_load_subgroup_eq_mask: replacement = nir_ishl(&b, nir_imm_int64(&b, 1ull), count); break; diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index 2f6af7d..8df0d2e 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -57,6 +57,7 @@ static const struct nir_shader_compiler_options scalar_nir_options = { .lower_unpack_snorm_4x8 = true, .lower_unpack_unorm_2x16 = true, .lower_unpack_unorm_4x8 = true, + .lower_subgroup_all_mask = true, .lower_subgroup_masks = true, .max_subgroup_size = 32, .max_unroll_iterations = 32, diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index bb153ca..9202b0f 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4185,6 +4185,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } + case nir_intrinsic_load_subgroup_all_mask: case nir_intrinsic_load_subgroup_eq_mask: case nir_intrinsic_load_subgroup_ge_mask: case nir_intrinsic_load_subgroup_gt_mask: -- 2.9.5 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev