The GL_ARB_shader_ballot spec says that gl_SubGroupSizeARB is declared as a uniform. This means that it cannot change within an invocation such as a draw call or a compute dispatch. For compute shaders, we're OK because we only ever use one dispatch size. For fragment shaders, however, the hardware dynamically chooses between SIMD8 and SIMD16, which violates the spec. Instead, let's just pick a fixed subgroup size based on the shader stage. The fixed size we choose for compute shaders is a bit higher than strictly needed, but there's no real harm in that. The advantage is that, if a shader does anything interesting with the value, NIR will see it as an immediate and can optimize better. --- src/compiler/nir/nir.h | 1 + src/compiler/nir/nir_lower_subgroups.c | 5 +++++ src/intel/compiler/brw_fs_nir.cpp | 4 ---- src/intel/compiler/brw_nir.c | 2 ++ 4 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index f5b46c7..b3d993a 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2450,6 +2450,7 @@ bool nir_lower_samplers_as_deref(nir_shader *shader, const struct gl_shader_program *shader_program); typedef struct nir_lower_subgroups_options { + uint8_t subgroup_size; uint8_t ballot_bit_size; bool lower_to_scalar:1; bool lower_vote_trivial:1; diff --git a/src/compiler/nir/nir_lower_subgroups.c b/src/compiler/nir/nir_lower_subgroups.c index 1cc6717..ed5fa4b 100644 --- a/src/compiler/nir/nir_lower_subgroups.c +++ b/src/compiler/nir/nir_lower_subgroups.c @@ -108,6 +108,11 @@ lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr *intrin, return nir_imm_int(b, NIR_TRUE); break; + case nir_intrinsic_load_subgroup_size: + if (options->subgroup_size) + return nir_imm_int(b, options->subgroup_size); + break; + case nir_intrinsic_read_invocation: case nir_intrinsic_read_first_invocation: if (options->lower_to_scalar) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 01b3547..4648f29 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4162,10 +4162,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } - case nir_intrinsic_load_subgroup_size: - bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), brw_imm_d(dispatch_width)); - break; - case nir_intrinsic_load_subgroup_invocation: bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), nir_system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION]); diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index bc80df3..14bde06 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -624,6 +624,8 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir) OPT(nir_normalize_cubemap_coords); const nir_lower_subgroups_options subgroups_options = { + .subgroup_size = nir->stage == MESA_SHADER_COMPUTE ? 
32 : + nir->stage == MESA_SHADER_FRAGMENT ? 16 : 8, .ballot_bit_size = 32, .lower_to_scalar = true, .lower_subgroup_masks = true, -- 2.5.0.400.gff86faf _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev