On Fri, Oct 27, 2017 at 2:11 AM, Iago Toral <ito...@igalia.com> wrote:
> On Wed, 2017-10-25 at 16:26 -0700, Jason Ekstrand wrote: > > Previously, brw_nir_lower_intrinsics added the param and then emitted > > a > > load_uniform intrinsic to load it directly. This commit switches > > things > > over to use a specific NIR intrinsic for the thread id. The one > > thing I > > don't like about this approach is that we have to copy > > thread_local_id > > over to the new visitor in import_uniforms. > > It is not clear to me why you are doing this... why do you like this > better? > For compute shaders, the SPIR-V subgroups stuff has a gl_subgroupId system value which subgroup in the dispatch you are. That information is basically the same as the thread_local_id only off by a factor of the SIMD size. It's fairly arbitrary, but I figured we might as well switch over to pushing the value that's defined in SPIR-V. > > --- > > src/compiler/nir/nir_intrinsics.h | 3 ++ > > src/intel/compiler/brw_fs.cpp | 4 +- > > src/intel/compiler/brw_fs.h | 1 + > > src/intel/compiler/brw_fs_nir.cpp | 14 +++++++ > > src/intel/compiler/brw_nir.h | 3 +- > > src/intel/compiler/brw_nir_lower_cs_intrinsics.c | 53 +++++--------- > > ---------- > > 6 files changed, 32 insertions(+), 46 deletions(-) > > > > diff --git a/src/compiler/nir/nir_intrinsics.h > > b/src/compiler/nir/nir_intrinsics.h > > index cefd18b..47022dd 100644 > > --- a/src/compiler/nir/nir_intrinsics.h > > +++ b/src/compiler/nir/nir_intrinsics.h > > @@ -364,6 +364,9 @@ SYSTEM_VALUE(blend_const_color_a_float, 1, 0, xx, > > xx, xx) > > SYSTEM_VALUE(blend_const_color_rgba8888_unorm, 1, 0, xx, xx, xx) > > SYSTEM_VALUE(blend_const_color_aaaa8888_unorm, 1, 0, xx, xx, xx) > > > > +/* Intel specific system values */ > > +SYSTEM_VALUE(intel_thread_local_id, 1, 0, xx, xx, xx) > > + > > /** > > * Barycentric coordinate intrinsics. > > * > > diff --git a/src/intel/compiler/brw_fs.cpp > > b/src/intel/compiler/brw_fs.cpp > > index 2acd838..c0d4c05 100644 > > --- a/src/intel/compiler/brw_fs.cpp > > +++ b/src/intel/compiler/brw_fs.cpp > > @@ -996,6 +996,7 @@ fs_visitor::import_uniforms(fs_visitor *v) > > this->push_constant_loc = v->push_constant_loc; > > this->pull_constant_loc = v->pull_constant_loc; > > this->uniforms = v->uniforms; > > + this->thread_local_id = v->thread_local_id; > > } > > > > void > > @@ -6781,8 +6782,7 @@ brw_compile_cs(const struct brw_compiler > > *compiler, void *log_data, > > { > > nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); > > shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, > > true); > > - > > - brw_nir_lower_cs_intrinsics(shader, prog_data); > > + brw_nir_lower_cs_intrinsics(shader); > > shader = brw_postprocess_nir(shader, compiler, true); > > > > prog_data->local_size[0] = shader->info.cs.local_size[0]; > > diff --git a/src/intel/compiler/brw_fs.h > > b/src/intel/compiler/brw_fs.h > > index da32593..f51a4d8 100644 > > --- a/src/intel/compiler/brw_fs.h > > +++ b/src/intel/compiler/brw_fs.h > > @@ -315,6 +315,7 @@ public: > > */ > > int *push_constant_loc; > > > > + fs_reg thread_local_id; > > fs_reg frag_depth; > > fs_reg frag_stencil; > > fs_reg sample_mask; > > diff --git a/src/intel/compiler/brw_fs_nir.cpp > > b/src/intel/compiler/brw_fs_nir.cpp > > index 05efee3..fdc6fc6 100644 > > --- a/src/intel/compiler/brw_fs_nir.cpp > > +++ b/src/intel/compiler/brw_fs_nir.cpp > > @@ -88,6 +88,16 @@ fs_visitor::nir_setup_uniforms() > > } > > > > uniforms = nir->num_uniforms / 4; > > + > > + if (stage == MESA_SHADER_COMPUTE) { > > + /* Add a uniform for the thread local id. It must be the last > > uniform > > + * on the list. > > + */ > > + assert(uniforms == prog_data->nr_params); > > + uint32_t *param = brw_stage_prog_data_add_params(prog_data, > > 1); > > + *param = BRW_PARAM_BUILTIN_THREAD_LOCAL_ID; > > + thread_local_id = fs_reg(UNIFORM, uniforms++, > > BRW_REGISTER_TYPE_UD); > > + } > > } > > > > static bool > > @@ -3409,6 +3419,10 @@ fs_visitor::nir_emit_cs_intrinsic(const > > fs_builder &bld, > > cs_prog_data->uses_barrier = true; > > break; > > > > + case nir_intrinsic_load_intel_thread_local_id: > > + bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD), thread_local_id); > > + break; > > + > > case nir_intrinsic_load_local_invocation_id: > > case nir_intrinsic_load_work_group_id: { > > gl_system_value sv = nir_system_value_from_intrinsic(instr- > > >intrinsic); > > diff --git a/src/intel/compiler/brw_nir.h > > b/src/intel/compiler/brw_nir.h > > index 1493b74..3e40712 100644 > > --- a/src/intel/compiler/brw_nir.h > > +++ b/src/intel/compiler/brw_nir.h > > @@ -95,8 +95,7 @@ void brw_nir_analyze_boolean_resolves(nir_shader > > *nir); > > nir_shader *brw_preprocess_nir(const struct brw_compiler *compiler, > > nir_shader *nir); > > > > -bool brw_nir_lower_cs_intrinsics(nir_shader *nir, > > - struct brw_cs_prog_data > > *prog_data); > > +bool brw_nir_lower_cs_intrinsics(nir_shader *nir); > > void brw_nir_lower_vs_inputs(nir_shader *nir, > > bool use_legacy_snorm_formula, > > const uint8_t *vs_attrib_wa_flags); > > diff --git a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c > > b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c > > index d277276..07d2dcc 100644 > > --- a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c > > +++ b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c > > @@ -26,47 +26,12 @@ > > > > struct lower_intrinsics_state { > > nir_shader *nir; > > - struct brw_cs_prog_data *prog_data; > > nir_function_impl *impl; > > bool progress; > > nir_builder builder; > > - int thread_local_id_index; > > + unsigned local_workgroup_size; > > }; > > > > -static nir_ssa_def * > > -read_thread_local_id(struct lower_intrinsics_state *state) > > -{ > > - struct brw_cs_prog_data *prog_data = state->prog_data; > > - nir_builder *b = &state->builder; > > - nir_shader *nir = state->nir; > > - const unsigned *sizes = nir->info.cs.local_size; > > - const unsigned group_size = sizes[0] * sizes[1] * sizes[2]; > > - > > - /* Some programs have local_size dimensions so small that the > > thread local > > - * ID will always be 0. > > - */ > > - if (group_size <= 8) > > - return nir_imm_int(b, 0); > > - > > - if (state->thread_local_id_index == -1) { > > - state->thread_local_id_index = prog_data->base.nr_params; > > - uint32_t *param = brw_stage_prog_data_add_params(&prog_data- > > >base, 1); > > - *param = BRW_PARAM_BUILTIN_THREAD_LOCAL_ID; > > - nir->num_uniforms += 4; > > - } > > - unsigned id_index = state->thread_local_id_index; > > - > > - nir_intrinsic_instr *load = > > - nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform); > > - load->num_components = 1; > > - load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); > > - nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL); > > - nir_intrinsic_set_base(load, id_index * sizeof(uint32_t)); > > - nir_intrinsic_set_range(load, sizeof(uint32_t)); > > - nir_builder_instr_insert(b, &load->instr); > > - return &load->dest.ssa; > > -} > > - > > static bool > > lower_cs_intrinsics_convert_block(struct lower_intrinsics_state > > *state, > > nir_block *block) > > @@ -91,7 +56,12 @@ lower_cs_intrinsics_convert_block(struct > > lower_intrinsics_state *state, > > * gl_LocalInvocationIndex = > > * cs_thread_local_id + subgroup_invocation; > > */ > > - nir_ssa_def *thread_local_id = read_thread_local_id(state); > > + nir_ssa_def *thread_local_id; > > + if (state->local_workgroup_size <= 8) > > + thread_local_id = nir_imm_int(b, 0); > > + else > > + thread_local_id = nir_load_intel_thread_local_id(b); > > + > > nir_ssa_def *channel = nir_load_subgroup_invocation(b); > > sysval = nir_iadd(b, channel, thread_local_id); > > break; > > @@ -157,8 +127,7 @@ lower_cs_intrinsics_convert_impl(struct > > lower_intrinsics_state *state) > > } > > > > bool > > -brw_nir_lower_cs_intrinsics(nir_shader *nir, > > - struct brw_cs_prog_data *prog_data) > > +brw_nir_lower_cs_intrinsics(nir_shader *nir) > > { > > assert(nir->info.stage == MESA_SHADER_COMPUTE); > > > > @@ -166,9 +135,9 @@ brw_nir_lower_cs_intrinsics(nir_shader *nir, > > struct lower_intrinsics_state state; > > memset(&state, 0, sizeof(state)); > > state.nir = nir; > > - state.prog_data = prog_data; > > - > > - state.thread_local_id_index = -1; > > + state.local_workgroup_size = nir->info.cs.local_size[0] * > > + nir->info.cs.local_size[1] * > > + nir->info.cs.local_size[2]; > > > > do { > > state.progress = false; >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev