On Wed, 2017-10-25 at 16:26 -0700, Jason Ekstrand wrote: > Previously, brw_nir_lower_intrinsics added the param and then emitted > a > load_uniform intrinsic to load it directly. This commit switches > things > over to use a specific NIR intrinsic for the thread id. The one > thing I > don't like about this approach is that we have to copy > thread_local_id > over to the new visitor in import_uniforms.
It is not clear to me why you are doing this... why do you like this better? > --- > src/compiler/nir/nir_intrinsics.h | 3 ++ > src/intel/compiler/brw_fs.cpp | 4 +- > src/intel/compiler/brw_fs.h | 1 + > src/intel/compiler/brw_fs_nir.cpp | 14 +++++++ > src/intel/compiler/brw_nir.h | 3 +- > src/intel/compiler/brw_nir_lower_cs_intrinsics.c | 53 +++++--------- > ---------- > 6 files changed, 32 insertions(+), 46 deletions(-) > > diff --git a/src/compiler/nir/nir_intrinsics.h > b/src/compiler/nir/nir_intrinsics.h > index cefd18b..47022dd 100644 > --- a/src/compiler/nir/nir_intrinsics.h > +++ b/src/compiler/nir/nir_intrinsics.h > @@ -364,6 +364,9 @@ SYSTEM_VALUE(blend_const_color_a_float, 1, 0, xx, > xx, xx) > SYSTEM_VALUE(blend_const_color_rgba8888_unorm, 1, 0, xx, xx, xx) > SYSTEM_VALUE(blend_const_color_aaaa8888_unorm, 1, 0, xx, xx, xx) > > +/* Intel specific system values */ > +SYSTEM_VALUE(intel_thread_local_id, 1, 0, xx, xx, xx) > + > /** > * Barycentric coordinate intrinsics. > * > diff --git a/src/intel/compiler/brw_fs.cpp > b/src/intel/compiler/brw_fs.cpp > index 2acd838..c0d4c05 100644 > --- a/src/intel/compiler/brw_fs.cpp > +++ b/src/intel/compiler/brw_fs.cpp > @@ -996,6 +996,7 @@ fs_visitor::import_uniforms(fs_visitor *v) > this->push_constant_loc = v->push_constant_loc; > this->pull_constant_loc = v->pull_constant_loc; > this->uniforms = v->uniforms; > + this->thread_local_id = v->thread_local_id; > } > > void > @@ -6781,8 +6782,7 @@ brw_compile_cs(const struct brw_compiler > *compiler, void *log_data, > { > nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); > shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, > true); > - > - brw_nir_lower_cs_intrinsics(shader, prog_data); > + brw_nir_lower_cs_intrinsics(shader); > shader = brw_postprocess_nir(shader, compiler, true); > > prog_data->local_size[0] = shader->info.cs.local_size[0]; > diff --git a/src/intel/compiler/brw_fs.h > b/src/intel/compiler/brw_fs.h > index da32593..f51a4d8 100644 > --- a/src/intel/compiler/brw_fs.h > +++ b/src/intel/compiler/brw_fs.h > @@ -315,6 +315,7 @@ public: > */ > int *push_constant_loc; > > + fs_reg thread_local_id; > fs_reg frag_depth; > fs_reg frag_stencil; > fs_reg sample_mask; > diff --git a/src/intel/compiler/brw_fs_nir.cpp > b/src/intel/compiler/brw_fs_nir.cpp > index 05efee3..fdc6fc6 100644 > --- a/src/intel/compiler/brw_fs_nir.cpp > +++ b/src/intel/compiler/brw_fs_nir.cpp > @@ -88,6 +88,16 @@ fs_visitor::nir_setup_uniforms() > } > > uniforms = nir->num_uniforms / 4; > + > + if (stage == MESA_SHADER_COMPUTE) { > + /* Add a uniform for the thread local id. It must be the last > uniform > + * on the list. > + */ > + assert(uniforms == prog_data->nr_params); > + uint32_t *param = brw_stage_prog_data_add_params(prog_data, > 1); > + *param = BRW_PARAM_BUILTIN_THREAD_LOCAL_ID; > + thread_local_id = fs_reg(UNIFORM, uniforms++, > BRW_REGISTER_TYPE_UD); > + } > } > > static bool > @@ -3409,6 +3419,10 @@ fs_visitor::nir_emit_cs_intrinsic(const > fs_builder &bld, > cs_prog_data->uses_barrier = true; > break; > > + case nir_intrinsic_load_intel_thread_local_id: > + bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD), thread_local_id); > + break; > + > case nir_intrinsic_load_local_invocation_id: > case nir_intrinsic_load_work_group_id: { > gl_system_value sv = nir_system_value_from_intrinsic(instr- > >intrinsic); > diff --git a/src/intel/compiler/brw_nir.h > b/src/intel/compiler/brw_nir.h > index 1493b74..3e40712 100644 > --- a/src/intel/compiler/brw_nir.h > +++ b/src/intel/compiler/brw_nir.h > @@ -95,8 +95,7 @@ void brw_nir_analyze_boolean_resolves(nir_shader > *nir); > nir_shader *brw_preprocess_nir(const struct brw_compiler *compiler, > nir_shader *nir); > > -bool brw_nir_lower_cs_intrinsics(nir_shader *nir, > - struct brw_cs_prog_data > *prog_data); > +bool brw_nir_lower_cs_intrinsics(nir_shader *nir); > void brw_nir_lower_vs_inputs(nir_shader *nir, > bool use_legacy_snorm_formula, > const uint8_t *vs_attrib_wa_flags); > diff --git a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c > b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c > index d277276..07d2dcc 100644 > --- a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c > +++ b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c > @@ -26,47 +26,12 @@ > > struct lower_intrinsics_state { > nir_shader *nir; > - struct brw_cs_prog_data *prog_data; > nir_function_impl *impl; > bool progress; > nir_builder builder; > - int thread_local_id_index; > + unsigned local_workgroup_size; > }; > > -static nir_ssa_def * > -read_thread_local_id(struct lower_intrinsics_state *state) > -{ > - struct brw_cs_prog_data *prog_data = state->prog_data; > - nir_builder *b = &state->builder; > - nir_shader *nir = state->nir; > - const unsigned *sizes = nir->info.cs.local_size; > - const unsigned group_size = sizes[0] * sizes[1] * sizes[2]; > - > - /* Some programs have local_size dimensions so small that the > thread local > - * ID will always be 0. > - */ > - if (group_size <= 8) > - return nir_imm_int(b, 0); > - > - if (state->thread_local_id_index == -1) { > - state->thread_local_id_index = prog_data->base.nr_params; > - uint32_t *param = brw_stage_prog_data_add_params(&prog_data- > >base, 1); > - *param = BRW_PARAM_BUILTIN_THREAD_LOCAL_ID; > - nir->num_uniforms += 4; > - } > - unsigned id_index = state->thread_local_id_index; > - > - nir_intrinsic_instr *load = > - nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform); > - load->num_components = 1; > - load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); > - nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL); > - nir_intrinsic_set_base(load, id_index * sizeof(uint32_t)); > - nir_intrinsic_set_range(load, sizeof(uint32_t)); > - nir_builder_instr_insert(b, &load->instr); > - return &load->dest.ssa; > -} > - > static bool > lower_cs_intrinsics_convert_block(struct lower_intrinsics_state > *state, > nir_block *block) > @@ -91,7 +56,12 @@ lower_cs_intrinsics_convert_block(struct > lower_intrinsics_state *state, > * gl_LocalInvocationIndex = > * cs_thread_local_id + subgroup_invocation; > */ > - nir_ssa_def *thread_local_id = read_thread_local_id(state); > + nir_ssa_def *thread_local_id; > + if (state->local_workgroup_size <= 8) > + thread_local_id = nir_imm_int(b, 0); > + else > + thread_local_id = nir_load_intel_thread_local_id(b); > + > nir_ssa_def *channel = nir_load_subgroup_invocation(b); > sysval = nir_iadd(b, channel, thread_local_id); > break; > @@ -157,8 +127,7 @@ lower_cs_intrinsics_convert_impl(struct > lower_intrinsics_state *state) > } > > bool > -brw_nir_lower_cs_intrinsics(nir_shader *nir, > - struct brw_cs_prog_data *prog_data) > +brw_nir_lower_cs_intrinsics(nir_shader *nir) > { > assert(nir->info.stage == MESA_SHADER_COMPUTE); > > @@ -166,9 +135,9 @@ brw_nir_lower_cs_intrinsics(nir_shader *nir, > struct lower_intrinsics_state state; > memset(&state, 0, sizeof(state)); > state.nir = nir; > - state.prog_data = prog_data; > - > - state.thread_local_id_index = -1; > + state.local_workgroup_size = nir->info.cs.local_size[0] * > + nir->info.cs.local_size[1] * > + nir->info.cs.local_size[2]; > > do { > state.progress = false; _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev