On Fri, Jul 3, 2015 at 12:58 AM, Iago Toral <ito...@igalia.com> wrote: > On Thu, 2015-07-02 at 09:31 +0200, Iago Toral wrote: >> On Tue, 2015-06-30 at 11:32 -0700, Jason Ekstrand wrote: >> > I'm not sure what I think about adding an is_scalar flag vs. having >> > _scalar and _vec4 versions of each function. My feeling is that once >> > we tweak assign_var_locations as I mentioned for vec4 outputs, we'll >> > find that we want to have them separate. The big thing here is that >> > I'd rather have _vec4 and _scalar versions internally call the same >> > function than have a general-looking function that switches inside on >> > is_scalar and does two completely different things. So if we can >> > share code like this for all of them, is_scalar is totally fine. If >> > we start having divergence, different scalar/vec4 functions is >> > probably better. >> > --Jason >> >> Ok, let's see how the changes to vec4 outputs affects this. If we end up >> needing more changes than just the kind of type_size() function we need >> to call then I'll look into having separate paths for scalar and vec4. > > Notice that this patch already changes how assign_var_locations works > (that calls type_size which is the one thing affected by the is_scalar > flag). The mappings you saw in the implementations of the input and > output intrinsics stopped being necessary with this patch and we will > just remove them, it just happened that I fixed it for uniforms and did > not realize that the same situation affected inputs and outputs, but we > just confirmed that with this patch the indirections are not needed > there either. > > So, as things stand now, the changes in this patch are the only changes > we need to nir_lower_io for vec4 shaders, at least for now. Knowing > that, are you okay with this implementation or would you rather change > it to have separate functions for vec4 and scalar?
That's fine. --Jason > Iago > >> > On Fri, Jun 26, 2015 at 1:06 AM, Eduardo Lima Mitev <el...@igalia.com> >> > wrote: >> > > From: Iago Toral Quiroga <ito...@igalia.com> >> > > >> > > The current implementation operates in scalar mode only, so add a vec4 >> > > mode where types are padded to vec4 sizes. >> > > >> > > This will be useful in the i965 driver for its vec4 nir backend >> > > (and possbly other drivers that have vec4-based shaders). >> > > >> > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89580 >> > > --- >> > > src/glsl/nir/nir.h | 18 ++++---- >> > > src/glsl/nir/nir_lower_io.c | 87 >> > > +++++++++++++++++++++++++++++-------- >> > > src/mesa/drivers/dri/i965/brw_nir.c | 18 +++++--- >> > > 3 files changed, 90 insertions(+), 33 deletions(-) >> > > >> > > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h >> > > index 697d37e..334abbc 100644 >> > > --- a/src/glsl/nir/nir.h >> > > +++ b/src/glsl/nir/nir.h >> > > @@ -1640,14 +1640,16 @@ void nir_lower_global_vars_to_local(nir_shader >> > > *shader); >> > > >> > > void nir_lower_locals_to_regs(nir_shader *shader); >> > > >> > > -void nir_assign_var_locations_scalar(struct exec_list *var_list, >> > > - unsigned *size); >> > > -void nir_assign_var_locations_scalar_direct_first(nir_shader *shader, >> > > - struct exec_list >> > > *var_list, >> > > - unsigned *direct_size, >> > > - unsigned *size); >> > > - >> > > -void nir_lower_io(nir_shader *shader); >> > > +void nir_assign_var_locations(struct exec_list *var_list, >> > > + unsigned *size, >> > > + bool is_scalar); >> > > +void nir_assign_var_locations_direct_first(nir_shader *shader, >> > > + struct exec_list *var_list, >> > > + unsigned *direct_size, >> > > + unsigned *size, >> > > + bool is_scalar); >> > > + >> > > +void nir_lower_io(nir_shader *shader, bool is_scalar); >> > > >> > > void nir_lower_vars_to_ssa(nir_shader *shader); >> > > >> > > diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c >> > > index 6761d5b..3ecd386 100644 >> > > --- a/src/glsl/nir/nir_lower_io.c >> > > +++ b/src/glsl/nir/nir_lower_io.c >> > > @@ -29,19 +29,56 @@ >> > > /* >> > > * This lowering pass converts references to input/output variables with >> > > * loads/stores to actual input/output intrinsics. >> > > - * >> > > - * NOTE: This pass really only works for scalar backends at the moment >> > > due >> > > - * to the way it packes the input/output data. >> > > */ >> > > >> > > #include "nir.h" >> > > >> > > struct lower_io_state { >> > > void *mem_ctx; >> > > + bool is_scalar; >> > > }; >> > > >> > > +static int >> > > +type_size_vec4(const struct glsl_type *type) >> > > +{ >> > > + unsigned int i; >> > > + int size; >> > > + >> > > + switch (glsl_get_base_type(type)) { >> > > + case GLSL_TYPE_UINT: >> > > + case GLSL_TYPE_INT: >> > > + case GLSL_TYPE_FLOAT: >> > > + case GLSL_TYPE_BOOL: >> > > + if (glsl_type_is_matrix(type)) { >> > > + return glsl_get_matrix_columns(type); >> > > + } else { >> > > + return 1; >> > > + } >> > > + case GLSL_TYPE_ARRAY: >> > > + return type_size_vec4(glsl_get_array_element(type)) * >> > > glsl_get_length(type); >> > > + case GLSL_TYPE_STRUCT: >> > > + size = 0; >> > > + for (i = 0; i < glsl_get_length(type); i++) { >> > > + size += type_size_vec4(glsl_get_struct_field(type, i)); >> > > + } >> > > + return size; >> > > + case GLSL_TYPE_SAMPLER: >> > > + return 0; >> > > + case GLSL_TYPE_ATOMIC_UINT: >> > > + return 0; >> > > + case GLSL_TYPE_IMAGE: >> > > + case GLSL_TYPE_VOID: >> > > + case GLSL_TYPE_DOUBLE: >> > > + case GLSL_TYPE_ERROR: >> > > + case GLSL_TYPE_INTERFACE: >> > > + unreachable("not reached"); >> > > + } >> > > + >> > > + return 0; >> > > +} >> > > + >> > > static unsigned >> > > -type_size(const struct glsl_type *type) >> > > +type_size_scalar(const struct glsl_type *type) >> > > { >> > > unsigned int size, i; >> > > >> > > @@ -52,11 +89,11 @@ type_size(const struct glsl_type *type) >> > > case GLSL_TYPE_BOOL: >> > > return glsl_get_components(type); >> > > case GLSL_TYPE_ARRAY: >> > > - return type_size(glsl_get_array_element(type)) * >> > > glsl_get_length(type); >> > > + return type_size_scalar(glsl_get_array_element(type)) * >> > > glsl_get_length(type); >> > > case GLSL_TYPE_STRUCT: >> > > size = 0; >> > > for (i = 0; i < glsl_get_length(type); i++) { >> > > - size += type_size(glsl_get_struct_field(type, i)); >> > > + size += type_size_scalar(glsl_get_struct_field(type, i)); >> > > } >> > > return size; >> > > case GLSL_TYPE_SAMPLER: >> > > @@ -76,8 +113,17 @@ type_size(const struct glsl_type *type) >> > > return 0; >> > > } >> > > >> > > +static unsigned >> > > +type_size(const struct glsl_type *type, bool is_scalar) >> > > +{ >> > > + if (is_scalar) >> > > + return type_size_scalar(type); >> > > + else >> > > + return type_size_vec4(type); >> > > +} >> > > + >> > > void >> > > -nir_assign_var_locations_scalar(struct exec_list *var_list, unsigned >> > > *size) >> > > +nir_assign_var_locations(struct exec_list *var_list, unsigned *size, >> > > bool is_scalar) >> > > { >> > > unsigned location = 0; >> > > >> > > @@ -90,7 +136,7 @@ nir_assign_var_locations_scalar(struct exec_list >> > > *var_list, unsigned *size) >> > > continue; >> > > >> > > var->data.driver_location = location; >> > > - location += type_size(var->type); >> > > + location += type_size(var->type, is_scalar); >> > > } >> > > >> > > *size = location; >> > > @@ -136,10 +182,11 @@ mark_indirect_uses_block(nir_block *block, void >> > > *void_state) >> > > * assigns locations to variables that are used indirectly. >> > > */ >> > > void >> > > -nir_assign_var_locations_scalar_direct_first(nir_shader *shader, >> > > - struct exec_list *var_list, >> > > - unsigned *direct_size, >> > > - unsigned *size) >> > > +nir_assign_var_locations_direct_first(nir_shader *shader, >> > > + struct exec_list *var_list, >> > > + unsigned *direct_size, >> > > + unsigned *size, >> > > + bool is_scalar) >> > > { >> > > struct set *indirect_set = _mesa_set_create(NULL, _mesa_hash_pointer, >> > > _mesa_key_pointer_equal); >> > > @@ -160,7 +207,7 @@ >> > > nir_assign_var_locations_scalar_direct_first(nir_shader *shader, >> > > continue; >> > > >> > > var->data.driver_location = location; >> > > - location += type_size(var->type); >> > > + location += type_size(var->type, is_scalar); >> > > } >> > > >> > > *direct_size = location; >> > > @@ -173,7 +220,7 @@ >> > > nir_assign_var_locations_scalar_direct_first(nir_shader *shader, >> > > continue; >> > > >> > > var->data.driver_location = location; >> > > - location += type_size(var->type); >> > > + location += type_size(var->type, is_scalar); >> > > } >> > > >> > > *size = location; >> > > @@ -195,7 +242,7 @@ get_io_offset(nir_deref_var *deref, nir_instr >> > > *instr, nir_src *indirect, >> > > >> > > if (tail->deref_type == nir_deref_type_array) { >> > > nir_deref_array *deref_array = nir_deref_as_array(tail); >> > > - unsigned size = type_size(tail->type); >> > > + unsigned size = type_size(tail->type, state->is_scalar); >> > > >> > > base_offset += size * deref_array->base_offset; >> > > >> > > @@ -237,7 +284,8 @@ get_io_offset(nir_deref_var *deref, nir_instr >> > > *instr, nir_src *indirect, >> > > nir_deref_struct *deref_struct = nir_deref_as_struct(tail); >> > > >> > > for (unsigned i = 0; i < deref_struct->index; i++) >> > > - base_offset += type_size(glsl_get_struct_field(parent_type, >> > > i)); >> > > + base_offset += type_size(glsl_get_struct_field(parent_type, >> > > i), >> > > + state->is_scalar); >> > > } >> > > } >> > > >> > > @@ -350,11 +398,12 @@ nir_lower_io_block(nir_block *block, void >> > > *void_state) >> > > } >> > > >> > > static void >> > > -nir_lower_io_impl(nir_function_impl *impl) >> > > +nir_lower_io_impl(nir_function_impl *impl, bool is_scalar) >> > > { >> > > struct lower_io_state state; >> > > >> > > state.mem_ctx = ralloc_parent(impl); >> > > + state.is_scalar = is_scalar; >> > > >> > > nir_foreach_block(impl, nir_lower_io_block, &state); >> > > >> > > @@ -363,10 +412,10 @@ nir_lower_io_impl(nir_function_impl *impl) >> > > } >> > > >> > > void >> > > -nir_lower_io(nir_shader *shader) >> > > +nir_lower_io(nir_shader *shader, bool is_scalar) >> > > { >> > > nir_foreach_overload(shader, overload) { >> > > if (overload->impl) >> > > - nir_lower_io_impl(overload->impl); >> > > + nir_lower_io_impl(overload->impl, is_scalar); >> > > } >> > > } >> > > diff --git a/src/mesa/drivers/dri/i965/brw_nir.c >> > > b/src/mesa/drivers/dri/i965/brw_nir.c >> > > index 885c49d..002ae3e 100644 >> > > --- a/src/mesa/drivers/dri/i965/brw_nir.c >> > > +++ b/src/mesa/drivers/dri/i965/brw_nir.c >> > > @@ -22,6 +22,7 @@ >> > > */ >> > > >> > > #include "brw_nir.h" >> > > +#include "brw_shader.h" >> > > #include "glsl/glsl_parser_extras.h" >> > > #include "glsl/nir/glsl_to_nir.h" >> > > #include "program/prog_to_nir.h" >> > > @@ -70,6 +71,9 @@ brw_create_nir(struct brw_context *brw, >> > > bool debug_enabled = INTEL_DEBUG & >> > > intel_debug_flag_for_shader_stage(stage); >> > > nir_shader *nir; >> > > >> > > + bool is_scalar = >> > > + brw_compiler_is_scalar_shader_stage(brw->intelScreen->compiler, >> > > stage); >> > > + >> > > /* First, lower the GLSL IR or Mesa IR to NIR */ >> > > if (shader_prog) { >> > > nir = glsl_to_nir(shader, options); >> > > @@ -100,13 +104,15 @@ brw_create_nir(struct brw_context *brw, >> > > /* Get rid of split copies */ >> > > nir_optimize(nir); >> > > >> > > - nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms, >> > > - >> > > &nir->num_direct_uniforms, >> > > - &nir->num_uniforms); >> > > - nir_assign_var_locations_scalar(&nir->inputs, &nir->num_inputs); >> > > - nir_assign_var_locations_scalar(&nir->outputs, &nir->num_outputs); >> > > + nir_assign_var_locations_direct_first(nir, &nir->uniforms, >> > > + &nir->num_direct_uniforms, >> > > + &nir->num_uniforms, >> > > + is_scalar); >> > > + nir_assign_var_locations(&nir->inputs, &nir->num_inputs, is_scalar); >> > > + nir_assign_var_locations(&nir->outputs, &nir->num_outputs, >> > > is_scalar); >> > > + >> > > + nir_lower_io(nir, is_scalar); >> > > >> > > - nir_lower_io(nir); >> > > nir_validate_shader(nir); >> > > >> > > nir_remove_dead_variables(nir); >> > > -- >> > > 2.1.4 >> > > >> > > _______________________________________________ >> > > mesa-dev mailing list >> > > mesa-dev@lists.freedesktop.org >> > > http://lists.freedesktop.org/mailman/listinfo/mesa-dev >> > _______________________________________________ >> > mesa-dev mailing list >> > mesa-dev@lists.freedesktop.org >> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev >> >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev > > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev