On Mon, Oct 23, 2017 at 2:10 AM, Timothy Arceri <tarc...@itsqueeze.com> wrote: > --- > src/compiler/nir/nir.h | 2 + > src/compiler/nir/nir_linking_helpers.c | 235 > +++++++++++++++++++++++++++++++++ > 2 files changed, 237 insertions(+) > > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h > index dd833cf183..6a761ab655 100644 > --- a/src/compiler/nir/nir.h > +++ b/src/compiler/nir/nir.h > @@ -2413,20 +2413,22 @@ void nir_lower_io_to_temporaries(nir_shader *shader, > nir_function_impl *entrypoint, > bool outputs, bool inputs); > > void nir_shader_gather_info(nir_shader *shader, nir_function_impl > *entrypoint); > > void nir_assign_var_locations(struct exec_list *var_list, unsigned *size, > int (*type_size)(const struct glsl_type *)); > > /* Some helpers to do very simple linking */ > bool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer); > +void nir_compact_varyings(nir_shader *producer, nir_shader *consumer, > + bool default_to_smooth_interp); > > typedef enum { > /* If set, this forces all non-flat fragment shader inputs to be > * interpolated as if with the "sample" qualifier. This requires > * nir_shader_compiler_options::use_interpolated_input_intrinsics. > */ > nir_lower_io_force_sample_interpolation = (1 << 1), > } nir_lower_io_options; > bool nir_lower_io(nir_shader *shader, > nir_variable_mode modes, > diff --git a/src/compiler/nir/nir_linking_helpers.c > b/src/compiler/nir/nir_linking_helpers.c > index 54ba1c85e5..85da84fbd9 100644 > --- a/src/compiler/nir/nir_linking_helpers.c > +++ b/src/compiler/nir/nir_linking_helpers.c > @@ -143,10 +143,245 @@ nir_remove_unused_varyings(nir_shader *producer, > nir_shader *consumer) > tcs_add_output_reads(producer, read); > > bool progress = false; > progress = remove_unused_io_vars(producer, &producer->outputs, read); > > progress = > remove_unused_io_vars(consumer, &consumer->inputs, written) || > progress; > > return progress; > } > + > +static uint8_t > +get_interp_type(nir_variable *var, bool default_to_smooth_interp) > +{ > + return var->data.interpolation == INTERP_MODE_NONE && > + default_to_smooth_interp ? > + INTERP_MODE_SMOOTH : var->data.interpolation; > +} > + > +static void > +get_slot_component_masks_and_interp_types(struct exec_list *var_list, > + uint8_t *comps, uint8_t > *interp_type, > + gl_shader_stage stage, > + bool default_to_smooth_interp) > +{ > + nir_foreach_variable_safe(var, var_list) { > + assert(var->data.location >= 0); > + > + /* Only remap things that aren't built-ins. > + * TODO: add TES patch support. > + */ > + if (var->data.location >= VARYING_SLOT_VAR0 && > + var->data.location - VARYING_SLOT_VAR0 < 32) { > + > + const struct glsl_type *type = var->type; > + if (nir_is_per_vertex_io(var, stage)) { > + assert(glsl_type_is_array(type)); > + type = glsl_get_array_element(type); > + } > + > + unsigned location = var->data.location - VARYING_SLOT_VAR0; > + unsigned elements = > + glsl_get_vector_elements(glsl_without_array(type)); > + > + bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type)); > + unsigned slots = glsl_count_attribute_slots(type, false); > + for (unsigned i = 0; i < slots; i++) { > + interp_type[location + i] = > + get_interp_type(var, default_to_smooth_interp); > + > + if (dual_slot) { > + unsigned comps_slot2 = 0;
I'd expect this to be declared outside the loop, otherwise it is always going to be 0 in the (i & 1) case below. With that fixed this patch is Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> > + if (i & 1) { > + comps[location + i] |= ((1 << comps_slot2) - 1); > + } else { > + unsigned num_comps = 4 - var->data.location_frac; > + comps_slot2 = (elements * 2) - num_comps; > + > + /* Assume ARB_enhanced_layouts packing rules for doubles */ > + assert(var->data.location_frac == 0 || > + var->data.location_frac == 2); > + assert(comps_slot2 <= 4); > + > + comps[location + i] |= > + ((1 << num_comps) - 1) << var->data.location_frac; > + } > + } else { > + comps[location + i] |= > + ((1 << elements) - 1) << var->data.location_frac; > + } > + } > + } > + } > +} > + > +struct varying_loc > +{ > + uint8_t component; > + uint32_t location; > +}; > + > +static void > +remap_slots_and_components(struct exec_list *var_list, > + struct varying_loc (*remap)[4]) > +{ > + nir_foreach_variable(var, var_list) { > + assert(var->data.location >= 0); > + > + /* Only remap things that aren't built-ins */ > + if (var->data.location >= VARYING_SLOT_VAR0 && > + var->data.location - VARYING_SLOT_VAR0 < 32) { > + assert(var->data.location - VARYING_SLOT_VAR0 < 32); > + assert(remap[var->data.location - VARYING_SLOT_VAR0] >= 0); > + > + unsigned location = var->data.location - VARYING_SLOT_VAR0; > + struct varying_loc *new_loc = > &remap[location][var->data.location_frac]; > + if (new_loc->location) { > + var->data.location = new_loc->location; > + var->data.location_frac = new_loc->component; > + } > + } > + } > +} > + > +/* If there are empty components in the slot compact the remaining components > + * as close to component 0 as possible. This will make it easier to fill the > + * empty components with components from a different slot in a following > pass. > + */ > +static void > +compact_components(nir_shader *producer, nir_shader *consumer, uint8_t > *comps, > + uint8_t *interp_type, bool default_to_smooth_interp) > +{ > + struct exec_list *input_list = &consumer->inputs; > + struct exec_list *output_list = &producer->outputs; > + struct varying_loc remap[32][4] = {{{0}, {0}}}; > + > + /* Create a cursor for each interpolation type */ > + unsigned cursor[4] = {0}; > + > + /* We only need to pass over one stage and we choose the consumer as it > seems > + * to cause a larger reduction in instruction counts (tested on i965). > + */ > + nir_foreach_variable(var, input_list) { > + > + /* Only remap things that aren't builtins. > + * TODO: add TES patch support. > + */ > + if (var->data.location >= VARYING_SLOT_VAR0 && > + var->data.location - VARYING_SLOT_VAR0 < 32) { > + > + const struct glsl_type *type = var->type; > + if (nir_is_per_vertex_io(var, consumer->info.stage)) { > + assert(glsl_type_is_array(type)); > + type = glsl_get_array_element(type); > + } > + > + /* Skip types that require more complex packing handling. > + * TODO: add support for these types. > + */ > + if (glsl_type_is_array(type) || > + glsl_type_is_dual_slot(type) || > + glsl_type_is_matrix(type) || > + glsl_type_is_struct(type) || > + glsl_type_is_64bit(type)) > + continue; > + > + /* We ignore complex types above and all other vector types should > + * have been split into scalar variables by the lower_io_to_scalar > + * pass. The only exeption should by OpenGL xfb varyings. > + */ > + if (glsl_get_vector_elements(type) != 1) > + continue; > + > + unsigned location = var->data.location - VARYING_SLOT_VAR0; > + uint8_t used_comps = comps[location]; > + > + /* If there are no empty components there is nothing more for us to > do. > + */ > + if (used_comps == 0xf) > + continue; > + > + bool found_new_offset = false; > + uint8_t interp = get_interp_type(var, default_to_smooth_interp); > + for (; cursor[interp] < 32; cursor[interp]++) { > + > + /* We couldn't find anywhere to pack the varying continue on. */ > + if (cursor[interp] == location) > + break; > + > + /* We can only pack varyings with matching interpolation types */ > + if (interp_type[cursor[interp]] != interp) > + continue; > + > + uint8_t cursor_used_comps = comps[cursor[interp]]; > + > + /* If the slot is empty just skip it for now, compact_var_list() > + * can be called after this function to remove empty slots for > us. > + * TODO: finish implementing compact_var_list() requires array > and > + * matrix splitting. > + */ > + if (!cursor_used_comps) > + continue; > + > + uint8_t unused_comps = ~cursor_used_comps; > + > + for (unsigned i = 0; i < 4; i++) { > + uint8_t new_var_comps = 1 << i; > + if (unused_comps & new_var_comps) { > + remap[location][var->data.location_frac].component = i; > + remap[location][var->data.location_frac].location = > + cursor[interp] + VARYING_SLOT_VAR0; > + > + found_new_offset = true; > + > + /* Turn off the mask for the component we are remapping */ > + if (comps[location] & 1 << var->data.location_frac) { > + comps[location] ^= 1 << var->data.location_frac; > + comps[cursor[interp]] |= new_var_comps; > + } > + break; > + } > + } > + > + if (found_new_offset) > + break; > + } > + } > + } > + > + remap_slots_and_components(input_list, remap); > + remap_slots_and_components(output_list, remap); > +} > + > +/* We assume that this has been called more-or-less directly after > + * remove_unused_varyings. At this point, all of the varyings that we > + * aren't going to be using have been completely removed and the > + * inputs_read and outputs_written fields in nir_shader_info reflect > + * this. Therefore, the total set of valid slots is the OR of the two > + * sets of varyings; this accounts for varyings which one side may need > + * to read/write even if the other doesn't. This can happen if, for > + * instance, an array is used indirectly from one side causing it to be > + * unsplittable but directly from the other. > + */ > +void > +nir_compact_varyings(nir_shader *producer, nir_shader *consumer, > + bool default_to_smooth_interp) > +{ > + assert(producer->info.stage != MESA_SHADER_FRAGMENT); > + assert(consumer->info.stage != MESA_SHADER_VERTEX); > + > + uint8_t comps[32] = {0}; > + uint8_t interp_type[32] = {0}; > + > + get_slot_component_masks_and_interp_types(&producer->outputs, comps, > + interp_type, > + producer->info.stage, > + default_to_smooth_interp); > + get_slot_component_masks_and_interp_types(&consumer->inputs, comps, > + interp_type, > + consumer->info.stage, > + default_to_smooth_interp); > + > + compact_components(producer, consumer, comps, interp_type, > + default_to_smooth_interp); > +} > -- > 2.13.6 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev