---
 src/compiler/nir/nir.h                 |   2 +
 src/compiler/nir/nir_linking_helpers.c | 271 +++++++++++++++++++++++++++++++++
 2 files changed, 273 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index dd833cf183..6a761ab655 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2413,20 +2413,22 @@ void nir_lower_io_to_temporaries(nir_shader *shader,
                                  nir_function_impl *entrypoint,
                                  bool outputs, bool inputs);
 
 void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);
 
 void nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
                               int (*type_size)(const struct glsl_type *));
 
 /* Some helpers to do very simple linking */
 bool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer);
+void nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
+                          bool default_to_smooth_interp);
 
 typedef enum {
    /* If set, this forces all non-flat fragment shader inputs to be
     * interpolated as if with the "sample" qualifier. This requires
     * nir_shader_compiler_options::use_interpolated_input_intrinsics.
     */
    nir_lower_io_force_sample_interpolation = (1 << 1),
 } nir_lower_io_options;
 bool nir_lower_io(nir_shader *shader,
                   nir_variable_mode modes,
diff --git a/src/compiler/nir/nir_linking_helpers.c b/src/compiler/nir/nir_linking_helpers.c
index 54ba1c85e5..85da84fbd9 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -143,10 +143,281 @@ nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
       tcs_add_output_reads(producer, read);
 
    bool progress = false;
    progress = remove_unused_io_vars(producer, &producer->outputs, read);
 
    progress =
       remove_unused_io_vars(consumer, &consumer->inputs, written) || progress;
 
    return progress;
 }
+
+/* Return the interpolation mode used when deciding which varyings may share
+ * a slot.  INTERP_MODE_NONE is reported as INTERP_MODE_SMOOTH when
+ * default_to_smooth_interp is set; any other mode is returned unchanged.
+ */
+static uint8_t
+get_interp_type(nir_variable *var, bool default_to_smooth_interp)
+{
+   if (var->data.interpolation == INTERP_MODE_NONE &&
+       default_to_smooth_interp)
+      return INTERP_MODE_SMOOTH;
+
+   return var->data.interpolation;
+}
+
+/* Walk var_list and, for every generic varying (VARYING_SLOT_VAR0 up to
+ * VARYING_SLOT_VAR0 + 31), OR the components it occupies into comps[] and
+ * store its interpolation type in interp_type[].  Both arrays are indexed by
+ * slot relative to VARYING_SLOT_VAR0 and must have room for 32 entries.
+ */
+static void
+get_slot_component_masks_and_interp_types(struct exec_list *var_list,
+                                          uint8_t *comps, uint8_t *interp_type,
+                                          gl_shader_stage stage,
+                                          bool default_to_smooth_interp)
+{
+   nir_foreach_variable_safe(var, var_list) {
+      assert(var->data.location >= 0);
+
+      /* Only remap things that aren't built-ins.
+       * TODO: add TES patch support.
+       */
+      if (var->data.location >= VARYING_SLOT_VAR0 &&
+          var->data.location - VARYING_SLOT_VAR0 < 32) {
+
+         const struct glsl_type *type = var->type;
+         if (nir_is_per_vertex_io(var, stage)) {
+            assert(glsl_type_is_array(type));
+            type = glsl_get_array_element(type);
+         }
+
+         unsigned location = var->data.location - VARYING_SLOT_VAR0;
+         unsigned elements =
+            glsl_get_vector_elements(glsl_without_array(type));
+
+         bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
+         unsigned slots = glsl_count_attribute_slots(type, false);
+
+         /* Number of components that spill into the second slot of a
+          * dual-slot type.  It is computed on the even iteration and consumed
+          * on the following odd one, so it has to outlive a single pass of
+          * the loop body.
+          */
+         unsigned comps_slot2 = 0;
+         for (unsigned i = 0; i < slots; i++) {
+            /* comps[] and interp_type[] only cover 32 slots; a multi-slot
+             * varying that starts near the end must not write past them.
+             */
+            if (location + i >= 32)
+               break;
+
+            interp_type[location + i] =
+               get_interp_type(var, default_to_smooth_interp);
+
+            if (dual_slot) {
+               if (i & 1) {
+                  comps[location + i] |= ((1 << comps_slot2) - 1);
+               } else {
+                  unsigned num_comps = 4 - var->data.location_frac;
+                  comps_slot2 = (elements * 2) - num_comps;
+
+                  /* Assume ARB_enhanced_layouts packing rules for doubles */
+                  assert(var->data.location_frac == 0 ||
+                         var->data.location_frac == 2);
+                  assert(comps_slot2 <= 4);
+
+                  comps[location + i] |=
+                     ((1 << num_comps) - 1) << var->data.location_frac;
+               }
+            } else {
+               comps[location + i] |=
+                  ((1 << elements) - 1) << var->data.location_frac;
+            }
+         }
+      }
+   }
+}
+
+/* Destination of a remapped scalar varying.  A zero location marks an entry
+ * that was never assigned; compact_components() only ever stores
+ * cursor + VARYING_SLOT_VAR0 here.
+ */
+struct varying_loc
+{
+   uint8_t component;
+   uint32_t location;
+};
+
+/* Apply a remap table to every generic varying in var_list.  remap is indexed
+ * by [location - VARYING_SLOT_VAR0][location_frac]; entries whose location is
+ * zero leave the variable untouched.
+ */
+static void
+remap_slots_and_components(struct exec_list *var_list,
+                           struct varying_loc (*remap)[4])
+{
+   nir_foreach_variable(var, var_list) {
+      assert(var->data.location >= 0);
+
+      /* Only remap things that aren't built-ins */
+      if (var->data.location >= VARYING_SLOT_VAR0 &&
+          var->data.location - VARYING_SLOT_VAR0 < 32) {
+         unsigned location = var->data.location - VARYING_SLOT_VAR0;
+         struct varying_loc *new_loc = &remap[location][var->data.location_frac];
+         if (new_loc->location) {
+            var->data.location = new_loc->location;
+            var->data.location_frac = new_loc->component;
+         }
+      }
+   }
+}
+
+/* If there are empty components in the slot compact the remaining components
+ * as close to component 0 as possible. This will make it easier to fill the
+ * empty components with components from a different slot in a following pass.
+ *
+ * comps[] and interp_type[] are the per-slot tables filled in by
+ * get_slot_component_masks_and_interp_types(); comps[] is updated as scalars
+ * are moved.  The resulting remap is applied to both the consumer's inputs
+ * and the producer's outputs.
+ */
+static void
+compact_components(nir_shader *producer, nir_shader *consumer, uint8_t *comps,
+                   uint8_t *interp_type, bool default_to_smooth_interp)
+{
+   struct exec_list *input_list = &consumer->inputs;
+   struct exec_list *output_list = &producer->outputs;
+   struct varying_loc remap[32][4] = {{{0}, {0}}};
+
+   /* Create a cursor for each interpolation type */
+   unsigned cursor[4] = {0};
+
+   /* We only need to pass over one stage and we choose the consumer as it seems
+    * to cause a larger reduction in instruction counts (tested on i965).
+    */
+   nir_foreach_variable(var, input_list) {
+
+      /* Only remap things that aren't builtins.
+       * TODO: add TES patch support.
+       */
+      if (var->data.location >= VARYING_SLOT_VAR0 &&
+          var->data.location - VARYING_SLOT_VAR0 < 32) {
+
+         const struct glsl_type *type = var->type;
+         if (nir_is_per_vertex_io(var, consumer->info.stage)) {
+            assert(glsl_type_is_array(type));
+            type = glsl_get_array_element(type);
+         }
+
+         /* Skip types that require more complex packing handling.
+          * TODO: add support for these types.
+          */
+         if (glsl_type_is_array(type) ||
+             glsl_type_is_dual_slot(type) ||
+             glsl_type_is_matrix(type) ||
+             glsl_type_is_struct(type) ||
+             glsl_type_is_64bit(type))
+            continue;
+
+         /* We ignore complex types above and all other vector types should
+          * have been split into scalar variables by the lower_io_to_scalar
+          * pass. The only exception should be OpenGL xfb varyings.
+          */
+         if (glsl_get_vector_elements(type) != 1)
+            continue;
+
+         unsigned location = var->data.location - VARYING_SLOT_VAR0;
+         uint8_t used_comps = comps[location];
+
+         /* If there are no empty components there is nothing more for us to do.
+          */
+         if (used_comps == 0xf)
+            continue;
+
+         bool found_new_offset = false;
+         uint8_t interp = get_interp_type(var, default_to_smooth_interp);
+         for (; cursor[interp] < 32; cursor[interp]++) {
+
+            /* We couldn't find anywhere to pack the varying, continue on. */
+            if (cursor[interp] == location)
+               break;
+
+            /* We can only pack varyings with matching interpolation types */
+            if (interp_type[cursor[interp]] != interp)
+               continue;
+
+            uint8_t cursor_used_comps = comps[cursor[interp]];
+
+            /* If the slot is empty just skip it for now, compact_var_list()
+             * can be called after this function to remove empty slots for us.
+             * TODO: finish implementing compact_var_list(); it requires array
+             * and matrix splitting.
+             */
+            if (!cursor_used_comps)
+               continue;
+
+            uint8_t unused_comps = ~cursor_used_comps;
+
+            for (unsigned i = 0; i < 4; i++) {
+               uint8_t new_var_comps = 1 << i;
+               if (unused_comps & new_var_comps) {
+                  remap[location][var->data.location_frac].component = i;
+                  remap[location][var->data.location_frac].location =
+                     cursor[interp] + VARYING_SLOT_VAR0;
+
+                  found_new_offset = true;
+
+                  /* Turn off the mask for the component we are remapping */
+                  if (comps[location] & (1 << var->data.location_frac)) {
+                     comps[location] ^= 1 << var->data.location_frac;
+                     comps[cursor[interp]] |= new_var_comps;
+                  }
+                  break;
+               }
+            }
+
+            if (found_new_offset)
+               break;
+         }
+      }
+   }
+
+   remap_slots_and_components(input_list, remap);
+   remap_slots_and_components(output_list, remap);
+}
+
+/* We assume that this has been called more-or-less directly after
+ * remove_unused_varyings. At this point, all of the varyings that we
+ * aren't going to be using have been completely removed and the
+ * inputs_read and outputs_written fields in nir_shader_info reflect
+ * this. Therefore, the total set of valid slots is the OR of the two
+ * sets of varyings; this accounts for varyings which one side may need
+ * to read/write even if the other doesn't. This can happen if, for
+ * instance, an array is used indirectly from one side causing it to be
+ * unsplittable but directly from the other.
+ *
+ * default_to_smooth_interp selects whether INTERP_MODE_NONE varyings are
+ * grouped together with INTERP_MODE_SMOOTH ones when packing.
+ */
+void
+nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
+                     bool default_to_smooth_interp)
+{
+   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
+   assert(consumer->info.stage != MESA_SHADER_VERTEX);
+
+   uint8_t comps[32] = {0};
+   uint8_t interp_type[32] = {0};
+
+   get_slot_component_masks_and_interp_types(&producer->outputs, comps,
+                                             interp_type,
+                                             producer->info.stage,
+                                             default_to_smooth_interp);
+   get_slot_component_masks_and_interp_types(&consumer->inputs, comps,
+                                             interp_type,
+                                             consumer->info.stage,
+                                             default_to_smooth_interp);
+
+   compact_components(producer, consumer, comps, interp_type,
+                      default_to_smooth_interp);
+}
-- 
2.13.6

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev