This pass looks for variables with vector or array-of-vector types and
narrows the type to only the components used.
---
 src/compiler/nir/nir.h            |   1 +
 src/compiler/nir/nir_split_vars.c | 530 +++++++++++++++++++++++++++++-
 2 files changed, 523 insertions(+), 8 deletions(-)
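
Not part of the commit message, just context for review: a sketch of how a
driver might use the new pass. The loop shape and the surrounding passes are
my assumptions for illustration, not something this patch requires;
nir_copy_prop and nir_opt_dce are simply examples of cleanup passes that can
take advantage of the narrowing.

   /* Hypothetical call site (illustration only). For a local like
    *
    *    vec4 t;   where only t.x and t.z are ever both written and read,
    *
    * nir_narrow_vec_vars() retypes t as a vec2 and compacts every load and
    * store of it with swizzles, after which copy propagation and DCE can
    * clean up the dead channels.
    */
   bool progress;
   do {
      progress = false;
      progress |= nir_split_array_vars(shader, nir_var_local);
      progress |= nir_narrow_vec_vars(shader, nir_var_local | nir_var_global);
      progress |= nir_copy_prop(shader);
      progress |= nir_opt_dce(shader);
   } while (progress);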
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index c6ed5bb5358..ca437743ff8 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2611,6 +2611,7 @@ bool nir_split_array_vars(nir_shader *shader, nir_variable_mode modes);
 bool nir_split_var_copies(nir_shader *shader);
 bool nir_split_per_member_structs(nir_shader *shader);
 bool nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes);
+bool nir_narrow_vec_vars(nir_shader *shader, nir_variable_mode modes);
 
 bool nir_lower_returns_impl(nir_function_impl *impl);
 bool nir_lower_returns(nir_shader *shader);
diff --git a/src/compiler/nir/nir_split_vars.c b/src/compiler/nir/nir_split_vars.c
index 394ed2be622..8a981dd341d 100644
--- a/src/compiler/nir/nir_split_vars.c
+++ b/src/compiler/nir/nir_split_vars.c
@@ -25,6 +25,9 @@
 #include "nir_builder.h"
 #include "nir_deref.h"
 
+/* Needed for _mesa_bitcount() */
+#include "main/macros.h"
+
 struct split_var_state {
    void *mem_ctx;
 
@@ -46,15 +49,28 @@ struct field {
 };
 
 static const struct glsl_type *
-wrap_type_in_array(const struct glsl_type *type,
-                   const struct glsl_type *array_type)
+wrap_type_in_matrix_or_array(const struct glsl_type *type,
+                             const struct glsl_type *array_type)
 {
-   if (!glsl_type_is_array(array_type))
+   if (glsl_type_is_array(array_type)) {
+      const struct glsl_type *elem_type =
+         wrap_type_in_matrix_or_array(type, glsl_get_array_element(array_type));
+      return glsl_array_type(elem_type, glsl_get_length(array_type));
+   } else if (glsl_type_is_matrix(array_type)) {
+      if (glsl_type_is_scalar(type)) {
+         /* This can happen if we reduce the number of rows in a matrix to 1.
+          * Just use an array type in this case.
+          */
+         return glsl_array_type(type, glsl_get_matrix_columns(array_type));
+      } else {
+         assert(glsl_type_is_vector(type));
+         return glsl_matrix_type(glsl_get_base_type(type),
+                                 glsl_get_components(type),
+                                 glsl_get_matrix_columns(array_type));
+      }
+   } else {
       return type;
-
-   const struct glsl_type *elem_type =
-      wrap_type_in_array(type, glsl_get_array_element(array_type));
-   return glsl_array_type(elem_type, glsl_get_length(array_type));
+   }
 }
 
 static void
@@ -86,7 +102,7 @@ init_field_for_type(struct field *field, struct field *parent,
    } else {
       const struct glsl_type *var_type = type;
       for (struct field *f = field->parent; f; f = f->parent)
-         var_type = wrap_type_in_array(var_type, f->type);
+         var_type = wrap_type_in_matrix_or_array(var_type, f->type);
 
       nir_variable_mode mode = state->base_var->data.mode;
       if (mode == nir_var_local) {
@@ -689,6 +705,8 @@ split_array_derefs_impl(nir_function_impl *impl,
          /* This level is split, just advance to the next element */
          assert(p->deref_type == nir_deref_type_array);
          unsigned idx = nir_src_as_const_value(p->arr.index)->u32[0];
+         if (idx >= glsl_get_length(tail_elem->type))
+            idx = 0; /* Out-of-bounds access; any element will do */
          tail_elem = &tail_elem->children[idx];
       } else {
          /* This level isn't split, build a deref */
@@ -794,3 +812,499 @@ nir_split_array_vars(nir_shader *shader, nir_variable_mode modes)
 
    return progress;
 }
+
+struct vec_var_usage {
+   nir_component_mask_t comps_read;
+   nir_component_mask_t comps_written;
+
+   nir_component_mask_t comps_kept;
+
+   struct set *vars_copied;
+};
+
+static struct vec_var_usage *
+get_vec_var_usage(nir_variable *var,
+                  struct hash_table *var_usage_map,
+                  bool add_usage_entry, void *mem_ctx)
+{
+   struct hash_entry *entry = _mesa_hash_table_search(var_usage_map, var);
+   if (entry)
+      return entry->data;
+
+   if (!add_usage_entry)
+      return NULL;
+
+   struct vec_var_usage *usage = rzalloc(mem_ctx, struct vec_var_usage);
+   _mesa_hash_table_insert(var_usage_map, var, usage);
+
+   return usage;
+}
+
+static void
+mark_deref_used(nir_deref_instr *deref,
+                nir_component_mask_t comps_read,
+                nir_component_mask_t comps_written,
+                nir_deref_instr *copy_deref,
+                struct hash_table *var_usage_map,
+                nir_variable_mode modes,
+                void *mem_ctx)
+{
+   if (!(deref->mode & modes))
+      return;
+
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+
+   const struct glsl_type *vec_type = glsl_without_array_or_matrix(var->type);
+   if (!glsl_type_is_vector_or_scalar(vec_type))
+      return;
+
+   unsigned num_components = glsl_get_components(vec_type);
+
+   struct vec_var_usage *usage =
+      get_vec_var_usage(var, var_usage_map, true, mem_ctx);
+
+   usage->comps_read |= comps_read & ((1u << num_components) - 1);
+   usage->comps_written |= comps_written & ((1u << num_components) - 1);
+
+   if (copy_deref) {
+      if (usage->vars_copied == NULL) {
+         usage->vars_copied = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+                                               _mesa_key_pointer_equal);
+      }
+      _mesa_set_add(usage->vars_copied,
+                    nir_deref_instr_get_variable(copy_deref));
+   }
+}
+
+static bool
+src_is_load_deref(nir_src src, nir_src deref_src)
+{
+   assert(src.is_ssa);
+   assert(deref_src.is_ssa);
+
+   if (src.ssa->parent_instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *load = nir_instr_as_intrinsic(src.ssa->parent_instr);
+   if (load->intrinsic != nir_intrinsic_load_deref)
+      return false;
+
+   assert(load->src[0].is_ssa);
+
+   return load->src[0].ssa == deref_src.ssa;
+}
+
+/* Returns all non-self-referential components of a store instruction. A
+ * component is self-referential if it comes from the same component of a load
+ * instruction on the same deref. If the only data in a particular component
+ * of a variable came directly from that component then it's undefined. The
+ * only way to get defined data into a component of a variable is for it to
+ * get written there by something outside or from a different component.
+ *
+ * This is a fairly common pattern in shaders that come from either GLSL IR or
+ * GLSLang because both glsl_to_nir and GLSLang implement write-masking with
+ * load-vec-store.
+ */
+static nir_component_mask_t
+get_non_self_referential_store_comps(nir_intrinsic_instr *store)
+{
+   nir_component_mask_t comps = nir_intrinsic_write_mask(store);
+
+   assert(store->src[1].is_ssa);
+   nir_instr *src_instr = store->src[1].ssa->parent_instr;
+   if (src_instr->type != nir_instr_type_alu)
+      return comps;
+
+   nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);
+
+   if (src_alu->op == nir_op_imov ||
+       src_alu->op == nir_op_fmov) {
+      /* If it's just a swizzle of a load from the same deref, discount any
+       * channels that don't move in the swizzle.
+       */
+      if (src_is_load_deref(src_alu->src[0].src, store->src[0])) {
+         for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
+            if (src_alu->src[0].swizzle[i] == i)
+               comps &= ~(1u << i);
+         }
+      }
+   } else if (src_alu->op == nir_op_vec2 ||
+              src_alu->op == nir_op_vec3 ||
+              src_alu->op == nir_op_vec4) {
+      /* If it's a vec, discount any channels that are just loads from the
+       * same deref put in the same spot.
+       */
+      for (unsigned i = 0; i < nir_op_infos[src_alu->op].num_inputs; i++) {
+         if (src_is_load_deref(src_alu->src[i].src, store->src[0]) &&
+             src_alu->src[i].swizzle[0] == i)
+            comps &= ~(1u << i);
+      }
+   }
+
+   return comps;
+}
+
+static void
+find_used_components_impl(nir_function_impl *impl,
+                          struct hash_table *var_usage_map,
+                          nir_variable_mode modes,
+                          void *mem_ctx)
+{
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+         switch (intrin->intrinsic) {
+         case nir_intrinsic_load_deref:
+            mark_deref_used(nir_src_as_deref(intrin->src[0]),
+                            nir_ssa_def_components_read(&intrin->dest.ssa), 0,
+                            NULL, var_usage_map, modes, mem_ctx);
+            break;
+
+         case nir_intrinsic_store_deref:
+            mark_deref_used(nir_src_as_deref(intrin->src[0]),
+                            0, get_non_self_referential_store_comps(intrin),
+                            NULL, var_usage_map, modes, mem_ctx);
+            break;
+
+         case nir_intrinsic_copy_deref: {
+            /* Just mark everything used for copies. */
+            nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
+            nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
+            mark_deref_used(dst, 0, ~0, src, var_usage_map, modes, mem_ctx);
+            mark_deref_used(src, ~0, 0, dst, var_usage_map, modes, mem_ctx);
+            break;
+         }
+
+         default:
+            break;
+         }
+      }
+   }
+}
+
+static bool
+narrow_vec_var_list(struct exec_list *vars,
+                    struct hash_table *var_usage_map)
+{
+   /* Initialize the comps_kept field of each variable. This is the AND of
+    * the components written and components read. If a component is written
+    * but never read, it's dead. If it is read but never written, then all
+    * values read are undefined garbage and we may as well not read them.
+    */
+   nir_foreach_variable(var, vars) {
+      struct vec_var_usage *usage =
+         get_vec_var_usage(var, var_usage_map, false, NULL);
+      if (usage) {
+         assert(usage->comps_kept == 0);
+         usage->comps_kept = usage->comps_read & usage->comps_written;
+      }
+   }
+
+   /* In order for variable copies to work, we have to have the same data type
+    * on the source and the destination. In order to satisfy this, we run a
+    * little fixed-point algorithm to transitively ensure that we get enough
+    * components for this to hold for all copies.
+    */
+   bool fp_progress;
+   do {
+      fp_progress = false;
+      nir_foreach_variable(var, vars) {
+         struct vec_var_usage *var_usage =
+            get_vec_var_usage(var, var_usage_map, false, NULL);
+         if (!var_usage || !var_usage->vars_copied)
+            continue;
+
+         const unsigned var_num_components =
+            glsl_get_components(glsl_without_array_or_matrix(var->type));
+
+         struct set_entry *copy_entry;
+         set_foreach(var_usage->vars_copied, copy_entry) {
+            struct vec_var_usage *copy_usage =
+               get_vec_var_usage((void *)copy_entry->key,
+                                 var_usage_map, false, NULL);
+
+            nir_component_mask_t copy_comps;
+            if (!copy_usage) {
+               /* If the copy doesn't have usage information, assume the worst
+                * case that it uses everything.
+                */
+               copy_comps = (1 << var_num_components) - 1;
+            } else {
+               copy_comps = copy_usage->comps_kept;
+            }
+
+            if ((var_usage->comps_kept | copy_comps) != var_usage->comps_kept) {
+               var_usage->comps_kept |= copy_comps;
+               fp_progress = true;
+            }
+         }
+      }
+   } while (fp_progress);
+
+   bool vars_narrowed = false;
+   nir_foreach_variable_safe(var, vars) {
+      struct vec_var_usage *usage =
+         get_vec_var_usage(var, var_usage_map, false, NULL);
+      if (!usage)
+         continue;
+
+      const struct glsl_type *vec_type =
+         glsl_without_array_or_matrix(var->type);
+      assert(usage->comps_kept < (1u << glsl_get_components(vec_type)));
+
+      if (usage->comps_kept == (1u << glsl_get_components(vec_type)) - 1) {
+         /* This variable doesn't need to be narrowed. Remove it from the
+          * hash table so later passes will ignore it.
+          */
+         _mesa_hash_table_remove_key(var_usage_map, var);
+         continue;
+      }
+
+      if (usage->comps_kept == 0) {
+         /* This variable is dead, remove it */
+         exec_node_remove(&var->node);
+      } else {
+         const struct glsl_type *new_vec_type =
+            glsl_vector_type(glsl_get_base_type(vec_type),
+                             _mesa_bitcount(usage->comps_kept));
+         var->type = wrap_type_in_matrix_or_array(new_vec_type, var->type);
+      }
+      vars_narrowed = true;
+   }
+
+   return vars_narrowed;
+}
+
+static bool
+deref_is_dead(nir_deref_instr *deref,
+              struct hash_table *var_usage_map,
+              nir_variable_mode modes)
+{
+   if (!(deref->mode & modes))
+      return false;
+
+   struct vec_var_usage *usage =
+      get_vec_var_usage(nir_deref_instr_get_variable(deref),
+                        var_usage_map, false, NULL);
+   if (!usage)
+      return false;
+
+   return usage->comps_kept == 0;
+}
+
+static void
+narrow_vec_var_access_impl(nir_function_impl *impl,
+                           struct hash_table *var_usage_map,
+                           nir_variable_mode modes)
+{
+   nir_builder b;
+   nir_builder_init(&b, impl);
+
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr_safe(instr, block) {
+         switch (instr->type) {
+         case nir_instr_type_deref: {
+            nir_deref_instr *deref = nir_instr_as_deref(instr);
+            if (!(deref->mode & modes))
+               break;
+
+            /* Clean up any dead derefs we find lying around. They may refer
+             * to variables we're planning to narrow.
+             */
+            if (nir_deref_instr_remove_if_unused(deref))
+               break;
+
+            /* We don't need to check if this is one of the derefs we're
+             * narrowing because this is a no-op if it isn't. The worst that
+             * could happen is that we accidentally fix an invalid deref.
+             */
+            if (deref->deref_type == nir_deref_type_var) {
+               deref->type = deref->var->type;
+            } else if (deref->deref_type == nir_deref_type_array ||
+                       deref->deref_type == nir_deref_type_array_wildcard) {
+               nir_deref_instr *parent = nir_deref_instr_parent(deref);
+               assert(glsl_type_is_array(parent->type) ||
+                      glsl_type_is_matrix(parent->type));
+               deref->type = glsl_get_array_element(parent->type);
+            }
+            break;
+         }
+
+         case nir_instr_type_intrinsic: {
+            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+            /* If we have a copy whose source or destination has been deleted
+             * because we determined the variable was dead, then we just
+             * delete the copy instruction. If the source variable was dead
+             * then it was writing undefined garbage anyway and if it's the
+             * destination variable that's dead then the write isn't needed.
+             */
+            if (intrin->intrinsic == nir_intrinsic_copy_deref) {
+               nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
+               nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
+               if (deref_is_dead(dst, var_usage_map, modes) ||
+                   deref_is_dead(src, var_usage_map, modes)) {
+                  nir_instr_remove(&intrin->instr);
+                  nir_deref_instr_remove_if_unused(dst);
+                  nir_deref_instr_remove_if_unused(src);
+               }
+               continue;
+            }
+
+            if (intrin->intrinsic != nir_intrinsic_load_deref &&
+                intrin->intrinsic != nir_intrinsic_store_deref)
+               continue;
+
+            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+            if (!(deref->mode & modes))
+               continue;
+
+            struct vec_var_usage *usage =
+               get_vec_var_usage(nir_deref_instr_get_variable(deref),
+                                 var_usage_map, false, NULL);
+            if (!usage)
+               continue;
+
+            if (usage->comps_kept == 0) {
+               if (intrin->intrinsic == nir_intrinsic_load_deref) {
+                  nir_ssa_def *u =
+                     nir_ssa_undef(&b, intrin->dest.ssa.num_components,
+                                       intrin->dest.ssa.bit_size);
+                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                           nir_src_for_ssa(u));
+               }
+               nir_instr_remove(&intrin->instr);
+               nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[0]));
+               continue;
+            }
+
+            if (intrin->intrinsic == nir_intrinsic_load_deref) {
+               unsigned swizzle[NIR_MAX_VEC_COMPONENTS] = { 0, };
+               unsigned c = 0;
+               for (unsigned i = 0; i < intrin->num_components; i++) {
+                  if (usage->comps_kept & (1u << i))
+                     swizzle[i] = c++;
+               }
+
+               b.cursor = nir_after_instr(&intrin->instr);
+
+               nir_ssa_def *swizzled =
+                  nir_swizzle(&b, &intrin->dest.ssa, swizzle,
+                              intrin->num_components, false);
+
+               nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
+                                              nir_src_for_ssa(swizzled),
+                                              swizzled->parent_instr);
+
+               /* The SSA def is now only used by the swizzle. It's safe to
+                * shrink the number of components.
+                */
+               assert(list_is_singular(&intrin->dest.ssa.uses));
+               intrin->num_components = c;
+               intrin->dest.ssa.num_components = c;
+            } else {
+               nir_component_mask_t write_mask =
+                  nir_intrinsic_write_mask(intrin);
+
+               unsigned swizzle[NIR_MAX_VEC_COMPONENTS];
+               nir_component_mask_t new_write_mask = 0;
+               unsigned c = 0;
+               for (unsigned i = 0; i < intrin->num_components; i++) {
+                  if (usage->comps_kept & (1u << i)) {
+                     swizzle[c] = i;
+                     if (write_mask & (1u << i))
+                        new_write_mask |= 1u << c;
+                     c++;
+                  }
+               }
+
+               b.cursor = nir_before_instr(&intrin->instr);
+
+               nir_ssa_def *swizzled =
+                  nir_swizzle(&b, intrin->src[1].ssa, swizzle, c, false);
+
+               /* Rewrite to use the compacted source */
+               nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
+                                     nir_src_for_ssa(swizzled));
+               nir_intrinsic_set_write_mask(intrin, new_write_mask);
+               intrin->num_components = c;
+            }
+            break;
+         }
+
+         default:
+            break;
+         }
+      }
+   }
+}
+
+/** Attempt to narrow (remove components from) (arrays of) vectors
+ *
+ * This pass looks at variables which contain a vector or an array (possibly
+ * multiple dimensions) of vectors and attempts to lower them to a smaller vector.
+ */
+bool
+nir_narrow_vec_vars(nir_shader *shader, nir_variable_mode modes)
+{
+   assert((modes & (nir_var_global | nir_var_local)) == modes);
+
+   void *mem_ctx = ralloc_context(NULL);
+
+   struct hash_table *var_usage_map =
+      _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                              _mesa_key_pointer_equal);
+
+   bool has_vars_to_narrow = false;
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+
+      /* Don't even bother crawling the IR if we don't have any variables.
+       * Given that this pass deletes any unused variables, it's likely that
+       * we will be in this scenario eventually.
+       */
+      if (!exec_list_is_empty(&shader->globals) ||
+          !exec_list_is_empty(&function->impl->locals)) {
+         has_vars_to_narrow = true;
+         find_used_components_impl(function->impl, var_usage_map,
+                                   modes, mem_ctx);
+      }
+   }
+   if (!has_vars_to_narrow) {
+      ralloc_free(mem_ctx);
+      return false;
+   }
+
+   bool globals_narrowed = false;
+   if (modes & nir_var_global)
+      globals_narrowed = narrow_vec_var_list(&shader->globals, var_usage_map);
+
+   bool progress = false;
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+
+      bool locals_narrowed = false;
+      if (modes & nir_var_local) {
+         locals_narrowed = narrow_vec_var_list(&function->impl->locals,
+                                               var_usage_map);
+      }
+
+      if (globals_narrowed || locals_narrowed) {
+         narrow_vec_var_access_impl(function->impl, var_usage_map, modes);
+
+         nir_metadata_preserve(function->impl, nir_metadata_block_index |
+                                               nir_metadata_dominance);
+         progress = true;
+      }
+   }
+
+   ralloc_free(mem_ctx);
+
+   return progress;
+}
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev