Hi, A few comments and questions. I still haven't finished reading emit_split_copies(), though.
On Thu, Jul 26, 2018 at 09:00:03AM -0700, Jason Ekstrand wrote: > This pass looks for array variables where at least one level of the > array is never indirected and splits it into multiple smaller variables. > --- > src/compiler/nir/nir.h | 1 + > src/compiler/nir/nir_split_vars.c | 525 ++++++++++++++++++++++++++++++ > 2 files changed, 526 insertions(+) > > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h > index 4af7166f25b..c6ed5bb5358 100644 > --- a/src/compiler/nir/nir.h > +++ b/src/compiler/nir/nir.h > @@ -2607,6 +2607,7 @@ void nir_dump_cfg(nir_shader *shader, FILE *fp); > > int nir_gs_count_vertices(const nir_shader *shader); > > +bool nir_split_array_vars(nir_shader *shader, nir_variable_mode modes); > bool nir_split_var_copies(nir_shader *shader); > bool nir_split_per_member_structs(nir_shader *shader); > bool nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes); > diff --git a/src/compiler/nir/nir_split_vars.c > b/src/compiler/nir/nir_split_vars.c > index 1f59ac2f5e7..394ed2be622 100644 > --- a/src/compiler/nir/nir_split_vars.c > +++ b/src/compiler/nir/nir_split_vars.c > @@ -269,3 +269,528 @@ nir_split_struct_vars(nir_shader *shader, > nir_variable_mode modes) > > return progress; > } > + > +static int > +num_arrays_in_type(const struct glsl_type *type) > +{ > + int num_arrays = 0; > + while (true) { > + if (glsl_type_is_array(type) || glsl_type_is_matrix(type)) { > + num_arrays++; > + type = glsl_get_array_element(type); > + } else if (glsl_type_is_vector_or_scalar(type)) { > + return num_arrays; > + } else { > + /* Unknown type */ > + return -1; "Unknown" seems misleading, maybe "Unsupported"? The pass uses this to rule out variables that have structs in the hiearchy. > + } > + } > +} > + > +static bool > +init_var_list_array_indirects(struct exec_list *vars, > + struct hash_table *var_indirect_map, > + void *mem_ctx) > +{ > + bool has_aoa = false; > + > + nir_foreach_variable(var, vars) { > + int num_arrays = num_arrays_in_type(var->type); > + if (num_arrays > 0) { > + BITSET_WORD *indirects = rzalloc_array(mem_ctx, BITSET_WORD, > + BITSET_WORDS(num_arrays)); > + _mesa_hash_table_insert(var_indirect_map, var, indirects); > + has_aoa = true; > + } > + } > + > + return has_aoa; > +} > + > +static void > +mark_indirects_impl(nir_function_impl *impl, > + struct hash_table *var_indirect_map, > + nir_variable_mode modes, > + void *mem_ctx) > +{ > + nir_foreach_block(block, impl) { > + nir_foreach_instr(instr, block) { > + if (instr->type != nir_instr_type_deref) > + continue; > + > + nir_deref_instr *deref = nir_instr_as_deref(instr); > + if (!(deref->mode & modes)) > + continue; > + > + if (!glsl_type_is_vector_or_scalar(deref->type)) > + continue; > + > + nir_variable *base_var = nir_deref_instr_get_variable(deref); > + struct hash_entry *entry = > + _mesa_hash_table_search(var_indirect_map, base_var); > + if (!entry) > + continue; > + > + BITSET_WORD *indirects = entry->data; > + > + nir_deref_path path; > + nir_deref_path_init(&path, deref, mem_ctx); > + > + for (unsigned i = 1; path.path[i]; i++) { Optional: add a comment that you can skip the first because it is always a type variable. > + /* We already know this is an AoA variable */ > + assert(path.path[i]->deref_type == nir_deref_type_array_wildcard > || > + path.path[i]->deref_type == nir_deref_type_array); > + > + if (path.path[i]->deref_type == nir_deref_type_array && > + nir_src_as_const_value(path.path[i]->arr.index) == NULL) Idea: add a "nir_deref_array_is_indirect" (or something like) helper function to make this less noisy. > + BITSET_SET(indirects, i - 1); > + } > + } > + } > +} > + > +struct elem { > + struct elem *parent; > + > + const struct glsl_type *type; > + > + nir_variable *var; > + struct elem *ind_child; > + struct elem *children; A small note about what ind_child and children store would be helpful. > +}; > + > +static void > +create_split_array_vars(struct elem *elem, struct elem *parent, > + BITSET_WORD *indirects, unsigned depth, > + const struct glsl_type *type, > + const char *name, > + struct split_var_state *state) > +{ > + *elem = (struct elem) { > + .parent = parent, > + .type = type, > + }; > + > + if (glsl_type_is_vector_or_scalar(type)) { > + const struct glsl_type *var_type = type; > + for (struct elem *e = elem->parent; e; e = e->parent) { > + if (e->ind_child) > + var_type = glsl_array_type(var_type, glsl_get_length(e->type)); > + } > + > + /* We add parens to the variable name so it looks like "(foo[2][*])" so > + * that further derefs will look like "(foo[2][*])[ssa_6]" > + */ > + name = ralloc_asprintf(state->mem_ctx, "(%s)", name); > + > + nir_variable_mode mode = state->base_var->data.mode; > + if (mode == nir_var_local) { > + elem->var = nir_local_variable_create(state->impl, var_type, name); > + } else { > + elem->var = nir_variable_create(state->shader, mode, var_type, > name); > + } > + } else if (BITSET_TEST(indirects, depth)) { > + elem->ind_child = ralloc(state->mem_ctx, struct elem); > + create_split_array_vars(elem->ind_child, elem, > + indirects, depth + 1, > + glsl_get_array_element(type), > + ralloc_asprintf(state->mem_ctx, "%s[*]", name), > + state); > + } else { > + unsigned array_len = glsl_get_length(type); > + elem->children = ralloc_array(state->mem_ctx, struct elem, array_len); > + for (unsigned i = 0; i < array_len; i++) { > + create_split_array_vars(&elem->children[i], elem, > + indirects, depth + 1, > + glsl_get_array_element(type), > + ralloc_asprintf(state->mem_ctx, > + "%s[%d]", name, i), > + state); > + } > + } > +} > + > +static bool > +split_var_list_arrays(nir_shader *shader, > + nir_function_impl *impl, > + struct exec_list *vars, > + struct hash_table *var_indirect_map, > + struct hash_table *var_elem_map, > + void *mem_ctx) > +{ > + struct split_var_state state = { > + .mem_ctx = mem_ctx, > + .shader = shader, > + .impl = impl, > + }; > + > + struct exec_list split_vars; > + exec_list_make_empty(&split_vars); > + > + /* To avoid list confusion (we'll be adding things as we split variables), > + * pull all of the variables we plan to split off of the list > + */ > + nir_foreach_variable_safe(var, vars) { > + struct hash_entry *entry = > + _mesa_hash_table_search(var_indirect_map, var); > + if (!entry) > + continue; > + > + BITSET_WORD *indirects = entry->data; > + > + int num_arrays = num_arrays_in_type(var->type); > + bool has_non_indirect = false; > + for (int i = 0; i < num_arrays; i++) { > + if (!BITSET_TEST(indirects, i)) { > + has_non_indirect = true; > + break; > + } > + } > + > + if (has_non_indirect) { > + /* We have something to split */ > + exec_node_remove(&var->node); > + exec_list_push_tail(&split_vars, &var->node); > + } else { > + /* It's easier in split_array_copies_impl if var_indirect_map > + * contains only the things we plan to split. > + */ > + _mesa_hash_table_remove(var_indirect_map, entry); I thought of moving the non_indirect check and the hash removal to mark_indirects_impl. Avoid doing path walking during the instruction traversal for variables you know are not fit. Not sure if its worth. > + } > + } > + > + nir_foreach_variable(var, &split_vars) { > + struct hash_entry *entry = > + _mesa_hash_table_search(var_indirect_map, var); > + assert(entry); > + > + BITSET_WORD *indirects = entry->data; > + > + state.base_var = var; > + > + struct elem *root_elem = ralloc(mem_ctx, struct elem); > + create_split_array_vars(root_elem, NULL, indirects, 0, var->type, > + ralloc_asprintf(mem_ctx, "(%s)", var->name), > + &state); Is the parenthesis in "(%s)" for the toplevel variable name correct? > + _mesa_hash_table_insert(var_elem_map, var, root_elem); > + } > + > + return !exec_list_is_empty(&split_vars); > +} > + > +static bool > +deref_has_split_wildcard(nir_deref_path *path, BITSET_WORD *indirects) > +{ > + if (indirects == NULL) > + return false; > + > + for (unsigned i = 0; path->path[i]; i++) { > + if (path->path[i]->deref_type != nir_deref_type_array_wildcard) > + continue; > + > + assert(i > 0); > + if (!BITSET_TEST(indirects, i - 1)) > + return true; > + } > + > + return false; > +} > + > +static void > +emit_split_copies(nir_builder *b, > + nir_deref_instr *dst, nir_deref_path *dst_path, > + unsigned dst_depth, BITSET_WORD *dst_indirects, > + nir_deref_instr *src, nir_deref_path *src_path, > + unsigned src_depth, BITSET_WORD *src_indirects) > +{ > + nir_deref_instr *dst_p, *src_p; > + > + while ((dst_p = dst_path->path[dst_depth])) { > + if (dst_p->deref_type == nir_deref_type_array_wildcard) > + break; > + > + dst = nir_build_deref_follower(b, dst, dst_p); > + dst_depth++; > + } > + > + while ((src_p = src_path->path[src_depth])) { > + if (src_p->deref_type == nir_deref_type_array_wildcard) > + break; > + > + src = nir_build_deref_follower(b, src, src_p); > + src_depth++; > + } > + > + if (src_p == NULL || dst_p == NULL) { > + assert(src_p == NULL && dst_p == NULL); > + nir_copy_deref(b, dst, src); > + } else { > + assert(dst_p->deref_type == nir_deref_type_array_wildcard && > + src_p->deref_type == nir_deref_type_array_wildcard); > + > + if ((dst_indirects && !BITSET_TEST(dst_indirects, dst_depth - 1)) || > + (src_indirects && !BITSET_TEST(src_indirects, src_depth - 1))) { > + /* There are no indirects at this level on one of the source or the > + * destination so we are lowering it. > + */ > + assert(glsl_get_length(dst_path->path[dst_depth - 1]->type) == > + glsl_get_length(src_path->path[src_depth - 1]->type)); > + unsigned len = glsl_get_length(dst_path->path[dst_depth - 1]->type); Could "dst_path->path[dst_depth - 1]" be replaced with dst or am I missing something? (Same for src.) > + for (unsigned i = 0; i < len; i++) { > + nir_ssa_def *idx = nir_imm_int(b, i); > + emit_split_copies(b, nir_build_deref_array(b, dst, idx), > + dst_path, dst_depth + 1, dst_indirects, > + nir_build_deref_array(b, src, idx), > + src_path, src_depth + 1, src_indirects); > + } > + } else { > + /* Both sides either aren't being lowered at all or have indirects > on > + * this level so we just keep going > + */ > + emit_split_copies(b, nir_build_deref_follower(b, dst, dst_p), > + dst_path, dst_depth + 1, dst_indirects, > + nir_build_deref_follower(b, src, src_p), > + src_path, src_depth + 1, src_indirects); > + } > + } > +} > + > +static void > +split_array_copies_impl(nir_function_impl *impl, > + struct hash_table *var_indirect_map, > + nir_variable_mode modes, > + void *mem_ctx) > +{ > + nir_builder b; > + nir_builder_init(&b, impl); > + > + nir_foreach_block(block, impl) { > + nir_foreach_instr_safe(instr, block) { > + if (instr->type != nir_instr_type_intrinsic) > + continue; > + > + nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr); > + if (copy->intrinsic != nir_intrinsic_copy_deref) > + continue; > + > + nir_deref_instr *dst_deref = nir_src_as_deref(copy->src[0]); > + nir_deref_instr *src_deref = nir_src_as_deref(copy->src[1]); > + > + BITSET_WORD *dst_indirects = NULL; > + if (dst_deref->mode & modes) { > + nir_variable *dst_var = nir_deref_instr_get_variable(dst_deref); > + struct hash_entry *entry = > + _mesa_hash_table_search(var_indirect_map, dst_var); > + if (entry) > + dst_indirects = entry->data; > + } > + > + BITSET_WORD *src_indirects = NULL; > + if (src_deref->mode & modes) { > + nir_variable *src_var = nir_deref_instr_get_variable(src_deref); > + struct hash_entry *entry = > + _mesa_hash_table_search(var_indirect_map, src_var); > + if (entry) > + src_indirects = entry->data; > + } > + > + if (!src_indirects && !dst_indirects) > + continue; > + > + nir_deref_path dst_path, src_path; > + nir_deref_path_init(&dst_path, dst_deref, mem_ctx); > + nir_deref_path_init(&src_path, src_deref, mem_ctx); > + > + if (!deref_has_split_wildcard(&dst_path, dst_indirects) && > + !deref_has_split_wildcard(&src_path, src_indirects)) > + continue; > + > + b.cursor = nir_before_instr(©->instr); > + > + emit_split_copies(&b, dst_path.path[0], &dst_path, 1, dst_indirects, > + src_path.path[0], &src_path, 1, src_indirects); > + > + nir_instr_remove(©->instr); > + nir_deref_instr_remove_if_unused(dst_deref); > + nir_deref_instr_remove_if_unused(src_deref); > + } > + } > +} > + > +static void > +split_array_derefs_impl(nir_function_impl *impl, > + struct hash_table *var_elem_map, > + nir_variable_mode modes, > + void *mem_ctx) > +{ > + nir_builder b; > + nir_builder_init(&b, impl); > + > + nir_foreach_block(block, impl) { > + nir_foreach_instr_safe(instr, block) { > + if (instr->type != nir_instr_type_deref) > + continue; > + > + nir_deref_instr *deref = nir_instr_as_deref(instr); > + if (!(deref->mode & modes)) > + continue; > + > + /* Clean up any dead derefs we find lying around. They may refer to > + * variables we're planning to split. > + */ > + if (nir_deref_instr_remove_if_unused(deref)) > + continue; > + > + if (!glsl_type_is_vector_or_scalar(deref->type)) > + continue; > + > + nir_variable *base_var = nir_deref_instr_get_variable(deref); > + struct hash_entry *entry = > + _mesa_hash_table_search(var_elem_map, base_var); > + if (!entry) > + continue; > + > + struct elem *root_elem = entry->data; > + > + nir_deref_path path; > + nir_deref_path_init(&path, deref, mem_ctx); > + > + struct elem *tail_elem = root_elem; > + for (unsigned i = 1; path.path[i]; i++) { > + nir_deref_instr *p = path.path[i]; > + assert(path.path[i - 1]->type == tail_elem->type); > + assert(p->deref_type == nir_deref_type_array || > + p->deref_type == nir_deref_type_array_wildcard); > + > + if (tail_elem->children) { > + assert(p->deref_type == nir_deref_type_array); > + unsigned idx = nir_src_as_const_value(p->arr.index)->u32[0]; > + if (idx >= glsl_get_length(tail_elem->type)) > + idx = 0; /* Overflow; we can give them anything */ > + tail_elem = &tail_elem->children[idx]; > + } else { > + tail_elem = tail_elem->ind_child; > + } > + } > + nir_variable *split_var = tail_elem->var; > + > + b.cursor = nir_before_instr(&deref->instr); > + > + nir_deref_instr *new_deref = nir_build_deref_var(&b, split_var); > + > + tail_elem = root_elem; > + for (unsigned i = 1; path.path[i]; i++) { > + nir_deref_instr *p = path.path[i]; > + assert(path.path[i - 1]->type == tail_elem->type); > + assert(p->deref_type == nir_deref_type_array || > + p->deref_type == nir_deref_type_array_wildcard); > + > + if (tail_elem->children) { > + /* This level is split, just advance to the next element */ > + assert(p->deref_type == nir_deref_type_array); > + unsigned idx = nir_src_as_const_value(p->arr.index)->u32[0]; > + tail_elem = &tail_elem->children[idx]; > + } else { > + /* This level isn't split, build a deref */ > + new_deref = nir_build_deref_follower(&b, new_deref, p); > + tail_elem = tail_elem->ind_child; > + } > + } > + > + assert(new_deref->type == deref->type); > + nir_ssa_def_rewrite_uses(&deref->dest.ssa, > + nir_src_for_ssa(&new_deref->dest.ssa)); > + nir_deref_instr_remove_if_unused(deref); > + } > + } > +} > + > +/** A pass for splitting arrays into multiple variables > + * > + * This pass looks at arrays (possibly multiple levels) and tries to split Optional: would be nice to mention this deals with arrays of vectors / scalars -- in particular, no structs involved, as they are handled by the other split. > + * them into piles of variables, one for each array element. The heuristic > + * used is simple: If a given array level is never used with an indirect, > that > + * array level will get split. > + */ > +bool > +nir_split_array_vars(nir_shader *shader, nir_variable_mode modes) > +{ > + void *mem_ctx = ralloc_context(NULL); > + struct hash_table *var_indirect_map = > + _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer, > + _mesa_key_pointer_equal); > + > + assert((modes & (nir_var_global | nir_var_local)) == modes); > + > + bool has_global_aoa = false; > + if (modes & nir_var_global) { > + has_global_aoa = init_var_list_array_indirects(&shader->globals, > + var_indirect_map, > mem_ctx); > + } > + > + bool has_any_aoa = false; > + nir_foreach_function(function, shader) { > + if (!function->impl) > + continue; > + > + bool has_local_aoa; > + if (modes & nir_var_local) { > + has_local_aoa = > init_var_list_array_indirects(&function->impl->locals, > + var_indirect_map, > + mem_ctx); > + } > + > + if (has_global_aoa || has_local_aoa) { > + has_any_aoa = true; > + mark_indirects_impl(function->impl, var_indirect_map, modes, > mem_ctx); > + } > + } > + > + /* If we failed to find any arrays of arrays, bail early. */ > + if (!has_any_aoa) { > + ralloc_free(mem_ctx); > + return false; > + } > + > + struct hash_table *var_elem_map = > + _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer, > + _mesa_key_pointer_equal); > + > + bool has_global_splits = false; > + if (modes & nir_var_global) { > + has_global_splits = split_var_list_arrays(shader, NULL, > + &shader->globals, > + var_indirect_map, > + var_elem_map, mem_ctx); Optional: remove the extra space in some of the lines above. > + } > + > + bool progress = false; > + nir_foreach_function(function, shader) { > + if (!function->impl) > + continue; > + > + bool has_local_splits = false; > + if (modes & nir_var_local) { > + has_local_splits = split_var_list_arrays(shader, function->impl, > + &function->impl->locals, > + var_indirect_map, > + var_elem_map, mem_ctx); > + } > + > + if (has_global_splits || has_local_splits) { > + split_array_copies_impl(function->impl, var_indirect_map, > + modes, mem_ctx); > + > + split_array_derefs_impl(function->impl, var_elem_map, > + modes, mem_ctx); > + > + nir_metadata_preserve(function->impl, nir_metadata_block_index | > + nir_metadata_dominance); > + progress = true; > + } > + } > + > + ralloc_free(mem_ctx); > + > + return progress; > +} > -- > 2.17.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev