On Wed, Jan 4, 2017 at 9:31 PM, Timothy Arceri <timothy.arc...@collabora.com> wrote:
> There was a bit to take in here but it seems ok to me. I've made a
> bunch of trivial suggestions/comments below, otherwise:
>
> Reviewed-by: Timothy Arceri <timothy.arc...@collabora.com>
>
> On Mon, 2016-12-12 at 19:39 -0800, Jason Ekstrand wrote:
> > ---
> >  src/compiler/Makefile.sources             |   1 +
> >  src/compiler/nir/nir.h                    |   2 +
> >  src/compiler/nir/nir_opt_copy_prop_vars.c | 799 ++++++++++++++++++++++++++++++
> >  3 files changed, 802 insertions(+)
> >  create mode 100644 src/compiler/nir/nir_opt_copy_prop_vars.c
> >
> > diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
> > index 17b15de..09b4105 100644
> > --- a/src/compiler/Makefile.sources
> > +++ b/src/compiler/Makefile.sources
> > @@ -231,6 +231,7 @@ NIR_FILES = \
> >  	nir/nir_normalize_cubemap_coords.c \
> >  	nir/nir_opt_conditional_discard.c \
> >  	nir/nir_opt_constant_folding.c \
> > +	nir/nir_opt_copy_prop_vars.c \
> >  	nir/nir_opt_copy_propagate.c \
> >  	nir/nir_opt_cse.c \
> >  	nir/nir_opt_dce.c \
> > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > index 544d4ba..27ef633 100644
> > --- a/src/compiler/nir/nir.h
> > +++ b/src/compiler/nir/nir.h
> > @@ -2489,6 +2489,8 @@ bool nir_opt_global_to_local(nir_shader *shader);
> >  
> >  bool nir_copy_prop(nir_shader *shader);
> >  
> > +bool nir_opt_copy_prop_vars(nir_shader *shader);
> > +
> >  bool nir_opt_cse(nir_shader *shader);
> >  
> >  bool nir_opt_dce(nir_shader *shader);
> > diff --git a/src/compiler/nir/nir_opt_copy_prop_vars.c b/src/compiler/nir/nir_opt_copy_prop_vars.c
> > new file mode 100644
> > index 0000000..728e476
> > --- /dev/null
> > +++ b/src/compiler/nir/nir_opt_copy_prop_vars.c
> > @@ -0,0 +1,799 @@
> > +/*
> > + * Copyright © 2016 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining a
> > + * copy of this software and associated documentation files (the "Software"),
> > + * to deal in the Software without restriction, including without limitation
> > + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the next
> > + * paragraph) shall be included in all copies or substantial portions of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> > + * IN THE SOFTWARE.
> > + */
> > +
> > +#include "nir.h"
> > +#include "nir_builder.h"
> > +
> > +/**
> > + * Variable-based copy propagation
> > + *
> > + * Normally, NIR trusts in SSA form for most of its copy-propagation needs.
> > + * However, there are cases, especially when dealing with indirects, where SSA
> > + * won't help you.  This pass is for those times.  Specifically, it handles
> > + * the following things that the rest of NIR can't:
> > + *
> > + *  1) Copy-propagation on variables that have indirect access.  This includes
> > + *     propagating from indirect stores into indirect loads.
> > + *
> > + *  2) Dead code elimination of store_var and copy_var intrinsics based on
> > + *     killed destination values.
> > + *
> > + *  3) Removal of redundant load_var intrinsics.  We can't trust regular CSE
> > + *     to do this because it isn't aware of variable writes that may alias the
> > + *     value and make the former load invalid.
> > + *
> > + * Unfortunately, properly handling all of those cases makes this path rather
> > + * complex.  In order to avoid additional complexity, this pass is entirely
> > + * block-local.  If we tried to make it global, the data-flow analysis would
> > + * rapidly get out of hand.  Fortunately, for anything that is only ever
> > + * accessed directly, we get SSA-based copy propagation, which is extremely
> > + * powerful, so this isn't that great a loss.
> > + */
> > +
> > +struct value {
> > +   bool is_ssa;
> > +   union {
> > +      nir_ssa_def *ssa[4];
> > +      nir_deref_var *deref;
> > +   };
> > +};
> > +
> > +struct copy_entry {
> > +   struct list_head link;
> > +
> > +   nir_instr *store_instr[4];
> > +
> > +   unsigned comps_may_be_read;
> > +   struct value src;
> > +
> > +   nir_deref_var *dst;
> > +};
> > +
> > +struct copy_prop_var_state {
> > +   nir_shader *shader;
> > +
> > +   void *mem_ctx;
> > +
> > +   struct list_head copies;
> > +
> > +   /* We're going to be allocating and deleting a lot of copy entries so we'll
> > +    * keep a free list to avoid thrashing malloc too badly.
> > +    */
> > +   struct list_head copy_free_list;
> > +
> > +   bool progress;
> > +};
> > +
> > +static struct copy_entry *
> > +copy_entry_create(struct copy_prop_var_state *state,
> > +                  nir_deref_var *dst_deref)
> > +{
> > +   struct copy_entry *entry;
> > +   if (!list_empty(&state->copy_free_list)) {
> > +      struct list_head *item = state->copy_free_list.next;
> > +      list_del(item);
> > +      entry = LIST_ENTRY(struct copy_entry, item, link);
> > +      memset(entry, 0, sizeof(*entry));
> > +   } else {
> > +      entry = rzalloc(state->mem_ctx, struct copy_entry);
> > +   }
> > +
> > +   entry->dst = dst_deref;
> > +   list_add(&entry->link, &state->copies);
> > +
> > +   return entry;
> > +}
> > +
> > +static void
> > +copy_entry_destroy(struct copy_prop_var_state *state, struct copy_entry *entry)
>
> Not a huge deal but this doesn't really do what the name says. Maybe
> rename it?
>
> copy_entry_remove() ?

Changed locally. I couldn't come up with anything better.

> > +{
> > +   list_del(&entry->link);
> > +   list_add(&entry->link, &state->copy_free_list);
> > +}
> > +
> > +enum deref_compare_result {
> > +   derefs_equal_bit = (1 << 0),
> > +   derefs_may_alias_bit = (1 << 1),
> > +   derefs_a_contains_b_bit = (1 << 2),
> > +   derefs_b_contains_a_bit = (1 << 3),
> > +};
> > +
> > +/** Returns true if the storage referenced by deref completely contains
> > + * the storage referenced by sub.
> > + *
> > + * TODO: Should this go in core NIR?
>
> I'm not sure if it should. But we should decide one way or the other;
> not sure it's very helpful to leave this TODO in place.

Yeah... I stuck a TODO in there because I really liked the way it turned
out and it was nice and general so it *could* be moved into core NIR and
used by other things. Then again, no one else uses it right now. I could
go either way. Thoughts?
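For anyone skimming: the result is a bitset, and callers in this file are
meant to consume it roughly like this (my sketch; the derefs in the
comments are hypothetical GLSL-ish examples, just to illustrate each
bucket):

   enum deref_compare_result cmp = compare_derefs(a, b);

   if (cmp == 0) {
      /* Provably disjoint storage, e.g. foo[1].x vs. foo[2].x */
   } else if (cmp & derefs_equal_bit) {
      /* Exactly the same storage; equality implies both containment bits */
   } else if (cmp & derefs_a_contains_b_bit) {
      /* Everything b can touch lies inside a, e.g. a = foo[*], b = foo[i] */
   } else if (cmp & derefs_may_alias_bit) {
      /* They may overlap, but neither side provably contains the other */
   }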
> > + */
> > +static enum deref_compare_result
> > +compare_derefs(nir_deref_var *a, nir_deref_var *b)
> > +{
> > +   if (a->var != b->var)
> > +      return 0;
> > +
> > +   /* Start off assuming they fully compare.  We ignore equality for now.  In
> > +    * the end, we'll determine that by containment.
> > +    */
> > +   enum deref_compare_result result = derefs_may_alias_bit |
> > +                                      derefs_a_contains_b_bit |
> > +                                      derefs_b_contains_a_bit;
> > +
> > +   nir_deref *a_tail = &a->deref;
> > +   nir_deref *b_tail = &b->deref;
> > +   while (a_tail->child && b_tail->child) {
> > +      a_tail = a_tail->child;
> > +      b_tail = b_tail->child;
> > +
> > +      assert(a_tail->deref_type == b_tail->deref_type);
> > +      switch (a_tail->deref_type) {
> > +      case nir_deref_type_array: {
> > +         nir_deref_array *a_arr = nir_deref_as_array(a_tail);
> > +         nir_deref_array *b_arr = nir_deref_as_array(b_tail);
> > +
> > +         if (a_arr->deref_array_type == nir_deref_array_type_direct &&
> > +             b_arr->deref_array_type == nir_deref_array_type_direct) {
> > +            /* If they're both direct and have different offsets, they
> > +             * don't even alias, much less anything else.
> > +             */
> > +            if (a_arr->base_offset != b_arr->base_offset)
> > +               return 0;
> > +         } else if (a_arr->deref_array_type == nir_deref_array_type_wildcard) {
> > +            if (b_arr->deref_array_type != nir_deref_array_type_wildcard)
> > +               result &= ~derefs_b_contains_a_bit;
> > +         } else if (b_arr->deref_array_type == nir_deref_array_type_wildcard) {
> > +            if (a_arr->deref_array_type != nir_deref_array_type_wildcard)
> > +               result &= ~derefs_a_contains_b_bit;
> > +         } else if (a_arr->deref_array_type == nir_deref_array_type_indirect &&
> > +                    b_arr->deref_array_type == nir_deref_array_type_indirect) {
> > +            assert(a_arr->indirect.is_ssa && b_arr->indirect.is_ssa);
> > +            if (a_arr->indirect.ssa == b_arr->indirect.ssa) {
> > +               /* If they're different constant offsets from the same indirect
> > +                * then they don't alias at all.
> > +                */
> > +               if (a_arr->base_offset != b_arr->base_offset)
> > +                  return 0;
> > +               /* Otherwise the indirect and base both match */
> > +            } else {
> > +               /* If they have different indirect offsets then we can't
> > +                * prove anything about containment.
> > +                */
> > +               result &= ~(derefs_a_contains_b_bit | derefs_b_contains_a_bit);
> > +            }
> > +         } else {
> > +            /* In this case, one is indirect and the other direct so we can't
> > +             * prove anything about containment.
> > +             */
> > +            result &= ~(derefs_a_contains_b_bit | derefs_b_contains_a_bit);
> > +         }
> > +         break;
> > +      }
> > +
> > +      case nir_deref_type_struct: {
> > +         nir_deref_struct *a_struct = nir_deref_as_struct(a_tail);
> > +         nir_deref_struct *b_struct = nir_deref_as_struct(b_tail);
> > +
> > +         /* If they're different struct members, they don't even alias */
> > +         if (a_struct->index != b_struct->index)
> > +            return 0;
> > +         break;
> > +      }
> > +
> > +      default:
> > +         unreachable("Invalid deref type");
> > +      }
> > +   }
> > +
> > +   /* If a is longer than b, then it can't contain b */
> > +   if (a_tail->child)
> > +      result &= ~derefs_a_contains_b_bit;
> > +   if (b_tail->child)
> > +      result &= ~derefs_b_contains_a_bit;
> > +
> > +   /* If a contains b and b contains a, they must be equal.
> > +    */
> > +   if ((result & derefs_a_contains_b_bit) && (result & derefs_b_contains_a_bit))
> > +      result |= derefs_equal_bit;
> > +
> > +   return result;
> > +}
> > +
> > +static void
> > +remove_dead_writes(struct copy_prop_var_state *state,
> > +                   struct copy_entry *entry, unsigned write_mask)
> > +{
> > +   /* We're overwriting another entry.  Some of its components may not
> > +    * have been read yet and, if that's the case, we may be able to delete
> > +    * some instructions but we have to be careful.
> > +    */
> > +   unsigned dead_comps = write_mask & ~entry->comps_may_be_read;
> > +   if (!dead_comps)
> > +      return;
> > +
> > +   for (unsigned i = 0; i < 4; i++) {
> > +      if (!(dead_comps & (1 << i)))
> > +         continue;
>
> Up to you but we could replace this and the return above with:
>
>    unsigned mask = dead_comps;
>    while (mask) {
>       const int i = u_bit_scan(&mask);
>
> It's becoming an increasingly used pattern in Mesa and I find it
> cleaner, but you may disagree.
>
> > +
> > +      nir_instr *instr = entry->store_instr[i];
> > +
> > +      /* We may have already deleted it on a previous iteration */
> > +      if (!instr)
> > +         continue;
> > +
> > +      /* See if this instr is used anywhere that it's not dead */
> > +      bool keep = false;
> > +      for (unsigned j = 0; j < 4; j++) {
> > +         if (entry->store_instr[j] == instr) {
> > +            if (dead_comps & (1 << j)) {
> > +               entry->store_instr[j] = NULL;
> > +            } else {
> > +               keep = true;
> > +            }
> > +         }
> > +      }
> > +
> > +      if (!keep) {
> > +         nir_instr_remove(instr);
> > +         state->progress = true;
> > +      }
> > +   }
> > +}
> > +
> > +static struct copy_entry *
> > +lookup_entry_for_deref(struct copy_prop_var_state *state,
> > +                       nir_deref_var *deref,
> > +                       enum deref_compare_result allowed_comparisons)
> > +{
> > +   list_for_each_entry(struct copy_entry, iter, &state->copies, link) {
> > +      if (compare_derefs(iter->dst, deref) & allowed_comparisons)
> > +         return iter;
> > +   }
> > +
> > +   return NULL;
> > +}
> > +
> > +static void
> > +mark_aliased_entries_as_read(struct copy_prop_var_state *state,
> > +                             nir_deref_var *deref, unsigned components)
> > +{
> > +   list_for_each_entry(struct copy_entry, iter, &state->copies, link) {
> > +      if (compare_derefs(iter->dst, deref) & derefs_may_alias_bit)
> > +         iter->comps_may_be_read |= components;
> > +   }
> > +}
> > +
> > +static struct copy_entry *
> > +get_entry_and_kill_aliases(struct copy_prop_var_state *state,
> > +                           nir_deref_var *deref,
> > +                           unsigned write_mask)
> > +{
> > +   struct copy_entry *entry = NULL;
> > +   list_for_each_entry_safe(struct copy_entry, iter, &state->copies, link) {
> > +      if (!iter->src.is_ssa) {
> > +         /* If this write aliases the source of some entry, get rid of it */
> > +         if (compare_derefs(iter->src.deref, deref) & derefs_may_alias_bit) {
> > +            copy_entry_destroy(state, iter);
> > +            continue;
> > +         }
> > +      }
> > +
> > +      enum deref_compare_result comp = compare_derefs(iter->dst, deref);
> > +      /* This is a store operation.  If we completely overwrite some value, we
> > +       * want to delete any dead writes that may be present.
> > +       */
> > +      if (comp & derefs_b_contains_a_bit)
> > +         remove_dead_writes(state, iter, write_mask);
> > +
> > +      if (comp & derefs_equal_bit) {
> > +         assert(entry == NULL);
> > +         entry = iter;
> > +      } else if (comp & derefs_may_alias_bit) {
> > +         copy_entry_destroy(state, iter);
> > +      }
> > +   }
> > +
> > +   if (entry == NULL)
> > +      entry = copy_entry_create(state, deref);
> > +
> > +   return entry;
> > +}
> > +
> > +static void
> > +apply_barrier_for_modes(struct copy_prop_var_state *state,
> > +                        nir_variable_mode modes)
> > +{
> > +   list_for_each_entry_safe(struct copy_entry, iter, &state->copies, link) {
> > +      if ((iter->dst->var->data.mode & modes) ||
> > +          (!(iter->src.is_ssa) && (iter->src.deref->var->data.mode & modes)))
>
> !(iter->src.is_ssa) -> !iter->src.is_ssa ??

Sure. Changed locally.

> > +         copy_entry_destroy(state, iter);
> > +   }
> > +}
> > +
> > +static void
> > +store_to_entry(struct copy_prop_var_state *state, struct copy_entry *entry,
> > +               const struct value *value, unsigned write_mask,
> > +               nir_instr *store_instr)
> > +{
> > +   entry->comps_may_be_read &= ~write_mask;
> > +   if (value->is_ssa) {
> > +      entry->src.is_ssa = true;
> > +      /* Only overwrite the written components */
> > +      for (unsigned i = 0; i < 4; i++) {
> > +         if (write_mask & (1 << i)) {
> > +            entry->store_instr[i] = store_instr;
> > +            entry->src.ssa[i] = value->ssa[i];
> > +         }
> > +      }
> > +   } else {
> > +      /* Non-SSA stores always write everything */
> > +      entry->src.is_ssa = false;
> > +      entry->src.deref = value->deref;
> > +      for (unsigned i = 0; i < 4; i++)
> > +         entry->store_instr[i] = store_instr;
> > +   }
> > +}
> > +
> > +/* Remove an instruction and return a cursor pointing to where it was */
> > +static nir_cursor
> > +instr_remove_cursor(nir_instr *instr)
> > +{
> > +   nir_cursor cursor;
> > +   nir_instr *prev = nir_instr_prev(instr);
> > +   if (prev) {
> > +      cursor = nir_after_instr(prev);
> > +   } else {
> > +      cursor = nir_before_block(instr->block);
> > +   }
> > +   nir_instr_remove(instr);
> > +   return cursor;
> > +}
>
> I wonder if this is useful elsewhere?

I wondered that too. I'm happy to move it to nir.c if you'd like.

> > +
> > +static bool
> > +load_from_ssa_entry_value(struct copy_prop_var_state *state,
> > +                          struct copy_entry *entry,
> > +                          nir_builder *b, nir_intrinsic_instr *intrin,
> > +                          struct value *value)
> > +{
> > +   *value = entry->src;
> > +   assert(value->is_ssa);
> > +
> > +   const struct glsl_type *type = nir_deref_tail(&entry->dst->deref)->type;
> > +   unsigned num_components = glsl_get_vector_elements(type);
> > +
> > +   uint8_t available = 0;
> > +   bool all_same = true;
> > +   for (unsigned i = 0; i < num_components; i++) {
> > +      if (value->ssa[i])
> > +         available |= (1 << i);
> > +
> > +      if (value->ssa[i] != value->ssa[0])
> > +         all_same = false;
> > +   }
> > +
> > +   if (all_same) {
> > +      /* Our work here is done */
>
> Mind expanding on this? :P Without looking at the caller I have no idea
> why the instruction is being removed. Going on the function name I
> suspect it's because we can use the ssa entry value instead, since it
> contains all the values we are looking for, but it would be nice to be
> clear.
>
> The other alternative is to add a general description to the function
> itself.

Right... I can add a nice detailed comment above the function.
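Going back to Timothy's u_bit_scan() suggestion in remove_dead_writes()
above: spelled out (untested sketch, using u_bit_scan() from
src/util/bitscan.h, which clears the lowest set bit of *mask and returns
its index), the loop would look roughly like this and would also subsume
the early "if (!dead_comps) return;":

   unsigned mask = write_mask & ~entry->comps_may_be_read;
   while (mask) {
      /* Pop the lowest set bit; i is the index of a dead component */
      const int i = u_bit_scan(&mask);

      nir_instr *instr = entry->store_instr[i];

      /* We may have already deleted it on a previous iteration */
      if (!instr)
         continue;

      /* ... rest of the loop body unchanged ... */
   }

Since the loop body only ever sees set bits, the per-iteration
"if (!(dead_comps & (1 << i))) continue;" test goes away too.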
> > +      b->cursor = instr_remove_cursor(&intrin->instr);
> > +      intrin->instr.block = NULL;
> > +      return true;
> > +   }
> > +
> > +   if (available != (1 << num_components) - 1 &&
> > +       intrin->intrinsic == nir_intrinsic_load_var &&
> > +       (available & nir_ssa_def_components_read(&intrin->dest.ssa)) == 0) {
> > +      /* If none of the components read are available as SSA values, then we
> > +       * should just bail.  Otherwise, we would end up replacing the uses of
> > +       * the load_var with a vecN() that just gathers up its components.
> > +       */
> > +      return false;
> > +   }
> > +
> > +   b->cursor = nir_after_instr(&intrin->instr);
> > +
> > +   nir_ssa_def *load_def =
> > +      intrin->intrinsic == nir_intrinsic_load_var ? &intrin->dest.ssa : NULL;
> > +
> > +   bool keep_intrin = false;
> > +   nir_ssa_def *comps[4];
> > +   for (unsigned i = 0; i < num_components; i++) {
> > +      if (value->ssa[i]) {
> > +         comps[i] = nir_channel(b, value->ssa[i], i);
> > +      } else {
> > +         /* We don't have anything for this component in our
> > +          * list.  Just re-use a channel from the load.
> > +          */
> > +         if (load_def == NULL)
> > +            load_def = nir_load_deref_var(b, entry->dst);
> > +
> > +         if (load_def->parent_instr == &intrin->instr)
> > +            keep_intrin = true;
> > +
> > +         comps[i] = nir_channel(b, load_def, i);
> > +      }
> > +   }
> > +
> > +   nir_ssa_def *vec = nir_vec(b, comps, num_components);
> > +   for (unsigned i = 0; i < num_components; i++)
> > +      value->ssa[i] = vec;
> > +
> > +   if (!keep_intrin) {
> > +      /* Removing this instruction should not touch the cursor because we
> > +       * created the cursor after the intrinsic and have added at least one
> > +       * instruction (the vec) since then.
> > +       */
>
> I might be blind but where was that added? Do we need an assert here?
> right above the for above this if.
>
>    assert(cursor != intrin->instr)

Sure

> > +      nir_instr_remove(&intrin->instr);
> > +      intrin->instr.block = NULL;
> > +   }
> > +
> > +   return true;
> > +}
> > +
> > +/**
> > + * Specialize the wildcards in a deref chain
> > + *
> > + * This function returns a deref chain identical to \param deref except that
> > + * some of its wildcards are replaced with indices from \param specific.  The
> > + * process is guided by \param guide which references the same type as \param
> > + * specific but has the same wildcard array lengths as \param deref.
> > + */
> > +static nir_deref_var *
> > +specialize_wildcards(nir_deref_var *deref,
> > +                     nir_deref_var *guide,
> > +                     nir_deref_var *specific,
> > +                     void *mem_ctx)
> > +{
> > +   nir_deref_var *ret = nir_deref_var_create(mem_ctx, deref->var);
> > +
> > +   nir_deref *deref_tail = deref->deref.child;
> > +   nir_deref *guide_tail = guide->deref.child;
> > +   nir_deref *spec_tail = specific->deref.child;
> > +   nir_deref *ret_tail = &ret->deref;
> > +   while (deref_tail) {
> > +      switch (deref_tail->deref_type) {
> > +      case nir_deref_type_array: {
> > +         nir_deref_array *deref_arr = nir_deref_as_array(deref_tail);
> > +
> > +         nir_deref_array *ret_arr = nir_deref_array_create(ret_tail);
> > +         ret_arr->deref.type = deref_arr->deref.type;
> > +         ret_arr->deref_array_type = deref_arr->deref_array_type;
> > +
> > +         switch (deref_arr->deref_array_type) {
> > +         case nir_deref_array_type_direct:
> > +            ret_arr->base_offset = deref_arr->base_offset;
> > +            break;
> > +         case nir_deref_array_type_indirect:
> > +            ret_arr->base_offset = deref_arr->base_offset;
> > +            assert(deref_arr->indirect.is_ssa);
> > +            ret_arr->indirect = deref_arr->indirect;
> > +            break;
> > +         case nir_deref_array_type_wildcard:
> > +            /* This is where things get tricky.  We have to search through
> > +             * the entry deref to find its corresponding wildcard and fill
> > +             * this slot in with the value from the src.
> > +             */
> > +            while (guide_tail) {
> > +               if (guide_tail->deref_type == nir_deref_type_array &&
> > +                   nir_deref_as_array(guide_tail)->deref_array_type ==
> > +                   nir_deref_array_type_wildcard)
> > +                  break;
> > +
> > +               guide_tail = guide_tail->child;
> > +               spec_tail = spec_tail->child;
> > +            }
> > +
> > +            nir_deref_array *spec_arr = nir_deref_as_array(spec_tail);
> > +            ret_arr->deref_array_type = spec_arr->deref_array_type;
> > +            ret_arr->base_offset = spec_arr->base_offset;
> > +            ret_arr->indirect = spec_arr->indirect;
> > +         }
> > +
> > +         ret_tail->child = &ret_arr->deref;
> > +         break;
> > +      }
> > +      case nir_deref_type_struct: {
> > +         nir_deref_struct *deref_struct = nir_deref_as_struct(deref_tail);
> > +
> > +         nir_deref_struct *ret_struct =
> > +            nir_deref_struct_create(ret_tail, deref_struct->index);
> > +         ret_struct->deref.type = deref_struct->deref.type;
> > +
> > +         ret_tail->child = &ret_struct->deref;
> > +         break;
> > +      }
> > +      case nir_deref_type_var:
> > +         unreachable("Invalid deref type");
> > +      }
> > +
> > +      deref_tail = deref_tail->child;
> > +      ret_tail = ret_tail->child;
> > +   }
> > +
> > +   return ret;
> > +}
> > +
> > +static bool
> > +load_from_deref_entry_value(struct copy_prop_var_state *state,
> > +                            struct copy_entry *entry,
> > +                            nir_builder *b, nir_intrinsic_instr *intrin,
> > +                            nir_deref_var *src, struct value *value)
> > +{
> > +   *value = entry->src;
> > +
> > +   /* Walk the deref to get the two tails and also figure out if we need to
> > +    * specialize any wildcards.
> > +    */
> > +   bool need_to_specialize_wildcards = false;
> > +   nir_deref *entry_tail = &entry->dst->deref;
> > +   nir_deref *src_tail = &src->deref;
> > +   while (entry_tail->child && src_tail->child) {
> > +      assert(src_tail->child->deref_type == entry_tail->child->deref_type);
> > +      if (src_tail->child->deref_type == nir_deref_type_array) {
> > +         nir_deref_array *entry_arr = nir_deref_as_array(entry_tail->child);
> > +         nir_deref_array *src_arr = nir_deref_as_array(src_tail->child);
> > +
> > +         if (src_arr->deref_array_type != nir_deref_array_type_wildcard &&
> > +             entry_arr->deref_array_type == nir_deref_array_type_wildcard)
> > +            need_to_specialize_wildcards = true;
> > +      }
> > +
> > +      entry_tail = entry_tail->child;
> > +      src_tail = src_tail->child;
> > +   }
> > +
> > +   /* If the entry deref is longer than the source deref then it refers to a
> > +    * smaller type and we can't source from it.
> > +    */
> > +   assert(entry_tail->child == NULL);
> > +
> > +   if (need_to_specialize_wildcards) {
> > +      /* The entry has some wildcards that are not in src.  This means we need
> > +       * to construct a new deref based on the entry but using the wildcards
> > +       * from the source and guided by the entry dst.  Oof.
> > +       */
> > +      value->deref = specialize_wildcards(entry->src.deref, entry->dst, src,
> > +                                          state->mem_ctx);
> > +   } else {
> > +      /* We're going to need to make a copy anyway... */
>
> because? If it's worth commenting on you might as well finish the
> comment. Otherwise, if you think it's obvious, can you just remove it?
> To be honest I'm not sure what is going on here exactly so it would be
> good if you could expand the comment to make it totally obvious.

I added the following comment:

/* Do a "load" from a deref-based entry and return it in "value" as a value.
 * The deref returned in "value" will always be a fresh copy so the caller can
 * steal it and assign it to the instruction directly without copying it
 * again.
 */

> > +      value->deref =
> > +         nir_deref_as_var(nir_copy_deref(state->mem_ctx, &value->deref->deref));
> > +   }
> > +
> > +   if (src_tail->child) {
> > +      /* If our source deref is longer than the entry deref, that's ok because
> > +       * it just means the entry deref needs to be extended a bit.
> > +       */
> > +      nir_deref *value_tail = nir_deref_tail(&value->deref->deref);
> > +      value_tail->child = nir_copy_deref(value_tail, src_tail->child);
>
> So we need to copy above because we might modify things here?

Also, yes, we need a copy so that we can modify it.

> Ok, returning here after reviewing below: it seems we steal the deref so
> I guess that's why we want a copy.
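To make the wildcard case concrete, here is a hypothetical example (my
notation, not from the patch) of what this path has to produce. Suppose
the entry was recorded from a wildcard copy and we then see a load with a
specific index:

   /*    copy_var arr[*].f, img[*].g   -> entry->dst       = arr[*].f
    *                                     entry->src.deref = img[*].g
    *
    *    load_var arr[3].f             -> src = arr[3].f
    *
    * The value we propagate must be img[3].g, so the wildcard in
    * entry->src.deref gets replaced with the index 3 taken from "src",
    * using entry->dst as the guide that says which array levels of the
    * two chains correspond.  That is exactly the
    * specialize_wildcards(entry->src.deref, entry->dst, src, mem_ctx)
    * call above.
    */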
> > +   }
> > +
> > +   b->cursor = instr_remove_cursor(&intrin->instr);
> > +
> > +   return true;
> > +}
> > +
> > +static bool
> > +try_load_from_entry(struct copy_prop_var_state *state, struct copy_entry *entry,
> > +                    nir_builder *b, nir_intrinsic_instr *intrin,
> > +                    nir_deref_var *src, struct value *value)
> > +{
> > +   if (entry == NULL)
> > +      return false;
> > +
> > +   if (entry->src.is_ssa) {
> > +      return load_from_ssa_entry_value(state, entry, b, intrin, value);
> > +   } else {
> > +      return load_from_deref_entry_value(state, entry, b, intrin, src, value);
> > +   }
> > +}
> > +
> > +static void
> > +copy_prop_vars_block(struct copy_prop_var_state *state,
> > +                     nir_builder *b, nir_block *block)
> > +{
> > +   /* Start each block with a blank slate */
> > +   list_for_each_entry_safe(struct copy_entry, iter, &state->copies, link)
> > +      copy_entry_destroy(state, iter);
> > +
> > +   nir_foreach_instr_safe(instr, block) {
> > +      if (instr->type != nir_instr_type_intrinsic)
> > +         continue;
> > +
> > +      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
> > +      switch (intrin->intrinsic) {
> > +      case nir_intrinsic_barrier:
> > +      case nir_intrinsic_memory_barrier:
> > +         /* If we hit a barrier, we need to trash everything that may possible
>
> may possibly

yup

> > +          * be accessible to another thread.  Locals, globals, and things of
> > +          * the like are safe, however.
> > +          */
> > +         apply_barrier_for_modes(state, ~(nir_var_local | nir_var_global |
> > +                                          nir_var_shader_in | nir_var_uniform));
> > +         break;
> > +
> > +      case nir_intrinsic_emit_vertex:
> > +      case nir_intrinsic_emit_vertex_with_counter:
> > +         apply_barrier_for_modes(state, nir_var_shader_out);
> > +         break;
> > +
> > +      case nir_intrinsic_load_var: {
>
> Is it worth short-circuiting this?
>
>    if (list_empty(&state->copies))
>       continue;
>
> Looking closely below I'm not sure it is.
>
> Never mind, I see we need to create the new copy entry below regardless.
>
> > +         nir_deref_var *src = intrin->variables[0];
> > +
> > +         uint8_t comps_read = nir_ssa_def_components_read(&intrin->dest.ssa);
> > +         mark_aliased_entries_as_read(state, src, comps_read);
> > +
> > +         struct copy_entry *src_entry =
> > +            lookup_entry_for_deref(state, src, derefs_a_contains_b_bit);
> > +         struct value value;
> > +         if (try_load_from_entry(state, src_entry, b, intrin, src, &value)) {
> > +            if (value.is_ssa) {
> > +               /* lookup_load has already ensured that we get a single SSA
> > +                * value that has all of the channels.  We just have to do the
> > +                * rewrite operation.
> > +                */
> > +               if (intrin->instr.block) {
> > +                  /* The lookup left our instruction in-place.  This means it
> > +                   * must have used it to vec up a bunch of different sources.
> > +                   * We need to be careful
>
> Is this the end of the sentence or is there meant to be more here?

I finished the sentence.

> > +                   */
> > +                  nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
> > +                                                 nir_src_for_ssa(value.ssa[0]),
> > +                                                 value.ssa[0]->parent_instr);
> > +               } else {
> > +                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
> > +                                           nir_src_for_ssa(value.ssa[0]));
> > +               }
> > +            } else {
> > +               /* We're turning it into a load of a different variable */
> > +               ralloc_steal(intrin, value.deref);
> > +               intrin->variables[0] = value.deref;
> > +
> > +               /* Put it back in again.
> > +                */
> > +               nir_builder_instr_insert(b, instr);
> > +
> > +               value.is_ssa = true;
> > +               for (unsigned i = 0; i < intrin->num_components; i++)
> > +                  value.ssa[i] = &intrin->dest.ssa;
> > +            }
> > +            state->progress = true;
> > +         } else {
> > +            value.is_ssa = true;
> > +            for (unsigned i = 0; i < intrin->num_components; i++)
> > +               value.ssa[i] = &intrin->dest.ssa;
> > +         }
> > +
> > +         /* Now that we have a value, we're going to store it back so that we
> > +          * have the right value next time we come looking for it.  In order
> > +          * to do this, we need an exact match, not just something that
> > +          * contains what we're looking for.
> > +          */
> > +         struct copy_entry *store_entry =
> > +            lookup_entry_for_deref(state, src, derefs_equal_bit);
> > +         if (!store_entry)
> > +            store_entry = copy_entry_create(state, src);
> > +
> > +         /* Set up a store to this entry with the value of the load.  This way
> > +          * we can potentially remove subsequent loads.  However, we use a
> > +          * NULL instruction so we don't try and delete the load on a
> > +          * subsequent store.
> > +          */
> > +         store_to_entry(state, store_entry, &value,
> > +                        ((1 << intrin->num_components) - 1), NULL);
> > +         break;
> > +      }
> > +
> > +      case nir_intrinsic_store_var: {
> > +         struct value value = {
> > +            .is_ssa = true
> > +         };
> > +
> > +         for (unsigned i = 0; i < intrin->num_components; i++)
> > +            value.ssa[i] = intrin->src[0].ssa;
> > +
> > +         nir_deref_var *dst = intrin->variables[0];
> > +         unsigned wrmask = nir_intrinsic_write_mask(intrin);
> > +         struct copy_entry *entry =
> > +            get_entry_and_kill_aliases(state, dst, wrmask);
> > +         store_to_entry(state, entry, &value, wrmask, &intrin->instr);
> > +         break;
> > +      }
> > +
> > +      case nir_intrinsic_copy_var: {
> > +         nir_deref_var *dst = intrin->variables[0];
> > +         nir_deref_var *src = intrin->variables[1];
> > +
> > +         if (compare_derefs(src, dst) & derefs_equal_bit) {
> > +            /* This is a no-op self-copy.  Get rid of it */
> > +            nir_instr_remove(instr);
> > +            continue;
> > +         }
> > +
> > +         mark_aliased_entries_as_read(state, src, 0xf);
> > +
> > +         struct copy_entry *src_entry =
> > +            lookup_entry_for_deref(state, src, derefs_a_contains_b_bit);
> > +         struct value value;
> > +         if (try_load_from_entry(state, src_entry, b, intrin, src, &value)) {
> > +            if (value.is_ssa) {
> > +               nir_store_deref_var(b, dst, value.ssa[0], 0xf);
> > +               intrin = nir_instr_as_intrinsic(nir_builder_last_instr(b));
> > +            } else {
> > +               /* If this would be a no-op self-copy, don't bother. */
> > +               if (compare_derefs(value.deref, dst) & derefs_equal_bit)
> > +                  continue;
> > +
> > +               /* Just turn it into a copy of a different deref */
> > +               ralloc_steal(intrin, value.deref);
> > +               intrin->variables[1] = value.deref;
> > +
> > +               /* Put it back in again.
> > +                */
> > +               nir_builder_instr_insert(b, instr);
> > +            }
> > +
> > +            state->progress = true;
> > +         } else {
> > +            value = (struct value) {
> > +               .is_ssa = false,
> > +               .deref = src,
> > +            };
> > +         }
> > +
> > +         struct copy_entry *dst_entry =
> > +            get_entry_and_kill_aliases(state, dst, 0xf);
> > +         store_to_entry(state, dst_entry, &value, 0xf, &intrin->instr);
> > +         break;
> > +      }
> > +
> > +      default:
> > +         break;
> > +      }
> > +   }
> > +}
> > +
> > +bool
> > +nir_opt_copy_prop_vars(nir_shader *shader)
> > +{
> > +   struct copy_prop_var_state state;
> > +
> > +   state.shader = shader;
> > +   state.mem_ctx = ralloc_context(NULL);
> > +   list_inithead(&state.copies);
> > +   list_inithead(&state.copy_free_list);
> > +
> > +   bool global_progress = false;
> > +   nir_foreach_function(function, shader) {
> > +      if (!function->impl)
> > +         continue;
> > +
> > +      nir_builder b;
> > +      nir_builder_init(&b, function->impl);
> > +
> > +      state.progress = false;
> > +      nir_foreach_block(block, function->impl)
> > +         copy_prop_vars_block(&state, &b, block);
> > +
> > +      if (state.progress) {
> > +         nir_metadata_preserve(function->impl, nir_metadata_block_index |
> > +                                               nir_metadata_dominance);
> > +         global_progress = true;
> > +      }
> > +   }
> > +
> > +   ralloc_free(state.mem_ctx);
> > +
> > +   return global_progress;
> > +}
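One last note for anyone who wants to try the pass: it reports progress
like every other nir_opt_* pass, so it slots straight into a driver's
fixed-point optimization loop. A minimal sketch (the surrounding loop is
hypothetical; the pass names are the ones declared in nir.h):

   #include "nir.h"

   static void
   optimize(nir_shader *shader)
   {
      bool progress;
      do {
         progress = false;
         /* Variable-based copy-prop first so the SSA-based passes below
          * get to clean up the loads/stores it rewrites.
          */
         progress |= nir_opt_copy_prop_vars(shader);
         progress |= nir_copy_prop(shader);
         progress |= nir_opt_dce(shader);
         progress |= nir_opt_cse(shader);
      } while (progress);
   }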