Caio Marcelo de Oliveira Filho <caio.olive...@intel.com> writes: > Instead of keeping multiple acp_entries in lists, have a single > acp_entry per variable. With this, the implementation of clone is more > convenient and now fully implemented. In the previous code, clone was > only partial. > > Before this patch, each acp_entry struct represented a write to a > variable including LHS, RHS and a mask of what channels were written > to. There were two main hash tables, the first (lhs_ht) stored a list > of acp_entries per LHS variable, with the values available to copy for > that variable; the second (rhs_ht) was a "reverse index" for the first > hash table, so stored acp_entries per RHS variable. > > After the patch, there's a single acp_entry struct per LHS variable, > it contains an array with references to the RHS variables per > channel. There now is a single hash table, from LHS variable to the > corresponding entry. The "reverse index" is stored in the ACP entry, > in the form of a set of variables that copy from the LHS. To make the > clone operation cheaper, the ACP entries are created on demand. > > This should not change the result of copy propagation, a later patch > will take advantage of the clone operation. > > v2: Add note clarifying how the hashtable is destroyed. > --- > .../glsl/opt_copy_propagation_elements.cpp | 244 +++++++++--------- > 1 file changed, 128 insertions(+), 116 deletions(-) > > diff --git a/src/compiler/glsl/opt_copy_propagation_elements.cpp > b/src/compiler/glsl/opt_copy_propagation_elements.cpp > index 05965128fd9..989bd81afae 100644 > --- a/src/compiler/glsl/opt_copy_propagation_elements.cpp > +++ b/src/compiler/glsl/opt_copy_propagation_elements.cpp > @@ -47,63 +47,30 @@ > #include "ir_optimization.h" > #include "compiler/glsl_types.h" > #include "util/hash_table.h" > +#include "util/set.h" > > static bool debug = false; > > namespace { > > -class acp_entry; > - > -/* Class that refers to acp_entry in another exec_list. Used > - * when making removals based on rhs. > - */ > -class acp_ref : public exec_node > -{ > -public: > - acp_ref(acp_entry *e) > - { > - entry = e; > - } > - acp_entry *entry; > -}; > - > -class acp_entry : public exec_node > +class acp_entry > { > public: > - /* override operator new from exec_node */ > DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(acp_entry) > > - acp_entry(ir_variable *lhs, ir_variable *rhs, int write_mask, int > swizzle[4]) > - : rhs_node(this) > - { > - this->lhs = lhs; > - this->rhs = rhs; > - this->write_mask = write_mask; > - memcpy(this->swizzle, swizzle, sizeof(this->swizzle)); > - } > + ir_variable *rhs_element[4]; > + unsigned rhs_channel[4]; > > - ir_variable *lhs; > - ir_variable *rhs; > - unsigned int write_mask; > - int swizzle[4]; > - acp_ref rhs_node; > + set *dsts; > }; > > class copy_propagation_state { > public: > DECLARE_RZALLOC_CXX_OPERATORS(copy_propagation_state); > > - copy_propagation_state(void *mem_ctx, void *lin_ctx) > - { > - /* Use 'this' as context for the tables, no explicit destruction > - * needed later. */ > - lhs_ht = _mesa_hash_table_create(this, _mesa_hash_pointer, > - _mesa_key_pointer_equal); > - rhs_ht = _mesa_hash_table_create(this, _mesa_hash_pointer, > - _mesa_key_pointer_equal); > - this->mem_ctx = mem_ctx; > - this->lin_ctx = lin_ctx; > - } > + copy_propagation_state() > + : copy_propagation_state(NULL) > + {} > > copy_propagation_state* clone() > { > @@ -112,94 +79,139 @@ public: > > void erase_all() > { > - _mesa_hash_table_clear(lhs_ht, NULL); > - _mesa_hash_table_clear(rhs_ht, NULL); > + /* Individual elements were allocated from a linear allocator, so will > + * be destroyed when the state is destroyed. */ > + _mesa_hash_table_clear(acp, NULL); > + fallback = NULL; > } > > void erase(ir_variable *var, unsigned write_mask) > { > - /* removal of lhs entries */ > - hash_entry *ht_entry = _mesa_hash_table_search(lhs_ht, var); > - if (ht_entry) { > - exec_list *lhs_list = (exec_list *) ht_entry->data; > - foreach_in_list_safe(acp_entry, entry, lhs_list) { > - entry->write_mask = entry->write_mask & ~write_mask; > - if (entry->write_mask == 0) { > - entry->remove(); > - continue; > + acp_entry *entry = pull_acp(var); > + > + for (int i = 0; i < 4; i++) { > + if (!entry->rhs_element[i]) > + continue; > + if ((write_mask & (1 << i)) == 0) > + continue; > + > + ir_variable *to_remove = entry->rhs_element[i]; > + entry->rhs_element[i] = NULL; > +
This loop was confusing to me. Maybe: /* Don't remove from dsts until we're cleared the last use. * Some uses may be left over if the write_mask wasn't full. */ > + for (int j = 0; j < 4; j++) { > + if (entry->rhs_element[j] == to_remove) { > + to_remove = NULL; > + break; > } Add a comment here, /* For any usage of our variable on the RHS, clear it out. */ > + /* TODO: Check write mask, and possibly not clear everything. */ > + struct set_entry *set_entry; > + set_foreach(entry->dsts, set_entry) { > + ir_variable *dst_var = (ir_variable *)set_entry->key; > + acp_entry *dst_entry = pull_acp(dst_var); > + for (int i = 0; i < 4; i++) { > + if (dst_entry->rhs_element[i] == var) > + dst_entry->rhs_element[i] = NULL; > } > } > + _mesa_set_clear(entry->dsts, NULL); Optional: put this up above as _mesa_set_remove(entry->dsts, dst_entry), so you only walk the table once. > } > void write(ir_variable *lhs, ir_variable *rhs, unsigned write_mask, int > swizzle[4]) > { > - acp_entry *entry = new(this->lin_ctx) acp_entry(lhs, rhs, write_mask, > swizzle); > - > - /* lhs hash, hash of lhs -> acp_entry lists */ > - hash_entry *ht_entry = _mesa_hash_table_search(lhs_ht, lhs); > - if (ht_entry) { > - exec_list *lhs_list = (exec_list *) ht_entry->data; > - lhs_list->push_tail(entry); > - } else { > - exec_list *lhs_list = new(mem_ctx) exec_list; > - lhs_list->push_tail(entry); > - _mesa_hash_table_insert(lhs_ht, lhs, lhs_list); > - } > + acp_entry *lhs_entry = pull_acp(lhs); > + > + for (int i = 0; i < 4; i++) { > + if ((write_mask & (1 << i)) == 0) > + continue; > + ir_variable *to_remove = lhs_entry->rhs_element[i]; > + lhs_entry->rhs_element[i] = rhs; > + lhs_entry->rhs_channel[i] = swizzle[i]; > + > + for (int j = 0; j < 4; j++) { > + if (lhs_entry->rhs_element[j] == to_remove) { > + to_remove = NULL; > + break; > + } > + } > > - /* rhs hash, hash of rhs -> acp_entry pointers to lhs lists */ > - ht_entry = _mesa_hash_table_search(rhs_ht, rhs); > - if (ht_entry) { > - exec_list *rhs_list = (exec_list *) ht_entry->data; > - rhs_list->push_tail(&entry->rhs_node); > - } else { > - exec_list *rhs_list = new(mem_ctx) exec_list; > - rhs_list->push_tail(&entry->rhs_node); > - _mesa_hash_table_insert(rhs_ht, rhs, rhs_list); > + if (to_remove) { > + acp_entry *element = pull_acp(to_remove); > + assert(element); > + _mesa_set_remove_key(element->dsts, lhs); > + } Maybe move the j loop and to_remove check to a helper function? remove_unused_var_from_dsts(acp_entry *entry, ir_variable *var)? I'd like to see something like the comments added, but the two little refactors are optional. Either way: Reviewed-by: Eric Anholt <e...@anholt.net>
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev