On Wed, Dec 17, 2014 at 1:52 PM, Connor Abbott <cwabbo...@gmail.com> wrote: > > I'm sure you're already aware, but there are two things we could do to > speed this up: > > 1. Pre-compute def/kill sets for each block similar to what i965 does. > 2. Use a worklist + an array of flags for "this block is in the > worklist" rather than walking all the basic blocks in reverse to find > the few we need to update. > > Wrt #2, we already use a worklist in the DCE pass, but it's kinda lame > because it's using a linked list when we could just allocate an array > of pointers up-front based on the maximum size (the number of blocks > in this case, the number of SSA definitions in that case) and use it > as a ringbuffer. It would be a nice cleanup to implement such a > bounded worklist and share it between these two passes, since we'll > probably want to use it for lots of other passes too. > > I don't either thing should block merging this, though. > > A few other comments below. > > On Tue, Dec 16, 2014 at 1:05 AM, Jason Ekstrand <ja...@jlekstrand.net> > wrote: > > --- > > src/glsl/Makefile.sources | 1 + > > src/glsl/nir/nir.h | 13 ++ > > src/glsl/nir/nir_live_variables.c | 282 > ++++++++++++++++++++++++++++++++++++++ > > src/glsl/nir/nir_metadata.c | 2 + > > src/mesa/main/bitset.h | 1 + > > 5 files changed, 299 insertions(+) > > create mode 100644 src/glsl/nir/nir_live_variables.c > > > > diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources > > index 4eb6320..433224e 100644 > > --- a/src/glsl/Makefile.sources > > +++ b/src/glsl/Makefile.sources > > @@ -20,6 +20,7 @@ NIR_FILES = \ > > $(GLSL_SRCDIR)/nir/nir_from_ssa.c \ > > $(GLSL_SRCDIR)/nir/nir_intrinsics.c \ > > $(GLSL_SRCDIR)/nir/nir_intrinsics.h \ > > + $(GLSL_SRCDIR)/nir/nir_live_variables.c \ > > $(GLSL_SRCDIR)/nir/nir_lower_atomics.c \ > > $(GLSL_SRCDIR)/nir/nir_lower_samplers.cpp \ > > $(GLSL_SRCDIR)/nir/nir_lower_system_values.c \ > > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h > > index f744736..2f2edb6 100644 > > --- a/src/glsl/nir/nir.h > > +++ b/src/glsl/nir/nir.h > > @@ -420,6 +420,9 @@ typedef struct { > > /** generic SSA definition index. */ > > unsigned index; > > > > + /** Index into the live_in and live_out bitfields */ > > + unsigned live_index; > > + > > nir_instr *parent_instr; > > > > struct set *uses; > > @@ -999,6 +1002,10 @@ typedef struct nir_block { > > struct nir_block **dom_children; > > > > struct set *dom_frontier; > > + > > + /* live in and out for this block; used for liveness analysis */ > > + BITSET_WORD *live_in; > > + BITSET_WORD *live_out; > > } nir_block; > > > > #define nir_block_first_instr(block) \ > > @@ -1047,6 +1054,7 @@ typedef enum { > > nir_metadata_none = 0x0, > > nir_metadata_block_index = 0x1, > > nir_metadata_dominance = 0x2, > > + nir_metadata_live_variables = 0x4, > > } nir_metadata; > > > > typedef struct { > > @@ -1274,6 +1282,8 @@ bool nir_foreach_src(nir_instr *instr, > nir_foreach_src_cb cb, void *state); > > typedef bool (*nir_foreach_block_cb)(nir_block *block, void *state); > > bool nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb, > > void *state); > > +bool nir_foreach_block_reverse(nir_function_impl *impl, > nir_foreach_block_cb cb, > > + void *state); > > This hunk should go in the previous patch that defined this function. > > > > > /* If the following CF node is an if, this function returns that if. > > * Otherwise, it returns NULL. > > @@ -1318,6 +1328,9 @@ void nir_lower_system_values(nir_shader *shader); > > > > void nir_lower_atomics(nir_shader *shader); > > > > +void nir_live_variables_impl(nir_function_impl *impl); > > +bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b); > > + > > void nir_convert_to_ssa_impl(nir_function_impl *impl); > > void nir_convert_to_ssa(nir_shader *shader); > > void nir_convert_from_ssa(nir_shader *shader); > > diff --git a/src/glsl/nir/nir_live_variables.c > b/src/glsl/nir/nir_live_variables.c > > new file mode 100644 > > index 0000000..7d99a06 > > --- /dev/null > > +++ b/src/glsl/nir/nir_live_variables.c > > @@ -0,0 +1,282 @@ > > +/* > > + * Copyright © 2014 Intel Corporation > > + * > > + * Permission is hereby granted, free of charge, to any person > obtaining a > > + * copy of this software and associated documentation files (the > "Software"), > > + * to deal in the Software without restriction, including without > limitation > > + * the rights to use, copy, modify, merge, publish, distribute, > sublicense, > > + * and/or sell copies of the Software, and to permit persons to whom the > > + * Software is furnished to do so, subject to the following conditions: > > + * > > + * The above copyright notice and this permission notice (including the > next > > + * paragraph) shall be included in all copies or substantial portions > of the > > + * Software. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > EXPRESS OR > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT > SHALL > > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR > OTHER > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > ARISING > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS > > + * IN THE SOFTWARE. > > + * > > + * Authors: > > + * Jason Ekstrand (ja...@jlekstrand.net) > > + */ > > + > > +#include "nir.h" > > + > > +/* > > + * Basic liveness analysis. This works only in SSA form. > > + * > > + * This liveness pass treats phi nodes as being melded to the space > between > > + * blocks so that the destinations of a phi are in the livein of the > block > > + * in which it resides and the sources are in the liveout of the > > + * corresponding block. By formulating the liveness information in this > > + * way, we ensure that the definition of any variable dominates its > entire > > + * live range. This is true because the only way that the definition > of an > > + * SSA value may not dominate a use is if the use is in a phi node and > the > > + * uses in phi no are in the live-out of the corresponding predecessor > > + * block but not in the live-in of the block containing the phi node. > > + */ > > + > > +struct live_variables_state { > > + unsigned num_ssa_defs; > > + unsigned bitset_words; > > + BITSET_WORD progress; > > I'd rather have this be > > bool progress; > > since it's effectively being used as a boolean, no need to confuse > people with this and it's just unnecessary micro-optimization. > > > +}; > > + > > +static bool > > +index_dest(nir_dest *dest, void *void_state) > > +{ > > + struct live_variables_state *state = void_state; > > + > > + if (dest->is_ssa) > > + dest->ssa.live_index = state->num_ssa_defs++; > > + > > + return true; > > +} > > + > > +static bool > > +index_ssa_definitions_block(nir_block *block, void *void_state) > > +{ > > + struct live_variables_state *state = void_state; > > + > > + nir_foreach_instr(block, instr) { > > + if (instr->type == nir_instr_type_ssa_undef) { > > + nir_ssa_undef_instr *undef = nir_instr_as_ssa_undef(instr); > > + undef->def.live_index = 0; > > + } else { > > + nir_foreach_dest(instr, index_dest, state); > > + } > > + } > > + > > + return true; > > +} > > + > > +static bool > > +init_liveness_block(nir_block *block, void *void_state) > > +{ > > + struct live_variables_state *state = void_state; > > + > > + block->live_in = reralloc(block, block->live_in, BITSET_WORD, > > + state->bitset_words); > > + memset(block->live_in, 0, state->bitset_words * sizeof(BITSET_WORD)); > > + > > + block->live_out = reralloc(block, block->live_out, BITSET_WORD, > > + state->bitset_words); > > + memset(block->live_out, 0, state->bitset_words * > sizeof(BITSET_WORD)); > > + > > + return true; > > +} > > + > > +static bool > > +set_src_live(nir_src *src, void *void_live) > > +{ > > + BITSET_WORD *live = void_live; > > + > > + if (!src->is_ssa) > > + return true; > > + > > + if (src->ssa->live_index == 0) > > + return true; /* undefined variables are never live */ > > + > > + BITSET_SET(live, src->ssa->live_index); > > + > > + return true; > > +} > > + > > +static bool > > +set_dest_dead(nir_dest *dest, void *void_live) > > +{ > > + BITSET_WORD *live = void_live; > > + > > + if (dest->is_ssa) > > + BITSET_CLEAR(live, dest->ssa.live_index); > > + > > + return true; > > +} > > + > > +/* Phi nodes exist "between" blocks and all the phi nodes at the start > of a > > + * block act "in parallel". When we propagate from the live_in of one > > + * block to the live out of the other, we have to kill any writes from > phis > > + * and make live any sources. > > + */ > > +static void > > +propagate_across_edge(nir_block *pred, nir_block *succ, > > + struct live_variables_state *state) > > +{ > > + BITSET_WORD live[state->bitset_words]; > > + memcpy(live, succ->live_in, sizeof live); > > + > > + nir_foreach_instr(succ, instr) { > > + if (instr->type != nir_instr_type_phi) > > + break; > > + nir_phi_instr *phi = nir_instr_as_phi(instr); > > + > > + set_dest_dead(&phi->dest, live); > > + } > > + > > + nir_foreach_instr(succ, instr) { > > + if (instr->type != nir_instr_type_phi) > > + break; > > + nir_phi_instr *phi = nir_instr_as_phi(instr); > > + > > + foreach_list_typed(nir_phi_src, src, node, &phi->srcs) { > > + if (src->pred == pred) { > > + set_src_live(&src->src, live); > > + break; > > + } > > + } > > + } > > + > > + for (unsigned i = 0; i < state->bitset_words; ++i) { > > + state->progress |= live[i] & ~pred->live_out[i]; > > state->progress = state->progress || (live[i] & ~pred->live_out[i]) != 0; > > > + pred->live_out[i] |= live[i]; > > + } > > +} > > + > > +static bool > > +walk_instructions_block(nir_block *block, void *void_state) > > +{ > > + struct live_variables_state *state = void_state; > > + > > + /* The live out is the union (modulo phi nodes) of the live ins of > its > > + * successors */ > > + if (block->successors[0]) > > + propagate_across_edge(block, block->successors[0], state); > > + if (block->successors[1]) > > + propagate_across_edge(block, block->successors[1], state); > > + > > + memcpy(block->live_in, block->live_out, > > + state->bitset_words * sizeof(BITSET_WORD)); > > + > > + nir_if *following_if = nir_block_following_if(block); > > + if (following_if) > > + set_src_live(&following_if->condition, block->live_in); > > + > > + nir_foreach_instr_reverse(block, instr) { > > + /* Phi nodes are handled seperately so we want to skip them. > Since > > + * we are going backwards and they are at the beginning, we can > just > > + * break as soon as we see one. > > + */ > > + if (instr->type == nir_instr_type_phi) > > + break; > > + > > + nir_foreach_dest(instr, set_dest_dead, block->live_in); > > + nir_foreach_src(instr, set_src_live, block->live_in); > > + } > > + > > + return true; > > +} > > + > > +static bool > > +src_does_not_use_def(nir_src *src, void *def) > > +{ > > + return !src->is_ssa || src->ssa != (nir_ssa_def *)def; > > +} > > + > > +static bool > > +search_for_use_after_instr(nir_instr *start, nir_ssa_def *def) > > +{ > > + /* Only look for a use strictly after the given instruction */ > > + struct exec_node *node = start->node.next; > > + while (!exec_node_is_tail_sentinel(node)) { > > + nir_instr *instr = exec_node_data(nir_instr, node, node); > > + if (!nir_foreach_src(instr, src_does_not_use_def, def)) > > + return true; > > + node = node->next; > > + } > > + return false; > > +} > > + > > +/* Returns true if def is live at instr assuming that def comes before > > + * instr in a pre DFS search of the dominance tree. > > + */ > > +static bool > > +nir_ssa_def_is_live_at(nir_ssa_def *def, nir_instr *instr) > > +{ > > + if (BITSET_TEST(instr->block->live_out, def->live_index)) { > > + /* Since def dominates instr, if def is in the liveout of the > block, > > + * it's live at instr > > + */ > > + return true; > > + } else { > > + if (BITSET_TEST(instr->block->live_in, def->live_index) || > > + def->parent_instr->block == instr->block) { > > + /* In this case it is either live coming into instr's block or > it > > + * is defined in the same block. In this case, we simply need > to > > + * see if it is used after instr. > > + */ > > + return search_for_use_after_instr(instr, def); > > + } else { > > + return false; > > + } > > + } > > +} > > + > > +bool > > +nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b) > > +{ > > + if (a->parent_instr == b->parent_instr) { > > + /* Two variables defined at the same time interfere assuming at > > + * least one isn't dead. > > + */ > > + return true; > > + } else if (a->live_index == 0 || b->live_index == 0) { > > + /* If either variable is an ssa_undef, then there's no > interference */ > > + return false; > > + } else if (a->live_index < b->live_index) { > > + return nir_ssa_def_is_live_at(a, b->parent_instr); > > + } else { > > + return nir_ssa_def_is_live_at(b, a->parent_instr); > > + } > > +} > > + > > +void > > +nir_live_variables_impl(nir_function_impl *impl) > > +{ > > + struct live_variables_state state; > > + > > + /* We start at 1 because we reserve the index value of 0 for > ssa_undef > > + * instructions. Those are never live, so their liveness information > > + * can be compacted into a single bit. > > + */ > > + state.num_ssa_defs = 1; > > + nir_foreach_block(impl, index_ssa_definitions_block, &state); > > + > > + /* We now know how many unique ssa definitions we have and we can go > > + * ahead and allocate live_in and live_out sets > > + */ > > + state.bitset_words = BITSET_WORDS(state.num_ssa_defs); > > + nir_foreach_block(impl, init_liveness_block, &state); > > + > > + /* We need to propagate the liveness back through the CFG. Thanks to > > + * the wonders of SSA, this will run no more times than the depth of > the > > + * deepest loop + 1. > > + */ > > + do { > > + state.progress = 0; > > state.progress = false; > > > + nir_foreach_block_reverse(impl, walk_instructions_block, &state); > > + } while (state.progress); > > +} > > diff --git a/src/glsl/nir/nir_metadata.c b/src/glsl/nir/nir_metadata.c > > index 070cb74..a4d618c 100644 > > --- a/src/glsl/nir/nir_metadata.c > > +++ b/src/glsl/nir/nir_metadata.c > > @@ -39,6 +39,8 @@ nir_metadata_require(nir_function_impl *impl, > nir_metadata required) > > nir_index_blocks(impl); > > if (NEEDS_UPDATE(nir_metadata_dominance)) > > nir_calc_dominance_impl(impl); > > + if (NEEDS_UPDATE(nir_metadata_live_variables)) > > + nir_live_variables_impl(impl); > > > > #undef NEEDS_UPDATE > > > > diff --git a/src/mesa/main/bitset.h b/src/mesa/main/bitset.h > > index 601fd0e..2558da4 100644 > > --- a/src/mesa/main/bitset.h > > +++ b/src/mesa/main/bitset.h > > @@ -32,6 +32,7 @@ > > #define BITSET_H > > > > #include "imports.h" > > +#include "macros.h" > > > > > /**************************************************************************** > > * generic bitset implementation > > -- > > 2.2.0 > > > > _______________________________________________ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > http://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev