On Wed, Dec 17, 2014 at 8:04 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote: > Previously, we were doing a lazy creation of the parallel copy > instructions. This is confusing, hard to get right, and involves some > extra state tracking of the copies. This commit adds an extra walk over > the basic blocks to add the block-end parallel copies up front. This > should be much less confusing and, consequently, easier to get right. This > commit also adds more comments about parallel copies to help explain what > all is going on. > > As a consequence of these changes, we can now remove the at_end parameter > from nir_parallel_copy_instr. > --- > src/glsl/nir/nir.c | 1 - > src/glsl/nir/nir.h | 7 --- > src/glsl/nir/nir_from_ssa.c | 150 > +++++++++++++++++++++++++++----------------- > 3 files changed, 92 insertions(+), 66 deletions(-) > > diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c > index d0cab5a..17c1efe 100644 > --- a/src/glsl/nir/nir.c > +++ b/src/glsl/nir/nir.c > @@ -471,7 +471,6 @@ nir_parallel_copy_instr_create(void *mem_ctx) > nir_parallel_copy_instr *instr = ralloc(mem_ctx, nir_parallel_copy_instr); > instr_init(&instr->instr, nir_instr_type_parallel_copy); > > - instr->at_end = false; > exec_list_make_empty(&instr->copies); > > return instr; > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h > index dbfa461..a23bc5f 100644 > --- a/src/glsl/nir/nir.h > +++ b/src/glsl/nir/nir.h > @@ -967,13 +967,6 @@ typedef struct { > > typedef struct { > nir_instr instr; > - > - /* Indicates that this is the parallel copy at the end of the block. > - * When isolating phi nodes, we create 2 parallel copies in most blocks; > - * this flag helps tell them apart. 
> - */ > - bool at_end; > - > struct exec_list copies; > } nir_parallel_copy_instr; > > diff --git a/src/glsl/nir/nir_from_ssa.c b/src/glsl/nir/nir_from_ssa.c > index 1d17ac4..29fcc64 100644 > --- a/src/glsl/nir/nir_from_ssa.c > +++ b/src/glsl/nir/nir_from_ssa.c > @@ -229,48 +229,90 @@ merge_sets_interfere(merge_set *a, merge_set *b) > return false; > } > > -static nir_parallel_copy_instr * > -block_get_parallel_copy_at_end(nir_block *block, void *mem_ctx) > +static bool > +add_parallel_copy_to_end_of_block(nir_block *block, void *void_state) > { > - nir_instr *last_instr = nir_block_last_instr(block); > + struct from_ssa_state *state = void_state; > > - /* First we try and find a parallel copy if it already exists. If the > - * last instruction is a jump, it will be right before the jump; > - * otherwise, it will be the last instruction. > - */ > - nir_instr *pcopy_instr; > - if (last_instr != NULL && last_instr->type == nir_instr_type_jump) > - pcopy_instr = nir_instr_prev(last_instr); > - else > - pcopy_instr = last_instr; > + bool need_end_copy = false; > + if (block->successors[0]) { > + nir_instr *instr = nir_block_first_instr(block->successors[0]); > + if (instr && instr->type == nir_instr_type_phi) > + need_end_copy = true; > + } > > - if (pcopy_instr != NULL && > - pcopy_instr->type == nir_instr_type_parallel_copy) { > - /* A parallel copy already exists. */ > - nir_parallel_copy_instr *pcopy = > nir_instr_as_parallel_copy(pcopy_instr); > + if (block->successors[1]) { > + nir_instr *instr = nir_block_first_instr(block->successors[1]); > + if (instr && instr->type == nir_instr_type_phi) > + need_end_copy = true; > + } > > - /* This parallel copy may be the copy for the beginning of some > - * block, so we need to check for that before we return it. > + if (need_end_copy) { > + /* If one of our successors has at least one phi node, we need to > + * create a parallel copy at the end of the block but before the jump > + * (if there is one). 
> */ > - if (pcopy->at_end) > - return pcopy; > + nir_parallel_copy_instr *pcopy = > + nir_parallel_copy_instr_create(state->dead_ctx); > + > + nir_instr *last_instr = nir_block_last_instr(block); > + if (last_instr && last_instr->type == nir_instr_type_jump) { > + nir_instr_insert_before(last_instr, &pcopy->instr); > + } else { > + nir_instr_insert_after_block(block, &pcopy->instr); > + } > } > > - /* At this point, we haven't found a suitable parallel copy, so we > - * have to create one. > - */ > - nir_parallel_copy_instr *pcopy = nir_parallel_copy_instr_create(mem_ctx); > - pcopy->at_end = true; > + return true; > +} > > - if (last_instr && last_instr->type == nir_instr_type_jump) { > - nir_instr_insert_before(last_instr, &pcopy->instr); > - } else { > - nir_instr_insert_after_block(block, &pcopy->instr); > - } > +static nir_parallel_copy_instr * > +get_parallel_copy_at_end_of_block(nir_block *block) > +{ > + nir_instr *last_instr = nir_block_last_instr(block); > + if (last_instr == NULL) > + return NULL; > > - return pcopy; > + /* The last instruction may be a jump in which case the parallel copy is > + * right before it. > + */ > + if (last_instr->type == nir_instr_type_jump) > + last_instr = nir_instr_prev(last_instr); > + > + if (last_instr->type == nir_instr_type_parallel_copy) > + return nir_instr_as_parallel_copy(last_instr); > + else > + return NULL; > } > > +/** Isolate phi nodes with parallel copies > + * > + * In order to solve the dependency problems with the sources and > + * destinations of phi nodes, we first isolate them by adding parallel > + * copies to the beginnings and ends of basic blocks. For every block with > + * phi nodes, we add a parallel copy immediately following the last phi > + * node that copies the destinations of all of the phi nodes to new SSA > + * values. 
We also add a parallel copy to the end of every block that has > + * a successor with phi nodes that, for each phi node in each successor, > + * copies the corresponding source of the phi node and adjusts the phi to > + * use the destination of the parallel copy. > + * > + * In SSA form, each value has exactly one definition. What this does is > + * ensure that each value used in a phi also has exactly one use. The > + * destinations of phis are only used by the parallel copy immediately > + * following the phi nodes. Thanks to the parallel copy at the end of > + * the predecessor block, the sources of phi nodes are the only use of > + * that value. This allows us to immediately assign all the sources and > + * destinations of any given phi node to the same register without worrying > + * about interference at all. We do coalescing to get rid of the parallel > + * copies where possible. > + * > + * Before this pass can be run, we have to iterate over the blocks with > + * add_parallel_cop_to_end_of_block to ensure that the parallel copies at
add_parallel_copy_to_end_of_block > + * the ends of blocks exist. We can create the ones at the beginnings as > + * we go, but the ones at the ends of blocks need to be created ahead of > + * time because of potential back-edges in the CFG. > + */ > static bool > isolate_phi_nodes_block(nir_block *block, void *void_state) > { > @@ -305,7 +347,8 @@ isolate_phi_nodes_block(nir_block *block, void > *void_state) > assert(phi->dest.is_ssa); > foreach_list_typed(nir_phi_src, src, node, &phi->srcs) { > nir_parallel_copy_instr *pcopy = > - block_get_parallel_copy_at_end(src->pred, state->dead_ctx); > + get_parallel_copy_at_end_of_block(src->pred); > + assert(pcopy); > > nir_parallel_copy_copy *copy = ralloc(state->dead_ctx, > nir_parallel_copy_copy); > @@ -420,29 +463,26 @@ agressive_coalesce_block(nir_block *block, void > *void_state) > { > struct from_ssa_state *state = void_state; > > + nir_parallel_copy_instr *start_pcopy = NULL; > nir_foreach_instr(block, instr) { > /* Phi nodes only ever come at the start of a block */ > if (instr->type != nir_instr_type_phi) { > if (instr->type != nir_instr_type_parallel_copy) > break; /* The parallel copy must be right after the phis */ > > - nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(instr); > + start_pcopy = nir_instr_as_parallel_copy(instr); > > - agressive_coalesce_parallel_copy(pcopy, state); > - > - if (pcopy->at_end) > - return true; > + agressive_coalesce_parallel_copy(start_pcopy, state); > > break; > } > } > > - nir_instr *last_instr = nir_block_last_instr(block); > - if (last_instr && last_instr->type == nir_instr_type_parallel_copy) { > - nir_parallel_copy_instr *pcopy = > nir_instr_as_parallel_copy(last_instr); > - if (pcopy->at_end) > - agressive_coalesce_parallel_copy(pcopy, state); > - } > + nir_parallel_copy_instr *end_pcopy = > + get_parallel_copy_at_end_of_block(block); > + > + if (end_pcopy && end_pcopy != start_pcopy) > + agressive_coalesce_parallel_copy(end_pcopy, state); > > return true; > } 
> @@ -642,7 +682,6 @@ resolve_parallel_copy(nir_parallel_copy_instr *pcopy, > if (!copy->src.is_ssa && copy->src.reg.reg == copy->dest.reg.reg) > continue; > > - /* Set both indices equal to UINT_MAX to mark them as not indexed yet. > */ > num_copies++; > } > > @@ -806,21 +845,15 @@ resolve_parallel_copies_block(nir_block *block, void > *void_state) > resolve_parallel_copy(pcopy, state); > } > > - nir_instr *last_instr = nir_block_last_instr(block); > - if (last_instr == NULL) > - return true; /* Now empty, nothing to do. */ > - > - /* If the last instruction is a jump, the parallel copy will be before > - * the jump. > + /* It's possible that the above code already cleaned up the end parallel > + * copy. However, doing so removed it from the instructions list so we > + * won't find it here. Therefore, it's safe to go ahead and just look > + * for one and clean it up if it exists. > */ > - if (last_instr->type == nir_instr_type_jump) > - last_instr = nir_instr_prev(last_instr); > - > - if (last_instr && last_instr->type == nir_instr_type_parallel_copy) { > - nir_parallel_copy_instr *pcopy = > nir_instr_as_parallel_copy(last_instr); > - if (pcopy->at_end) > - resolve_parallel_copy(pcopy, state); > - } > + nir_parallel_copy_instr *end_pcopy = > + get_parallel_copy_at_end_of_block(block); > + if (end_pcopy) > + resolve_parallel_copy(end_pcopy, state); > > return true; > } > @@ -836,6 +869,7 @@ nir_convert_from_ssa_impl(nir_function_impl *impl) > state.merge_node_table = _mesa_hash_table_create(NULL, > _mesa_key_pointer_equal); > > + nir_foreach_block(impl, add_parallel_copy_to_end_of_block, &state); > nir_foreach_block(impl, isolate_phi_nodes_block, &state); > > /* Mark metadata as dirty before we ask for liveness analysis */ > -- > 2.2.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing 
list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev