On Wed, Dec 17, 2014 at 8:04 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote: > Previously, we were doing a lazy creation of the parallel copy > instructions. This is confusing, hard to get right, and involves some > extra state tracking of the copies. This commit adds an extra walk over > the basic blocks to add the block-end parallel copies up front. This > should be much less confusing and, consequently, easier to get right. This > commit also adds more comments about parallel copies to help explain what > all is going on. > > As a consequence of these changes, we can now remove the at_end parameter > from nir_parallel_copy_instr. > --- > src/glsl/nir/nir.c | 1 - > src/glsl/nir/nir.h | 7 --- > src/glsl/nir/nir_from_ssa.c | 150 > +++++++++++++++++++++++++++----------------- > 3 files changed, 92 insertions(+), 66 deletions(-) > > diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c > index d0cab5a..17c1efe 100644 > --- a/src/glsl/nir/nir.c > +++ b/src/glsl/nir/nir.c > @@ -471,7 +471,6 @@ nir_parallel_copy_instr_create(void *mem_ctx) > nir_parallel_copy_instr *instr = ralloc(mem_ctx, nir_parallel_copy_instr); > instr_init(&instr->instr, nir_instr_type_parallel_copy); > > - instr->at_end = false; > exec_list_make_empty(&instr->copies); > > return instr; > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h > index dbfa461..a23bc5f 100644 > --- a/src/glsl/nir/nir.h > +++ b/src/glsl/nir/nir.h > @@ -967,13 +967,6 @@ typedef struct { > > typedef struct { > nir_instr instr; > - > - /* Indicates that this is the parallel copy at the end of the block. > - * When isolating phi nodes, we create 2 parallel copies in most blocks; > - * this flag helps tell them apart. 
> - */ > - bool at_end; > - > struct exec_list copies; > } nir_parallel_copy_instr; > > diff --git a/src/glsl/nir/nir_from_ssa.c b/src/glsl/nir/nir_from_ssa.c > index 1d17ac4..29fcc64 100644 > --- a/src/glsl/nir/nir_from_ssa.c > +++ b/src/glsl/nir/nir_from_ssa.c > @@ -229,48 +229,90 @@ merge_sets_interfere(merge_set *a, merge_set *b) > return false; > } > > -static nir_parallel_copy_instr * > -block_get_parallel_copy_at_end(nir_block *block, void *mem_ctx) > +static bool > +add_parallel_copy_to_end_of_block(nir_block *block, void *void_state) > { > - nir_instr *last_instr = nir_block_last_instr(block); > + struct from_ssa_state *state = void_state; > > - /* First we try and find a parallel copy if it already exists. If the > - * last instruction is a jump, it will be right before the jump; > - * otherwise, it will be the last instruction. > - */ > - nir_instr *pcopy_instr; > - if (last_instr != NULL && last_instr->type == nir_instr_type_jump) > - pcopy_instr = nir_instr_prev(last_instr); > - else > - pcopy_instr = last_instr; > + bool need_end_copy = false; > + if (block->successors[0]) { > + nir_instr *instr = nir_block_first_instr(block->successors[0]); > + if (instr && instr->type == nir_instr_type_phi) > + need_end_copy = true; > + } > > - if (pcopy_instr != NULL && > - pcopy_instr->type == nir_instr_type_parallel_copy) { > - /* A parallel copy already exists. */ > - nir_parallel_copy_instr *pcopy = > nir_instr_as_parallel_copy(pcopy_instr); > + if (block->successors[1]) { > + nir_instr *instr = nir_block_first_instr(block->successors[1]); > + if (instr && instr->type == nir_instr_type_phi) > + need_end_copy = true; > + } > > - /* This parallel copy may be the copy for the beginning of some > - * block, so we need to check for that before we return it. > + if (need_end_copy) { > + /* If one of our successors has at least one phi node, we need to > + * create a parallel copy at the end of the block but before the jump > + * (if there is one). 
> */ > - if (pcopy->at_end) > - return pcopy; > + nir_parallel_copy_instr *pcopy = > + nir_parallel_copy_instr_create(state->dead_ctx); > + > + nir_instr *last_instr = nir_block_last_instr(block); > + if (last_instr && last_instr->type == nir_instr_type_jump) { > + nir_instr_insert_before(last_instr, &pcopy->instr); > + } else { > + nir_instr_insert_after_block(block, &pcopy->instr); > + } > } > > - /* At this point, we haven't found a suitable parallel copy, so we > - * have to create one. > - */ > - nir_parallel_copy_instr *pcopy = nir_parallel_copy_instr_create(mem_ctx); > - pcopy->at_end = true; > + return true; > +} > > - if (last_instr && last_instr->type == nir_instr_type_jump) { > - nir_instr_insert_before(last_instr, &pcopy->instr); > - } else { > - nir_instr_insert_after_block(block, &pcopy->instr); > - } > +static nir_parallel_copy_instr * > +get_parallel_copy_at_end_of_block(nir_block *block) > +{ > + nir_instr *last_instr = nir_block_last_instr(block); > + if (last_instr == NULL) > + return NULL; > > - return pcopy; > + /* The last instruction may be a jump in which case the parallel copy is > + * right before it. > + */ > + if (last_instr->type == nir_instr_type_jump) > + last_instr = nir_instr_prev(last_instr); > + > + if (last_instr->type == nir_instr_type_parallel_copy) > + return nir_instr_as_parallel_copy(last_instr); > + else > + return NULL; > } > > +/** Isolate phi nodes with parallel copies > + * > + * In order to solve the dependency problems with the sources and > + * destinations of phi nodes, we first isolate them by adding parallel > + * copies to the beginnings and ends of basic blocks. For every block with > + * phi nodes, we add a parallel copy immediately following the last phi > + * node that copies the destinations of all of the phi nodes to new SSA > + * values. 
We also add a parallel copy to the end of every block that has > + * a successor with phi nodes that, for each phi node in each successor, > + * copies the corresponding source of the phi node and adjusts the phi to > + * use the destination of the parallel copy. > + * > + * In SSA form, each value has exactly one definition. What this does is > + * ensure that each value used in a phi also has exactly one use. The > + * destinations of phis are only used by the parallel copy immediately > + * following the phi nodes. Thanks to the parallel copy at the end of > + * the predecessor block, the sources of phi nodes are the only use of > + * that value. This allows us to immediately assign all the sources and > + * destinations of any given phi node to the same register without worrying > + * about interference at all. We do coalescing to get rid of the parallel > + * copies where possible. > + * > + * Before this pass can be run, we have to iterate over the blocks with > + * add_parallel_cop_to_end_of_block to ensure that the parallel copies at
add_parallel_copy_to_end_of_block > + * the ends of blocks exist. We can create the ones at the beginnings as > + * we go, but the ones at the ends of blocks need to be created ahead of > + * time because of potential back-edges in the CFG. > + */ > static bool > isolate_phi_nodes_block(nir_block *block, void *void_state) > { > @@ -305,7 +347,8 @@ isolate_phi_nodes_block(nir_block *block, void > *void_state) > assert(phi->dest.is_ssa); > foreach_list_typed(nir_phi_src, src, node, &phi->srcs) { > nir_parallel_copy_instr *pcopy = > - block_get_parallel_copy_at_end(src->pred, state->dead_ctx); > + get_parallel_copy_at_end_of_block(src->pred); > + assert(pcopy); > > nir_parallel_copy_copy *copy = ralloc(state->dead_ctx, > nir_parallel_copy_copy); > @@ -420,29 +463,26 @@ agressive_coalesce_block(nir_block *block, void > *void_state) > { > struct from_ssa_state *state = void_state; > > + nir_parallel_copy_instr *start_pcopy = NULL; > nir_foreach_instr(block, instr) { > /* Phi nodes only ever come at the start of a block */ > if (instr->type != nir_instr_type_phi) { > if (instr->type != nir_instr_type_parallel_copy) > break; /* The parallel copy must be right after the phis */ > > - nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(instr); > + start_pcopy = nir_instr_as_parallel_copy(instr); > > - agressive_coalesce_parallel_copy(pcopy, state); > - > - if (pcopy->at_end) > - return true; > + agressive_coalesce_parallel_copy(start_pcopy, state); > > break; > } > } > > - nir_instr *last_instr = nir_block_last_instr(block); > - if (last_instr && last_instr->type == nir_instr_type_parallel_copy) { > - nir_parallel_copy_instr *pcopy = > nir_instr_as_parallel_copy(last_instr); > - if (pcopy->at_end) > - agressive_coalesce_parallel_copy(pcopy, state); > - } > + nir_parallel_copy_instr *end_pcopy = > + get_parallel_copy_at_end_of_block(block); > + > + if (end_pcopy && end_pcopy != start_pcopy) > + agressive_coalesce_parallel_copy(end_pcopy, state); > > return true; > } 
> @@ -642,7 +682,6 @@ resolve_parallel_copy(nir_parallel_copy_instr *pcopy, > if (!copy->src.is_ssa && copy->src.reg.reg == copy->dest.reg.reg) > continue; > > - /* Set both indices equal to UINT_MAX to mark them as not indexed yet. > */ > num_copies++; > } > > @@ -806,21 +845,15 @@ resolve_parallel_copies_block(nir_block *block, void > *void_state) > resolve_parallel_copy(pcopy, state); > } > > - nir_instr *last_instr = nir_block_last_instr(block); > - if (last_instr == NULL) > - return true; /* Now empty, nothing to do. */ > - > - /* If the last instruction is a jump, the parallel copy will be before > - * the jump. > + /* It's possible that the above code already cleaned up the end parallel > + * copy. However, doing so removed it from the instructions list so we > + * won't find it here. Therefore, it's safe to go ahead and just look > + * for one and clean it up if it exists. > */ > - if (last_instr->type == nir_instr_type_jump) > - last_instr = nir_instr_prev(last_instr); > - > - if (last_instr && last_instr->type == nir_instr_type_parallel_copy) { > - nir_parallel_copy_instr *pcopy = > nir_instr_as_parallel_copy(last_instr); > - if (pcopy->at_end) > - resolve_parallel_copy(pcopy, state); > - } > + nir_parallel_copy_instr *end_pcopy = > + get_parallel_copy_at_end_of_block(block); > + if (end_pcopy) > + resolve_parallel_copy(end_pcopy, state); > > return true; > } > @@ -836,6 +869,7 @@ nir_convert_from_ssa_impl(nir_function_impl *impl) > state.merge_node_table = _mesa_hash_table_create(NULL, > _mesa_key_pointer_equal); > > + nir_foreach_block(impl, add_parallel_copy_to_end_of_block, &state); > nir_foreach_block(impl, isolate_phi_nodes_block, &state); > > /* Mark metadata as dirty before we ask for liveness analysis */ > -- > 2.2.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing 
list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev