On Wed, 2016-10-05 at 09:59 -0700, Jason Ekstrand wrote: > On Tue, Oct 4, 2016 at 6:46 PM, Timothy Arceri <timothy.arceri@collab > ora.com> wrote: > > On Tue, 2016-10-04 at 16:47 -0700, Jason Ekstrand wrote: > > > On Fri, Sep 16, 2016 at 6:24 AM, Timothy Arceri <timothy.arceri@c > > olla > > > bora.com> wrote: > > > > From: Thomas Helland <thomashellan...@gmail.com> > > > > > > > > V2: Do a "depth first search" to convert to LCSSA > > > > > > > > V3: Small comment fixup > > > > > > > > V4: Rebase, adapt to removal of function overloads > > > > > > > > V5: Rebase, adapt to relocation of nir to compiler/nir > > > > Still need to adapt to potential if-uses > > > > Work around nir_validate issue > > > > > > > > V6 (Timothy): > > > > - tidy lcssa and stop leaking memory > > > > - dont rewrite the src for the lcssa phi node > > > > - validate lcssa phi srcs to avoid postvalidate assert > > > > - don't add new phi if one already exists > > > > - more lcssa phi validation fixes > > > > - Rather than marking ssa defs inside a loop just mark blocks > > > > inside > > > > a loop. This is simpler and fixes lcssa for intrinsics which > > do > > > > not have a destination. > > > > - don't create LCSSA phis for loops we won't unroll > > > > - require loop metadata for lcssa pass > > > > - handle case were the ssa defs use outside the loop is > > already a > > > > phi > > > > > > > > V7: (Timothy) > > > > - pass indirect mask to metadata call > > > > --- > > > > src/compiler/Makefile.sources | 1 + > > > > src/compiler/nir/nir.h | 6 ++ > > > > src/compiler/nir/nir_to_lcssa.c | 227 > > > > ++++++++++++++++++++++++++++++++++++++++ > > > > src/compiler/nir/nir_validate.c | 11 +- > > > > 4 files changed, 242 insertions(+), 3 deletions(-) > > > > create mode 100644 src/compiler/nir/nir_to_lcssa.c > > > > > > > > diff --git a/src/compiler/Makefile.sources > > > > b/src/compiler/Makefile.sources > > > > index 7ed26a9..8ef6080 100644 > > > > --- a/src/compiler/Makefile.sources > > > > +++ b/src/compiler/Makefile.sources > > > > @@ -247,6 +247,7 @@ NIR_FILES = \ > > > > nir/nir_search_helpers.h \ > > > > nir/nir_split_var_copies.c \ > > > > nir/nir_sweep.c \ > > > > + nir/nir_to_lcssa.c \ > > > > nir/nir_to_ssa.c \ > > > > nir/nir_validate.c \ > > > > nir/nir_vla.h \ > > > > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h > > > > index cc8f4b6..29a6f45 100644 > > > > --- a/src/compiler/nir/nir.h > > > > +++ b/src/compiler/nir/nir.h > > > > @@ -1387,6 +1387,8 @@ typedef struct { > > > > struct exec_list srcs; /** < list of nir_phi_src */ > > > > > > > > nir_dest dest; > > > > + > > > > + bool is_lcssa_phi; > > > > } nir_phi_instr; > > > > > > > > typedef struct { > > > > @@ -2643,6 +2645,10 @@ void nir_convert_to_ssa(nir_shader > > *shader); > > > > bool nir_repair_ssa_impl(nir_function_impl *impl); > > > > bool nir_repair_ssa(nir_shader *shader); > > > > > > > > +void nir_to_lcssa_impl(nir_function_impl *impl, > > > > + nir_variable_mode indirect_mask); > > > > +void nir_to_lcssa(nir_shader *shader, nir_variable_mode > > > > indirect_mask); > > > > + > > > > /* If phi_webs_only is true, only convert SSA values involved > > in > > > > phi nodes to > > > > * registers. If false, convert all values (even those not > > > > involved in a phi > > > > * node) to registers. > > > > diff --git a/src/compiler/nir/nir_to_lcssa.c > > > > b/src/compiler/nir/nir_to_lcssa.c > > > > new file mode 100644 > > > > index 0000000..25d0bdb > > > > --- /dev/null > > > > +++ b/src/compiler/nir/nir_to_lcssa.c > > > > @@ -0,0 +1,227 @@ > > > > +/* > > > > + * Copyright © 2015 Thomas Helland > > > > + * > > > > + * Permission is hereby granted, free of charge, to any person > > > > obtaining a > > > > + * copy of this software and associated documentation files > > (the > > > > "Software"), > > > > + * to deal in the Software without restriction, including > > without > > > > limitation > > > > + * the rights to use, copy, modify, merge, publish, > > distribute, > > > > sublicense, > > > > + * and/or sell copies of the Software, and to permit persons > > to > > > > whom the > > > > + * Software is furnished to do so, subject to the following > > > > conditions: > > > > + * > > > > + * The above copyright notice and this permission notice > > > > (including the next > > > > + * paragraph) shall be included in all copies or substantial > > > > portions of the > > > > + * Software. > > > > + * > > > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY > > KIND, > > > > EXPRESS OR > > > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > > > MERCHANTABILITY, > > > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN > > NO > > > > EVENT SHALL > > > > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, > > > > DAMAGES OR OTHER > > > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR > > OTHERWISE, > > > > ARISING > > > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE > > OR > > > > OTHER DEALINGS > > > > + * IN THE SOFTWARE. > > > > + */ > > > > + > > > > +/* > > > > + * This pass converts the ssa-graph into "Loop Closed SSA > > form". > > > > This is > > > > + * done by placing phi nodes at the exits of the loop for all > > > > values > > > > + * that are used outside the loop. The result is it > > transforms: > > > > + * > > > > + * loop { -> loop { > > > > + * ssa2 = .... -> ssa2 = ... > > > > + * if (cond) -> if (cond) { > > > > + * break; -> break; > > > > + * ssa3 = ssa2 * ssa4 -> } > > > > + * } -> ssa3 = ssa2 * ssa4 > > > > + * ssa6 = ssa2 + 4 -> } > > > > + * ssa5 = lcssa_phi(ssa2) > > > > + * ssa6 = ssa5 + 4 > > > > + */ > > > > > > Let me make sure I understand this correctly. The point here > > seems > > > to be to ensure that SSA defs can never escape a loop without > > going > > > through a phi. Correct? This can happen if the ssa def, for > > > whatever reason, dominates all of the breaks in the loop. In > > that > > > case, it will dominate the block after the loop and can go > > through > > > without a phi. This raises a question: Is there any real > > difference > > > between an LCSSA phi and a regular phi? > > > > There shouldn't be any real difference besides only having a single > > src > > which validation doesn't like. I guess I'll try the suggestion you > > make > > below, but I'm concerned I'll hit more validation problems. > > > > Honestly, I'm surprised what you have now passes validation. :/ We > really should be validating that a phi has a set of predecessors > exactly equal to the predecessors of the block in which it lives. > > > > > > > > + > > > > +#include "nir.h" > > > > + > > > > +typedef struct { > > > > + /* The nir_shader we are transforming */ > > > > + nir_shader *shader; > > > > + > > > > + /* The loop we store information for */ > > > > + nir_loop *loop; > > > > + > > > > + /* Keep track of which defs are in the loop */ > > > > + BITSET_WORD *is_in_loop; > > > > + > > > > + /* General purpose bool */ > > > > + bool flag; > > > > +} lcssa_state; > > > > + > > > > +static void > > > > +mark_block_as_in_loop(nir_block *blk, void *state) > > > > +{ > > > > + lcssa_state *state_cast = (lcssa_state *) state; > > > > + BITSET_SET(state_cast->is_in_loop, blk->index); > > > > +} > > > > + > > > > +static void > > > > +is_block_outside_loop(nir_block *blk, void *void_state) > > > > +{ > > > > + lcssa_state *state = void_state; > > > > + if (!BITSET_TEST(state->is_in_loop, blk->index)) { > > > > + state->flag = true; > > > > + } > > > > +} > > > > > > These helpers aren't really needed anymore (they probably were > > prior > > > to better block iteration) and I think they're just confusing. > > > > Hmm. Your probably right. Thomas was originally checking inividual > > definitions rather than just using the blocks. > > What I meant is that the is_block_outside_loop helper made sense > because it could be passed to nir_foreach_block. Now that we have > better block iteration, the helpers that set a flag in a struct > passed in as a void * are just piles of wrapping that's just > confusing.
Oh ok, that makes sense I thought you were suggesting we didn't need the coed at all :P > > > Are you thinking > > something like: > > > > if (block_after_loop->index <= src->parent_instr->block->index) > > ... we are outside the loop .. > > > > Actually, that would probably work and would save us some iteration. > I hadn't thought of that. Thinking about it more to handle nested loops we might need to handle things before the loop also, but it still might work. if (block_after_loop->index <= src->parent_instr->block->index || first_block_in_loop->index > src->parent_instr->block->index) ... we are outside the loop .. > > > > > > > > + > > > > +static bool > > > > +convert_loop_exit_for_ssa(nir_ssa_def *def, void *void_state) > > > > +{ > > > > + lcssa_state *state = void_state; > > > > + > > > > + state->flag = false; > > > > + > > > > + nir_block *block_after_loop = > > > > + nir_cf_node_as_block(nir_cf_node_next(&state->loop- > > > > >cf_node)); > > > > + > > > > + nir_foreach_use_safe(src, def) { > > > > + if (src->parent_instr->type == nir_instr_type_phi && > > > > + (block_after_loop->index == src->parent_instr- > > >block- > > > > >index || > > > > > > You should be able to just compare the block pointers. > > > > > > > + nir_instr_as_phi(src->parent_instr)->is_lcssa_phi)) > > > > > > I don't think you need this check.expand > > > > As I say below I don't recall how this could already be an > > lcssa_phi > > but I recall removing it once before and it caused issues. Maybe > > nested > > loops although we shouldn't be touching those. I need to run some > > tests > > (and add comments this time around). > > > > > > > > > + continue; > > > > + > > > > + is_block_outside_loop(src->parent_instr->block, > > void_state); > > > > + } > > > > + > > > > + /* There where no sources that had defs outside the loop */ > > > > + if (!state->flag) > > > > + return true; > > > > + > > > > + /* Initialize a phi-instruction */ > > > > + nir_phi_instr *phi = nir_phi_instr_create(state->shader); > > > > + phi->instr.block = block_after_loop; > > > > + nir_ssa_dest_init(&phi->instr, &phi->dest, > > > > + def->num_components, def->bit_size, > > "LCSSA- > > > > phi"); > > > > + phi->is_lcssa_phi = true; > > > > + > > > > + /* Connect the ssa-def and the phi instruction */ > > > > + nir_phi_src *phi_src = ralloc(phi, nir_phi_src); > > > > + phi_src->pred = def->parent_instr->block; > > > > > > Uh... I don't think this is what you want. I think you want to > > place > > > the phi in block_after_loop. > > > > ?? > > > > It is in the code below, this is setting the src pred to the block > > from > > the loop. > > > > nir_instr_insert_before_block(phi->instr.block, &phi->instr); > > > > Here phi->instr.block == block_after_loop. > > > > Drp... I can't read. Your code is fine. :-) > > > > > > > > + phi_src->src = nir_src_for_ssa(def); > > > > > > Is there any particular reason why we have this is_lcssa_phi > > flag? > > > Why don't we just create a regular phi node with as many sources > > as > > > the block has predecessors and make them all point to the same > > ssa > > > def? > > > > I guess that could work I'll give it a try. Although will that not > > cause some kind of validation problems? For example the ssa_def > > belongs > > to only one of the predecessors. > > > > The only time you can get into a case where you're adding this phi is > if you have the same ssa_def for all predecessors. If that's not the > case, then the ssa_def does not dominate the use and we'll have a phi > in between anyway. I don't think we have an issue there. > > > > > > > > + > > > > + exec_list_push_tail(&phi->srcs, &phi_src->node); > > > > + > > > > + nir_instr_insert_before_block(phi->instr.block, &phi- > > >instr); > > > > + > > > > + /* Run through all uses and rewrite those outside the loop > > to > > > > point to > > > > + * the phi instead of pointing to the ssa-def. > > > > + */ > > > > + nir_foreach_use_safe(src, def) { > > > > + state->flag = false; > > > > + > > > > + if (src->parent_instr->type == nir_instr_type_phi && > > > > + (block_after_loop->index == src->parent_instr- > > >block- > > > > >index || > > > > + nir_instr_as_phi(src->parent_instr)->is_lcssa_phi)) > > > > + continue; > > > > > > How is this different from "src->parent_instr == &phi->instr"? > > > > That would check that the we are not trying to rewrite the use of > > the > > phi we just added as opposed to avoiding rewriting a use if it is > > an > > existing phi. > > Right. However, I don't think you want the is_lcssa_phi. You may > have some other phi in block_after_loop that uses the ssa_def and you > don't want to overwrite that. > > > Although I'm starting to wonder how this could already be > > an lcssa_phi > > at this point. I'll need double check a few things, my mind > > has purged > > this from memory. > > I'm not sure if it could either... > > > > > > > > + > > > > + is_block_outside_loop(src->parent_instr->block, state); > > > > + > > > > + if (!state->flag) > > > > + continue; > > > > > > Yeah, those helpers need to go. > > > > > > > + > > > > + nir_instr_rewrite_src(src->parent_instr, src, > > > > + nir_src_for_ssa(&phi->dest.ssa)); > > > > + } > > > > + > > > > + return true; > > > > +} > > > > + > > > > +static void > > > > +convert_to_lcssa(nir_cf_node *cf_node, lcssa_state *state) > > > > +{ > > > > + switch (cf_node->type) { > > > > + case nir_cf_node_block: > > > > + nir_foreach_instr(instr, nir_cf_node_as_block(cf_node)) > > > > + nir_foreach_ssa_def(instr, convert_loop_exit_for_ssa, > > > > state); > > > > + return; > > > > + case nir_cf_node_if: { > > > > + nir_if *if_stmt = nir_cf_node_as_if(cf_node); > > > > + foreach_list_typed(nir_cf_node, nested_node, node, > > &if_stmt- > > > > >then_list) > > > > + convert_to_lcssa(nested_node, state); > > > > + foreach_list_typed(nir_cf_node, nested_node, node, > > &if_stmt- > > > > >else_list) > > > > + convert_to_lcssa(nested_node, state); > > > > + return; > > > > + } > > > > + case nir_cf_node_loop: > > > > + /* Since we are going depth first the innermost loop > > will > > > > already have > > > > + * been rewritten, and so there should be no defs inside > > the > > > > inner loop > > > > + * that are not already rewritten with LCSSA-phis in our > > > > current loop. > > > > + */ > > > > > > I have a feeling your recursion here could be a bit simpler. I'm > > not > > > 100% sure how off-hand. I'll think about it a bit. > > > > Yeah I've had the same thoughts but this is what the original code > > did > > and I couldn't think of anything obvious so I just left it. > > > > > > > > > + return; > > > > + default: > > > > + unreachable("unknown cf node type"); > > > > + } > > > > +} > > > > + > > > > +static void > > > > +compute_lcssa(nir_cf_node *cf_node, nir_function_impl *impl) > > > > +{ > > > > + nir_loop *loop; > > > > + > > > > + switch (cf_node->type) { > > > > + case nir_cf_node_block: > > > > + return; > > > > + case nir_cf_node_if: { > > > > + nir_if *if_stmt = nir_cf_node_as_if(cf_node); > > > > + foreach_list_typed(nir_cf_node, nested_node, node, > > &if_stmt- > > > > >then_list) > > > > + compute_lcssa(nested_node, impl); > > > > + foreach_list_typed(nir_cf_node, nested_node, node, > > &if_stmt- > > > > >else_list) > > > > + compute_lcssa(nested_node, impl); > > > > + return; > > > > + } > > > > + case nir_cf_node_loop: > > > > + loop = nir_cf_node_as_loop(cf_node); > > > > + foreach_list_typed(nir_cf_node, nested_node, node, > > &loop- > > > > >body) > > > > + compute_lcssa(nested_node, impl); > > > > + break; > > > > + default: > > > > + unreachable("unknown cf node type"); > > > > + } > > > > + > > > > + if (loop->info->limiting_terminator == NULL) > > > > + return; > > > > + > > > > + nir_function *fn = nir_cf_node_get_function(&loop- > > >cf_node)- > > > > >function; > > > > + if (!is_simple_loop(fn->shader, loop->info) && > > > > + !is_complex_loop(fn->shader, loop->info)) { > > > > + return; > > > > + } > > > > > > Why are the above two checks needed? I don't really see why we > > need > > > loop analysis for lcssa. > > > > Well we need lcssa and loop analysis to do loop unrolling so why > > not > > skip lcssa is we can't unroll the loop? > > As I've been reviewing your series, I've been thinking about how we > can decouple things. Ideally, I'd like to have: > > 1) A loop analysis pass that's pure analysis and doesn't make any > real decisions > 2) A dumb LCSSA pass that just converts to LCSSA > 3) A "smart" loop unrolling pass that makes all of the decisions > about what gets unrolled and does the unrolling Makes sense. > > I'm not really happy with having all three be as tightly coupled as > they are. Maybe they need to be? But I don't think so. It *should* be possible, at the very lease we should be decouple it more than it currently is. > > Another thought I had this morning was that maybe we would just want > to convert to LCSSA on-demand one loop at a time as we unroll. > Looking at the algorithm, I don't think that would be terribly > difficult and it would certainly solve both the progress problem and > the "don't do unnecessary work" problem. I'm not convinced it's a > good idea, but it's a thought. Yeah I had thought about that, but lcssa could be useful for other loop optimisation passes we might want to have a go at for remaining loops that are too lange to unroll. > > > Also if we remove the is_lcssa phi flag we would end up in > > an infinite > > loop of adding lcssa phis and removing them (see patch 6 which come > > to > > think of it might not be needed any longer since we already do > > this). > > > > Right. I'm ok with keeping the is_lcssa_phi flag for the sole > purpose of proper progress reporting. However, I'd like us to avoid > making "special" phi nodes with different rules. Sure. > > > > > > > > + > > > > + lcssa_state *state = rzalloc(NULL, lcssa_state); > > > > + state->is_in_loop = rzalloc_array(state, BITSET_WORD, > > > > + BITSET_WORDS(impl- > > > > >ssa_alloc)); > > > > + state->loop = loop; > > > > + > > > > + nir_foreach_block_in_cf_node(block, cf_node) { > > > > + mark_block_as_in_loop(block, state); > > > > + } > > > > + > > > > + foreach_list_typed(nir_cf_node, node, node, &loop->body) > > > > + convert_to_lcssa(node, state); > > > > + > > > > + ralloc_free(state); > > > > +} > > > > + > > > > +void > > > > +nir_to_lcssa_impl(nir_function_impl *impl, nir_variable_mode > > > > indirect_mask) > > > > +{ > > > > + nir_metadata_require(impl, nir_metadata_loop_analysis, > > > > indirect_mask); > > > > > > You need to also require block indices. > > > > Will add. > > > > > > > > > + > > > > + foreach_list_typed(nir_cf_node, node, node, &impl->body) > > > > + compute_lcssa(node, impl); > > > > +} > > > > + > > > > +void > > > > +nir_to_lcssa(nir_shader *shader, nir_variable_mode > > indirect_mask) > > > > +{ > > > > + nir_foreach_function(func, shader) { > > > > + if (func->impl) { > > > > + nir_to_lcssa_impl(func->impl, indirect_mask); > > > > + } > > > > + } > > > > +} > > > > diff --git a/src/compiler/nir/nir_validate.c > > > > b/src/compiler/nir/nir_validate.c > > > > index 60af715..9d1566c 100644 > > > > --- a/src/compiler/nir/nir_validate.c > > > > +++ b/src/compiler/nir/nir_validate.c > > > > @@ -564,8 +564,13 @@ validate_phi_instr(nir_phi_instr *instr, > > > > validate_state *state) > > > > validate_dest(&instr->dest, state); > > > > > > > > exec_list_validate(&instr->srcs); > > > > - validate_assert(state, exec_list_length(&instr->srcs) == > > > > - state->block->predecessors->entries); > > > > + > > > > + if (!instr->is_lcssa_phi) { > > > > + validate_assert(state, exec_list_length(&instr->srcs) == > > > > + state->block->predecessors->entries); > > > > + } else { > > > > + validate_assert(state, exec_list_length(&instr->srcs) == > > 1); > > > > + } > > > > } > > > > > > > > static void > > > > @@ -624,7 +629,7 @@ validate_phi_src(nir_phi_instr *instr, > > > > nir_block *pred, validate_state *state) > > > > > > > > exec_list_validate(&instr->srcs); > > > > nir_foreach_phi_src(src, instr) { > > > > - if (src->pred == pred) { > > > > + if (src->pred == pred || instr->is_lcssa_phi) { > > > > validate_assert(state, src->src.is_ssa); > > > > validate_assert(state, src->src.ssa->num_components > > == > > > > instr->dest.ssa.num_components); > > > > -- > > > > 2.7.4 > > > > > > > > _______________________________________________ > > > > mesa-dev mailing list > > > > mesa-dev@lists.freedesktop.org > > > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > > > > > > > > > _______________________________________________ > > > mesa-dev mailing list > > > mesa-dev@lists.freedesktop.org > > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev