On Thu, Aug 25, 2016 at 12:04 AM, Matt Turner <matts...@gmail.com> wrote: > Prior to this commit rename_variables_block() is recursively called, > performing a depth-first traversal of the control flow graph. The > function uses a non-trivial amount of stack space for local variables, > which puts us in danger of smashing the stack, given a sufficiently deep > dominance tree. > > XCOM: Enemy Within contains a shader with such a dominance tree (1574 > nir_blocks in total, depth of at least 143). > > Jason tells me that he believes that any walk over the nir_blocks that > respects dominance is sufficient (a DFS might have been necessary prior > to the introduction of nir_phi_builder).
nir_phi_builder.h has some example pseudocode, and there's a comment before the part which corresponds to rename_variables() that says: // Visit each block. This needs to visit dominators first; // nir_for_each_block() will be ok. and you're right that the DFS was necessary before the switch to nir_phi_builder, since we used a different algorithm which did stuff both before and after recursing into the children of the block. So if you want, you can make this paragraph sound a little more confident. > > In fact, the introduction of nir_phi_builder made the problem worse: > rename_variables_block(), walks to the bottom of the dominance tree > before calling nir_phi_builder_value_get_block_def() which walks back to > the top of the dominance tree... > > In any case, this patch ensures we avoid that problem as well. > > Cc: mesa-sta...@lists.freedesktop.org > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97225 Reviewed-by: Connor Abbott <cwabbo...@gmail.com> > --- > Most easily reviewed with git show -w > > src/compiler/nir/nir_lower_vars_to_ssa.c | 207 > +++++++++++++++---------------- > 1 file changed, 103 insertions(+), 104 deletions(-) > > diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c > b/src/compiler/nir/nir_lower_vars_to_ssa.c > index 317647b..e2e13e2 100644 > --- a/src/compiler/nir/nir_lower_vars_to_ssa.c > +++ b/src/compiler/nir/nir_lower_vars_to_ssa.c > @@ -479,133 +479,132 @@ lower_copies_to_load_store(struct deref_node *node, > * SSA def on the stack per block. > */ > static bool > -rename_variables_block(nir_block *block, struct lower_variables_state *state) > +rename_variables(struct lower_variables_state *state) > { > nir_builder b; > nir_builder_init(&b, state->impl); > > - nir_foreach_instr_safe(instr, block) { > - if (instr->type != nir_instr_type_intrinsic) > - continue; > - > - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); > - > - switch (intrin->intrinsic) { > - case nir_intrinsic_load_var: { > - struct deref_node *node = > - get_deref_node(intrin->variables[0], state); > - > - if (node == NULL) { > - /* If we hit this path then we are referencing an invalid > - * value. Most likely, we unrolled something and are > - * reading past the end of some array. In any case, this > - * should result in an undefined value. > - */ > - nir_ssa_undef_instr *undef = > - nir_ssa_undef_instr_create(state->shader, > - intrin->num_components, > - intrin->dest.ssa.bit_size); > - > - nir_instr_insert_before(&intrin->instr, &undef->instr); > - nir_instr_remove(&intrin->instr); > - > - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, > - nir_src_for_ssa(&undef->def)); > + nir_foreach_block(block, state->impl) { > + nir_foreach_instr_safe(instr, block) { > + if (instr->type != nir_instr_type_intrinsic) > continue; > - } > > - if (!node->lower_to_ssa) > - continue; > + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); > + > + switch (intrin->intrinsic) { > + case nir_intrinsic_load_var: { > + struct deref_node *node = > + get_deref_node(intrin->variables[0], state); > + > + if (node == NULL) { > + /* If we hit this path then we are referencing an invalid > + * value. Most likely, we unrolled something and are > + * reading past the end of some array. In any case, this > + * should result in an undefined value. > + */ > + nir_ssa_undef_instr *undef = > + nir_ssa_undef_instr_create(state->shader, > + intrin->num_components, > + intrin->dest.ssa.bit_size); > + > + nir_instr_insert_before(&intrin->instr, &undef->instr); > + nir_instr_remove(&intrin->instr); > + > + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, > + nir_src_for_ssa(&undef->def)); > + continue; > + } > > - nir_alu_instr *mov = nir_alu_instr_create(state->shader, > - nir_op_imov); > - mov->src[0].src = nir_src_for_ssa( > - nir_phi_builder_value_get_block_def(node->pb_value, block)); > - for (unsigned i = intrin->num_components; i < 4; i++) > - mov->src[0].swizzle[i] = 0; > + if (!node->lower_to_ssa) > + continue; > > - assert(intrin->dest.is_ssa); > + nir_alu_instr *mov = nir_alu_instr_create(state->shader, > + nir_op_imov); > + mov->src[0].src = nir_src_for_ssa( > + nir_phi_builder_value_get_block_def(node->pb_value, block)); > + for (unsigned i = intrin->num_components; i < 4; i++) > + mov->src[0].swizzle[i] = 0; > > - mov->dest.write_mask = (1 << intrin->num_components) - 1; > - nir_ssa_dest_init(&mov->instr, &mov->dest.dest, > - intrin->num_components, > - intrin->dest.ssa.bit_size, NULL); > + assert(intrin->dest.is_ssa); > > - nir_instr_insert_before(&intrin->instr, &mov->instr); > - nir_instr_remove(&intrin->instr); > + mov->dest.write_mask = (1 << intrin->num_components) - 1; > + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, > + intrin->num_components, > + intrin->dest.ssa.bit_size, NULL); > > - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, > - nir_src_for_ssa(&mov->dest.dest.ssa)); > - break; > - } > - > - case nir_intrinsic_store_var: { > - struct deref_node *node = > - get_deref_node(intrin->variables[0], state); > - > - if (node == NULL) { > - /* Probably an out-of-bounds array store. That should be a > - * no-op. */ > + nir_instr_insert_before(&intrin->instr, &mov->instr); > nir_instr_remove(&intrin->instr); > - continue; > - } > > - if (!node->lower_to_ssa) > - continue; > - > - assert(intrin->num_components == > - glsl_get_vector_elements(node->type)); > - > - assert(intrin->src[0].is_ssa); > + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, > + nir_src_for_ssa(&mov->dest.dest.ssa)); > + break; > + } > > - nir_ssa_def *new_def; > - b.cursor = nir_before_instr(&intrin->instr); > + case nir_intrinsic_store_var: { > + struct deref_node *node = > + get_deref_node(intrin->variables[0], state); > > - unsigned wrmask = nir_intrinsic_write_mask(intrin); > - if (wrmask == (1 << intrin->num_components) - 1) { > - /* Whole variable store - just copy the source. Note that > - * intrin->num_components and intrin->src[0].ssa->num_components > - * may differ. > - */ > - unsigned swiz[4]; > - for (unsigned i = 0; i < 4; i++) > - swiz[i] = i < intrin->num_components ? i : 0; > + if (node == NULL) { > + /* Probably an out-of-bounds array store. That should be a > + * no-op. */ > + nir_instr_remove(&intrin->instr); > + continue; > + } > > - new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz, > - intrin->num_components, false); > - } else { > - nir_ssa_def *old_def = > - nir_phi_builder_value_get_block_def(node->pb_value, block); > - /* For writemasked store_var intrinsics, we combine the newly > - * written values with the existing contents of unwritten > - * channels, creating a new SSA value for the whole vector. > - */ > - nir_ssa_def *srcs[4]; > - for (unsigned i = 0; i < intrin->num_components; i++) { > - if (wrmask & (1 << i)) { > - srcs[i] = nir_channel(&b, intrin->src[0].ssa, i); > - } else { > - srcs[i] = nir_channel(&b, old_def, i); > + if (!node->lower_to_ssa) > + continue; > + > + assert(intrin->num_components == > + glsl_get_vector_elements(node->type)); > + > + assert(intrin->src[0].is_ssa); > + > + nir_ssa_def *new_def; > + b.cursor = nir_before_instr(&intrin->instr); > + > + unsigned wrmask = nir_intrinsic_write_mask(intrin); > + if (wrmask == (1 << intrin->num_components) - 1) { > + /* Whole variable store - just copy the source. Note that > + * intrin->num_components and > intrin->src[0].ssa->num_components > + * may differ. > + */ > + unsigned swiz[4]; > + for (unsigned i = 0; i < 4; i++) > + swiz[i] = i < intrin->num_components ? i : 0; > + > + new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz, > + intrin->num_components, false); > + } else { > + nir_ssa_def *old_def = > + nir_phi_builder_value_get_block_def(node->pb_value, block); > + /* For writemasked store_var intrinsics, we combine the newly > + * written values with the existing contents of unwritten > + * channels, creating a new SSA value for the whole vector. > + */ > + nir_ssa_def *srcs[4]; > + for (unsigned i = 0; i < intrin->num_components; i++) { > + if (wrmask & (1 << i)) { > + srcs[i] = nir_channel(&b, intrin->src[0].ssa, i); > + } else { > + srcs[i] = nir_channel(&b, old_def, i); > + } > } > + new_def = nir_vec(&b, srcs, intrin->num_components); > } > - new_def = nir_vec(&b, srcs, intrin->num_components); > - } > > - assert(new_def->num_components == intrin->num_components); > + assert(new_def->num_components == intrin->num_components); > > - nir_phi_builder_value_set_block_def(node->pb_value, block, new_def); > - nir_instr_remove(&intrin->instr); > - break; > - } > + nir_phi_builder_value_set_block_def(node->pb_value, block, > new_def); > + nir_instr_remove(&intrin->instr); > + break; > + } > > - default: > - break; > + default: > + break; > + } > } > } > > - for (unsigned i = 0; i < block->num_dom_children; ++i) > - rename_variables_block(block->dom_children[i], state); > - > return true; > } > > @@ -737,7 +736,7 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl) > } > } > > - rename_variables_block(nir_start_block(impl), &state); > + rename_variables(&state); > > nir_phi_builder_finish(state.phi_builder); > > -- > 2.7.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev