On Sat, Mar 28, 2015 at 2:28 PM, Kenneth Graunke <kenn...@whitecape.org> wrote: > This pass performs a mark and sweep pass over a nir_shader's associated > memory - anything still connected to the program will be kept, and any > dead memory we dropped on the floor will be freed. > > The expectation is that this will be called when finished building and > optimizing the shader. However, it's also fine to call it earlier, and > many times, to free up memory earlier. > > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > --- > src/glsl/Makefile.sources | 1 + > src/glsl/nir/nir.h | 2 + > src/glsl/nir/nir_sweep.c | 299 > ++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 302 insertions(+) > create mode 100644 src/glsl/nir/nir_sweep.c > > diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources > index 8d29c55..7046407 100644 > --- a/src/glsl/Makefile.sources > +++ b/src/glsl/Makefile.sources > @@ -54,6 +54,7 @@ NIR_FILES = \ > nir/nir_search.c \ > nir/nir_search.h \ > nir/nir_split_var_copies.c \ > + nir/nir_sweep.c \ > nir/nir_to_ssa.c \ > nir/nir_types.h \ > nir/nir_validate.c \ > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h > index 7b886e3..946f895 100644 > --- a/src/glsl/nir/nir.h > +++ b/src/glsl/nir/nir.h > @@ -1632,6 +1632,8 @@ bool nir_opt_peephole_ffma(nir_shader *shader); > > bool nir_opt_remove_phis(nir_shader *shader); > > +void nir_sweep(nir_shader *shader); > + > #ifdef __cplusplus > } /* extern "C" */ > #endif > diff --git a/src/glsl/nir/nir_sweep.c b/src/glsl/nir/nir_sweep.c > new file mode 100644 > index 0000000..cba5be7 > --- /dev/null > +++ b/src/glsl/nir/nir_sweep.c > @@ -0,0 +1,299 @@ > +/* > + * Copyright © 2015 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS > + * IN THE SOFTWARE. > + */ > + > +#include "nir.h" > + > +/** > + * \file nir_sweep.c > + * > + * The nir_sweep() pass performs a mark and sweep pass over a nir_shader's > associated > + * memory - anything still connected to the program will be kept, and any > dead memory > + * we dropped on the floor will be freed. > + * > + * The expectation is that drivers should call this when finished compiling > the shader > + * (after any optimization, lowering, and so on). However, it's also fine > to call it > + * earlier, and even many times, trading CPU cycles for memory savings. > + */ > + > +#define steal_list(mem_ctx, type, list) \ > + foreach_list_typed(type, obj, node, list) { ralloc_steal(mem_ctx, obj); } > + > +static void sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node); > + > +static void > +sweep_ssa_def(nir_shader *nir, nir_ssa_def *ssa) > +{ > + ralloc_steal(nir, ssa->uses); > + ralloc_steal(nir, ssa->if_uses); > +} > + > +static void > +sweep_src(nir_shader *nir, nir_src *src) > +{ > + if (!src) > + return; > + > + if (src->is_ssa) { > + sweep_ssa_def(nir, src->ssa); > + } else { > + sweep_src(nir, src->reg.indirect); > + } > +} > + > +static void > +sweep_dest(nir_shader *nir, nir_dest *dest) > +{ > + if (dest->is_ssa) { > + sweep_ssa_def(nir, &dest->ssa); > + } else { > + sweep_src(nir, dest->reg.indirect); > + } > +} > + > +static void > +sweep_deref_chain(nir_shader *nir, nir_deref *deref) > +{ > + for (; deref; deref = deref->child) { > + ralloc_steal(nir, deref); > + } > +} > + > +static void > +sweep_alu_instr(nir_shader *nir, nir_alu_instr *alu) > +{ > + for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { > + sweep_src(nir, &alu->src[i].src); > + } > + > + sweep_dest(nir, &alu->dest.dest); > +} > + > +static void > +sweep_call_instr(nir_shader *nir, nir_call_instr *call) > +{ > + ralloc_steal(nir, call->params); > + for (int i = 0; i < call->num_params; i++) { > + sweep_deref_chain(nir, &call->params[i]->deref); > + } > + if (call->return_deref) > + sweep_deref_chain(nir, &call->return_deref->deref); > +} > + > +static void > +sweep_tex_instr(nir_shader *nir, nir_tex_instr *tex) > +{ > + if (tex->sampler) > + sweep_deref_chain(nir, &tex->sampler->deref); > + > + ralloc_steal(nir, tex->src); > + for (int i = 0; i < tex->num_srcs; i++) { > + sweep_src(nir, &tex->src[i].src); > + } > +} > + > +static void > +sweep_intrinsic_instr(nir_shader *nir, nir_intrinsic_instr *intrin) > +{ > + for (int i = 0; i < nir_intrinsic_infos[intrin->intrinsic].num_srcs; i++) > { > + sweep_src(nir, &intrin->src[i]); > + } > + > + for (int i = 0; i < nir_intrinsic_infos[intrin->intrinsic].num_variables; > i++) { > + sweep_deref_chain(nir, &intrin->variables[i]->deref); > + } > + > + sweep_dest(nir, &intrin->dest); > +} > + > +static void > +sweep_load_const_instr(nir_shader *nir, nir_load_const_instr *load_const) > +{ > + sweep_ssa_def(nir, &load_const->def); > +} > + > +static void > +sweep_ssa_undef_instr(nir_shader *nir, nir_ssa_undef_instr *ssa_undef) > +{ > + sweep_ssa_def(nir, &ssa_undef->def); > +} > + > +static void > +sweep_phi_instr(nir_shader *nir, nir_phi_instr *phi) > +{ > + foreach_list_typed(nir_phi_src, phi_src, node, &phi->srcs) { > + ralloc_steal(nir, phi_src); > + /* skip pred */ > + sweep_src(nir, &phi_src->src); > + } > + > + sweep_dest(nir, &phi->dest); > +} > + > +static void > +sweep_parallel_copy_instr(nir_shader *nir, nir_parallel_copy_instr > *parallel_copy) > +{ > + /* Nothing to do here? It looks like nir_from_ssa already tidies up > after itself. */ > +} > + > + > +static void > +sweep_block(nir_shader *nir, nir_block *block) > +{ > + ralloc_steal(nir, block); > + > + nir_foreach_instr(block, instr) { > + ralloc_steal(nir, instr); > + > + switch (instr->type) { > + case nir_instr_type_alu: > + sweep_alu_instr(nir, nir_instr_as_alu(instr)); > + break; > + case nir_instr_type_call: > + sweep_call_instr(nir, nir_instr_as_call(instr)); > + break; > + case nir_instr_type_tex: > + sweep_tex_instr(nir, nir_instr_as_tex(instr)); > + break; > + case nir_instr_type_intrinsic: > + sweep_intrinsic_instr(nir, nir_instr_as_intrinsic(instr)); > + break; > + case nir_instr_type_load_const: > + sweep_load_const_instr(nir, nir_instr_as_load_const(instr)); > + break; > + case nir_instr_type_ssa_undef: > + sweep_ssa_undef_instr(nir, nir_instr_as_ssa_undef(instr)); > + break; > + case nir_instr_type_jump: > + /* Nothing to do */ > + break; > + case nir_instr_type_phi: > + sweep_phi_instr(nir, nir_instr_as_phi(instr)); > + break; > + case nir_instr_type_parallel_copy: > + sweep_parallel_copy_instr(nir, nir_instr_as_parallel_copy(instr)); > + break; > + default: > + unreachable("Invalid instruction type"); > + } > + } > +}
I really don't like having to independantly sweep instructions. I think we should be able to just have everything inside the instruction parented to the instruction and ralloc_steal the instruction. That would make sweeping *much* cleaner and would also mean that deleting an instruction actually cleans it up. > +static void > +sweep_if(nir_shader *nir, nir_if *iff) > +{ > + ralloc_steal(nir, iff); This isn't sufficient. The if can have a source with a relative offset (yes, that's a crazy case) and that will have a ralloc'd source that doesn't get swept. > + foreach_list_typed(nir_cf_node, cf_node, node, &iff->then_list) { > + sweep_cf_node(nir, cf_node); > + } > + > + foreach_list_typed(nir_cf_node, cf_node, node, &iff->else_list) { > + sweep_cf_node(nir, cf_node); > + } > +} > + > +static void > +sweep_loop(nir_shader *nir, nir_loop *loop) > +{ > + ralloc_steal(nir, loop); > + > + foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) { > + sweep_cf_node(nir, cf_node); > + } > +} > + > +static void > +sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node) > +{ > + switch (cf_node->type) { > + case nir_cf_node_block: > + sweep_block(nir, nir_cf_node_as_block(cf_node)); > + break; > + case nir_cf_node_if: > + sweep_if(nir, nir_cf_node_as_if(cf_node)); > + break; > + case nir_cf_node_loop: > + sweep_loop(nir, nir_cf_node_as_loop(cf_node)); > + break; > + default: > + unreachable("Invalid CF node type"); > + } > +} > + > +static void > +sweep_impl(nir_shader *nir, nir_function_impl *impl) > +{ > + ralloc_steal(nir, impl); > + > + ralloc_steal(nir, impl->params); > + ralloc_steal(nir, impl->return_var); > + steal_list(nir, nir_variable, &impl->locals); > + steal_list(nir, nir_register, &impl->registers); > + sweep_block(nir, impl->start_block); The start block is in the cf_node list so you don't need to sweep it separately > + sweep_block(nir, impl->end_block); Put this below sweeping the cf_node list. > + > + foreach_list_typed(nir_cf_node, cf_node, node, &impl->body) { > + sweep_cf_node(nir, cf_node); > + } > + > + /* Wipe out all the metadata, if any. */ > + nir_metadata_preserve(impl, nir_metadata_none); Is this really needed? We shouldn't be changing any metadata. In particular, we don't actually change any pointers. > +} > + > +static void > +sweep_function(nir_shader *nir, nir_function *f) > +{ > + ralloc_steal(nir, f); > + > + foreach_list_typed(nir_function_overload, overload, node, > &f->overload_list) { > + ralloc_steal(nir, overload); > + ralloc_steal(nir, overload->params); > + if (overload->impl) > + sweep_impl(nir, overload->impl); > + } > +} > + > +void > +nir_sweep(nir_shader *nir) I made the s/sweap/steal/ comment earlier. I'm still not a fan of "sweep", but I don't know that I like "steal" either. If this is a "mark and sweep" pass then the "sweep" functions are actually performing the "mark" step and ralloc_free is performing the sweep. > +{ > + void *rubbish = ralloc_context(NULL); > + > + /* First, move ownership of all the memory to a temporary context; assume > dead. */ > + ralloc_adopt(rubbish, nir); Yes, we could do it this way. Or we can simply have a dedicated context for each shader that isn't the nir_shader itself. Then we would make a new context, move everything from old to new and delete the old. I don't know for sure which I prefer. --Jason > + /* Variables and registers are not dead. Steal them back. */ > + steal_list(nir, nir_variable, &nir->uniforms); > + steal_list(nir, nir_variable, &nir->inputs); > + steal_list(nir, nir_variable, &nir->outputs); > + steal_list(nir, nir_variable, &nir->globals); > + steal_list(nir, nir_variable, &nir->system_values); > + steal_list(nir, nir_register, &nir->registers); > + > + /* Recurse into functions, stealing their contents back. */ > + foreach_list_typed(nir_function, func, node, &nir->functions) { > + sweep_function(nir, func); > + } > + > + /* Free everything we didn't steal back. */ > + ralloc_free(rubbish); > +} > -- > 2.3.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev