2015-08-11 20:25 GMT+02:00 Eric Anholt <e...@anholt.net>: > We may find a cause to do more undef optimization in the future, but for > now this fixes up things after if flattening. vc4 was handling this > internally most of the time, but a GLB2.7 shader that did a conditional > discard and assign gl_FragColor in the else was still emitting some extra > code. > > total instructions in shared programs: 100809 -> 100795 (-0.01%) > instructions in affected programs: 37 -> 23 (-37.84%) > --- > src/gallium/drivers/vc4/vc4_program.c | 1 + > src/glsl/Makefile.sources | 1 + > src/glsl/nir/nir.h | 2 + > src/glsl/nir/nir_opt_undef.c | 93 > +++++++++++++++++++++++++++++++++++ > 4 files changed, 97 insertions(+) > create mode 100644 src/glsl/nir/nir_opt_undef.c > > diff --git a/src/gallium/drivers/vc4/vc4_program.c > b/src/gallium/drivers/vc4/vc4_program.c > index e9120b7..4a3a277 100644 > --- a/src/gallium/drivers/vc4/vc4_program.c > +++ b/src/gallium/drivers/vc4/vc4_program.c > @@ -1627,6 +1627,7 @@ vc4_optimize_nir(struct nir_shader *s) > progress = nir_opt_peephole_select(s) || progress; > progress = nir_opt_algebraic(s) || progress; > progress = nir_opt_constant_folding(s) || progress; > + progress = nir_opt_undef(s) || progress; > } while (progress); > } > > diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources > index a0e85ed..0b77244 100644 > --- a/src/glsl/Makefile.sources > +++ b/src/glsl/Makefile.sources > @@ -56,6 +56,7 @@ NIR_FILES = \ > nir/nir_opt_peephole_ffma.c \ > nir/nir_opt_peephole_select.c \ > nir/nir_opt_remove_phis.c \ > + nir/nir_opt_undef.c \ > nir/nir_print.c \ > nir/nir_remove_dead_variables.c \ > nir/nir_search.c \ > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h > index 9aae6d7..222a219 100644 > --- a/src/glsl/nir/nir.h > +++ b/src/glsl/nir/nir.h > @@ -1704,6 +1704,8 @@ bool nir_opt_peephole_ffma(nir_shader *shader); > > bool nir_opt_remove_phis(nir_shader *shader); > > +bool nir_opt_undef(nir_shader *shader); > + > void nir_sweep(nir_shader *shader); > > #ifdef __cplusplus > diff --git a/src/glsl/nir/nir_opt_undef.c b/src/glsl/nir/nir_opt_undef.c > new file mode 100644 > index 0000000..89a75c4 > --- /dev/null > +++ b/src/glsl/nir/nir_opt_undef.c > @@ -0,0 +1,93 @@ > +/* > + * Copyright © 2015 Broadcom > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS > + * IN THE SOFTWARE. > + */ > + > +#include "nir.h" > + > +/** @file nir_opt_undef.c > + * > + * Handles optimization of operations involving ssa_undef. For now, we just > + * make sure that csels between undef and some other value just give the > other > + * value (on the assumption that the condition's going to be choosing the > + * defined value). This reduces work after if flattening when each side of > + * the if is defining a variable. > + * > + * Some day, we may find some use for making other operations consuming an > + * undef arg output undef, but I don't know of any cases currently. > + */ > + > +static bool > +opt_undef_alu(nir_alu_instr *instr) > +{ > + if (instr->op != nir_op_bcsel && instr->op != nir_op_fcsel) > + return false; > + > + assert(instr->dest.dest.is_ssa); > + > + for (int i = 1; i <= 2; i++) { > + if (!instr->src[i].src.is_ssa) > + continue; > + > + nir_instr *parent = instr->src[i].src.ssa->parent_instr; > + if (parent->type != nir_instr_type_ssa_undef) > + continue; > + > + nir_alu_src_copy(&instr->src[0], &instr->src[i == 1 ? 2 : 1], > + ralloc_parent(instr)); > + nir_src empty_src; > + memset(&empty_src, 0, sizeof(empty_src)); > + nir_instr_rewrite_src(&instr->instr, &instr->src[1].src, empty_src); > + nir_instr_rewrite_src(&instr->instr, &instr->src[2].src, empty_src); > + instr->op = nir_op_imov; > + > + return true; > + } > + > + return false; > +} > + > +static bool > +opt_undef_block(nir_block *block, void *data) > +{ > + bool *progress = data; > + > + nir_foreach_instr_safe(block, instr) { > + if (instr->type == nir_instr_type_alu) > + if (opt_undef_alu(nir_instr_as_alu(instr))) > + (*progress) = true; > + } > + > + return true; > +} > + > +bool > +nir_opt_undef(nir_shader *shader) > +{ > + bool progress = false; > + > + nir_foreach_overload(shader, overload) { > + if (overload->impl) > + nir_foreach_block(overload->impl, opt_undef_block, &progress); > + } > + > + return progress; > +} > -- > 2.1.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
For future reference there is a nice little summary of how one might want to deal with undef inputs to different operations at[1] at aproximately line 1300. This is for SCCP, in the LLVM compiler, but I guess an expansion to this pass would not be to different. Just thought I'd mention it since I've been looking at it a bit =) [1] https://github.com/llvm-mirror/llvm/blob/master/lib/Transforms/Scalar/SCCP.cpp _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev