This is similar to Gallium's existing glsl_to_tgsi::remove_output_read lowering pass, but done entirely inside the GLSL compiler.
Signed-off-by: Vincent Lejeune <v...@ovi.com> Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> v2 [Kayden]: - Don't reallocate the array for every shader output. - Move the class into the .cpp file and create a lower_output_reads() wrapper - Simplify the logic in visit(ir_dereference_variable *) - Fold add_replacement_pair into the only caller. - Use visit_leave(ir_return *) instead of enter (for paranoia, in case the return value references shader outputs) - Visit signatures rather than functions, to avoid pattern matching to find the actual void main() signature. - Add some comments - Whitespace fixes v3 [Vincent]: - Fix tab indent --- src/glsl/Makefile.sources | 1 + src/glsl/ir_optimization.h | 1 + src/glsl/lower_output_reads.cpp | 152 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 154 insertions(+), 0 deletions(-) create mode 100644 src/glsl/lower_output_reads.cpp diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index c65bfe4..5e80af2 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -60,6 +60,7 @@ LIBGLSL_CXX_SOURCES := \ lower_vec_index_to_cond_assign.cpp \ lower_vec_index_to_swizzle.cpp \ lower_vector.cpp \ + lower_output_reads.cpp \ opt_algebraic.cpp \ opt_constant_folding.cpp \ opt_constant_propagation.cpp \ diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 7b32e84..085b969 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -72,6 +72,7 @@ bool lower_variable_index_to_cond_assign(exec_list *instructions, bool lower_input, bool lower_output, bool lower_temp, bool lower_uniform); bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz); bool lower_clip_distance(exec_list *instructions); +void lower_output_reads(exec_list *instructions); bool optimize_redundant_jumps(exec_list *instructions); ir_rvalue * diff --git a/src/glsl/lower_output_reads.cpp b/src/glsl/lower_output_reads.cpp new file mode 100644 index 0000000..4b3f91c --- /dev/null +++ 
b/src/glsl/lower_output_reads.cpp @@ -0,0 +1,152 @@ +/* + * Copyright © 2012 Vincent Lejeune + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "ir.h" +#include "program/hash_table.h" + +/** + * \file lower_output_reads.cpp + * + * In GLSL, shader output variables (such as varyings) can be both read and + * written. However, on some hardware, reading an output register causes + * trouble. + * + * This pass creates temporary shadow copies of every (used) shader output, + * and replaces all accesses to use those instead. It also adds code to the + * main() function to copy the final values to the actual shader outputs. 
+ */
+
+/**
+ * Hierarchical IR visitor that redirects every dereference of a shader
+ * output variable (ir_var_out mode) to a per-output temporary, and emits
+ * assignments copying the temporaries to the real outputs before each
+ * return statement and at the end of main().
+ */
+class output_read_remover : public ir_hierarchical_visitor {
+protected:
+   struct replacement_pair {
+      ir_variable *output;
+      ir_variable *temp;
+   };
+
+   /**
+    * A hash table mapping from the original ir_variable shader outputs
+    * (ir_var_out mode) to the new temporaries to be used instead.
+    */
+   hash_table *replacements;
+
+   /**
+    * An array of tuples containing both the output and temporary variables.
+    * This is necessary because we can't iterate over the hash table.
+    */
+   struct replacement_pair *replacements_array;
+   unsigned replacements_count;
+   unsigned replacements_array_size;
+
+   /** ralloc context owning replacements_array; freed by the destructor. */
+   void *mem_ctx;
+
+   ir_assignment *copy_to_output(void *ctx, const replacement_pair &pair);
+public:
+   output_read_remover();
+   ~output_read_remover();
+   virtual ir_visitor_status visit(class ir_dereference_variable *);
+   virtual ir_visitor_status visit_leave(class ir_return *);
+   virtual ir_visitor_status visit_leave(class ir_function_signature *);
+};
+
+/**
+ * Create the (empty) output -> temporary map and a one-element pair array;
+ * the array is doubled on demand in visit(ir_dereference_variable *).
+ */
+output_read_remover::output_read_remover()
+{
+   mem_ctx = ralloc_context(NULL);
+
+   replacements =
+      hash_table_ctor(0, hash_table_pointer_hash, hash_table_pointer_compare);
+
+   replacements_count = 0;
+   replacements_array_size = 1;
+   replacements_array = rzalloc_array(mem_ctx, struct replacement_pair,
+                                      replacements_array_size);
+}
+
+/**
+ * Release the hash table and the private ralloc context.  The temporaries
+ * themselves are allocated out of the outputs' ralloc parent, not mem_ctx,
+ * so they are not freed here.
+ */
+output_read_remover::~output_read_remover()
+{
+   hash_table_dtor(replacements);
+   ralloc_free(mem_ctx);
+}
+
+/**
+ * Build the assignment "pair.output = pair.temp", allocating the new IR
+ * nodes out of \c ctx.  The caller decides where to insert it.
+ */
+ir_assignment *
+output_read_remover::copy_to_output(void *ctx, const replacement_pair &pair)
+{
+   ir_dereference_variable *lhs = new(ctx) ir_dereference_variable(pair.output);
+   ir_dereference_variable *rhs = new(ctx) ir_dereference_variable(pair.temp);
+   return new(ctx) ir_assignment(lhs, rhs);
+}
+
+/**
+ * Point a dereference of a shader output at that output's temporary,
+ * creating (and recording) the temporary the first time the output is seen.
+ * Dereferences of variables in any other mode are left untouched.
+ */
+ir_visitor_status
+output_read_remover::visit(ir_dereference_variable *ir)
+{
+   if (ir->var->mode != ir_var_out)
+      return visit_continue;
+
+   ir_variable *temp = (ir_variable *) hash_table_find(replacements, ir->var);
+
+   /* If we don't have an existing temporary, create one. */
+   if (temp == NULL) {
+      void *var_ctx = ralloc_parent(ir->var);
+      temp = new(var_ctx) ir_variable(ir->var->type, ir->var->name,
+                                      ir_var_temporary);
+      hash_table_insert(replacements, temp, ir->var);
+
+      /* Declare the temporary in the IR, right after the declaration of the
+       * output it shadows.  Without this the rewritten dereferences refer to
+       * a variable that is not part of any instruction list.
+       * NOTE(review): assumes the output's declaration is itself linked into
+       * an instruction list - confirm for built-in outputs.
+       */
+      ir->var->insert_after(temp);
+
+      /* Grow the pair array geometrically when it is full. */
+      if (replacements_array_size <= replacements_count) {
+         replacements_array_size *= 2;
+         replacements_array = reralloc(mem_ctx, replacements_array,
+                                       struct replacement_pair,
+                                       replacements_array_size);
+      }
+
+      replacements_array[replacements_count].output = ir->var;
+      replacements_array[replacements_count].temp = temp;
+      replacements_count++;
+   }
+
+   /* Update the dereference to use the temporary */
+   ir->var = temp;
+
+   return visit_continue;
+}
+
+/**
+ * Before a return statement, copy every temporary recorded so far to its
+ * shader output.  This runs on leave so that dereferences inside the return
+ * value have already been visited.
+ */
+ir_visitor_status
+output_read_remover::visit_leave(ir_return *ir)
+{
+   for (unsigned i = 0; i < replacements_count; i++)
+      ir->insert_before(copy_to_output(ir, replacements_array[i]));
+
+   return visit_continue;
+}
+
+/**
+ * At the end of the signature named "main", append the copies from every
+ * recorded temporary to its shader output.  Other signatures are skipped.
+ */
+ir_visitor_status
+output_read_remover::visit_leave(ir_function_signature *sig)
+{
+   if (strcmp(sig->function_name(), "main") != 0)
+      return visit_continue;
+
+   for (unsigned i = 0; i < replacements_count; i++)
+      sig->body.push_tail(copy_to_output(sig, replacements_array[i]));
+
+   return visit_continue;
+}
+
+/**
+ * Run the output-read lowering over a whole instruction list.
+ *
+ * \param instructions  IR list to rewrite in place.
+ */
+void
+lower_output_reads(exec_list *instructions)
+{
+   output_read_remover v;
+   visit_list_elements(&v, instructions);
+}
-- 
1.7.7

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev