On 10/28/2016 04:13 PM, Marek Olšák wrote: > From: Marek Olšák <marek.ol...@amd.com> > > --- > src/compiler/glsl/ir_optimization.h | 2 +- > src/compiler/glsl/lower_if_to_cond_assign.cpp | 55 > ++++++++++++++++++++++++--- > 2 files changed, 50 insertions(+), 7 deletions(-) > > diff --git a/src/compiler/glsl/ir_optimization.h > b/src/compiler/glsl/ir_optimization.h > index c033f6b..a662a6d 100644 > --- a/src/compiler/glsl/ir_optimization.h > +++ b/src/compiler/glsl/ir_optimization.h > @@ -102,21 +102,21 @@ bool do_dead_code_local(exec_list *instructions); > bool do_dead_code_unlinked(exec_list *instructions); > bool do_dead_functions(exec_list *instructions); > bool opt_flip_matrices(exec_list *instructions); > bool do_function_inlining(exec_list *instructions); > bool do_lower_jumps(exec_list *instructions, bool pull_out_jumps = true, > bool lower_sub_return = true, bool lower_main_return = false, bool > lower_continue = false, bool lower_break = false); > bool do_lower_texture_projection(exec_list *instructions); > bool do_if_simplification(exec_list *instructions); > bool opt_flatten_nested_if_blocks(exec_list *instructions); > bool do_discard_simplification(exec_list *instructions); > bool lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions, > - unsigned max_depth = 0); > + unsigned max_depth = 0, unsigned > min_branch_cost = 0); > bool do_mat_op_to_vec(exec_list *instructions); > bool do_minmax_prune(exec_list *instructions); > bool do_noop_swizzle(exec_list *instructions); > bool do_structure_splitting(exec_list *instructions); > bool do_swizzle_swizzle(exec_list *instructions); > bool do_vectorize(exec_list *instructions); > bool do_tree_grafting(exec_list *instructions); > bool do_vec_index_to_cond_assign(exec_list *instructions); > bool do_vec_index_to_swizzle(exec_list *instructions); > bool lower_discard(exec_list *instructions); > diff --git a/src/compiler/glsl/lower_if_to_cond_assign.cpp > 
b/src/compiler/glsl/lower_if_to_cond_assign.cpp > index a413306..7b59c00 100644 > --- a/src/compiler/glsl/lower_if_to_cond_assign.cpp > +++ b/src/compiler/glsl/lower_if_to_cond_assign.cpp > @@ -17,22 +17,28 @@ > * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > * DEALINGS IN THE SOFTWARE. > */ > > /** > * \file lower_if_to_cond_assign.cpp > * > - * This attempts to flatten if-statements to conditional assignments for > - * GPUs with limited or no flow control support. > + * This flattens if-statements to conditional assignments if: > + * > + * - the GPU has limited or no flow control support > + * (controlled by max_depth) > + * > + * - small conditional branches are more expensive than conditional > assignments > + * (controlled by min_branch_cost, that's the cost for a branch to be > + * preserved) > * > * It can't handle other control flow being inside of its block, such > * as calls or loops. Hopefully loop unrolling and inlining will take > * care of those. > * > * Drivers for GPUs with no control flow support should simply call > * > * lower_if_to_cond_assign(instructions) > * > * to attempt to flatten all if-statements. > @@ -42,65 +48,73 @@ > * > * lower_if_to_cond_assign(instructions, N) > * > * to attempt to flatten any if-statements appearing at depth > N. 
> */ > > #include "compiler/glsl_types.h" > #include "ir.h" > #include "util/set.h" > #include "util/hash_table.h" /* Needed for the hashing functions */ > +#include "main/macros.h" /* for MAX2 */ > > namespace { > > class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor { > public: > ir_if_to_cond_assign_visitor(gl_shader_stage stage, > - unsigned max_depth) > + unsigned max_depth, > + unsigned min_branch_cost) > { > this->progress = false; > this->stage = stage; > this->max_depth = max_depth; > + this->min_branch_cost = min_branch_cost; > this->depth = 0; > > this->condition_variables = > _mesa_set_create(NULL, _mesa_hash_pointer, > _mesa_key_pointer_equal); > } > > ~ir_if_to_cond_assign_visitor() > { > _mesa_set_destroy(this->condition_variables, NULL); > } > > ir_visitor_status visit_enter(ir_if *); > ir_visitor_status visit_leave(ir_if *); > > bool found_unsupported_op; > + bool found_expensive_op; > + bool is_then;
I wonder if it would be clearer to replace 'is_then' with an 'unsigned *cost' that points at either then_cost or else_cost. I could see arguments either way. > bool progress; > gl_shader_stage stage; > + unsigned then_cost; > + unsigned else_cost; > + unsigned min_branch_cost; > unsigned max_depth; > unsigned depth; > > struct set *condition_variables; > }; > > } /* anonymous namespace */ > > bool > lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions, > - unsigned max_depth) > + unsigned max_depth, unsigned min_branch_cost) > { > if (max_depth == UINT_MAX) > return false; > > - ir_if_to_cond_assign_visitor v(stage, max_depth); > + ir_if_to_cond_assign_visitor v(stage, max_depth, min_branch_cost); > > visit_list_elements(&v, instructions); > > return v.progress; > } > > void > check_ir_node(ir_instruction *ir, void *data) > { > ir_if_to_cond_assign_visitor *v = (ir_if_to_cond_assign_visitor *)data; > @@ -122,20 +136,34 @@ check_ir_node(ir_instruction *ir, void *data) > > /* Tess control shader outputs are like shared memory with complex > * side effects, so treat it that way. 
> */ > if (v->stage == MESA_SHADER_TESS_CTRL && > var->data.mode == ir_var_shader_out) > v->found_unsupported_op = true; > break; > } > > + /* SSBO, images, atomic counters are handled by ir_type_call */ > + case ir_type_texture: > + v->found_expensive_op = true; > + break; > + > + case ir_type_expression: > + case ir_type_dereference_array: > + case ir_type_dereference_record: > + if (v->is_then) > + v->then_cost++; > + else > + v->else_cost++; > + break; > + > default: > break; > } > } > > void > move_block_to_cond_assign(void *mem_ctx, > ir_if *if_ir, ir_rvalue *cond_expr, > exec_list *instructions, > struct set *set) > @@ -186,38 +214,53 @@ ir_if_to_cond_assign_visitor::visit_enter(ir_if *ir) > { > (void) ir; > this->depth++; > > return visit_continue; > } > > ir_visitor_status > ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir) > { > + bool must_lower = this->depth-- > this->max_depth; > + > /* Only flatten when beyond the GPU's maximum supported nesting depth. */ > - if (this->depth-- <= this->max_depth) > + if (!must_lower && this->min_branch_cost == 0) > return visit_continue; > > this->found_unsupported_op = false; > + this->found_expensive_op = false; > + this->then_cost = 0; > + this->else_cost = 0; > > ir_assignment *assign; > > /* Check that both blocks don't contain anything we can't support. */ > + this->is_then = true; > foreach_in_list(ir_instruction, then_ir, &ir->then_instructions) { > visit_tree(then_ir, check_ir_node, this); > } > + > + this->is_then = false; > foreach_in_list(ir_instruction, else_ir, &ir->else_instructions) { > visit_tree(else_ir, check_ir_node, this); > } > + > if (this->found_unsupported_op) > return visit_continue; /* can't handle inner unsupported opcodes */ > > + /* Skip if the branch cost is high enough or if there's an expensive op. 
> */ > + if (!must_lower && > + (this->found_expensive_op || > + MAX2(this->then_cost, this->else_cost) >= this->min_branch_cost)) > + return visit_continue; > + > void *mem_ctx = ralloc_parent(ir); > > /* Store the condition to a variable. Move all of the instructions from > * the then-clause of the if-statement. Use the condition variable as a > * condition for all assignments. > */ > ir_variable *const then_var = > new(mem_ctx) ir_variable(glsl_type::bool_type, > "if_to_cond_assign_then", > ir_var_temporary); > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev