Matt Turner <matts...@gmail.com> writes: > The intention of this pass was to give us better instruction scheduling > opportunities, but it unexpectedly reduced some instruction counts as > well: > > total instructions in shared programs: 1666639 -> 1666073 (-0.03%) > instructions in affected programs: 54612 -> 54046 (-1.04%) > (and trades 4 SIMD16 programs in SS3)
Patches 1, 3, 4, 6 are: Reviewed-by: Eric Anholt <e...@anholt.net> I got lost on this one, though... > diff --git a/src/glsl/opt_rebalance_tree.cpp b/src/glsl/opt_rebalance_tree.cpp > new file mode 100644 > index 0000000..91aa999 > --- /dev/null > +++ b/src/glsl/opt_rebalance_tree.cpp > +/** > + * \file opt_rebalance_tree.cpp > + * > + * Rebalances a reduction expression tree. > + * > + * For reduction operations (e.g., x + y + z + w) we generate an expression > + * tree like > + * > + * + > + * / \ > + * + w > + * / \ > + * + z > + * / \ > + * x y > + * > + * which we can rebalance into > + * > + * + > + * / \ > + * / \ > + * + + > + * / \ / \ > + * x y z w > + * > + * to get a better instruction scheduling. > + * > + * See "Tree Rebalancing in Optimal Editor Time and Space" by Quentin F. > Stout > + * and Bette L. Warren. > + */ > + > +#include "ir.h" > +#include "ir_visitor.h" > +#include "ir_rvalue_visitor.h" > +#include "ir_optimization.h" > + > +/* The DSW algorithm generates a degenerate tree (really, a linked list) in > + * tree_to_vine(). We'd rather not leave a binary expression with only one > + * operand, so trivial modifications (the ternary operators below) are needed > + * to ensure that we only rotate around the ir_expression nodes of the tree. > + */ Why do we care about having NULL remainder.left briefly, if we're definitely going to be rebalancing back? > +static unsigned > +tree_to_vine(ir_expression *root) > +{ > + unsigned size = 0; > + ir_rvalue *vine_tail = root; > + ir_rvalue *remainder = root->operands[1]; > + > + while (remainder != NULL) { > + ir_expression *remainder_left = remainder->as_expression() ? > + remainder->as_expression()->operands[0]->as_expression() : NULL; A "remainder_expr = remainder->as_expression();" temp would have kept me From misreading this function a couple of times. > + > + if (remainder_left == NULL) { > + /* move vine_tail down one */ > + vine_tail = remainder; > + remainder = remainder->as_expression() ? > + remainder->as_expression()->operands[1] : NULL; > + size++; > + } else { > + /* rotate */ > + ir_expression *tempptr = remainder_left; > + remainder->as_expression()->operands[0] = tempptr->operands[1]; > + tempptr->operands[1] = remainder; > + remainder = tempptr; > + vine_tail->as_expression()->operands[1] = tempptr; > + } > + } > + > + return size; > +} > +static void > +compression(ir_expression *root, unsigned count) > +{ > + ir_expression *scanner = root; > + > + for (unsigned i = 0; i < count; i++) { > + ir_expression *child = scanner->operands[1]->as_expression(); > + scanner->operands[1] = child->operands[1]; > + scanner = scanner->operands[1]->as_expression(); > + child->operands[1] = scanner->operands[0]; > + scanner->operands[0] = child; > + } > +} > + > +static void > +vine_to_tree(ir_expression *root, unsigned size) > +{ > + int n = size - 1; > + for (int m = n / 2; m > 0; m = n / 2) { > + compression(root, m); > + n -= m + 1; > + } > +} These two functions need some comments. I'm not sure what those needed comments are. > + > +namespace { > + > +class ir_rebalance_visitor : public ir_rvalue_enter_visitor { > +public: > + ir_rebalance_visitor() > + { > + progress = false; > + } > + > + void handle_rvalue(ir_rvalue **rvalue); > + > + bool progress; > +}; > + > +struct is_reduction_data { > + ir_expression_operation operation; > + const glsl_type *type; > + unsigned num_expr; > + bool is_reduction; > + bool contains_constant; > +}; > + > +} /* anonymous namespace */ > + > +static bool > +is_reduction_operation(ir_expression_operation operation) > +{ > + switch (operation) { > + case ir_binop_add: > + case ir_binop_mul: > + case ir_binop_bit_and: > + case ir_binop_bit_xor: > + case ir_binop_bit_or: > + case ir_binop_logic_and: > + case ir_binop_logic_xor: > + case ir_binop_logic_or: > + case ir_binop_min: > + case ir_binop_max: > + return true; > + default: > + return false; > + } > +} > + > +static void > +is_reduction(ir_instruction *ir, void *data) > +{ > + struct is_reduction_data *ird = (struct is_reduction_data *)data; > + if (!ird->is_reduction) > + return; > + > + /* We don't want to balance a tree that contains multiple constants, since > + * we'll be able to constant fold them if they're not in separate > subtrees. > + */ > + if (ir->as_constant()) { > + if (ird->contains_constant) { > + ird->is_reduction = false; > + } > + ird->contains_constant = true; > + return; > + } > + > + ir_expression *expr = ir->as_expression(); > + if (!expr) > + return; > + > + /* Non-constant matrices might still contain constant vec4 that we can > + * constant fold once split up. Handling matrices will need some more > + * work. > + */ > + if (expr->type->is_matrix()) { > + ird->is_reduction = false; > + return; > + } > + > + if (ird->type != NULL && ird->type != expr->type) { > + ird->is_reduction = false; > + return; > + } > + ird->type = expr->type; > + > + ird->num_expr++; > + if (is_reduction_operation(expr->operation)) { > + if (ird->operation != 0 && ird->operation != expr->operation) > + ird->is_reduction = false; > + ird->operation = expr->operation; > + } else { > + ird->is_reduction = false; > + } It seems weird that this pass only rebalances when no expression in the tree references the result of an expression of a different operation (or, similarly, a different result type). I think that produces a weird tension for us where we sometimes don't want to pull things into bigger trees, because it would break this pass. > +static ir_rvalue * > +handle_expression(ir_expression *expr) > +{ > + struct is_reduction_data ird; > + ird.operation = (ir_expression_operation)0; > + ird.type = NULL; > + ird.num_expr = 0; > + ird.is_reduction = true; > + ird.contains_constant = false; > + > + visit_tree(expr, is_reduction, (void *)&ird); > + > + if (ird.is_reduction && ird.num_expr > 2) { > + ir_constant z = ir_constant(0.0f); > + ir_expression pseudo_root = ir_expression(ir_binop_add, &z, expr); > + > + unsigned size = tree_to_vine(&pseudo_root); > + vine_to_tree(&pseudo_root, size); > + > + expr = pseudo_root.operands[1]->as_expression(); > + } > + return expr; A comment about why we need the pseudo_root here would be nice. > +} > + > +void > +ir_rebalance_visitor::handle_rvalue(ir_rvalue **rvalue) > +{ > + if (!*rvalue) > + return; > + > + ir_expression *expr = (*rvalue)->as_expression(); > + if (!expr || !is_reduction_operation(expr->operation)) > + return; > + > + ir_rvalue *new_rvalue = handle_expression(expr); > + if (new_rvalue == *rvalue) > + return; > + > + *rvalue = new_rvalue; > + this->progress = true; > +} If progress is ever set to true, when what makes sure progress is false in the next pass over the tree? Is progress just not getting set to true ever?
pgpXipROAxpN9.pgp
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev