Cuts five instructions out of SynMark's Gl32VSInstancing benchmark. --- The pattern match could be made more general, but I don't see any other cases we could optimize in our existing shader collection.
src/glsl/opt_algebraic.cpp | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp index eb5544a..17bcdfc 100644 --- a/src/glsl/opt_algebraic.cpp +++ b/src/glsl/opt_algebraic.cpp @@ -329,6 +329,41 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) if (op_const[1] && !op_const[0]) reassociate_constant(ir, 1, op_const[1], op_expr[0]); + /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */ + if (options->OptimizeForAOS) { + if (op_expr[0] && op_expr[0]->operation == ir_binop_add && + op_expr[1] && op_expr[1]->operation == ir_binop_add) { + ir_swizzle *x = op_expr[0]->operands[0]->as_swizzle(); + ir_swizzle *y = op_expr[0]->operands[1]->as_swizzle(); + ir_swizzle *z = op_expr[1]->operands[0]->as_swizzle(); + ir_swizzle *w = op_expr[1]->operands[1]->as_swizzle(); + + if (!x || x->mask.num_components != 1 || x->mask.has_duplicates || + !y || y->mask.num_components != 1 || y->mask.has_duplicates || + !z || z->mask.num_components != 1 || z->mask.has_duplicates || + !w || w->mask.num_components != 1 || w->mask.has_duplicates) { + break; + } + + bool swiz_seen[4] = {false, false, false, false}; + swiz_seen[x->mask.x] = true; + swiz_seen[y->mask.x] = true; + swiz_seen[z->mask.x] = true; + swiz_seen[w->mask.x] = true; + + if (!swiz_seen[0] || !swiz_seen[1] || + !swiz_seen[2] || !swiz_seen[3]) { + break; + } + + if (x->val->equals(y->val) && + x->val->equals(z->val) && + x->val->equals(w->val)) { + return dot(x->val, new(mem_ctx) ir_constant(1.0f, 4)); + } + } + } + /* Replace (-x + y) * a + x and commutative variations with lrp(x, y, a). * * (-x + y) * a + x -- 1.8.3.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev