On 06/09/2014 02:11 PM, Matt Turner wrote: > Cuts five instructions out of SynMark's Gl32VSInstancing benchmark. > --- > src/glsl/opt_algebraic.cpp | 46 > ++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 46 insertions(+) > > diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp > index d57c3e8..be65799 100644 > --- a/src/glsl/opt_algebraic.cpp > +++ b/src/glsl/opt_algebraic.cpp > @@ -119,6 +119,44 @@ update_type(ir_expression *ir) > ir->type = ir->operands[1]->type; > } > > +/* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */ > +static ir_expression * > +try_replace_with_dot(ir_expression *expr0, ir_expression *expr1, void > *mem_ctx) > +{ > + if (expr0 && expr0->operation == ir_binop_add && > + expr1 && expr1->operation == ir_binop_add) { > + ir_swizzle *x = expr0->operands[0]->as_swizzle(); > + ir_swizzle *y = expr0->operands[1]->as_swizzle(); > + ir_swizzle *z = expr1->operands[0]->as_swizzle(); > + ir_swizzle *w = expr1->operands[1]->as_swizzle(); > + > + if (!x || x->mask.num_components != 1 || x->mask.has_duplicates || > + !y || y->mask.num_components != 1 || y->mask.has_duplicates || > + !z || z->mask.num_components != 1 || z->mask.has_duplicates || > + !w || w->mask.num_components != 1 || w->mask.has_duplicates) {
Are the has_duplicates checks necessary? Since num_components must be 1 here, and a single-component swizzle cannot contain a duplicated component, it's not obvious to me what the extra checks are doing. > + return NULL; > + } > + > + bool swiz_seen[4] = {false, false, false, false}; > + swiz_seen[x->mask.x] = true; > + swiz_seen[y->mask.x] = true; > + swiz_seen[z->mask.x] = true; > + swiz_seen[w->mask.x] = true; > + > + if (!swiz_seen[0] || !swiz_seen[1] || > + !swiz_seen[2] || !swiz_seen[3]) { > + return NULL; > + } > + > + if (x->val->equals(y->val) && > + x->val->equals(z->val) && > + x->val->equals(w->val)) { > + return dot(x->val, new(mem_ctx) ir_constant(1.0f, 4)); > + } > + } > + return NULL; > +} > + > void > ir_algebraic_visitor::reassociate_operands(ir_expression *ir1, > int op1, > @@ -332,6 +370,14 @@ ir_algebraic_visitor::handle_expression(ir_expression > *ir) > if (op_const[1] && !op_const[0]) > reassociate_constant(ir, 1, op_const[1], op_expr[0]); > > + /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */ > + if (options->OptimizeForAOS) { > + ir_expression *expr = try_replace_with_dot(op_expr[0], op_expr[1], > + mem_ctx); > + if (expr) > + return expr; > + } > + > /* Replace (-x + y) * a + x and commutative variations with lrp(x, y, > a). > * > * (-x + y) * a + x > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev