Not much to say about the code (the theory sounds sane) but I was wondering about the comment. Why did glsl implement this really as x * (1 - a) + y * a? The usual way for lerp would be (y - x) * a + x, i.e. two ops for most gpus (sub+mad, or sub+mul+add). But I'm wondering if that sacrifices precision or gets Infs wrong or something (this is the way the gallivm code implements TGSI_OPCODE_LRP). I guess strict IEEE conformance would really forbid that optimization though...
Roland Am 20.02.2013 02:03, schrieb Matt Turner: > From: Kenneth Graunke <kenn...@whitecape.org> > > Many GPUs have an instruction to do linear interpolation which is more > efficient than simply performing the algebra necessary (two multiplies, > an add, and a subtract). > > Pattern matching or peepholing this is more desirable, but can be > tricky. By using an opcode, we can at least make shaders which use the > mix() built-in get the more efficient behavior. > > Currently, all consumers lower ir_triop_lrp. Subsequent patches will > actually generate different code. > > v2 [mattst88]: > - Add LRP_TO_ARITH flag to ir_to_mesa.cpp. Will be removed in a > subsequent patch and ir_triop_lrp translated directly. > > Reviewed-by: Matt Turner <matts...@gmail.com> > --- > src/glsl/builtins/ir/mix.ir | 14 +++++----- > src/glsl/ir.cpp | 4 +++ > src/glsl/ir.h | 7 +++++ > src/glsl/ir_constant_expression.cpp | 13 ++++++++++ > src/glsl/ir_optimization.h | 1 + > src/glsl/ir_validate.cpp | 6 ++++ > src/glsl/lower_instructions.cpp | 35 > ++++++++++++++++++++++++++++ > src/mesa/drivers/dri/i965/brw_shader.cpp | 3 +- > src/mesa/program/ir_to_mesa.cpp | 6 ++++- > src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 + > 10 files changed, 81 insertions(+), 9 deletions(-) > > diff --git a/src/glsl/builtins/ir/mix.ir b/src/glsl/builtins/ir/mix.ir > index 70ae13c..e666532 100644 > --- a/src/glsl/builtins/ir/mix.ir > +++ b/src/glsl/builtins/ir/mix.ir > @@ -4,49 +4,49 @@ > (declare (in) float arg0) > (declare (in) float arg1) > (declare (in) float arg2)) > - ((return (expression float + (expression float * (var_ref arg0) > (expression float - (constant float (1.000000)) (var_ref arg2))) (expression > float * (var_ref arg1) (var_ref arg2)))))) > + ((return (expression float lrp (var_ref arg0) (var_ref arg1) (var_ref > arg2))))) > > (signature vec2 > (parameters > (declare (in) vec2 arg0) > (declare (in) vec2 arg1) > (declare (in) vec2 arg2)) > - ((return (expression vec2 + (expression vec2 * (var_ref arg0) > (expression vec2 - (constant float (1.000000)) (var_ref arg2))) (expression > vec2 * (var_ref arg1) (var_ref arg2)))))) > + ((return (expression vec2 lrp (var_ref arg0) (var_ref arg1) (var_ref > arg2))))) > > (signature vec3 > (parameters > (declare (in) vec3 arg0) > (declare (in) vec3 arg1) > (declare (in) vec3 arg2)) > - ((return (expression vec3 + (expression vec3 * (var_ref arg0) > (expression vec3 - (constant float (1.000000)) (var_ref arg2))) (expression > vec3 * (var_ref arg1) (var_ref arg2)))))) > + ((return (expression vec3 lrp (var_ref arg0) (var_ref arg1) (var_ref > arg2))))) > > (signature vec4 > (parameters > (declare (in) vec4 arg0) > (declare (in) vec4 arg1) > (declare (in) vec4 arg2)) > - ((return (expression vec4 + (expression vec4 * (var_ref arg0) > (expression vec4 - (constant float (1.000000)) (var_ref arg2))) (expression > vec4 * (var_ref arg1) (var_ref arg2)))))) > + ((return (expression vec4 lrp (var_ref arg0) (var_ref arg1) (var_ref > arg2))))) > > (signature vec2 > (parameters > (declare (in) vec2 arg0) > (declare (in) vec2 arg1) > (declare (in) float arg2)) > - ((return (expression vec2 + (expression vec2 * (var_ref arg0) > (expression float - (constant float (1.000000)) (var_ref arg2))) (expression > vec2 * (var_ref arg1) (var_ref arg2)))))) > + ((return (expression vec2 lrp (var_ref arg0) (var_ref arg1) (var_ref > arg2))))) > > (signature vec3 > (parameters > (declare (in) vec3 arg0) > (declare (in) vec3 arg1) > (declare (in) float arg2)) > - ((return (expression vec3 + (expression vec3 * (var_ref arg0) > (expression float - (constant float (1.000000)) (var_ref arg2))) (expression > vec3 * (var_ref arg1) (var_ref arg2)))))) > + ((return (expression vec3 lrp (var_ref arg0) (var_ref arg1) (var_ref > arg2))))) > > (signature vec4 > (parameters > (declare (in) vec4 arg0) > (declare (in) vec4 arg1) > (declare (in) float arg2)) > - ((return (expression vec4 + (expression vec4 * (var_ref arg0) > (expression float - (constant float (1.000000)) (var_ref arg2))) (expression > vec4 * (var_ref arg1) (var_ref arg2)))))) > + ((return (expression vec4 lrp (var_ref arg0) (var_ref arg1) (var_ref > arg2))))) > > (signature float > (parameters > diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp > index 4ccdc42..717d6f6 100644 > --- a/src/glsl/ir.cpp > +++ b/src/glsl/ir.cpp > @@ -416,6 +416,9 @@ ir_expression::get_num_operands(ir_expression_operation > op) > if (op <= ir_last_binop) > return 2; > > + if (op <= ir_last_triop) > + return 3; > + > if (op == ir_quadop_vector) > return 4; > > @@ -502,6 +505,7 @@ static const char *const operator_strs[] = { > "pow", > "packHalf2x16_split", > "ubo_load", > + "lrp", > "vector", > }; > > diff --git a/src/glsl/ir.h b/src/glsl/ir.h > index d878bd8..d63dac1 100644 > --- a/src/glsl/ir.h > +++ b/src/glsl/ir.h > @@ -1118,6 +1118,13 @@ enum ir_expression_operation { > */ > ir_last_binop = ir_binop_ubo_load, > > + ir_triop_lrp, > + > + /** > + * A sentinel marking the last of the ternary operations. > + */ > + ir_last_triop = ir_triop_lrp, > + > ir_quadop_vector, > > /** > diff --git a/src/glsl/ir_constant_expression.cpp > b/src/glsl/ir_constant_expression.cpp > index 86b863f..c2d0dc4 100644 > --- a/src/glsl/ir_constant_expression.cpp > +++ b/src/glsl/ir_constant_expression.cpp > @@ -1248,6 +1248,19 @@ ir_expression::constant_expression_value(struct > hash_table *variable_context) > } > break; > > + case ir_triop_lrp: { > + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); > + assert(op[1]->type->base_type == GLSL_TYPE_FLOAT); > + assert(op[2]->type->base_type == GLSL_TYPE_FLOAT); > + > + unsigned c2_inc = op[2]->type->is_scalar() ? 0 : 1; > + for (unsigned c = 0, c2 = 0; c < components; c2 += c2_inc, c++) { > + data.f[c] = op[0]->value.f[c] * (1.0f - op[2]->value.f[c2]) + > + (op[1]->value.f[c] * op[2]->value.f[c2]); > + } > + break; > + } > + > case ir_quadop_vector: > for (unsigned c = 0; c < this->type->vector_elements; c++) { > switch (this->type->base_type) { > diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h > index 8f33018..2454bbe 100644 > --- a/src/glsl/ir_optimization.h > +++ b/src/glsl/ir_optimization.h > @@ -36,6 +36,7 @@ > #define LOG_TO_LOG2 0x10 > #define MOD_TO_FRACT 0x20 > #define INT_DIV_TO_MUL_RCP 0x40 > +#define LRP_TO_ARITH 0x80 > > /** > * \see class lower_packing_builtins_visitor > diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp > index d8cafd5..24ea506 100644 > --- a/src/glsl/ir_validate.cpp > +++ b/src/glsl/ir_validate.cpp > @@ -468,6 +468,12 @@ ir_validate::visit_leave(ir_expression *ir) > assert(ir->operands[1]->type == glsl_type::uint_type); > break; > > + case ir_triop_lrp: > + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); > + assert(ir->operands[0]->type == ir->operands[1]->type); > + assert(ir->operands[2]->type == ir->operands[0]->type || > ir->operands[2]->type == glsl_type::float_type); > + break; > + > case ir_quadop_vector: > /* The vector operator collects some number of scalars and generates a > * vector from them. > diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp > index a8ef765..1ce7b7c 100644 > --- a/src/glsl/lower_instructions.cpp > +++ b/src/glsl/lower_instructions.cpp > @@ -37,6 +37,7 @@ > * - POW_TO_EXP2 > * - LOG_TO_LOG2 > * - MOD_TO_FRACT > + * - LRP_TO_ARITH > * > * SUB_TO_ADD_NEG: > * --------------- > @@ -79,13 +80,20 @@ > * Many GPUs don't have a MOD instruction (945 and 965 included), and > * if we have to break it down like this anyway, it gives an > * opportunity to do things like constant fold the (1.0 / op1) easily. > + * > + * LRP_TO_ARITH: > + * ------------- > + * Converts ir_triop_lrp to (op0 * (1.0f - op2)) + (op1 * op2). > */ > > #include "main/core.h" /* for M_LOG2E */ > #include "glsl_types.h" > #include "ir.h" > +#include "ir_builder.h" > #include "ir_optimization.h" > > +using namespace ir_builder; > + > class lower_instructions_visitor : public ir_hierarchical_visitor { > public: > lower_instructions_visitor(unsigned lower) > @@ -105,6 +113,7 @@ private: > void exp_to_exp2(ir_expression *); > void pow_to_exp2(ir_expression *); > void log_to_log2(ir_expression *); > + void lrp_to_arith(ir_expression *); > }; > > /** > @@ -268,6 +277,27 @@ lower_instructions_visitor::mod_to_fract(ir_expression > *ir) > this->progress = true; > } > > +void > +lower_instructions_visitor::lrp_to_arith(ir_expression *ir) > +{ > + /* (lrp x y a) -> x*(1-a) + y*a */ > + > + /* Save op2 */ > + ir_variable *temp = new(ir) ir_variable(ir->operands[2]->type, > "lrp_factor", > + ir_var_temporary); > + this->base_ir->insert_before(temp); > + this->base_ir->insert_before(assign(temp, ir->operands[2])); > + > + ir_constant *one = new(ir) ir_constant(1.0f); > + > + ir->operation = ir_binop_add; > + ir->operands[0] = mul(ir->operands[0], sub(one, temp)); > + ir->operands[1] = mul(ir->operands[1], temp); > + ir->operands[2] = NULL; > + > + this->progress = true; > +} > + > ir_visitor_status > lower_instructions_visitor::visit_leave(ir_expression *ir) > { > @@ -304,6 +334,11 @@ lower_instructions_visitor::visit_leave(ir_expression > *ir) > pow_to_exp2(ir); > break; > > + case ir_triop_lrp: > + if (lowering(LRP_TO_ARITH)) > + lrp_to_arith(ir); > + break; > + > default: > return visit_continue; > } > diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp > b/src/mesa/drivers/dri/i965/brw_shader.cpp > index c71715e..9ab18cc 100644 > --- a/src/mesa/drivers/dri/i965/brw_shader.cpp > +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp > @@ -155,7 +155,8 @@ brw_link_shader(struct gl_context *ctx, struct > gl_shader_program *shProg) > DIV_TO_MUL_RCP | > SUB_TO_ADD_NEG | > EXP_TO_EXP2 | > - LOG_TO_LOG2); > + LOG_TO_LOG2 | > + LRP_TO_ARITH); > > /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, > * if-statements need to be flattened. > diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp > index ce409ec..30305d2 100644 > --- a/src/mesa/program/ir_to_mesa.cpp > +++ b/src/mesa/program/ir_to_mesa.cpp > @@ -1478,6 +1478,10 @@ ir_to_mesa_visitor::visit(ir_expression *ir) > assert(!"not supported"); > break; > > + case ir_triop_lrp: > + assert(!"ir_triop_lrp should have been lowered."); > + break; > + > case ir_quadop_vector: > /* This operation should have already been handled. > */ > @@ -2993,7 +2997,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct > gl_shader_program *prog) > /* Lowering */ > do_mat_op_to_vec(ir); > lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 > - | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP > + | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP | > LRP_TO_ARITH > | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); > > progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, > options->EmitNoCont, options->EmitNoLoops) || progress; > diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > index 63b7428..757bd71 100644 > --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > @@ -5189,6 +5189,7 @@ st_link_shader(struct gl_context *ctx, struct > gl_shader_program *prog) > DIV_TO_MUL_RCP | > EXP_TO_EXP2 | > LOG_TO_LOG2 | > + LRP_TO_ARITH | > (options->EmitNoPow ? POW_TO_EXP2 : 0) | > (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : > 0)); > > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev