-----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 On 08/29/2011 10:52 AM, Kenneth Graunke wrote: > From: Bryan Cain <bryanca...@gmail.com> > > Using multiply and reciprocal for integer division involves potentially > lossy floating point conversions. This is okay for older GPUs that > represent integers as floating point, but undesirable for GPUs with > native integer division instructions. > > TGSI, for example, has UDIV/IDIV instructions for integer division, > so it makes sense to handle this directly. Likewise for i965. > > Signed-off-by: Bryan Cain <bryanca...@gmail.com> > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org>
Reviewed-by: Ian Romanick <ian.d.roman...@intel.com> > --- > v3: Check for int/float explicitly, fixing a mistake where div_to_mul_rcp > would be run on integers when !lowering(INT_DIV_TO_MUL_RCP) and > lowering(DIV_TO_MUL_RCP). > > src/glsl/ir_optimization.h | 13 ++-- > src/glsl/lower_instructions.cpp | 125 > ++++++++++++++++------------ > src/mesa/drivers/dri/i965/brw_shader.cpp | 1 + > src/mesa/program/ir_to_mesa.cpp | 2 +- > src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- > 5 files changed, 80 insertions(+), 63 deletions(-) > > diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h > index f7808bd..48448d4 100644 > --- a/src/glsl/ir_optimization.h > +++ b/src/glsl/ir_optimization.h > @@ -29,12 +29,13 @@ > */ > > /* Operations for lower_instructions() */ > -#define SUB_TO_ADD_NEG 0x01 > -#define DIV_TO_MUL_RCP 0x02 > -#define EXP_TO_EXP2 0x04 > -#define POW_TO_EXP2 0x08 > -#define LOG_TO_LOG2 0x10 > -#define MOD_TO_FRACT 0x20 > +#define SUB_TO_ADD_NEG 0x01 > +#define DIV_TO_MUL_RCP 0x02 > +#define EXP_TO_EXP2 0x04 > +#define POW_TO_EXP2 0x08 > +#define LOG_TO_LOG2 0x10 > +#define MOD_TO_FRACT 0x20 > +#define INT_DIV_TO_MUL_RCP 0x40 > > bool do_common_optimization(exec_list *ir, bool linked, unsigned > max_unroll_iterations); > > diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp > index 23aa19b..d79eb0a 100644 > --- a/src/glsl/lower_instructions.cpp > +++ b/src/glsl/lower_instructions.cpp > @@ -32,6 +32,7 @@ > * Currently supported transformations: > * - SUB_TO_ADD_NEG > * - DIV_TO_MUL_RCP > + * - INT_DIV_TO_MUL_RCP > * - EXP_TO_EXP2 > * - POW_TO_EXP2 > * - LOG_TO_LOG2 > @@ -47,8 +48,8 @@ > * want to recognize add(op0, neg(op1)) or the other way around to > * produce a subtract anyway. > * > - * DIV_TO_MUL_RCP: > - * --------------- > + * DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP: > + * -------------------------------------- > * Breaks an ir_unop_div expression down to op0 * (rcp(op1)). > * > * Many GPUs don't have a divide instruction (945 and 965 included), > @@ -56,6 +57,10 @@ > * reciprocal. By breaking the operation down, constant reciprocals > * can get constant folded. > * > + * DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP > + * handles the integer case, converting to and from floating point so that > + * RCP is possible. > + * > * EXP_TO_EXP2 and LOG_TO_LOG2: > * ---------------------------- > * Many GPUs don't have a base e log or exponent instruction, but they > @@ -95,6 +100,7 @@ private: > > void sub_to_add_neg(ir_expression *); > void div_to_mul_rcp(ir_expression *); > + void int_div_to_mul_rcp(ir_expression *); > void mod_to_fract(ir_expression *); > void exp_to_exp2(ir_expression *); > void pow_to_exp2(ir_expression *); > @@ -127,60 +133,67 @@ > lower_instructions_visitor::sub_to_add_neg(ir_expression *ir) > void > lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir) > { > - if (!ir->operands[1]->type->is_integer()) { > - /* New expression for the 1.0 / op1 */ > - ir_rvalue *expr; > - expr = new(ir) ir_expression(ir_unop_rcp, > - ir->operands[1]->type, > - ir->operands[1], > - NULL); > - > - /* op0 / op1 -> op0 * (1.0 / op1) */ > - ir->operation = ir_binop_mul; > - ir->operands[1] = expr; > + assert(ir->operands[1]->type->is_float()); > + > + /* New expression for the 1.0 / op1 */ > + ir_rvalue *expr; > + expr = new(ir) ir_expression(ir_unop_rcp, > + ir->operands[1]->type, > + ir->operands[1]); > + > + /* op0 / op1 -> op0 * (1.0 / op1) */ > + ir->operation = ir_binop_mul; > + ir->operands[1] = expr; > + > + this->progress = true; > +} > + > +void > +lower_instructions_visitor::int_div_to_mul_rcp(ir_expression *ir) > +{ > + assert(ir->operands[1]->type->is_integer()); > + > + /* Be careful with integer division -- we need to do it as a > + * float and re-truncate, since rcp(n > 1) of an integer would > + * just be 0. > + */ > + ir_rvalue *op0, *op1; > + const struct glsl_type *vec_type; > + > + vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, > + ir->operands[1]->type->vector_elements, > + ir->operands[1]->type->matrix_columns); > + > + if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) > + op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], > NULL); > + else > + op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], > NULL); > + > + op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL); > + > + vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, > + ir->operands[0]->type->vector_elements, > + ir->operands[0]->type->matrix_columns); > + > + if (ir->operands[0]->type->base_type == GLSL_TYPE_INT) > + op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], > NULL); > + else > + op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], > NULL); > + > + vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, > + ir->type->vector_elements, > + ir->type->matrix_columns); > + > + op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1); > + > + if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) { > + ir->operation = ir_unop_f2i; > + ir->operands[0] = op0; > } else { > - /* Be careful with integer division -- we need to do it as a > - * float and re-truncate, since rcp(n > 1) of an integer would > - * just be 0. > - */ > - ir_rvalue *op0, *op1; > - const struct glsl_type *vec_type; > - > - vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, > - ir->operands[1]->type->vector_elements, > - ir->operands[1]->type->matrix_columns); > - > - if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) > - op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], > NULL); > - else > - op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], > NULL); > - > - op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL); > - > - vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, > - ir->operands[0]->type->vector_elements, > - ir->operands[0]->type->matrix_columns); > - > - if (ir->operands[0]->type->base_type == GLSL_TYPE_INT) > - op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], > NULL); > - else > - op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], > NULL); > - > - vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, > - ir->type->vector_elements, > - ir->type->matrix_columns); > - > - op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1); > - > - if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) { > - ir->operation = ir_unop_f2i; > - ir->operands[0] = op0; > - } else { > - ir->operation = ir_unop_i2u; > - ir->operands[0] = new(ir) ir_expression(ir_unop_f2i, op0); > - } > - ir->operands[1] = NULL; > + ir->operation = ir_unop_i2u; > + ir->operands[0] = new(ir) ir_expression(ir_unop_f2i, op0); > } > + ir->operands[1] = NULL; > > this->progress = true; > } > @@ -265,7 +278,9 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) > break; > > case ir_binop_div: > - if (lowering(DIV_TO_MUL_RCP)) > + if (ir->operands[1]->type->is_integer() && > lowering(INT_DIV_TO_MUL_RCP)) > + int_div_to_mul_rcp(ir); > + else if (ir->operands[1]->type->is_float() && lowering(DIV_TO_MUL_RCP)) > div_to_mul_rcp(ir); > break; > > diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp > b/src/mesa/drivers/dri/i965/brw_shader.cpp > index 3ff6bba..7e53097 100644 > --- a/src/mesa/drivers/dri/i965/brw_shader.cpp > +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp > @@ -100,6 +100,7 @@ brw_link_shader(struct gl_context *ctx, struct > gl_shader_program *prog) > lower_instructions(shader->ir, > MOD_TO_FRACT | > DIV_TO_MUL_RCP | > + INT_DIV_TO_MUL_RCP | > SUB_TO_ADD_NEG | > EXP_TO_EXP2 | > LOG_TO_LOG2); > diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp > index 6820e4c..dd154db 100644 > --- a/src/mesa/program/ir_to_mesa.cpp > +++ b/src/mesa/program/ir_to_mesa.cpp > @@ -3232,7 +3232,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct > gl_shader_program *prog) > /* Lowering */ > do_mat_op_to_vec(ir); > lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 > - | LOG_TO_LOG2 > + | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP > | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); > > progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, > options->EmitNoCont, options->EmitNoLoops) || progress; > diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > index 9cac309..ec42742 100644 > --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > @@ -5012,7 +5012,7 @@ st_link_shader(struct gl_context *ctx, struct > gl_shader_program *prog) > /* Lowering */ > do_mat_op_to_vec(ir); > lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 > - | LOG_TO_LOG2 > + | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP > | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); > > progress = do_lower_jumps(ir, true, true, > options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || > progress; -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.11 (GNU/Linux) Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/ iEYEARECAAYFAk5b1g0ACgkQX1gOwKyEAw/aYwCePGPJqQMS+jrV8ptoASkv4CaL TVUAn1+jEglPy5hPofiyaU73JMEjG1c3 =qLnO -----END PGP SIGNATURE----- _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev