This patch is: Reviewed-by: Iago Toral Quiroga <ito...@igalia.com>
On Mon, 2017-01-16 at 17:20 +0100, Nicolai Hähnle wrote:
> From: Nicolai Hähnle <nicolai.haeh...@amd.com>
>
> ---
>  src/compiler/glsl/ir_optimization.h      |  4 +++-
>  src/compiler/glsl/lower_instructions.cpp | 19 +++++++++++--------
>  2 files changed, 14 insertions(+), 9 deletions(-)
>
> diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
> index 0d6c4e6..01e5270 100644
> --- a/src/compiler/glsl/ir_optimization.h
> +++ b/src/compiler/glsl/ir_optimization.h
> @@ -30,7 +30,7 @@
>
>  /* Operations for lower_instructions() */
>  #define SUB_TO_ADD_NEG 0x01
> -#define DIV_TO_MUL_RCP 0x02
> +#define FDIV_TO_MUL_RCP 0x02
>  #define EXP_TO_EXP2 0x04
>  #define POW_TO_EXP2 0x08
>  #define LOG_TO_LOG2 0x10
> @@ -49,6 +49,8 @@
>  #define FIND_LSB_TO_FLOAT_CAST 0x20000
>  #define FIND_MSB_TO_FLOAT_CAST 0x40000
>  #define IMUL_HIGH_TO_MUL 0x80000
> +#define DDIV_TO_MUL_RCP 0x100000
> +#define DIV_TO_MUL_RCP (FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP)
>
>  /**
>   * \see class lower_packing_builtins_visitor
> diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp
> index 9fc83d1..729cb13 100644
> --- a/src/compiler/glsl/lower_instructions.cpp
> +++ b/src/compiler/glsl/lower_instructions.cpp
> @@ -54,8 +54,8 @@
>   * want to recognize add(op0, neg(op1)) or the other way around to
>   * produce a subtract anyway.
>   *
> - * DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP:
> - * --------------------------------------
> + * FDIV_TO_MUL_RCP, DDIV_TO_MUL_RCP, and INT_DIV_TO_MUL_RCP:
> + * ---------------------------------------------------------
>   * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
>   *
>   * Many GPUs don't have a divide instruction (945 and 965 included),
> @@ -63,9 +63,11 @@
>   * reciprocal. By breaking the operation down, constant reciprocals
>   * can get constant folded.
>   *
> - * DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP
> - * handles the integer case, converting to and from floating point so that
> - * RCP is possible.
> + * FDIV_TO_MUL_RCP only lowers single-precision floating point division;
> + * DDIV_TO_MUL_RCP only lowers double-precision floating point division.
> + * DIV_TO_MUL_RCP is a convenience macro that sets both flags.
> + * INT_DIV_TO_MUL_RCP handles the integer case, converting to and from floating
> + * point so that RCP is possible.
>   *
>   * EXP_TO_EXP2 and LOG_TO_LOG2:
>   * ----------------------------
> @@ -326,7 +328,8 @@ lower_instructions_visitor::mod_to_floor(ir_expression *ir)
>     /* Don't generate new IR that would need to be lowered in an additional
>      * pass.
>      */
> -   if (lowering(DIV_TO_MUL_RCP) && (ir->type->is_float() || ir->type->is_double()))
> +   if ((lowering(FDIV_TO_MUL_RCP) && ir->type->is_float()) ||
> +       (lowering(DDIV_TO_MUL_RCP) && ir->type->is_double()))
>        div_to_mul_rcp(div_expr);
>
>     ir_expression *const floor_expr =
> @@ -1599,8 +1602,8 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
>     case ir_binop_div:
>        if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP))
>           int_div_to_mul_rcp(ir);
> -      else if ((ir->operands[1]->type->is_float() ||
> -                ir->operands[1]->type->is_double()) && lowering(DIV_TO_MUL_RCP))
> +      else if ((ir->operands[1]->type->is_float() && lowering(FDIV_TO_MUL_RCP)) ||
> +               (ir->operands[1]->type->is_double() && lowering(DDIV_TO_MUL_RCP)))
>           div_to_mul_rcp(ir);
>        break;
>
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev