On 01/20/2015 08:09 AM, Iago Toral Quiroga wrote: > Currently, Mesa uses the lowering pass MOD_TO_FRACT to implement > mod(x,y) as y * fract(x/y). This implementation has a down side though: > it introduces precision errors due to the fract() operation. Even worse, > since the result of fract() is multiplied by y, the larger y gets the > larger the precision error we produce, so for large enough numbers the > precision loss is significant. Some examples on i965: > > Operation Precision error > ----------------------------------------------------- > mod(-1.951171875, 1.9980468750) 0.0000000447 > mod(121.57, 13.29) 0.0000023842 > mod(3769.12, 321.99) 0.0000762939 > mod(3769.12, 1321.99) 0.0001220703 > mod(-987654.125, 123456.984375) 0.0160663128 > mod( 987654.125, 123456.984375) 0.0312500000 > > This patch replaces the current lowering pass with a different one > (MOD_TO_FLOOR) that follows the recommended implementation in the GLSL > man pages: > > mod(x,y) = x - y * floor(x/y) > > This implementation eliminates the precision errors at the expense of > an additional add instruction on some systems. On systems that can do > negate with multiply-add in a single operation this new implementation > would come at no additional cost. > > v2 (Ian Romanick) > - Do not clone operands because when they are expressions we would be > duplicating them and that can lead to suboptimal code. > > Fixes the following 16 dEQP tests: > dEQP-GLES3.functional.shaders.builtin_functions.precision.mod.mediump_* > dEQP-GLES3.functional.shaders.builtin_functions.precision.mod.highp_*
Reviewed-by: Ian Romanick <ian.d.roman...@intel.com> > --- > src/glsl/README | 2 +- > src/glsl/ir_optimization.h | 2 +- > src/glsl/lower_instructions.cpp | 65 > +++++++++++++++----------- > src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 +- > src/mesa/drivers/dri/i965/brw_shader.cpp | 2 +- > src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 +- > src/mesa/program/ir_to_mesa.cpp | 4 +- > src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- > 8 files changed, 47 insertions(+), 34 deletions(-) > > diff --git a/src/glsl/README b/src/glsl/README > index 2f93f12..bfcf69f 100644 > --- a/src/glsl/README > +++ b/src/glsl/README > @@ -187,7 +187,7 @@ You may also need to update the backends if they will see > the new expr type: > > You can then use the new expression from builtins (if all backends > would rather see it), or scan the IR and convert to use your new > -expression type (see ir_mod_to_fract, for example). > +expression type (see ir_mod_to_floor, for example). > > Q: How is memory management handled in the compiler? > > diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h > index 34e0b4b..912d910 100644 > --- a/src/glsl/ir_optimization.h > +++ b/src/glsl/ir_optimization.h > @@ -34,7 +34,7 @@ > #define EXP_TO_EXP2 0x04 > #define POW_TO_EXP2 0x08 > #define LOG_TO_LOG2 0x10 > -#define MOD_TO_FRACT 0x20 > +#define MOD_TO_FLOOR 0x20 > #define INT_DIV_TO_MUL_RCP 0x40 > #define BITFIELD_INSERT_TO_BFM_BFI 0x80 > #define LDEXP_TO_ARITH 0x100 > diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp > index 6842853..09afe55 100644 > --- a/src/glsl/lower_instructions.cpp > +++ b/src/glsl/lower_instructions.cpp > @@ -36,7 +36,7 @@ > * - EXP_TO_EXP2 > * - POW_TO_EXP2 > * - LOG_TO_LOG2 > - * - MOD_TO_FRACT > + * - MOD_TO_FLOOR > * - LDEXP_TO_ARITH > * - BITFIELD_INSERT_TO_BFM_BFI > * - CARRY_TO_ARITH > @@ -77,14 +77,17 @@ > * Many older GPUs don't have an x**y instruction. For these GPUs, convert > * x**y to 2**(y * log2(x)). > * > - * MOD_TO_FRACT: > + * MOD_TO_FLOOR: > * ------------- > - * Breaks an ir_binop_mod expression down to (op1 * fract(op0 / op1)) > + * Breaks an ir_binop_mod expression down to (op0 - op1 * floor(op0 / op1)) > * > * Many GPUs don't have a MOD instruction (945 and 965 included), and > * if we have to break it down like this anyway, it gives an > * opportunity to do things like constant fold the (1.0 / op1) easily. > * > + * Note: before we used to implement this as op1 * fract(op / op1) but this > + * implementation had significant precision errors. > + * > * LDEXP_TO_ARITH: > * ------------- > * Converts ir_binop_ldexp to arithmetic and bit operations. > @@ -136,7 +139,7 @@ private: > void sub_to_add_neg(ir_expression *); > void div_to_mul_rcp(ir_expression *); > void int_div_to_mul_rcp(ir_expression *); > - void mod_to_fract(ir_expression *); > + void mod_to_floor(ir_expression *); > void exp_to_exp2(ir_expression *); > void pow_to_exp2(ir_expression *); > void log_to_log2(ir_expression *); > @@ -276,22 +279,29 @@ lower_instructions_visitor::log_to_log2(ir_expression > *ir) > } > > void > -lower_instructions_visitor::mod_to_fract(ir_expression *ir) > +lower_instructions_visitor::mod_to_floor(ir_expression *ir) > { > - ir_variable *temp = new(ir) ir_variable(ir->operands[1]->type, "mod_b", > - ir_var_temporary); > - this->base_ir->insert_before(temp); > - > - ir_assignment *const assign = > - new(ir) ir_assignment(new(ir) ir_dereference_variable(temp), > - ir->operands[1], NULL); > - > - this->base_ir->insert_before(assign); > + ir_variable *x = new(ir) ir_variable(ir->operands[0]->type, "mod_x", > + ir_var_temporary); > + ir_variable *y = new(ir) ir_variable(ir->operands[1]->type, "mod_y", > + ir_var_temporary); > + this->base_ir->insert_before(x); > + this->base_ir->insert_before(y); > + > + ir_assignment *const assign_x = > + new(ir) ir_assignment(new(ir) ir_dereference_variable(x), > + ir->operands[0], NULL); > + ir_assignment *const assign_y = > + new(ir) ir_assignment(new(ir) ir_dereference_variable(y), > + ir->operands[1], NULL); > + > + this->base_ir->insert_before(assign_x); > + this->base_ir->insert_before(assign_y); > > ir_expression *const div_expr = > - new(ir) ir_expression(ir_binop_div, ir->operands[0]->type, > - ir->operands[0], > - new(ir) ir_dereference_variable(temp)); > + new(ir) ir_expression(ir_binop_div, x->type, > + new(ir) ir_dereference_variable(x), > + new(ir) ir_dereference_variable(y)); > > /* Don't generate new IR that would need to be lowered in an additional > * pass. > @@ -299,14 +309,17 @@ lower_instructions_visitor::mod_to_fract(ir_expression > *ir) > if (lowering(DIV_TO_MUL_RCP)) > div_to_mul_rcp(div_expr); > > - ir_rvalue *expr = new(ir) ir_expression(ir_unop_fract, > - ir->operands[0]->type, > - div_expr, > - NULL); > + ir_expression *const floor_expr = > + new(ir) ir_expression(ir_unop_floor, x->type, div_expr); > > - ir->operation = ir_binop_mul; > - ir->operands[0] = new(ir) ir_dereference_variable(temp); > - ir->operands[1] = expr; > + ir_expression *const mul_expr = > + new(ir) ir_expression(ir_binop_mul, > + new(ir) ir_dereference_variable(y), > + floor_expr); > + > + ir->operation = ir_binop_sub; > + ir->operands[0] = new(ir) ir_dereference_variable(x); > + ir->operands[1] = mul_expr; > this->progress = true; > } > > @@ -535,8 +548,8 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) > break; > > case ir_binop_mod: > - if (lowering(MOD_TO_FRACT) && ir->type->is_float()) > - mod_to_fract(ir); > + if (lowering(MOD_TO_FLOOR) && ir->type->is_float()) > + mod_to_floor(ir); > break; > > case ir_binop_pow: > diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > index 38cf29f..4a7e045 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > @@ -877,7 +877,7 @@ fs_visitor::visit(ir_expression *ir) > break; > } > case ir_binop_mod: > - /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */ > + /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */ > assert(ir->type->is_integer()); > emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]); > break; > diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp > b/src/mesa/drivers/dri/i965/brw_shader.cpp > index d76134b..eac9982 100644 > --- a/src/mesa/drivers/dri/i965/brw_shader.cpp > +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp > @@ -157,7 +157,7 @@ brw_link_shader(struct gl_context *ctx, struct > gl_shader_program *shProg) > ? BITFIELD_INSERT_TO_BFM_BFI > : 0; > lower_instructions(shader->base.ir, > - MOD_TO_FRACT | > + MOD_TO_FLOOR | > DIV_TO_MUL_RCP | > SUB_TO_ADD_NEG | > EXP_TO_EXP2 | > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > index 8b8b27f..8129118 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > @@ -1522,7 +1522,7 @@ vec4_visitor::visit(ir_expression *ir) > break; > } > case ir_binop_mod: > - /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */ > + /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */ > assert(ir->type->is_integer()); > emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]); > break; > diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp > index ce3af31..fc3dad7 100644 > --- a/src/mesa/program/ir_to_mesa.cpp > +++ b/src/mesa/program/ir_to_mesa.cpp > @@ -1152,7 +1152,7 @@ ir_to_mesa_visitor::visit(ir_expression *ir) > assert(!"not reached: should be handled by ir_div_to_mul_rcp"); > break; > case ir_binop_mod: > - /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */ > + /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */ > assert(ir->type->is_integer()); > emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); > break; > @@ -2942,7 +2942,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct > gl_shader_program *prog) > > /* Lowering */ > do_mat_op_to_vec(ir); > - lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 > + lower_instructions(ir, (MOD_TO_FLOOR | DIV_TO_MUL_RCP | EXP_TO_EXP2 > | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP > | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); > > diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > index c3d7793..c9903cd 100644 > --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > @@ -5418,7 +5418,7 @@ st_link_shader(struct gl_context *ctx, struct > gl_shader_program *prog) > lower_offset_arrays(ir); > do_mat_op_to_vec(ir); > lower_instructions(ir, > - MOD_TO_FRACT | > + MOD_TO_FLOOR | > DIV_TO_MUL_RCP | > EXP_TO_EXP2 | > LOG_TO_LOG2 | > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev