On 02/06/2015 06:56 AM, Ilia Mirkin wrote: > From: Dave Airlie <airl...@gmail.com> > > These lowering passes are optional for the backend to request, currently > the TGSI softpipe backend most likely the r600g backend would want to use > these passes as is. They aim to hit the gallium opcodes from the standard > rounding/truncation functions. > > v2: also lower floor in mod_to_floor > > Signed-off-by: Dave Airlie <airl...@redhat.com> > --- > src/glsl/ir_optimization.h | 1 + > src/glsl/lower_instructions.cpp | 212 > ++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 213 insertions(+) > > diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h > index 912d910..9f91e2f 100644 > --- a/src/glsl/ir_optimization.h > +++ b/src/glsl/ir_optimization.h > @@ -41,6 +41,7 @@ > #define CARRY_TO_ARITH 0x200 > #define BORROW_TO_ARITH 0x400 > #define SAT_TO_CLAMP 0x800 > +#define DOPS_TO_DFRAC 0x1000 > > /** > * \see class lower_packing_builtins_visitor > diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp > index 140b6d4..bf45c95 100644 > --- a/src/glsl/lower_instructions.cpp > +++ b/src/glsl/lower_instructions.cpp > @@ -42,6 +42,7 @@ > * - CARRY_TO_ARITH > * - BORROW_TO_ARITH > * - SAT_TO_CLAMP > + * - DOPS_TO_DFRAC > * > * SUB_TO_ADD_NEG: > * --------------- > @@ -112,6 +113,9 @@ > * ------------- > * Converts ir_unop_saturate into min(max(x, 0.0), 1.0) > * > + * DOPS_TO_DFRAC: > + * -------------- > + * Converts double trunc, ceil, floor, round to fract > */ > > #include "main/core.h" /* for M_LOG2E */ > @@ -151,6 +155,11 @@ private: > void sat_to_clamp(ir_expression *); > void double_dot_to_fma(ir_expression *); > void double_lrp(ir_expression *); > + void dceil_to_dfrac(ir_expression *); > + void dfloor_to_dfrac(ir_expression *); > + void dround_even_to_dfrac(ir_expression *); > + void dtrunc_to_dfrac(ir_expression *); > + void dsign_to_csel(ir_expression *); > }; > > } /* anonymous namespace */ > @@ -315,6 +324,9 @@ 
lower_instructions_visitor::mod_to_floor(ir_expression > *ir) > ir_expression *const floor_expr = > new(ir) ir_expression(ir_unop_floor, x->type, div_expr); > > + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) > + dfloor_to_dfrac(floor_expr); > + > ir_expression *const mul_expr = > new(ir) ir_expression(ir_binop_mul, > new(ir) ir_dereference_variable(y), > @@ -596,6 +608,182 @@ lower_instructions_visitor::double_lrp(ir_expression > *ir) > this->progress = true; > } > > +void > +lower_instructions_visitor::dceil_to_dfrac(ir_expression *ir) > +{ > + /* > + * frtemp = frac(x); > + * temp = sub(x, frtemp); > + * result = temp + ((frtemp != 0.0) ? 1.0 : 0.0); > + */ > + ir_instruction &i = *base_ir; > + ir_constant *zero = new(ir) ir_constant(0.0, > ir->operands[0]->type->vector_elements); > + ir_constant *one = new(ir) ir_constant(1.0, > ir->operands[0]->type->vector_elements); > + ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp", > + ir_var_temporary); > + ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp", > + ir_var_temporary); > + ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2", > + ir_var_temporary);
For all these functions, I think you really want to use an ir_factory. Then you can just use ir_factory::make_temp and ir_factory::constant. That would reduce a lot of the clone calls... I think it would make this code a lot easier to read. > + > + i.insert_before(frtemp); > + i.insert_before(assign(frtemp, fract(ir->operands[0]))); > + > + i.insert_before(temp); > + i.insert_before(assign(temp, sub(ir->operands[0]->clone(ir, NULL), > frtemp))); > + > + i.insert_before(t2); > + i.insert_before(assign(t2, csel(nequal(frtemp, zero), one, > zero->clone(ir, NULL)))); > + ir->operation = ir_binop_add; > + ir->operands[0] = new(ir) ir_dereference_variable(temp); > + ir->operands[1] = new(ir) ir_dereference_variable(t2); > +} > + > +void > +lower_instructions_visitor::dfloor_to_dfrac(ir_expression *ir) > +{ > + /* > + * frtemp = frac(x); > + * result = sub(x, frtemp); > + */ > + ir_instruction &i = *base_ir; > + ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp", > + ir_var_temporary); > + > + i.insert_before(frtemp); > + i.insert_before(assign(frtemp, fract(ir->operands[0]->clone(ir, NULL)))); > + > + ir->operation = ir_binop_sub; > + ir->operands[1] = new(ir) ir_dereference_variable(frtemp); > +} > +void > +lower_instructions_visitor::dround_even_to_dfrac(ir_expression *ir) > +{ > + /* > + * insane but works > + * temp = x + 0.5; > + * frtemp = frac(temp); > + * t2 = sub(temp, frtemp); > + * if (frac(x) == 0.5) > + * result = frac(t2 * 0.5) == 0 ? 
t2 : t2 - 1; > + * else > + * result = t2; > + > + */ > + const unsigned vec_elem = ir->type->vector_elements; > + const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, > 1); > + ir_instruction &i = *base_ir; > + ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp", > + ir_var_temporary); > + ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp", > + ir_var_temporary); > + ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2", > + ir_var_temporary); > + ir_variable *t3 = new(ir) ir_variable(bvec, "t3", > + ir_var_temporary); > + ir_variable *t4 = new(ir) ir_variable(bvec, "t4", > + ir_var_temporary); > + ir_variable *t5 = new(ir) ir_variable(ir->operands[0]->type, "t5", > + ir_var_temporary); > + ir_constant *p5 = new(ir) ir_constant(0.5, > ir->operands[0]->type->vector_elements); > + ir_constant *one = new(ir) ir_constant(1.0, > ir->operands[0]->type->vector_elements); > + ir_constant *zero = new(ir) ir_constant(0.0, > ir->operands[0]->type->vector_elements); > + > + i.insert_before(temp); > + i.insert_before(assign(temp, add(ir->operands[0], p5))); > + > + i.insert_before(frtemp); > + i.insert_before(assign(frtemp, fract(temp))); > + > + i.insert_before(t2); > + i.insert_before(assign(t2, sub(temp, frtemp))); > + > + i.insert_before(t3); > + i.insert_before(assign(t3, equal(fract(ir->operands[0]->clone(ir, NULL)), > p5->clone(ir, NULL)))); > + > + i.insert_before(t4); > + i.insert_before(assign(t4, equal(fract(mul(t2, p5->clone(ir, NULL))), > zero))); > + > + i.insert_before(t5); > + i.insert_before(assign(t5, csel(t4, t2, sub(t2, one)))); > + > + ir->operation = ir_triop_csel; > + ir->operands[0] = new(ir) ir_dereference_variable(t3); > + ir->operands[1] = new(ir) ir_dereference_variable(t5); > + ir->operands[2] = new(ir) ir_dereference_variable(t2); > +} > + > +void > +lower_instructions_visitor::dtrunc_to_dfrac(ir_expression *ir) > +{ > + /* > + * frtemp = frac(x); > + * temp = sub(x, 
frtemp); > + * if (x >= 0) > + * result = temp; > + * else > + * result = temp + (frtemp == 0.0) ? 0 : 1; Isn't this just: result = temp + (x < 0 && frtemp == 0.0) ? 0 : 1; or, written as a csel: result = temp + csel(x < 0 && frtemp == 0.0, 0, 1); ? > + */ > + const unsigned vec_elem = ir->type->vector_elements; > + const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, > 1); > + ir_instruction &i = *base_ir; > + > + ir_constant *zero = new(ir) ir_constant(0.0, > ir->operands[0]->type->vector_elements); > + ir_constant *one = new(ir) ir_constant(1.0, > ir->operands[0]->type->vector_elements); > + ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp", > + ir_var_temporary); > + ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp", > + ir_var_temporary); > + ir_variable *t2 = new(ir) ir_variable(bvec, "t2", > + ir_var_temporary); > + ir_variable *t3 = new(ir) ir_variable(ir->operands[0]->type, "t3", > + ir_var_temporary); > + > + i.insert_before(frtemp); > + i.insert_before(assign(frtemp, fract(ir->operands[0]))); > + i.insert_before(temp); > + i.insert_before(assign(temp, sub(ir->operands[0]->clone(ir, NULL), > frtemp))); > + > + i.insert_before(t2); > + i.insert_before(assign(t2, less(ir->operands[0]->clone(ir, NULL), zero))); > + > + i.insert_before(t3); > + i.insert_before(assign(t3, add(temp, csel(equal(frtemp, zero->clone(ir, > NULL)), zero->clone(ir, NULL), one)))); > + > + ir->operation = ir_triop_csel; > + ir->operands[0] = new(ir) ir_dereference_variable(t2); > + ir->operands[1] = new(ir) ir_dereference_variable(t3); > + ir->operands[2] = new(ir) ir_dereference_variable(temp); > + > + this->progress = true; > +} > + > +void > +lower_instructions_visitor::dsign_to_csel(ir_expression *ir) > +{ > + /* > + * temp = x > 0.0 ? 1.0 : 0.0; > + * result = x < 0.0 ? 
-1.0 : temp; > + */ > + ir_instruction &i = *base_ir; > + ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp", > + ir_var_temporary); > + ir_constant *zero = new(ir) ir_constant(0.0, > ir->operands[0]->type->vector_elements); > + ir_constant *one = new(ir) ir_constant(1.0, > ir->operands[0]->type->vector_elements); > + ir_constant *negone = new(ir) ir_constant(-1.0, > ir->operands[0]->type->vector_elements); Please rename this to neg_one... I kept reading "negone" as "ne gone". :) > + ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2", > + ir_var_temporary); > + i.insert_before(temp); > + i.insert_before(assign(temp, csel(greater(ir->operands[0], zero), one, > zero->clone(ir, NULL)))); > + > + i.insert_before(t2); > + i.insert_before(assign(t2, less(ir->operands[0]->clone(ir, NULL), > zero->clone(ir, NULL)))); > + ir->operation = ir_triop_csel; > + ir->operands[0] = new(ir) ir_dereference_variable(t2); > + ir->operands[1] = negone; > + ir->operands[2] = new(ir) ir_dereference_variable(temp); You can skip creating one or both of these temporaries... at least t2. For example: ir->operands[0] = less(ir->operands[0]->clone(ir, NULL), zero->clone(ir, NULL)); The other lowering functions should get the same treatment. 
> +} > + > ir_visitor_status > lower_instructions_visitor::visit_leave(ir_expression *ir) > { > @@ -665,6 +853,30 @@ lower_instructions_visitor::visit_leave(ir_expression > *ir) > sat_to_clamp(ir); > break; > > + case ir_unop_trunc: > + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) > + dtrunc_to_dfrac(ir); > + break; > + > + case ir_unop_ceil: > + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) > + dceil_to_dfrac(ir); > + break; > + > + case ir_unop_floor: > + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) > + dfloor_to_dfrac(ir); > + break; > + > + case ir_unop_round_even: > + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) > + dround_even_to_dfrac(ir); > + break; > + > + case ir_unop_sign: > + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) > + dsign_to_csel(ir); > + break; > default: > return visit_continue; > } > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev