On 22 August 2013 16:08, Matt Turner <matts...@gmail.com> wrote: > --- > src/glsl/ir_optimization.h | 1 + > src/glsl/lower_instructions.cpp | 128 > ++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 129 insertions(+) > > diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h > index b79c2b7..074686c 100644 > --- a/src/glsl/ir_optimization.h > +++ b/src/glsl/ir_optimization.h > @@ -38,6 +38,7 @@ > #define INT_DIV_TO_MUL_RCP 0x40 > #define LRP_TO_ARITH 0x80 > #define BITFIELD_INSERT_TO_BFM_BFI 0x100 > +#define LDEXP_TO_ARITH 0x200 > > /** > * \see class lower_packing_builtins_visitor > diff --git a/src/glsl/lower_instructions.cpp > b/src/glsl/lower_instructions.cpp > index d32ec80..8b0a8e1 100644 > --- a/src/glsl/lower_instructions.cpp > +++ b/src/glsl/lower_instructions.cpp > @@ -37,6 +37,7 @@ > * - POW_TO_EXP2 > * - LOG_TO_LOG2 > * - MOD_TO_FRACT > + * - LDEXP_TO_ARITH > * - LRP_TO_ARITH > * - BITFIELD_INSERT_TO_BFM_BFI > * > @@ -82,6 +83,10 @@ > * if we have to break it down like this anyway, it gives an > * opportunity to do things like constant fold the (1.0 / op1) easily. > * > + * LDEXP_TO_ARITH: > + * ------------- > + * Converts ir_binop_ldexp to arithmetic and bit operations. > + * > * LRP_TO_ARITH: > * ------------- > * Converts ir_triop_lrp to (op0 * (1.0f - op2)) + (op1 * op2). > @@ -125,6 +130,7 @@ private: > void log_to_log2(ir_expression *); > void lrp_to_arith(ir_expression *); > void bitfield_insert_to_bfm_bfi(ir_expression *); > + void ldexp_to_arith(ir_expression *); > }; > > /** > @@ -332,6 +338,123 @@ > lower_instructions_visitor::bitfield_insert_to_bfm_bfi(ir_expression *ir) > this->progress = true; > } > > +void > +lower_instructions_visitor::ldexp_to_arith(ir_expression *ir) > +{ > + /* Translates > + * ir_binop_ldexp x exp > + * into > + * > + * extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift); > + * resulting_biased_exp = extracted_biased_exp + exp; >
This comment is a little difficult to follow since it refers to exp_shift, which isn't defined until further down in the code. You might want to add a comment saying something like "(where exp_shift = 23, the bit position of the exponent field in an IEEE single-precision float)" > + * > + * if (resulting_biased_exp < 1) { > + * return copysign(0.0, x); > + * } > + * > + * return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) | > + * lshift(i2u(resulting_biased_exp), exp_shift)); > + * > + * which we can't actually implement as such, since the GLSL IR doesn't > + * have vectorized if-statements. We actually implement it without > branches > + * using conditional-select: > + * > + * extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift); > + * resulting_biased_exp = extracted_biased_exp + exp; > + * > + * is_not_zero_or_underflow = gequal(resulting_biased_exp, 1); > + * x = cond_sel(is_not_zero_or_underflow, x, copysign(0.0f, x)); > + * resulting_biased_exp = cond_sel(is_not_zero_or_underflow, > + * resulting_biased_exp, 0); > + * > + * return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) | > + * lshift(i2u(resulting_biased_exp), exp_shift)); > + */ > + > + const unsigned vec_elem = ir->type->vector_elements; > + > + /* Types */ > + const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT, > vec_elem, 1); > + const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, > vec_elem, 1); > + > + /* Constants */ > + ir_constant *zeroi = ir_constant::zero(ir, ivec); > + ir_constant *zerof = ir_constant::zero(ir, ir->type); > + > + ir_constant *sign_mantissa_mask = new(ir) ir_constant(0x807fffffu, > vec_elem); > + ir_constant *sign_mask = new(ir) ir_constant(0x80000000u, vec_elem); > + > + ir_constant *exp_shift = new(ir) ir_constant(23u, vec_elem); > + > + /* Temporary variables */ > + ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary); > + ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary); > + > + ir_variable *zero_sign_x = new(ir) 
ir_variable(ir->type, "zero_sign_x", > + ir_var_temporary); > + > + ir_variable *extracted_biased_exp = > + new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary); > + ir_variable *resulting_biased_exp = > + new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary); > + > + ir_variable *is_not_zero_or_underflow = > + new(ir) ir_variable(bvec, "is_not_zero_or_underflow", > ir_var_temporary); > + > + ir_instruction &i = *base_ir; > + > + /* Copy <x> and <exp> arguments. */ > + i.insert_before(x); > + i.insert_before(assign(x, ir->operands[0])); > + i.insert_before(exp); > + i.insert_before(assign(exp, ir->operands[1])); > + > + /* Extract the biased exponent from <x>. */ > + i.insert_before(extracted_biased_exp); > + i.insert_before(assign(extracted_biased_exp, > + rshift(bitcast_f2i(abs(x)), exp_shift))); > + > + i.insert_before(resulting_biased_exp); > + i.insert_before(assign(resulting_biased_exp, > + add(extracted_biased_exp, exp))); > + > + /* Test if result is ±0.0, subnormal, or underflow by checking if the > + * resulting biased exponent would be less than 0x1. If so, the result > is > + * 0.0 with the sign of x. (Actually, invert the conditions so that > + * immediate values are the second arguments, which is better for i965) > + */ > + i.insert_before(zero_sign_x); > + i.insert_before(assign(zero_sign_x, > + bitcast_u2f(bit_or(bit_and(bitcast_f2u(x), > sign_mask), > + bitcast_f2u(zerof))))); > Is it guaranteed that future optimization passes will constant fold bitcast_f2u(zerof) down to the appropriate value? 
Other than that the patch is: Reviewed-by: Paul Berry <stereotype...@gmail.com> > + > + i.insert_before(is_not_zero_or_underflow); > + i.insert_before(assign(is_not_zero_or_underflow, > + gequal(resulting_biased_exp, > + new(ir) ir_constant(0x1, vec_elem)))); > + i.insert_before(assign(x, cond_sel(is_not_zero_or_underflow, > + x, zero_sign_x))); > + i.insert_before(assign(resulting_biased_exp, > + cond_sel(is_not_zero_or_underflow, > + resulting_biased_exp, zeroi))); > + > + /* We could test for overflows by checking if the resulting biased > exponent > + * would be greater than 0xFE. Turns out we don't need to because the > GLSL > + * spec says: > + * > + * "If this product is too large to be represented in the > + * floating-point type, the result is undefined." > + */ > + > + ir_constant *exp_shift_clone = exp_shift->clone(ir, NULL); > + ir->operation = ir_unop_bitcast_u2f; > + ir->operands[0] = bit_or(bit_and(bitcast_f2u(x), sign_mantissa_mask), > + lshift(i2u(resulting_biased_exp), > exp_shift_clone)); > + ir->operands[1] = NULL; > + > + this->progress = true; > +} > + > ir_visitor_status > lower_instructions_visitor::visit_leave(ir_expression *ir) > { > @@ -378,6 +501,11 @@ lower_instructions_visitor::visit_leave(ir_expression > *ir) > bitfield_insert_to_bfm_bfi(ir); > break; > > + case ir_binop_ldexp: > + if (lowering(LDEXP_TO_ARITH)) > + ldexp_to_arith(ir); > + break; > + > default: > return visit_continue; > } > -- > 1.8.3.2 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev