On Tue, Apr 12, 2016 at 1:05 AM, Samuel Iglesias Gonsálvez < sigles...@igalia.com> wrote:
> From: Iago Toral Quiroga <ito...@igalia.com> > > At least i965 hardware does not have native support for truncating doubles. > --- > src/compiler/nir/nir.h | 1 + > src/compiler/nir/nir_lower_double_ops.c | 83 > +++++++++++++++++++++++++++++++++ > 2 files changed, 84 insertions(+) > > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h > index 434d92b..f83b2e0 100644 > --- a/src/compiler/nir/nir.h > +++ b/src/compiler/nir/nir.h > @@ -2286,6 +2286,7 @@ typedef enum { > nir_lower_drcp = (1 << 0), > nir_lower_dsqrt = (1 << 1), > nir_lower_drsq = (1 << 2), > + nir_lower_dtrunc = (1 << 3), > } nir_lower_doubles_options; > > void nir_lower_doubles(nir_shader *shader, nir_lower_doubles_options > options); > diff --git a/src/compiler/nir/nir_lower_double_ops.c > b/src/compiler/nir/nir_lower_double_ops.c > index 4cd153c..9eec858 100644 > --- a/src/compiler/nir/nir_lower_double_ops.c > +++ b/src/compiler/nir/nir_lower_double_ops.c > @@ -302,6 +302,81 @@ lower_sqrt_rsq(nir_builder *b, nir_ssa_def *src, bool > sqrt) > return res; > } > > +static nir_ssa_def * > +lower_trunc(nir_builder *b, nir_ssa_def *src) > +{ > + nir_ssa_def *unbiased_exp = nir_isub(b, get_exponent(b, src), > + nir_imm_int(b, 1023)); > + > + nir_ssa_def *frac_bits = nir_isub(b, nir_imm_int(b, 52), unbiased_exp); > + > + /* > + * Depending on the exponent, we compute a mask with the bits we need > to > + * remove in order to trunc the double. The mask is computed like this: > + * > + * if (unbiased_exp < 0) > + * mask = 0x0 > + * else if (unbiased_exp > 52) > + * mask = 0x7fffffffffffffff > + * else > + * mask = (1LL < frac_bits) - 1 > I'm having a bit of trouble convincing myself that this is correct. Let me walk through it one case at a time: unbiased_exp < 0: In this case, 2^exp <= 1/2, so |src| < 1 and the result should be zero. In that case we want to stomp all the bits to zero, not keep them all. unbiased_exp > 52: In this case 2^exp is large enough that all of the bits matter. 
We want to keep them all, not zero them out. else: In this case, 2^exp >= 1 but not big enough to make all the mantissa bits matter. We need to mask off the bottom (52 - exp) bits. If I'm getting this backwards, please let me know. If it's doing what I think it's doing, there are several cases this should be getting wrong. Are we testing all of those cases? One other aside: I think it's more efficient to generate the masks with either (~0u >> (32 - bits)) or (0x80000000 >> (bits - 1)) if you want the top bits. NIR should be able to easily get rid of the integer adds and subtracts. Getting rid of the -1 on (1 << frac_bits) - 1 is much harder. > + * > + * Notice that the else branch is a 64-bit integer operation that we > need > + * to implement in terms of 32-bit integer arithmetics (at least until > we > + * support 64-bit integer arithmetics). > + */ > + > + /* Compute "mask = (1LL << frac_bits) - 1" in terms of hi/lo 32-bit > chunks > + * for the else branch > + */ > + nir_ssa_def *mask_lo = > + nir_bcsel(b, > + nir_ige(b, frac_bits, nir_imm_int(b, 32)), > + nir_imm_int(b, 0xffffffff), > + nir_isub(b, > + nir_ishl(b, > + nir_imm_int(b, 1), > + frac_bits), > + nir_imm_int(b, 1))); > + > + nir_ssa_def *mask_hi = > + nir_bcsel(b, > + nir_ilt(b, frac_bits, nir_imm_int(b, 33)), > + nir_imm_int(b, 0), > + nir_isub(b, > + nir_ishl(b, > + nir_imm_int(b, 1), > + nir_isub(b, > + frac_bits, > + nir_imm_int(b, 32))), > + nir_imm_int(b, 1))); > + > + /* Compute the correct mask to use based on unbiased_exp */ > + nir_ssa_def *mask = > + nir_bcsel(b, > + nir_ilt(b, unbiased_exp, nir_imm_int(b, 0)), > + nir_pack_double_2x32_split(b, > + nir_imm_int(b, 0xffffffff), > + nir_imm_int(b, 0x7fffffff)), > + nir_bcsel(b, nir_ige(b, unbiased_exp, nir_imm_int(b, 53)), > + nir_imm_double(b, 0.0), > + nir_pack_double_2x32_split(b, mask_lo, > mask_hi))); > + > + /* Mask off relevant mantissa bits (0..31 in the low 32-bits > + * and 0..19 in the high 32 bits) > + */ > + mask_lo = 
nir_unpack_double_2x32_split_x(b, mask); > + mask_hi = nir_unpack_double_2x32_split_y(b, mask); > + > + nir_ssa_def *src_lo = nir_unpack_double_2x32_split_x(b, src); > + nir_ssa_def *src_hi = nir_unpack_double_2x32_split_y(b, src); > + > + nir_ssa_def *zero = nir_imm_int(b, 0); > + nir_ssa_def *new_src_lo = nir_bfi(b, mask_lo, zero, src_lo); > + nir_ssa_def *new_src_hi = nir_bfi(b, mask_hi, zero, src_hi); > + return nir_pack_double_2x32_split(b, new_src_lo, new_src_hi); > +} > + > static void > lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options > options) > { > @@ -325,6 +400,11 @@ lower_doubles_instr(nir_alu_instr *instr, > nir_lower_doubles_options options) > return; > break; > > + case nir_op_ftrunc: > + if (!(options & nir_lower_dtrunc)) > + return; > + break; > + > default: > return; > } > @@ -348,6 +428,9 @@ lower_doubles_instr(nir_alu_instr *instr, > nir_lower_doubles_options options) > case nir_op_frsq: > result = lower_sqrt_rsq(&bld, src, false); > break; > + case nir_op_ftrunc: > + result = lower_trunc(&bld, src); > + break; > default: > unreachable("unhandled opcode"); > } > -- > 2.5.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev