On Tue, Oct 16, 2018 at 7:51 PM Matt Turner <matts...@gmail.com> wrote:
> On Sun, Oct 14, 2018 at 3:58 PM Jason Ekstrand <ja...@jlekstrand.net> wrote:
> >
> > On October 14, 2018 17:12:34 Matt Turner <matts...@gmail.com> wrote:
> > >
> > > From: Jason Ekstrand <jason.ekstr...@intel.com>
> > >
> > > [mattst88]: Found in an old branch of Jason's.
> > >
> > > Jason implemented: inot, iand, ior, iadd, isub, ineg, iabs, compare,
> > > imin, imax, umin, umax
> > > Matt implemented: ixor, imov, bcsel
> > > ---
> > >  src/compiler/nir/nir_lower_int64.c | 186 +++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 186 insertions(+)
> > >
> > > diff --git a/src/compiler/nir/nir_lower_int64.c b/src/compiler/nir/nir_lower_int64.c
> > > index 0d7f165b406..6b269830801 100644
> > > --- a/src/compiler/nir/nir_lower_int64.c
> > > +++ b/src/compiler/nir/nir_lower_int64.c
> > > @@ -24,6 +24,192 @@
> > >  #include "nir.h"
> > >  #include "nir_builder.h"
> > >
> > > +static nir_ssa_def *
> > > +lower_imov64(nir_builder *b, nir_ssa_def *x)
> > > +{
> > > +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> > > +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> > > +
> > > +   return nir_pack_64_2x32_split(b, nir_imov(b, x_lo), nir_imov(b, x_hi));
> >
> > You don't really need the movs...
>
> Thanks. I think that was really a copy-and-paste-and-replace mistake.
>
> > > +}
> > > +
> > > +static nir_ssa_def *
> > > +lower_bcsel64(nir_builder *b, nir_ssa_def *cond, nir_ssa_def *x, nir_ssa_def *y)
> > > +{
> > > +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> > > +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> > > +   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> > > +   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> > > +
> > > +   return nir_pack_64_2x32_split(b, nir_bcsel(b, cond, x_lo, y_lo),
> > > +                                    nir_bcsel(b, cond, x_hi, y_hi));
> > > +}
> > > +
> > > +static nir_ssa_def *
> > > +lower_inot64(nir_builder *b, nir_ssa_def *x)
> > > +{
> > > +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> > > +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> > > +
> > > +   return nir_pack_64_2x32_split(b, nir_inot(b, x_lo), nir_inot(b, x_hi));
> > > +}
> > > +
> > > +static nir_ssa_def *
> > > +lower_iand64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> > > +{
> > > +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> > > +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> > > +   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> > > +   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> > > +
> > > +   return nir_pack_64_2x32_split(b, nir_iand(b, x_lo, y_lo),
> > > +                                    nir_iand(b, x_hi, y_hi));
> > > +}
> > > +
> > > +static nir_ssa_def *
> > > +lower_ior64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> > > +{
> > > +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> > > +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> > > +   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> > > +   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> > > +
> > > +   return nir_pack_64_2x32_split(b, nir_ior(b, x_lo, y_lo),
> > > +                                    nir_ior(b, x_hi, y_hi));
> > > +}
> > > +
> > > +static nir_ssa_def *
> > > +lower_ixor64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> > > +{
> > > +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> > > +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> > > +   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> > > +   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> > > +
> > > +   return nir_pack_64_2x32_split(b, nir_ixor(b, x_lo, y_lo),
> > > +                                    nir_ixor(b, x_hi, y_hi));
> > > +}
> > > +
> > > +static nir_ssa_def *
> > > +lower_iadd64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> > > +{
> > > +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> > > +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> > > +   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> > > +   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> > > +
> > > +   nir_ssa_def *res_lo = nir_iadd(b, x_lo, y_lo);
> > > +   nir_ssa_def *carry = nir_b2i(b, nir_ult(b, res_lo, x_lo));
> > > +   nir_ssa_def *res_hi = nir_iadd(b, carry, nir_iadd(b, x_hi, y_hi));
> > > +
> > > +   return nir_pack_64_2x32_split(b, res_lo, res_hi);
> > > +}
> > > +
> > > +static nir_ssa_def *
> > > +lower_isub64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> > > +{
> > > +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> > > +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> > > +   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> > > +   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> > > +
> > > +   nir_ssa_def *res_lo = nir_isub(b, x_lo, y_lo);
> > > +   /* In NIR, true is represented by ~0 which is -1 */
> >
> > We've had discussions (had some at XDC this year) about changing booleans
> > to one-bit which would break this.  Doing b2i would be safer but this does
> > work for now.
>
> > > +   nir_ssa_def *borrow = nir_ult(b, x_lo, y_lo);
> > > +   nir_ssa_def *res_hi = nir_iadd(b, nir_isub(b, x_hi, y_hi), borrow);
> > > +
> > > +   return nir_pack_64_2x32_split(b, res_lo, res_hi);
> > > +}
> > > +
> > > +static nir_ssa_def *
> > > +lower_ineg64(nir_builder *b, nir_ssa_def *x)
> > > +{
> > > +   /* Since isub is the same number of instructions (with better dependencies)
> > > +    * as iadd, subtraction is actually more efficient for ineg than the usual
> > > +    * 2's complement "flip the bits and add one".
> > > +    */
> > > +   return lower_isub64(b, nir_imm_int64(b, 0), x);
> > > +}
> > > +
> > > +static nir_ssa_def *
> > > +lower_iabs64(nir_builder *b, nir_ssa_def *x)
> > > +{
> > > +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> > > +   nir_ssa_def *x_is_neg = nir_ilt(b, x_hi, nir_imm_int(b, 0));
> > > +   return nir_bcsel(b, x_is_neg, lower_ineg64(b, x), x);
> >
> > lower_bcsel?  Or, since we're depending on this running multiple times,
> > just nir_ineg?  I go back and forth on whether a pass like this should run
> > in a loop or be smart enough to lower intermediate bits on the fly.  We
> > should probably pick one.
>
> Agreed, I don't love it. A later patch in the series calls
> nir_lower_int64() in a loop so I suppose for now I'll rely on that and
> use nir_ineg directly.

Connor had a really good suggestion about that....

> > > +}
> > > +
> > > +static nir_ssa_def *
> > > +lower_int64_compare(nir_builder *b, nir_op op, nir_ssa_def *x, nir_ssa_def *y)
> > > +{
> > > +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> > > +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> > > +   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> > > +   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> > > +
> > > +   switch (op) {
> > > +   case nir_op_ieq:
> > > +      return nir_iand(b, nir_ieq(b, x_hi, y_hi), nir_ieq(b, x_lo, y_lo));
> > > +   case nir_op_ine:
> > > +      return nir_ior(b, nir_ine(b, x_hi, y_hi), nir_ine(b, x_lo, y_lo));
> > > +   case nir_op_ult:
> > > +      return nir_ior(b, nir_ult(b, x_hi, y_hi),
> > > +                     nir_iand(b, nir_ieq(b, x_hi, y_hi),
> > > +                                 nir_ult(b, x_lo, y_lo)));
> > > +   case nir_op_ilt:
> > > +      /* This one is tricky.  Handled below */
> > > +      break;
> > > +   case nir_op_uge:
> > > +      /* Lower as !(x < y) in the hopes of better CSE */
> > > +      return nir_inot(b, lower_int64_compare(b, nir_op_ult, x, y));
> > > +   case nir_op_ige:
> > > +      /* Lower as !(x < y) in the hopes of better CSE */
> > > +      return nir_inot(b, lower_int64_compare(b, nir_op_ilt, x, y));
> > > +   default:
> > > +      unreachable("Invalid comparison");
> > > +   }
> > > +
> > > +   assert(op == nir_op_ilt);
> > > +
> > > +   nir_ssa_def *neg_x = lower_ineg64(b, x);
> > > +   nir_ssa_def *neg_y = lower_ineg64(b, y);
> > > +   nir_ssa_def *x_is_neg = nir_ilt(b, x_hi, nir_imm_int(b, 0));
> > > +   nir_ssa_def *y_is_neg = nir_ilt(b, y_hi, nir_imm_int(b, 0));
> > > +
> > > +   nir_ssa_def *imm_true = nir_imm_int(b, NIR_TRUE);
> > > +   nir_ssa_def *imm_false = nir_imm_int(b, NIR_FALSE);
> > > +   nir_ssa_def *ult = lower_int64_compare(b, nir_op_ult, x, y);
> > > +   nir_ssa_def *not_ult_neg =
> > > +      nir_inot(b, lower_int64_compare(b, nir_op_ult, neg_x, neg_y));
> >
> > If you know both are negative, can't you just do an unsigned comparison in
> > the opposite direction?  That would save you all the expensive negation.
> > You may even be able to re-use the result above though I haven't thought
> > through it all the way.
>
> You wrote this code. You tell me :)

I did? Well, year-ago me didn't know what he was doing. That guy was an
idiot.

> In fact, I recently noticed that some piglit comparison tests are
> failing, so I suspect there's a bug in here somewhere...

Quite possibly.... Very little of this code got good testing.

--Jason
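
P.S. For anyone who wants to poke at the arithmetic outside of NIR, below is a
small standalone C model of the lowering. It is not the patch's code and the
split/join/add64/sub64/ilt64 names are invented for this sketch; it just checks
the carry/borrow trick against native 64-bit math, and ilt64 shows one
negation-free way to do the signed compare (not necessarily what the patch ends
up with): a signed compare on the high dwords, falling back to an unsigned
compare of the low dwords when the high dwords are equal.

/* Standalone reference model of the 32-bit lowering, for sanity-checking
 * against native 64-bit arithmetic.  Hypothetical helper names; not NIR code.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct u64_split { uint32_t lo, hi; };

static struct u64_split split(uint64_t v)
{
   return (struct u64_split){ .lo = (uint32_t)v, .hi = (uint32_t)(v >> 32) };
}

static uint64_t join(struct u64_split s)
{
   return ((uint64_t)s.hi << 32) | s.lo;
}

/* Mirrors lower_iadd64: add the low halves, then detect the carry by checking
 * for unsigned wrap-around, i.e. b2i(ult(res_lo, x_lo)).
 */
static uint64_t add64(struct u64_split x, struct u64_split y)
{
   uint32_t res_lo = x.lo + y.lo;
   uint32_t carry = res_lo < x.lo;
   uint32_t res_hi = x.hi + y.hi + carry;
   return join((struct u64_split){ .lo = res_lo, .hi = res_hi });
}

/* Mirrors lower_isub64: a borrow is needed exactly when x_lo < y_lo.  The NIR
 * version adds the all-ones boolean "true" (-1) instead of subtracting 1.
 */
static uint64_t sub64(struct u64_split x, struct u64_split y)
{
   uint32_t res_lo = x.lo - y.lo;
   uint32_t borrow = x.lo < y.lo;
   uint32_t res_hi = x.hi - y.hi - borrow;
   return join((struct u64_split){ .lo = res_lo, .hi = res_hi });
}

/* Signed 64-bit less-than without any negation: the high dwords decide when
 * compared as signed; on a tie the low dwords compare unsigned.
 */
static bool ilt64(struct u64_split x, struct u64_split y)
{
   return (int32_t)x.hi < (int32_t)y.hi ||
          (x.hi == y.hi && x.lo < y.lo);
}

int main(void)
{
   uint64_t vals[] = { 0, 1, 5, UINT64_MAX, 0x80000000u, 0xffffffffu,
                       0x100000000ull, (uint64_t)INT64_MIN,
                       (uint64_t)INT64_MAX, (uint64_t)-42 };
   for (unsigned i = 0; i < sizeof(vals) / sizeof(vals[0]); i++) {
      for (unsigned j = 0; j < sizeof(vals) / sizeof(vals[0]); j++) {
         uint64_t a = vals[i], b = vals[j];
         assert(add64(split(a), split(b)) == a + b);
         assert(sub64(split(a), split(b)) == a - b);
         assert(ilt64(split(a), split(b)) == ((int64_t)a < (int64_t)b));
      }
   }
   return 0;
}

Any C99 compiler will build it; the asserts in main() exercise a handful of
boundary values, so it's an easy place to reproduce a miscompare before
translating a fix back into the NIR builder calls.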