On Tue, Oct 16, 2018 at 7:51 PM Matt Turner <matts...@gmail.com> wrote:
> On Sun, Oct 14, 2018 at 3:58 PM Jason Ekstrand <ja...@jlekstrand.net> wrote:
> >
> > On October 14, 2018 17:12:34 Matt Turner <matts...@gmail.com> wrote:
> > >
> > > From: Jason Ekstrand <jason.ekstr...@intel.com>
> > >
> > > [mattst88]: Found in an old branch of Jason's.
> > >
> > > Jason implemented: inot, iand, ior, iadd, isub, ineg, iabs, compare,
> > > imin, imax, umin, umax
> > > Matt implemented: ixor, imov, bcsel
> > > ---
> > >  src/compiler/nir/nir_lower_int64.c | 186 +++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 186 insertions(+)
> > >
> > > diff --git a/src/compiler/nir/nir_lower_int64.c b/src/compiler/nir/nir_lower_int64.c
> > > index 0d7f165b406..6b269830801 100644
> > > --- a/src/compiler/nir/nir_lower_int64.c
> > > +++ b/src/compiler/nir/nir_lower_int64.c
> > > @@ -24,6 +24,192 @@
> > >  #include "nir.h"
> > >  #include "nir_builder.h"
> > >
> > > +static nir_ssa_def *
> > > +lower_imov64(nir_builder *b, nir_ssa_def *x)
> > > +{
> > > +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> > > +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> > > +
> > > +   return nir_pack_64_2x32_split(b, nir_imov(b, x_lo), nir_imov(b, x_hi));
> >
> > You don't really need the movs...
>
> Thanks. I think that was really a copy-and-paste-and-replace mistake.
>
> > > +}
> > > +
> > > +static nir_ssa_def *
> > > +lower_bcsel64(nir_builder *b, nir_ssa_def *cond, nir_ssa_def *x, nir_ssa_def *y)
> > > +{
> > > +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> > > +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> > > +   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> > > +   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> > > +
> > > +   return nir_pack_64_2x32_split(b, nir_bcsel(b, cond, x_lo, y_lo),
> > > +                                    nir_bcsel(b, cond, x_hi, y_hi));
> > > +}
> > > +
> > > +static nir_ssa_def *
> > > +lower_inot64(nir_builder *b, nir_ssa_def *x)
> > > +{
> > > +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> > > +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> > > +
> > > +   return nir_pack_64_2x32_split(b, nir_inot(b, x_lo), nir_inot(b, x_hi));
> > > +}
> > > +
> > > +static nir_ssa_def *
> > > +lower_iand64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> > > +{
> > > +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> > > +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> > > +   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> > > +   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> > > +
> > > +   return nir_pack_64_2x32_split(b, nir_iand(b, x_lo, y_lo),
> > > +                                    nir_iand(b, x_hi, y_hi));
> > > +}
> > > +
> > > +static nir_ssa_def *
> > > +lower_ior64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> > > +{
> > > +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> > > +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> > > +   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> > > +   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> > > +
> > > +   return nir_pack_64_2x32_split(b, nir_ior(b, x_lo, y_lo),
> > > +                                    nir_ior(b, x_hi, y_hi));
> > > +}
> > > +
> > > +static nir_ssa_def *
> > > +lower_ixor64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> > > +{
> > > +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> > > +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> > > +   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> > > +   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> > > +
> > > +   return nir_pack_64_2x32_split(b, nir_ixor(b, x_lo, y_lo),
> > > +                                    nir_ixor(b, x_hi, y_hi));
> > > +}
> > > +
> > > +static nir_ssa_def *
> > > +lower_iadd64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> > > +{
> > > +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> > > +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> > > +   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> > > +   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> > > +
> > > +   nir_ssa_def *res_lo = nir_iadd(b, x_lo, y_lo);
> > > +   nir_ssa_def *carry = nir_b2i(b, nir_ult(b, res_lo, x_lo));
> > > +   nir_ssa_def *res_hi = nir_iadd(b, carry, nir_iadd(b, x_hi, y_hi));
> > > +
> > > +   return nir_pack_64_2x32_split(b, res_lo, res_hi);
> > > +}
> > > +
> > > +static nir_ssa_def *
> > > +lower_isub64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> > > +{
> > > +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> > > +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> > > +   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> > > +   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> > > +
> > > +   nir_ssa_def *res_lo = nir_isub(b, x_lo, y_lo);
> > > +   /* In NIR, true is represented by ~0 which is -1 */
> >
> > We've had discussions (had some at XDC this year) about changing booleans
> > to one-bit which would break this.  Doing b2i would be safer but this does
> > work for now.
>
> > > +   nir_ssa_def *borrow = nir_ult(b, x_lo, y_lo);
> > > +   nir_ssa_def *res_hi = nir_iadd(b, nir_isub(b, x_hi, y_hi), borrow);
> > > +
> > > +   return nir_pack_64_2x32_split(b, res_lo, res_hi);
> > > +}
> > > +
> > > +static nir_ssa_def *
> > > +lower_ineg64(nir_builder *b, nir_ssa_def *x)
> > > +{
> > > +   /* Since isub is the same number of instructions (with better dependencies)
> > > +    * as iadd, subtraction is actually more efficient for ineg than the usual
> > > +    * 2's complement "flip the bits and add one".
> > > +    */
> > > +   return lower_isub64(b, nir_imm_int64(b, 0), x);
> > > +}
> > > +
> > > +static nir_ssa_def *
> > > +lower_iabs64(nir_builder *b, nir_ssa_def *x)
> > > +{
> > > +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> > > +   nir_ssa_def *x_is_neg = nir_ilt(b, x_hi, nir_imm_int(b, 0));
> > > +   return nir_bcsel(b, x_is_neg, lower_ineg64(b, x), x);
> >
> > lower_bcsel?  Or, since we're depending on this running multiple times,
> > just nir_ineg?  I go back and forth on whether a pass like this should run
> > in a loop or be smart enough to lower intermediate bits on the fly.  We
> > should probably pick one.
>
> Agreed, I don't love it. A later patch in the series calls
> nir_lower_int64() in a loop so I suppose for now I'll rely on that and
> use nir_ineg directly.

Connor had a really good suggestion about that....

> > > +}
> > > +
> > > +static nir_ssa_def *
> > > +lower_int64_compare(nir_builder *b, nir_op op, nir_ssa_def *x, nir_ssa_def *y)
> > > +{
> > > +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> > > +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> > > +   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
> > > +   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
> > > +
> > > +   switch (op) {
> > > +   case nir_op_ieq:
> > > +      return nir_iand(b, nir_ieq(b, x_hi, y_hi), nir_ieq(b, x_lo, y_lo));
> > > +   case nir_op_ine:
> > > +      return nir_ior(b, nir_ine(b, x_hi, y_hi), nir_ine(b, x_lo, y_lo));
> > > +   case nir_op_ult:
> > > +      return nir_ior(b, nir_ult(b, x_hi, y_hi),
> > > +                     nir_iand(b, nir_ieq(b, x_hi, y_hi),
> > > +                                 nir_ult(b, x_lo, y_lo)));
> > > +   case nir_op_ilt:
> > > +      /* This one is tricky.  Handled below */
> > > +      break;
> > > +   case nir_op_uge:
> > > +      /* Lower as !(x < y) in the hopes of better CSE */
> > > +      return nir_inot(b, lower_int64_compare(b, nir_op_ult, x, y));
> > > +   case nir_op_ige:
> > > +      /* Lower as !(x < y) in the hopes of better CSE */
> > > +      return nir_inot(b, lower_int64_compare(b, nir_op_ilt, x, y));
> > > +   default:
> > > +      unreachable("Invalid comparison");
> > > +   }
> > > +
> > > +   assert(op == nir_op_ilt);
> > > +
> > > +   nir_ssa_def *neg_x = lower_ineg64(b, x);
> > > +   nir_ssa_def *neg_y = lower_ineg64(b, y);
> > > +   nir_ssa_def *x_is_neg = nir_ilt(b, x_hi, nir_imm_int(b, 0));
> > > +   nir_ssa_def *y_is_neg = nir_ilt(b, y_hi, nir_imm_int(b, 0));
> > > +
> > > +   nir_ssa_def *imm_true = nir_imm_int(b, NIR_TRUE);
> > > +   nir_ssa_def *imm_false = nir_imm_int(b, NIR_FALSE);
> > > +   nir_ssa_def *ult = lower_int64_compare(b, nir_op_ult, x, y);
> > > +   nir_ssa_def *not_ult_neg =
> > > +      nir_inot(b, lower_int64_compare(b, nir_op_ult, neg_x, neg_y));
> >
> > If you know both are negative, can't you just do an unsigned comparison in
> > the opposite direction?  That would save you all the expensive negation.
> > You may even be able to re-use the result above though I haven't thought
> > through it all the way.
>
> You wrote this code. You tell me :)

I did? Well, year-ago me didn't know what he was doing. That guy was an
idiot.

> In fact, I recently noticed that some piglit comparison tests are
> failing, so I suspect there's a bug in here somewhere...

Quite possibly.... Very little of this code got good testing.

--Jason
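
P.S. For anyone who wants to poke at the arithmetic outside of NIR, below is a
small standalone C model of the lowering. It is not the patch's code and the
split/join/add64/sub64/ilt64 names are invented for this sketch; it just checks
the carry/borrow trick against native 64-bit math, and ilt64 shows one
negation-free way to do the signed compare (not necessarily what the patch ends
up with): a signed compare on the high dwords, falling back to an unsigned
compare of the low dwords when the high dwords are equal.

/* Standalone reference model of the 32-bit lowering, for sanity-checking
 * against native 64-bit arithmetic.  Hypothetical helper names; not NIR code.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct u64_split { uint32_t lo, hi; };

static struct u64_split split(uint64_t v)
{
   return (struct u64_split){ .lo = (uint32_t)v, .hi = (uint32_t)(v >> 32) };
}

static uint64_t join(struct u64_split s)
{
   return ((uint64_t)s.hi << 32) | s.lo;
}

/* Mirrors lower_iadd64: add the low halves, then detect the carry by checking
 * for unsigned wrap-around, i.e. b2i(ult(res_lo, x_lo)).
 */
static uint64_t add64(struct u64_split x, struct u64_split y)
{
   uint32_t res_lo = x.lo + y.lo;
   uint32_t carry = res_lo < x.lo;
   uint32_t res_hi = x.hi + y.hi + carry;
   return join((struct u64_split){ .lo = res_lo, .hi = res_hi });
}

/* Mirrors lower_isub64: a borrow is needed exactly when x_lo < y_lo.  The NIR
 * version adds the all-ones boolean "true" (-1) instead of subtracting 1.
 */
static uint64_t sub64(struct u64_split x, struct u64_split y)
{
   uint32_t res_lo = x.lo - y.lo;
   uint32_t borrow = x.lo < y.lo;
   uint32_t res_hi = x.hi - y.hi - borrow;
   return join((struct u64_split){ .lo = res_lo, .hi = res_hi });
}

/* Signed 64-bit less-than without any negation: the high dwords decide when
 * compared as signed; on a tie the low dwords compare unsigned.
 */
static bool ilt64(struct u64_split x, struct u64_split y)
{
   return (int32_t)x.hi < (int32_t)y.hi ||
          (x.hi == y.hi && x.lo < y.lo);
}

int main(void)
{
   uint64_t vals[] = { 0, 1, 5, UINT64_MAX, 0x80000000u, 0xffffffffu,
                       0x100000000ull, (uint64_t)INT64_MIN,
                       (uint64_t)INT64_MAX, (uint64_t)-42 };
   for (unsigned i = 0; i < sizeof(vals) / sizeof(vals[0]); i++) {
      for (unsigned j = 0; j < sizeof(vals) / sizeof(vals[0]); j++) {
         uint64_t a = vals[i], b = vals[j];
         assert(add64(split(a), split(b)) == a + b);
         assert(sub64(split(a), split(b)) == a - b);
         assert(ilt64(split(a), split(b)) == ((int64_t)a < (int64_t)b));
      }
   }
   return 0;
}

Any C99 compiler will build it; the asserts in main() exercise a handful of
boundary values, so it's an easy place to reproduce a miscompare before
translating a fix back into the NIR builder calls.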