From: Jason Ekstrand <jason.ekstr...@intel.com> [mattst88]: Found in an old branch of Jason's.
Jason implemented: inot, iand, ior, iadd, isub, ineg, iabs, compare, imin, imax, umin, umax Matt implemented: ixor, imov, bcsel --- src/compiler/nir/nir_lower_int64.c | 186 +++++++++++++++++++++++++++++++++++++ 1 file changed, 186 insertions(+) diff --git a/src/compiler/nir/nir_lower_int64.c b/src/compiler/nir/nir_lower_int64.c index 0d7f165b406..6b269830801 100644 --- a/src/compiler/nir/nir_lower_int64.c +++ b/src/compiler/nir/nir_lower_int64.c @@ -24,6 +24,192 @@ #include "nir.h" #include "nir_builder.h" +static nir_ssa_def * +lower_imov64(nir_builder *b, nir_ssa_def *x) +{ + nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); + nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); + + return nir_pack_64_2x32_split(b, nir_imov(b, x_lo), nir_imov(b, x_hi)); +} + +static nir_ssa_def * +lower_bcsel64(nir_builder *b, nir_ssa_def *cond, nir_ssa_def *x, nir_ssa_def *y) +{ + nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); + nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); + nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y); + nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y); + + return nir_pack_64_2x32_split(b, nir_bcsel(b, cond, x_lo, y_lo), + nir_bcsel(b, cond, x_hi, y_hi)); +} + +static nir_ssa_def * +lower_inot64(nir_builder *b, nir_ssa_def *x) +{ + nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); + nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); + + return nir_pack_64_2x32_split(b, nir_inot(b, x_lo), nir_inot(b, x_hi)); +} + +static nir_ssa_def * +lower_iand64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) +{ + nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); + nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); + nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y); + nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y); + + return nir_pack_64_2x32_split(b, nir_iand(b, x_lo, y_lo), + nir_iand(b, x_hi, y_hi)); +} + +static nir_ssa_def * +lower_ior64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) +{ + nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); + nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); + nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y); + nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y); + + return nir_pack_64_2x32_split(b, nir_ior(b, x_lo, y_lo), + nir_ior(b, x_hi, y_hi)); +} + +static nir_ssa_def * +lower_ixor64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) +{ + nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); + nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); + nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y); + nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y); + + return nir_pack_64_2x32_split(b, nir_ixor(b, x_lo, y_lo), + nir_ixor(b, x_hi, y_hi)); +} + +static nir_ssa_def * +lower_iadd64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) +{ + nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); + nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); + nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y); + nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y); + + nir_ssa_def *res_lo = nir_iadd(b, x_lo, y_lo); + nir_ssa_def *carry = nir_b2i(b, nir_ult(b, res_lo, x_lo)); + nir_ssa_def *res_hi = nir_iadd(b, carry, nir_iadd(b, x_hi, y_hi)); + + return nir_pack_64_2x32_split(b, res_lo, res_hi); +} + +static nir_ssa_def * +lower_isub64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) +{ + nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); + nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); + nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y); + nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y); + + nir_ssa_def *res_lo = nir_isub(b, x_lo, y_lo); + /* In NIR, true is represented by ~0 which is -1 */ + nir_ssa_def *borrow = nir_ult(b, x_lo, y_lo); + nir_ssa_def *res_hi = nir_iadd(b, nir_isub(b, x_hi, y_hi), borrow); + + return nir_pack_64_2x32_split(b, res_lo, res_hi); +} + +static nir_ssa_def * +lower_ineg64(nir_builder *b, nir_ssa_def *x) +{ + /* Since isub is the same number of instructions (with better dependencies) + * as iadd, subtraction is actually more efficient for ineg than the usual + * 2's complement "flip the bits and add one". + */ + return lower_isub64(b, nir_imm_int64(b, 0), x); +} + +static nir_ssa_def * +lower_iabs64(nir_builder *b, nir_ssa_def *x) +{ + nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); + nir_ssa_def *x_is_neg = nir_ilt(b, x_hi, nir_imm_int(b, 0)); + return nir_bcsel(b, x_is_neg, lower_ineg64(b, x), x); +} + +static nir_ssa_def * +lower_int64_compare(nir_builder *b, nir_op op, nir_ssa_def *x, nir_ssa_def *y) +{ + nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x); + nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); + nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y); + nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y); + + switch (op) { + case nir_op_ieq: + return nir_iand(b, nir_ieq(b, x_hi, y_hi), nir_ieq(b, x_lo, y_lo)); + case nir_op_ine: + return nir_ior(b, nir_ine(b, x_hi, y_hi), nir_ine(b, x_lo, y_lo)); + case nir_op_ult: + return nir_ior(b, nir_ult(b, x_hi, y_hi), + nir_iand(b, nir_ieq(b, x_hi, y_hi), + nir_ult(b, x_lo, y_lo))); + case nir_op_ilt: + /* This one is tricky. Handled below */ + break; + case nir_op_uge: + /* Lower as !(x < y) in the hopes of better CSE */ + return nir_inot(b, lower_int64_compare(b, nir_op_ult, x, y)); + case nir_op_ige: + /* Lower as !(x < y) in the hopes of better CSE */ + return nir_inot(b, lower_int64_compare(b, nir_op_ilt, x, y)); + default: + unreachable("Invalid comparison"); + } + + assert(op == nir_op_ilt); + + nir_ssa_def *neg_x = lower_ineg64(b, x); + nir_ssa_def *neg_y = lower_ineg64(b, y); + nir_ssa_def *x_is_neg = nir_ilt(b, x_hi, nir_imm_int(b, 0)); + nir_ssa_def *y_is_neg = nir_ilt(b, y_hi, nir_imm_int(b, 0)); + + nir_ssa_def *imm_true = nir_imm_int(b, NIR_TRUE); + nir_ssa_def *imm_false = nir_imm_int(b, NIR_FALSE); + nir_ssa_def *ult = lower_int64_compare(b, nir_op_ult, x, y); + nir_ssa_def *not_ult_neg = + nir_inot(b, lower_int64_compare(b, nir_op_ult, neg_x, neg_y)); + + return nir_bcsel(b, x_is_neg, nir_bcsel(b, y_is_neg, not_ult_neg, imm_true), + nir_bcsel(b, y_is_neg, imm_false, ult)); +} + +static nir_ssa_def * +lower_umax64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) +{ + return nir_bcsel(b, lower_int64_compare(b, nir_op_ult, x, y), y, x); +} + +static nir_ssa_def * +lower_imax64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) +{ + return nir_bcsel(b, lower_int64_compare(b, nir_op_ilt, x, y), y, x); +} + +static nir_ssa_def * +lower_umin64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) +{ + return nir_bcsel(b, lower_int64_compare(b, nir_op_ult, x, y), x, y); +} + +static nir_ssa_def * +lower_imin64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) +{ + return nir_bcsel(b, lower_int64_compare(b, nir_op_ilt, x, y), x, y); +} + static nir_ssa_def * lower_imul64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) { -- 2.16.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev