Signed-off-by: Elie Tournier <elie.tourn...@collabora.com>
---
 src/compiler/nir/nir.h                  |   3 +-
 src/compiler/nir/nir_lower_double_ops.c | 593 ++++++++++++++++++++++++++++++++
 src/intel/compiler/brw_nir.c            |   3 +-
 3 files changed, 597 insertions(+), 2 deletions(-)
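[Editor's note: this patch lowers fp64 addition to 32-bit integer operations
in the style of Berkeley SoftFloat. As a reading aid, here is a scalar-C
sketch of how a double decomposes into the sign/exponent/fraction pieces
that the NIR helpers (get_sign, get_exponent, get_frac_hi, get_frac_lo)
produce; the function names below are illustrative only, not part of the
patch.]

    #include <stdint.h>
    #include <string.h>

    static uint64_t as_bits(double d) { uint64_t u; memcpy(&u, &d, 8); return u; }

    /* IEEE-754 binary64: 1 sign bit, 11 exponent bits, 52 fraction bits. */
    static uint32_t fp64_sign(double d)    { return (uint32_t)(as_bits(d) >> 63); }
    static uint32_t fp64_exp(double d)     { return (uint32_t)(as_bits(d) >> 52) & 0x7FF; }
    static uint32_t fp64_frac_hi(double d) { return (uint32_t)(as_bits(d) >> 32) & 0x000FFFFF; }
    static uint32_t fp64_frac_lo(double d) { return (uint32_t)as_bits(d); }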
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index d9925c25c7..d161380b1b 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2575,7 +2575,8 @@ typedef enum {
    nir_lower_deq = (1 << 12),
    nir_lower_dlt = (1 << 13),
    nir_lower_dmul = (1 << 14),
-   nir_lower_ddiv = (1 << 15)
+   nir_lower_ddiv = (1 << 15),
+   nir_lower_dadd = (1 << 16)
 } nir_lower_doubles_options;
 
 bool nir_lower_doubles(nir_shader *shader, nir_lower_doubles_options options);
diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c
index 5d6944e15f..db1a3c0b72 100644
--- a/src/compiler/nir/nir_lower_double_ops.c
+++ b/src/compiler/nir/nir_lower_double_ops.c
@@ -229,6 +229,81 @@ short_shl64(nir_builder *b, nir_ssa_def *src_hi, nir_ssa_def *src_lo,
                                nir_imm_int(b, 31)))));
 }
 
+/* Shifts the 64-bit value formed by concatenating `src_0' and `src_1' right by
+ * the number of bits given in `count'. If any nonzero bits are shifted off,
+ * they are "jammed" into the least significant bit of the result by setting the
+ * least significant bit to 1. The value of `count' can be arbitrarily large;
+ * in particular, if `count' is greater than 64, the result will be either 0
+ * or 1, depending on whether the concatenation of `src_0' and `src_1' is zero
+ * or nonzero. The result is broken into two 32-bit pieces which are stored at
+ * the locations pointed to by `z0Ptr' and `z1Ptr'.
+ */
+static void
+shift64_right_jamming(nir_builder *b,
+                      nir_ssa_def *src_0,
+                      nir_ssa_def *src_1,
+                      nir_ssa_def *count,
+                      nir_ssa_def **z0Ptr,
+                      nir_ssa_def **z1Ptr)
+{
+   nir_ssa_def *neg_count = nir_iand(b,
+                                     nir_ineg(b, count),
+                                     nir_imm_int(b, 31));
+
+   nir_ssa_def *zero = nir_imm_int(b, 0);
+
+   nir_ssa_def *is_count_0 = nir_ieq(b, count, zero);
+   nir_ssa_def *is_count_lt32 = nir_ilt(b, count, nir_imm_int(b, 32));
+   nir_ssa_def *is_count_32 = nir_ieq(b, count, nir_imm_int(b, 32));
+   nir_ssa_def *is_count_lt64 = nir_ilt(b, count, nir_imm_int(b, 64));
+
+   /* The pieces are unsigned, so shift them with ushr, and convert the
+    * `!= 0' sticky tests with b2i so they contribute a 0/1 bit instead of
+    * NIR's 0/~0 boolean encoding.
+    */
+   *z0Ptr = nir_bcsel(b,
+                      is_count_0,
+                      src_0,
+                      nir_bcsel(b,
+                                is_count_lt32,
+                                nir_ushr(b, src_0, count),
+                                zero));
+
+   nir_ssa_def *z1_1 = nir_ior(b,
+                               nir_ishl(b, src_0, neg_count),
+                               nir_ior(b,
+                                       nir_ushr(b, src_1, count),
+                                       nir_b2i(b,
+                                               nir_ine(b,
+                                                       nir_ishl(b, src_1,
+                                                                neg_count),
+                                                       zero))));
+
+   nir_ssa_def *z1_2 = nir_ior(b,
+                               nir_ushr(b, src_0,
+                                        nir_iand(b,
+                                                 count,
+                                                 nir_imm_int(b, 31))),
+                               nir_b2i(b,
+                                       nir_ine(b,
+                                               nir_ior(b,
+                                                       nir_ishl(b, src_0,
+                                                                neg_count),
+                                                       src_1),
+                                               zero)));
+
+   *z1Ptr =
+      nir_bcsel(b,
+                is_count_0,
+                src_1,
+                nir_bcsel(b,
+                          is_count_lt32,
+                          z1_1,
+                          nir_bcsel(b,
+                                    is_count_32,
+                                    nir_ior(b, src_0,
+                                            nir_b2i(b,
+                                                    nir_ine(b, src_1, zero))),
+                                    nir_bcsel(b,
+                                              is_count_lt64,
+                                              z1_2,
+                                              nir_b2i(b,
+                                                      nir_ine(b,
+                                                              nir_ior(b, src_0,
+                                                                      src_1),
+                                                              zero))))));
+}
+
 /* Shifts the 96-bit value formed by concatenating `src_0', `src_1', and `src_2'
  * right by 32 _plus_ the number of bits given in `count'. The shifted result
  * is at most 64 nonzero bits; these are broken into two 32-bit pieces which are
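[Editor's note: shift64_right_jamming above follows SoftFloat's
shift64RightJamming, with the two 32-bit pieces treated as unsigned. A
scalar-C reference, illustrative only:]

    #include <stdint.h>

    static void
    shift64_right_jamming_ref(uint32_t a0, uint32_t a1, int count,
                              uint32_t *z0, uint32_t *z1)
    {
       int neg_count = (-count) & 31;

       if (count == 0) {
          *z0 = a0;
          *z1 = a1;
       } else if (count < 32) {
          *z0 = a0 >> count;
          *z1 = (a0 << neg_count) | (a1 >> count) | ((a1 << neg_count) != 0);
       } else if (count == 32) {
          *z0 = 0;
          *z1 = a0 | (a1 != 0);
       } else if (count < 64) {
          *z0 = 0;
          *z1 = (a0 >> (count & 31)) | (((a0 << neg_count) | a1) != 0);
       } else {
          *z0 = 0;
          *z1 = (a0 | a1) != 0;  /* everything shifted out: pure sticky bit */
       }
    }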
@@ -469,6 +544,22 @@ add64(nir_builder *b,
    *z0Ptr = nir_iadd(b, x_hi, nir_iadd(b, y_hi, nir_b2i(b, nir_ult(b, z, x_lo))));
 }
 
+/* Subtracts the 64-bit value formed by concatenating `y_hi' and `y_lo' from the
+ * 64-bit value formed by concatenating `x_hi' and `x_lo'. Subtraction is modulo
+ * 2^64, so any borrow out (carry out) is lost. The result is broken into two
+ * 32-bit pieces which are stored at the locations pointed to by `z0Ptr' and
+ * `z1Ptr'.
+ */
+static void
+sub64(nir_builder *b,
+      nir_ssa_def *x_hi, nir_ssa_def *x_lo,
+      nir_ssa_def *y_hi, nir_ssa_def *y_lo,
+      nir_ssa_def **z0Ptr, nir_ssa_def **z1Ptr)
+{
+   *z1Ptr = nir_isub(b, x_lo, y_lo);
+   *z0Ptr = nir_isub(b, nir_isub(b, x_hi, y_hi),
+                     nir_b2i(b, nir_ult(b, x_lo, y_lo)));
+}
+
 /* Multiplies `x' by `y' to obtain a 64-bit product. The product is broken
  * into two 32-bit pieces which are stored at the locations pointed to by
  * `z0Ptr' and `z1Ptr'.
@@ -712,6 +803,61 @@ round_pack_fp64(nir_builder *b,
                            z_frac_0, z_frac_1, z_frac_2));
 }
 
+/* Takes an abstract floating-point value having sign `z_si', exponent `z_exp',
+ * and significand formed by the concatenation of `z_frac_0' and `z_frac_1',
+ * and returns the proper double-precision floating-point value corresponding
+ * to the abstract input. This routine is just like `round_pack_fp64'
+ * except that the input significand has fewer bits and does not have to be
+ * normalized. In all cases, `z_exp' must be 1 less than the "true" floating-
+ * point exponent.
+ */
+static nir_ssa_def *
+normalize_round_pack_fp64(nir_builder *b,
+                          nir_ssa_def *z_si,
+                          nir_ssa_def *z_exp,
+                          nir_ssa_def *z_frac_0,
+                          nir_ssa_def *z_frac_1)
+{
+   nir_ssa_def *zero = nir_imm_int(b, 0);
+   nir_ssa_def *is_z_frac_0_zero = nir_ieq(b, z_frac_0, zero);
+
+   z_frac_0 = nir_bcsel(b, is_z_frac_0_zero, z_frac_1, z_frac_0);
+   z_frac_1 = nir_bcsel(b, is_z_frac_0_zero, zero, z_frac_1);
+   z_exp = nir_bcsel(b, is_z_frac_0_zero,
+                     nir_isub(b, z_exp, nir_imm_int(b, 32)),
+                     z_exp);
+
+   nir_ssa_def *shift_count = nir_isub(b,
+                                       count_leading_zeros(b, z_frac_0),
+                                       nir_imm_int(b, 11));
+
+   /* Compute both the left-shifted (shift_count >= 0) and the right-jammed
+    * (shift_count < 0) significands; the bcsel below keeps the valid one.
+    */
+   nir_ssa_def *z_frac_0_tmp;
+   nir_ssa_def *z_frac_1_tmp;
+   nir_ssa_def *z_frac_2;
+   short_shl64(b, z_frac_0, z_frac_1, shift_count,
+               &z_frac_0_tmp, &z_frac_1_tmp);
+   shift64_extra_right_jamming(b,
+                               z_frac_0, z_frac_1, zero,
+                               nir_ineg(b, shift_count),
+                               &z_frac_0, &z_frac_1, &z_frac_2);
+
+   z_exp = nir_isub(b, z_exp, shift_count);
+   return nir_bcsel(b,
+                    nir_ige(b, shift_count, zero),
+                    round_pack_fp64(b, z_si, z_exp,
+                                    z_frac_0_tmp, z_frac_1_tmp, zero),
+                    round_pack_fp64(b, z_si, z_exp,
+                                    z_frac_0, z_frac_1, z_frac_2));
+}
+
 static nir_ssa_def *
 lower_rcp(nir_builder *b, nir_ssa_def *src)
 {
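[Editor's note: add64/sub64 in the hunks above are the usual two-word
add/subtract with the carry/borrow derived from an unsigned compare.
Scalar-C reference, illustrative only:]

    #include <stdint.h>

    static void
    add64_ref(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1,
              uint32_t *z0, uint32_t *z1)
    {
       uint32_t z = a1 + b1;
       *z1 = z;
       *z0 = a0 + b0 + (z < a1);   /* carry out of the low word */
    }

    static void
    sub64_ref(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1,
              uint32_t *z0, uint32_t *z1)
    {
       *z1 = a1 - b1;
       *z0 = a0 - b0 - (a1 < b1);  /* borrow out of the low word */
    }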
@@ -1476,6 +1622,441 @@ lower_fdiv64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
                                     div64(b, x, y)))));
 }
 
+static nir_ssa_def *
+add_frac(nir_builder *b,
+         nir_ssa_def *x_frac_hi,
+         nir_ssa_def *x_frac_lo,
+         nir_ssa_def *y_frac_hi,
+         nir_ssa_def *y_frac_lo,
+         nir_ssa_def *z_si,
+         nir_ssa_def *z_exp,
+         nir_ssa_def *z_frac_2)
+{
+   nir_ssa_def *z_frac_0;
+   nir_ssa_def *z_frac_1;
+
+   x_frac_hi = nir_ior(b, x_frac_hi, nir_imm_int(b, 0x00100000));
+   add64(b, x_frac_hi, x_frac_lo, y_frac_hi, y_frac_lo, &z_frac_0, &z_frac_1);
+   z_exp = nir_isub(b, z_exp, nir_imm_int(b, 1));
+
+   /* Test for significand overflow before the jamming shift rewrites
+    * z_frac_0; the unshifted packing is only valid when the sum stayed
+    * below 1 << 21.
+    */
+   nir_ssa_def *no_overflow = nir_ilt(b, z_frac_0, nir_imm_int(b, 0x00200000));
+
+   nir_ssa_def *without_shift = round_pack_fp64(b, z_si, z_exp,
+                                                z_frac_0, z_frac_1, z_frac_2);
+
+   shift64_extra_right_jamming(b,
+                               z_frac_0, z_frac_1, z_frac_2,
+                               nir_imm_int(b, 1),
+                               &z_frac_0, &z_frac_1, &z_frac_2);
+
+   nir_ssa_def *with_shift = round_pack_fp64(b, z_si,
+                                             nir_iadd(b, z_exp,
+                                                      nir_imm_int(b, 1)),
+                                             z_frac_0, z_frac_1, z_frac_2);
+
+   return nir_bcsel(b, no_overflow, without_shift, with_shift);
+}
+
+/* Returns the result of adding the absolute values of the double-precision
+ * floating-point values `x' and `y'. If `z_si' is 1, the sum is negated
+ * before being returned. `z_si' is ignored if the result is a NaN. The
+ * addition is performed according to the IEEE Standard for Floating-Point
+ * Arithmetic.
+ */
+static nir_ssa_def *
+add_frac_fp64(nir_builder *b, nir_ssa_def *z_si, nir_ssa_def *x, nir_ssa_def *y)
+{
+   nir_ssa_def *x_exp = get_exponent(b, x);
+   nir_ssa_def *x_frac_lo = get_frac_lo(b, x);
+   nir_ssa_def *x_frac_hi = get_frac_hi(b, x);
+   nir_ssa_def *y_exp = get_exponent(b, y);
+   nir_ssa_def *y_frac_lo = get_frac_lo(b, y);
+   nir_ssa_def *y_frac_hi = get_frac_hi(b, y);
+
+   nir_ssa_def *exp_diff = nir_isub(b, x_exp, y_exp);
+   nir_ssa_def *x_frac = nir_ior(b, x_frac_hi, x_frac_lo);
+   nir_ssa_def *y_frac = nir_ior(b, y_frac_hi, y_frac_lo);
+   nir_ssa_def *x_y_frac = nir_ior(b, x_frac, y_frac);
+
+   nir_ssa_def *zero = nir_imm_int(b, 0);
+
+   /* Precomputed results for the NaN and Inf special cases */
+   nir_ssa_def *propagate_nan = propagate_fp64_nan(b, x, y);
+
+   nir_ssa_def *pack_inf_fp64 = pack_fp64(b, z_si, nir_imm_int(b, 0x7FF),
+                                          zero, zero);
+
+   /* Case (0 < exp_diff) && (y_exp == 0) */
+   nir_ssa_def *y_frac_hi_tmp;
+   nir_ssa_def *y_frac_lo_tmp;
+   nir_ssa_def *z_frac_0;
+   nir_ssa_def *z_frac_1;
+   nir_ssa_def *z_frac_2;
+   nir_ssa_def *exp_diff_tmp = nir_isub(b, exp_diff, nir_imm_int(b, 1));
+   shift64_extra_right_jamming(b, y_frac_hi, y_frac_lo,
+                               zero, exp_diff_tmp,
+                               &y_frac_hi_tmp, &y_frac_lo_tmp,
+                               &z_frac_2);
+   nir_ssa_def *case_1 = add_frac(b, x_frac_hi, x_frac_lo,
+                                  y_frac_hi_tmp, y_frac_lo_tmp,
+                                  z_si, x_exp, z_frac_2);
+
+   /* Case (0 < exp_diff) && (y_exp != 0) */
+   y_frac_hi_tmp = nir_ior(b, y_frac_hi, nir_imm_int(b, 0x00100000));
+   shift64_extra_right_jamming(b, y_frac_hi_tmp, y_frac_lo,
+                               zero, exp_diff,
+                               &y_frac_hi_tmp, &y_frac_lo_tmp,
+                               &z_frac_2);
+   nir_ssa_def *case_2 = add_frac(b, x_frac_hi, x_frac_lo,
+                                  y_frac_hi_tmp, y_frac_lo_tmp,
+                                  z_si, x_exp, z_frac_2);
+
+   /* Case (exp_diff < 0) && (x_exp == 0) */
+   nir_ssa_def *x_frac_hi_tmp;
+   nir_ssa_def *x_frac_lo_tmp;
+   exp_diff_tmp = nir_iadd(b, exp_diff, nir_imm_int(b, 1));
+   shift64_extra_right_jamming(b, x_frac_hi, x_frac_lo,
+                               zero, nir_ineg(b, exp_diff_tmp),
+                               &x_frac_hi_tmp, &x_frac_lo_tmp,
+                               &z_frac_2);
+   nir_ssa_def *case_3 = add_frac(b, x_frac_hi_tmp, x_frac_lo_tmp,
+                                  y_frac_hi, y_frac_lo,
+                                  z_si, y_exp, z_frac_2);
+
+   /* Case (exp_diff < 0) && (x_exp != 0) */
+   x_frac_hi_tmp = nir_ior(b, x_frac_hi, nir_imm_int(b, 0x00100000));
+   shift64_extra_right_jamming(b, x_frac_hi_tmp, x_frac_lo,
+                               zero, nir_ineg(b, exp_diff),
+                               &x_frac_hi_tmp, &x_frac_lo_tmp,
+                               &z_frac_2);
+   nir_ssa_def *case_4 = add_frac(b, x_frac_hi_tmp, x_frac_lo_tmp,
+                                  y_frac_hi, y_frac_lo,
+                                  z_si, y_exp, z_frac_2);
+
+   /* Case (exp_diff == 0) && (x_exp != 0x7FF) */
+   add64(b, x_frac_hi, x_frac_lo, y_frac_hi, y_frac_lo, &z_frac_0, &z_frac_1);
+   nir_ssa_def *res = pack_fp64(b, z_si, zero, z_frac_0, z_frac_1);
+   z_frac_0 = nir_ior(b, z_frac_0, nir_imm_int(b, 0x00200000));
+
+   shift64_extra_right_jamming(b,
+                               z_frac_0, z_frac_1, zero,
+                               nir_imm_int(b, 1),
+                               &z_frac_0, &z_frac_1, &z_frac_2);
+
+   nir_ssa_def *return_else = nir_bcsel(b,
+                                        nir_ieq(b, x_exp, zero),
+                                        res,
+                                        round_pack_fp64(b, z_si, x_exp,
+                                                        z_frac_0, z_frac_1,
+                                                        z_frac_2));
+
+   return
+      nir_bcsel(b,
+                nir_ilt(b, zero, exp_diff),
+                nir_bcsel(b,
+                          nir_ieq(b, x_exp, nir_imm_int(b, 0x7FF)),
+                          nir_bcsel(b,
+                                    nir_ine(b, x_frac, zero),
+                                    propagate_nan,
+                                    x),
+                          nir_bcsel(b,
+                                    nir_ieq(b, y_exp, zero),
+                                    case_1,
+                                    case_2)),
+                nir_bcsel(b,
+                          nir_ilt(b, exp_diff, zero),
+                          nir_bcsel(b,
+                                    nir_ieq(b, y_exp, nir_imm_int(b, 0x7FF)),
+                                    nir_bcsel(b,
+                                              nir_ine(b, y_frac, zero),
+                                              propagate_nan,
+                                              pack_inf_fp64),
+                                    nir_bcsel(b,
+                                              nir_ieq(b, x_exp, zero),
+                                              case_3,
+                                              case_4)),
+                          nir_bcsel(b,
+                                    nir_ieq(b, x_exp, nir_imm_int(b, 0x7FF)),
+                                    nir_bcsel(b,
+                                              nir_ine(b, x_y_frac, zero),
+                                              propagate_nan,
+                                              x),
+                                    return_else)));
+}
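[Editor's note: the shape of add_frac_fp64 is easier to see on a native
64-bit significand. A simplified scalar-C sketch of the same-sign path --
align the smaller operand with a sticky bit, add, renormalize, round to
nearest-even. No NaN/Inf/denormal handling; illustrative only:]

    #include <stdint.h>

    typedef struct { uint32_t sign; int32_t exp; uint64_t frac; } parts64;

    /* x and y are normal numbers with x.exp >= y.exp; frac holds the 52-bit
     * significand without the hidden bit. */
    static parts64
    add_magnitudes(parts64 x, parts64 y)
    {
       uint64_t xs = (x.frac | (1ull << 52)) << 2;  /* hidden bit + 2 round bits */
       uint64_t ys = (y.frac | (1ull << 52)) << 2;
       uint32_t d = x.exp - y.exp;

       if (d)  /* align y, jamming the shifted-out bits into the sticky bit */
          ys = d < 64 ? (ys >> d) | ((ys << (64 - d)) != 0) : (ys != 0);

       uint64_t sum = xs + ys;
       parts64 z = { x.sign, x.exp, 0 };

       if (sum & (1ull << 55)) {  /* carry out: shift right, keep sticky */
          sum = (sum >> 1) | (sum & 1);
          z.exp++;
       }

       uint64_t tail = sum & 3;  /* guard and sticky bits */
       sum >>= 2;
       if (tail > 2 || (tail == 2 && (sum & 1)))  /* round to nearest even */
          sum++;
       if (sum >> 53) {  /* rounding overflowed the significand */
          sum >>= 1;
          z.exp++;
       }
       z.frac = sum & ((1ull << 52) - 1);
       return z;
    }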
+
+/* Returns the result of subtracting the absolute values of the double-
+ * precision floating-point values `x' and `y'. If `z_si' is 1, the
+ * difference is negated before being returned. `z_si' is ignored if the
+ * result is a NaN. The subtraction is performed according to the IEEE
+ * Standard for Floating-Point Arithmetic.
+ */
+static nir_ssa_def *
+sub_frac_fp64(nir_builder *b, nir_ssa_def *z_si, nir_ssa_def *x, nir_ssa_def *y)
+{
+   nir_ssa_def *x_exp = get_exponent(b, x);
+   nir_ssa_def *x_frac_lo = get_frac_lo(b, x);
+   nir_ssa_def *x_frac_hi = get_frac_hi(b, x);
+   nir_ssa_def *y_exp = get_exponent(b, y);
+   nir_ssa_def *y_frac_lo = get_frac_lo(b, y);
+   nir_ssa_def *y_frac_hi = get_frac_hi(b, y);
+
+   nir_ssa_def *x_frac = nir_ior(b, x_frac_hi, x_frac_lo);
+   nir_ssa_def *y_frac = nir_ior(b, y_frac_hi, y_frac_lo);
+   nir_ssa_def *x_y_frac = nir_ior(b, x_frac, y_frac);
+
+   nir_ssa_def *zero = nir_imm_int(b, 0);
+
+   nir_ssa_def *exp_diff = nir_isub(b, x_exp, y_exp);
+   short_shl64(b, x_frac_hi, x_frac_lo, nir_imm_int(b, 10),
+               &x_frac_hi, &x_frac_lo);
+   short_shl64(b, y_frac_hi, y_frac_lo, nir_imm_int(b, 10),
+               &y_frac_hi, &y_frac_lo);
+
+   /* Precomputed results for the NaN, Inf and zero special cases */
+   nir_ssa_def *propagate_nan = propagate_fp64_nan(b, x, y);
+
+   nir_ssa_def *pack_zero_fp64 = pack_fp64(b, zero, zero, zero, zero);
+
+   nir_ssa_def *default_nan =
+      nir_pack_64_2x32_split(b,
+                             nir_imm_int(b, 0xFFFFFFFF),
+                             nir_imm_int(b, 0xFFFFFFFF));
+
+   /* x_exp > y_exp */
+   nir_ssa_def *z_frac_0;
+   nir_ssa_def *z_frac_1;
+
+   nir_ssa_def *y_frac_hi_tmp;
+   nir_ssa_def *y_frac_lo_tmp;
+
+   /* if (y_exp == 0) */
+   shift64_right_jamming(b,
+                         y_frac_hi, y_frac_lo,
+                         nir_isub(b, exp_diff, nir_imm_int(b, 1)),
+                         &y_frac_hi_tmp, &y_frac_lo_tmp);
+   nir_ssa_def *x_frac_hi_ior = nir_ior(b, x_frac_hi,
+                                        nir_imm_int(b, 0x40000000));
+   sub64(b, x_frac_hi_ior, x_frac_lo,
+         y_frac_hi_tmp, y_frac_lo_tmp,
+         &z_frac_0, &z_frac_1);
+   nir_ssa_def *case_1 =
+      normalize_round_pack_fp64(b, z_si,
+                                nir_isub(b, x_exp, nir_imm_int(b, 11)),
+                                z_frac_0, z_frac_1);
+
+   /* if (y_exp != 0): after the shift left by 10 above, the implicit
+    * leading 1 sits at bit 30.
+    */
+   shift64_right_jamming(b,
+                         nir_ior(b, y_frac_hi, nir_imm_int(b, 0x40000000)),
+                         y_frac_lo,
+                         exp_diff,
+                         &y_frac_hi_tmp, &y_frac_lo_tmp);
+   sub64(b, x_frac_hi_ior, x_frac_lo,
+         y_frac_hi_tmp, y_frac_lo_tmp,
+         &z_frac_0, &z_frac_1);
+   nir_ssa_def *case_2 =
+      normalize_round_pack_fp64(b, z_si,
+                                nir_isub(b, x_exp, nir_imm_int(b, 11)),
+                                z_frac_0, z_frac_1);
+
+   nir_ssa_def *x_exp_bigger =
+      nir_bcsel(b,
+                nir_ieq(b, x_exp, nir_imm_int(b, 0x7FF)),
+                nir_bcsel(b,
+                          nir_ine(b, x_frac, zero),
+                          propagate_nan,
+                          x),
+                nir_bcsel(b,
+                          nir_ieq(b, y_exp, zero),
+                          case_1,
+                          case_2));
+
+   /* x_exp < y_exp: the result takes the flipped sign in all sub-cases */
+   nir_ssa_def *x_frac_hi_tmp;
+   nir_ssa_def *x_frac_lo_tmp;
+
+   /* if (x_exp == 0) */
+   shift64_right_jamming(b,
+                         x_frac_hi, x_frac_lo,
+                         nir_ineg(b, nir_iadd(b, exp_diff,
+                                              nir_imm_int(b, 1))),
+                         &x_frac_hi_tmp, &x_frac_lo_tmp);
+   nir_ssa_def *y_frac_hi_ior = nir_ior(b, y_frac_hi,
+                                        nir_imm_int(b, 0x40000000));
+   sub64(b, y_frac_hi_ior, y_frac_lo,
+         x_frac_hi_tmp, x_frac_lo_tmp,
+         &z_frac_0, &z_frac_1);
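[Editor's note: in the exp_diff == 0 branch of sub_frac_fp64, the result's
sign and magnitude come from an unsigned 64-bit compare of the two
significands, which is what the nested bcsels below implement. Scalar-C
reference, illustrative only:]

    #include <stdint.h>

    /* Returns -1, 0 or +1 as a0:a1 compares to b0:b1 (unsigned). A zero
     * result means equal magnitudes, i.e. the difference is exactly +0. */
    static int
    cmp64(uint32_t a0, uint32_t a1, uint32_t b0, uint32_t b1)
    {
       if (a0 != b0)
          return a0 < b0 ? -1 : 1;
       if (a1 != b1)
          return a1 < b1 ? -1 : 1;
       return 0;
    }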
+   nir_ssa_def *case_3 =
+      normalize_round_pack_fp64(b,
+                                nir_ixor(b, z_si, nir_imm_int(b, 1)),
+                                nir_isub(b, y_exp, nir_imm_int(b, 11)),
+                                z_frac_0, z_frac_1);
+
+   /* if (x_exp != 0) */
+   shift64_right_jamming(b,
+                         x_frac_hi_ior, x_frac_lo,
+                         nir_ineg(b, exp_diff),
+                         &x_frac_hi_tmp, &x_frac_lo_tmp);
+   sub64(b, y_frac_hi_ior, y_frac_lo,
+         x_frac_hi_tmp, x_frac_lo_tmp,
+         &z_frac_0, &z_frac_1);
+   nir_ssa_def *case_4 =
+      normalize_round_pack_fp64(b,
+                                nir_ixor(b, z_si, nir_imm_int(b, 1)),
+                                nir_isub(b, y_exp, nir_imm_int(b, 11)),
+                                z_frac_0, z_frac_1);
+
+   nir_ssa_def *y_exp_bigger =
+      nir_bcsel(b,
+                nir_ieq(b, y_exp, nir_imm_int(b, 0x7FF)),
+                nir_bcsel(b,
+                          nir_ine(b, y_frac, zero),
+                          propagate_nan,
+                          pack_fp64(b,
+                                    nir_ixor(b, z_si, nir_imm_int(b, 1)),
+                                    nir_imm_int(b, 0x7FF),
+                                    zero, zero)),
+                nir_bcsel(b,
+                          nir_ieq(b, x_exp, zero),
+                          case_3,
+                          case_4));
+
+   /* x_frac_hi > y_frac_hi or x_frac_lo > y_frac_lo */
+   sub64(b, x_frac_hi, x_frac_lo, y_frac_hi, y_frac_lo, &z_frac_0, &z_frac_1);
+   nir_ssa_def *x_bigger =
+      nir_bcsel(b,
+                nir_ieq(b, x_exp, zero),
+                normalize_round_pack_fp64(b, z_si,
+                                          nir_imm_int(b, -10),
+                                          z_frac_0, z_frac_1),
+                normalize_round_pack_fp64(b, z_si,
+                                          nir_isub(b, x_exp,
+                                                   nir_imm_int(b, 11)),
+                                          z_frac_0, z_frac_1));
+
+   /* x_frac_hi < y_frac_hi or x_frac_lo < y_frac_lo */
+   sub64(b, y_frac_hi, y_frac_lo, x_frac_hi, x_frac_lo, &z_frac_0, &z_frac_1);
+   nir_ssa_def *y_bigger =
+      nir_bcsel(b,
+                nir_ieq(b, x_exp, zero),
+                normalize_round_pack_fp64(b,
+                                          nir_ixor(b, z_si,
+                                                   nir_imm_int(b, 1)),
+                                          nir_imm_int(b, -10),
+                                          z_frac_0, z_frac_1),
+                normalize_round_pack_fp64(b,
+                                          nir_ixor(b, z_si,
+                                                   nir_imm_int(b, 1)),
+                                          nir_isub(b, y_exp,
+                                                   nir_imm_int(b, 11)),
+                                          z_frac_0, z_frac_1));
+
+   /* Select the value to return. The significand pieces are unsigned,
+    * so compare them with ult.
+    */
+   nir_ssa_def *select =
+      nir_bcsel(b,
+                nir_ult(b, y_frac_hi, x_frac_hi),
+                x_bigger,
+                nir_bcsel(b,
+                          nir_ult(b, x_frac_hi, y_frac_hi),
+                          y_bigger,
+                          nir_bcsel(b,
+                                    nir_ult(b, y_frac_lo, x_frac_lo),
+                                    x_bigger,
+                                    nir_bcsel(b,
+                                              nir_ult(b, x_frac_lo,
+                                                      y_frac_lo),
+                                              y_bigger,
+                                              pack_zero_fp64))));
+
+   return
+      nir_bcsel(b,
+                nir_ilt(b, zero, exp_diff),
+                x_exp_bigger,
+                nir_bcsel(b,
+                          nir_ilt(b, exp_diff, zero),
+                          y_exp_bigger,
+                          nir_bcsel(b,
+                                    nir_ieq(b, x_exp, nir_imm_int(b, 0x7FF)),
+                                    nir_bcsel(b,
+                                              nir_ine(b, x_y_frac, zero),
+                                              propagate_nan,
+                                              default_nan),
+                                    select)));
+}
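[Editor's note: lower_fadd64 below mirrors SoftFloat's float64_add
dispatch: equal signs add magnitudes, opposite signs subtract them, and
the first operand's sign is passed down. The same shape in scalar C, with
fabs() standing in for the two magnitude helpers; illustrative only, and
it glosses over the -0/NaN corner cases the real helpers handle:]

    #include <math.h>
    #include <stdint.h>
    #include <string.h>

    static int sign_of(double d) { uint64_t u; memcpy(&u, &d, 8); return (int)(u >> 63); }

    static double
    fadd64_dispatch(double x, double y)
    {
       double m = sign_of(x) == sign_of(y) ? fabs(x) + fabs(y)   /* add_frac_fp64 */
                                           : fabs(x) - fabs(y);  /* sub_frac_fp64 */
       return sign_of(x) ? -m : m;
    }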
+
+static nir_ssa_def *
+lower_fadd64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   nir_ssa_def *x_si = get_sign(b, x);
+   nir_ssa_def *y_si = get_sign(b, y);
+
+   return nir_bcsel(b,
+                    nir_ieq(b, x_si, y_si),
+                    add_frac_fp64(b, x_si, x, y),
+                    sub_frac_fp64(b, x_si, x, y));
+}
+
 static bool
 lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options)
 {
@@ -1564,6 +2145,11 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options)
          return false;
       break;
 
+   case nir_op_fadd:
+      if (!(options & nir_lower_dadd))
+         return false;
+      break;
+
    default:
       return false;
    }
@@ -1650,6 +2236,13 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options)
       }
       break;
 
+   case nir_op_fadd: {
+      nir_ssa_def *src1 = nir_fmov_alu(&bld, instr->src[1],
+                                       instr->dest.dest.ssa.num_components);
+      result = lower_fadd64(&bld, src, src1);
+   }
+      break;
+
    default:
       unreachable("unhandled opcode");
    }
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 1c1867a3ad..67e8cea2b6 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -516,7 +516,8 @@ nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
                 nir_lower_deq |
                 nir_lower_dlt |
                 nir_lower_dmul |
-                nir_lower_ddiv);
+                nir_lower_ddiv |
+                nir_lower_dadd);
       OPT(nir_lower_64bit_pack);
    } while (progress);
 
-- 
2.11.0
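[Editor's note: a backend other than i965 would opt in the same way the
brw_nir.c hunk above does. A minimal sketch, using only the flags and the
nir_lower_doubles() entry point this patch defines:]

    static bool
    lower_fp64_arith(nir_shader *shader)
    {
       return nir_lower_doubles(shader,
                                nir_lower_dmul |
                                nir_lower_ddiv |
                                nir_lower_dadd);
    }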