From: Ian Romanick <ian.d.roman...@intel.com> Signed-off-by: Ian Romanick <ian.d.roman...@intel.com> --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 50 ++++++++++++++++++++++------ src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 52 +++++++++++++++++++++++------- 2 files changed, 81 insertions(+), 21 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index f15bf3e..f8db28a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -623,8 +623,32 @@ fs_visitor::nir_emit_find_msb_using_lzd(const fs_builder &bld, bool is_signed) { fs_inst *inst; + fs_reg temp = src; - bld.LZD(retype(result, BRW_REGISTER_TYPE_UD), src); + if (is_signed) { + /* LZD of an absolute value source almost always does the right + * thing. There are two problem values: + * + * * 0x80000000. Since abs(0x80000000) == 0x80000000, LZD returns + * 0. However, findMSB(int(0x80000000)) == 30. + * + * * 0xffffffff. Since abs(0xffffffff) == 1, LZD returns + * 31. Section 8.8 (Integer Functions) of the GLSL 4.50 spec says: + * + * For a value of zero or negative one, -1 will be returned. + * + * For all negative number cases, including 0x80000000 and + * 0xffffffff, the correct value is obtained from LZD if instead of + * negating the (already negative) value the logical-not is used. A + * conditional logical-not can be achieved in two instructions. + */ + temp = vgrf(glsl_type::int_type); + + bld.ASR(temp, src, brw_imm_d(31)); + bld.XOR(temp, temp, src); + } + + bld.LZD(retype(result, BRW_REGISTER_TYPE_UD), temp); /* LZD counts from the MSB side, while GLSL's findMSB() wants the count * from the LSB side. Subtract the result from 31 to convert the MSB @@ -1339,17 +1363,23 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) case nir_op_ifind_msb: { assert(nir_dest_bit_size(instr->dest.dest) < 64); - bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]); - /* FBH counts from the MSB side, while GLSL's findMSB() wants the count - * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then - * subtract the result from 31 to convert the MSB count into an LSB count. 
- */ bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ); + if (devinfo->gen < 7) { + nir_emit_find_msb_using_lzd(bld, result, op[0], true); + } else { + bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]); - inst = bld.ADD(result, result, brw_imm_d(31)); - inst->predicate = BRW_PREDICATE_NORMAL; - inst->src[0].negate = true; + /* FBH counts from the MSB side, while GLSL's findMSB() wants the + * count from the LSB side. If FBH didn't return an error + * (0xFFFFFFFF), then subtract the result from 31 to convert the MSB + * count into an LSB count. + */ + bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ); + + inst = bld.ADD(result, result, brw_imm_d(31)); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->src[0].negate = true; + } break; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index cd88fb6..2fc2cf2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -999,8 +999,32 @@ vec4_visitor::nir_emit_find_msb_using_lzd(const dst_reg &dst, bool is_signed) { vec4_instruction *inst; + src_reg temp = src; - emit(BRW_OPCODE_LZD, retype(dst, BRW_REGISTER_TYPE_UD), src); + if (is_signed) { + /* LZD of an absolute value source almost always does the right + * thing. There are two problem values: + * + * * 0x80000000. Since abs(0x80000000) == 0x80000000, LZD returns + * 0. However, findMSB(int(0x80000000)) == 30. + * + * * 0xffffffff. Since abs(0xffffffff) == 1, LZD returns + * 31. Section 8.8 (Integer Functions) of the GLSL 4.50 spec says: + * + * For a value of zero or negative one, -1 will be returned. + * + * For all negative number cases, including 0x80000000 and + * 0xffffffff, the correct value is obtained from LZD if instead of + * negating the (already negative) value the logical-not is used. A + * conditional logical-not can be achieved in two instructions. 
+ */ + temp = src_reg(this, glsl_type::ivec4_type); + + emit(BRW_OPCODE_ASR, dst_reg(temp), src, brw_imm_d(31)); + emit(BRW_OPCODE_XOR, dst_reg(temp), temp, src); + } + + emit(BRW_OPCODE_LZD, retype(dst, BRW_REGISTER_TYPE_UD), temp); /* LZD counts from the MSB side, while GLSL's findMSB() wants the count * from the LSB side. Subtract the result from 31 to convert the MSB count @@ -1484,18 +1508,24 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) break; case nir_op_ifind_msb: { - emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0])); - - /* FBH counts from the MSB side, while GLSL's findMSB() wants the count - * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then - * subtract the result from 31 to convert the MSB count into an LSB count. - */ src_reg src(dst); - emit(CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ)); - inst = emit(ADD(dst, src, brw_imm_d(31))); - inst->predicate = BRW_PREDICATE_NORMAL; - inst->src[0].negate = true; + if (devinfo->gen < 7) { + nir_emit_find_msb_using_lzd(dst, op[0], true); + } else { + emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0])); + + /* FBH counts from the MSB side, while GLSL's findMSB() wants the + * count from the LSB side. If FBH didn't return an error + * (0xFFFFFFFF), then subtract the result from 31 to convert the MSB + * count into an LSB count. + */ + emit(CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ)); + + inst = emit(ADD(dst, src, brw_imm_d(31))); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->src[0].negate = true; + } break; } -- 2.5.5 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev