From: Ian Romanick <ian.d.roman...@intel.com> Implement ufind_msb using LZD instead of sharing the FBH-based path with ifind_msb. This uses one fewer instruction.
Signed-off-by: Ian Romanick <ian.d.roman...@intel.com> --- src/mesa/drivers/dri/i965/brw_fs.h | 4 ++++ src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 3 +++ src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 26 +++++++++++++++++++++++- src/mesa/drivers/dri/i965/brw_vec4.h | 4 ++++ src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 3 +++ src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 22 ++++++++++++++++++++ 6 files changed, 61 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 4237197..22ce092 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -237,6 +237,10 @@ public: nir_tex_instr *instr); void nir_emit_jump(const brw::fs_builder &bld, nir_jump_instr *instr); + void nir_emit_find_msb_using_lzd(const brw::fs_builder &bld, + const fs_reg &result, + const fs_reg &src, + bool is_signed); fs_reg get_nir_src(const nir_src &src); fs_reg get_nir_src_imm(const nir_src &src); fs_reg get_nir_dest(const nir_dest &dest); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index d25d26a..bda4a26 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1761,6 +1761,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) /* FBL only supports UD type for dst. */ brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; + case BRW_OPCODE_LZD: + brw_LZD(p, dst, src[0]); + break; case BRW_OPCODE_CBIT: assert(devinfo->gen >= 7); /* CBIT only supports UD type for dst. 
*/ diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index b3f5dfd..f15bf3e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -617,6 +617,25 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr, } void +fs_visitor::nir_emit_find_msb_using_lzd(const fs_builder &bld, + const fs_reg &result, + const fs_reg &src, + bool is_signed) +{ + fs_inst *inst; + + bld.LZD(retype(result, BRW_REGISTER_TYPE_UD), src); + + /* LZD counts from the MSB side, while GLSL's findMSB() wants the count + * from the LSB side. Subtract the result from 31 to convert the MSB + * count into an LSB count. If no bits are set, LZD will return 32. + * 31-32 = -1, which is exactly what findMSB() is supposed to return. + */ + inst = bld.ADD(result, retype(result, BRW_REGISTER_TYPE_D), brw_imm_d(31)); + inst->src[0].negate = true; +} + +void fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) { struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key; @@ -1312,7 +1331,12 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) bld.CBIT(result, op[0]); break; - case nir_op_ufind_msb: + case nir_op_ufind_msb: { + assert(nir_dest_bit_size(instr->dest.dest) < 64); + nir_emit_find_msb_using_lzd(bld, result, op[0], false); + break; + } + case nir_op_ifind_msb: { assert(nir_dest_bit_size(instr->dest.dest) < 64); bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 76dea04..4be6833 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -326,6 +326,10 @@ public: virtual void nir_emit_undef(nir_ssa_undef_instr *instr); virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr); + void nir_emit_find_msb_using_lzd(const dst_reg &dst, + const src_reg &src, + bool is_signed); + dst_reg get_nir_dest(const nir_dest 
&dest, enum brw_reg_type type); dst_reg get_nir_dest(const nir_dest &dest, nir_alu_type type); dst_reg get_nir_dest(const nir_dest &dest); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index bb0254e..193e748 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1637,6 +1637,9 @@ generate_code(struct brw_codegen *p, /* FBL only supports UD type for dst. */ brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; + case BRW_OPCODE_LZD: + brw_LZD(p, dst, src[0]); + break; case BRW_OPCODE_CBIT: assert(devinfo->gen >= 7); /* CBIT only supports UD type for dst. */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index f3b4528..cd88fb6 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -994,6 +994,25 @@ vec4_visitor::optimize_predicate(nir_alu_instr *instr, } void +vec4_visitor::nir_emit_find_msb_using_lzd(const dst_reg &dst, + const src_reg &src, + bool is_signed) +{ + vec4_instruction *inst; + + emit(BRW_OPCODE_LZD, retype(dst, BRW_REGISTER_TYPE_UD), src); + + /* LZD counts from the MSB side, while GLSL's findMSB() wants the count + * from the LSB side. Subtract the result from 31 to convert the MSB count + * into an LSB count. If no bits are set, LZD will return 32. 31-32 = -1, + * which is exactly what findMSB() is supposed to return. 
+ */ + inst = emit(ADD(dst, retype(src_reg(dst), BRW_REGISTER_TYPE_D), + brw_imm_d(31))); + inst->src[0].negate = true; +} + +void vec4_visitor::nir_emit_alu(nir_alu_instr *instr) { vec4_instruction *inst; @@ -1461,6 +1480,9 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) break; case nir_op_ufind_msb: + nir_emit_find_msb_using_lzd(dst, op[0], false); + break; + case nir_op_ifind_msb: { emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0])); -- 2.5.5 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev