On Thu, Mar 3, 2016 at 7:21 AM, Iago Toral <ito...@igalia.com> wrote: > On Wed, 2016-03-02 at 15:45 -0800, Matt Turner wrote: >> instructions in affected programs: 31535 -> 29966 (-4.98%) >> helped: 23 >> >> cycles in affected programs: 272648 -> 266022 (-2.43%) >> helped: 14 >> HURT: 1 >> >> The patch decreases the number of instructions in the two Unigine >> programs by: >> >> #1721: 4374 -> 4155 instructions (-5.01%) >> #1706: 3582 -> 3363 instructions (-6.11%) >> --- >> src/mesa/drivers/dri/i965/brw_fs.h | 2 ++ >> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 46 >> ++++++++++++++++++++++++++++++++ >> 2 files changed, 48 insertions(+) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h >> b/src/mesa/drivers/dri/i965/brw_fs.h >> index 7446ca1..21c7813 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs.h >> +++ b/src/mesa/drivers/dri/i965/brw_fs.h >> @@ -272,6 +272,8 @@ public: >> void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst, >> unsigned wr_mask); >> >> + bool optimize_extract_to_float(nir_alu_instr *instr, >> + const fs_reg &result); >> bool optimize_frontfacing_ternary(nir_alu_instr *instr, >> const fs_reg &result); >> >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> index db20c71..04e9b8f 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> @@ -500,6 +500,49 @@ fs_visitor::nir_emit_instr(nir_instr *instr) >> } >> } >> >> +/** >> + * Recognizes a parent instruction of nir_op_extract_* and changes the type >> to >> + * match instr. 
>> + */ >> +bool >> +fs_visitor::optimize_extract_to_float(nir_alu_instr *instr, >> + const fs_reg &result) >> +{ >> + if (!instr->src[0].src.is_ssa || >> + !instr->src[0].src.ssa->parent_instr) >> + return false; >> + >> + if (instr->src[0].src.ssa->parent_instr->type != nir_instr_type_alu) >> + return false; >> + >> + nir_alu_instr *src0 = >> + nir_instr_as_alu(instr->src[0].src.ssa->parent_instr); >> + >> + if (src0->op != nir_op_extract_u8 && src0->op != nir_op_extract_u16 && >> + src0->op != nir_op_extract_i8 && src0->op != nir_op_extract_i16) >> + return false; >> + >> + nir_const_value *element = nir_src_as_const_value(src0->src[1].src); >> + assert(element != NULL); >> + >> + enum opcode extract_op; >> + if (src0->op == nir_op_extract_u16 || src0->op == nir_op_extract_i16) { >> + assert(element->u[0] <= 1); >> + extract_op = SHADER_OPCODE_EXTRACT_WORD; >> + } else { >> + assert(element->u[0] <= 3); >> + extract_op = SHADER_OPCODE_EXTRACT_BYTE; >> + } >> + >> + fs_reg op0 = get_nir_src(src0->src[0].src); >> + op0.type = brw_type_for_nir_type(nir_op_infos[src0->op].input_types[0]); >> + op0 = offset(op0, bld, src0->src[0].swizzle[0]); >> + >> + set_saturate(instr->dest.saturate, >> + bld.emit(extract_op, result, op0, >> brw_imm_ud(element->u[0]))); > > So this relies on dead code elimination to remove the original extract > opcode, right?
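Exactly right: the optimization emits a new extract with the float destination type, and dead code elimination is expected to remove the original extract once nothing else uses its result. > Series is: > Reviewed-by: Iago Toral Quiroga <ito...@igalia.com> Thanks! _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev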