On Wed, 2016-03-02 at 15:45 -0800, Matt Turner wrote: > instructions in affected programs: 31535 -> 29966 (-4.98%) > helped: 23 > > cycles in affected programs: 272648 -> 266022 (-2.43%) > helped: 14 > HURT: 1 > > The patch decreases the number of instructions in the two Unigine > programs by: > > #1721: 4374 -> 4155 instructions (-5.01%) > #1706: 3582 -> 3363 instructions (-6.11%) > --- > src/mesa/drivers/dri/i965/brw_fs.h | 2 ++ > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 46 > ++++++++++++++++++++++++++++++++ > 2 files changed, 48 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h > b/src/mesa/drivers/dri/i965/brw_fs.h > index 7446ca1..21c7813 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.h > +++ b/src/mesa/drivers/dri/i965/brw_fs.h > @@ -272,6 +272,8 @@ public: > void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst, > unsigned wr_mask); > > + bool optimize_extract_to_float(nir_alu_instr *instr, > + const fs_reg &result); > bool optimize_frontfacing_ternary(nir_alu_instr *instr, > const fs_reg &result); > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index db20c71..04e9b8f 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -500,6 +500,49 @@ fs_visitor::nir_emit_instr(nir_instr *instr) > } > } > > +/** > + * Recognizes a parent instruction of nir_op_extract_* and changes the type > to > + * match instr. > + */ > +bool > +fs_visitor::optimize_extract_to_float(nir_alu_instr *instr, > + const fs_reg &result) > +{ > + if (!instr->src[0].src.is_ssa || > + !instr->src[0].src.ssa->parent_instr) > + return false; > + > + if (instr->src[0].src.ssa->parent_instr->type != nir_instr_type_alu) > + return false; > + > + nir_alu_instr *src0 = > + nir_instr_as_alu(instr->src[0].src.ssa->parent_instr); > + > + if (src0->op != nir_op_extract_u8 && src0->op != nir_op_extract_u16 && > + src0->op != nir_op_extract_i8 && src0->op != nir_op_extract_i16) > + return false; > + > + nir_const_value *element = nir_src_as_const_value(src0->src[1].src); > + assert(element != NULL); > + > + enum opcode extract_op; > + if (src0->op == nir_op_extract_u16 || src0->op == nir_op_extract_i16) { > + assert(element->u[0] <= 1); > + extract_op = SHADER_OPCODE_EXTRACT_WORD; > + } else { > + assert(element->u[0] <= 3); > + extract_op = SHADER_OPCODE_EXTRACT_BYTE; > + } > + > + fs_reg op0 = get_nir_src(src0->src[0].src); > + op0.type = brw_type_for_nir_type(nir_op_infos[src0->op].input_types[0]); > + op0 = offset(op0, bld, src0->src[0].swizzle[0]); > + > + set_saturate(instr->dest.saturate, > + bld.emit(extract_op, result, op0, > brw_imm_ud(element->u[0])));
So this relies on dead code elimination to remove the original extract opcode, right? Series is: Reviewed-by: Iago Toral Quiroga <ito...@igalia.com> > + return true; > +} > + > bool > fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr, > const fs_reg &result) > @@ -671,6 +714,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, > nir_alu_instr *instr) > switch (instr->op) { > case nir_op_i2f: > case nir_op_u2f: > + if (optimize_extract_to_float(instr, result)) > + return; > + > inst = bld.MOV(result, op[0]); > inst->saturate = instr->dest.saturate; > break; _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev